In [11]:
# System imports
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
import datetime
from time import process_time
import math
# Extra library imports
from PIL import Image
from scipy.optimize import minimize
import scipy.ndimage as ndi
import pandas as pd
import cv2
from skimage.draw import line
from skimage.measure import label, regionprops
import mahotas as mh
from scipy.stats import kurtosis
from scipy.stats import skew
import glob

# **SYMMETRY INDEX**

In [12]:
def symmetry_index(mask_arr):
    mask_arr = (mask_arr * 255).astype(np.uint8)
    # Get the center, size, and angle from rect
    contours, _ = cv2.findContours(mask_arr, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Combine all contours into one
    all_contours = np.concatenate(contours)

    # Find the minAreaRect for the combined contour
    rect = cv2.minAreaRect(all_contours)
    center, size, angle = rect

    # Convert the center to tuple and size to int
    center = tuple(map(int, center))
    size = tuple(map(int, size))

    # Get the rotation matrix
    M = cv2.getRotationMatrix2D(center, angle, 1)

    # Perform the affine transformation
    rotated_mask = cv2.warpAffine(mask_arr, M, mask_arr.shape[1::-1], flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0)

    # Crop the rotated mask
    x, y = center
    w, h = size
    x, y, w, h = int(x - w/2), int(y - h/2), int(w), int(h)

    # Check if the rectangle is outside the bounds of the rotated_mask
    if x < 0 or y < 0 or x + w > rotated_mask.shape[1] or y + h > rotated_mask.shape[0]:
        # Calculate the necessary padding
        padding = max(max(x, y), max(rotated_mask.shape[1] - (x + w), rotated_mask.shape[0] - (y + h))) + 1
        print('Padding:', padding)
        # Add zero padding to the image
        rotated_mask = cv2.copyMakeBorder(mask_arr, top=padding, bottom=padding, left=padding, right=padding, borderType=cv2.BORDER_CONSTANT, value=0)


        # Get the center, size, and angle from rect
        contours, _ = cv2.findContours(rotated_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Combine all contours into one
        all_contours = np.concatenate(contours)

        # Find the minAreaRect for the combined contour
        rect = cv2.minAreaRect(all_contours)
        center, size, angle = rect

        # Convert the center to tuple and size to int
        center = tuple(map(int, center))
        size = tuple(map(int, size))

        # Get the rotation matrix
        M = cv2.getRotationMatrix2D(center, angle, 1)

        # Perform the affine transformation
        rotated_mask = cv2.warpAffine(rotated_mask, M, rotated_mask.shape[1::-1], flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=0)

        # Crop the rotated mask
        x, y = center
        w, h = size
        x, y, w, h = int(x - w/2), int(y - h/2), int(w), int(h)
        
    cropped_mask = rotated_mask[y:y+h, x:x+w]

    # Flip the cropped mask around the y-axis
    flipped_mask = np.fliplr(cropped_mask)

    # Calculate the absolute difference between the original and flipped mask
    diff = np.abs(cropped_mask - flipped_mask)

    # Calculate the symmetry index
    symmetry_index = np.sum(diff) / np.sum(cropped_mask)

    return symmetry_index

# **RATIO TWO LONGEST ORTHO LINES**

In [13]:
def Longest_ortho_lines_ratio(mask_arr):
    mask_uint8 = mask_arr.astype(np.uint8)

    contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    all_contours = np.concatenate(contours)

    rect = cv2.minAreaRect(all_contours)
    box = cv2.boxPoints(rect)
    box = np.intp(box)

    width = np.sqrt((box[0,0] - box[1,0])**2 + (box[0,1] - box[1,1])**2)
    height = np.sqrt((box[1,0] - box[2,0])**2 + (box[1,1] - box[2,1])**2)

    ratio = width / height if width < height else height / width

    return ratio
    

# **RATIO NUM_PIX_BUG / NUM_PIX_IM**

In [14]:
def number_of_pixel_ratio(mask_arr, im_arr):
    # Assume mask_arr is your mask array and image is your image array

    # Calculate the number of pixels of the bug
    num_pixels_bug = np.sum(mask_arr > 0)
    # Calculate the number of pixels in the full image
    num_pixels_image = im_arr.size
    # Calculate the ratio
    ratio = num_pixels_bug / num_pixels_image

    return ratio

# **MIN,MAX,MEAN,MEDIAN,STANDARD DEVIATION RGB MASK**

In [15]:
def rgb_mask_features(mask_arr, im_arr):
    # Assuming the bug mask is stored in the variable 'mask_arr' as a numpy array

    # Get the RGB values within the bug mask
    bug_pixels = im_arr[mask_arr == 1]
    # Get the Red, Green, and Blue channels
    red_channel = bug_pixels[:, 0]
    green_channel = bug_pixels[:, 1]
    blue_channel = bug_pixels[:, 2]

    # Calculate the minimum, maximum, and mean values for each channel
    red_min = np.min(red_channel)
    red_max = np.max(red_channel)
    red_mean = np.mean(red_channel)

    green_min = np.min(green_channel)
    green_max = np.max(green_channel)
    green_mean = np.mean(green_channel)

    blue_min = np.min(blue_channel)
    blue_max = np.max(blue_channel)
    blue_mean = np.mean(blue_channel)

    # Calculate median and standard deviation for red channel
    red_median = np.median(red_channel)
    red_std = np.std(red_channel)

    # Calculate median and standard deviation for green channel
    green_median = np.median(green_channel)
    green_std = np.std(green_channel)

    # Calculate median and standard deviation for blue channel
    blue_median = np.median(blue_channel)
    blue_std = np.std(blue_channel)

    # red_features = [red_min, red_max, red_mean, red_median, red_std]
    # green_features = [green_min, green_max, green_mean, green_median, green_std]
    # blue_features = [blue_min, blue_max, blue_mean, blue_median, blue_std]

    return red_min, red_max, red_mean, red_median, red_std, green_min, green_max, green_mean, green_median, green_std, blue_min, blue_max, blue_mean, blue_median, blue_std

# **HARALICK TEXTURE FEATURE**

In [16]:
def haralick_features(mask_arr, im_arr):
    # Assuming the bug mask is stored in the variable 'mask_arr' as a numpy array

    # Convert the image to grayscale
    gray_image = mh.colors.rgb2gray(im_arr*mask_arr[:,:,None])
    gray_image = gray_image.astype(np.uint8)

    # Calculate Haralick texture features
    haralick_features = mh.features.haralick(gray_image, return_mean=True)

    return haralick_features

# **ECCENTRICITY/COMPACTNESS FEATURE**

In [17]:
def eccentricity(mask_arr):
    # Calculate the eccentricity
    props = regionprops(mask_arr)
    eccentricity = props[0].eccentricity
    perimeter = props[0].perimeter
    area = props[0].area
    compactness = 4 * math.pi * (area / (perimeter ** 2))
    return eccentricity , compactness

# **KURTHOSIS,SKEWNESS FEATURE**

In [18]:
def kurthosis_skewness(mask_arr, im_arr):
    # Assuming im_arr is your image array and mask_arr is your mask
    # Apply the mask to the image
    masked_im = im_arr * mask_arr[:,:,None]

    # Flatten the array to 1D for the calculation
    flattened_im = masked_im.flatten()

    # Calculate kurtosis
    kurt = kurtosis(flattened_im)

    # Assuming flattened_im is your flattened image array from the previous step
    # Calculate skewness
    skewness = skew(flattened_im)

    return (kurt,skewness) 

# **Creating all features in a dataset**

In [19]:
def Creating_all_features(mask_arr, im_arr):
    # Calculate the symmetry index
    symmetry = symmetry_index(mask_arr)

    longest_ortho_ratio = Longest_ortho_lines_ratio(mask_arr)
    # Calculate the number of pixels ratio
    num_pixels_ratio = number_of_pixel_ratio(mask_arr, im_arr)

    # Calculate the RGB features
    red_min, red_max, red_mean, red_median, red_std, green_min, green_max, green_mean, green_median, green_std, blue_min, blue_max, blue_mean, blue_median, blue_std = rgb_mask_features(mask_arr, im_arr)

    # Calculate the Haralick features
    # haralick = haralick_features(mask_arr, im_arr)

    # Calculate the eccentricity
    ecc , compactness = eccentricity(mask_arr)

    # Calculate the kurtosis and skewness

    # kurt, skewness = kurthosis_skewness(mask_arr, im_arr)

    return (symmetry, longest_ortho_ratio, num_pixels_ratio, red_min, red_max, red_mean, red_median, 
            red_std, green_min, green_max, green_mean, green_median, green_std, 
            blue_min, blue_max, blue_mean, blue_median, blue_std, ecc, compactness)

In [20]:
# Get the file paths of the images and masks in the train folder
image_paths = [f'../train/{i}.jpg' for i in range(1, 251)]
mask_paths = [f'../train/masks/binary_{i}.tif' for i in range(1, 251)]

image_paths.pop(153)
mask_paths.pop(153)

dataframe = pd.DataFrame(columns=['symmetry', 'longest_ortho_ratio', 'num_pixels_ratio', 'red_min', 'red_max', 'red_mean', 
                                  'red_median', 'red_std', 'green_min', 'green_max', 'green_mean', 'green_median', 'green_std', 
                                  'blue_min', 'blue_max', 'blue_mean', 'blue_median', 'blue_std', 'ecc', 'compactness'])

# Iterate over the image and mask paths
for i, (image_path, mask_path) in enumerate(zip(image_paths, mask_paths)):
    print(image_path, mask_path)
    im_arr = np.array(Image.open(image_path))
    mask_arr = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    mask_arr = np.where(mask_arr > 0, 1, 0)
    mask_arr = ndi.binary_fill_holes(mask_arr).astype(int)
    
    # Appeler la fonction Creating_all_features pour obtenir les valeurs
    features = Creating_all_features(mask_arr, im_arr)

    # Create a new DataFrame from the features and append it to df
    dataframe.loc[i] = features


../train/1.jpg ../train/masks/binary_1.tif
../train/2.jpg ../train/masks/binary_2.tif
../train/3.jpg ../train/masks/binary_3.tif
../train/4.jpg ../train/masks/binary_4.tif
../train/5.jpg ../train/masks/binary_5.tif
../train/6.jpg ../train/masks/binary_6.tif
../train/7.jpg ../train/masks/binary_7.tif
../train/8.jpg ../train/masks/binary_8.tif
../train/9.jpg ../train/masks/binary_9.tif
../train/10.jpg ../train/masks/binary_10.tif
../train/11.jpg ../train/masks/binary_11.tif
../train/12.jpg ../train/masks/binary_12.tif
../train/13.jpg ../train/masks/binary_13.tif
../train/14.jpg ../train/masks/binary_14.tif
../train/15.jpg ../train/masks/binary_15.tif
../train/16.jpg ../train/masks/binary_16.tif
../train/17.jpg ../train/masks/binary_17.tif
../train/18.jpg ../train/masks/binary_18.tif
../train/19.jpg ../train/masks/binary_19.tif
../train/20.jpg ../train/masks/binary_20.tif
../train/21.jpg ../train/masks/binary_21.tif
../train/22.jpg ../train/masks/binary_22.tif
../train/23.jpg ../train/mas

In [21]:
dataframe

Unnamed: 0,symmetry,longest_ortho_ratio,num_pixels_ratio,red_min,red_max,red_mean,red_median,red_std,green_min,green_max,green_mean,green_median,green_std,blue_min,blue_max,blue_mean,blue_median,blue_std,ecc,compactness
0,0.288274,0.837457,0.002516,5.0,208.0,68.749506,57.0,48.239988,3.0,199.0,55.627400,38.0,45.736010,0.0,193.0,40.433209,24.0,36.701656,0.388091,0.071731
1,0.327541,0.685837,0.002891,2.0,248.0,64.560638,56.0,42.435025,2.0,251.0,52.931944,34.0,43.254608,0.0,244.0,36.428700,19.0,34.992745,0.595575,0.072312
2,0.338071,0.806587,0.007395,3.0,255.0,107.320344,115.0,58.736815,0.0,255.0,87.982980,86.0,60.055950,0.0,255.0,63.508531,52.0,54.819316,0.236830,0.077042
3,0.384825,0.710961,0.004452,5.0,219.0,88.170607,89.0,46.403770,3.0,213.0,71.713538,62.0,46.297466,0.0,201.0,51.065063,37.0,37.797139,0.735058,0.067748
4,0.406315,0.749827,0.003075,6.0,255.0,123.546796,134.0,62.406482,0.0,250.0,101.315632,92.0,63.138573,0.0,245.0,81.362602,64.0,60.768198,0.650280,0.070427
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,0.346617,0.984803,0.002083,7.0,214.0,98.700785,98.0,46.799467,7.0,208.0,75.144301,70.0,43.466326,0.0,204.0,55.657727,47.0,38.473859,0.776278,0.255672
245,0.371266,0.714982,0.006683,7.0,255.0,89.277366,82.0,52.397726,2.0,255.0,65.287788,52.0,46.710060,0.0,255.0,56.712233,46.0,39.267959,0.752551,0.212459
246,0.618560,0.773536,0.002344,5.0,252.0,79.894225,60.0,54.355815,4.0,239.0,60.236453,37.0,52.164212,0.0,233.0,57.192094,31.0,51.857752,0.648535,0.172757
247,0.499115,0.944126,0.003986,6.0,255.0,140.451749,141.0,57.828557,3.0,255.0,111.768241,119.0,52.958137,3.0,255.0,103.180300,102.0,53.474601,0.389568,0.313861


In [22]:
dataframe.to_csv('dataframe.csv', index=False)


In [23]:
dataframe_import = pd.read_csv('dataframe.csv')


In [24]:
dataframe_import

Unnamed: 0,symmetry,longest_ortho_ratio,num_pixels_ratio,red_min,red_max,red_mean,red_median,red_std,green_min,green_max,green_mean,green_median,green_std,blue_min,blue_max,blue_mean,blue_median,blue_std,ecc,compactness
0,0.288274,0.837457,0.002516,5.0,208.0,68.749506,57.0,48.239988,3.0,199.0,55.627400,38.0,45.736010,0.0,193.0,40.433209,24.0,36.701656,0.388091,0.071731
1,0.327541,0.685837,0.002891,2.0,248.0,64.560638,56.0,42.435025,2.0,251.0,52.931944,34.0,43.254608,0.0,244.0,36.428700,19.0,34.992745,0.595575,0.072312
2,0.338071,0.806587,0.007395,3.0,255.0,107.320344,115.0,58.736815,0.0,255.0,87.982980,86.0,60.055950,0.0,255.0,63.508531,52.0,54.819316,0.236830,0.077042
3,0.384825,0.710961,0.004452,5.0,219.0,88.170607,89.0,46.403770,3.0,213.0,71.713538,62.0,46.297466,0.0,201.0,51.065063,37.0,37.797139,0.735058,0.067748
4,0.406315,0.749827,0.003075,6.0,255.0,123.546796,134.0,62.406482,0.0,250.0,101.315632,92.0,63.138573,0.0,245.0,81.362602,64.0,60.768198,0.650280,0.070427
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,0.346617,0.984803,0.002083,7.0,214.0,98.700785,98.0,46.799467,7.0,208.0,75.144301,70.0,43.466326,0.0,204.0,55.657727,47.0,38.473859,0.776278,0.255672
245,0.371266,0.714982,0.006683,7.0,255.0,89.277366,82.0,52.397726,2.0,255.0,65.287788,52.0,46.710060,0.0,255.0,56.712233,46.0,39.267959,0.752551,0.212459
246,0.618560,0.773536,0.002344,5.0,252.0,79.894225,60.0,54.355815,4.0,239.0,60.236453,37.0,52.164212,0.0,233.0,57.192094,31.0,51.857752,0.648535,0.172757
247,0.499115,0.944126,0.003986,6.0,255.0,140.451749,141.0,57.828557,3.0,255.0,111.768241,119.0,52.958137,3.0,255.0,103.180300,102.0,53.474601,0.389568,0.313861


: 