In [1]:
# System imports
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
import datetime
from time import process_time

# Extra library imports
from PIL import Image
from scipy.optimize import minimize
import scipy.ndimage as ndi
import pandas as pd
import cv2
from skimage.draw import line
from skimage.measure import label, regionprops
import mahotas as mh
from scipy.stats import kurtosis
from scipy.stats import skew
import glob

# **SYMMETRY INDEX**

In [2]:
def symmetry_index(mask_arr):
    # Flip the mask around the y-axis
    flipped_mask = np.fliplr(mask_arr)
    # Calculate the absolute difference between the original and flipped mask
    diff = np.abs(mask_arr - flipped_mask)

    # Calculate the symmetry index
    symmetry_index = np.sum(diff) / np.sum(mask_arr)

    return symmetry_index

# **RATIO TWO LONGEST ORTHO LINES**

In [3]:
def Longest_ortho_lines_ratio(mask_arr,im_arr):
    return 0

# **RATIO NUM_PIX_BUG / NUM_PIX_IM**

In [4]:
def number_of_pixel_ratio(mask_arr, im_arr):
    # Assume mask_arr is your mask array and image is your image array

    # Calculate the number of pixels of the bug
    num_pixels_bug = np.sum(mask_arr > 0)
    # Calculate the number of pixels in the full image
    num_pixels_image = im_arr.size
    # Calculate the ratio
    ratio = num_pixels_bug / num_pixels_image

    return ratio

# **MIN,MAX,MEAN,MEDIAN,STANDARD DEVIATION RGB MASK**

In [5]:
def rgb_mask_features(mask_arr, im_arr):
    # Assuming the bug mask is stored in the variable 'mask_arr' as a numpy array

    # Get the RGB values within the bug mask
    bug_pixels = im_arr[mask_arr == 1]
    # Get the Red, Green, and Blue channels
    red_channel = bug_pixels[:, 0]
    green_channel = bug_pixels[:, 1]
    blue_channel = bug_pixels[:, 2]

    # Calculate the minimum, maximum, and mean values for each channel
    red_min = np.min(red_channel)
    red_max = np.max(red_channel)
    red_mean = np.mean(red_channel)

    green_min = np.min(green_channel)
    green_max = np.max(green_channel)
    green_mean = np.mean(green_channel)

    blue_min = np.min(blue_channel)
    blue_max = np.max(blue_channel)
    blue_mean = np.mean(blue_channel)

    # Calculate median and standard deviation for red channel
    red_median = np.median(red_channel)
    red_std = np.std(red_channel)

    # Calculate median and standard deviation for green channel
    green_median = np.median(green_channel)
    green_std = np.std(green_channel)

    # Calculate median and standard deviation for blue channel
    blue_median = np.median(blue_channel)
    blue_std = np.std(blue_channel)

    # red_features = [red_min, red_max, red_mean, red_median, red_std]
    # green_features = [green_min, green_max, green_mean, green_median, green_std]
    # blue_features = [blue_min, blue_max, blue_mean, blue_median, blue_std]

    return red_min, red_max, red_mean, red_median, red_std, green_min, green_max, green_mean, green_median, green_std, blue_min, blue_max, blue_mean, blue_median, blue_std

# **HARALICK TEXTURE FEATURE**

In [6]:
def haralick_features(mask_arr, im_arr):
    # Assuming the bug mask is stored in the variable 'mask_arr' as a numpy array

    # Convert the image to grayscale
    gray_image = mh.colors.rgb2gray(im_arr*mask_arr[:,:,None])
    gray_image = gray_image.astype(np.uint8)

    # Calculate Haralick texture features
    haralick_features = mh.features.haralick(gray_image, return_mean=True)

    return haralick_features

# **ECCENTRICITY FEATURE**

In [7]:
def eccentricity(mask_arr):
    # Calculate the eccentricity
    props = regionprops(mask_arr)
    eccentricity = props[0].eccentricity
    perimeter = props[0].perimeter
    area = props[0].area
    compactness = (perimeter ** 2) / area
    return eccentricity , compactness

# **KURTHOSIS,SKEWNESS FEATURE**

In [8]:
def kurthosis_skewness(mask_arr, im_arr):
    # Assuming im_arr is your image array and mask_arr is your mask
    # Apply the mask to the image
    masked_im = im_arr * mask_arr[:,:,None]

    # Flatten the array to 1D for the calculation
    flattened_im = masked_im.flatten()

    # Calculate kurtosis
    kurt = kurtosis(flattened_im)

    # Assuming flattened_im is your flattened image array from the previous step
    # Calculate skewness
    skewness = skew(flattened_im)

    return (kurt,skewness) 

# **Creating all features in a dataset**

In [9]:
def Creating_all_features(mask_arr, im_arr):
    # Calculate the symmetry index
    symmetry = symmetry_index(mask_arr)

    longest_ratio = Longest_ortho_lines_ratio(mask_arr,im_arr)
    # Calculate the number of pixels ratio
    num_pixels_ratio = number_of_pixel_ratio(mask_arr, im_arr)

    # Calculate the RGB features
    red_min, red_max, red_mean, red_median, red_std, green_min, green_max, green_mean, green_median, green_std, blue_min, blue_max, blue_mean, blue_median, blue_std = rgb_mask_features(mask_arr, im_arr)

    # Calculate the Haralick features
    # haralick = haralick_features(mask_arr, im_arr)

    # Calculate the eccentricity
    ecc , compactness = eccentricity(mask_arr)

    # Calculate the kurtosis and skewness
    kurt, skewness = kurthosis_skewness(mask_arr, im_arr)

    return (symmetry, longest_ratio, num_pixels_ratio, red_min, red_max, red_mean, red_median, 
            red_std, green_min, green_max, green_mean, green_median, green_std, 
            blue_min, blue_max, blue_mean, blue_median, blue_std, ecc, compactness, kurt, skewness)

In [10]:
# Get the file paths of the images and masks in the train folder
image_paths = [f'../train/{i}.jpg' for i in range(1, 21)]
mask_paths = [f'../train/masks/binary_{i}.tif' for i in range(1, 21)]


dataframe = pd.DataFrame(columns=['symmetry', 'longest_ratio', 'num_pixels_ratio', 'red_min', 'red_max', 'red_mean', 
                                  'red_median', 'red_std', 'green_min', 'green_max', 'green_mean', 'green_median', 'green_std', 
                                  'blue_min', 'blue_max', 'blue_mean', 'blue_median', 'blue_std', 'ecc', 'campactness', 'kurt', 'skewness'])

# Iterate over the image and mask paths
for i, (image_path, mask_path) in enumerate(zip(image_paths, mask_paths)):
    print(image_path, mask_path)
    im_arr = np.array(Image.open(image_path))
    mask_arr = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    mask_arr = np.where(mask_arr > 0, 1, 0)

    # Appeler la fonction Creating_all_features pour obtenir les valeurs
    features = Creating_all_features(mask_arr, im_arr)

    # Create a new DataFrame from the features and append it to df
    dataframe.loc[i] = features


../train/1.jpg ../train/masks/binary_1.tif
../train/2.jpg ../train/masks/binary_2.tif
../train/3.jpg ../train/masks/binary_3.tif
../train/4.jpg ../train/masks/binary_4.tif
../train/5.jpg ../train/masks/binary_5.tif
../train/6.jpg ../train/masks/binary_6.tif
../train/7.jpg ../train/masks/binary_7.tif
../train/8.jpg ../train/masks/binary_8.tif
../train/9.jpg ../train/masks/binary_9.tif
../train/10.jpg ../train/masks/binary_10.tif
../train/11.jpg ../train/masks/binary_11.tif
../train/12.jpg ../train/masks/binary_12.tif
../train/13.jpg ../train/masks/binary_13.tif
../train/14.jpg ../train/masks/binary_14.tif
../train/15.jpg ../train/masks/binary_15.tif
../train/16.jpg ../train/masks/binary_16.tif
../train/17.jpg ../train/masks/binary_17.tif
../train/18.jpg ../train/masks/binary_18.tif
../train/19.jpg ../train/masks/binary_19.tif
../train/20.jpg ../train/masks/binary_20.tif


In [11]:
dataframe

Unnamed: 0,symmetry,longest_ratio,num_pixels_ratio,red_min,red_max,red_mean,red_median,red_std,green_min,green_max,...,green_std,blue_min,blue_max,blue_mean,blue_median,blue_std,ecc,campactness,kurt,skewness
0,0.695058,0.0,0.002476,5.0,208.0,68.085745,56.0,47.955399,3.0,199.0,...,45.311197,0.0,193.0,39.891969,24.0,36.250747,0.392019,332.4543,415.810811,19.208069
1,0.833665,0.0,0.002851,2.0,248.0,63.786498,55.0,42.153508,2.0,251.0,...,42.866124,0.0,244.0,35.735147,19.0,34.617117,0.595904,316.780238,330.090332,17.303837
2,1.584773,0.0,0.007364,3.0,255.0,107.176333,114.0,58.771359,0.0,255.0,...,60.057604,0.0,255.0,63.383043,52.0,54.823596,0.240295,220.183483,95.486536,9.429777
3,1.989106,0.0,0.004396,5.0,219.0,87.588243,88.0,46.322587,3.0,213.0,...,46.08576,0.0,201.0,50.432535,36.0,37.445621,0.735434,357.688215,155.355812,12.064838
4,1.265982,0.0,0.003055,6.0,255.0,123.177003,134.0,62.355944,0.0,250.0,...,63.029355,0.0,245.0,80.94617,63.0,60.624962,0.650599,239.881985,203.554722,13.915533
5,1.989437,0.0,0.001972,7.0,201.0,100.47562,109.0,49.63504,4.0,199.0,...,51.367936,0.0,204.0,68.36625,55.0,48.982371,0.76447,188.368672,316.07162,17.277534
6,2.0,0.0,0.001886,11.0,238.0,91.578963,87.0,41.37875,9.0,234.0,...,36.760082,4.0,223.0,50.99799,42.0,34.731295,0.900116,196.610653,413.600942,18.821637
7,2.0,0.0,0.002485,4.0,168.0,55.604294,47.0,35.667323,4.0,171.0,...,35.565191,3.0,183.0,36.929927,24.0,37.342313,0.875602,222.535541,520.105458,20.903357
8,1.498709,0.0,0.0024,6.0,255.0,87.620073,93.0,52.732799,3.0,255.0,...,51.894715,0.0,254.0,68.250917,57.0,54.6318,0.513316,237.435868,291.35268,16.514611
9,1.36368,0.0,0.005261,4.0,255.0,82.9767,80.0,52.421581,0.0,255.0,...,48.940956,0.0,255.0,60.440382,45.0,50.82664,0.472818,159.965783,147.189839,11.632354


In [12]:
dataframe.to_csv('dataframe.csv', index=False)


In [13]:
dataframe_import = pd.read_csv('dataframe.csv')


In [14]:
dataframe_import

Unnamed: 0,symmetry,longest_ratio,num_pixels_ratio,red_min,red_max,red_mean,red_median,red_std,green_min,green_max,...,green_std,blue_min,blue_max,blue_mean,blue_median,blue_std,ecc,campactness,kurt,skewness
0,0.695058,0.0,0.002476,5.0,208.0,68.085745,56.0,47.955399,3.0,199.0,...,45.311197,0.0,193.0,39.891969,24.0,36.250747,0.392019,332.4543,415.810811,19.208069
1,0.833665,0.0,0.002851,2.0,248.0,63.786498,55.0,42.153508,2.0,251.0,...,42.866124,0.0,244.0,35.735147,19.0,34.617117,0.595904,316.780238,330.090332,17.303837
2,1.584773,0.0,0.007364,3.0,255.0,107.176333,114.0,58.771359,0.0,255.0,...,60.057604,0.0,255.0,63.383043,52.0,54.823596,0.240295,220.183483,95.486536,9.429777
3,1.989106,0.0,0.004396,5.0,219.0,87.588243,88.0,46.322587,3.0,213.0,...,46.08576,0.0,201.0,50.432535,36.0,37.445621,0.735434,357.688215,155.355812,12.064838
4,1.265982,0.0,0.003055,6.0,255.0,123.177003,134.0,62.355944,0.0,250.0,...,63.029355,0.0,245.0,80.94617,63.0,60.624962,0.650599,239.881985,203.554722,13.915533
5,1.989437,0.0,0.001972,7.0,201.0,100.47562,109.0,49.63504,4.0,199.0,...,51.367936,0.0,204.0,68.36625,55.0,48.982371,0.76447,188.368672,316.07162,17.277534
6,2.0,0.0,0.001886,11.0,238.0,91.578963,87.0,41.37875,9.0,234.0,...,36.760082,4.0,223.0,50.99799,42.0,34.731295,0.900116,196.610653,413.600942,18.821637
7,2.0,0.0,0.002485,4.0,168.0,55.604294,47.0,35.667323,4.0,171.0,...,35.565191,3.0,183.0,36.929927,24.0,37.342313,0.875602,222.535541,520.105458,20.903357
8,1.498709,0.0,0.0024,6.0,255.0,87.620073,93.0,52.732799,3.0,255.0,...,51.894715,0.0,254.0,68.250917,57.0,54.6318,0.513316,237.435868,291.35268,16.514611
9,1.36368,0.0,0.005261,4.0,255.0,82.9767,80.0,52.421581,0.0,255.0,...,48.940956,0.0,255.0,60.440382,45.0,50.82664,0.472818,159.965783,147.189839,11.632354
