In [1]:
import os
import cv2
import time
from skimage import io
import numpy as np
import pandas as pd
from skimage import feature
import re
from skimage.measure import label, regionprops
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from scipy.stats import skew, kurtosis, entropy
from tqdm import tqdm

Sort and read

In [2]:
def natural_sort_key(s):
    return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]

def read_image_data(folder_path):
    image_names = sorted(os.listdir(folder_path), key=natural_sort_key)
    if not image_names:
        print("No images found in the directory.")
        return []
    elif len(image_names) > 2:
        print(f"The names of the first three images in the directory are: {image_names[0]}, {image_names[1]}, {image_names[2]}")
    else:
        print("Not enough images to display three names.")
    image_paths = [os.path.join(folder_path, name) for name in image_names]
    return image_paths

Basic feature

In [3]:
def extract_basic_features(image_paths):
    features = []
    for image_path in tqdm(image_paths):
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, img_bin = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
        label_img = label(img_bin)
        props = regionprops(label_img)
        if props:
            largest_prop = max(props, key=lambda x: x.area)
            features.append([os.path.basename(image_path)] + [
                largest_prop.area,
                largest_prop.bbox[3] - largest_prop.bbox[1],
                largest_prop.bbox[2] - largest_prop.bbox[0],
                (largest_prop.bbox[3] - largest_prop.bbox[1]) / (largest_prop.bbox[2] - largest_prop.bbox[0]) if (largest_prop.bbox[2] - largest_prop.bbox[0]) != 0 else 0,
                largest_prop.major_axis_length,
                largest_prop.minor_axis_length,
                largest_prop.convex_area,
                cv2.arcLength(np.array(largest_prop.coords), closed=True),
                np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1], 0]),
                np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1], 1]),
                np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1], 2]),
                np.sqrt(np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1], 0])),
                np.sqrt(np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1], 1])),
                np.sqrt(np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1], 2])),
                np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]]),
                np.std(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]]),
                np.sum((img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]] - np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]])) ** 2),
                np.sum((img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]] - np.mean(img[largest_prop.coords[:, 0], largest_prop.coords[:, 1]])) ** 3),
            ])
        else:
            features.append([os.path.basename(image_path)] + [np.nan] * 18 )

    df = pd.DataFrame(features, columns=['Name', 'area', 'length', 'width', 'length_width_ratio', 'major_axis_length',
                                         'minor_axis_length', 'convex_area', 'perimeter', 'r_mean', 'g_mean', 'b_mean',
                                         'rs', 'gs', 'bs', 'mean', 'std_dev', 'uniformity', 'third_moment',])
    return df

Advange basic features

In [4]:
def extract_advanced_features(image_paths):
    features = []
    for image_path in tqdm(image_paths):
        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue

        # Preprocess the image
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, img_bin = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
        label_img = label(img_bin)
        props = regionprops(label_img)
        
        # Convert image to different color spaces
        img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        
        # Calculate additional features
        if props:
            largest_prop = max(props, key=lambda x: x.area)
            coords = largest_prop.coords
            region_pixels = img[coords[:, 0], coords[:, 1]]
            region_pixels_hsv = img_hsv[coords[:, 0], coords[:, 1]]

            r, g, b = region_pixels[:, 0], region_pixels[:, 1], region_pixels[:, 2]
            h, s, v = region_pixels_hsv[:, 0], region_pixels_hsv[:, 1], region_pixels_hsv[:, 2]
            normalized_r = r / (r + g + b + 0.01)
            normalized_g = g / (r + g + b + 0.01)
            normalized_b = b / (r + g + b + 0.01)
            brightness = 0.299 * r + 0.587 * g + 0.114 * b
            r_squared = r ** 2
            g_squared = g ** 2
            b_squared = b ** 2

            mean_hue = np.mean(h)
            mean_saturation = np.mean(s)
            hue_variance = np.var(h)
            saturation_variance = np.var(s)
            
            r_skewness = skew(r)
            g_skewness = skew(g)
            b_skewness = skew(b)
            r_kurtosis = kurtosis(r)
            g_kurtosis = kurtosis(g)
            b_kurtosis = kurtosis(b)
            
            color_entropy = entropy(np.histogram(region_pixels, bins=256)[0])
            # dominant_color = region_pixels[np.argmax(np.bincount(region_pixels[:, 0] * 256 * 256 + region_pixels[:, 1] * 256 + region_pixels[:, 2]))]
            
            feature_row = [os.path.basename(image_path)] + [
                # largest_prop.area,  # Commented out the area feature
                normalized_r.mean(), normalized_g.mean(), normalized_b.mean(),
                brightness.mean(), r_squared.mean(), g_squared.mean(), b_squared.mean(),
                mean_hue, mean_saturation, hue_variance, saturation_variance,
                r_skewness, g_skewness, b_skewness, r_kurtosis, g_kurtosis, b_kurtosis,
                color_entropy,
                # dominant_color
            ]
            features.append(feature_row)
        else:
            features.append([os.path.basename(image_path)] + [np.nan] * 18)  

    columns = ['Name', 'norm_r', 'norm_g', 'norm_b', 'brightness', 'r_squared', 'g_squared', 'b_squared',
               'mean_hue', 'mean_saturation', 'hue_variance', 'saturation_variance',
               'r_skewness', 'g_skewness', 'b_skewness', 'r_kurtosis', 'g_kurtosis', 'b_kurtosis',
               'color_entropy']
    df = pd.DataFrame(features, columns=columns)
    return df

In [5]:
# def extract_advanced_features(image_paths):
#     features = []
#     for image_path in tqdm(image_paths):
#         img = cv2.imread(image_path, cv2.IMREAD_COLOR)
#         if img is None:
#             print(f"Failed to read image: {image_path}")
#             continue

#         # Preprocess the image
#         img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#         _, img_bin = cv2.threshold(img_gray, 128, 255, cv2.THRESH_BINARY)
#         label_img = label(img_bin)
#         props = regionprops(label_img)
        
#         # Convert image to different color spaces
#         img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        
#         # Calculate additional features
#         if props:
#             largest_prop = max(props, key=lambda x: x.area)
#             coords = largest_prop.coords
#             region_pixels = img[coords[:, 0], coords[:, 1]]
#             region_pixels_hsv = img_hsv[coords[:, 0], coords[:, 1]]

#             r, g, b = region_pixels[:, 0], region_pixels[:, 1], region_pixels[:, 2]
#             h, s, v = region_pixels_hsv[:, 0], region_pixels_hsv[:, 1], region_pixels_hsv[:, 2]
#             normalized_r = r / (r + g + b + 0.01)
#             normalized_g = g / (r + g + b + 0.01)
#             normalized_b = b / (r + g + b + 0.01)
#             brightness = 0.299 * r + 0.587 * g + 0.114 * b
#             r_squared = r ** 2
#             g_squared = g ** 2
#             b_squared = b ** 2

#             mean_hue = np.mean(h)
#             mean_saturation = np.mean(s)
#             hue_variance = np.var(h)
#             saturation_variance = np.var(s)
            
#             r_skewness = skew(r)
#             g_skewness = skew(g)
#             b_skewness = skew(b)
#             r_kurtosis = kurtosis(r)
#             g_kurtosis = kurtosis(g)
#             b_kurtosis = kurtosis(b)
            
#             color_entropy = entropy(np.histogram(region_pixels, bins=256)[0])
            
#             # Calculate dominant color safely
#             try:
#                 dominant_color_index = np.argmax(np.bincount(region_pixels[:, 0] * 256 * 256 + region_pixels[:, 1] * 256 + region_pixels[:, 2]))
#                 dominant_color = np.array([dominant_color_index // (256 * 256),
#                                            (dominant_color_index % (256 * 256)) // 256,
#                                            dominant_color_index % 256])
#             except Exception as e:
#                 print(f"Error calculating dominant color for image {image_path}: {e}")
#                 dominant_color = [np.nan, np.nan, np.nan]
            
#             feature_row = [os.path.basename(image_path)] + [
#                 normalized_r.mean(), normalized_g.mean(), normalized_b.mean(),
#                 brightness.mean(), r_squared.mean(), g_squared.mean(), b_squared.mean(),
#                 mean_hue, mean_saturation, hue_variance, saturation_variance,
#                 r_skewness, g_skewness, b_skewness, r_kurtosis, g_kurtosis, b_kurtosis,
#                 color_entropy, dominant_color
#             ]
#             features.append(feature_row)
#         else:
#             features.append([os.path.basename(image_path)] + [np.nan] * 19)  # Adjusted for one less feature

#     columns = ['Name', 'norm_r', 'norm_g', 'norm_b', 'brightness', 'r_squared', 'g_squared', 'b_squared',
#                'mean_hue', 'mean_saturation', 'hue_variance', 'saturation_variance',
#                'r_skewness', 'g_skewness', 'b_skewness', 'r_kurtosis', 'g_kurtosis', 'b_kurtosis',
#                'color_entropy', 'dominant_color']
#     df = pd.DataFrame(features, columns=columns)
#     return df


LBP feature

In [6]:
def extract_LBP_features(image_paths):
    data = []
    points = 8
    radius = 1
    for image_path in tqdm(image_paths):
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is not None:
            lbp = feature.local_binary_pattern(image, points, radius, method="uniform")
            (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, points + 3), range=(0, points + 2))
            hist = hist.astype("float")
            hist /= (hist.sum() + 1e-7)
            hist_series = pd.Series(hist, name=os.path.basename(image_path))
            data.append(hist_series)
        else:
            print(f"Failed to read {image_path}")
            data.append(pd.Series([np.nan]*10, name=os.path.basename(image_path)))
    lbp_df = pd.DataFrame(data).reset_index().rename(columns={"index": "Name"})
    return lbp_df

GIST feature

In [7]:
def extract_gist_features(image_paths, orientations=8, blocks=4):
    descriptors = []
    for path in tqdm(image_paths):
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Failed to read image: {path}")
            descriptors.append([os.path.basename(path)] + [np.nan]*(orientations*blocks*blocks))
            continue
        height, width = img.shape
        cell_size = min(height, width) // blocks
        gx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
        gy = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(gx**2 + gy**2)
        gradient_orientation = np.arctan2(gy, gx) * (180 / np.pi) + 180
        gradient_orientation_bins = np.floor(gradient_orientation / (360 / orientations)).astype(int)
        descriptor = np.zeros(orientations * blocks * blocks)
        for i in range(blocks):
            for j in range(blocks):
                cell_hist = np.zeros(orientations)
                for ii in range(cell_size):
                    for jj in range(cell_size):
                        x = i * cell_size + ii
                        y = j * cell_size + jj
                        if x >= height or y >= width:
                            continue
                        bin_idx = gradient_orientation_bins[x, y] % orientations
                        cell_hist[bin_idx] += gradient_magnitude[x, y]
                descriptor[(i * blocks + j) * orientations:(i * blocks + j + 1) * orientations] = cell_hist
        descriptor /= (np.sum(descriptor) + 1e-7)
        descriptors.append([os.path.basename(path)] + descriptor.tolist())
    gist_df = pd.DataFrame(descriptors, columns=['Name'] + [f'GIST_{i}' for i in range(orientations * blocks * blocks)])
    return gist_df

GLCM feature

In [10]:
def GLCM_all(image_paths, distance=3):
    list_GLCM = []
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    for image_path in tqdm(image_paths):
        img = io.imread(image_path, as_gray=True, plugin='pil')
        if img is None:
            print(f"Failed to read image: {image_path}")
            continue
        img = img.astype(np.uint8)
        
        glcm = graycomatrix(img, [distance], angles, 256, symmetric=True, normed=True)
        features = []
        for angle in angles:
            features.extend([
                graycoprops(glcm, 'contrast')[0, 0],
                graycoprops(glcm, 'correlation')[0, 0],
                graycoprops(glcm, 'energy')[0, 0],
                graycoprops(glcm, 'homogeneity')[0, 0]
            ])
        columns = [f"{prop}_{int(np.degrees(angle))}" for prop in ('contrast', 'correlation', 'energy', 'homogeneity') for angle in angles]
        features_df = pd.DataFrame([features], columns=columns)
        features_df['Name'] = os.path.basename(image_path)
        
        features_df = features_df[['Name'] + [col for col in columns]]
        
        list_GLCM.append(features_df)
    glcm_df = pd.concat(list_GLCM, ignore_index=True)
    return glcm_df


In [9]:
# def GLCM_all(image_paths, distance=3):
#     list_GLCM = []
#     angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
#     for image_path in tqdm(image_paths):
#         img = io.imread(image_path, as_gray=True)
#         if img is None:
#             print(f"Failed to read image: {image_path}")
#             continue
#         img = img.astype(np.uint8)
        
#         glcm = graycomatrix(img, [distance], angles, 256, symmetric=True, normed=True)
#         features = []
#         for angle in angles:
#             features.extend([
#                 graycoprops(glcm, 'contrast')[0, 0],
#                 graycoprops(glcm, 'correlation')[0, 0],
#                 graycoprops(glcm, 'energy')[0, 0],
#                 graycoprops(glcm, 'homogeneity')[0, 0]
#             ])
#         columns = [f"{prop}_{int(np.degrees(angle))}" for prop in ('contrast', 'correlation', 'energy', 'homogeneity') for angle in angles]
#         features_df = pd.DataFrame([features], columns=columns)
#         features_df['name'] = os.path.basename(image_path)
#         list_GLCM.append(features_df)
#     glcm_df = pd.concat(list_GLCM, ignore_index=True)
#     return glcm_df

SIFT features


In [None]:
# folder_path = r'D:\PROJECTWORSHOP\Soybean_Seeds\Pistachio_Image_Dataset\Kirmizi_Pistachio'
folder_path = r'D:\PROJECTWORSHOP\Potato Leaf Disease\Potato Leaf Disease Dataset in Uncontrolled Environment\Virus'
image_paths = read_image_data(folder_path)

####Extract Features

basic_features_df = extract_basic_features(image_paths)
# basic_advanced_df = extract_advanced_features(image_paths)
# lbp_features_df = extract_LBP_features(image_paths)
# gist_features_df = extract_gist_features(image_paths)
# glcm_features_df = GLCM_all(image_paths)

The names of the first three images in the directory are: 20230712_132401.jpg, 20230712_132441.jpg, 20230712_132641.jpg


100%|██████████| 532/532 [11:06<00:00,  1.25s/it]


In [25]:
# final_df1 = pd.merge(basic_advanced_df, gist_features_df, on='Name', how='inner')
# final_df2 = pd.merge(final_df1, lbp_features_df, on='Name', how='inner')
# final_df = pd.merge(final_df2, glcm_features_df)

final_df = pd.DataFrame(basic_features_df)
# final_df['class'] = 'Bacteria'

csv_file_path = 'add_new_Virus.csv'

final_df.to_csv(csv_file_path, index=False)

In [26]:
final_df.shape

(532, 19)

In [27]:
df1 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Bacteria.csv")
df2 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Fungi.csv")
df3 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Healthy.csv")
df4 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Nematode.csv")
df5 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Pest.csv")
df6 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Phytopthora.csv")
df7 = pd.read_csv(r"D:\PROJECTWORSHOP\Potato Leaf Disease\Code File\add_new_Virus.csv")

df1.shape, df2.shape, df3.shape, df4.shape, df5.shape, df6.shape, df7.shape

((569, 19), (748, 19), (201, 19), (68, 19), (611, 19), (347, 19), (532, 19))

In [28]:
result = pd.concat([df1, df2, df3, df4, df5, df6, df7], ignore_index=True)
result.to_csv("add_new_Potato_Leaf_Disease.csv")

In [51]:
df_final = pd.read_csv('Potato_Leaf_Disease.csv')
df_final_drop = df_final.drop(df_final.columns[:2], axis = 1)
df_final_drop.to_csv("Pototo_Leaf_Disease_Drop")

In [58]:
df_final_drop.to_csv("Potato_Leaf_Disease_Drop.csv")

In [60]:
df_final_drop.shape

(3076, 173)

In [52]:
df3= pd.read_csv('hat_cuoi.csv')
df4 = df3.drop(df3.columns[[0, 1]], axis=1)
df4.head()
df4.to_csv('hat_cuoi_basic_glcm_lbp.csv', index= False)

In [51]:
df4.head()

Unnamed: 0,area,length,width,length_width_ratio,major_axis_length,minor_axis_length,convex_area,perimeter,r_mean,g_mean,...,1,2,3,4,5,6,7,8,9,class
0,89673.0,260,459,0.566449,455.995692,254.486127,92794.0,182549.682674,193.28266,204.596936,...,0.015058,0.009503,0.032839,0.054997,0.056311,0.024992,0.018933,0.756933,0.022592,Kirmizi_Pistachio
1,77242.0,306,386,0.792746,419.518438,235.842058,80317.0,155983.720852,171.47201,187.837951,...,0.010692,0.006278,0.029842,0.047219,0.055489,0.022267,0.016508,0.789869,0.016433,Kirmizi_Pistachio
2,91228.0,368,410,0.897561,490.239529,239.81988,93077.0,183450.903866,191.164193,194.322412,...,0.013833,0.011358,0.037514,0.063042,0.0586,0.025261,0.016306,0.743939,0.022367,Kirmizi_Pistachio
3,93910.0,300,456,0.657895,485.497337,249.129221,94954.0,187818.199663,184.290374,199.038196,...,0.014108,0.010272,0.036328,0.060631,0.060633,0.025722,0.018108,0.745097,0.021761,Kirmizi_Pistachio
4,72568.0,228,414,0.550725,417.2703,228.51545,75303.0,148327.50596,216.744088,231.689395,...,0.007658,0.003603,0.016697,0.030242,0.041675,0.011364,0.011147,0.862417,0.0111,Kirmizi_Pistachio
