In [1]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import csv 

from skimage.feature import graycomatrix, graycoprops
from sklearn.cluster import KMeans

In [2]:
def image_processing(img):
    #1) Image Scaling to 256x256
    img_resized = img.resize((256, 256))

    #2) Min-Max Linear Contrast
    # Convert the resized image to numpy array
    img_array = np.array(img_resized)
    # Find the minimum and maximum pixel values
    min_val = np.min(img_array)
    max_val = np.max(img_array)
    # Apply Min-Max Linear Contrast Stretch
    img_stretched = ((img_array - min_val) / (max_val - min_val)) * 255
    img_stretched = np.clip(img_stretched, 0, 255).astype(np.uint8)

    # 3) Color Transformation (RGB to HSI and use only the Hue channel)
    # Convert RGB image to HSV
    img_hsv = cv2.cvtColor(img_stretched, cv2.COLOR_RGB2HSV)
    # Extract the Hue channel and set all channels of img_stretched to it
    img_stretched[:,:,0] = img_hsv[:,:,0]
    img_stretched[:,:,1] = img_hsv[:,:,0]
    img_stretched[:,:,2] = img_hsv[:,:,0]
    return img_stretched

In [3]:
def image_segmentation(img_hue, num_clusters = 3):
    #4) K-means clustering
    # Flatten the image array to a 2D array (pixels as rows, RGB as columns)
    img_flat = img_hue.reshape((-1, 3))
    kmeans = KMeans(n_clusters=num_clusters, random_state=22, n_init=10)
    kmeans.fit(img_flat)
    return kmeans.labels_

In [4]:
def feature_extraction(label, cluster_labels, num_clusters, angles, properties):
    grayscale_image = np.zeros_like(cluster_labels, dtype=np.uint8)
    # Map cluster labels to grayscale values using a linear or custom mapping
    grayscale_mapping = np.linspace(0, 255, num_clusters + 1)[1:]  # Linear mapping
    # grayscale_mapping = custom_mapping  # For custom mapping
    grayscale_image[cluster_labels == 0] = grayscale_mapping[0]
    for i in range(1, num_clusters):
        grayscale_image[cluster_labels == i] = grayscale_mapping[i]

    #Apply GLCM
    distances = [1]    
    glcm = graycomatrix(grayscale_image, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

    # Extract GLCM properties
    #features = np.hstack([graycoprops(glcm, prop).ravel() for prop in properties])
    features = []
    glcm_props = [propery for name in properties for propery in graycoprops(glcm, name)[0]]
    for item in glcm_props:
        features.append(item)
    features.append(label) 
    return features

In [5]:
def save_features(csv_file_path, angles, properties, features):
    columns = []
    for name in properties:
        for ang in angles:
            columns.append(name + "_" + ang)
    columns.append("label")
    #print(columns)
    
    # Check file existence and create if needed
    if not os.path.exists(csv_file_path):
        with open(csv_file_path, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile)
            csv_writer.writerow(columns)

    # Write data for each image (append mode)
    with open(csv_file_path, 'a', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(features)

    print(columns)
    print(features)

In [38]:
def processImages(data_dir, csv_file_path, printImages = True, num_clusters = 3, angles = ['0', '45', '90','135'], properties = ['contrast', 'correlation', 'energy', 'homogeneity',  'ASM', 'dissimilarity']):
    print("====================================================")
    print("==================Image Processing==================")
    print("====================================================")
    # Delete file if it exist
    if os.path.exists(csv_file_path):
        os.remove(csv_file_path)
        print(f"CSV file '{csv_file_path}' deleted successfully.")
    else:
        print(f"CSV file '{csv_file_path}' not found.")

    count = 0
    for class_dir in os.listdir(data_dir):
        class_label = class_dir  # class_label is the folder name
        for img_file in os.listdir(os.path.join(data_dir, class_dir)):
            count += 1 
            img_path = os.path.join(data_dir, class_dir, img_file)
            img = Image.open(img_path)

            img_hue = image_processing(img)

            cluster_labels = image_segmentation(img_hue, num_clusters).reshape(img_hue.shape[:2])

            features = feature_extraction(class_label, cluster_labels, num_clusters, angles, properties)
            print(count, class_label, img_file, "-------------------------------------------------------------")
            save_features(csv_file_path, angles, properties, features)
            if printImages:
                img_array = np.array(img_hue)
                # Reshape the labels to the shape of the original image
                cluster_labels = cluster_labels.reshape(img_array.shape[:2])

                fig, axes = plt.subplots(1, 2, figsize=(5, 5))
                axes[0].axis('off')
                axes[0].imshow(img)
                axes[1].axis('off')
                axes[1].imshow(cluster_labels, cmap='viridis')
                plt.show()

    print("Done!!")

In [74]:
data_dir = "D:/pythonProject/datasets/test-dataset/segmented/train-40"
csv_file_path = os.path.join("D:/pythonProject/Image-research/vision/train-40_data.csv")
processImages(data_dir, csv_file_path, printImages=False, num_clusters=3)

CSV file 'D:/pythonProject/Image-research/vision/train-40_data.csv' deleted successfully.
1 Pepper,_bell___Bacterial_spot 00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132_final_masked.jpg -------------------------------------------------------------
['contrast_0', 'contrast_45', 'contrast_90', 'contrast_135', 'correlation_0', 'correlation_45', 'correlation_90', 'correlation_135', 'energy_0', 'energy_45', 'energy_90', 'energy_135', 'homogeneity_0', 'homogeneity_45', 'homogeneity_90', 'homogeneity_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135', 'dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90', 'dissimilarity_135', 'label']
[308.125, 412.55555555555554, 269.27734375, 308.125, 0.933363574143275, 0.9107804083099893, 0.9417483337358321, 0.933363574143275, 0.67208880062775, 0.6675017350659574, 0.6735472562590353, 0.67208880062775, 0.9711891707984374, 0.9616348041967208, 0.9744516600351328, 0.9711891707984374, 0.45170335592924743, 0.44555856631606366, 0.45366590641407456, 0.4517033

In [45]:
data_dir = "D:/pythonProject/datasets/test-dataset/segmented/train-50"
csv_file_path = os.path.join("D:/pythonProject/Image-research/vision/train-50_data.csv")

#processImages(data_dir, csv_file_path)

#processImages(data_dir, csv_file_path, angles=['0'])

properties = ['contrast', 'correlation', 'energy', 'homogeneity',  'ASM', 'dissimilarity']
#properties = ['ASM']
#properties = ['contrast']
processImages(data_dir, csv_file_path, printImages=False, properties=properties)

CSV file 'D:/pythonProject/Image-research/vision/train-50_data.csv' deleted successfully.
1 Pepper,_bell___Bacterial_spot 00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132_final_masked.jpg -------------------------------------------------------------
['contrast_0', 'contrast_45', 'contrast_90', 'contrast_135', 'correlation_0', 'correlation_45', 'correlation_90', 'correlation_135', 'energy_0', 'energy_45', 'energy_90', 'energy_135', 'homogeneity_0', 'homogeneity_45', 'homogeneity_90', 'homogeneity_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135', 'dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90', 'dissimilarity_135', 'label']
[308.125, 412.55555555555554, 269.27734375, 308.125, 0.933363574143275, 0.9107804083099893, 0.9417483337358321, 0.933363574143275, 0.67208880062775, 0.6675017350659574, 0.6735472562590353, 0.67208880062775, 0.9711891707984374, 0.9616348041967208, 0.9744516600351328, 0.9711891707984374, 0.45170335592924743, 0.44555856631606366, 0.45366590641407456, 0.4517033

In [46]:
data_dir = "D:/pythonProject/datasets/test-dataset/segmented/train-50b"
csv_file_path = os.path.join("D:/pythonProject/Image-research/vision/train-50b_data.csv")
processImages(data_dir, csv_file_path, printImages=False)

CSV file 'D:/pythonProject/Image-research/vision/train-50b_data.csv' deleted successfully.
1 Pepper,_bell___Bacterial_spot 00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132_final_masked.jpg -------------------------------------------------------------
['contrast_0', 'contrast_45', 'contrast_90', 'contrast_135', 'correlation_0', 'correlation_45', 'correlation_90', 'correlation_135', 'energy_0', 'energy_45', 'energy_90', 'energy_135', 'homogeneity_0', 'homogeneity_45', 'homogeneity_90', 'homogeneity_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135', 'dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90', 'dissimilarity_135', 'label']
[308.125, 412.55555555555554, 269.27734375, 308.125, 0.933363574143275, 0.9107804083099893, 0.9417483337358321, 0.933363574143275, 0.67208880062775, 0.6675017350659574, 0.6735472562590353, 0.67208880062775, 0.9711891707984374, 0.9616348041967208, 0.9744516600351328, 0.9711891707984374, 0.45170335592924743, 0.44555856631606366, 0.45366590641407456, 0.451703

In [75]:
data_dir = "D:/pythonProject/datasets/test-dataset/color/train-50"
csv_file_path = "D:/pythonProject/Image-research/vision/trainColor-50_data.csv"

processImages(data_dir, csv_file_path, printImages=False, num_clusters=2)

CSV file 'D:/pythonProject/Image-research/vision/trainColor-50_data.csv' deleted successfully.
1 Pepper,_bell___Bacterial_spot 00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132.JPG -------------------------------------------------------------
['contrast_0', 'contrast_45', 'contrast_90', 'contrast_135', 'correlation_0', 'correlation_45', 'correlation_90', 'correlation_135', 'energy_0', 'energy_45', 'energy_90', 'energy_135', 'homogeneity_0', 'homogeneity_45', 'homogeneity_90', 'homogeneity_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135', 'dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90', 'dissimilarity_135', 'label']
[159.12156862745098, 198.29616301422533, 155.85882352941175, 159.12156862745098, 0.980503143720815, 0.9756905670030745, 0.9809031008922553, 0.980503143720815, 0.701605459945619, 0.700120944716222, 0.7017413031581775, 0.701605459945619, 0.9902885829339366, 0.9878977013723391, 0.9904877129368683, 0.9902885829339366, 0.49225022142550345, 0.49016933723033523, 0.4924408

In [98]:
data_dir = "D:/pythonProject/datasets/test-dataset/segmented/trainb-50"
csv_file_path = "D:/pythonProject/Image-research/vision/train-b-50_data.csv"

properties = ['contrast', 'correlation', 'energy', 'homogeneity',  'ASM', 'dissimilarity']
processImages(data_dir, csv_file_path, printImages=False, properties=properties, num_clusters=3)

CSV file 'D:/pythonProject/Image-research/vision/train-b-50_data.csv' deleted successfully.
1 Tomato___Early_blight 004cbe60-8ff9-4965-92df-e86694d5e9ba___RS_Erly.B 8253_final_masked.jpg -------------------------------------------------------------
['contrast_0', 'contrast_45', 'contrast_90', 'contrast_135', 'correlation_0', 'correlation_45', 'correlation_90', 'correlation_135', 'energy_0', 'energy_45', 'energy_90', 'energy_135', 'homogeneity_0', 'homogeneity_45', 'homogeneity_90', 'homogeneity_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135', 'dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90', 'dissimilarity_135', 'label']
[333.5807291666667, 483.33333333333314, 368.0013020833334, 333.5807291666667, 0.9277905733023949, 0.8950967295242874, 0.9204514714843329, 0.9277905733023949, 0.7090787747114814, 0.7043129558495828, 0.70925970777081, 0.7090787747114814, 0.9666095716177725, 0.9536850524959878, 0.9655679203250676, 0.9666095716177725, 0.5027927087463357, 0.49605673977757625, 0.503049

In [102]:
data_dir = "D:/pythonProject/datasets/test-dataset/segmented/train-mixed"
csv_file_path = "D:/pythonProject/Image-research/vision/train-mixed_data.csv"

properties = ['contrast', 'correlation', 'energy', 'homogeneity',  'ASM', 'dissimilarity']
processImages(data_dir, csv_file_path, printImages=False, properties=properties, num_clusters=3)

CSV file 'D:/pythonProject/Image-research/vision/train-mixed_data.csv' deleted successfully.
1 Pepper,_bell___Bacterial_spot 00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132_final_masked.jpg -------------------------------------------------------------
['contrast_0', 'contrast_45', 'contrast_90', 'contrast_135', 'correlation_0', 'correlation_45', 'correlation_90', 'correlation_135', 'energy_0', 'energy_45', 'energy_90', 'energy_135', 'homogeneity_0', 'homogeneity_45', 'homogeneity_90', 'homogeneity_135', 'ASM_0', 'ASM_45', 'ASM_90', 'ASM_135', 'dissimilarity_0', 'dissimilarity_45', 'dissimilarity_90', 'dissimilarity_135', 'label']
[308.125, 412.55555555555554, 269.27734375, 308.125, 0.933363574143275, 0.9107804083099893, 0.9417483337358321, 0.933363574143275, 0.67208880062775, 0.6675017350659574, 0.6735472562590353, 0.67208880062775, 0.9711891707984374, 0.9616348041967208, 0.9744516600351328, 0.9711891707984374, 0.45170335592924743, 0.44555856631606366, 0.45366590641407456, 0.4517