# In [None]:
import cv2
import numpy as np
import os
import pandas as pd

def calculate_global_color_histogram(image_path, bins=256):
    """Return the normalized global RGB color histogram of an image.

    The image is loaded with OpenCV, converted from BGR to RGB, and a
    ``bins``-bin histogram over the value range [0, 256) is computed for
    each of the three channels. The per-channel histograms are
    concatenated in red, green, blue order into one flat vector, which is
    then divided by its total so that the whole vector sums to 1.

    Args:
        image_path: Path of the image file to read.
        bins: Number of histogram bins per channel (default 256).

    Returns:
        A flat NumPy array holding the red, green and blue histograms
        back to back (``3 * bins`` values).

    Raises:
        OSError: If OpenCV cannot read or decode ``image_path``.
    """
    image = cv2.imread(image_path)

    # cv2.imread reports failure by returning None rather than raising;
    # fail here with the offending path instead of an opaque cv2.error
    # from cvtColor further down.
    if image is None:
        raise OSError(f"Could not read image file: {image_path!r}")

    # OpenCV loads images as BGR; reorder so channel 0 is red.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # One histogram per channel index: 0 = red, 1 = green, 2 = blue.
    channel_hists = [
        cv2.calcHist([rgb_image], [channel], None, [bins], [0, 256])
        for channel in range(3)
    ]

    # axis=None flattens the per-channel results into a single 1-D vector.
    global_hist = np.concatenate(channel_hists, axis=None)

    # Normalize over all three channels together.
    global_hist /= global_hist.sum()

    return global_hist

# Paths to the dataset folders, one folder per class
healthy_folder = "datasets-B/healthy"
infected_folder = "datasets-B/infected"

# Feature vectors and their class labels, appended in matching order
features = []
labels = []

# Process the classes in a fixed order: healthy images first, then infected.
class_folders = (
    (healthy_folder, "healthy"),
    (infected_folder, "infected"),
)

for folder, label in class_folders:
    # os.listdir returns entries in arbitrary order; sort them so the row
    # order of the output file is reproducible between runs.
    for filename in sorted(os.listdir(folder)):
        # Match the extension case-insensitively so that files such as
        # "IMG_1.JPG" are not silently left out of the dataset.
        if not filename.lower().endswith(".jpg"):
            continue
        image_path = os.path.join(folder, filename)
        features.append(calculate_global_color_histogram(image_path))
        labels.append(label)

# One row per image: histogram values as columns, plus a "Label" column
df = pd.DataFrame(features)
df["Label"] = labels

# Save the DataFrame to an Excel file (without the index column)
df.to_excel("GCH-FeatureExtraction-RGB.xlsx", index=False)
