In [19]:
import numpy as np
import pandas as pd
from scipy.ndimage import uniform_filter
from scipy.stats import entropy
import cv2
import os

In [11]:
def extract_rgb_features(image_path):
    """Computes RGB color statistics for a given image."""
    
    image = cv2.imread(image_path)
    image = image.astype(np.float32)

    # Split into RGB channels
    B, G, R = cv2.split(image)

    # Create a mask to ignore black pixels (where all channels are 0)
    maskR = (R > 0)
    maskG = (G > 0)
    maskB = (B > 0)

    # Compute mean for each channel
    mean_R = np.mean(R[maskR]) if np.any(maskR) else 0
    mean_G = np.mean(G[maskG]) if np.any(maskG) else 0
    mean_B = np.mean(B[maskB]) if np.any(maskB) else 0

    # Compute Mean (R - G) ignoring black pixels
    maskR_G = (R>0) | (G>0)
    mean_R_G = np.mean((R - G)[maskR_G]) if np.any(maskR_G) else 0

    return {
        "Mean_Red": mean_R,
        "Mean_Green": mean_G,
        "Mean_Blue": mean_B,
        "Mean_R-G": mean_R_G
    }

In [12]:
def extract_erythema_index(image_path):
    """Computes the Erythema Index (EI) for a given image."""
    
    image = cv2.imread(image_path)
    b, g, r = cv2.split(image)
    r, g = r.astype(np.int32), g.astype(np.int32)

    # Create a mask to ignore completely black pixels (where R and G are both 0)
    mask = (r > 0) | (g > 0)

    # Initialize EI array with zeros
    ei_values = np.zeros_like(r, dtype=np.float32)

    # Avoid division by zero
    valid_pixels = (r + g) > 0  
    ei_values[valid_pixels] = 100 * ((r - g)[valid_pixels] / (r + g)[valid_pixels])

    # Compute the mean EI for the entire image (excluding black pixels)
    mean_ei = np.mean(ei_values[mask]) if np.any(mask) else 0

    return {"Erythema_Index": mean_ei}

In [13]:
def extract_hsv_features(image_path):
    """Computes mean Hue, Saturation, and Value (HSV) features for a given image."""
    
    image = cv2.imread(image_path)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    h, s, v = h.astype(np.int32), s.astype(np.int32), v.astype(np.int32)

    # Create a mask to ignore black pixels (where V == 0)
    mask = v > 0

    # Compute mean values only for valid (non-black) pixels
    mean_h = np.mean(h[mask]) if np.any(mask) else 0
    mean_s = np.mean(s[mask]) if np.any(mask) else 0
    mean_v = np.mean(v[mask]) if np.any(mask) else 0

    return {
        "Mean_Hue": mean_h,
        "Mean_Saturation": mean_s,
        "Mean_Value": mean_v
    }

In [14]:
def extract_hhr(image_path, H_thresh=100):
    """Computes High Hue Ratio (HHR) for a given image based on a hue threshold."""

    image = cv2.imread(image_path)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    h, v = h.astype(np.int32), v.astype(np.int32)

    # Create a mask to exclude black pixels (V == 0)
    valid_pixels_mask = v > 0  

    # Mask high hue pixels (H > H_thresh)
    high_hue_mask = (h > H_thresh) & valid_pixels_mask  

    # Compute High Hue Ratio (HHR)
    high_hue_count = np.count_nonzero(high_hue_mask)
    valid_pixel_count = np.count_nonzero(valid_pixels_mask)

    HHR = (high_hue_count / valid_pixel_count) if valid_pixel_count > 0 else 0

    return {"High_Hue_Ratio": HHR}


In [15]:
def extract_brightness(image_path):
    """Computes the mean brightness (B) of a given image, excluding black pixels."""
    
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Create a mask to ignore completely black pixels (where gray == 0)
    valid_pixels_mask = gray > 0  

    # Compute mean brightness, excluding black pixels
    mean_brightness = np.mean(gray[valid_pixels_mask]) if np.any(valid_pixels_mask) else 0

    return {"Brightness": mean_brightness}


In [16]:
def extract_entropy(image_path):
    """Computes the entropy (Ent) of a given image using pixel intensity distribution."""
    
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Compute histogram (256 bins for intensity levels 0-255)
    hist = cv2.calcHist([image], [0], None, [256], [0, 256])

    # Normalize histogram to get probability distribution
    hist_norm = hist.ravel() / hist.sum()  # Convert to 1D array and normalize

    # Compute entropy
    image_entropy = entropy(hist_norm, base=2)

    return {"Entropy": image_entropy}

In [9]:
def extract_gray_level_features(image_path, window_size=9):
    """Computes gray-level features (G1-G5) for a given image using local window analysis."""
    
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Create mask: Ignore completely black pixels (intensity == 0)
    mask = image > 0 

    # Convert image to float32 before processing to prevent integer NaN errors
    image_float = image.astype(np.float32)

    # G1: Difference between pixel intensity and min intensity in window
    G1 = image_float - cv2.erode(image_float, np.ones((window_size, window_size), np.uint8))

    # G2: Difference between max intensity in window and pixel intensity
    G2 = cv2.dilate(image_float, np.ones((window_size, window_size), np.uint8)) - image_float

    # G3: Difference between pixel intensity and mean intensity in window
    mean_window = uniform_filter(image_float, size=window_size)
    G3 = image_float - mean_window

    # G4: Standard deviation in the window
    def local_std(img):
        mean_sq = uniform_filter(img ** 2, size=window_size)
        return np.sqrt(mean_sq - mean_window**2)

    G4 = local_std(image_float)

    # G5: Center pixel intensity (original grayscale image)
    G5 = image_float.copy()

    # Apply mask: Set black pixels to NaN to exclude from analysis
    G1[~mask] = np.nan
    G2[~mask] = np.nan
    G3[~mask] = np.nan
    G4[~mask] = np.nan
    G5[~mask] = np.nan

    # Compute mean values **only for valid pixels (non-black)**
    mean_G1 = np.nanmean(G1)
    mean_G2 = np.nanmean(G2)
    mean_G3 = np.nanmean(G3)
    mean_G4 = np.nanmean(G4)
    mean_G5 = np.nanmean(G5)

    return {
        "Mean_G1": mean_G1,
        "Mean_G2": mean_G2,
        "Mean_G3": mean_G3,
        "Mean_G4": mean_G4,
        "Mean_G5": mean_G5
    }

In [17]:
def extract_lab_features(image_path):
    """Computes mean L (Lightness), A (Green-Red), and B (Blue-Yellow) features for a given image."""
    
    image = cv2.imread(image_path)
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    L, A, B = cv2.split(lab)
    L, A, B = L.astype(np.float32), A.astype(np.float32), B.astype(np.float32)

    # Create a mask to exclude black pixels (where L == 0)
    mask = L > 0

    # Compute mean values only for valid (non-black) pixels
    mean_L = np.mean(L[mask]) if np.any(mask) else 0
    mean_A = np.mean(A[mask]) if np.any(mask) else 0
    mean_B = np.mean(B[mask]) if np.any(mask) else 0

    return {
        "Mean_L": mean_L,
        "Mean_A": mean_A,
        "Mean_B": mean_B
    }

In [18]:
image_folder = "../Produced Images/Eye Masked"

In [20]:
data = []
image_files = [f for f in os.listdir(image_folder) if f.endswith(".jpg")]

for image_name in image_files:
    image = os.path.join(image_folder, image_name)
   

    # Extract features
    features = {"Image_Name": image_name}
    features.update(extract_rgb_features(image))
    features.update(extract_erythema_index(image))
    features.update(extract_hsv_features(image))
    features.update(extract_hhr(image))
    features.update(extract_brightness(image))
    features.update(extract_entropy(image))
    features.update(extract_lab_features(image))
    features.update(extract_gray_level_features(image))

    # Append to data list
    data.append(features)

# Create DataFrame
df = pd.DataFrame(data)

In [22]:
df.head()

Unnamed: 0,Image_Name,Mean_Red,Mean_Green,Mean_Blue,Mean_R-G,Erythema_Index,Mean_Hue,Mean_Saturation,Mean_Value,High_Hue_Ratio,Brightness,Entropy,Mean_L,Mean_A,Mean_B,Mean_G1,Mean_G2,Mean_G3,Mean_G4,Mean_G5
0,1710062483477.jpg,101.627449,45.108303,55.293671,52.99736,35.746624,121.677321,194.601069,83.622014,0.66903,56.053989,0.080471,67.367943,154.843292,137.86911,31.428766,15.368295,4.067397,14.982734,75.389648
1,1710062524242.jpg,117.500572,55.494678,71.060692,58.833004,34.273388,119.977178,190.562089,97.064707,0.662908,66.841182,0.069666,78.645599,156.921341,136.755524,40.56905,26.164726,5.487703,20.474133,89.575912
2,1710062617330.jpg,152.00824,81.129654,99.034233,70.170151,32.546577,127.139373,174.271463,130.52527,0.706246,93.49262,0.092261,105.598091,160.381027,136.97702,52.915195,35.878963,6.661717,26.001249,118.035606
3,1710063043894.jpg,94.920563,47.228882,57.594391,48.904835,39.188961,127.861347,180.754998,83.367184,0.716936,57.087175,0.066401,66.429474,152.600616,135.476807,33.259434,20.789392,3.966457,15.859059,70.119621
4,1710063172053.jpg,94.496407,40.811356,47.772427,52.414085,40.464287,121.527485,197.41694,78.606722,0.684868,51.34905,0.073595,62.203156,154.453506,138.254028,36.85133,29.333897,5.226566,19.013559,67.644173


In [24]:
df.shape

(412, 20)

In [25]:
df1 = pd.read_excel("../Dataset Used/Blood Report_without patient.xlsx",sheet_name="left_eye")
df1.head()

Unnamed: 0,1. Take a photo of Left eye,Blood Sample Id,Unnamed: 2,Unnamed: 3,Annotated_Data,Avaiable ID,Hb Value
0,1709617490220.jpg,202403051,,,1709617490220.jpg,202403051.0,11.5
1,1709619661748.jpg,202403058,,,1709617782204.jpg,202403052.0,11.6
2,1709621461793.jpg,2024030512,,,1709618916134.jpg,202403056.0,11.1
3,1709624290874.jpg,2024030518,,,1709619661748.jpg,202403058.0,12.1
4,1709625767525.jpg,2024030522,,,1709620146694.jpg,202403059.0,12.2


In [39]:
df1_filtered = df1[['Annotated_Data','Hb Value']].rename(columns={'Annotated_Data':'Image_Name'})
df1_filtered.head()

Unnamed: 0,Image_Name,Hb Value
0,1709617490220.jpg,11.5
1,1709617782204.jpg,11.6
2,1709618916134.jpg,11.1
3,1709619661748.jpg,12.1
4,1709620146694.jpg,12.2


In [40]:
df2 = pd.read_excel("../Dataset Used/Updated Healthworker Form Part 1_without patient identifier.xlsx") 
df2.head()

Unnamed: 0,start,end,today,deviceid,username,Do you give consent?,Patient Id,Blood Sample Id,2. Enter age in completed years,4. Patient Sex,...,_id,_uuid,_submission_time,_validation_status,_notes,_status,_submitted_by,__version__,_tags,_index
0,2024-03-05T11:01:43.141+05:30,2024-03-05T12:36:06.048+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,1,202403051,63,Female,...,116,e7525154-4a03-4818-b76a-85f5d5657f73,2024-03-05T17:57:45,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,1
1,2024-03-05T11:39:13.925+05:30,2024-03-05T11:55:12.666+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,8,202403058,42,Female,...,117,a407f9d7-8643-4a35-a614-3b83184b83e0,2024-03-05T17:57:46,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,2
2,2024-03-05T12:07:50.954+05:30,2024-03-05T12:26:37.006+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,12,2024030512,65,Male,...,119,3f1f095e-d9d1-4a6c-bf3e-616632cf64ac,2024-03-05T17:57:47,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,3
3,2024-03-05T12:50:11.280+05:30,2024-03-05T13:13:53.977+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,18,2024030518,75,Female,...,121,7be32b46-35d2-4923-b256-ef11b633fd05,2024-03-05T17:57:52,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,4
4,2024-03-05T13:21:52.005+05:30,2024-03-05T13:40:16.946+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,22,2024030522,38,Female,...,122,b7d5c35e-cba8-46ff-9e2e-9d82ec8eedc5,2024-03-05T17:57:54,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,5


In [41]:
df2_filtered = df2[['1. Take a photo of Left eye','4. Patient Sex', '2. Enter age in completed years','8. Height (in cm)','9. Weight (in kg)']].rename(
    columns={'4. Patient Sex': 'Gender', '2. Enter age in completed years': 'Age','1. Take a photo of Left eye':'Image_Name','8. Height (in cm)':'Height','9. Weight (in kg)':'Weight'}
)

In [42]:
df2_filtered['Gender'] = df2_filtered['Gender'].map({'Male':0, 'Female':1})
df2_filtered.head()

Unnamed: 0,Image_Name,Gender,Age,Height,Weight
0,1709617490220.jpg,1,63,150.0,42.0
1,1709619661748.jpg,1,42,155.0,45.0
2,1709621461793.jpg,0,65,158.0,60.0
3,1709624290874.jpg,1,75,149.0,34.0
4,1709625767525.jpg,1,38,145.0,38.0


In [43]:
df1_filtered.shape

(426, 2)

In [44]:
df2_filtered.shape

(426, 5)

In [45]:
merge_df = pd.merge(df1_filtered,df2_filtered,on='Image_Name',how='inner')
merge_df.head()

Unnamed: 0,Image_Name,Hb Value,Gender,Age,Height,Weight
0,1709617490220.jpg,11.5,1,63,150.0,42.0
1,1709617782204.jpg,11.6,0,63,155.0,55.0
2,1709618916134.jpg,11.1,1,50,151.0,45.0
3,1709619661748.jpg,12.1,1,42,155.0,45.0
4,1709620146694.jpg,12.2,1,76,148.0,60.0


In [51]:
merge_df.shape

(404, 6)

In [52]:
final_df = pd.merge(merge_df,df,on='Image_Name',how='inner')

In [53]:
final_df.head()

Unnamed: 0,Image_Name,Hb Value,Gender,Age,Height,Weight,Mean_Red,Mean_Green,Mean_Blue,Mean_R-G,...,Brightness,Entropy,Mean_L,Mean_A,Mean_B,Mean_G1,Mean_G2,Mean_G3,Mean_G4,Mean_G5
0,1709617490220.jpg,11.5,1,63,150.0,42.0,131.706589,63.21516,60.592484,63.994053,...,70.704111,0.047401,85.339218,157.576202,145.748566,53.67408,27.372524,7.848403,26.354454,102.570213
1,1709617782204.jpg,11.6,0,63,155.0,55.0,109.073776,57.617134,54.754238,52.570099,...,63.238866,0.052394,74.141937,152.134354,141.959122,47.0,25.432749,6.17199,22.339546,81.861877
2,1709618916134.jpg,11.1,1,50,151.0,45.0,107.769363,60.255825,66.457344,49.842838,...,62.758943,0.031541,74.819908,152.460434,138.383453,66.334137,32.717117,10.2565,33.248707,92.80706
3,1709619661748.jpg,12.1,1,42,155.0,45.0,113.670876,53.096241,53.484196,56.65303,...,60.908574,0.042513,74.068512,155.240585,142.901154,48.659107,25.505156,7.698782,24.404131,87.301033
4,1709620146694.jpg,12.2,1,76,148.0,60.0,127.653755,73.17791,69.270592,55.199455,...,79.248177,0.087676,91.756958,153.043442,143.239899,41.057255,22.207729,4.998142,19.600569,102.189751


In [55]:
final_df.shape

(404, 25)

In [57]:
final_df.to_excel('Eye_Features.xlsx',index=False)