In [1]:
import numpy as np
import pandas as pd
from scipy.ndimage import uniform_filter
from scipy.stats import entropy
import cv2
import os

In [2]:
def extract_rgb_features(image_path):
    """Computes RGB color statistics for a given image."""
    
    image = cv2.imread(image_path)
    image = image.astype(np.float32)

    # Split into RGB channels
    B, G, R = cv2.split(image)

    # Create a mask to ignore black pixels (where all channels are 0)
    maskR = (R > 0)
    maskG = (G > 0)
    maskB = (B > 0)

    # Compute mean for each channel
    mean_R = np.mean(R[maskR]) if np.any(maskR) else 0
    mean_G = np.mean(G[maskG]) if np.any(maskG) else 0
    mean_B = np.mean(B[maskB]) if np.any(maskB) else 0

    # Compute Mean (R - G) ignoring black pixels
    maskR_G = (R>0) | (G>0)
    mean_R_G = np.mean((R - G)[maskR_G]) if np.any(maskR_G) else 0

    return {
        "Mean_Red": mean_R,
        "Mean_Green": mean_G,
        "Mean_Blue": mean_B,
        "Mean_R-G": mean_R_G
    }

In [3]:
def extract_erythema_index(image_path):
    """Computes the Erythema Index (EI) for a given image."""
    
    image = cv2.imread(image_path)
    b, g, r = cv2.split(image)
    r, g = r.astype(np.int32), g.astype(np.int32)

    # Create a mask to ignore completely black pixels (where R and G are both 0)
    mask = (r > 0) | (g > 0)

    # Initialize EI array with zeros
    ei_values = np.zeros_like(r, dtype=np.float32)

    # Avoid division by zero
    valid_pixels = (r + g) > 0  
    ei_values[valid_pixels] = 100 * ((r - g)[valid_pixels] / (r + g)[valid_pixels])

    # Compute the mean EI for the entire image (excluding black pixels)
    mean_ei = np.mean(ei_values[mask]) if np.any(mask) else 0

    return {"Erythema_Index": mean_ei}


In [4]:
def extract_hsv_features(image_path):
    """Computes mean Hue, Saturation, and Value (HSV) features for a given image."""
    
    image = cv2.imread(image_path)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    h, s, v = h.astype(np.int32), s.astype(np.int32), v.astype(np.int32)

    # Create a mask to ignore black pixels (where V == 0)
    mask = v > 0

    # Compute mean values only for valid (non-black) pixels
    mean_h = np.mean(h[mask]) if np.any(mask) else 0
    mean_s = np.mean(s[mask]) if np.any(mask) else 0
    mean_v = np.mean(v[mask]) if np.any(mask) else 0

    return {
        "Mean_Hue": mean_h,
        "Mean_Saturation": mean_s,
        "Mean_Value": mean_v
    }

In [5]:
def extract_hhr(image_path, H_thresh=100):
    """Computes High Hue Ratio (HHR) for a given image based on a hue threshold."""

    image = cv2.imread(image_path)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    h, v = h.astype(np.int32), v.astype(np.int32)

    # Create a mask to exclude black pixels (V == 0)
    valid_pixels_mask = v > 0  

    # Mask high hue pixels (H > H_thresh)
    high_hue_mask = (h > H_thresh) & valid_pixels_mask  

    # Compute High Hue Ratio (HHR)
    high_hue_count = np.count_nonzero(high_hue_mask)
    valid_pixel_count = np.count_nonzero(valid_pixels_mask)

    HHR = (high_hue_count / valid_pixel_count) if valid_pixel_count > 0 else 0

    return {"High_Hue_Ratio": HHR}


In [6]:
def extract_brightness(image_path):
    """Computes the mean brightness (B) of a given image, excluding black pixels."""
    
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Create a mask to ignore completely black pixels (where gray == 0)
    valid_pixels_mask = gray > 0  

    # Compute mean brightness, excluding black pixels
    mean_brightness = np.mean(gray[valid_pixels_mask]) if np.any(valid_pixels_mask) else 0

    return {"Brightness": mean_brightness}


In [7]:
def extract_entropy(image_path):
    """Computes the entropy (Ent) of a given image using pixel intensity distribution."""
    
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Compute histogram (256 bins for intensity levels 0-255)
    hist = cv2.calcHist([image], [0], None, [256], [0, 256])

    # Normalize histogram to get probability distribution
    hist_norm = hist.ravel() / hist.sum()  # Convert to 1D array and normalize

    # Compute entropy
    image_entropy = entropy(hist_norm, base=2)

    return {"Entropy": image_entropy}

In [8]:
def extract_gray_level_features(image_path, window_size=9):
    """Computes gray-level features (G1-G5) for a given image using local window analysis."""
    
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Create mask: Ignore completely black pixels (intensity == 0)
    mask = image > 0 

    # Convert image to float32 before processing to prevent integer NaN errors
    image_float = image.astype(np.float32)

    # G1: Difference between pixel intensity and min intensity in window
    G1 = image_float - cv2.erode(image_float, np.ones((window_size, window_size), np.uint8))

    # G2: Difference between max intensity in window and pixel intensity
    G2 = cv2.dilate(image_float, np.ones((window_size, window_size), np.uint8)) - image_float

    # G3: Difference between pixel intensity and mean intensity in window
    mean_window = uniform_filter(image_float, size=window_size)
    G3 = image_float - mean_window

    # G4: Standard deviation in the window
    def local_std(img):
        mean_sq = uniform_filter(img ** 2, size=window_size)
        return np.sqrt(mean_sq - mean_window**2)

    G4 = local_std(image_float)

    # G5: Center pixel intensity (original grayscale image)
    G5 = image_float.copy()

    # Apply mask: Set black pixels to NaN to exclude from analysis
    G1[~mask] = np.nan
    G2[~mask] = np.nan
    G3[~mask] = np.nan
    G4[~mask] = np.nan
    G5[~mask] = np.nan

    # Compute mean values **only for valid pixels (non-black)**
    mean_G1 = np.nanmean(G1)
    mean_G2 = np.nanmean(G2)
    mean_G3 = np.nanmean(G3)
    mean_G4 = np.nanmean(G4)
    mean_G5 = np.nanmean(G5)

    return {
        "Mean_G1": mean_G1,
        "Mean_G2": mean_G2,
        "Mean_G3": mean_G3,
        "Mean_G4": mean_G4,
        "Mean_G5": mean_G5
    }

In [9]:
def extract_lab_features(image_path):
    """Computes mean L (Lightness), A (Green-Red), and B (Blue-Yellow) features for a given image."""
    
    image = cv2.imread(image_path)
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    L, A, B = cv2.split(lab)
    L, A, B = L.astype(np.float32), A.astype(np.float32), B.astype(np.float32)

    # Create a mask to exclude black pixels (where L == 0)
    mask = L > 0

    # Compute mean values only for valid (non-black) pixels
    mean_L = np.mean(L[mask]) if np.any(mask) else 0
    mean_A = np.mean(A[mask]) if np.any(mask) else 0
    mean_B = np.mean(B[mask]) if np.any(mask) else 0

    return {
        "Mean_L": mean_L,
        "Mean_A": mean_A,
        "Mean_B": mean_B
    }

In [10]:
image_folder = "../Produced Images/Palm Masked"

In [11]:
data = []
image_files = [f for f in os.listdir(image_folder) if f.endswith(".jpg")]

for image_name in image_files:
    image = os.path.join(image_folder, image_name)
   

    # Extract features
    features = {"Image_Name": image_name}
    features.update(extract_rgb_features(image))
    features.update(extract_erythema_index(image))
    features.update(extract_hsv_features(image))
    features.update(extract_hhr(image))
    features.update(extract_brightness(image))
    features.update(extract_entropy(image))
    features.update(extract_lab_features(image))
    features.update(extract_gray_level_features(image))

    # Append to data list
    data.append(features)

# Create DataFrame
df = pd.DataFrame(data)

In [12]:
df.head()

Unnamed: 0,Image_Name,Mean_Red,Mean_Green,Mean_Blue,Mean_R-G,Erythema_Index,Mean_Hue,Mean_Saturation,Mean_Value,High_Hue_Ratio,Brightness,Entropy,Mean_L,Mean_A,Mean_B,Mean_G1,Mean_G2,Mean_G3,Mean_G4,Mean_G5
0,1710067869909.jpg,132.203522,90.34198,99.599403,42.233143,20.370171,168.078731,91.792906,130.12469,0.964534,102.870494,2.503741,108.982346,146.942886,130.263306,21.225544,19.121161,0.507343,8.920181,105.114723
1,1710068114115.jpg,166.695206,119.389946,95.96225,48.246185,18.986616,11.07733,119.025343,164.529271,0.010787,130.031437,3.050132,137.663116,144.3629,148.61525,34.343536,36.484924,0.5009,14.604977,131.759232
2,1710068399320.jpg,165.616043,116.382195,120.629471,49.978889,18.979443,110.699176,87.531183,162.783118,0.629755,130.202456,2.381161,137.027878,148.581924,134.171631,23.865772,18.794598,0.676129,10.008794,133.16629
3,1710068960508.jpg,140.075378,92.702194,101.127136,47.814999,21.78619,170.51178,94.196209,138.661554,0.972541,106.986084,2.578323,113.154045,148.878052,131.685394,20.464367,20.850979,0.373889,9.227047,108.63221
4,1710069156992.jpg,155.359161,108.803612,117.150909,47.013908,18.927881,168.537046,84.635077,153.675121,0.965648,122.830024,3.388446,128.863205,148.117355,131.144775,25.269421,24.090904,0.384049,10.903161,124.535583


In [14]:
df.shape

(426, 20)

In [13]:
df1 = pd.read_excel("../Dataset Used/Blood Report_without patient.xlsx",sheet_name="left_palm")
df1.head()

Unnamed: 0,1. Take a photo of left palm,Blood Sample Id,Unnamed: 2,Unnamed: 3,Annotated_Data,Avaiable ID,Hb Value
0,1709617238375.jpg,202403051,,,1709617238375.jpg,202403051.0,11.5
1,1709619358067.jpg,202403058,,,1709617534838.jpg,202403052.0,11.6
2,1709621100853.jpg,2024030512,,,1709618528581.jpg,202403056.0,11.1
3,1709623596231.jpg,2024030518,,,1709619358067.jpg,202403058.0,12.1
4,1709625468820.jpg,2024030522,,,1709619811692.jpg,202403059.0,12.2


In [15]:
df1_filtered = df1[['Annotated_Data','Hb Value']].rename(columns={'Annotated_Data':'Image_Name'})
df1_filtered.head()

Unnamed: 0,Image_Name,Hb Value
0,1709617238375.jpg,11.5
1,1709617534838.jpg,11.6
2,1709618528581.jpg,11.1
3,1709619358067.jpg,12.1
4,1709619811692.jpg,12.2


In [16]:
df2 = pd.read_excel("../Dataset Used/Updated Healthworker Form Part 1_without patient identifier.xlsx") 
df2.head()

Unnamed: 0,start,end,today,deviceid,username,Do you give consent?,Patient Id,Blood Sample Id,2. Enter age in completed years,4. Patient Sex,...,_id,_uuid,_submission_time,_validation_status,_notes,_status,_submitted_by,__version__,_tags,_index
0,2024-03-05T11:01:43.141+05:30,2024-03-05T12:36:06.048+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,1,202403051,63,Female,...,116,e7525154-4a03-4818-b76a-85f5d5657f73,2024-03-05T17:57:45,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,1
1,2024-03-05T11:39:13.925+05:30,2024-03-05T11:55:12.666+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,8,202403058,42,Female,...,117,a407f9d7-8643-4a35-a614-3b83184b83e0,2024-03-05T17:57:46,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,2
2,2024-03-05T12:07:50.954+05:30,2024-03-05T12:26:37.006+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,12,2024030512,65,Male,...,119,3f1f095e-d9d1-4a6c-bf3e-616632cf64ac,2024-03-05T17:57:47,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,3
3,2024-03-05T12:50:11.280+05:30,2024-03-05T13:13:53.977+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,18,2024030518,75,Female,...,121,7be32b46-35d2-4923-b256-ef11b633fd05,2024-03-05T17:57:52,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,4
4,2024-03-05T13:21:52.005+05:30,2024-03-05T13:40:16.946+05:30,2024-03-05,collect:wVnGkN86bc6HA9Fi,shila,Yes,22,2024030522,38,Female,...,122,b7d5c35e-cba8-46ff-9e2e-9d82ec8eedc5,2024-03-05T17:57:54,,,submitted_via_web,shila,v9nwsZRTukYiPhwTrkJqPu,,5


In [19]:
df2_filtered = df2[['1. Take a photo of left palm','4. Patient Sex', '2. Enter age in completed years','8. Height (in cm)','9. Weight (in kg)']].rename(
    columns={'4. Patient Sex': 'Gender', '2. Enter age in completed years': 'Age','1. Take a photo of left palm':'Image_Name','8. Height (in cm)':'Height','9. Weight (in kg)':'Weight'}
)

In [20]:
df2_filtered['Gender'] = df2_filtered['Gender'].map({'Male':0, 'Female':1})
df2_filtered.head()

Unnamed: 0,Image_Name,Gender,Age,Height,Weight
0,1709617238375.jpg,1,63,150.0,42.0
1,1709619358067.jpg,1,42,155.0,45.0
2,1709621100853.jpg,0,65,158.0,60.0
3,1709623596231.jpg,1,75,149.0,34.0
4,1709625468820.jpg,1,38,145.0,38.0


In [21]:
df1_filtered.shape

(426, 2)

In [22]:
df2_filtered.shape

(426, 5)

In [23]:
merge_df = pd.merge(df1_filtered,df2_filtered,on='Image_Name',how='inner')
merge_df.head()

Unnamed: 0,Image_Name,Hb Value,Gender,Age,Height,Weight
0,1709617238375.jpg,11.5,1,63,150.0,42.0
1,1709617534838.jpg,11.6,0,63,155.0,55.0
2,1709618528581.jpg,11.1,1,50,151.0,45.0
3,1709619358067.jpg,12.1,1,42,155.0,45.0
4,1709619811692.jpg,12.2,1,76,148.0,60.0


In [24]:
merge_df.shape

(418, 6)

In [25]:
final_df = pd.merge(merge_df,df,on='Image_Name',how='inner')

In [26]:
final_df.head()

Unnamed: 0,Image_Name,Hb Value,Gender,Age,Height,Weight,Mean_Red,Mean_Green,Mean_Blue,Mean_R-G,...,Brightness,Entropy,Mean_L,Mean_A,Mean_B,Mean_G1,Mean_G2,Mean_G3,Mean_G4,Mean_G5
0,1709617238375.jpg,11.5,1,63,150.0,42.0,171.058029,95.746063,79.132751,75.492599,...,115.143901,3.374992,124.489388,157.329834,151.398407,28.911028,21.55987,0.550662,12.050771,117.798874
1,1709617534838.jpg,11.6,0,63,155.0,55.0,177.839264,108.628555,91.947807,69.681519,...,126.185754,3.122405,134.675903,154.165115,149.248245,11.130719,8.395287,0.627475,4.990907,128.604538
2,1709618528581.jpg,11.1,1,50,151.0,45.0,164.460495,98.395569,86.722511,66.42588,...,115.146662,2.334258,123.541016,153.985092,146.616867,34.409153,23.362341,0.698524,13.612711,118.539879
3,1709619358067.jpg,12.1,1,42,155.0,45.0,151.329605,101.981934,96.311028,49.792988,...,115.088051,3.436739,120.985947,147.450485,139.969818,21.07444,15.596459,0.568201,9.252064,117.17453
4,1709619811692.jpg,12.2,1,76,148.0,60.0,183.59407,139.553665,137.314484,45.124718,...,150.919077,2.992105,157.727432,145.054489,136.321976,22.2897,17.030174,0.774384,9.076474,154.067825


In [27]:
final_df.to_excel('Palm_Features.xlsx',index=False)