In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import os
import glob
import matplotlib.pyplot as plt
import math
import cv2
import time


%matplotlib inline

In [2]:
def resize(img,size):
    """size is a tuple"""
    """ returns resized images"""
    return cv2.resize(img,size)

def to_hsv(img):
    """return image in HSV space"""
    return cv2.cvtColor(img,cv2.COLOR_RGB2HSV)

def to_gray(img):
    """return image in gray space"""
    return cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)

def sum_rgb(img):
    r_sum = g_sum = b_sum = rg_sum = count = 0
    for i in img:
        for j in i:
            if (j[0] > 20 and j[0] < 240 and j[1] > 20 and j[1] < 240 and j[2] > 20 and j[2] < 240):
                r_sum += j[0]
                g_sum += j[1]
                b_sum += j[2]
                rg_sum += j[0] - j[1]
                count += 1

    return [r_sum,g_sum,b_sum,rg_sum,count]

# Assuming img is a 3D NumPy array with shape (height, width, 3) representing an RGB image
def new_sum(img):
    # Create a boolean mask where each pixel is within the specified RGB range
    mask = (img[:, :, 0] > 20) & (img[:, :, 0] < 240) & \
           (img[:, :, 1] > 20) & (img[:, :, 1] < 240) & \
           (img[:, :, 2] > 20) & (img[:, :, 2] < 240)

    # Use the mask to filter the pixels within the specified RGB range
    filtered_pixels = img[mask]

    # Calculate the sums of the RGB values for the filtered pixels
    sum_r = np.sum(filtered_pixels[:, 0])
    sum_g = np.sum(filtered_pixels[:, 1])
    sum_b = np.sum(filtered_pixels[:, 2])
    sum_rg = sum_r - sum_g

    return [sum_r, sum_g, sum_b, sum_rg, np.count_nonzero(filtered_pixels[:, 0])]


def sum(arr):
    # """returns sum and no. of pixels between 20 and 240"""
    # sum = 0
    # count = 0
    # for i in arr:
    #     for j in i:
    #         if(j>20 and j<240): #only pixels whose value is between 20 and 240
    #             sum+=j
    #             count+=1

    #only pixels whose value is between 20 and 240
    arr = arr[arr>20]
    arr = arr[arr<240]

    return (arr.sum(), arr.size)

    # return (sum,count)

def pooling(image, pool_size, code, padding):
    """
    different codes for different pooling
    code min :min pooling
    code max :max pooling 
    code mean :mean pooling 
    code std :standard deviation pooling
    returns a image with padding operation and pooling operation
    """

    padded = arr = np.zeros((image.shape[0] + padding*2, 
                       image.shape[1] + padding*2))
    
    #  inserting image into zero array
    padded[int(padding):-int(padding), int(padding):-int(padding)] = image
    
    
    # print(f'original image size: {image.shape}')
    # print(f'padded image size: {padded.shape}')

    input_height, input_width = padded.shape
    pool_height, pool_width = pool_size

    # Calculate the output dimensions
    output_height = input_height - pool_height + 1
    output_width = input_width - pool_width + 1

    # Initialize the output data
    output_data = np.zeros((output_height, output_width))

    for i in range(output_height):
        for j in range(output_width):
            # Extract the region of interest (ROI)
            roi = padded[i : i + pool_height, j : j + pool_width]
            
            if code=='min':
                # Apply min pooling within the ROI
                output_data[i, j] = np.min(roi)

            if code=='max':
                # Apply max pooling within the ROI
                output_data[i, j] = np.max(roi)

            if code=='mean':
                # Apply mean pooling within the ROI
                output_data[i, j] = np.mean(roi)

            if code=='std':
                # Apply min pooling within the ROI
                output_data[i, j] = np.std(roi)


    # print(f'{code} pooled image size: {output_data.shape}')
    return output_data

In [3]:
def feature(data):
    """Return all the 12 features as a numpy array"""
    uid,img,label = data
    img = resize(img,(250,250))

    #RGB SPACE
    sum_img = new_sum(img)
    count = sum_img.pop()
    mean_features = [i/count for i in sum_img]
    mean_r,mean_g,mean_b,mean_rg = mean_features
    # 4 features done in RGB SPACE

    
    #HSV SPACE
    hsv = to_hsv(img)
    h,s,v = cv2.split(hsv)
    _,h_counts = sum(h)
    h = h/h.max()
    nH = np.count_nonzero(h>0.95)
    HHR = nH/h_counts
    # HHR found

    
    #GRAY SPACE
    gray = to_gray(img)
    B_sum, B_size = sum(gray)
    B = B_sum/B_size # FOUND B

    #ENTROPY in gray space
    eq = cv2.equalizeHist(gray)
    unique, counts = np.unique(eq, return_counts=True)
    #only pixels whose value is between 20 and 240
    total_counts = counts[21:241].sum() 
    Ent = np.sum(np.array([-i*(i/total_counts)*math.log((i/total_counts),2) for i in counts[21:241]])) #Found Entropy

    #Calculating the 'G' features
    Ixy = gray
    min_Ixy = pooling(image=Ixy, pool_size=(3,3), code='min', padding=1)
    max_Ixy = pooling(image=Ixy, pool_size=(3,3), code='max', padding=1)
    mean_Ixy = pooling(image=Ixy, pool_size=(3,3), code='mean', padding=1)
    std_Ixy = pooling(image=Ixy, pool_size=(3,3), code='std', padding=1)
    
    g1 = Ixy - min_Ixy
    g2 = max_Ixy - Ixy
    g3 = Ixy - mean_Ixy
    g4 = std_Ixy
    g5 = Ixy
    
    G1 = sum(g1)[0]/sum(g1)[1]
    G2 = sum(g2)[0]/sum(g2)[1]
    G3 = sum(g3)[0]/sum(g3)[1]
    G4 = sum(g4)[0]/sum(g4)[1]
    G5 = sum(g5)[0]/sum(g5)[1]

    feature_all = [uid,mean_r,mean_g,mean_b,mean_rg,HHR,Ent,B,G1,G2,G3,G4,G5,label]
    return feature_all

In [4]:
folders = sorted(glob.glob("../../India_95/complete/*"))
images = []
for folder in folders:
    all_images = []
    for i in os.listdir(folder):
        path = os.path.join(folder,i)
        all_images.append(path)
    images.append([i for i in all_images if "forniceal_palpebral" in i][0])
print(f"Total Number of Images = {len(images)}")

Total Number of Images = 95


In [5]:
df = pd.read_excel("../../India_95/India.xlsx",0)

In [6]:
def mask(filename):
    input_image = Image.open(filename)
    input_image.load()
    image = Image.new("RGB", input_image.size, (255, 255, 255))
    image.paste(input_image, mask = input_image.split()[3])
    return np.array(image)

In [7]:
curr_time = time.time()
All_Data = []
error_names = []

for i in range(len(images)):
    try:
        number = int(images[i].split("\\")[-2])
        roi = mask(images[i])
        label = df.loc[df['Number'] == int(number)]['Hgb'].tolist()[0]
    
        data = [number,roi,label]
    
        All_Data.append(feature(data))

    except Exception as e:
        error = [number, e]
        error_names.append(error)

print(len(All_Data))
print(f"Time Taken = {time.time()-curr_time} sec")

95
Time Taken = 313.6424512863159 sec


In [8]:
cols = ['number','mean_r','mean_g','mean_b','mean_rg','HHR','Ent','B','G1','G2','G3','G4','G5','label']

complete_data = pd.DataFrame(All_Data,columns=cols)

In [9]:
complete_data.head()

Unnamed: 0,number,mean_r,mean_g,mean_b,mean_rg,HHR,Ent,B,G1,G2,G3,G4,G5,label
0,1,166.855593,84.030561,118.807302,82.825032,0.52257,4995.168514,112.314781,122.812655,118.816078,71.050559,84.590314,112.314781,12.2
1,10,160.048148,88.111943,120.417948,71.936204,0.431622,3567.373111,113.587725,95.557875,90.553173,70.576389,82.540834,113.587725,11.3
2,11,167.296178,105.985953,144.634863,61.310225,0.160986,7735.02089,128.852574,76.887975,74.195942,68.023431,78.08738,128.852574,13.2
3,12,175.376121,124.368315,156.700927,51.007805,0.270415,9703.310299,143.575611,88.513445,82.420336,66.372292,73.288759,143.575611,10.6
4,13,174.116938,112.974567,150.025433,61.14237,0.190075,7991.227751,135.681814,78.781697,77.020221,69.687116,76.05814,135.681814,10.6


In [10]:
error_names

[]

In [11]:
# complete_data.to_csv('phase1_data.csv',index = False)