# Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import os
import glob
import matplotlib.pyplot as plt
import math
import cv2


%matplotlib inline

# Features

## Feature Functions

In [2]:
def resize(img,size):
    """size is a tuple"""
    """ returns resized images"""
    return cv2.resize(img,size)

def to_hsv(img):
    """return image in HSV space"""
    return cv2.cvtColor(img,cv2.COLOR_RGB2HSV)

def to_gray(img):
    """return image in gray space"""
    return cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)

def sum(arr):
    """returns sum and no. of pixels between 20 and 240"""
    sum = 0
    count = 0
    for i in arr:
        for j in i:
            if(j>20 and j<240): #only pixels whose value is between 20 and 240
                sum+=j
                count+=1

    return (sum,count)

def pooling(image, pool_size, code, padding):
    """
    different codes for different pooling
    code min :min pooling
    code max :max pooling 
    code mean :mean pooling 
    code std :standard deviation pooling
    returns a image with padding operation and pooling operation
    """

    padded = arr = np.zeros((image.shape[0] + padding*2, 
                       image.shape[1] + padding*2))
    
    #  inserting image into zero array
    padded[int(padding):-int(padding), int(padding):-int(padding)] = image
    
    
    # print(f'original image size: {image.shape}')
    # print(f'padded image size: {padded.shape}')

    input_height, input_width = padded.shape
    pool_height, pool_width = pool_size

    # Calculate the output dimensions
    output_height = input_height - pool_height + 1
    output_width = input_width - pool_width + 1

    # Initialize the output data
    output_data = np.zeros((output_height, output_width))

    for i in range(output_height):
        for j in range(output_width):
            # Extract the region of interest (ROI)
            roi = padded[i : i + pool_height, j : j + pool_width]
            
            if code=='min':
                # Apply min pooling within the ROI
                output_data[i, j] = np.min(roi)

            if code=='max':
                # Apply max pooling within the ROI
                output_data[i, j] = np.max(roi)

            if code=='mean':
                # Apply mean pooling within the ROI
                output_data[i, j] = np.mean(roi)

            if code=='std':
                # Apply min pooling within the ROI
                output_data[i, j] = np.std(roi)


    # print(f'{code} pooled image size: {output_data.shape}')
    return output_data

## Calling Function

Images should be sent in RGB format

In [3]:
def feature(data):
    """Return all the 12 features as a numpy array"""
    uid,img,label = data
    img = resize(img,(250,250))

    #RGB SPACE
    r, g, b = cv2.split(img)
    sum_img = [sum(r),sum(g),sum(b),sum(r-g)]
    mean_features = [i[0]/i[1] for i in sum_img]
    mean_r,mean_g,mean_b,mean_rg = mean_features
    # 4 features done in RGB SPACE

    
    #HSV SPACE
    hsv = to_hsv(img)
    h,s,v = cv2.split(hsv)
    h = h/h.max()
    nH = np.count_nonzero(h>0.95)
    HHR = nH/h.size
    # HHR found

    
    #GRAY SPACE
    gray = to_gray(img)
    B_sum, B_size = sum(gray)
    B = B_sum/B_size # FOUND B

    #ENTROPY in gray space
    eq = cv2.equalizeHist(gray)
    unique, counts = np.unique(eq, return_counts=True)
    #only pixels whose value is between 20 and 240
    total_counts = counts[21:240].sum() 
    Ent = np.sum(np.array([-i*(i/total_counts)*math.log((i/total_counts),2) for i in counts[21:240]])) #Found Entropy

    #Calculating the 'G' features
    Ixy = gray
    min_Ixy = pooling(image=Ixy, pool_size=(3,3), code='min', padding=1)
    max_Ixy = pooling(image=Ixy, pool_size=(3,3), code='max', padding=1)
    mean_Ixy = pooling(image=Ixy, pool_size=(3,3), code='mean', padding=1)
    std_Ixy = pooling(image=Ixy, pool_size=(3,3), code='std', padding=1)
    
    g1 = Ixy - min_Ixy
    g2 = max_Ixy - Ixy
    g3 = Ixy - mean_Ixy
    g4 = std_Ixy
    g5 = Ixy
    
    G1 = g1.sum()/g1.size
    G2 = g2.sum()/g2.size
    G3 = g3.sum()/g3.size
    G4 = g4.sum()/g4.size
    G5 = g5.sum()/g5.size

    feature_all = [uid,mean_r,mean_g,mean_b,mean_rg,HHR,Ent,B,G1,G2,G3,G4,G5,label]
    return feature_all

# Importing Data

## Right Nail Data

In [17]:
mask_right_nail_files = glob.glob("../../Field Data/Individual Anatomy/Galaxy M34/Right Fingernail_files/right_nail_segmask/SegmentationClass/*.png")
right_nail_path = "../../Field Data/Individual Anatomy/Galaxy M34/Right Fingernail_files/Images_right_fingernail\\"

In [5]:
right_nail_img_list = []
for mask_name in mask_right_nail_files:
    mask = mask_name.split("\\")[-1].split(".")[0] + ".jpg"
    right_nail_img_list.append(mask)

In [7]:
# df = pd.DataFrame(right_nail_img_list, columns=["Annotated_Data"])
# df.to_csv('annotations_right_nail.csv', index=False)

In [8]:
right_nail_truth = pd.read_csv("annotations_right_nail.csv")
right_nail_truth.head(5)

Unnamed: 0,Annotated_Data,Avaiable ID,Hb Value
0,1709617374918.jpg,202403051,11.5
1,1709617703730.jpg,202403052,11.6
2,1709618658975.jpg,202403056,11.1
3,1709619498294.jpg,202403058,12.1
4,1709620017738.jpg,202403059,12.2


### DataSet Creation

In [28]:
Total_Data = []
for mask_path in mask_right_nail_files:
    name = mask_path.split("\\")[-1].split(".")[0] + ".jpg"
    
    mask_bgr = cv2.imread(mask_path)
    img_bgr = cv2.imread(right_nail_path + name)
    mask = cv2.cvtColor(mask_bgr,cv2.COLOR_BGR2GRAY)
    img = cv2.cvtColor(img_bgr,cv2.COLOR_BGR2RGB)

    roi = cv2.bitwise_and(img, img, mask = mask)

    uid = right_nail_truth[right_nail_truth['Annotated_Data'] == name]["Avaiable ID"].tolist()[0]

    label = right_nail_truth[right_nail_truth['Annotated_Data'] == name]["Hb Value"].tolist()[0]

    data_sent = [uid,roi,label]

    features = feature(data_sent)

    Total_Data.append(features)

In [25]:
# right_nail_truth[right_nail_truth['Annotated_Data'] == "1709617703730.jpg"]["Hb Value"].tolist()[0]

In [26]:
# right_nail_truth[right_nail_truth['Annotated_Data'] == "1709617703730.jpg"]["Avaiable ID"].tolist()[0]

In [29]:
cols = ['number','mean_r','mean_g','mean_b','mean_rg','HHR','Ent','B','G1','G2','G3','G4','G5','label']

complete_data = pd.DataFrame(Total_Data,columns=cols)

In [31]:
complete_data.head()

Unnamed: 0,number,mean_r,mean_g,mean_b,mean_rg,HHR,Ent,B,G1,G2,G3,G4,G5,label
0,202403051,178.092988,108.245802,96.315789,70.652778,8e-05,60.139757,127.731298,0.453936,0.537184,-4.547474e-18,0.374701,1.340192,11.5
1,202403052,188.176768,144.711311,137.940812,44.52244,0.003872,165.375468,156.973003,0.927856,1.03912,9.094947e-19,0.737623,4.468912,11.6
2,202403056,150.851653,96.245249,90.541629,56.161086,0.001888,96.663681,111.877477,0.511744,0.59016,-2.955858e-18,0.40781,1.989872,11.1
3,202403058,172.266588,104.326555,91.253902,69.208234,1.6e-05,162.350588,123.304296,0.67432,0.745536,-1.193712e-18,0.541465,3.31064,12.1
4,202403059,168.711062,118.379599,113.825084,52.035575,0.00272,175.601154,132.529054,0.759872,0.835728,-3.865352e-18,0.592369,3.836576,12.2


In [1]:
# complete_data.to_csv('right_nail_data.csv',index = False)

## Tongue

In [7]:
mask_tongue_files = glob.glob("../../Field Data/Individual Anatomy/Galaxy M34/Tongue_files/tongue_segmasks/SegmentationClass/*.png")
tongue_path = "../../Field Data/Individual Anatomy/Galaxy M34/Tongue_files/Images_tongue\\"

In [8]:
tongue_img_list = []
for mask_name in mask_tongue_files:
    mask = mask_name.split("\\")[-1].split(".")[0] + ".jpg"
    tongue_img_list.append(mask)

In [10]:
# df = pd.DataFrame(tongue_img_list, columns=["Annotated_Data"])
# df.to_csv('annotations_tongue.csv', index=False)

In [11]:
tongue_truth = pd.read_csv("annotations_tongue.csv")
tongue_truth.head(5)

Unnamed: 0,Annotated_Data,Avaiable ID,Hb Value
0,1709617694248.jpg,202403051,11.5
1,1709617844626.jpg,202403052,11.6
2,1709619191362.jpg,202403056,11.1
3,1709619874657.jpg,202403058,12.1
4,1709620272757.jpg,202403059,12.2


### Dataset Creation

In [13]:
Total_Data = []
for mask_path in mask_tongue_files:
    name = mask_path.split("\\")[-1].split(".")[0] + ".jpg"
    
    mask_bgr = cv2.imread(mask_path)
    img_bgr = cv2.imread(tongue_path + name)
    mask = cv2.cvtColor(mask_bgr,cv2.COLOR_BGR2GRAY)
    img = cv2.cvtColor(img_bgr,cv2.COLOR_BGR2RGB)

    roi = cv2.bitwise_and(img, img, mask = mask)

    uid = tongue_truth[tongue_truth['Annotated_Data'] == name]["Avaiable ID"].tolist()[0]

    label = tongue_truth[tongue_truth['Annotated_Data'] == name]["Hb Value"].tolist()[0]

    data_sent = [uid,roi,label]

    features = feature(data_sent)

    Total_Data.append(features)

In [14]:
cols = ['number','mean_r','mean_g','mean_b','mean_rg','HHR','Ent','B','G1','G2','G3','G4','G5','label']

complete_data = pd.DataFrame(Total_Data,columns=cols)

In [15]:
complete_data.head()

Unnamed: 0,number,mean_r,mean_g,mean_b,mean_rg,HHR,Ent,B,G1,G2,G3,G4,G5,label
0,202403051,166.79687,90.204244,91.770974,77.318162,0.051248,547.254478,113.386518,1.837616,1.92656,2.273737e-18,1.271363,12.361536,11.5
1,202403052,156.875171,87.935703,89.592416,76.429971,0.04784,264.042677,108.996361,1.516944,1.686336,6.82121e-18,1.060669,9.127552,11.6
2,202403056,177.691041,103.062268,106.222751,81.251593,0.061056,391.859934,127.223085,1.731856,1.840096,4.547474e-18,1.227548,11.970176,11.1
3,202403058,155.03712,78.9168,81.114153,78.075411,0.090016,745.032076,101.292218,2.106624,2.28176,-3.637979e-18,1.447957,14.470496,12.1
4,202403059,196.63218,136.128788,146.453838,61.281295,0.127376,1031.161222,155.427062,2.918,2.901104,0.01805867,1.976244,24.009744,12.2


In [16]:
# complete_data.to_csv('tongue_data.csv',index = False)