In [1]:
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import pandas as pd
from sklearn.metrics import classification_report,confusion_matrix

# Path to image folders, 
#data_path = fr'../Datasets/config'
data_path = fr'../Datasets/full/Training'

def remove_white_background(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0
        newPixels.append(pixel)
    
    return newPixels


def redify(pixels):
    return [r for r, g, b in pixels]

                
def greenify(pixels):
    return [g for r, g, b in pixels]


def blueify(pixels):
    return [b for r, g, b in pixels]


def get_rgb_pixels_onehot_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y


def process_files(src):
    X_red_train = []
    X_red_validation = []
    X_red_test = []
    X_green_train = []
    X_green_validation = []
    X_green_test = []
    X_blue_train = []
    X_blue_validation = []
    X_blue_test = []
    all_pixels, y = get_rgb_pixels_onehot_labels(src)

    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)
    
    for pixels in X_train:       
        X_red_train.append(redify(pixels.copy()))
        X_green_train.append(greenify(pixels.copy()))
        X_blue_train.append(blueify(pixels.copy()))
        
    for pixels in X_validation:       
        X_red_validation.append(redify(pixels.copy()))
        X_green_validation.append(greenify(pixels.copy()))
        X_blue_validation.append(blueify(pixels.copy()))
        
    for pixels in X_test:       
        X_red_test.append(redify(pixels.copy()))
        X_green_test.append(greenify(pixels.copy()))
        X_blue_test.append(blueify(pixels.copy()))
    
    
    print("Finished \n")
    return np.asarray(X_red_train), np.asarray(X_red_validation), np.asarray(X_red_test), np.asarray(X_green_train), np.asarray(X_green_validation), np.asarray(X_green_test), np.asarray(X_blue_train), np.asarray(X_blue_validation), np.asarray(X_blue_test), y_train, y_validation, y_test


def get_youdens_index(predictions, Y):
    # Calculate true positive/negative and false positive/negative
    tp = sum((Y == predictions) * (Y == 1) * 1)
    tn = sum((Y == predictions) * (Y == 0) * 1)
    fp = sum((Y != predictions) * (Y == 0) * 1)
    fn = sum((Y != predictions) * (Y == 1) * 1)
    
    # Calculate sensitivity and specificity
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    
    result = sensitivity - (1 - specificity)
    # Put it in a dateframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    pd.set_option('display.max_rows', 200)
    
    return df

In [2]:
# Call process_files and assign variables
X_red_train, X_red_validation, X_red_test, X_green_train, X_green_validation, X_green_test, X_blue_train, X_blue_validation, X_blue_test, Y_train, Y_validation, Y_test = process_files(data_path)

Starting...
Finished 



In [3]:
from sklearn.tree import DecisionTreeClassifier

# Red Train
dtc_red = DecisionTreeClassifier()
dtc_red.fit(X_red_train, Y_train)
vector_red = dtc_red.predict(X_red_train)

# Green Train
dtc_green = DecisionTreeClassifier()
dtc_green.fit(X_green_train, Y_train)
vector_green = dtc_green.predict(X_green_train)

# Blue Train
dtc_blue = DecisionTreeClassifier()
dtc_blue.fit(X_blue_train, Y_train)
vector_blue = dtc_blue.predict(X_blue_train)

# Validation
vector_red_val = dtc_red.predict(X_red_validation)
vector_green_val = dtc_green.predict(X_green_validation)
vector_blue_val = dtc_blue.predict(X_blue_validation)

print("Red Model Validation")
print(get_youdens_index(vector_red_val, Y_validation), "\n")

print("Green Model Validation")
print(get_youdens_index(vector_green_val, Y_validation), "\n")

print("Blue Model Validation")
print(get_youdens_index(vector_blue_val, Y_validation), "\n")

Red Model Validation
     Youdens Index
0         0.953750
1         0.974566
2         0.897540
3         0.923103
4         0.945624
5         0.923103
6         0.974569
7         0.884599
8         0.976763
9         0.886228
10        0.923159
11        0.907836
12        0.937359
13        0.937809
14        0.982400
15        0.930849
16        0.930232
17        0.892226
18        0.846543
19        0.807156
20        0.983459
21        0.923103
22        0.992366
23        0.953862
24        0.885160
25        0.822742
26        0.992254
27        0.908010
28        0.979527
29        0.984621
30        0.976875
31        0.976987
32        0.792491
33        0.999888
34        0.800068
35        0.916611
36        0.910345
37        0.865826
38        0.934654
39        0.884879
40        0.927607
41        0.967183
42        0.897951
43        0.931017
44        0.995859
45        0.869387
46        1.000000
47        0.984733
48        0.992254
49        0.952269
50        

In [4]:
# Stack the rgb predictions to get combi model values 
X_combined_train = np.column_stack((vector_red, vector_green, vector_blue)) # shape (1745, 18) - 18 features
X_combined_val = np.column_stack((vector_red_val, vector_green_val, vector_blue_val)) # shape (582, 18) - 18 features

# Combi Train
dtc_combi =  DecisionTreeClassifier()
dtc_combi.fit(X_combined_train, Y_train)

# Validation
print("Combined Model Validation")
combi_val = dtc_combi.predict(X_combined_val)
print(classification_report(Y_validation, combi_val, zero_division=0))
print(get_youdens_index(combi_val, Y_validation), "\n")

Combined Model Validation
              precision    recall  f1-score   support

           0       0.93      0.86      0.89       131
           1       0.97      0.93      0.95       119
           2       0.88      0.76      0.82       128
           3       0.97      0.85      0.91       131
           4       0.97      0.97      0.97       129
           5       0.97      0.92      0.95       131
           6       0.93      0.91      0.92       122
           7       0.88      0.86      0.87       131
           8       0.97      0.89      0.92       131
           9       0.93      0.83      0.88       115
          10       0.96      0.94      0.95       131
          11       0.95      0.89      0.92       131
          12       0.92      0.92      0.92       178
          13       0.92      0.91      0.91       131
          14       0.96      0.91      0.94       114
          15       0.92      0.95      0.94       131
          16       0.94      0.90      0.92       131
 

In [5]:
vector_red_test = dtc_red.predict(X_red_test)
vector_green_test = dtc_green.predict(X_green_test)
vector_blue_test = dtc_blue.predict(X_blue_test)

X_combined_test = np.column_stack((vector_red_test, vector_green_test, vector_blue_test)) # shape (582, 18) - 18 features

# Test Final
print("Combined Model Test")
combi_test = dtc_combi.predict(X_combined_test)
print(classification_report(Y_test, combi_test, zero_division=0))
print(get_youdens_index(combi_test, Y_test), "\n")

Combined Model Test
              precision    recall  f1-score   support

           0       0.95      0.88      0.91       131
           1       0.97      0.95      0.96       118
           2       0.91      0.88      0.89       128
           3       0.95      0.87      0.91       131
           4       0.97      0.91      0.94       128
           5       0.93      0.92      0.92       132
           6       0.92      0.87      0.89       122
           7       0.90      0.85      0.88       131
           8       0.93      0.94      0.94       131
           9       0.94      0.84      0.89       115
          10       0.97      0.92      0.94       131
          11       0.90      0.90      0.90       131
          12       0.96      0.92      0.94       179
          13       0.85      0.89      0.87       131
          14       0.97      0.93      0.95       114
          15       0.92      0.90      0.91       132
          16       0.86      0.90      0.88       131
       