In [1]:
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import pandas as pd
from sklearn.metrics import classification_report,confusion_matrix

# Path to image folders, 
#data_path = fr'../Datasets/config'
data_path = fr'../Datasets/full/Training'

def remove_white_background(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0
        newPixels.append(pixel)
    
    return newPixels


def redify(pixels):
    return [r for r, g, b in pixels]

                
def greenify(pixels):
    return [g for r, g, b in pixels]


def blueify(pixels):
    return [b for r, g, b in pixels]


def get_rgb_pixels_onehot_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y


def process_files(src):
    X_red_train = []
    X_red_validation = []
    X_red_test = []
    X_green_train = []
    X_green_validation = []
    X_green_test = []
    X_blue_train = []
    X_blue_validation = []
    X_blue_test = []
    all_pixels, y = get_rgb_pixels_onehot_labels(src)

    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)
    
    for pixels in X_train:       
        X_red_train.append(redify(pixels.copy()))
        X_green_train.append(greenify(pixels.copy()))
        X_blue_train.append(blueify(pixels.copy()))
        
    for pixels in X_validation:       
        X_red_validation.append(redify(pixels.copy()))
        X_green_validation.append(greenify(pixels.copy()))
        X_blue_validation.append(blueify(pixels.copy()))
        
    for pixels in X_test:       
        X_red_test.append(redify(pixels.copy()))
        X_green_test.append(greenify(pixels.copy()))
        X_blue_test.append(blueify(pixels.copy()))
    
    
    print("Finished \n")
    return np.asarray(X_red_train), np.asarray(X_red_validation), np.asarray(X_red_test), np.asarray(X_green_train), np.asarray(X_green_validation), np.asarray(X_green_test), np.asarray(X_blue_train), np.asarray(X_blue_validation), np.asarray(X_blue_test), y_train, y_validation, y_test


def get_youdens_index(predictions, Y):
    # Calculate true positive/negative and false positive/negative
    tp = sum((Y == predictions) * (Y == 1) * 1)
    tn = sum((Y == predictions) * (Y == 0) * 1)
    fp = sum((Y != predictions) * (Y == 0) * 1)
    fn = sum((Y != predictions) * (Y == 1) * 1)
    
    # Calculate sensitivity and specificity
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    
    result = sensitivity - (1 - specificity)
    # Put it in a dateframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    pd.set_option('display.max_rows', 200)
    
    return df

In [2]:
# Call process_files and assign variables
X_red_train, X_red_validation, X_red_test, X_green_train, X_green_validation, X_green_test, X_blue_train, X_blue_validation, X_blue_test, Y_train, Y_validation, Y_test = process_files(data_path)

Starting...
Finished 



In [3]:
from sklearn.tree import DecisionTreeClassifier

# Red Train
dtc_red = DecisionTreeClassifier()
dtc_red.fit(X_red_train, Y_train)
vector_red = dtc_red.predict(X_red_train)

# Green Train
dtc_green = DecisionTreeClassifier()
dtc_green.fit(X_green_train, Y_train)
vector_green = dtc_green.predict(X_green_train)

# Blue Train
dtc_blue = DecisionTreeClassifier()
dtc_blue.fit(X_blue_train, Y_train)
vector_blue = dtc_blue.predict(X_blue_train)

# Validation
vector_red_val = dtc_red.predict(X_red_validation)
vector_green_val = dtc_green.predict(X_green_validation)
vector_blue_val = dtc_blue.predict(X_blue_validation)

print("Red Model Validation")
print(get_youdens_index(vector_red_val, Y_validation), "\n")

print("Green Model Validation")
print(get_youdens_index(vector_green_val, Y_validation), "\n")

print("Blue Model Validation")
print(get_youdens_index(vector_blue_val, Y_validation), "\n")

Red Model Validation
     Youdens Index
0         0.945892
1         0.948963
2         0.858758
3         0.930568
4         0.906248
5         0.923215
6         0.966148
7         0.899641
8         0.961383
9         0.894643
10        0.884486
11        0.960991
12        0.953987
13        0.930063
14        0.964800
15        0.938595
16        0.900258
17        0.867600
18        0.876404
19        0.757436
20        0.983347
21        0.923215
22        1.000000
23        0.938539
24        0.877358
25        0.811990
26        0.991974
27        0.907672
28        0.974394
29        0.984733
30        0.984677
31        0.984621
32        0.776131
33        0.976931
34        0.853728
35        0.932829
36        0.910064
37        0.856807
38        0.911215
39        0.861642
40        0.919607
41        0.945961
42        0.847486
43        0.938370
44        0.992056
45        0.854233
46        0.999888
47        0.984733
48        0.999944
49        0.960205
50        

In [4]:
# Stack the rgb predictions to get combi model values 
X_combined_train = np.column_stack((vector_red, vector_green, vector_blue)) # shape (1745, 18) - 18 features
X_combined_val = np.column_stack((vector_red_val, vector_green_val, vector_blue_val)) # shape (582, 18) - 18 features

# Combi Train
dtc_combi =  DecisionTreeClassifier()
dtc_combi.fit(X_combined_train, Y_train)

# Validation
print("Combined Model Validation")
combi_val = dtc_combi.predict(X_combined_val)
print(classification_report(Y_validation, combi_val, zero_division=0))
print(get_youdens_index(combi_val, Y_validation), "\n")

Combined Model Validation
              precision    recall  f1-score   support

           0       0.97      0.88      0.92       131
           1       0.96      0.92      0.94       119
           2       0.94      0.73      0.82       128
           3       0.98      0.92      0.94       131
           4       0.96      0.95      0.96       129
           5       0.98      0.95      0.96       131
           6       0.99      0.87      0.93       122
           7       0.88      0.85      0.87       131
           8       0.94      0.91      0.93       131
           9       0.93      0.87      0.90       115
          10       0.98      0.93      0.96       131
          11       0.90      0.92      0.91       131
          12       0.93      0.87      0.90       178
          13       0.91      0.90      0.90       131
          14       0.99      0.91      0.95       114
          15       1.00      0.97      0.98       131
          16       0.94      0.90      0.92       131
 

In [5]:
vector_red_test = dtc_red.predict(X_red_test)
vector_green_test = dtc_green.predict(X_green_test)
vector_blue_test = dtc_blue.predict(X_blue_test)

X_combined_test = np.column_stack((vector_red_test, vector_green_test, vector_blue_test)) # shape (582, 18) - 18 features

# Test Final
print("Combined Model Test")
combi_test = dtc_combi.predict(X_combined_test)
print(classification_report(Y_test, combi_test, zero_division=0))
print(get_youdens_index(combi_test, Y_test), "\n")

Combined Model Test
              precision    recall  f1-score   support

           0       0.97      0.89      0.92       131
           1       0.96      0.97      0.96       118
           2       0.94      0.79      0.86       128
           3       0.99      0.95      0.97       131
           4       0.95      0.89      0.92       128
           5       0.95      0.92      0.93       132
           6       0.95      0.87      0.91       122
           7       0.88      0.88      0.88       131
           8       0.95      0.92      0.94       131
           9       0.96      0.88      0.92       115
          10       0.95      0.92      0.94       131
          11       0.94      0.85      0.90       131
          12       0.90      0.91      0.90       179
          13       0.90      0.88      0.89       131
          14       0.98      0.87      0.92       114
          15       0.97      0.95      0.96       132
          16       0.94      0.90      0.92       131
       