In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Root folder of the training images
data_path = '../Datasets/Training'

def remove_white_background(pixels):
    """Replace near-white pixels with black.

    A pixel counts as near-white when each of its first three channels is
    strictly greater than 200 and strictly less than 256. Only those three
    channels are zeroed; any further channels are left unchanged.

    Args:
        pixels: iterable of per-pixel channel sequences (e.g. RGB tuples).

    Returns:
        A new list holding one mutable list of channel values per input pixel.
    """
    def _blacken_if_white(raw_pixel):
        channels = list(raw_pixel)
        # The generator is lazy: channels are checked in order and checking
        # stops at the first one that falls outside the range.
        if all(200 < channels[idx] < 256 for idx in range(3)):
            channels[:3] = [0, 0, 0]
        return channels

    return [_blacken_if_white(raw_pixel) for raw_pixel in pixels]

def grayscale(pixels):
    """Convert a flat sequence of colour pixels to greyscale values.

    The pixels are written into a temporary 24x24 "RGB" image, which is then
    converted to Pillow's single-band 'L' mode.

    Args:
        pixels: iterable of per-pixel channel sequences; each one is turned
            into a tuple before being handed to ``putdata``.

    Returns:
        A list with the pixel values of the converted 'L' image.
    """
    rgb_tuples = [tuple(channels) for channels in pixels]

    canvas = Image.new("RGB", (24,24))
    canvas.putdata(rgb_tuples)
    return list(canvas.convert('L').getdata())

def get_rgb_pixels_onehot_labels(src):
    """Load every image under ``src`` and one-hot encode its class label.

    ``src`` is scanned for sub-directories; each sub-directory name is used
    as the label of the image files it contains. Every image is resized to
    24x24, has its near-white pixels blacked out and is converted to
    greyscale. Despite the function name, the returned pixel data is
    therefore greyscale, not RGB.

    Args:
        src: path of the directory holding one sub-directory per class.

    Returns:
        A tuple ``(pixels, labels)`` where ``pixels`` is a list with one flat
        list of greyscale values per image and ``labels`` is the output of
        ``LabelBinarizer().fit_transform`` on the sub-directory names.
    """
    print("Starting...")
    newPixels = []
    # Plain list instead of np.append in a loop: appending to an array copies
    # it every time (quadratic) and started from a float array.
    labels = []

    # Sorted so that sample order does not depend on the order in which the
    # filesystem happens to list entries; any seeded shuffling/splitting of
    # the result is then reproducible across machines.
    for subdir in sorted(os.listdir(src)):
        current_path = os.path.join(src, subdir)
        # Skip stray files next to the class folders (e.g. .DS_Store), which
        # would otherwise make os.listdir below raise.
        if not os.path.isdir(current_path):
            continue
        for file in sorted(os.listdir(current_path)):
            # The context manager closes the underlying file handle, which
            # Image.open otherwise leaves open.
            with Image.open(os.path.join(current_path, file)) as img:
                # Single-band / palette images yield plain ints from getdata(),
                # which the per-pixel channel handling cannot process.
                if img.mode not in ("RGB", "RGBA"):
                    img = img.convert("RGB")
                imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            pixels = grayscale(pixels)
            newPixels.append(pixels)
            labels.append(subdir)
    return newPixels, LabelBinarizer().fit_transform(labels) # OneHot encode y

def process_files(src):
    """Load the image data under ``src`` and split it into three subsets.

    The full data set is first split 80/20 into a remainder and a validation
    set; the remainder is then split 80/20 again into the training and test
    sets. Both splits use ``random_state=1``.

    Args:
        src: directory passed on to ``get_rgb_pixels_onehot_labels``.

    Returns:
        ``(X_train, X_validation, X_test, y_train, y_validation, y_test)``
        with the three feature sets converted to numpy arrays.
    """
    all_pixels, y = get_rgb_pixels_onehot_labels(src)

    X_rest, X_validation, y_rest, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X_rest, y_rest, test_size=0.2, random_state=1)

    # Copy each sample so the arrays are built from fresh lists.
    X_grey_train = [sample.copy() for sample in X_train]
    X_grey_validation = [sample.copy() for sample in X_validation]
    X_grey_test = [sample.copy() for sample in X_test]

    print("Finished \n")
    return (
        np.asarray(X_grey_train),
        np.asarray(X_grey_validation),
        np.asarray(X_grey_test),
        y_train,
        y_validation,
        y_test,
    )

def get_youdens_index(predictions, Y):
    """Compute Youden's index (sensitivity + specificity - 1) per class.

    Args:
        predictions: predicted 0/1 labels, either 1-D (a single class) or
            2-D with one row per sample and one column per class.
        Y: true 0/1 labels with the same shape as ``predictions``.

    Returns:
        A pandas DataFrame with a single column ``'Youdens Index'`` and one
        row per class. A class without any positive (or without any negative)
        samples in ``Y`` gets NaN because its sensitivity (or specificity) is
        undefined.

    Side effects:
        Sets the global pandas option ``display.max_rows`` to 200.
    """
    Y = np.asarray(Y)
    predictions = np.asarray(predictions)

    correct = (Y == predictions)
    positive = (Y == 1)
    negative = (Y == 0)

    # Count true positive/negative and false positive/negative over the
    # sample axis. np.sum(axis=0) is explicit about the axis, unlike the
    # builtin sum, which iterates over rows one at a time.
    tp = np.sum(correct & positive, axis=0)
    tn = np.sum(correct & negative, axis=0)
    fp = np.sum(~correct & negative, axis=0)
    fn = np.sum(~correct & positive, axis=0)

    # Calculate sensitivity and specificity. 0/0 stays NaN, but without
    # emitting a RuntimeWarning for every affected class.
    with np.errstate(divide='ignore', invalid='ignore'):
        sensitivity = tp / (tp + fn)
        specificity = tn / (fp + tn)

    # atleast_1d: 1-D inputs give a scalar here, which DataFrame would reject.
    result = np.atleast_1d(sensitivity - (1 - specificity))
    # Put it in a dataframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    # Kept for backward compatibility: lets callers print long result tables.
    pd.set_option('display.max_rows', 200)

    return df

In [2]:
# Test/Configure runs
import time
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Call process_files and assign variables
X_grey_train, X_grey_validation, X_grey_test, Y_train, Y_validation, Y_test = process_files(data_path)

print("Hidden layers configurations test")
# Single-layer configurations need a trailing comma: (432) is just the int
# 432, whereas (432,) is a one-element tuple like the two-layer entries.
for hl in [(432,), (216,), (216, 108), (108,), (108, 54)]:
    print("Hidden layers: ", hl)
    # Fixed random_state so the configurations differ only in layer sizes.
    mlp = MLPClassifier(hidden_layer_sizes=hl, max_iter=500, random_state = 1)
    # Time only the fit call.
    t0 = time.time()
    mlp.fit(X_grey_train, Y_train)
    t1 = time.time()
    # Model selection uses the validation split; the test split stays untouched.
    print("Score: ", mlp.score(X_grey_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(mlp.predict(X_grey_validation), Y_validation), "\n")
    


Starting...
Finished 

Hidden layers configurations test
Hidden layers:  432
Score:  0.9497104032078414 Time:  383.7857594490051
     Youdens Index
0         0.917293
1         0.981762
2         0.968950
3         0.999607
4         0.921260
5         0.999944
6         0.976000
7         0.968504
8         0.999383
9         0.975000
10        0.952381
11        0.959596
12        1.000000
13        0.990741
14        0.999944
15        0.930233
16        0.956554
17        0.940510
18        0.937819
19        0.873950
20        1.000000
21        0.956778
22        0.928571
23        0.875862
24        0.993671
25        0.984211
26        1.000000
27        0.900474
28        0.954025
29        1.000000
30        1.000000
31        0.999775
32        0.887947
33        0.976154
34        0.888777
35        0.863580
36        0.950000
37        0.982883
38        0.984440
39        0.979171
40        0.992481
41        0.988580
42        0.882241
43        0.999720
44        1.0000

Score:  0.8485186010247271 Time:  173.36493253707886
     Youdens Index
0         0.999439
1         0.899496
2         0.683998
3         0.827629
4         0.982625
5         0.949775
6         0.967383
7         0.778630
8         0.976995
9         0.982997
10        0.919625
11        0.948935
12        0.956994
13        0.688159
14        0.877358
15        0.836985
16        0.978081
17        0.940566
18        0.868629
19        0.697311
20        0.982087
21        0.834027
22        0.921092
23        0.882702
24        0.942195
25        0.940867
26        0.993056
27        0.928234
28        0.830507
29        1.000000
30        0.992366
31        0.992690
32        0.812760
33        0.842071
34        0.777329
35        0.845174
36        0.891667
37        0.881356
38        0.953320
39        0.883393
40        0.962182
41        0.960002
42        0.926247
43        0.944920
44        0.999944
45        0.897077
46        0.912225
47        0.949468
48        0.9596

In [6]:
# Sweep MLPClassifier's `alpha` parameter over several orders of magnitude.
# default alpha=0.0001
# HL (216, 108) appears to perform the best, so use that for the alpha tests
print("Alpha Tests")
for a in [0.0001, 0.001, 0.01, 0.1, 1]:
    print("Alpha: ", a)
    # Same layer sizes and random_state on every iteration; only alpha changes.
    mlp = MLPClassifier(hidden_layer_sizes=(216, 108), alpha=a, max_iter=500, random_state = 1)
    # Time only the fit call.
    t0 = time.time()
    mlp.fit(X_grey_train, Y_train)
    t1 = time.time()
    # Score and per-class Youden's index are both taken on the validation split.
    print("Score: ", mlp.score(X_grey_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(mlp.predict(X_grey_validation), Y_validation), "\n")

Alpha Tests
Alpha:  0.0001
Score:  0.9411338828246826 Time:  189.20969986915588
     Youdens Index
0         0.999663
1         0.981818
2         0.915385
3         0.932780
4         0.991846
5         0.999832
6         0.991888
7         0.999551
8         0.984738
9         0.998935
10        0.983847
11        0.989843
12        0.998874
13        0.955107
14        0.952662
15        0.968824
16        0.956778
17        0.966046
18        0.930978
19        0.731092
20        0.973214
21        0.956722
22        0.985714
23        0.999214
24        0.987005
25        0.999493
26        1.000000
27        0.980648
28        0.958677
29        1.000000
30        0.992366
31        0.988248
32        0.962182
33        0.991677
34        0.856638
35        0.881594
36        0.966611
37        0.940678
38        0.992248
39        0.965641
40        0.977275
41        1.000000
42        0.941009
43        0.991963
44        0.999887
45        0.991789
46        0.894513
47      

Score:  0.9547226553798174 Time:  141.35582828521729
     Youdens Index
0         0.999776
1         0.772727
2         0.961538
3         0.970093
4         0.992126
5         1.000000
6         0.999832
7         0.992070
8         0.984962
9         0.999888
10        0.984071
11        0.999944
12        0.999775
13        0.999663
14        1.000000
15        0.992136
16        0.962906
17        0.940566
18        0.923913
19        0.823529
20        1.000000
21        0.985331
22        1.000000
23        0.986207
24        0.987342
25        0.999944
26        1.000000
27        0.976303
28        0.953969
29        1.000000
30        0.999607
31        0.999269
32        0.932724
33        0.992014
34        0.746032
35        0.917734
36        0.899944
37        0.923729
38        1.000000
39        0.945149
40        0.969925
41        0.943182
42        0.955882
43        0.999888
44        0.983806
45        0.952756
46        1.000000
47        0.999944
48        0.9757

In [7]:
# HL (216, 108) and alpha=1 seem to be the best based on the results above.
# Train the model with this configuration and finally see how it performs on
# the test split.
print('Beginning to fit model ... \n')
mlpc_grey =  MLPClassifier(hidden_layer_sizes=(216, 108), alpha=1, activation='relu', solver='adam', random_state=1, max_iter=500)
# The model is fitted on the training split only.
mlpc_grey.fit(X_grey_train, Y_train)
grey_test_result = mlpc_grey.predict(X_grey_test)

print("Grey model")
# Report precision/recall/F1 on the test split; zero_division=0 is passed
# explicitly to classification_report.
print(classification_report(Y_test, grey_test_result, zero_division=0))
# Youden's index on the same test predictions.
print(get_youdens_index(grey_test_result, Y_test), "\n")

Beginning to fit model ... 

Grey model
              precision    recall  f1-score   support

           0       0.83      1.00      0.91       101
           1       1.00      1.00      1.00       110
           2       0.92      0.93      0.93       113
           3       0.98      0.99      0.99       131
           4       0.89      0.90      0.90        93
           5       1.00      0.90      0.95        99
           6       0.97      1.00      0.98        98
           7       1.00      0.92      0.96       117
           8       1.00      0.66      0.79       105
           9       0.96      0.99      0.97        94
          10       0.81      1.00      0.89        88
          11       0.98      1.00      0.99       133
          12       0.90      1.00      0.95       131
          13       0.93      0.95      0.94       114
          14       1.00      0.94      0.97        88
          15       0.83      1.00      0.90       100
          16       0.99      0.85      0.