In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Path to images
data_path = fr'../Datasets/full/Training'

def remove_white_background(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0
        newPixels.append(pixel)
    
    return newPixels

def grayscale(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = tuple(pixel)
        newPixels.append(pixel)
    
    newImg = Image.new("RGB", (24,24))
    newImg.putdata(newPixels)
    greyImg = newImg.convert('L')
    return list(greyImg.getdata())

def get_rgb_pixels_onehot_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            pixels = grayscale(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y

def process_files(src):
    X_grey_train = []
    X_grey_validation = []
    X_grey_test = []
    all_pixels, y = get_rgb_pixels_onehot_labels(src)

    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=1)
    
    for pixels in X_train:       
        X_grey_train.append(pixels.copy())
        
    for pixels in X_validation:       
        X_grey_validation.append(pixels.copy())
        
    for pixels in X_test:       
        X_grey_test.append(pixels.copy())
    
    print("Finished \n")
    return np.asarray(X_grey_train), np.asarray(X_grey_validation), np.asarray(X_grey_test), y_train, y_validation, y_test

def get_youdens_index(predictions, Y):
    # Calculate true positive/negative and false positive/negative
    tp = sum((Y == predictions) * (Y == 1) * 1)
    tn = sum((Y == predictions) * (Y == 0) * 1)
    fp = sum((Y != predictions) * (Y == 0) * 1)
    fn = sum((Y != predictions) * (Y == 1) * 1)
    
    # Calculate sensitivity and specificity
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    
    result = sensitivity - (1 - specificity)
    # Put it in a dateframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    pd.set_option('display.max_rows', 200)
    
    return df

In [2]:
# Test/Configure runs
import time
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

# Call process_files and assign variables
X_grey_train, X_grey_validation, X_grey_test, Y_train, Y_validation, Y_test = process_files(data_path)

print("Hidden layers configurations test")
for hl in [(432), (216), (216, 108), (108), (108, 54)]:
    print("Hidden layers: ", hl)
    mlp = MLPClassifier(hidden_layer_sizes=hl, max_iter=500, random_state = 1)
    t0 = time.time()
    mlp.fit(X_grey_train, Y_train)
    t1 = time.time()
    print("Score: ", mlp.score(X_grey_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(mlp.predict(X_grey_validation), Y_validation), "\n")
    


Starting...
Finished 

Hidden layers configurations test
Hidden layers:  432
Score:  0.9329472042771219 Time:  181.0083291530609
     Youdens Index
0         0.991976
1         0.999888
2         0.976530
3         0.977387
4         0.936952
5         0.914005
6         0.944000
7         0.944882
8         0.992313
9         0.949944
10        0.976134
11        0.979798
12        0.989417
13        0.933165
14        0.877358
15        0.953376
16        0.877529
17        0.906780
18        0.937931
19        0.739496
20        1.000000
21        0.985555
22        0.999663
23        0.978861
24        0.980956
25        0.989474
26        1.000000
27        0.990465
28        0.987938
29        1.000000
30        1.000000
31        0.994096
32        0.991696
33        0.990612
34        0.714230
35        0.818182
36        0.949944
37        0.983051
38        0.984384
39        0.951999
40        0.999663
41        1.000000
42        0.897003
43        0.999888
44        1.0000

Score:  0.8665627088438405 Time:  67.73351287841797
     Youdens Index
0         0.969925
1         0.972279
2         0.806795
3         0.923858
4         0.999439
5         0.970531
6         0.967832
7         0.724297
8         0.953990
9         0.949720
10        0.928235
11        0.969697
12        0.915621
13        0.710999
14        0.933738
15        0.922312
16        0.920639
17        0.915142
18        0.813793
19        0.704481
20        0.999888
21        0.820032
22        0.806974
23        0.882366
24        0.961969
25        0.988686
26        1.000000
27        0.928234
28        0.881640
29        1.000000
30        0.992366
31        0.952823
32        0.842947
33        0.905063
34        0.761288
35        0.872055
36        0.907997
37        0.889662
38        0.813729
39        0.938075
40        0.932219
41        0.954377
42        0.823306
43        0.944695
44        0.995951
45        0.967943
46        0.929208
47        0.982913
48        0.97569

In [3]:
# default alpha=0.0001
# HL (216, 108) appears perform the best, use that for alpha tests
print("Alpha Tests")
for a in [0.0001, 0.001, 0.01, 0.1, 1]:
    print("Alpha: ", a)
    mlp = MLPClassifier(hidden_layer_sizes=(216, 108), alpha=a, max_iter=500, random_state = 1)
    t0 = time.time()
    mlp.fit(X_grey_train, Y_train)
    t1 = time.time()
    print("Score: ", mlp.score(X_grey_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(mlp.predict(X_grey_validation), Y_validation), "\n")

Alpha Tests
Alpha:  0.0001
Score:  0.9323345956783248 Time:  98.70219111442566
     Youdens Index
0         1.000000
1         0.954041
2         0.815385
3         0.925261
4         0.858212
5         0.991959
6         1.000000
7         0.976266
8         0.969869
9         0.966667
10        0.928403
11        0.999776
12        0.989417
13        0.933221
14        0.971642
15        0.976632
16        0.963355
17        0.982995
18        0.882759
19        0.856919
20        0.991071
21        0.927889
22        0.914230
23        0.992991
24        0.948918
25        0.957838
26        1.000000
27        0.990070
28        0.979198
29        1.000000
30        1.000000
31        0.988248
32        0.932724
33        0.771654
34        0.872679
35        0.881706
36        0.941667
37        0.906780
38        0.976744
39        0.979340
40        0.977444
41        0.977273
42        0.955882
43        0.991963
44        0.983749
45        0.944770
46        0.956084
47       

Score:  0.9418578748050791 Time:  123.80176901817322
     Youdens Index
0         0.999383
1         0.881818
2         0.922740
3         0.955168
4         0.913330
5         0.956694
6         0.999888
7         0.913386
8         0.894737
9         0.949888
10        0.896825
11        0.949327
12        0.999550
13        0.962851
14        1.000000
15        0.992248
16        0.963804
17        0.982995
18        0.889655
19        0.899104
20        0.999776
21        0.983984
22        0.985714
23        0.972302
24        0.847989
25        0.999887
26        0.979167
27        0.957346
28        0.999718
29        0.999944
30        0.984733
31        0.999944
32        0.910392
33        0.944882
34        0.975293
35        0.899888
36        0.966611
37        0.991133
38        0.999944
39        0.979396
40        0.887218
41        0.999775
42        0.867647
43        0.999103
44        0.991903
45        1.000000
46        1.000000
47        1.000000
48        0.9838

In [4]:
# HL (216, 108) and alpha=1 seems to be the best based on results above.
# Train the model with this and let's finally see how it performs on test.
print('Beginning to fit model ... \n')
mlpc_grey =  MLPClassifier(hidden_layer_sizes=(216, 108), alpha=1, activation='relu', solver='adam', random_state=1, max_iter=500)
mlpc_grey.fit(X_grey_train, Y_train)
grey_test_result = mlpc_grey.predict(X_grey_test)

print("Grey model")
print(classification_report(Y_test, grey_test_result, zero_division=0))
print(get_youdens_index(grey_test_result, Y_test), "\n")

Beginning to fit model ... 

Grey model
              precision    recall  f1-score   support

           0       0.94      1.00      0.97       101
           1       1.00      0.88      0.94       110
           2       0.98      0.88      0.93       113
           3       0.99      0.95      0.97       131
           4       0.98      0.92      0.95        93
           5       0.97      0.98      0.97        99
           6       0.89      1.00      0.94        98
           7       1.00      0.94      0.97       117
           8       1.00      0.90      0.95       105
           9       0.98      0.91      0.95        94
          10       1.00      0.90      0.95        88
          11       1.00      0.98      0.99       133
          12       0.95      0.98      0.97       131
          13       0.99      0.96      0.97       114
          14       0.97      1.00      0.98        88
          15       0.98      0.98      0.98       100
          16       0.99      0.94      0.