In [1]:
import os
from PIL import Image
import numpy as np
import pandas as pd
# Split set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.ensemble import RandomForestClassifier


# Path to image folders, 
data_path = fr'../Datasets/Training'

def remove_white_background(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0
        newPixels.append(pixel)
    
    return newPixels



def redify(pixels):
    return [r for r, g, b in pixels]

                
def greenify(pixels):
    return [g for r, g, b in pixels]



def blueify(pixels):
    return [b for r, g, b in pixels]


def get_rgb_pixels_onehot_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y

def process_files(src):
    X_red_train = []
    X_red_validation = []
    X_red_test = []
    X_green_train = []
    X_green_validation = []
    X_green_test = []
    X_blue_train = []
    X_blue_validation = []
    X_blue_test = []
    all_pixels, y = get_rgb_pixels_onehot_labels(src)
    
    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)
    
    for pixels in X_train:       
        X_red_train.append(redify(pixels.copy()))
        X_green_train.append(greenify(pixels.copy()))
        X_blue_train.append(blueify(pixels.copy()))
        
    for pixels in X_validation:       
        X_red_validation.append(redify(pixels.copy()))
        X_green_validation.append(greenify(pixels.copy()))
        X_blue_validation.append(blueify(pixels.copy()))
        
    for pixels in X_test:       
        X_red_test.append(redify(pixels.copy()))
        X_green_test.append(greenify(pixels.copy()))
        X_blue_test.append(blueify(pixels.copy()))
    
    
    print("Finished")
    return np.asarray(X_red_train), np.asarray(X_red_validation), np.asarray(X_red_test), np.asarray(X_green_train), np.asarray(X_green_validation), np.asarray(X_green_test), np.asarray(X_blue_train), np.asarray(X_blue_validation), np.asarray(X_blue_test), y_train, y_validation, y_test

def get_youdens_index(predictions, Y):
    # Calculate true positive/negative and false positive/negative
    tp = sum((Y == predictions) * (Y == 1) * 1)
    tn = sum((Y == predictions) * (Y == 0) * 1)
    fp = sum((Y != predictions) * (Y == 0) * 1)
    fn = sum((Y != predictions) * (Y == 1) * 1)
    
    # Calculate sensitivity and specificity
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    
    result = sensitivity - (1 - specificity)
    # Put it in a dateframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    pd.set_option('display.max_rows', 200)
    
    return df

In [2]:
# Call process_files and assign variables
X_red_train, X_red_validation, X_red_test, X_green_train, X_green_validation, X_green_test, X_blue_train, X_blue_validation, X_blue_test, Y_train, Y_validation, Y_test = process_files(data_path)

Starting...
Finished


In [3]:
import time


# Fit/train train-datasets and store prediction vectors in variables
from sklearn import svm

# Default n_estimator is 100, but seems way overkill

# Red Number of Trees Test
print("Red Config Trees Tests")
for trees in [5, 10, 15]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=trees, random_state = 1)
    t0 = time.time()
    rf.fit(X_red_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_red_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_red_validation), Y_validation), "\n")

# Green Number of Trees Test
print("Green Config Trees Tests")
for trees in [5, 10, 15]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=trees, random_state = 1)
    t0 = time.time()
    rf.fit(X_green_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_green_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_green_validation), Y_validation), "\n")
    
# Blue Number of Trees Test
print("Blue Config Trees Tests")
for trees in [5, 10, 15]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=trees, random_state = 1)
    t0 = time.time()
    rf.fit(X_blue_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_blue_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_blue_validation), Y_validation), "\n")

Red Config Trees Tests
Trees:  5
Score:  0.9498774782802406 Time:  19.6662278175354
     Youdens Index
0         0.969466
1         0.966387
2         0.921819
3         0.961832
4         0.976744
5         0.992366
6         0.975410
7         0.908397
8         0.946565
9         0.956522
10        0.954198
11        0.969466
12        0.943764
13        0.984733
14        0.999944
15        1.000000
16        0.946509
17        0.909091
18        0.931298
19        0.833333
20        0.983740
21        0.877863
22        1.000000
23        0.961832
24        0.954198
25        0.855615
26        0.984733
27        0.984772
28        0.994924
29        1.000000
30        0.992366
31        1.000000
32        0.818182
33        1.000000
34        0.839582
35        0.866667
36        0.926829
37        0.857143
38        0.984000
39        0.916031
40        0.928000
41        0.973262
42        0.784810
43        0.984733
44        1.000000
45        0.938931
46        1.000000
47  

Score:  0.9413009578970818 Time:  39.91840744018555
     Youdens Index
0         0.938931
1         0.949580
2         0.992188
3         0.992366
4         1.000000
5         1.000000
6         0.975410
7         0.893130
8         0.992366
9         0.982609
10        1.000000
11        0.961832
12        0.971910
13        0.977099
14        0.947368
15        0.992366
16        0.908397
17        0.892562
18        0.893130
19        0.750000
20        0.902439
21        0.816794
22        1.000000
23        1.000000
24        0.916031
25        0.839572
26        1.000000
27        0.994924
28        0.974619
29        1.000000
30        1.000000
31        1.000000
32        0.834711
33        0.961832
34        0.923664
35        0.925000
36        0.878049
37        0.847619
38        0.992000
39        0.770992
40        0.904000
41        0.978610
42        0.645570
43        0.908397
44        0.996198
45        0.908397
46        1.000000
47        1.000000
48        1.00000

Score:  0.9515482290042325 Time:  55.54091715812683
     Youdens Index
0         1.000000
1         1.000000
2         1.000000
3         1.000000
4         1.000000
5         1.000000
6         1.000000
7         0.946565
8         1.000000
9         0.930435
10        1.000000
11        0.984733
12        0.983146
13        0.969466
14        0.964912
15        1.000000
16        0.938931
17        0.900826
18        0.908397
19        0.800000
20        0.967480
21        0.870229
22        1.000000
23        1.000000
24        0.847328
25        0.877005
26        0.969466
27        0.989848
28        0.974619
29        1.000000
30        1.000000
31        1.000000
32        0.818182
33        0.992366
34        0.877863
35        0.775000
36        0.918699
37        0.809524
38        0.976000
39        0.786260
40        0.904000
41        0.978610
42        0.696203
43        0.946565
44        1.000000
45        0.916031
46        1.000000
47        1.000000
48        1.00000

In [4]:
# We choose 5 trees, the percentage increase doesn't seem worth it compared to the time,
# and we're combining these results to train on later anyways.

# Default is 0.
print("Red Config Alpha Tests")
# Red Model ALpha Config
for a in [0, 0.0001, 0.001]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=5, ccp_alpha=a, random_state=1)

    t0 = time.time()
    rf.fit(X_red_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_red_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_red_validation), Y_validation), "\n")
    
print("Green Config Alpha Tests")
# Green Model ALpha Config
for a in [0, 0.0001, 0.001]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=5, ccp_alpha=a, random_state=1)
    t0 = time.time()
    rf.fit(X_green_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_green_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_green_validation), Y_validation), "\n")
    
print("Blue Config Alpha Tests")
# Blue Model ALpha Config
for a in [0, 0.0001, 0.001]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=5, ccp_alpha=a, random_state=1)
    t0 = time.time()
    rf.fit(X_blue_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_blue_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_blue_validation), Y_validation), "\n")    

Red Config Alpha Tests
Alpha:  0
Score:  0.9498774782802406 Time:  20.392257928848267
     Youdens Index
0         0.969466
1         0.966387
2         0.921819
3         0.961832
4         0.976744
5         0.992366
6         0.975410
7         0.908397
8         0.946565
9         0.956522
10        0.954198
11        0.969466
12        0.943764
13        0.984733
14        0.999944
15        1.000000
16        0.946509
17        0.909091
18        0.931298
19        0.833333
20        0.983740
21        0.877863
22        1.000000
23        0.961832
24        0.954198
25        0.855615
26        0.984733
27        0.984772
28        0.994924
29        1.000000
30        0.992366
31        1.000000
32        0.818182
33        1.000000
34        0.839582
35        0.866667
36        0.926829
37        0.857143
38        0.984000
39        0.916031
40        0.928000
41        0.973262
42        0.784810
43        0.984733
44        1.000000
45        0.938931
46        1.000000
47

Score:  0.0 Time:  20.11303186416626
     Youdens Index
0              0.0
1              0.0
2              0.0
3              0.0
4              0.0
5              0.0
6              0.0
7              0.0
8              0.0
9              0.0
10             0.0
11             0.0
12             0.0
13             0.0
14             0.0
15             0.0
16             0.0
17             0.0
18             0.0
19             0.0
20             0.0
21             0.0
22             0.0
23             0.0
24             0.0
25             0.0
26             0.0
27             0.0
28             0.0
29             0.0
30             0.0
31             0.0
32             0.0
33             0.0
34             0.0
35             0.0
36             0.0
37             0.0
38             0.0
39             0.0
40             0.0
41             0.0
42             0.0
43             0.0
44             0.0
45             0.0
46             0.0
47             0.0
48             0.0
49           

Score:  0.0 Time:  19.45327854156494
     Youdens Index
0              0.0
1              0.0
2              0.0
3              0.0
4              0.0
5              0.0
6              0.0
7              0.0
8              0.0
9              0.0
10             0.0
11             0.0
12             0.0
13             0.0
14             0.0
15             0.0
16             0.0
17             0.0
18             0.0
19             0.0
20             0.0
21             0.0
22             0.0
23             0.0
24             0.0
25             0.0
26             0.0
27             0.0
28             0.0
29             0.0
30             0.0
31             0.0
32             0.0
33             0.0
34             0.0
35             0.0
36             0.0
37             0.0
38             0.0
39             0.0
40             0.0
41             0.0
42             0.0
43             0.0
44             0.0
45             0.0
46             0.0
47             0.0
48             0.0
49           

In [5]:
# 0 ccp_alpha chosen (this is default, hence not specified)
# Now create vectors that can be used for the final combi classifier, 
# need for both train and validation here

# Red
rf_red =  RandomForestClassifier(n_estimators=5, random_state=1)
rf_red.fit(X_red_train, Y_train)
vector_red = rf_red.predict(X_red_train)


# Green
rf_green =  RandomForestClassifier(n_estimators=5, random_state=1)
rf_green.fit(X_green_train, Y_train)
vector_green = rf_green.predict(X_green_train)


# Blue
rf_blue = RandomForestClassifier(n_estimators=5, random_state=1)
rf_blue.fit(X_blue_train, Y_train)
vector_blue = rf_blue.predict(X_blue_train)


# Validation
vector_red_val = rf_red.predict(X_red_validation)
vector_green_val = rf_green.predict(X_green_validation)
vector_blue_val = rf_blue.predict(X_blue_validation)

from sklearn.metrics import classification_report,confusion_matrix
print("Red Model Validation")
print(classification_report(Y_validation, vector_red_val, zero_division=0))
print(get_youdens_index(vector_red_val, Y_validation), "\n")

print("Green Model Validation")
print(classification_report(Y_validation, vector_green_val, zero_division=0))
print(get_youdens_index(vector_green_val, Y_validation), "\n")

print("Blue Model Validation")
print(classification_report(Y_validation, vector_blue_val, zero_division=0))
print(get_youdens_index(vector_blue_val, Y_validation), "\n")

Red Model Validation
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       131
           1       1.00      0.97      0.98       119
           2       0.99      0.92      0.96       128
           3       1.00      0.96      0.98       131
           4       1.00      0.98      0.99       129
           5       1.00      0.99      1.00       131
           6       1.00      0.98      0.99       122
           7       1.00      0.91      0.95       131
           8       1.00      0.95      0.97       131
           9       1.00      0.96      0.98       115
          10       1.00      0.95      0.98       131
          11       1.00      0.97      0.98       131
          12       0.99      0.94      0.97       178
          13       1.00      0.98      0.99       131
          14       0.99      1.00      1.00       114
          15       1.00      1.00      1.00       131
          16       0.99      0.95      0.97       131
      

     Youdens Index
0         0.954198
1         0.966387
2         0.984375
3         0.977099
4         0.992248
5         0.992366
6         0.975410
7         0.854962
8         0.961832
9         0.982609
10        1.000000
11        0.969466
12        0.977528
13        0.931298
14        0.947368
15        1.000000
16        0.900763
17        0.909091
18        0.908397
19        0.750000
20        0.910569
21        0.854962
22        1.000000
23        0.992366
24        0.885496
25        0.834225
26        0.992366
27        0.979639
28        0.944162
29        0.977099
30        1.000000
31        1.000000
32        0.826446
33        0.946565
34        0.915974
35        0.916611
36        0.894309
37        0.847619
38        0.976000
39        0.786260
40        0.904000
41        0.957219
42        0.708805
43        0.916031
44        1.000000
45        0.923664
46        1.000000
47        1.000000
48        1.000000
49        0.992063
50        1.000000
51        1.

In [6]:
# Stack the rgb predictions to get combi model values 
X_combined_train = np.column_stack((vector_red, vector_green, vector_blue)) 
X_combined_val = np.column_stack((vector_red_val, vector_green_val, vector_blue_val))

# Combi hyperparameters
print("Combi n_estimator/tree Tests")
for trees in [5, 10, 15]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=trees)
    t0 = time.time()
    rf.fit(X_combined_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_combined_val, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_combined_val), Y_validation), "\n")


Combi n_estimator/tree Tests
Trees:  5
Score:  0.9792269993316998 Time:  8.291460037231445
     Youdens Index
0         0.969466
1         0.991597
2         0.984375
3         1.000000
4         1.000000
5         1.000000
6         1.000000
7         0.961832
8         0.961832
9         1.000000
10        1.000000
11        0.992310
12        0.988652
13        1.000000
14        0.973684
15        1.000000
16        0.961776
17        0.958678
18        0.954198
19        0.916667
20        0.991870
21        0.908397
22        1.000000
23        0.961832
24        0.992366
25        0.946524
26        1.000000
27        0.979639
28        0.984772
29        1.000000
30        1.000000
31        1.000000
32        0.900826
33        1.000000
34        0.977099
35        0.941667
36        0.934959
37        0.885714
38        0.976000
39        0.923664
40        0.936000
41        0.994652
42        0.911392
43        0.992366
44        1.000000
45        0.923664
46        0.9923

In [7]:
print("Combi Alpha Tests")
# Combi Model ALpha Config
for a in [0, 0.0001, 0.001]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=15, ccp_alpha=a)
    t0 = time.time()
    rf.fit(X_combined_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_combined_val, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_combined_val), Y_validation), "\n")

Combi Alpha Tests
Alpha:  0
Score:  0.9801180663844954 Time:  22.417953729629517
     Youdens Index
0         0.992366
1         0.966387
2         1.000000
3         1.000000
4         0.984496
5         1.000000
6         1.000000
7         0.961832
8         1.000000
9         1.000000
10        0.992366
11        0.999944
12        0.988764
13        1.000000
14        0.973684
15        1.000000
16        0.961776
17        0.958678
18        0.969466
19        0.866667
20        0.991870
21        0.908397
22        1.000000
23        1.000000
24        0.984733
25        0.946524
26        0.931298
27        0.989848
28        0.918782
29        1.000000
30        1.000000
31        1.000000
32        0.818182
33        0.984733
34        0.961832
35        0.941667
36        0.951220
37        0.923810
38        1.000000
39        0.938931
40        0.936000
41        1.000000
42        0.911392
43        0.992366
44        1.000000
45        0.961832
46        0.992366
47     

In [8]:
# We have now decided our hyper parameters, 0 alpha seems best here too, with 5 n_estimators.
# it's time to train and predict on test.

# Combi Train
rf_combi =  RandomForestClassifier(n_estimators=5, random_state=1)
rf_combi.fit(X_combined_train, Y_train)

# Create the combined X_test set
vector_red_test = rf_red.predict(X_red_test)
vector_green_test = rf_green.predict(X_green_test)
vector_blue_test = rf_blue.predict(X_blue_test)

X_combined_test = np.column_stack((vector_red_test, vector_green_test, vector_blue_test))

# Predict the combi test set
combi_test_set_prediction = rf_combi.predict(X_combined_test)

# Show Youdens Index
print(get_youdens_index(combi_test_set_prediction, Y_test))


     Youdens Index
0         1.000000
1         0.000000
2         0.784314
3         1.000000
4         0.980583
5         0.980952
6         0.886598
7         0.742857
8         0.923810
9         0.923913
10        0.904762
11        0.914286
12        0.944056
13        0.914286
14        0.879121
15        1.000000
16        0.971429
17        0.874019
18        0.866667
19        0.800542
20        0.767677
21        0.942857
22        1.000000
23        0.000000
24        0.952381
25        0.840000
26       -0.005259
27        0.000000
28        0.987342
29        1.000000
30        0.076190
31        0.257143
32        0.833333
33        0.952381
34        0.923810
35        0.906250
36        0.898990
37        0.855422
38        0.950000
39        0.971148
40        0.980000
41        0.911153
42        0.809524
43        0.971429
44        0.990476
45        0.742857
46        1.000000
47        1.000000
48        0.771429
49        0.089109
50        1.000000
51        0.