In [1]:
import os
from PIL import Image
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
# Split set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.ensemble import RandomForestClassifier


# Path to image folders, 
data_path = fr'../Datasets/config'
# os.listdir(data_path)

def remove_white_background(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0
        newPixels.append(pixel)
    
    return newPixels



def redify(pixels):
    return [r for r, g, b in pixels]

                
def greenify(pixels):
    return [g for r, g, b in pixels]



def blueify(pixels):
    return [b for r, g, b in pixels]


def get_rgb_pixels_onehut_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y

def process_files(src):
    X_red_train = []
    X_red_validation = []
    X_red_test = []
    X_green_train = []
    X_green_validation = []
    X_green_test = []
    X_blue_train = []
    X_blue_validation = []
    X_blue_test = []
    all_pixels, y = get_rgb_pixels_onehut_labels(src)
    
    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)
    
    for pixels in X_train:       
        X_red_train.append(redify(pixels.copy()))
        X_green_train.append(greenify(pixels.copy()))
        X_blue_train.append(blueify(pixels.copy()))
        
    for pixels in X_validation:       
        X_red_validation.append(redify(pixels.copy()))
        X_green_validation.append(greenify(pixels.copy()))
        X_blue_validation.append(blueify(pixels.copy()))
        
    for pixels in X_test:       
        X_red_test.append(redify(pixels.copy()))
        X_green_test.append(greenify(pixels.copy()))
        X_blue_test.append(blueify(pixels.copy()))
    
    
    print("Finished")
    return np.asarray(X_red_train), np.asarray(X_red_validation), np.asarray(X_red_test), np.asarray(X_green_train), np.asarray(X_green_validation), np.asarray(X_green_test), np.asarray(X_blue_train), np.asarray(X_blue_validation), np.asarray(X_blue_test), y_train, y_validation, y_test

def get_youdens_index(predictions, Y):
    # Calculate true positive/negative and false positive/negative
    tp = sum((Y == predictions) * (Y == 1) * 1)
    tn = sum((Y == predictions) * (Y == 0) * 1)
    fp = sum((Y != predictions) * (Y == 0) * 1)
    fn = sum((Y != predictions) * (Y == 1) * 1)
    
    # Calculate sensitivity and specificity
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    
    result = sensitivity - (1 - specificity)
    # Put it in a dateframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    pd.set_option('display.max_rows', 200)
    
    return df

In [2]:
import time

# Call process_files and assign variables
X_red_train, X_red_validation, X_red_test, X_green_train, X_green_validation, X_green_test, X_blue_train, X_blue_validation, X_blue_test, Y_train, Y_validation, Y_test = process_files(data_path)
# Fit/train train-datasets and store prediction vectors in variables
from sklearn import svm

# Red Number of Trees Test
print("Red Config Trees Tests")
for trees in [(432), (216), (216, 108), (108), (108, 54)]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=100)
    t0 = time.time()
    rf.fit(X_red_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_red_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_red_validation), Y_validation), "\n")

# Green Number of Trees Test
print("Green Config Trees Tests")
for trees in [(432), (216), (216, 108), (108), (108, 54)]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=100)
    t0 = time.time()
    rf.fit(X_green_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_green_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_green_validation), Y_validation), "\n")
    
# Blue Config HL Test
print("Blue Config Trees Tests")
for trees in [(432), (216), (216, 108), (108), (108, 54)]:
    print("Trees: ", trees)
    rf = RandomForestClassifier(n_estimators=100)
    t0 = time.time()
    rf.fit(X_blue_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_blue_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_blue_validation), Y_validation), "\n")

Starting...
Finished
Red Config Trees Tests
Trees:  432
Score:  1.0 Time:  1.042423963546753
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Trees:  216
Score:  1.0 Time:  1.0253500938415527
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Trees:  (216, 108)
Score:  1.0 Time:  1.0232570171356201
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Trees:  108
Score:  1.0 Time:  1.0292861461639404
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Trees:  (108, 54)
Score:  1.0 Time:  1.0281519889831543
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Green Config Trees Tests
Trees:  432
Score:  1.0 Time:  0.83689284324646
   Youdens Index

In [3]:
# default alpha=0.0001

print("Red Config Alpha Tests")
# Red Model ALpha Config
for a in [0.0001, 0.001, 0.01, 0.1, 1]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=100, ccp_alpha=a) # IKKE SIKKER PÅ CCP ALPHA HER

    t0 = time.time()
    rf.fit(X_red_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_red_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_red_validation), Y_validation), "\n")
    
print("Green Config Alpha Tests")
# Green Model ALpha Config
for a in [0.0001, 0.001, 0.01, 0.1, 1]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=100, ccp_alpha=a)
    t0 = time.time()
    rf.fit(X_green_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_green_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_green_validation), Y_validation), "\n")
    
print("Blue Config Alpha Tests")
# Blue Model ALpha Config
for a in [0.0001, 0.001, 0.01, 0.1, 1]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=100, ccp_alpha=a)
    t0 = time.time()
    rf.fit(X_blue_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_blue_validation, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_blue_validation), Y_validation), "\n")    

Red Config Alpha Tests
Alpha:  0.0001
Score:  1.0 Time:  1.0981757640838623
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Alpha:  0.001
Score:  1.0 Time:  1.0716583728790283
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Alpha:  0.01
Score:  0.9759450171821306 Time:  1.0479309558868408
   Youdens Index
0       0.880000
1       0.989583
2       1.000000
3       1.000000
4       1.000000
5       0.972789 

Alpha:  0.1
Score:  0.0 Time:  1.0701313018798828
   Youdens Index
0            0.0
1            0.0
2            0.0
3            0.0
4            0.0
5            0.0 

Alpha:  1
Score:  0.0 Time:  1.0362226963043213
   Youdens Index
0            0.0
1            0.0
2            0.0
3            0.0
4            0.0
5            0.0 

Green Config Alpha Tests
Alpha:  0.0001
Score:  1.0 Time:  0.8478801250457764
   Youdens Index
0       

In [4]:
# Red
rf_red =  RandomForestClassifier(random_state=1)
rf_red.fit(X_red_train, Y_train)
vector_red = rf_red.predict(X_red_train)
vector_red_val = rf_red.predict(X_red_validation)

#Green
rf_green =  RandomForestClassifier(random_state=1)
rf_green.fit(X_blue_train, Y_train)
vector_green = rf_green.predict(X_blue_train)
vector_green_val = rf_green.predict(X_blue_validation)

#Blue
rf_blue =  RandomForestClassifier(random_state=1)
rf_blue.fit(X_blue_train, Y_train)
vector_blue = rf_red.predict(X_blue_train)
vector_blue_val = rf_red.predict(X_blue_validation)

from sklearn.metrics import classification_report,confusion_matrix
print("Red Model Validation")
print(classification_report(Y_validation, rf_red.predict(X_red_validation)))

print("Green Model Validation")
print(classification_report(Y_validation, rf_green.predict(X_green_validation)))

print("Blue Model Validation")
print(classification_report(Y_validation, rf_blue.predict(X_blue_validation)))

Red Model Validation
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        75
           1       1.00      1.00      1.00        96
           2       1.00      1.00      1.00        99
           3       1.00      1.00      1.00        73
           4       1.00      1.00      1.00        92
           5       1.00      1.00      1.00       147

   micro avg       1.00      1.00      1.00       582
   macro avg       1.00      1.00      1.00       582
weighted avg       1.00      1.00      1.00       582
 samples avg       1.00      1.00      1.00       582

Green Model Validation
              precision    recall  f1-score   support

           0       1.00      0.89      0.94        75
           1       1.00      0.98      0.99        96
           2       1.00      1.00      1.00        99
           3       0.00      0.00      0.00        73
           4       0.00      0.00      0.00        92
           5       0.47      1.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
# Stack the rgb predictions to get combi model values 
X_combined_train = np.column_stack((vector_red, vector_green, vector_blue)) # shape (1745, 18) - 18 features
X_combined_val = np.column_stack((vector_red_val, vector_green_val, vector_blue_val)) # shape (582, 18) - 18 features

# Combi Config HL Test
print("Combi Config HL Tests")
for hl in [(18), (14), (14, 7)]:
    print("Hidden layers: ", hl)
    rf = RandomForestClassifier(n_estimators=100)
    t0 = time.time()
    rf.fit(X_combined_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_combined_val, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_combined_val), Y_validation), "\n")


print("Combi Config Alpha Tests")
# Combi Model ALpha Config
for a in [0.0001, 0.001, 0.01, 0.1, 1]:
    print("Alpha: ", a)
    rf = RandomForestClassifier(n_estimators=100, ccp_alpha=a)
    t0 = time.time()
    rf.fit(X_combined_train, Y_train)
    t1 = time.time()
    print("Score: ", rf.score(X_combined_val, Y_validation), "Time: ", t1 - t0)
    print(get_youdens_index(rf.predict(X_combined_val), Y_validation), "\n")

Combi Config HL Tests
Hidden layers:  18
Score:  1.0 Time:  0.23839306831359863
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Hidden layers:  14
Score:  1.0 Time:  0.2149970531463623
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Hidden layers:  (14, 7)
Score:  1.0 Time:  0.21904540061950684
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Combi Config Alpha Tests
Alpha:  0.0001
Score:  1.0 Time:  0.2304837703704834
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Alpha:  0.001
Score:  1.0 Time:  0.23412513732910156
   Youdens Index
0            1.0
1            1.0
2            1.0
3            1.0
4            1.0
5            1.0 

Alpha:  0.01
Score:  1.0 Time:  0.2259960174560547
   Youdens In

In [6]:
# Jeg prøvede nedenstående, men der findes ikke en relu activation function i random forest
# # Combi Train
# rf_combi =  RandomForestClassifier(n_estimators=100, activation='relu', solver='adam', random_state=1, max_iter=300)
# rf_combi.fit(X_combined_train, Y_train)

# # Validation
# print("Combined Model Validation")
# rf_combi = rf_combi.predict(X_combined_val)
# print(classification_report(Y_validation, combi_val, zero_division=0))
# print(get_youdens_index(combi_val, Y_validation), "\n")

In [9]:
#Er i tvivl om vi fortsat skal bruge nedenstående? - Jeppe

X_combined_train = np.column_stack((vector_red, vector_green, vector_blue))
X_combined_val = np.column_stack((vector_red_val, vector_green_val, vector_blue_val))

rf_comb = RandomForestClassifier(random_state=1)
rf_comb.fit(X_combined_train, Y_train)

comb_validation = rf_comb.predict(X_combined_val)

print(f' VALIDATION PREDICTIONS: \n {classification_report(Y_validation, comb_validation)} \n')

 VALIDATION PREDICTIONS: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        75
           1       1.00      1.00      1.00        96
           2       1.00      1.00      1.00        99
           3       1.00      1.00      1.00        73
           4       1.00      1.00      1.00        92
           5       1.00      1.00      1.00       147

   micro avg       1.00      1.00      1.00       582
   macro avg       1.00      1.00      1.00       582
weighted avg       1.00      1.00      1.00       582
 samples avg       1.00      1.00      1.00       582
 

