In [1]:
import os
from PIL import Image
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# Split set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import StratifiedShuffleSplit

# Path to image folders, 
data_path = fr'../Datasets/train'

def remove_white_background(pixels):
    newPixels = []
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0
        newPixels.append(pixel)
    
    return newPixels


def redify(pixels):
    return [r for r, g, b in pixels]

                
def greenify(pixels):
    return [g for r, g, b in pixels]



def blueify(pixels):
    return [b for r, g, b in pixels]


def get_rgb_pixels_onehot_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y

def process_files(src):
    X_red_train = []
    X_red_validation = []
    X_red_test = []
    X_green_train = []
    X_green_validation = []
    X_green_test = []
    X_blue_train = []
    X_blue_validation = []
    X_blue_test = []
    all_pixels, y = get_rgb_pixels_onehot_labels(src)

    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)
    
    for pixels in X_train:       
        X_red_train.append(redify(pixels.copy()))
        X_green_train.append(greenify(pixels.copy()))
        X_blue_train.append(blueify(pixels.copy()))
        
    for pixels in X_validation:       
        X_red_validation.append(redify(pixels.copy()))
        X_green_validation.append(greenify(pixels.copy()))
        X_blue_validation.append(blueify(pixels.copy()))
        
    for pixels in X_test:       
        X_red_test.append(redify(pixels.copy()))
        X_green_test.append(greenify(pixels.copy()))
        X_blue_test.append(blueify(pixels.copy()))
    
    
    print("Finished")
    return np.asarray(X_red_train), np.asarray(X_red_validation), np.asarray(X_red_test), np.asarray(X_green_train), np.asarray(X_green_validation), np.asarray(X_green_test), np.asarray(X_blue_train), np.asarray(X_blue_validation), np.asarray(X_blue_test), y_train, y_validation, y_test

In [2]:
# Call process_files and assign variables
X_red_train, X_red_validation, X_red_test, X_green_train, X_green_validation, X_green_test, X_blue_train, X_blue_validation, X_blue_test, y_train, y_validation, y_test = process_files(data_path)
# Fit/train train-datasets and store prediction vectors in variables
from sklearn import svm
from sklearn.neural_network import MLPClassifier

# Red Train
mlpc_red =  MLPClassifier(hidden_layer_sizes=(577), activation='relu', solver='adam', random_state=1)
mlpc_red.fit(X_red_train, y_train)
vector_red = mlpc_red.predict(X_red_train)

# Green
mlpc_green = MLPClassifier(hidden_layer_sizes=(577), activation='relu', solver='adam', random_state=1)
mlpc_green.fit(X_green_train, y_train)
vector_green = mlpc_green.predict(X_green_train)

# Blue
mlpc_blue = MLPClassifier(hidden_layer_sizes=(577), activation='relu', solver='adam', random_state=1)
mlpc_blue.fit(X_blue_train, y_train)
vector_blue = mlpc_blue.predict(X_blue_train)

# Validation
vector_red_val = mlpc_red.predict(X_red_validation)
vector_green_val = mlpc_green.predict(X_green_validation)
vector_blue_val = mlpc_blue.predict(X_blue_validation)


Starting...
Finished


In [3]:
import pandas as pd

def get_youdens_index(predictions, Y):
    # Calculate true positive/negative and false positive/negative
    tp = sum((Y == predictions) * (Y == 1) * 1)
    tn = sum((Y == predictions) * (Y == 0) * 1)
    fp = sum((Y != predictions) * (Y == 0) * 1)
    fn = sum((Y != predictions) * (Y == 1) * 1)
    
    # Calculate sensitivity and specificity
    sensitivity = tp / (tp + fn)
    specificity = tn / (fp + tn)
    
    result = sensitivity - (1 - specificity)
    # Put it in a dateframe for nicer visuals
    df = pd.DataFrame({'Youdens Index': result})
    pd.set_option('display.max_rows', 200)
    
    return df

In [4]:
from sklearn.metrics import classification_report,confusion_matrix

#print(confusion_matrix(mlpc_green.predict(X_green_test), y_test))
print("Red model")
print(classification_report(y_validation, mlpc_red.predict(X_red_validation)))
print("Green model")
print(classification_report(y_validation, mlpc_green.predict(X_green_validation)))
print("Blue model")
print(classification_report(y_validation, mlpc_blue.predict(X_blue_validation)))

#print(f'shape {X_red_train.shape}')
#print(f'y train shape {y_train.shape}')

Red model
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        30
           1       1.00      0.84      0.92        32
           2       0.94      1.00      0.97        33
           3       0.70      0.97      0.82        32
           4       0.94      1.00      0.97        33
           5       0.94      1.00      0.97        30
           6       1.00      0.94      0.97        33
           7       1.00      0.88      0.94        33
           8       0.88      1.00      0.94        29
           9       0.94      1.00      0.97        33
          10       0.97      0.97      0.97        33
          11       0.96      0.98      0.97        44
          12       0.70      1.00      0.82        33
          13       0.91      1.00      0.95        29
          14       1.00      0.45      0.62        33
          15       0.97      0.88      0.92        33
          16       1.00      0.90      0.95        30
          17     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.88      1.00      0.94        30
           1       1.00      1.00      1.00        32
           2       1.00      0.97      0.98        33
           3       0.48      1.00      0.65        32
           4       1.00      1.00      1.00        33
           5       0.94      0.97      0.95        30
           6       1.00      0.97      0.98        33
           7       0.97      0.97      0.97        33
           8       1.00      1.00      1.00        29
           9       1.00      0.88      0.94        33
          10       1.00      1.00      1.00        33
          11       0.98      0.98      0.98        44
          12       0.80      1.00      0.89        33
          13       1.00      1.00      1.00        29
          14       1.00      0.94      0.97        33
          15       0.97      0.94      0.95        33
          16       1.00      0.83      0.91        30
          17       1.00    

In [5]:
# new_classi = MLPClassifier(hidden_layer_sizes=(576), activation='relu', solver='adam')
# new_x = np.concatenate((vector_red, vector_green, vector_blue ))

# SKA HA SHAPE (1731, 6*3) GROUND TRUTH ER DET SAMME -- 
# SPØRGSMÅL FRA YUNES!!! ER DET EGENTLIG VORES TRAIN VÆRDIER VI SKAL STACKE HER, OG SÅ SE HVORDAN DEN PERFORMER
# VS PÅ FARVEKANALERNE INDIVIDUELT? 
# DET HER GIVER NEMLIG DEN KORREKTE SHAPE.

X_combined_train = np.column_stack((vector_red, vector_green, vector_blue))
X_combined_val = np.column_stack((vector_red_val, vector_green_val, vector_blue_val))


# For validation og test laver vi modellerne igen med validation og test dataset

print(f'Combined train dataset shape: {X_combined_train.shape} \n')

# vi skal finde ud af hvad fanden hidden layer size vil MARN
mlpc_combined = MLPClassifier(hidden_layer_sizes=(18), max_iter=500, activation='relu', solver='adam', random_state=1)

mlpc_combined.fit(X_combined_train, y_train)
X_combined_train_predictions = mlpc_combined.predict(X_combined_train)
x_combined_val_predictions = mlpc_combined.predict(X_combined_val)
#X_combined_validation_predictions = mlpc_combined.predict(X_combined_validation)

print(f' VALIDATION PREDICTIONS: \n {classification_report(x_combined_val_predictions, y_validation)} \n')

print("Finished")

# WARNINGS KOMMER FORDI VI DER ER LABELS SOM 

Combined train dataset shape: (17513, 375) 

 VALIDATION PREDICTIONS: 
               precision    recall  f1-score   support

           0       1.00      0.97      0.98        31
           1       1.00      1.00      1.00        32
           2       1.00      1.00      1.00        33
           3       0.97      1.00      0.98        31
           4       1.00      1.00      1.00        33
           5       1.00      1.00      1.00        30
           6       1.00      1.00      1.00        33
           7       0.97      1.00      0.98        32
           8       1.00      1.00      1.00        29
           9       1.00      1.00      1.00        33
          10       1.00      1.00      1.00        33
          11       0.93      1.00      0.96        41
          12       1.00      1.00      1.00        33
          13       0.97      1.00      0.98        28
          14       0.91      1.00      0.95        30
          15       0.91      1.00      0.95        30
         

  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
# print(vector_green_val.shape)
# print(X_combined_val.shape)


youden = get_youdens_index(x_combined_val_predictions, y_validation)
youden

tp 125
tn 125
fp 125
tn 125


Unnamed: 0,Youdens Index
0,0.999828
1,1.0
2,1.0
3,0.96875
4,1.0
5,1.0
6,1.0
7,0.969697
8,1.0
9,1.0
