In [1]:
import os
from PIL import Image
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# Split set
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Path to image folders, 
data_path = fr'../Datasets/train'
# os.listdir(data_path)

def remove_white_background(pixels):
    for pixel in pixels:
        pixel = list(pixel)
        if ((256 > pixel[0] > 200) and (256 > pixel[1] > 200) and (256 > pixel[2] > 200)):
            pixel[0] = 0
            pixel[1] = 0
            pixel[2] = 0

    return pixels


def redify(pixels):
    return [r for r, g, b in pixels]

                
def greenify(pixels):
    return [g for r, g, b in pixels]



def blueify(pixels):
    return [b for r, g, b in pixels]


def get_rgb_pixels_onehut_labels(src):
    print("Starting...")
    newPixels = []
    y = np.empty(shape=[0, 1])

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        for file in os.listdir(current_path):
            img = Image.open(os.path.join(current_path, file))
            imgResize = img.resize((24,24))
            pixels = list(imgResize.getdata())
            pixels = remove_white_background(pixels)
            newPixels.append(pixels)
            y = np.append(y, subdir)
    print(LabelBinarizer().fit_transform(y))
    return newPixels, LabelBinarizer().fit_transform(y) # OneHot encode y

def process_files(src):
    X_red_train = []
    X_red_validation = []
    X_red_test = []
    X_green_train = []
    X_green_validation = []
    X_green_test = []
    X_blue_train = []
    X_blue_validation = []
    X_blue_test = []
    all_pixels, y = get_rgb_pixels_onehut_labels(src)
        
    X_train, X_validation, y_train, y_validation = train_test_split(all_pixels, y, test_size=0.20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.25, random_state=1)
    
    for pixels in X_train:       
        X_red_train.append(redify(pixels.copy()))
        X_green_train.append(greenify(pixels.copy()))
        X_blue_train.append(blueify(pixels.copy()))
        
    for pixels in X_validation:       
        X_red_validation.append(redify(pixels.copy()))
        X_green_validation.append(greenify(pixels.copy()))
        X_blue_validation.append(blueify(pixels.copy()))
        
    for pixels in X_test:       
        X_red_test.append(redify(pixels.copy()))
        X_green_test.append(greenify(pixels.copy()))
        X_blue_test.append(blueify(pixels.copy()))
    
    
    print("Finished")
    return np.asarray(X_red_train), np.asarray(X_red_validation), np.asarray(X_red_test), np.asarray(X_green_train), np.asarray(X_green_validation), np.asarray(X_green_test), np.asarray(X_blue_train), np.asarray(X_blue_validation), np.asarray(X_blue_test), y_train, y_validation, y_test

In [9]:
# Call process_files and assign variables
X_red_train, X_red_validation, X_red_test, X_green_train, X_green_validation, X_green_test, X_blue_train, X_blue_validation, X_blue_test, y_train, y_validation, y_test = process_files(data_path)
# Fit/train train-datasets and store prediction vectors in variables
from sklearn import svm
from sklearn.neural_network import MLPClassifier

# Red
mlpc_red =  MLPClassifier(hidden_layer_sizes=(576), activation='relu', solver='adam')
mlpc_red.fit(X_red_train, y_train)
vector_red = mlpc_red.predict(X_red_train)

# Green
mlpc_green = MLPClassifier(hidden_layer_sizes=(576), activation='relu', solver='adam')
mlpc_green.fit(X_green_train, y_train)
vector_green = mlpc_green.predict(X_green_train)

# Blue
mlpc_blue = MLPClassifier(hidden_layer_sizes=(576), activation='relu', solver='adam')
mlpc_blue.fit(X_blue_train, y_train)
vector_blue = mlpc_blue.predict(X_blue_train)

Starting...
[[1 0 0 0 0 0]
 [1 0 0 0 0 0]
 [1 0 0 0 0 0]
 ...
 [0 0 0 0 0 1]
 [0 0 0 0 0 1]
 [0 0 0 0 0 1]]
Finished
1731 And 1731


In [3]:
from sklearn.metrics import classification_report,confusion_matrix
#print(confusion_matrix(mlpc_green.predict(X_green_test), y_test))
print("Red model")
print(classification_report(mlpc_red.predict(X_red_validation), y_validation))
print("Green model")
print(classification_report(mlpc_green.predict(X_green_validation), y_validation))
print("Blue model")
print(classification_report(mlpc_blue.predict(X_blue_validation), y_validation))

print(f'shape {X_red_train.shape}')
print(f'y train shape {y_train.shape}')

Red model
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        79
           1       1.00      0.97      0.98        94
           2       1.00      1.00      1.00       100
           3       0.97      1.00      0.99        67
           4       1.00      1.00      1.00        93
           5       1.00      0.99      1.00       146

   micro avg       1.00      0.99      0.99       579
   macro avg       1.00      0.99      0.99       579
weighted avg       1.00      0.99      0.99       579
 samples avg       1.00      0.99      0.99       579

Green model
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        79
           1       1.00      1.00      1.00        91
           2       1.00      1.00      1.00       100
           3       1.00      1.00      1.00        69
           4       1.00      1.00      1.00        93
           5       1.00      1.00      1.00       145

 

  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
# new_classi = MLPClassifier(hidden_layer_sizes=(576), activation='relu', solver='adam')
# new_x = np.concatenate((vector_red, vector_green, vector_blue ))

# SKA HA SHAPE (1731, 1728) GROUND TRUTH ER DET SAMME -- 
# SPØRGSMÅL FRA YUNES!!! ER DET EGENTLIG VORES TRAIN VÆRDIER VI SKAL STACKE HER, OG SÅ SE HVORDAN DEN PERFORMER
# VS PÅ FARVEKANALERNE INDIVIDUELT? 
# DET HER GIVER NEMLIG DEN KORREKTE SHAPE.

X_combined_train = np.column_stack((X_red_train, X_green_train, X_blue_train))
X_combined_validation = np.column_stack((X_red_validation, X_green_validation, X_blue_validation))

print(f'Combined train dataset shape: {X_combined_train.shape} \n')

# vi skal finde ud af hvad fanden hidden layer size vil MARN
mlpc_combined = MLPClassifier(hidden_layer_sizes=(500), activation='relu', solver='adam')

mlpc_combined.fit(X_combined_train, y_train)
X_combined_train_predictions = mlpc_combined.predict(X_combined_train)
X_combined_validation_predictions = mlpc_combined.predict(X_combined_validation)

print(f' TRAIN PREDICTIONS: \n {classification_report(X_combined_train_predictions, y_train)} \n')

print(f' VALIDATION PREDICTIONS: \n {classification_report(X_combined_validation_predictions, y_validation)} \n')

print("Finished")

Combined train dataset shape: (1731, 1728) 

 TRAIN PREDICTIONS: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       218
           1       1.00      1.00      1.00       288
           2       1.00      1.00      1.00       277
           3       1.00      1.00      1.00       230
           4       1.00      1.00      1.00       276
           5       1.00      1.00      1.00       441

   micro avg       1.00      1.00      1.00      1730
   macro avg       1.00      1.00      1.00      1730
weighted avg       1.00      1.00      1.00      1730
 samples avg       1.00      1.00      1.00      1730
 

 VALIDATION PREDICTIONS: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        79
           1       1.00      1.00      1.00        91
           2       1.00      1.00      1.00       100
           3       1.00      1.00      1.00        69
           4       1.00      1.00   

  _warn_prf(average, modifier, msg_start, len(result))
