# Imports

In [4]:
import cv2
import glob
import random
import numpy as np
from IPython.core.debugger import set_trace
import os
from PIL import Image
from scipy.misc import imread
from pathlib import Path
import matplotlib.pyplot as plt
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import cross_val_score

# Data handling block

In [5]:
IMG_SIZE = 299
DATA_PATH = "C:/Users/Cerberus/Documents/ML/Project/dataset299"

print("Data path: ",DATA_PATH)
print("Image size: ",IMG_SIZE)

emotions = ["anger", "contempt", "disgust", "fear", "happy", "sadness", "surprise"] #Emotion list

#Define function to get file list, randomly shuffle it and split 80/20
def get_files(emotion):
    files = glob.glob("%s//%s//*" %(DATA_PATH,emotion))
    random.shuffle(files)
    training = files[:int(len(files)*0.8)] #get first 80% of file list
    prediction = files[-int(len(files)*0.2):] #get last 20% of file list
    return training, prediction

#Pre-processing the images, including conversion to greyscale since some images were in color
def make_flatsets():
    training_data = []
    training_labels = []
    prediction_data = []
    prediction_labels = []
    for emotion in emotions:
        training, prediction = get_files(emotion)
        #Append data to training and prediction list, and generate labels 0-7
        for item in training:
            image = cv2.imread(item) #open image
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #convert to grayscale
            training_data.append(gray) 
            training_labels.append(emotions.index(emotion))

        for item in prediction: #repeat above process for prediction set
            image = cv2.imread(item) #open image
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #convert to grayscale
            prediction_data.append(gray)
            prediction_labels.append(emotions.index(emotion))

    return training_data, training_labels, prediction_data, prediction_labels


def print_cm(cm, labels, hide_zeroes=False, hide_diagonal=False, hide_threshold=None):
    """pretty print for confusion matrixes"""
    columnwidth = max([len(x) for x in labels] + [5])  # 5 is value length
    empty_cell = " " * columnwidth
    
    # Begin CHANGES
    fst_empty_cell = (columnwidth-3)//2 * " " + "t/p" + (columnwidth-3)//2 * " "
    
    if len(fst_empty_cell) < len(empty_cell):
        fst_empty_cell = " " * (len(empty_cell) - len(fst_empty_cell)) + fst_empty_cell
    # Print header
    print("    " + fst_empty_cell, end=" ")
    # End CHANGES
    
    for label in labels:
        print("%{0}s".format(columnwidth) % label, end=" ")
        
    print()
    # Print rows
    for i, label1 in enumerate(labels):
        print("    %{0}s".format(columnwidth) % label1, end=" ")
        for j in range(len(labels)):
            cell = "%{0}i".format(columnwidth) % cm[i, j]
            if hide_zeroes:
                cell = cell if float(cm[i, j]) != 0 else empty_cell
            if hide_diagonal:
                cell = cell if i != j else empty_cell
            if hide_threshold:
                cell = cell if cm[i, j] > hide_threshold else empty_cell
            print(cell, end=" ")
        print()

Data path:  C:/Users/Cerberus/Documents/ML/Project/dataset299
Image size:  299


# Data loading block (299x299)

In [3]:
training_data, training_labels, prediction_data, prediction_labels = make_flatsets()
training_flatData = []
for i in range(len(training_data)):
    instance =  training_data[i].flatten()
    training_flatData.append(instance)
    
prediction_flatData = []
for i in range(len(prediction_data)):
    instance =  prediction_data[i].flatten()
    prediction_flatData.append(instance)
    

# Grid search and training

In [9]:
# Set the parameters by cross-validation
# param_grid = [
#   {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
#   {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
#  ]
param_grid = {'C' : [0.01, 0.1, 1, 10, 100, 1000], 'kernel': ('linear', 'poly', 'rbf', 'sigmoid')}
svc = svm.SVC()
# clf = GridSearchCV(svc, param_grid)
clf = GridSearchCV(estimator=svc, param_grid=param_grid, scoring='accuracy', n_jobs=-1, verbose=5)
clf.fit(training_flatData, training_labels)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   29.5s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:  5.3min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:  5.3min finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'C': [0.01, 0.1, 1, 10, 100, 1000], 'kernel': ('linear', 'poly', 'rbf', 'sigmoid')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=5)

# Best parameters

In [16]:
print("Best parameters set found on development set:")
print()
print(clf.best_params_)

Best parameters set found on development set:

{'C': 0.01, 'kernel': 'linear'}


# Results best classifier

In [15]:
#Enter best parameter here
clf_Best = svm.SVC(kernel='linear', C=0.01)
clf_Best.fit(training_flatData, training_labels)
    
y_pred = clf_Best.predict(prediction_flatData)

print("Confusion matrix")
cm = metrics.confusion_matrix(prediction_labels, y_pred)
print_cm(cm,emotions)

print()
print("Accuracy: %0.2f" % (metrics.accuracy_score(prediction_labels, y_pred)))
print("-----------------------------------------------")

print("Classification report for - \n{}:\n{}\n".format(
    clf_Best, metrics.classification_report(prediction_labels, y_pred)))

Confusion matrix
       t/p      anger contempt  disgust     fear    happy  sadness surprise 
       anger        8        0        1        0        0        0        0 
    contempt        0        1        0        1        0        1        0 
     disgust        1        0       10        0        0        0        0 
        fear        0        0        1        3        1        0        0 
       happy        0        0        0        0       13        0        0 
     sadness        1        0        0        2        0        2        0 
    surprise        1        0        0        1        0        1       13 

Accuracy: 0.81
-----------------------------------------------
Classification report for - 
SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False):
              precision    recall

# Cross Validation (299x299)

In [12]:
scores_Best = cross_val_score(clf_Best, training_flatData, training_labels, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores_Best.mean(), scores_Best.std() * 2)) 

Accuracy: 0.80 (+/- 0.11)


# SVM (48x48)

In [15]:
IMG_SIZE = 48
DATA_PATH = "C:/Users/Cerberus/Documents/ML/Project/dataset48"

print("Data path: ",DATA_PATH)
print("Image size: ",IMG_SIZE)

training_data, training_labels, prediction_data, prediction_labels = make_flatsets()
training_flatData = []
for i in range(len(training_data)):
    instance =  training_data[i].flatten()
    training_flatData.append(instance)
    
prediction_flatData = []
for i in range(len(prediction_data)):
    instance =  prediction_data[i].flatten()
    prediction_flatData.append(instance)
    
param_grid = {'C' : [0.01, 0.1, 1, 10, 100, 1000], 'kernel': ('linear', 'poly', 'rbf', 'sigmoid')}
svc = svm.SVC()
# clf = GridSearchCV(svc, param_grid)
clf = GridSearchCV(estimator=svc, param_grid=param_grid, scoring='accuracy', n_jobs=-1, verbose=5)
clf.fit(training_flatData, training_labels)

Data path:  C:/Users/Cerberus/Documents/ML/Project/dataset48
Image size:  48
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    3.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    3.4s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'C': [0.01, 0.1, 1, 10, 100, 1000], 'kernel': ('linear', 'poly', 'rbf', 'sigmoid')},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=5)

In [16]:
print("Best parameters set found on development set (48x48):")
print()
print(clf.best_params_)

#Enter best parameter here
clf_Best = svm.SVC(kernel='linear', C=0.01)
clf_Best.fit(training_flatData, training_labels)
    
y_pred = clf_Best.predict(prediction_flatData)

print("Confusion matrix (48x48)")
cm = metrics.confusion_matrix(prediction_labels, y_pred)
print_cm(cm,emotions)

print()
print("Accuracy (48x48) : %0.2f" % (metrics.accuracy_score(prediction_labels, y_pred)))
print("-----------------------------------------------")

print("Classification report for (48x48) - \n{}:\n{}\n".format(
    clf_Best, metrics.classification_report(prediction_labels, y_pred)))

Best parameters set found on development set (48x48):

{'C': 0.01, 'kernel': 'linear'}
Confusion matrix (48x48)
       t/p      anger contempt  disgust     fear    happy  sadness surprise 
       anger        8        0        0        0        0        1        0 
    contempt        0        2        0        0        0        1        0 
     disgust        1        0        9        1        0        0        0 
        fear        0        0        0        3        0        1        1 
       happy        0        0        1        0       12        0        0 
     sadness        3        1        0        0        0        1        0 
    surprise        0        0        0        0        0        0       16 

Accuracy (48x48) : 0.82
-----------------------------------------------
Classification report for (48x48) - 
SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, prob

# Cross Validation (48x48)

In [17]:
scores_Best = cross_val_score(clf_Best, training_flatData, training_labels, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores_Best.mean(), scores_Best.std() * 2)) 

Accuracy: 0.73 (+/- 0.13)
