In [165]:
import numpy as np 
from pandas import read_csv
from skimage.feature import hog, canny
from skimage import filters
from skimage import io
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier

In [166]:
datasetPath = './SMILE Dataset/'

dataset = read_csv(datasetPath + 'annotations.csv')

array = dataset.values

Y = array[:,1]

Y = np.where(Y=='happy', 1, Y)
Y = np.where(Y=='neutral', 0, Y)

Y = Y.astype('int')

In [167]:
# Get the names of the image files

imageFiles = array[:,0]
X = []

# HOG
for imageFile in imageFiles:
    image = io.imread(datasetPath + '/' + imageFile)
    fd = hog(image, orientations=10, pixels_per_cell=(16, 16), cells_per_block=(2, 2))
    if len(X)==0:
        X = fd
    else:
        X = np.vstack((X, fd))

In [168]:
# Build models
models = []

models.append(('Decision Tree', DecisionTreeClassifier(criterion='gini', max_depth=10)))
models.append(('Random Forest', RandomForestClassifier(criterion='entropy', n_estimators=50, max_depth=None)))
models.append(('Logistic Regression', LogisticRegression(max_iter=200, C=2)))
models.append(('SVM', SVC(C=2, kernel='poly', degree=3)))
models.append(('AdaBoost', AdaBoostClassifier(n_estimators=10)))    
models.append(('Extremely Randomized Trees', ExtraTreesClassifier(criterion='entropy',n_estimators=50, max_depth=None)))
models.append(('K-Nearest Neighbors', KNeighborsClassifier(n_neighbors=6, weights='distance')))

In [169]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=22)

for name, model, in models:
    scores = cross_validate(model, X, Y, cv=kfold, scoring=('f1', 'precision', 'recall', 'roc_auc'))
    print('For the '+name+' model:')
    print('\tF1 Score: '+str(scores['test_f1'].mean()))
    print('\tPrecision: '+str(scores['test_precision'].mean()))
    print('\tRecall: '+str(scores['test_recall'].mean()))
    print('\tROC AUC: '+str(scores['test_roc_auc'].mean()))
    print()

For the Decision Tree model:
	F1 Score: 0.902606045860218
	Precision: 0.9251243304339279
	Recall: 0.8850000000000001
	ROC AUC: 0.905

For the Random Forest model:
	F1 Score: 0.9500875076998312
	Precision: 0.9465983139667349
	Recall: 0.9550000000000001
	ROC AUC: 0.984375

For the Logistic Regression model:
	F1 Score: 0.9671445499301725
	Precision: 0.9761471861471861
	Recall: 0.96
	ROC AUC: 0.9945

For the SVM model:
	F1 Score: 0.9622691813962675
	Precision: 0.9714285714285713
	Recall: 0.9550000000000001
	ROC AUC: 0.9934999999999998

For the AdaBoost model:
	F1 Score: 0.9427615003738238
	Precision: 0.9420528594212805
	Recall: 0.945
	ROC AUC: 0.9721973684210526

For the Extremely Randomized Trees model:
	F1 Score: 0.9494549224844476
	Precision: 0.9556641604010025
	Recall: 0.9450000000000001
	ROC AUC: 0.9882302631578946

For the K-Nearest Neighbors model:
	F1 Score: 0.9402718315234362
	Precision: 0.9695938375350138
	Recall: 0.915
	ROC AUC: 0.98325



In [173]:
# Stacking 
stacking = StackingClassifier(estimators=models, final_estimator=SVC())
stacking_scores = cross_validate(stacking, X, Y, cv=kfold, scoring=('f1', 'precision', 'recall', 'roc_auc'))

print('Stacking Scores:')
print('\tF1 Score: '+str(scores['test_f1'].mean()))
print('\tPrecision: '+str(scores['test_precision'].mean()))
print('\tRecall: '+str(scores['test_recall'].mean()))
print('\tROC AUC: '+str(scores['test_roc_auc'].mean()))
print()

Stacking Scores:
	F1 Score: 0.9402718315234362
	Precision: 0.9695938375350138
	Recall: 0.915
	ROC AUC: 0.98325



In [None]:
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
import matplotlib.pyplot as plt

# for train_index, test_index in kfold.split(X, Y):
#     print("TRAIN:", train_index, "TEST:", test_index)
#     X_train, X_test = X[train_index], X[test_index]
#     Y_train, Y_test = Y[train_index], Y[test_index]

split_kfold = kfold.split(X, Y)