In [19]:
import matplotlib.pyplot as plt
import numpy as np 
from scipy.io import loadmat
import pandas as pd
from mpl_toolkits.axes_grid1 import ImageGrid
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score,roc_auc_score, confusion_matrix
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
from keras.preprocessing.image import ImageDataGenerator
import random

In [24]:
def confusion_matrix(model, X_test, y_test, labels_list):
    ypred = model.predict(X_test)
    mat = confusion_matrix(y_test, ypred) 
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
                        xticklabels=labels_list,
                        yticklabels=labels_list)
    plt.xlabel('true label')
    plt.ylabel('predicted label');
    
def evaluate(model, X_test, y_test):
    ypred = model.predict(X_test)
    print('Accuracy: ', accuracy_score(y_test, ypred))
    print('F1: ', f1_score(y_test, ypred))
    print('Precision: ', precision_score(y_test, ypred))
    print('Recall: ', recall_score(y_test, ypred))
    print('Recall: ', roc_auc_score(y_test, ypred))

In [5]:
#load data chunks and converts to numpy arrays
def load_data_chunks():
    raw_face = np.array(loadmat('./data/data.mat')['face'])
    raw_pose = np.array(loadmat('./data/pose.mat')['pose'])
    raw_illum = np.array(loadmat('./data/illumination.mat')['illum'])
    aug_neutral = np.load('data/aug_neutral.npy', allow_pickle=True)
    aug_smile = np.load('data/aug_smile.npy', allow_pickle=True)
    return raw_face, (raw_pose, raw_illum), (aug_neutral,aug_smile)
raw_face, raw_pose, aug_data = load_data_chunks()

In [15]:
# creates face dataset and returns a tuple of 
def make_face_dataset(raw_face, augmented=False):
    neutral= raw_face[:,:,::3]
    smile = raw_face[:,:,1::3]
    variation = raw_face[:,:,2::3]
    
    if augmented == True:
        face_data = np.dstack((neutral, variation, smile))
        face_data = np.array(np.dsplit(face_data,face_data.shape[-1])).reshape((600,24,21))
        face_data = face_data/255
        face_data = face_data.reshape((600,24*21))
        face_labels = np.ravel(np.array([400 * [0] + 200 * [1]]))
        return face_data, face_labels
    else:
        face_data = np.dstack((neutral, smile))
        face_data = np.array(np.dsplit(face_data,face_data.shape[-1])).reshape((400,24,21))
        face_data = face_data/255
        face_data = face_data.reshape((400,24*21))
        face_labels = np.ravel(np.array([200 * [0] + 200 * [1]]))
        return face_data, face_labels

In [35]:
data, labels = make_face_dataset(raw_face, augmented=True)

In [36]:
#set dataset here
X_train, X_test, y_train, y_test = train_test_split(data, labels, 
                                                    test_size=0.15, random_state=42, shuffle='true', stratify=labels)
print(X_train.shape)
print(X_test.shape)

(510, 504)
(90, 504)


In [37]:
#Bayes pipe
pca = PCA()
gnb = GaussianNB()
pipe = Pipeline(steps=[('pca', pca), ('bayes', gnb)])
param_grid = {
    'pca__n_components': [i for i in range(1,40)]
}
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=1)
bayes_search = GridSearchCV(pipe, param_grid, scoring='accuracy', n_jobs=-1, cv=5)
bayes_search.fit(X_train, y_train)
print("Best parameter (CV score=%0.3f):" % bayes_search.best_score_)
print(bayes_search.best_params_)
print(bayes_search.best_score_)

Best parameter (CV score=0.912):
{'pca__n_components': 16}
0.9117647058823529


In [38]:
ypred = bayes_search.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, ypred))
print('F1: ', f1_score(y_test, ypred))
print('Precision: ', precision_score(y_test, ypred))
print('Recall: ', recall_score(y_test, ypred))
print('AUC: ', roc_auc_score(y_test, ypred))

Accuracy:  0.9333333333333333
F1:  0.8928571428571429
Precision:  0.9615384615384616
Recall:  0.8333333333333334
AUC:  0.9083333333333333


In [13]:
pca = PCA()
knn = KNeighborsClassifier()

pipe = Pipeline(steps=[('pca', pca), ('knn', knn)])

param_grid = {
    'pca__n_components': [i for i in range(1,31)],
    'knn__n_neighbors' : [i for i in range(1,11)],  
    'knn__metric' : ['euclidean','manhattan','chebyshev','minkowski','wminkowski','seuclidean','mahalanobis'],
    'knn__weights' : ['uniform','distance'],
    
}

cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=1)
knn_search = GridSearchCV(pipe, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
knn_search.fit(X_train, y_train)
print("Best parameter (CV score=%0.3f):" % knn_search.best_score_)
print(knn_search.best_params_)

Best parameter (CV score=0.910):
{'knn__metric': 'euclidean', 'knn__n_neighbors': 10, 'knn__weights': 'distance', 'pca__n_components': 19}
