In [632]:
import matplotlib.pyplot as plt
import numpy as np 
from scipy.io import loadmat
import pandas as pd
from mpl_toolkits.axes_grid1 import ImageGrid
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score, confusion_matrix
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
from keras.preprocessing.image import ImageDataGenerator
import random

In [633]:
def confusion_matrix(model, X_test, y_test, labels_list):
    ypred = model.predict(X_test)
    mat = confusion_matrix(y_test, ypred) 
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
                        xticklabels=labels_list,
                        yticklabels=labels_list)
    plt.xlabel('true label')
    plt.ylabel('predicted label');
    
def evaluate(model, X_test, y_test, labels_list):
    ypred = model.predict(X_test)
    print('Accuracy: ', accuracy_score(y_test, ypred))
    print('F1: ', f1_score(y_test, ypred))
    print('Precision: ', precision_score(y_test, ypred))
    print('Recall: ', recall_score(y_test, ypred)) 

In [634]:
#load data chunks and converts to numpy arrays
def load_data_chunks():
    raw_face = np.array(loadmat('./data/data.mat')['face'])
    raw_pose = np.array(loadmat('./data/pose.mat')['pose'])
    raw_illum = np.array(loadmat('./data/illumination.mat')['illum'])
    aug_neutral = np.load('data/aug_neutral.npy', allow_pickle=True)
    aug_smile = np.load('data/aug_smile.npy', allow_pickle=True)
    return raw_face, (raw_pose, raw_illum), (aug_neutral,aug_smile)
raw_face, raw_pose, aug_data = load_data_chunks()

In [724]:
# creates face dataset and returns a tuple of 
def make_face_dataset(raw_face, augmented=False):
    neutral= raw_face[:,:,::3]
    smile = raw_face[:,:,1::3]
    variation = raw_face[:,:,2::3]
    
    
    test = []
    test_labels = []
    
    
    if augmented == True:
        face_data = np.dstack((neutral, variation, smile))
        face_data = np.array(np.dsplit(face_data,face_data.shape[-1])).reshape((600,24,21))
        face_data = face_data/255
        face_data = face_data.reshape((600,24*21))
        face_labels = np.ravel(np.array([400 * [0] + 200 * [1]]))
        return face_data, face_labels
    else:
        face_data = np.dstack((neutral, smile))
        face_data = np.array(np.dsplit(face_data,face_data.shape[-1])).reshape((400,24,21))
        face_data = face_data/255
        face_data = face_data.reshape((400,24*21))
        face_labels = np.ravel(np.array([200 * [0] + 200 * [1]]))
        return face_data, face_labels

In [839]:
data, labels = make_face_dataset(raw_face, augmented=True)

In [815]:
# creates face dataset and returns a tuple of 
def make(raw_face):
    neutral= raw_face[:,:,::3]
    neutral = list(neutral.reshape((200,24*21)))
    smile = raw_face[:,:,1::3]
    smile = list(smile.reshape((200,24*21)))
    
    variation = raw_face[:,:,2::3]
    
    np.random.shuffle(neutral)
    np.random.shuffle(smile)
    
    X_test = []
    y_test = []
    X_train = []
    y_train = []
    
    test_idx = random.sample([i for i in range(175)], 30)

    for i in test_idx:
        X_test.append(neutral[i])
        y_test.append(0)
        neutral.pop(i)
        
        X_test.append(smile[0])
        y_test.append(1)
        smile.pop(i)
    for i in range(170):
        X_train.append(neutral.pop())
        y_train.append(0)
        
        X_train.append(smile.pop())
        y_train.append(1)
    
    return X_test, y_test, X_train, y_train         

In [816]:
X_test, y_test, X_train, y_train = make(raw_face)

In [819]:
X_test = np.array(X_test)
X_train = np.array(X_train)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [636]:
def make_face_list(raw_face):
    neutral= raw_face[:,:,::3]
    smile = raw_face[:,:,1::3]
    variation = raw_face[:,:,2::3]
    face_data = np.dstack((neutral, smile))
    face_data = np.dsplit(face_data,face_data.shape[-1])
    face_data = [img.reshape((24*21)) for img in face_data]
    face_labels = list(np.array([200 * [0] + 200 * [1]])[0])
    
    return face_data, face_labels

In [637]:
def make_composite_dataset(raw_face, aug_data, num_augmented=0):
    aug_neutral = list(aug_data[0])
    aug_smile = list(aug_data[1])
    data, labels = make_face_list(raw_face)
    num = int(num_augmented)
    
    if num_augmented==0:
        real_data = np.array(data)
        real_labels = np.ravel(np.array(labels))
        return real_data, real_labels
    else:
        if (num//2) != 0:
            num+=1
        for i in range(num//2):
            neutral_idx = random.sample([i for i in range(600)], num//2)
            for idx in neutral_idx:
                data.append(aug_neutral[idx])
                labels.append(0)
        for i in range(num//2):
            smile_idx = random.sample([i for i in range(600)], num//2)
            for idx in smile_idx:
                data.append(aug_smile[idx])
                labels.append(1)
        
        data = np.array(data)
        labels = np.array(labels)
        
        return data, labels

In [717]:
def make_all_data(raw_face, aug_data):
    aug_neutral = list(aug_data[0])
    aug_smile = list(aug_data[1])
    data, labels = make_face_list(raw_face)
    
    for i in range(100):
        data.append(aug_neutral[i])
        labels.append(0)
    for i in range(100):
        data.append(aug_smile[i])
        labels.append(1)
    ds = np.array(list(zip(data,labels)))
    data = np.array(data)
    labels = np.array(labels)
    return ds

In [823]:
#make dataset
ds = make_all_data(raw_face, aug_data)

#preprocess data
shuffled_data = np.random.shuffle(ds)
data = np.array(list(ds[:,0]))
labels = np.array(list(ds[:,1]))
data = data/255
print(data.shape)
print(labels.shape)

(600, 504)
(600,)


In [719]:
labels.shape

(600,)

In [840]:
#set dataset here
X_train, X_test, y_train, y_test = train_test_split(data, labels, 
                                                    test_size=0.3, random_state=42, shuffle='true', stratify=labels)
print(X_train.shape)
print(X_test.shape)

(420, 504)
(180, 504)


In [841]:
#Bayes pipe
pca = PCA()
gnb = GaussianNB()
pipe = Pipeline(steps=[('pca', pca), ('bayes', gnb)])
param_grid = {
    'pca__n_components': [i for i in range(1,40)]
}

In [842]:
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=1)
bayes_search = GridSearchCV(pipe, param_grid, scoring='accuracy', n_jobs=-1, cv=5)
bayes_search.fit(X_train, y_train)
print("Best parameter (CV score=%0.3f):" % bayes_search.best_score_)
print(bayes_search.best_params_)
print(bayes_search.best_score_)

Best parameter (CV score=0.914):
{'pca__n_components': 17}
0.9142857142857143


In [838]:
pca = PCA()
knn = KNeighborsClassifier()

pipe = Pipeline(steps=[('pca', pca), ('knn', knn)])

param_grid = {
    'pca__n_components': [i for i in range(1,31)],
    'knn__n_neighbors' : [i for i in range(1,11)],  
    'knn__metric' : ['euclidean','manhattan','chebyshev','minkowski','wminkowski','seuclidean','mahalanobis'],
    'knn__weights' : ['uniform','distance'],
    
}

cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=1)
knn_search = GridSearchCV(pipe, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
knn_search.fit(X_train, y_train)
print("Best parameter (CV score=%0.3f):" % knn_search.best_score_)
print(knn_search.best_params_)

Best parameter (CV score=0.829):
{'knn__metric': 'chebyshev', 'knn__n_neighbors': 10, 'knn__weights': 'uniform', 'pca__n_components': 28}
