# Classifying facial expressions

In [74]:
# Module imports
import os

import numpy as np
import pandas as pd
import seaborn as sb

# Various components of an ML pipeline
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# For feature engineering
from sklearn.decomposition import NMF
from sklearn.preprocessing import StandardScaler

# And finally, a few actual models
from sklearn.svm import SVC

In [10]:
# Helper, reading files
def load_features_and_labels(subject, expression, path = "./data/"):
    """Read the raw datapoints and targets files for one subject/expression.

    The two files are looked up inside ``path`` as
    ``<subject>_<expression>_datapoints.txt`` and
    ``<subject>_<expression>_targets.txt``. Both are parsed as
    space-separated text.

    Parameters
    ----------
    subject : str
        Subject identifier used as the filename prefix (e.g. ``"a"``).
    expression : str
        Expression name used as the second filename component
        (e.g. ``"affirmative"``).
    path : str, optional
        Directory holding the data files. A trailing path separator is
        optional: ``"./data"`` and ``"./data/"`` resolve to the same files.

    Returns
    -------
    raw_features : pandas.DataFrame
        Contents of the datapoints file; its first line is used as the
        column header.
    raw_labels : pandas.DataFrame
        Contents of the targets file, read without a header row.
    """
    # Join with os.path.join rather than "+" so that a directory given
    # without a trailing separator does not get glued onto the filename.
    file_stem = os.path.join(path, subject + "_" + expression)

    raw_features = pd.read_table(file_stem + "_datapoints.txt",
                                 sep=" ",
                                 header=0)

    raw_labels = pd.read_table(file_stem + "_targets.txt",
                               sep=" ",
                               header=None)

    return raw_features, raw_labels

## a)  Classify two expressions from subject A using "off the shelf" tech

NB: Extra marks for coding my own implementation of the classifier.

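Initial grid of SVM `C` values taken from the RBF-parameter example at https://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html (note that the pipeline below currently uses a linear kernel, not RBF).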

In [62]:
# Read subject A's "affirmative" data: raw feature table and raw labels
X_train_raw, y_train_raw = load_features_and_labels(subject="a",
                                                    expression="affirmative")

# The '0.0' column holds the timestamp, which is not a feature -- discard it
X_train_raw = X_train_raw.drop(columns=['0.0'])

# Hand sklearn plain numpy arrays.  The labels are flattened to a 1D array
# of shape (n_samples,) rather than kept as a single-column 2D table.
X_train = np.asarray(X_train_raw)
y_train = np.asarray(y_train_raw).ravel()

In [76]:
# Compose SVC pipeline and parameters to search.
# The 'SVM__C' key addresses the C parameter of the pipeline step named 'SVM';
# the grid is 13 log-spaced values from 1e-2 to 1e10.
svc_pipeline = Pipeline([('SVM', SVC(kernel="linear"))])
svc_parameters = {'SVM__C': np.logspace(-2, 10, 13)}

# Set up a cross-validated (10-fold) grid search over the pipeline above.
# It must be given svc_pipeline / svc_parameters: the bare names `pipeline`
# and `parameters` are not defined anywhere in this notebook.
svc_grid = GridSearchCV(svc_pipeline,
                        param_grid=svc_parameters,
                        cv=10,
                        scoring='f1',
                        verbose=True)

In [None]:
# Fit the model!
# Runs the cross-validated search held in svc_grid (the name the
# GridSearchCV object is assigned to when it is created).
svc_grid.fit(X_train, y_train)

Fitting 10 folds for each of 13 candidates, totalling 130 fits


In [None]:
# Report on performance!
# best_score_ is the mean cross-validated score (here: F1, as configured via
# scoring='f1') of the best parameter setting found by the search.
svc_grid.best_score_

In [None]:
# Report in some actually useful detail.
# NOTE: the predictions are made on X_train, i.e. the same data the search
# was fitted on, so this confusion matrix describes training-set performance.
y_pred = svc_grid.predict(X_train)
confusion_matrix(y_train, y_pred)


## b)  Use these classifiers to classify two expressions from subject B
NB: Extra marks for using something beyond simple accuracy (recycle ROC curve code?)

## c)  Additional Analysis of classifiers - reverse roles!
Train on B, classify A, comment on difference!
Try again using a different feature representation (e.g. PCA; can I think of something better?)

## d)  Implement a different classifier
Training on single expression, testing (on B?) and extra marks for own implementation
Training and testing on a SECOND expression
The same by inverting roles
Repeating with a different feature representation again!  
REF: for the last point, try a multiclass classifier, on the principle that there should be some shared information between expressions? Watch out for the data imbalance problem.

## e)  Wrap-up, compare results of the two classifiers, make comments