# *Classifying facial expressions*

In [1]:
# Module imports
import os

import numpy as np
import pandas as pd
import seaborn as sb

# Various components of an ML pipeline
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

# For feature engineering
from sklearn.decomposition import NMF
from sklearn.preprocessing import StandardScaler

# And finally, a few actual models
from sklearn.svm import SVC

In [2]:
# Helper, reading files and performing some pre-processing
def load_features_and_labels(subject, expression, path = "./data/"):
    """Load the feature matrix and label vector for one subject/expression.

    Reads ``<subject>_<expression>_datapoints.txt`` (space separated, first
    row used as the header) and ``<subject>_<expression>_targets.txt``
    (space separated, no header) from the directory ``path``.  The column
    whose header is ``'0.0'`` (the timestamp) is removed from the features.

    Parameters
    ----------
    subject : str
        Subject prefix of the file names, e.g. ``"a"``.
    expression : str
        Expression part of the file names, e.g. ``"negative"``.
    path : str, optional
        Directory holding the data files.  May be given with or without a
        trailing path separator.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The features as a 2-D array (timestamp column dropped) and the
        labels flattened to a 1-D array, ready for sklearn.
    """
    # os.path.join copes with `path` given with or without a trailing
    # separator; plain string concatenation silently produced a wrong file
    # name (e.g. "./dataa_negative_...") when the separator was missing.
    file_stem = os.path.join(path, subject + "_" + expression)

    raw_features = pd.read_table(file_stem + "_datapoints.txt",
                                 sep=" ",
                                 header=0)

    # Drop the timestamp!
    raw_features = raw_features.drop('0.0', axis=1)

    raw_labels = pd.read_table(file_stem + "_targets.txt",
                               sep=" ",
                               header=None)

    # Transform to numpy arrays
    return np.asarray(raw_features), np.ravel(raw_labels)

# a & b)  Classify two expressions from subject A using "off the shelf" tech

NB: Extra marks for coding my own implementation of the classifier.

Initial SVM (rbf) parameters taken from https://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html

Use these classifiers to classify two expressions from subject B.

NB: Extra marks for using something beyond simple accuracy (recycle ROC curve code?)

### Classifying "doubt" expression

This expression was chosen because, in the original paper, the authors' own classification pipeline performed really well on it (F1 of 0.88, precision of 0.94 and recall of 0.76 when classifying single frames independently, using engineered features WITHOUT depth features).

In [3]:
# Read the "doubt_question" recordings for both subjects
# (features with the timestamp removed, plus the per-frame labels)
X_A_doubt, y_A_doubt = load_features_and_labels(subject="a", expression="doubt_question")
X_B_doubt, y_B_doubt = load_features_and_labels(subject="b", expression="doubt_question")

In [4]:
# Single-step pipeline wrapping a linear-kernel SVM
svc_pipeline = Pipeline(steps=[('SVM', SVC(kernel="linear"))])

# Candidate values for C: 13 log-spaced points from 1e-2 up to 1e10
svc_parameters = dict(SVM__C=np.logspace(start=-2, stop=10, num=13))

# Grid search over C, scored by F1 with 3-fold cross-validation
svc_grid = GridSearchCV(
    estimator=svc_pipeline,
    param_grid=svc_parameters,
    scoring='f1',
    cv=3,
    verbose=True,
)

In [None]:
# Fit the model!  Runs the cross-validated grid search on subject A's data.
svc_grid.fit(X_A_doubt, y_A_doubt)

# Report on performance!  (Was `scv_grid`, a typo that raised NameError
# only after the whole fit had finished.)
svc_grid.best_score_

Fitting 3 folds for each of 13 candidates, totalling 39 fits


In [None]:
# Confusion matrix on the training subject (A): predictions come from the
# grid search's refitted best estimator
y_train_pred = svc_grid.predict(X=X_A_doubt)
confusion_matrix(y_true=y_A_doubt, y_pred=y_train_pred)

### Predicting the "doubt" expression for subject B

In [None]:
# Confusion matrix on subject B, whose frames were not used for fitting
y_test_pred = svc_grid.predict(X=X_B_doubt)
confusion_matrix(y_true=y_B_doubt, y_pred=y_test_pred)

### Classifying "negative" expression

The original author's model performed poorly on this one (f1 score of 0.44, precision of 0.33, recall of 0.66), so after the apparently easier problem of classifying doubt this should give us our relative performance on a hard problem.

In [None]:
# Read the "negative" recordings for both subjects
# (features with the timestamp removed, plus the per-frame labels)
X_A_neg, y_A_neg = load_features_and_labels(subject="a", expression="negative")
X_B_neg, y_B_neg = load_features_and_labels(subject="b", expression="negative")

In [None]:
# Single-step pipeline wrapping a linear-kernel SVM
svc_pipeline = Pipeline(steps=[('SVM', SVC(kernel="linear"))])

# Candidate values for C: 13 log-spaced points from 1e-2 up to 1e10
svc_parameters = dict(SVM__C=np.logspace(start=-2, stop=10, num=13))

# Grid search over C, scored by F1 with 10-fold cross-validation
svc_grid = GridSearchCV(
    estimator=svc_pipeline,
    param_grid=svc_parameters,
    scoring='f1',
    cv=10,
    verbose=True,
)

In [None]:
# Run the cross-validated grid search on subject A's "negative" data
svc_grid.fit(X=X_A_neg, y=y_A_neg)

# Best mean cross-validated score found by the search
svc_grid.best_score_

In [None]:
# Confusion matrix on the training subject (A): predictions come from the
# grid search's refitted best estimator
y_train_pred = svc_grid.predict(X=X_A_neg)
confusion_matrix(y_true=y_A_neg, y_pred=y_train_pred)

### Predicting the "negative" expression for subject B

In [None]:
# Confusion matrix on subject B, whose frames were not used for fitting
y_test_pred = svc_grid.predict(X=X_B_neg)
confusion_matrix(y_true=y_B_neg, y_pred=y_test_pred)

# c)  Additional Analysis of classifiers - reverse roles!
Train on B, classify A, comment on difference!
Try again using a different feature representation (eg; PCA.  Can I think of something better?)

# d)  Implement a different classifier
Training on single expression, testing (on B?) and extra marks for own implementation
Training and testing on a SECOND expression
The same by inverting roles
Repeating with a different feature representation again!  
REF;  for last, try multiclass classifier on principle there should be some shared information?  Data imbalance problem

# e)  Wrap-up, compare results of the two classifiers, make comments