# `00` Loading Required Libraries:

In [7]:
import mediapipe as mp
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

# `01` Loading Data:

In [8]:
poses_df = pd.read_csv('./poses.csv')
poses_df

Unnamed: 0,nose_x,nose_y,forehead_x,forehead_y,left_eye_x,left_eye_y,mouth_left_x,mouth_left_y,chin_x,chin_y,right_eye_x,right_eye_y,mouth_right_x,mouth_right_y,pitch,yaw,roll
0,0.489194,0.639722,0.521101,0.295806,0.354743,0.441597,0.403753,0.680927,0.481321,0.807920,0.669544,0.473942,0.574203,0.702994,-0.399231,0.018227,0.085676
1,0.401499,0.604184,0.475316,0.344523,0.408574,0.481754,0.421570,0.668330,0.447475,0.764491,0.605816,0.506940,0.523344,0.691370,0.470065,1.189533,0.300959
2,0.445534,0.621876,0.443547,0.373616,0.424176,0.508653,0.480120,0.683903,0.528819,0.769280,0.586251,0.500861,0.582848,0.679839,-0.184650,0.881137,-0.236852
3,0.467992,0.648420,0.407912,0.370758,0.354222,0.524016,0.463581,0.699305,0.551854,0.780015,0.605186,0.467229,0.604576,0.666296,-0.175379,0.299208,-0.373374
4,0.372060,0.621196,0.395060,0.392921,0.397376,0.513964,0.433618,0.697691,0.467873,0.773994,0.525802,0.498256,0.508779,0.698543,-0.882169,1.198004,-1.033374
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.486602,0.624109,0.522991,0.301004,0.354324,0.441646,0.412282,0.681156,0.483026,0.786989,0.680817,0.491351,0.567541,0.702234,-0.197102,-0.070430,0.105118
1996,,,,,,,,,,,,,,,-0.232617,-1.418751,0.175960
1997,,,,,,,,,,,,,,,-1.447955,1.431267,-1.509418
1998,,,,,,,,,,,,,,,-0.420465,-1.191176,0.451515


In [9]:
poses_df = poses_df.dropna(axis=0)
poses_df.shape

(1792, 17)

# `02` Preprocessing:

To make the system independent of the size of the images it was trained on or the face position in them, we have to perform some preprocessing:  
1- Center the face features around the fixed point (nose), this makes the model work with faces that can take any position in the image, whether it was in the center of the image or the top right, etc..  
2- Make the face features scale independent so they don't get affected by the distance between the face and the camera (whether you was close to the camera or not), this can be done by scaling the features by the distacne between 2 points on the face.

In [10]:
def normalize(poses_df):
    normalized_df = poses_df.copy()
    
    for dim in ['x', 'y']:
        # Centerning around the nose 
        for feature in ['forehead_'+dim, 'nose_'+dim, 'mouth_left_'+dim, 'mouth_right_'+dim, 'left_eye_'+dim, 'chin_'+dim, 'right_eye_'+dim]:
            normalized_df[feature] = poses_df[feature] - poses_df['nose_'+dim]
        
        
        # Scaling
        diff = normalized_df['mouth_right_'+dim] - normalized_df['left_eye_'+dim]
        for feature in ['forehead_'+dim, 'nose_'+dim, 'mouth_left_'+dim, 'mouth_right_'+dim, 'left_eye_'+dim, 'chin_'+dim, 'right_eye_'+dim]:
            normalized_df[feature] = normalized_df[feature] / diff
    
    return normalized_df

In [11]:
poses_df = normalize(poses_df)
poses_df

Unnamed: 0,nose_x,nose_y,forehead_x,forehead_y,left_eye_x,left_eye_y,mouth_left_x,mouth_left_y,chin_x,chin_y,right_eye_x,right_eye_y,mouth_right_x,mouth_right_y,pitch,yaw,roll
0,0.0,0.0,0.145389,-1.315685,-0.612647,-0.757945,-0.389327,0.157636,-0.035876,0.643461,0.821789,-0.634208,0.387353,0.242055,-0.399231,0.018227,0.085676
1,0.0,0.0,0.643168,-1.238742,0.061645,-0.584067,0.174874,0.306017,0.400586,0.764764,1.780222,-0.463914,1.061645,0.415933,0.470065,1.189533,0.300959
2,0.0,0.0,-0.012524,-1.450241,-0.134608,-0.661405,0.217967,0.362334,0.524888,0.861074,0.886842,-0.706921,0.865392,0.338595,-0.184650,0.881137,-0.236852
3,0.0,0.0,-0.239980,-1.951528,-0.454436,-0.874364,-0.017618,0.357641,0.334972,0.924901,0.547998,-1.273491,0.545564,0.125636,-0.175379,0.299208,-0.373374
4,0.0,0.0,0.206458,-1.236728,0.227247,-0.580952,0.552568,0.414430,0.860054,0.827818,1.380055,-0.666052,1.227247,0.419048,-0.882169,1.198004,-1.033374
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1991,0.0,0.0,-0.069582,-1.516075,-0.835918,-0.675221,-0.497313,0.355848,-0.156862,0.798616,0.459186,-0.690928,0.164082,0.324779,-0.306358,-0.283822,0.038554
1992,0.0,0.0,-0.034515,-1.424327,-0.886287,-0.717177,-0.613809,0.292342,-0.229223,0.765727,0.467706,-0.711614,0.113713,0.282823,-0.367547,-0.429723,0.122791
1994,0.0,0.0,0.424978,-1.002710,-0.711851,-0.594513,-0.683567,0.258108,-0.404571,0.692207,0.930189,-0.319230,0.288149,0.405487,-0.156035,0.567114,-0.108536
1995,0.0,0.0,0.170664,-1.239908,-0.620394,-0.700198,-0.348568,0.218915,-0.016775,0.625044,0.910883,-0.509458,0.379606,0.299802,-0.197102,-0.070430,0.105118


In [12]:
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(poses_df, test_size=0.2, random_state=42)


X_train = train_df.drop(['pitch', 'yaw', 'roll'], axis=1)
y_train = train_df[['pitch', 'yaw', 'roll']]

X_val = val_df.drop(['pitch', 'yaw', 'roll'], axis=1)
y_val = val_df[['pitch', 'yaw', 'roll']]
print(X_train.shape, y_train.shape)

(1433, 14) (1433, 3)


# `03` Model Training:

### `a)` Support Vector Regressor:

In [14]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import RandomizedSearchCV
import scipy.stats
from sklearn.metrics import mean_squared_error

param_grid = {
    'estimator__kernel': ['linear', 'poly', 'rbf'],
    'estimator__C': scipy.stats.expon(scale=5),
    'estimator__degree': np.arange(2, 6),
    'estimator__coef0': np.arange(0, 6),
    'estimator__gamma': scipy.stats.expon(scale=.1),
    'estimator__epsilon': scipy.stats.expon(scale=.1)
}

svr = SVR()
multi_out_svr = MultiOutputRegressor(svr)
random_search = RandomizedSearchCV(multi_out_svr, param_grid, scoring='neg_mean_squared_error', random_state=42)
random_search.fit(X_train, y_train)
svr_model = random_search.best_estimator_
print('best parameters: ', random_search.best_params_)
print('train_rmse: ', np.sqrt(-random_search.best_score_))
print('validation_rmse: ', np.sqrt(mean_squared_error(y_val, svr_model.predict(X_val))))

best parameters:  {'estimator__C': 2.3463404498842957, 'estimator__coef0': 4, 'estimator__degree': 4, 'estimator__epsilon': 0.13167456935454494, 'estimator__gamma': 0.09129425537759533, 'estimator__kernel': 'rbf'}
train_rmse:  0.22580119825259917
validation_rmse:  0.1884249528604136


### `b)` Desicion Tree Regressor:

In [12]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV

param_grid = {
    'estimator__max_depth': np.arange(2, 10),
    'estimator__min_samples_split': np.logspace(1, 7, 7, base=2).astype(int),
    'estimator__min_samples_leaf': np.logspace(0, 6, 7, base=2).astype(int),
    'estimator__max_features': np.arange(4, 15),
    'estimator__ccp_alpha': scipy.stats.expon(scale=.1),
}

reg_tree = DecisionTreeRegressor(random_state=42)
multi_out_tree = MultiOutputRegressor(reg_tree)

random_search = RandomizedSearchCV(multi_out_tree, param_grid, scoring='neg_mean_squared_error', n_iter=40)
random_search.fit(X_train, y_train)
tree_model = random_search.best_estimator_

print('best parameters: ', random_search.best_params_)
print('train_rmse: ', np.sqrt(-random_search.best_score_))
print('validation_rmse: ', np.sqrt(mean_squared_error(y_val, tree_model.predict(X_val))))

best parameters:  {'estimator__ccp_alpha': 0.0054293848034671355, 'estimator__max_depth': 8, 'estimator__max_features': 14, 'estimator__min_samples_leaf': 8, 'estimator__min_samples_split': 64}
train_rmse:  0.26910600093700265
validation_rmse:  0.23639105977796043


I will use the SVR model because it gave a better performance than the desicion tree.  
But before using it, I will perform some finetuning to the SVR model hyperparameters by doing grid search around the best hyperparameters the random search returned.

In [21]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'estimator__kernel': ['rbf'],
    'estimator__C': np.arange(0.6, 0.75, 0.01),
    'estimator__gamma': np.arange(0.09, 0.1, 0.001),
    'estimator__epsilon': np.arange(0.07, 0.08, 0.001)
}

svr = SVR()
multi_out_svr = MultiOutputRegressor(svr)
grid_search = GridSearchCV(multi_out_svr, param_grid, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
model = grid_search.best_estimator_
print('best parameters: ', grid_search.best_params_)
print('train_rmse: ', np.sqrt(-grid_search.best_score_))
print('validation_rmse: ', np.sqrt(mean_squared_error(y_val, model.predict(X_val))))

best parameters:  {'estimator__C': 0.7500000000000001, 'estimator__epsilon': 0.07900000000000001, 'estimator__gamma': 0.1, 'estimator__kernel': 'rbf'}
train_rmse:  0.22763984301415535
validation_rmse:  0.18924742614596657


In [None]:
pickle.dump(model, open('./model.pkl', 'wb'))