# `00` Loading Required Libraries:

In [4]:
import mediapipe as mp
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

# `01` Loading Data:

In [5]:
poses_df = pd.read_csv('./poses.csv')
poses_df

Unnamed: 0,nose_x,nose_y,forehead_x,forehead_y,left_eye_x,left_eye_y,mouth_left_x,mouth_left_y,chin_x,chin_y,right_eye_x,right_eye_y,mouth_right_x,mouth_right_y,pitch,yaw,roll
0,0.544290,0.627704,0.535101,0.343744,0.366981,0.471792,0.399425,0.656761,0.481995,0.761716,0.614610,0.483489,0.550751,0.663319,-0.332311,-0.030279,-0.000065
1,0.493257,0.628441,0.420613,0.344614,0.352432,0.519859,0.465585,0.704497,0.556968,0.780681,0.597400,0.471836,0.599195,0.673752,-0.037739,-0.000442,-0.040827
2,0.561215,0.607447,0.570616,0.334387,0.394510,0.477495,0.438743,0.675665,0.493689,0.766704,0.629722,0.505948,0.560886,0.684556,0.145460,0.957516,0.588709
3,,,,,,,,,,,,,,,-0.005980,-0.005527,-0.042742
4,0.483827,0.512279,0.461701,0.314703,0.317921,0.493625,0.414255,0.667665,0.515998,0.755006,0.649726,0.450877,0.593068,0.644932,0.656852,1.317826,0.519743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.519961,0.602572,0.503086,0.319962,0.359114,0.495091,0.427254,0.701936,0.512356,0.812246,0.640442,0.488798,0.579783,0.700339,-0.180190,1.237419,-0.151969
1996,0.494027,0.633804,0.503646,0.306564,0.341800,0.478559,0.406269,0.698055,0.485745,0.800190,0.661947,0.464295,0.569925,0.699854,-0.256642,0.447957,-0.202905
1997,,,,,,,,,,,,,,,-0.643371,1.344538,-0.855785
1998,0.592817,0.575068,0.387100,0.353269,0.352363,0.583354,0.475964,0.720126,0.627823,0.742482,0.536783,0.417765,0.607256,0.613400,-0.361741,-0.180095,-0.114000


In [7]:
poses_df = poses_df.dropna(axis=0)
poses_df

Unnamed: 0,nose_x,nose_y,forehead_x,forehead_y,left_eye_x,left_eye_y,mouth_left_x,mouth_left_y,chin_x,chin_y,right_eye_x,right_eye_y,mouth_right_x,mouth_right_y,pitch,yaw,roll
0,0.544290,0.627704,0.535101,0.343744,0.366981,0.471792,0.399425,0.656761,0.481995,0.761716,0.614610,0.483489,0.550751,0.663319,-0.332311,-0.030279,-0.000065
1,0.493257,0.628441,0.420613,0.344614,0.352432,0.519859,0.465585,0.704497,0.556968,0.780681,0.597400,0.471836,0.599195,0.673752,-0.037739,-0.000442,-0.040827
2,0.561215,0.607447,0.570616,0.334387,0.394510,0.477495,0.438743,0.675665,0.493689,0.766704,0.629722,0.505948,0.560886,0.684556,0.145460,0.957516,0.588709
4,0.483827,0.512279,0.461701,0.314703,0.317921,0.493625,0.414255,0.667665,0.515998,0.755006,0.649726,0.450877,0.593068,0.644932,0.656852,1.317826,0.519743
5,0.540706,0.607075,0.573657,0.343246,0.401439,0.467946,0.408362,0.653070,0.474518,0.775854,0.633936,0.507311,0.554528,0.678016,-0.118044,-1.167097,-0.034972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,0.481374,0.658518,0.469538,0.268690,0.318793,0.458149,0.422636,0.721352,0.504291,0.808352,0.659829,0.443138,0.583152,0.716946,-0.214680,0.162865,-0.110771
1995,0.519961,0.602572,0.503086,0.319962,0.359114,0.495091,0.427254,0.701936,0.512356,0.812246,0.640442,0.488798,0.579783,0.700339,-0.180190,1.237419,-0.151969
1996,0.494027,0.633804,0.503646,0.306564,0.341800,0.478559,0.406269,0.698055,0.485745,0.800190,0.661947,0.464295,0.569925,0.699854,-0.256642,0.447957,-0.202905
1998,0.592817,0.575068,0.387100,0.353269,0.352363,0.583354,0.475964,0.720126,0.627823,0.742482,0.536783,0.417765,0.607256,0.613400,-0.361741,-0.180095,-0.114000


In [9]:
poses_df.isna().sum()

nose_x           0
nose_y           0
forehead_x       0
forehead_y       0
left_eye_x       0
left_eye_y       0
mouth_left_x     0
mouth_left_y     0
chin_x           0
chin_y           0
right_eye_x      0
right_eye_y      0
mouth_right_x    0
mouth_right_y    0
pitch            0
yaw              0
roll             0
dtype: int64

In [8]:
poses_df.shape

(1802, 17)

# `02` Preprocessing:

To make the system independent of the size of the images it was trained on or the face position in them, we have to perform some preprocessing:  
1- Center the face features around the fixed point (nose), this makes the model work with faces that can take any position in the image, whether it was in the center of the image or the top right, etc..  
2- Make the face features scale independent so they don't get affected by the distance between the face and the camera (whether you was close to the camera or not), this can be done by scaling the features by the distacne between 2 points on the face.

In [10]:
def normalize(poses_df):
    normalized_df = poses_df.copy()
    
    for dim in ['x', 'y']:
        # Centerning around the nose 
        for feature in ['forehead_'+dim, 'nose_'+dim, 'mouth_left_'+dim, 'mouth_right_'+dim, 'left_eye_'+dim, 'chin_'+dim, 'right_eye_'+dim]:
            normalized_df[feature] = poses_df[feature] - poses_df['nose_'+dim]
        
        
        # Scaling
        diff = normalized_df['mouth_right_'+dim] - normalized_df['left_eye_'+dim]
        for feature in ['forehead_'+dim, 'nose_'+dim, 'mouth_left_'+dim, 'mouth_right_'+dim, 'left_eye_'+dim, 'chin_'+dim, 'right_eye_'+dim]:
            normalized_df[feature] = normalized_df[feature] / diff
    
    return normalized_df

In [12]:
poses_df = normalize(poses_df)
poses_df

Unnamed: 0,nose_x,nose_y,forehead_x,forehead_y,left_eye_x,left_eye_y,mouth_left_x,mouth_left_y,chin_x,chin_y,right_eye_x,right_eye_y,mouth_right_x,mouth_right_y,pitch,yaw,roll
0,0.0,0.0,-0.050007,-1.482610,-0.964844,-0.814045,-0.788297,0.151717,-0.338986,0.699705,0.382650,-0.752974,0.035156,0.185955,-0.332311,-0.030279,-0.000065
1,0.0,0.0,-0.294390,-1.844317,-0.570690,-0.705567,-0.112139,0.494217,0.258185,0.989263,0.422036,-1.017624,0.429310,0.294433,-0.037739,-0.000442,-0.040827
2,0.0,0.0,0.056504,-1.318744,-1.001976,-0.627603,-0.736118,0.329460,-0.405864,0.769131,0.411763,-0.490189,-0.001976,0.372397,0.145460,0.957516,0.588709
4,0.0,0.0,-0.080415,-1.305798,-0.602973,-0.123289,-0.252856,1.026957,0.116923,1.604198,0.602946,-0.405812,0.397027,0.876711,0.656852,1.317826,0.519743
5,0.0,0.0,0.215244,-1.255909,-0.909712,-0.662297,-0.864492,0.218949,-0.432352,0.803441,0.608998,-0.474907,0.090288,0.337703,-0.118044,-1.167097,-0.034972
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,0.0,0.0,-0.044771,-1.506306,-0.615001,-0.774230,-0.222189,0.242793,0.086687,0.578964,0.675049,-0.832234,0.384999,0.225770,-0.214680,0.162865,-0.110771
1995,0.0,0.0,-0.076471,-1.376913,-0.728904,-0.523662,-0.420115,0.484120,-0.034461,1.021563,0.545981,-0.554321,0.271096,0.476338,-0.180190,1.237419,-0.151969
1996,0.0,0.0,0.042163,-1.478750,-0.667299,-0.701531,-0.384696,0.290339,-0.036306,0.751872,0.736084,-0.765988,0.332701,0.298469,-0.256642,0.447957,-0.202905
1998,0.0,0.0,-0.807073,-7.382129,-0.943353,0.275805,-0.458438,4.827955,0.137335,5.572042,-0.219836,-5.235519,0.056647,1.275805,-0.361741,-0.180095,-0.114000


In [13]:
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(poses_df, test_size=0.2, random_state=42)


X_train = train_df.drop(['pitch', 'yaw', 'roll'], axis=1)
y_train = train_df[['pitch', 'yaw', 'roll']]

X_val = val_df.drop(['pitch', 'yaw', 'roll'], axis=1)
y_val = val_df[['pitch', 'yaw', 'roll']]
print('Train set: ', X_train.shape, y_train.shape)
print('Validation set: ', X_val.shape, y_val.shape)

Train set:  (1441, 14) (1441, 3)
Validation set:  (361, 14) (361, 3)


# `03` Model Training:

### `a)` Support Vector Regressor:

In [21]:
from sklearn.svm import SVR # Support Vector Regressor used for regression tasks
from sklearn.multioutput import MultiOutputRegressor # to extend single output regressor to multi-output regressor
from sklearn.model_selection import RandomizedSearchCV # Randomized search selects random combinations of hyperparameters from a grid to find the best solution for the model
import scipy.stats # for statistical functions
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Hyperparameter grid setup
param_grid = {
    'estimator__kernel': ['linear', 'poly', 'rbf'],
    'estimator__C': scipy.stats.expon(scale=5),
    'estimator__degree': np.arange(2, 6),
    'estimator__coef0': np.arange(0, 6),
    'estimator__gamma': scipy.stats.expon(scale=.1),
    'estimator__epsilon': scipy.stats.expon(scale=.1)
}

svr = SVR()
multi_out_svr = MultiOutputRegressor(svr)
random_search = RandomizedSearchCV(multi_out_svr, param_grid, scoring='neg_mean_squared_error', random_state=42)
random_search.fit(X_train, y_train) # Fits the randomized search object to the training data. Performing the search for the best hyperparameter combination.
svr_model = random_search.best_estimator_

# Predict on training set
train_predictions = svr_model.predict(X_train)
# Predict on validation set
val_predictions = svr_model.predict(X_val)

# Calculate metrics for training data
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
train_mae = mean_absolute_error(y_train, train_predictions)

# Calculate metrics for validation data
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
val_mae = mean_absolute_error(y_val, val_predictions)

# Print metrics
print('best parameters: ', random_search.best_params_)
print(f'Train RMSE: {train_rmse:.5f}')
print(f'Train MAE: {train_mae:.5f}')
print(f'Validation RMSE: {val_rmse:.5f}')
print(f'Validation MAE: {val_mae:.5f}')

best parameters:  {'estimator__C': 4.804891430206782, 'estimator__coef0': 1, 'estimator__degree': 3, 'estimator__epsilon': 0.000709138979446984, 'estimator__gamma': 0.002333252359832339, 'estimator__kernel': 'rbf'}
Train RMSE: 0.61080
Train MAE: 0.33323
Validation RMSE: 0.86758
Validation MAE: 0.36787


In [22]:
val_predictions

array([[-0.12228745,  0.02701132, -0.00723269],
       [-0.11603318,  0.04073206, -0.01348811],
       [-0.12689703,  0.01442463, -0.0072263 ],
       ...,
       [-0.1263186 ,  0.04541787, -0.015033  ],
       [-0.12358301,  0.04040745, -0.01856847],
       [-0.11791628,  0.04632653, -0.01598768]])

### `b)` Desicion Tree Regressor:

In [23]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV

param_grid = {
    'estimator__max_depth': np.arange(2, 10),
    'estimator__min_samples_split': np.logspace(1, 7, 7, base=2).astype(int),
    'estimator__min_samples_leaf': np.logspace(0, 6, 7, base=2).astype(int),
    'estimator__max_features': np.arange(4, 15),
    'estimator__ccp_alpha': scipy.stats.expon(scale=.1),
}

reg_tree = DecisionTreeRegressor(random_state=42)
multi_out_tree = MultiOutputRegressor(reg_tree)

random_search = RandomizedSearchCV(multi_out_tree, param_grid, scoring='neg_mean_squared_error', n_iter=40)
random_search.fit(X_train, y_train)
tree_model = random_search.best_estimator_

# Predict on training set
train_predictions = tree_model.predict(X_train)
# Predict on validation set
val_predictions = tree_model.predict(X_val)

# Calculate metrics for training data
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
train_mae = mean_absolute_error(y_train, train_predictions)

# Calculate metrics for validation data
val_rmse = np.sqrt(mean_squared_error(y_val, val_predictions))
val_mae = mean_absolute_error(y_val, val_predictions)

# Print metrics
print('best parameters: ', random_search.best_params_)
print(f'Train RMSE: {train_rmse:.5f}')
print(f'Train MAE: {train_mae:.5f}')
print(f'Validation RMSE: {val_rmse:.5f}')
print(f'Validation MAE: {val_mae:.5f}')

best parameters:  {'estimator__ccp_alpha': 0.0646737577843958, 'estimator__max_depth': 3, 'estimator__max_features': 14, 'estimator__min_samples_leaf': 1, 'estimator__min_samples_split': 2}
Train RMSE: 0.55468
Train MAE: 0.33114
Validation RMSE: 0.86746
Validation MAE: 0.36800


I will use the SVR model because it gave a better performance than the desicion tree.  
But before using it, I will perform some finetuning to the SVR model hyperparameters by doing grid search around the best hyperparameters the random search returned.

In [24]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'estimator__kernel': ['rbf'],
    'estimator__C': np.arange(0.6, 0.75, 0.01),
    'estimator__gamma': np.arange(0.09, 0.1, 0.001),
    'estimator__epsilon': np.arange(0.07, 0.08, 0.001)
}

svr = SVR()
multi_out_svr = MultiOutputRegressor(svr)
grid_search = GridSearchCV(multi_out_svr, param_grid, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)
model = grid_search.best_estimator_
print('best parameters: ', grid_search.best_params_)
print('train_rmse: ', np.sqrt(-grid_search.best_score_))
print('validation_rmse: ', np.sqrt(mean_squared_error(y_val, model.predict(X_val))))

best parameters:  {'estimator__C': 0.6, 'estimator__epsilon': 0.07100000000000001, 'estimator__gamma': 0.09, 'estimator__kernel': 'rbf'}
train_rmse:  0.6139694846827053
validation_rmse:  0.8683562002585877


In [10]:
pickle.dump(model, open('./new_model.pkl', 'wb'))