# Packages 

In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

import cv2
import math
from math import cos, sin
from PIL import Image, ImageFilter

# Load Data

In [32]:
df_pose=pd.read_csv('data/head_pose.csv')
df_pose.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,129,130,131,132,133,134,135,yaw,pitch,roll
0,121.868034,122.367607,126.819237,130.831787,137.523132,148.521729,161.528015,182.876678,213.940063,248.005371,...,310.149048,313.233856,315.107117,322.384613,330.965576,330.701965,324.90387,1.044306,-22.874239,4.908886
1,281.238159,277.339417,274.876953,269.523773,257.788269,240.968155,221.801483,205.846298,207.085449,226.185638,...,303.093811,300.724457,300.060974,303.260895,299.485168,300.598602,302.286499,68.15524,26.932743,17.24367
2,236.385101,239.198257,242.566376,243.971375,242.828186,238.567902,232.561859,229.155289,238.303162,259.122467,...,303.122009,302.877289,302.146454,300.48938,303.481873,304.681732,304.631042,50.485413,-10.579652,-13.570645
3,168.029221,177.69751,190.641602,201.395294,211.957214,222.723099,231.35022,244.332855,268.327637,291.832214,...,315.469391,312.701294,310.133301,300.04129,315.5448,319.498596,320.843994,17.143373,-10.048455,-21.392782
4,280.46225,287.249817,293.892456,297.539368,295.677002,287.270355,271.661591,254.701157,250.219299,261.212463,...,306.811768,307.014893,305.351318,309.934814,311.558899,312.920746,312.05835,68.64055,-50.544582,-59.207973


# Split the data

In [33]:
x=df_pose.drop(columns=['yaw','pitch','roll'],axis=1)
y=df_pose[['yaw','pitch','roll']]

In [34]:
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.2)

# Data preprocessing

In [35]:
x_coordinate_idx=[str(i) for i in range(x_test.shape[1]) if i%2==0]
y_coordinate_idx=[str(i) for i in range(x_test.shape[1]) if i%2!=0]

In [38]:
class SubImputer(SimpleImputer):
    def __init__(self,x_num,x_corr,y_num,y_corr):
        self.x_num= x_num
        self.y_num= y_num
        self.x_corr=x_corr
        self.y_corr=y_corr
        return 

    def fit(self, X):
        
        return self

    def transform(self, X):
        #seperate x&y points
        x_data=X[self.x_corr]
        y_data=X[self.y_corr]
        #subtract the centre point
        x_data=x_data.sub(x_data[f'{self.x_num}'],axis=0)
        y_data=y_data.sub(y_data[f'{self.y_num}'],axis=0)
        #join the two data frames
        full_data=pd.concat([x_data, y_data], axis=1)
        #resort the coloumns
        data_idx=[str(i) for i in range(X.shape[1])]
        
        return full_data[data_idx]

In [51]:
class NormaliseImputer(SimpleImputer):
    def __init__(self,x_corr,y_corr):
        self.x_corr=x_corr
        self.y_corr=y_corr
        return 

    def fit(self, X):
        
        return self

    def transform(self, X):
        #seperate x&y points
        x_data=X[self.x_corr]
        y_data=X[self.y_corr]
        #normalise the points => p-min/max-min
        x_data=x_data.sub(x_data.min(axis=1),axis=0)
        x_data=x_data.div( (x_data.max(axis=1)-x_data.min(axis=1)) ,axis=0)
        
        y_data=y_data.sub(y_data.min(axis=1),axis=0)
        y_data=y_data.div( (y_data.max(axis=1)-y_data.min(axis=1)) ,axis=0)
        #join the two data frames
        full_data=pd.concat([x_data, y_data], axis=1)
        #resort the coloumns
        data_idx=[str(i) for i in range(X.shape[1])]
        
        return full_data[data_idx]

In [55]:
preprocessing_pip =Pipeline([
                              ("noramalise the face points",NormaliseImputer(x_coordinate_idx,y_coordinate_idx)),
                              ("centre the coordinate arround the nose",SubImputer(30,x_coordinate_idx,31,y_coordinate_idx)),
                            ])