In [126]:
import numpy as np
import os
import cv2
import scipy.io as sio
from math import cos, sin
import pandas as pd
import mediapipe
import warnings
import os
from sklearn.model_selection import train_test_split
import shutil
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler   
from joblib import dump, load

warnings.filterwarnings('ignore')

In [127]:
main_path= r"AFLW2000"
train_path= "train"
test_path= "test"

In [128]:
def draw_axis(img, pitch, yaw, roll, tdx=None, tdy=None, size = 100):
    height, width = img.shape[:2]
    yaw = -yaw
    if tdx != None and tdy != None:
        tdx = tdx
        tdy = tdy
    else:
        tdx = width / 2
        tdy = height / 2

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis | drawn in green
    #        v
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2)

    return img

In [129]:
def get_mesh(img):
    faceModule = mediapipe.solutions.face_mesh
    # loading image and its correspinding mat file
    with faceModule.FaceMesh(static_image_mode=True) as faces:
        # processing the face to extract the landmark points (468 point) for each x,y,z
        results = faces.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if results.multi_face_landmarks != None: 
            return results.multi_face_landmarks

In [130]:
def get_angles_from_mat(mat_path):
    mat_file = sio.loadmat(mat_path)
    pose_para = mat_file["Pose_Para"][0][:3]
    pitch = pose_para[0]      
    yaw = pose_para[1]
    roll = pose_para[2]
    return [pitch, yaw, roll]

In [131]:
os.makedirs(train_path, exist_ok=True)
os.makedirs(test_path, exist_ok=True)

data_files = os.listdir(main_path)
images = [img for img in data_files if img.endswith('.jpg')]
matrices = [matrix for matrix in data_files if matrix.endswith('.mat')]

image_train, image_test, matrix_train, matrix_test = train_test_split(images, matrices, test_size=0.2)

for img, mat in zip(image_test, matrix_test):
    img_src = os.path.join(main_path, img)
    img_dst = os.path.join(test_path, img)
    shutil.copy(img_src, img_dst)

    mat_src = os.path.join(main_path, mat)
    mat_dst = os.path.join(test_path, mat)
    shutil.copy(mat_src, mat_dst)

for img, mat in zip(image_train, matrix_train):
    img_src = os.path.join(main_path, img)
    img_dst = os.path.join(train_path, img)
    shutil.copy(img_src, img_dst)

    mat_src = os.path.join(main_path, mat)
    mat_dst = os.path.join(train_path, mat)
    shutil.copy(mat_src, mat_dst)


In [132]:
def get_mesh_of_dir(dir_path):
    x= []
    y= []
    for img in os.listdir(dir_path):
        if(img.endswith('jpg')):
            image = cv2.imread(os.path.join(dir_path,img))
            shape = image.shape
            mesh= get_mesh(image)
            if(mesh is not None):
                x0= []
                y0= []
                for landmark in mesh[0].landmark:
                    x0.append(int(landmark.x* shape[1]))
                    y0.append(int(landmark.y* shape[0]))
                
                x.append(x0)
                y.append(y0)
            else:
                os.remove(os.path.join(dir_path, img))
                os.remove(os.path.join(dir_path, img[:-3]+'mat'))
    
    x= np.array(x)
    print(x.shape)
    y= np.array(y)
    x_center = x - x[:,1].reshape(-1,1)
    y_center = y - y[:,1].reshape(-1,1)

    d = np.linalg.norm(np.array((x[:, 10],y[:, 10])) - np.array((x[:, 152],y[:, 152])),axis = 0).reshape(-1,1)
    x = x_center / d
    y = y_center / d
    
    return x, y


In [133]:
point_x_train, point_y_train= get_mesh_of_dir(train_path)

(1485, 468)


In [134]:
point_x_train.shape, point_y_train.shape

((1485, 468), (1485, 468))

In [135]:
def make_feature(x, col):
    max_length = max(len(sublist) for sublist in x)

    x_df = pd.DataFrame(columns=range(max_length))

    for i, sublist in enumerate(x):
        if len(sublist) < max_length:
            sublist += [None] * (max_length - len(sublist))
        x_df.loc[i] = sublist
    x_df.columns = [f'{col}{i+1}' for i in range(len(x_df.columns))]

    return x_df

In [136]:
point_x_df_train= make_feature(point_x_train, 'x')
point_x_df_train.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x459,x460,x461,x462,x463,x464,x465,x466,x467,x468
0,-0.008347,0.0,0.0,-0.020868,0.0,0.008347,0.025041,-0.237893,0.033388,0.037562,...,0.037562,0.05843,0.095992,0.029215,0.025041,0.12938,0.108512,0.095992,0.329711,0.350579
1,0.067483,0.0,0.085888,0.055214,0.0,0.030674,0.128832,0.128832,0.159506,0.153371,...,0.067483,0.079753,0.177911,0.055214,0.079753,0.294473,0.263799,0.233124,0.466249,0.490788
2,-0.009802,0.0,-0.009802,-0.04901,0.0,0.0,-0.019604,-0.323467,-0.029406,-0.029406,...,0.029406,0.04901,0.078416,0.029406,0.019604,0.058812,0.039208,0.029406,0.235249,0.245051
3,0.073167,0.0,0.0439,-0.058534,-0.019511,-0.029267,-0.039023,-0.239013,-0.073167,-0.092679,...,0.053656,0.068289,0.136579,0.048778,0.053656,0.082923,0.058534,0.0439,0.278036,0.292669
4,0.018006,0.0,0.013505,-0.022508,0.0,0.0,0.009003,-0.220578,0.013505,0.009003,...,0.045016,0.063022,0.108038,0.036013,0.036013,0.117041,0.094533,0.081028,0.292603,0.310609


In [137]:
point_y_df_train= make_feature(point_y_train, 'y')
point_y_df_train.head()

Unnamed: 0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,...,y459,y460,y461,y462,y463,y464,y465,y466,y467,y468
0,0.091818,0.0,0.016694,-0.141901,-0.033388,-0.091818,-0.24624,-0.350579,-0.358926,-0.409008,...,0.004174,-0.008347,-0.004174,0.008347,0.012521,-0.287975,-0.275455,-0.25876,-0.313017,-0.329711
1,0.134967,0.0,0.049079,-0.128832,-0.042944,-0.092023,-0.220855,-0.251529,-0.319012,-0.368091,...,0.018405,0.01227,0.049079,0.024539,0.036809,-0.233124,-0.220855,-0.21472,-0.251529,-0.269934
2,0.09802,0.0,0.019604,-0.137229,-0.039208,-0.09802,-0.235249,-0.254853,-0.323467,-0.372477,...,0.0,-0.009802,-0.009802,0.009802,0.009802,-0.254853,-0.245051,-0.235249,-0.284259,-0.294061
3,0.087801,0.0,0.009756,-0.141457,-0.039023,-0.097556,-0.258524,-0.248769,-0.375592,-0.429248,...,-0.014633,-0.034145,-0.048778,-0.004878,-0.004878,-0.336569,-0.317058,-0.297547,-0.434126,-0.458515
4,0.139549,0.0,0.049517,-0.126044,-0.040514,-0.094533,-0.211574,-0.193568,-0.288101,-0.337619,...,0.013505,0.0,0.031511,0.018006,0.027009,-0.207073,-0.19807,-0.19807,-0.229581,-0.238584


In [138]:
x_train = pd.concat([point_x_df_train, point_y_df_train], axis= 1)
x_train.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,y459,y460,y461,y462,y463,y464,y465,y466,y467,y468
0,-0.008347,0.0,0.0,-0.020868,0.0,0.008347,0.025041,-0.237893,0.033388,0.037562,...,0.004174,-0.008347,-0.004174,0.008347,0.012521,-0.287975,-0.275455,-0.25876,-0.313017,-0.329711
1,0.067483,0.0,0.085888,0.055214,0.0,0.030674,0.128832,0.128832,0.159506,0.153371,...,0.018405,0.01227,0.049079,0.024539,0.036809,-0.233124,-0.220855,-0.21472,-0.251529,-0.269934
2,-0.009802,0.0,-0.009802,-0.04901,0.0,0.0,-0.019604,-0.323467,-0.029406,-0.029406,...,0.0,-0.009802,-0.009802,0.009802,0.009802,-0.254853,-0.245051,-0.235249,-0.284259,-0.294061
3,0.073167,0.0,0.0439,-0.058534,-0.019511,-0.029267,-0.039023,-0.239013,-0.073167,-0.092679,...,-0.014633,-0.034145,-0.048778,-0.004878,-0.004878,-0.336569,-0.317058,-0.297547,-0.434126,-0.458515
4,0.018006,0.0,0.013505,-0.022508,0.0,0.0,0.009003,-0.220578,0.013505,0.009003,...,0.013505,0.0,0.031511,0.018006,0.027009,-0.207073,-0.19807,-0.19807,-0.229581,-0.238584


In [139]:
def get_angles_from_dir(dir_path):
    pitch= []
    yaw= []
    roll= []

    for mat in os.listdir(dir_path):
        if(mat.endswith('mat')):
            angles= get_angles_from_mat(os.path.join(dir_path,mat))
            pitch.append(angles[0])
            yaw.append(angles[1])
            roll.append(angles[2])
    return pitch, yaw, roll

In [140]:
pitch, yaw, roll= get_angles_from_dir(train_path)
y_train= {'pitch': pitch, 'yaw': yaw, 'roll': roll}

y_train= pd.DataFrame(y_train)
y_train.head()

Unnamed: 0,pitch,yaw,roll
0,-0.399231,0.018227,0.085676
1,0.470065,1.189533,0.300959
2,-0.18465,0.881137,-0.236852
3,-0.175379,0.299208,-0.373374
4,0.057119,0.110732,-0.043283


In [141]:
len(x_train), len(y_train)

(1485, 1485)

In [142]:
point_x_test, point_y_test= get_mesh_of_dir(test_path)

(368, 468)


In [143]:
point_x_test.shape, point_y_test.shape

((368, 468), (368, 468))

In [144]:
point_x_df_test= make_feature(point_x_test, 'x')
point_x_df_test.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x459,x460,x461,x462,x463,x464,x465,x466,x467,x468
0,0.035103,0.0,0.013164,-0.048267,-0.008776,-0.017552,-0.035103,-0.263274,-0.048267,-0.057043,...,0.039491,0.052655,0.096534,0.035103,0.030715,0.057043,0.035103,0.026327,0.219395,0.236947
1,0.03015,0.0,0.03015,-0.005025,-0.005025,0.005025,0.045226,-0.120601,0.055276,0.050251,...,0.050251,0.065326,0.130652,0.0402,0.045226,0.165827,0.140702,0.120601,0.336679,0.361804
2,-0.009064,0.0,0.0,-0.018128,0.004532,0.009064,0.018128,-0.240193,0.027192,0.031724,...,0.036256,0.054383,0.095171,0.027192,0.02266,0.113299,0.095171,0.081575,0.294576,0.312704
3,-0.008773,0.0,0.0,-0.01316,0.004387,0.008773,0.030706,-0.219328,0.039479,0.043866,...,0.035092,0.052639,0.096504,0.026319,0.021933,0.12721,0.105277,0.092118,0.307059,0.328992
4,-0.016241,0.0,-0.012181,-0.028421,0.00406,0.00406,0.0,-0.263914,0.00406,0.00812,...,0.028421,0.048723,0.077144,0.024361,0.016241,0.081204,0.060903,0.052783,0.247673,0.263914


In [145]:
point_y_df_test= make_feature(point_y_test, 'y')
point_y_df_test.head()

Unnamed: 0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,...,y459,y460,y461,y462,y463,y464,y465,y466,y467,y468
0,0.127249,0.0,0.039491,-0.122861,-0.039491,-0.092146,-0.219395,-0.179904,-0.29399,-0.337869,...,0.004388,-0.013164,0.004388,0.008776,0.017552,-0.245723,-0.232559,-0.223783,-0.302766,-0.311541
1,0.150752,0.0,0.055276,-0.115576,-0.0402,-0.085426,-0.201002,-0.180902,-0.276378,-0.326629,...,0.0201,0.005025,0.0402,0.0201,0.035175,-0.201002,-0.190952,-0.185927,-0.216078,-0.231153
2,0.113299,0.0,0.036256,-0.14049,-0.040787,-0.095171,-0.231129,-0.262853,-0.3263,-0.380683,...,0.013596,0.004532,0.02266,0.018128,0.027192,-0.231129,-0.226597,-0.217533,-0.235661,-0.249257
3,0.12721,0.0,0.039479,-0.12721,-0.039479,-0.087731,-0.210555,-0.250034,-0.302672,-0.355311,...,0.017546,0.004387,0.030706,0.017546,0.026319,-0.210555,-0.206168,-0.197395,-0.219328,-0.228101
4,0.113686,0.0,0.032482,-0.138047,-0.040602,-0.097445,-0.235492,-0.276094,-0.336998,-0.393841,...,0.00812,-0.00406,0.00812,0.016241,0.020301,-0.251733,-0.243613,-0.235492,-0.280155,-0.288275


In [146]:
x_test = pd.concat([point_x_df_test, point_y_df_test], axis= 1)
x_test.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,y459,y460,y461,y462,y463,y464,y465,y466,y467,y468
0,0.035103,0.0,0.013164,-0.048267,-0.008776,-0.017552,-0.035103,-0.263274,-0.048267,-0.057043,...,0.004388,-0.013164,0.004388,0.008776,0.017552,-0.245723,-0.232559,-0.223783,-0.302766,-0.311541
1,0.03015,0.0,0.03015,-0.005025,-0.005025,0.005025,0.045226,-0.120601,0.055276,0.050251,...,0.0201,0.005025,0.0402,0.0201,0.035175,-0.201002,-0.190952,-0.185927,-0.216078,-0.231153
2,-0.009064,0.0,0.0,-0.018128,0.004532,0.009064,0.018128,-0.240193,0.027192,0.031724,...,0.013596,0.004532,0.02266,0.018128,0.027192,-0.231129,-0.226597,-0.217533,-0.235661,-0.249257
3,-0.008773,0.0,0.0,-0.01316,0.004387,0.008773,0.030706,-0.219328,0.039479,0.043866,...,0.017546,0.004387,0.030706,0.017546,0.026319,-0.210555,-0.206168,-0.197395,-0.219328,-0.228101
4,-0.016241,0.0,-0.012181,-0.028421,0.00406,0.00406,0.0,-0.263914,0.00406,0.00812,...,0.00812,-0.00406,0.00812,0.016241,0.020301,-0.251733,-0.243613,-0.235492,-0.280155,-0.288275


In [147]:
pitch, yaw, roll= get_angles_from_dir(test_path)
y_test= {'pitch': pitch, 'yaw': yaw, 'roll': roll}

y_test= pd.DataFrame(y_test)
y_test.head()

Unnamed: 0,pitch,yaw,roll
0,-0.026812,0.011965,-0.220662
1,0.032106,0.442191,0.003891
2,-0.111781,0.013926,0.081501
3,-0.02673,0.036588,0.081005
4,-0.099396,-0.071514,0.046951


In [148]:
pitch_mean= np.array(pitch).mean()
yaw_mean= np.array(yaw).mean()
roll_mean= np.array(roll).mean()
base_model_pred= [[pitch_mean, yaw_mean, roll_mean]]*len(y_train)

mean_absolute_error(y_train, base_model_pred)

0.2933441351350559

In [149]:
regressor = MultiOutputRegressor(Ridge())

regressor.fit(x_train, y_train)

In [156]:
dump(regressor, 'Ridge_model.joblib')

['Ridge_model.joblib']

In [151]:
mean_absolute_error(y_test, regressor.predict(x_test))

0.10396528831742284

In [155]:
model = MultiOutputRegressor(SVR())
scaler= MinMaxScaler()
x_train= scaler.fit_transform(x_train)
x_test= scaler.transform(x_test)
model.fit(x_train, y_train)
model.score(x_test, y_test)

0.7453678660968367

In [157]:
dump(model, 'model.joblib')

['model.joblib']

In [158]:
def predict(img_path=None, img=None):
    # regressor= load('Ridge_model.joblib')
    model= load('model.joblib')
    if(img_path!=None):
        img = cv2.imread(img_path)
    shape= img.shape
    mesh= get_mesh(img)

    x= []
    y= []
    nose= []
    if(mesh is not None):
        x0= []
        y0= []

        for landmark in mesh[0].landmark:
            x0.append(landmark.x* shape[1])
            y0.append(landmark.y* shape[0])

        x.append(np.array(x0))
        y.append(np.array(y0))

        x= np.array(x)
        y= np.array(y)

        nose.append(x[:,1].reshape(-1,1))
        nose.append(y[:,1].reshape(-1,1))
        x_center = x - nose[0]
        y_center = y - nose[1]

        d = np.linalg.norm(np.array((x[:, 10],y[:, 10])) - np.array((x[:, 152],y[:, 152])),axis = 0).reshape(-1,1)
        x = x_center / d
        y = y_center / d
        if x != [] and y != []:
            x_df= make_feature(x, 'x')
            y_df= make_feature(y, 'y')
        else:
            return None, None

    x = pd.concat([x_df,y_df], axis= 1)

    return regressor.predict(x)[0], np.array(nose).reshape(2,)

In [159]:
def stream_predict():
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()
        frame= cv2.resize(frame, (450, 450))
        pred, nose= predict(img=frame)
        frame= draw_axis(frame, pred[0], pred[1], pred[2], nose[0], nose[1])
        
        cv2.imshow('Video', frame)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [161]:
pred, nose= predict(img_path=r"D:\ITI\ML supervised\Head-Pose-Estimation-Using-ML-And-Mediapipe\test\image02079.jpg")
pred, nose

(array([-0.18461679,  0.15206686,  0.58924895]),
 array([212.57961541, 278.16065848]))

In [162]:
cv2.imwrite('frame11.jpg', draw_axis(cv2.imread(r"D:\ITI\ML supervised\Head-Pose-Estimation-Using-ML-And-Mediapipe\test\image02079.jpg"), pred[0], pred[1], pred[2], nose[0], nose[1]))

True

In [163]:
stream_predict()

In [5]:
def video_predict(vid_path):
    cap = cv2.VideoCapture(vid_path)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    out = cv2.VideoWriter("new video.mp4", fourcc, 30.0, (width, height))
    cv2.namedWindow("new Video", cv2.WINDOW_NORMAL)
    while cap.isOpened():
        print('.', end='')
        ret, frame = cap.read()
        if not ret:
            break
        pred, nose= predict(img=frame)
        frame= draw_axis(frame, pred[0], pred[1], pred[2], nose[0], nose[1])
        cv2.imshow("new Video", frame)
        cv2.waitKey(1)
        out.write(frame)
    
    cap.release()
    out.release()
    del out
    cv2.destroyAllWindows() 

In [4]:
video_predict(r"D:\ITI\Head-Pose_estimation-Using-ML-And-Mediapipe\video.mp4")