In [1]:
import cv2
import mediapipe as mp

import numpy as np
import pandas as pd
from scipy.io import loadmat
from math import cos, sin, dist

import os

import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
# Initializing Mediapipe
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_drawing, mp_drawing_styles, mp_holistic = (
    mp.solutions.drawing_utils,
    mp.solutions.drawing_styles,
    mp.solutions.holistic,
)

In [3]:
# Initializing DataFrame
# Column layout: image file name, then an (x, y) pair for each of the
# landmarks numbered 1..468, then the three head-pose angle columns.
col_names = (
    ['img']
    + [f'{axis}{idx}' for idx in range(1, 469) for axis in ('x', 'y')]
    + ['Yaw', 'Pitch', 'Roll']
)

# Empty frame carrying only the header; rows are added by a later cell.
df = pd.DataFrame(columns=col_names)
df

Unnamed: 0,img,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,y465,x466,y466,x467,y467,x468,y468,Yaw,Pitch,Roll


In [7]:
# Capturing Landmarks and returning Data to CSV
#
# For every image in the dataset directory that has a same-named .mat
# annotation file:
#   1. run MediaPipe Holistic on the image,
#   2. express each face landmark relative to the pose-model nose position,
#      scaled by the nose -> left-eye-outer distance (all in pixels),
#   3. append the first three 'Pose_Para' values, converted from radians to
#      degrees, in the column order Yaw, Pitch, Roll.
# Images that cannot be used are recorded in `skipped` together with the
# reason instead of being silently swallowed by a bare `except`.

path_of_the_directory = 'D:\\ITI\\Statistical ML 1\\project\\AFLW2000\\'
ext_mat = '.mat'
ext_img = '.jpg'

rows = []       # one complete record (name, landmarks..., angles) per usable image
skipped = []    # (file name, reason) for every image that produced no record

# Pair each image with its annotation by file stem rather than relying on
# the (unspecified) order in which os.listdir returns directory entries.
image_names = sorted(
    entry for entry in os.listdir(path_of_the_directory) if entry.endswith(ext_img)
)

# The model is created once for the whole run. static_image_mode=True makes
# MediaPipe treat each input as an unrelated still image, so no tracking state
# leaks from one dataset image into the next.
with mp_holistic.Holistic(static_image_mode=True,
                          min_detection_confidence=0.5,
                          min_tracking_confidence=0.5) as holistic:
    for name in image_names:
        mat_path = os.path.join(path_of_the_directory,
                                os.path.splitext(name)[0] + ext_mat)
        if not os.path.isfile(mat_path):
            skipped.append((name, 'missing annotation file'))
            continue

        frame = cv2.imread(os.path.join(path_of_the_directory, name))
        if frame is None:
            # cv2.imread signals an unreadable file by returning None.
            skipped.append((name, 'image could not be read'))
            continue

        image_height, image_width, _ = frame.shape
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Make Detections
        results = holistic.process(image)
        if results.face_landmarks is None or results.pose_landmarks is None:
            skipped.append((name, 'face or pose landmarks not detected'))
            continue

        landmarks = results.face_landmarks.landmark
        pose_points = results.pose_landmarks.landmark

        # Reference points in pixel coordinates.
        NoseX = pose_points[mp_holistic.PoseLandmark.NOSE].x * image_width
        NoseY = pose_points[mp_holistic.PoseLandmark.NOSE].y * image_height
        LfeoX = pose_points[mp_holistic.PoseLandmark.LEFT_EYE_OUTER].x * image_width
        LfeoY = pose_points[mp_holistic.PoseLandmark.LEFT_EYE_OUTER].y * image_height

        distance = dist([NoseX, NoseY], [LfeoX, LfeoY])
        if distance == 0:
            # Coincident reference points would make the scaling divide by zero.
            skipped.append((name, 'zero nose-to-eye distance'))
            continue

        row = [name]
        for landmark in landmarks:
            row.append(((landmark.x * image_width) - NoseX) / distance)
            row.append(((landmark.y * image_height) - NoseY) / distance)

        # Everything except the three angle columns must be filled by now.
        if len(row) != len(col_names) - 3:
            skipped.append((name, 'unexpected number of face landmarks'))
            continue

        annots = loadmat(mat_path)
        pre_pose_params = annots['Pose_Para'][0]
        pose_params = pre_pose_params[:3] * 180 / np.pi  # radians -> degrees

        # Stored in column order Yaw, Pitch, Roll (index 1, 0, 2 of Pose_Para).
        row.extend([pose_params[1], pose_params[0], pose_params[2]])
        rows.append(row)

# Build the frame in one step: avoids a quadratic concat-per-row loop and
# makes the cell idempotent (re-running it does not append duplicate rows).
df = pd.DataFrame(rows, columns=col_names)
print('{} images processed, {} skipped'.format(len(rows), len(skipped)))

In [35]:
# Persist the collected dataset; the positional row index is not written.
df.to_csv(path_or_buf='Mediapipe_dataset.csv', index=False)

In [6]:
# DRAW THE LINES

def draw_axis(img, yaw, pitch, roll, tdx = None, tdy = None, size = 50):
    """Draw three head-pose axes on ``img`` and return it.

    Three line segments are drawn with ``cv2.line`` from the origin
    ``(tdx, tdy)`` to end points computed from the given angles.

    Args:
        img: Image to draw on; only ``img.shape[:2]`` is read (and only when
            no origin is supplied). The image is modified in place.
        yaw, pitch, roll: Angles in degrees. They are converted to radians
            here, and the yaw sign is flipped before use.
        tdx, tdy: Origin of the axes in pixels. If either one is ``None``
            the centre of the image is used for both.
        size: Length scale of each axis in pixels.

    Returns:
        The same ``img`` object that was passed in.
    """
    pitch   = pitch * np.pi / 180
    yaw     = -(yaw * np.pi / 180)
    roll    = roll * np.pi / 180

    # Identity checks instead of `!= None`: `!=` can be overloaded (numpy
    # values compare element-wise), `is None` cannot.
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    origin = (int(tdx), int(tdy))

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis pointing downwards drawn in green
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    # Colour tuples are in OpenCV's BGR channel order.
    cv2.line(img, origin, (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(img, origin, (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, origin, (int(x3), int(y3)), (255, 0, 0), 2)

    return img

In [11]:
# Reload the dataset written earlier so later cells can start from the file.
df = pd.read_csv(filepath_or_buffer='Mediapipe_dataset.csv')
df

Unnamed: 0,img,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,y465,x466,y466,x467,y467,x468,y468,Yaw,Pitch,Roll
0,image00002.jpg,-0.159295,0.300003,-0.169120,0.067671,-0.146352,0.109180,-0.190952,-0.301066,-0.161892,...,-0.640003,0.132219,-0.604989,0.751723,-0.761274,0.807323,-0.806983,1.044306,-22.874239,4.908885
1,image00004.jpg,-0.416676,0.593898,-0.633476,0.088126,-0.330885,0.272012,-0.436058,-0.357271,-0.643118,...,-0.647413,0.209701,-0.620980,1.071519,-0.693557,1.184284,-0.772029,68.155235,26.932741,17.243670
2,image00006.jpg,-0.734650,1.149818,-0.709969,1.018326,-0.729771,1.051351,-0.769544,0.827564,-0.708485,...,0.671398,-0.662946,0.685196,-0.392484,0.619033,-0.365358,0.596331,50.485409,-10.579652,-13.570644
3,image00008.jpg,0.414719,0.409366,0.227379,0.177478,0.333828,0.206681,0.083406,-0.168262,0.184064,...,-0.603780,0.323671,-0.559950,0.887183,-0.897459,0.926187,-0.960158,17.143373,-10.048455,-21.392780
4,image00013.jpg,0.105902,0.580888,0.002575,0.216521,0.046239,0.327829,-0.146501,-0.159062,-0.024354,...,-0.483192,0.078249,-0.459206,0.645144,-0.682949,0.695530,-0.716031,0.685565,-1.536199,-12.643007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1706,image04348.jpg,0.161475,0.166346,0.320091,-0.213986,0.200256,-0.133883,0.180437,-0.725052,0.344361,...,-1.163841,0.386957,-1.121784,1.003067,-1.307187,1.060320,-1.376188,-24.621336,-21.058870,7.035404
1707,image04356.jpg,0.228584,0.862803,0.169952,0.387416,0.239740,0.571275,0.126067,-0.063783,0.166146,...,-0.376282,0.565102,-0.357284,1.371822,-0.485520,1.451630,-0.586411,32.493248,-8.940119,-6.218641
1708,image04358.jpg,0.028990,0.815713,0.083542,0.396861,0.066526,0.489935,0.004347,-0.178378,0.100984,...,-0.624474,0.427751,-0.586279,1.306075,-0.741529,1.380261,-0.796113,-4.035367,-11.293093,6.022806
1709,image04363.jpg,0.539739,0.966315,0.813460,0.513494,0.469862,0.683514,0.443490,0.076730,0.820358,...,-0.480620,0.160130,-0.418949,0.249507,-0.726866,0.237734,-0.830284,-81.288437,-13.327947,10.081746


In [4]:
# load
# One pickled regressor per head-pose angle, read in the order yaw, pitch, roll.
# NOTE: unpickling runs code embedded in the file; only load trusted files.
loaded_models = []
for pkl_name in ('SVM_YAW.pkl', 'SVM_PITCH.pkl', 'SVM_ROLL.pkl'):
    with open(pkl_name, 'rb') as f:
        loaded_models.append(pickle.load(f))

regr_yaw, regr_pitch, regr_roll = loaded_models

In [7]:
# Live head-pose estimation from the default camera.
# Each frame is run through MediaPipe Holistic; when a face and a pose are
# found, the face landmarks are normalised relative to the nose (scaled by the
# nose -> left-eye-outer distance), fed to the three regressors, and the
# predicted axes are drawn on the frame. Every frame is shown and recorded.
# Press 'q' in the preview window to stop.
cap = cv2.VideoCapture(0)

# Named property ids instead of the magic numbers 3 and 4.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
size = (frame_width, frame_height)

# 'mp4v' is the fourcc that matches the .mp4 container used for the output.
vid = cv2.VideoWriter('Pose Recognition.mp4',
                      cv2.VideoWriter_fourcc(*'mp4v'),
                      10, size)

try:
    # Initiate holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame delivered (camera unavailable or stream ended).
                break

            image_height, image_width, _ = frame.shape

            # Recolor Feed
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Make Detections
            results = holistic.process(image)

            # Either result is None when nothing was detected in this frame;
            # in that case the frame is shown and recorded without axes.
            if results.face_landmarks is not None and results.pose_landmarks is not None:
                landmarks = results.face_landmarks.landmark
                pose_points = results.pose_landmarks.landmark

                NoseX = pose_points[mp_holistic.PoseLandmark.NOSE].x * image_width
                NoseY = pose_points[mp_holistic.PoseLandmark.NOSE].y * image_height
                LfeoX = pose_points[mp_holistic.PoseLandmark.LEFT_EYE_OUTER].x * image_width
                LfeoY = pose_points[mp_holistic.PoseLandmark.LEFT_EYE_OUTER].y * image_height

                distance = dist([NoseX, NoseY], [LfeoX, LfeoY])

                # A zero distance would make the normalisation divide by zero.
                if distance > 0:
                    row = []
                    for landmark in landmarks:
                        row.append(((landmark.x * image_width) - NoseX) / distance)
                        row.append(((landmark.y * image_height) - NoseY) / distance)

                    # Feature columns only: drop 'img' and the three angle columns.
                    df_temp = pd.DataFrame([row], columns=col_names[1:-3])

                    yaw = regr_yaw.predict(df_temp)
                    pitch = regr_pitch.predict(df_temp)
                    roll = regr_roll.predict(df_temp)

                    frame = draw_axis(frame, yaw[0], pitch[0], roll[0], NoseX, NoseY)

            cv2.imshow('Pose Recognition', frame)

            vid.write(frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera, finalise the video file and close the preview,
    # including when the cell is interrupted from the notebook.
    cap.release()
    vid.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 

In [8]:
# Free the camera, finalise the recorded video, then close the preview window.
for resource in (cap, vid):
    resource.release()
cv2.destroyAllWindows()