In [3]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd

In [4]:
# Short aliases for the two MediaPipe solution modules used by the capture cells.
mp_drawing = mp.solutions.drawing_utils   # draw_landmarks / DrawingSpec
mp_holistic = mp.solutions.holistic       # Holistic model and its connection constants

Initial feed and marking landmarks

In [85]:
# Live preview: read webcam frames, run MediaPipe Holistic on each one and draw
# the detected face / hand / pose landmarks. Press "q" in the window to stop.
cap=cv2.VideoCapture(1)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Every connection line and the hand/pose landmark dots share one spec;
# the face landmark dots use their own colour.
line_spec=mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circle_radius=1)
face_dot_spec=mp_drawing.DrawingSpec(color=(0,0,0),thickness=1,circle_radius=1)

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret,frame=cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing None to cvtColor.
                break

            # Convert OpenCV's BGR frame to RGB for the model and mark it
            # read-only while it is being processed.
            image=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            image.flags.writeable=False
            results=holistic.process(image)

            # Back to a writeable BGR image so OpenCV can draw on it and show it.
            image.flags.writeable=True
            image=cv2.cvtColor(image,cv2.COLOR_RGB2BGR)

            for landmark_list,connections,dot_spec in (
                (results.face_landmarks,mp_holistic.FACEMESH_TESSELATION,face_dot_spec),
                (results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS,line_spec),
                (results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS,line_spec),
                (results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,line_spec),
            ):
                mp_drawing.draw_landmarks(image,landmark_list,connections,dot_spec,line_spec)

            cv2.imshow("feed",image)

            if cv2.waitKey(100) & 0xFF==ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

In [26]:
# Inspect the visibility value of the first face landmark in the most recent `results`.
# NOTE(review): presumably fails when no face was detected in that frame (face_landmarks unset) - confirm.
results.face_landmarks.landmark[0].visibility

0.0

CSV

In [8]:
import os
import csv

In [12]:
# Total number of landmarks stored per CSV row.
# The counts are fixed constants rather than read from the last `results`
# object, so this cell no longer depends on what the camera happened to see
# in the final preview frame (it used to fail when no pose/face was detected).
POSE_LANDMARK_COUNT=33
FACE_LANDMARK_COUNT=468
HAND_LANDMARK_COUNT=21   # per hand; left and right are both stored

coord=POSE_LANDMARK_COUNT+FACE_LANDMARK_COUNT+2*HAND_LANDMARK_COUNT
coord

543

In [13]:
# CSV header: three label columns followed by x/y/z/v columns for every landmark index.
landmarks=["class","subclass","condition"]
for idx in range(1,coord+1):
    landmarks.extend(template.format(idx) for template in ("x{}","y{}","z{}","v{}"))


In [14]:
# Start coordinates.csv from scratch with just the header row (mode "w" truncates any existing file).
with open("coordinates.csv",mode="w",newline='') as header_file:
    header_writer=csv.writer(header_file,delimiter=",",quotechar='"',quoting=csv.QUOTE_MINIMAL)
    header_writer.writerows([landmarks])

Extracting poses and their coordinates

In [44]:
# Labels written in front of every row recorded by the capture cell below.
classname = "hands"
subclass = "expressive"
condition = "good"

In [45]:
# Record labelled training samples: run MediaPipe Holistic on webcam frames and
# append one CSV row (labels + flattened landmarks) for every frame in which
# both a pose and a face were detected. Press "q" in the window to stop.
cap=cv2.VideoCapture(1)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Every connection line and the hand/pose landmark dots share one spec;
# the face landmark dots use their own colour.
line_spec=mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circle_radius=1)
face_dot_spec=mp_drawing.DrawingSpec(color=(0,0,0),thickness=1,circle_radius=1)

try:
    # The CSV is opened once for the whole session instead of once per frame.
    with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic, \
         open("coordinates.csv",mode="a",newline='') as f:
        csv_writer=csv.writer(f,delimiter=",",quotechar='"',quoting=csv.QUOTE_MINIMAL)

        while cap.isOpened():
            ret,frame=cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing None to cvtColor.
                break

            # Convert OpenCV's BGR frame to RGB for the model and mark it
            # read-only while it is being processed.
            image=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            image.flags.writeable=False
            results=holistic.process(image)

            # Back to a writeable BGR image so OpenCV can draw on it and show it.
            image.flags.writeable=True
            image=cv2.cvtColor(image,cv2.COLOR_RGB2BGR)

            for landmark_list,connections,dot_spec in (
                (results.face_landmarks,mp_holistic.FACEMESH_TESSELATION,face_dot_spec),
                (results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS,line_spec),
                (results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS,line_spec),
                (results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,line_spec),
            ):
                mp_drawing.draw_landmarks(image,landmark_list,connections,dot_spec,line_spec)

            # Frames without a pose or a face are skipped explicitly; this is the
            # only case the former bare `except: pass` was meant to cover, and
            # real errors (e.g. CSV I/O failures) are no longer hidden.
            if results.pose_landmarks is not None and results.face_landmarks is not None:
                row=[classname,subclass,condition]
                # Landmark groups are written in a fixed order: pose, left hand, right hand, face.
                for landmark_list in (
                    results.pose_landmarks,
                    results.left_hand_landmarks,
                    results.right_hand_landmarks,
                    results.face_landmarks,
                ):
                    if landmark_list:
                        row.extend(np.array([[lm.x,lm.y,lm.z,lm.visibility] for lm in landmark_list.landmark]).flatten())
                    else:
                        # Undetected hand: pad with zeros so every row keeps the same width.
                        row.extend([0]*(21*4))
                csv_writer.writerow(row)

            cv2.imshow("feed",image)

            if cv2.waitKey(100) & 0xFF==ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

MODELS

In [40]:
from sklearn.model_selection import train_test_split

In [46]:
# Load the samples stored in coordinates.csv (header row plus one row per recorded frame).
df=pd.read_csv("coordinates.csv")

In [62]:
# Number of recorded samples for each subclass label.
df["subclass"].value_counts()

subclass
expressive    209
back          135
smile         114
neutral       113
pocket        100
clasped        63
slacked        62
too wide       44
straight       29
Name: count, dtype: int64

In [48]:
# Separate the three label columns (targets) from the landmark columns (features).
label_cols=["class","subclass","condition"]
y=df[label_cols]
x=df.drop(columns=label_cols)
x.values

array([[ 2.98854738e-01,  6.50725484e-01, -2.44543135e-01, ...,
         6.33080304e-01,  4.63177031e-03,  0.00000000e+00],
       [ 3.50300223e-01,  6.75458908e-01, -2.39987418e-01, ...,
         6.67716444e-01,  2.75367498e-03,  0.00000000e+00],
       [ 3.69128615e-01,  6.70882881e-01, -3.93913925e-01, ...,
         6.46089673e-01,  9.72074165e-04,  0.00000000e+00],
       ...,
       [ 4.49331224e-01,  4.83420134e-01, -2.44433448e-01, ...,
         4.57545936e-01, -4.94494161e-04,  0.00000000e+00],
       [ 4.24417704e-01,  4.87080961e-01, -3.82739902e-01, ...,
         4.58919585e-01, -3.42699210e-03,  0.00000000e+00],
       [ 4.09647286e-01,  4.99167830e-01, -4.49689060e-01, ...,
         4.71757472e-01, -4.73755831e-03,  0.00000000e+00]])

In [49]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the split repeatable.
x_train,x_test,y_train,y_test=train_test_split(
    x.values,
    y,
    test_size=0.2,
    random_state=2,
)
x_test

array([[ 5.12893081e-01,  5.65308750e-01, -7.23792017e-02, ...,
         5.57660520e-01,  1.02971320e-03,  0.00000000e+00],
       [ 5.39364278e-01,  5.45223832e-01, -1.43911943e-01, ...,
         5.36703348e-01, -2.18318333e-03,  0.00000000e+00],
       [ 4.67237443e-01,  5.13473809e-01, -1.72205627e-01, ...,
         5.09416759e-01,  3.91779555e-04,  0.00000000e+00],
       ...,
       [ 5.78610003e-01,  5.60924649e-01, -1.16220713e-01, ...,
         5.47194481e-01, -4.56957053e-03,  0.00000000e+00],
       [ 6.14010453e-01,  5.47290862e-01, -1.41453102e-01, ...,
         5.30007541e-01,  6.28693087e-04,  0.00000000e+00],
       [ 5.97146571e-01,  5.86163878e-01, -1.63319111e-01, ...,
         5.79138875e-01,  3.26746143e-03,  0.00000000e+00]])

In [50]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression,RidgeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.multioutput import MultiOutputClassifier

In [53]:
# Candidate classifiers, each preceded by feature standardisation.
# The two stochastic ensembles get a pinned random_state (the same seed as the
# train/test split) so the fitted models, the reported precision scores and the
# pickled random forest are reproducible from one run to the next.
pipelines={
    'lr':make_pipeline(StandardScaler(),LogisticRegression(max_iter=1000)),
    'rc':make_pipeline(StandardScaler(),RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(),RandomForestClassifier(random_state=2)),
    'gb':make_pipeline(StandardScaler(),GradientBoostingClassifier(random_state=2))
}

In [54]:
# Fit every candidate pipeline on the training split. MultiOutputClassifier
# wraps the pipeline so all three label columns are predicted.
fit_models={}
for algo,pipeline in pipelines.items():
    model=MultiOutputClassifier(pipeline).fit(x_train,y_train)
    fit_models[algo]=model

In [55]:
# Predictions of the fitted random-forest model for the held-out features.
fit_models['rf'].predict(x_test)

array([['hands', 'expressive', 'good'],
       ['hands', 'expressive', 'good'],
       ['hands', 'pocket', 'bad'],
       ['face', 'smile', 'good'],
       ['body', 'slacked', 'bad'],
       ['hands', 'clasped', 'good'],
       ['face', 'pocket', 'good'],
       ['hands', 'pocket', 'bad'],
       ['hands', 'expressive', 'good'],
       ['face', 'smile', 'good'],
       ['hands', 'back', 'bad'],
       ['hands', 'clasped', 'good'],
       ['hands', 'back', 'bad'],
       ['face', 'expressive', 'good'],
       ['face', 'smile', 'good'],
       ['hands', 'too wide', 'bad'],
       ['hands', 'pocket', 'bad'],
       ['face', 'neutral', 'bad'],
       ['body', 'slacked', 'bad'],
       ['body', 'slacked', 'bad'],
       ['face', 'neutral', 'bad'],
       ['hands', 'back', 'bad'],
       ['hands', 'expressive', 'good'],
       ['face', 'smile', 'good'],
       ['face', 'smile', 'good'],
       ['body', 'slacked', 'bad'],
       ['hands', 'pocket', 'bad'],
       ['hands', 'too wide', 'bad'],

Evaluation

In [56]:
from sklearn.metrics import precision_score
import pickle as p

In [57]:
# Report, for each fitted model, the weighted precision of every label column.
for algo, model in fit_models.items():
    y_pred = model.predict(x_test)
    # y_test is a DataFrame (hence iloc) while y_pred is a numpy array.
    precisions=[
        precision_score(y_test.iloc[:,col], y_pred[:,col], average='weighted')
        for col in range(y_test.shape[1])
    ]
    print(f"Precision score for {algo}: {precisions}")

Precision score for lr: [0.9887226198221645, 0.9785359951603145, 0.9541317179248214]
Precision score for rc: [0.9943076081007116, 0.9889005384695041, 0.9827980959015442]
Precision score for rf: [0.9542224343312748, 0.9670829649277926, 0.994327511568891]
Precision score for gb: [0.9886280264123257, 0.9889005384695041, 0.988735632183908]


In [58]:
# Persist the fitted random-forest model (only this one) to models.pkl.
rf_model=fit_models["rf"]
with open("models.pkl","wb") as model_file:
    p.dump(rf_model,model_file)

MAKING PREDICTIONS

In [59]:
# Reload the pickled classifier from models.pkl.
# Unpickling can execute arbitrary code, so only load a file this notebook wrote itself.
with open("models.pkl","rb") as model_file:
    model=p.load(model_file)

In [75]:
count=0       # number of predictions scored so far
tot_confi=0   # running confidence total; never kept below zero
def cal(prediction):
    """Update the running confidence score with one prediction and return it as a percentage.

    prediction is indexed as (class, subclass, condition). A "good" condition
    adds to the running total and anything else subtracts from it; "face"
    predictions weigh 2, every other class weighs 1. The total is floored at 0.

    Returns (tot_confi / count) * 100. Because face predictions count double,
    the result can exceed 100.
    """
    global count,tot_confi
    count+=1
    weight=2 if prediction[0]=="face" else 1
    delta=weight if prediction[2]=="good" else -weight
    tot_confi=max(tot_confi+delta,0)
    return (tot_confi/count)*100

In [84]:
# Live scoring: run MediaPipe Holistic on webcam frames, classify every frame in
# which both a pose and a face were detected, and overlay the predicted labels
# and the running score. Press "q" in the window to stop.
cap=cv2.VideoCapture(1)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# Every connection line and the hand/pose landmark dots share one spec;
# the face landmark dots use their own colour.
line_spec=mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circle_radius=1)
face_dot_spec=mp_drawing.DrawingSpec(color=(0,0,0),thickness=1,circle_radius=1)

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret,frame=cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing None to cvtColor.
                break

            # Convert OpenCV's BGR frame to RGB for the model and mark it
            # read-only while it is being processed.
            image=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            image.flags.writeable=False
            results=holistic.process(image)

            # Back to a writeable BGR image so OpenCV can draw on it and show it.
            image.flags.writeable=True
            image=cv2.cvtColor(image,cv2.COLOR_RGB2BGR)

            for landmark_list,connections,dot_spec in (
                (results.face_landmarks,mp_holistic.FACEMESH_TESSELATION,face_dot_spec),
                (results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS,line_spec),
                (results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS,line_spec),
                (results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,line_spec),
            ):
                mp_drawing.draw_landmarks(image,landmark_list,connections,dot_spec,line_spec)

            # Frames without a pose or a face are skipped explicitly; this is the
            # only case the former bare `except: pass` was meant to cover, and
            # real errors (e.g. a feature-count mismatch in predict) now surface.
            if results.pose_landmarks is not None and results.face_landmarks is not None:
                row=[]
                # Landmark groups are assembled in a fixed order: pose, left hand, right hand, face.
                for landmark_list in (
                    results.pose_landmarks,
                    results.left_hand_landmarks,
                    results.right_hand_landmarks,
                    results.face_landmarks,
                ):
                    if landmark_list:
                        row.extend(np.array([[lm.x,lm.y,lm.z,lm.visibility] for lm in landmark_list.landmark]).flatten())
                    else:
                        # Undetected hand: pad with zeros so the feature vector keeps its width.
                        row.extend([0]*(21*4))

                X=pd.DataFrame([row])
                prediction=model.predict(X)[0]

                # Named `score` rather than `p` so the `pickle as p` alias imported
                # earlier in the notebook is not overwritten by a float.
                score=cal(prediction)
                print(score)

                cv2.rectangle(image,(0,0),(350,60),(245,117,16),-1)
                # (title, value, x position of the title); each value sits 5 px to the left on the line below.
                for title,value,x_pos in (
                    ("Part",str(prediction[0]),15),
                    ("Subclass",str(prediction[1]),65),
                    ("Condition",str(prediction[2]),140),
                    ("Score","{:.2f}%".format(score),230),
                ):
                    cv2.putText(image,title,(x_pos,12),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),1,cv2.LINE_AA)
                    cv2.putText(image,value,(x_pos-5,40),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),1,cv2.LINE_AA)

            cv2.imshow("feed",image)

            if cv2.waitKey(100) & 0xFF==ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

41.11111111111111
41.46341463414634
41.8141592920354
42.16335540838852
42.51101321585903
42.857142857142854
43.20175438596491
43.544857768052516
43.88646288209607
44.226579520697165
44.565217391304344
44.90238611713666
45.23809523809524
45.57235421166307
45.9051724137931
46.236559139784944
46.56652360515021
46.680942184154176
46.794871794871796
46.908315565031984
47.02127659574468
47.13375796178344
47.24576271186441
47.35729386892177
47.46835443037975
47.578947368421055
47.689075630252105
47.79874213836478
47.90794979079498
48.01670146137787
48.125
48.232848232848234
48.3402489626556
48.4472049689441
48.553719008264466
48.65979381443299
48.76543209876543
48.870636550308014
48.97540983606557
49.079754601226995
49.183673469387756
49.287169042769854
49.390243902439025
49.69574036511156
49.797570850202426
49.898989898989896
50.0
50.10060362173038
50.401606425702816
50.70140280561122
51.0
51.29740518962076
51.59362549800797
51.88866799204771
51.587301587301596
51.68316831683168
51.778656126