# 0. Install dependencies and libraries

In [8]:
!pip install mediapipe opencv-python pandas scikit-learn



In [4]:
import mediapipe as mp
import cv2
import time

# Disable jedi autocompleter
%config Completer.use_jedi = False

In [5]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

 # 1. Make Detections

In [12]:
cap = cv2.VideoCapture(0) # number varies with cameras

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor the image to RGB from BGR, as mediapipe expects it
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        
        
        # Make detections
        image_rgb.flags.writeable = False  # for perofmance enhancement for holistic model
        results = holistic.process(image_rgb)
        #print(results.face_landmarks)
        
        # Face_landmarks, pose_landmarks, left_hand_landmarks and right_hand_landmark       
        # Recolor back to BGR for rendering image in openCV
        image_rgb.flags.writeable = True  # default  perofmance tuner back for openCV
        image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
        
        # 1. Mark face landamarks, 
        mp_drawing.draw_landmarks(image_bgr,
                                  results.face_landmarks,
                                  mp_holistic.FACE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80,110,10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80,256,121), thickness=2, circle_radius=2)
                                 )
        
        # 2. Mark Right Hand landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.right_hand_landmarks, 
                                  mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(120,22,10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )
        
        # 3. Mark Left Hand landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.left_hand_landmarks, 
                                  mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(190,22,76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )
        
        # 4. Mark Pose Landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.pose_landmarks, 
                                  mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245,66,23), thickness=2, circle_radius=4)
                                 )
        

        # Render back image frame onto the web cam screen
        cv2.imshow('Webcam video for Holistic model', image_bgr)

        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()

In [13]:
print('obect type of the mediapipe holistic model is: ', results)

obect type of the mediapipe holistic model is:  <class 'mediapipe.python.solution_base.SolutionOutputs'>


In [14]:
print('sample co-oridnates of face landmarks from mediapipe', results.face_landmarks.landmark[0:1])

sample co-oridnates of face landmarks from mediapipe [x: 0.5358875393867493
y: 0.5039317011833191
z: -0.019475294277071953
]


In [15]:
print('sample co-oridnates of pose landmarks from mediapipe', results.pose_landmarks.landmark[0:1])

sample co-oridnates of pose landmarks from mediapipe [x: 0.540930986404419
y: 0.44723352789878845
z: -1.062204122543335
visibility: 1.0
]


 # 2. Capture landmarks & Export to CSV

In [16]:
import csv
import os
import numpy as np

In [17]:
# get number of co-ordinates from face and pose landmarks individually
print('total number of co-ordinates from face landmarks: =', len(results.face_landmarks.landmark))
print('total number of co-ordinates from pose landmarks: =', len(results.pose_landmarks.landmark))
print('total number of co-ordinates from face and pose landmarks: =', 
      len(results.face_landmarks.landmark) + len(results.pose_landmarks.landmark)
     )

total number of co-ordinates from face landmarks: = 468
total number of co-ordinates from pose landmarks: = 33
total number of co-ordinates from face and pose landmarks: = 501


In [18]:
# get total number of co-ordinates from face and pose landmarks combined together
num_coords = len(results.face_landmarks.landmark) + len(results.pose_landmarks.landmark)
num_coords

501

In [19]:
# create header records for each cordinate and type of class, for each landmarks
landmarks=['class']
for val in range(1, num_coords+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

In [20]:
print('print first 2 cordinates class typename', landmarks[0:8])
print('print last 2 cordinates class typename', landmarks[-8:])
print('print total column names from each cordinates: =', len(landmarks))

print first 2 cordinates class typename ['class', 'x1', 'y1', 'z1', 'v1', 'x2', 'y2', 'z2']
print last 2 cordinates class typename ['x500', 'y500', 'z500', 'v500', 'x501', 'y501', 'z501', 'v501']
print total column names from each cordinates: = 2005


In [21]:
# write the above header record into a CSV file
with open('coords.csv', mode = 'w', newline='') as f:
    csv_writer=csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [27]:
class_name = 'Happy'

In [28]:
cap = cv2.VideoCapture(0) # number varies with cameras

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor the image to RGB from BGR, as mediapipe expects it
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        
        
        # Make detections
        image_rgb.flags.writeable = False  # for perofmance enhancement for holistic model
        results = holistic.process(image_rgb)
        #print(results.face_landmarks)
        
        # Face_landmarks, pose_landmarks, left_hand_landmarks and right_hand_landmark       
        # Recolor back to BGR for rendering image in openCV
        image_rgb.flags.writeable = True  # default  perofmance tuner back for openCV
        image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
        
        # 1. Mark face landamarks, 
        mp_drawing.draw_landmarks(image_bgr,
                                  results.face_landmarks,
                                  mp_holistic.FACE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80,110,10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80,256,121), thickness=2, circle_radius=2)
                                 )
        
        # 2. Mark Right Hand landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.right_hand_landmarks, 
                                  mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(120,22,10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )
        
        # 3. Mark Left Hand landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.left_hand_landmarks, 
                                  mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(190,22,76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )
        
        # 4. Mark Pose Landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.pose_landmarks, 
                                  mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245,66,23), thickness=2, circle_radius=4)
                                 )
        
        # Export face and pose co-oridnates to CSV
        try:
            # Extract pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(
                np.array(
                [[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]
            ).flatten()
            )
            
            # Extract face landmarks
            
            face = results.face_landmarks.landmark
            face_row = list(
                np.array(
                [[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]
            ).flatten()
            )  ### Note: there's no visibility co-oridnate in face_landmarks, its always be zero
            
            # Concate rows
            row = pose_row + face_row
            
            # Append class name
            row.insert(0, class_name)
            
            # export the above header record into a CSV file
            with open('coords.csv', mode = 'a', newline='') as f:
                csv_writer=csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(row)
    
        except:
            pass

        # Render back image frame onto the web cam screen
        cv2.imshow('Webcam video for Holistic model', image_bgr)

        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()

# 3. Train custom model using Scikit Learn

   ## 3.1 read csv data and EDA

In [29]:
from sklearn.model_selection import train_test_split
import pandas as pd

In [30]:
df = pd.read_csv('coords.csv')

In [31]:
df.head(3)

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Victorious,0.49109,0.200832,-0.965884,1.0,0.513681,0.147788,-0.902827,1.0,0.528294,...,-0.007841,0.0,0.542678,0.124769,0.004716,0.0,0.546242,0.121003,0.004561,0.0
1,Victorious,0.48784,0.2121,-0.790251,1.0,0.51245,0.15185,-0.724459,1.0,0.528101,...,-0.006405,0.0,0.541039,0.142056,0.006143,0.0,0.544683,0.137998,0.006125,0.0
2,Victorious,0.486357,0.22505,-0.890034,1.0,0.510885,0.163472,-0.826403,1.0,0.526918,...,-0.006337,0.0,0.5425,0.156152,0.006291,0.0,0.546043,0.152447,0.006294,0.0


In [32]:
df['class'].value_counts()

Happy         600
Sad           523
Victorious    343
Name: class, dtype: int64

In [33]:
X = df.drop('class', axis=1) # Faatures
y=df['class']   # target 

In [34]:
# Split the landmark dataset 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=43, stratify=y)

In [35]:
print('shape of Train dataset', X_train.shape)
print('shape of Test dataset', X_test.shape)

shape of Train dataset (1026, 2004)
shape of Test dataset (440, 2004)


## 3.2 train the model

In [6]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier


In [37]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [38]:
fit_models={}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [39]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [40]:
# make predictions on test dataset example
fit_models['rc'].predict(X_test)

array(['Sad', 'Sad', 'Sad', 'Victorious', 'Sad', 'Victorious',
       'Victorious', 'Sad', 'Happy', 'Happy', 'Victorious', 'Victorious',
       'Happy', 'Victorious', 'Sad', 'Happy', 'Sad', 'Happy', 'Sad',
       'Happy', 'Happy', 'Happy', 'Happy', 'Sad', 'Happy', 'Sad',
       'Victorious', 'Sad', 'Happy', 'Sad', 'Sad', 'Happy', 'Sad', 'Sad',
       'Victorious', 'Victorious', 'Sad', 'Happy', 'Happy', 'Happy',
       'Victorious', 'Sad', 'Happy', 'Happy', 'Victorious', 'Happy',
       'Sad', 'Happy', 'Sad', 'Happy', 'Happy', 'Happy', 'Sad',
       'Victorious', 'Sad', 'Victorious', 'Sad', 'Sad', 'Happy', 'Sad',
       'Happy', 'Sad', 'Victorious', 'Sad', 'Sad', 'Happy', 'Happy',
       'Sad', 'Happy', 'Happy', 'Sad', 'Sad', 'Happy', 'Happy', 'Sad',
       'Happy', 'Happy', 'Victorious', 'Sad', 'Happy', 'Happy',
       'Victorious', 'Victorious', 'Happy', 'Sad', 'Happy', 'Victorious',
       'Happy', 'Sad', 'Victorious', 'Sad', 'Happy', 'Happy', 'Happy',
       'Victorious', 'Victoriou

   ## 3.3 Evaluate & Serialize the model

In [41]:
from sklearn.metrics import accuracy_score
import pickle

In [42]:
for algo, model in fit_models.items():
    y_pred = model.predict(X_test)
    print(algo, accuracy_score(y_test, y_pred))

lr 0.9863636363636363
rc 0.9818181818181818
rf 0.9840909090909091
gb 0.990909090909091


In [43]:
# save the best model, which random forest here
with open('body_language.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)

   ## 4. Make detections with Model

In [11]:
import pickle

In [12]:
# Reload the saved model
with open('body_language.pkl', 'rb')  as f:
    model = pickle.load(f)

In [13]:
model

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier', RandomForestClassifier())])

In [10]:
cap = cv2.VideoCapture(0) # number varies with cameras

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor the image to RGB from BGR, as mediapipe expects it
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        
        
        # Make detections
        image_rgb.flags.writeable = False  # for perofmance enhancement for holistic model
        results = holistic.process(image_rgb)
        #print(results.face_landmarks)
        
        # Face_landmarks, pose_landmarks, left_hand_landmarks and right_hand_landmark       
        # Recolor back to BGR for rendering image in openCV
        image_rgb.flags.writeable = True  # default  perofmance tuner back for openCV
        image_bgr = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
        
        # 1. Mark face landamarks, 
        mp_drawing.draw_landmarks(image_bgr,
                                  results.face_landmarks,
                                  mp_holistic.FACE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(80,110,10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80,256,121), thickness=2, circle_radius=2)
                                 )
        
        # 2. Mark Right Hand landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.right_hand_landmarks, 
                                  mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(120,22,10), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )
        
        # 3. Mark Left Hand landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.left_hand_landmarks, 
                                  mp_holistic.HAND_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(190,22,76), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )
        
        # 4. Mark Pose Landmarks
        mp_drawing.draw_landmarks(image_bgr, 
                                  results.pose_landmarks, 
                                  mp_holistic.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                  mp_drawing.DrawingSpec(color=(245,66,23), thickness=2, circle_radius=4)
                                 )
        
        # Export face and pose co-oridnates to CSV
        try:
            # Extract pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(
                np.array(
                [[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]
            ).flatten()
            )
            
            # Extract face landmarks
            
            face = results.face_landmarks.landmark
            face_row = list(
                np.array(
                [[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]
            ).flatten()
            )  ### Note: there's no visibility co-oridnate in face_landmarks, its always be zero
            
            # Concate rows
            row = pose_row + face_row
            
            X = pd.DataFrame([row])
            body_language_class = model.predict(X)[0]
            body_language_probability = model.predict_proba(X)[0]  # probability of correct classification
            
            # print 
            #print(body_language_class, body_language_probability)
            
            # Grab Ear co-ordinates
            coords = tuple(
                np.multiply(
                    np.array(
                        (results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x,
                         results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y)
                    ),[640, 480]
                )
                .astype(int))
            
            cv2.rectangle(image_bgr,
                          (coords[0], coords[1]+5),
                          (coords[0]+len(body_language_class)*10,coords[1]-30),
                          (245,117, 16), -1)
            cv2.putText(image_bgr, 
                        body_language_class, 
                        coords,
                        cv2.FONT_HERSHEY_SIMPLEX, 
                        1,
                        (255, 255, 255),
                        2,
                        cv2.LINE_AA
                       )
            
            
            # Add Status Box
            cv2.rectangle(image_bgr,(0,0),(250,60),(245,117, 16), -1)
            
            # Display CLASS
            cv2.putText(image_bgr,'CLASS',(95,12),cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0, 0, 0),1,cv2.LINE_AA)
            cv2.putText(image_bgr,body_language_class.split(' ')[0],
                        (90,40),cv2.FONT_HERSHEY_SIMPLEX, 1,(255, 255, 255),2,cv2.LINE_AA)


            
            # Display Prediction Probability
            cv2.putText(image_bgr,'PROB',(15,12),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0, 0, 0),1,cv2.LINE_AA)
            cv2.putText(image_bgr,str(round(body_language_probability[np.argmax(body_language_probability)],2)),
                        (10,40),cv2.FONT_HERSHEY_SIMPLEX, 1,(255, 255, 255),2,cv2.LINE_AA)
        except:
            pass

        # Render back image frame onto the web cam screen
        cv2.imshow('Webcam video for Holistic model', image_bgr)

        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
        
cap.release()
cv2.destroyAllWindows()