# 0. Install and Import Dependencies

In [1]:
!pip install mediapipe opencv-python pandas scikit-learn



In [2]:
import mediapipe as mp # Import mediapipe
import cv2 # Import opencv

In [3]:
mp_drawing = mp.solutions.drawing_utils # Drawing helpers
mp_holistic = mp.solutions.holistic # Mediapipe Solutions

# 1. Make Some Detections

In [4]:
import cv2
import mediapipe as mp

# Initialize holistic model
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        if not ret:
            break
        
        # Recolor feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make detections
        results = holistic.process(image)
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                   mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                   mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                  )
        
        # Draw right hand landmarks
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                   mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                   mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                  )

        # Draw left hand landmarks
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                   mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                   mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                  )

        # Draw pose detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                   mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                   mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                  )
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

KeyboardInterrupt: 

In [5]:
results.face_landmarks.landmark[0].visibility

0.0

# 2. Capture Landmarks & Export to CSV
<!--<img src="https://i.imgur.com/8bForKY.png">-->
<!--<img src="https://i.imgur.com/AzKNp7A.png">-->

In [6]:
import csv
import os
import numpy as np

In [7]:
num_coords = len(results.pose_landmarks.landmark)+len(results.face_landmarks.landmark)
num_coords

501

In [8]:
landmarks = ['class']
for val in range(1, num_coords+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

In [None]:
landmarks

In [9]:
with open('coords.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [17]:
class_name = "bad"

In [18]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                   mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                   mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                  )
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Extract Face landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())
            
            # Concate rows
            row = pose_row+face_row
            
            # Append class name 
            row.insert(0, class_name)
            
            # Export to CSV
            with open('coords.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(row) 
            
        except:
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

# 3. Train Custom Model Using Scikit Learn

## 3.1 Read in Collected Data and Process

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [20]:
df = pd.read_csv('coords.csv')

In [21]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Happy,0.573388,0.59913,-1.113702,0.99474,0.606031,0.506848,-1.078518,0.994289,0.626956,...,0.001296,0.0,0.34844,0.567113,0.009048,0.0,0.35148,0.564611,0.009227,0.0
1,Happy,0.573401,0.597008,-1.066072,0.995207,0.606078,0.505986,-1.027327,0.994799,0.627186,...,-0.011148,0.0,0.656445,0.500059,0.010752,0.0,0.662886,0.492891,0.011177,0.0
2,Happy,0.575019,0.597228,-1.205446,0.995566,0.606856,0.506607,-1.169656,0.995214,0.628043,...,0.001754,0.0,0.348905,0.569471,0.010985,0.0,0.351907,0.566806,0.011282,0.0
3,Happy,0.572802,0.593055,-1.201258,0.995972,0.606468,0.503919,-1.160252,0.995659,0.627953,...,0.001078,0.0,0.347598,0.570018,0.009339,0.0,0.350555,0.567626,0.009525,0.0
4,Happy,0.571846,0.576658,-1.199285,0.99635,0.605049,0.494655,-1.131657,0.996068,0.626859,...,0.000794,0.0,0.345228,0.568814,0.008428,0.0,0.348027,0.566546,0.008532,0.0


In [23]:
df.tail()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
314,bad,0.648418,0.532262,-0.840183,0.999954,0.694065,0.451439,-0.787503,0.99994,0.715417,...,-0.011124,0.0,0.712138,0.500157,0.009734,0.0,0.719506,0.491298,0.010497,0.0
315,bad,0.646764,0.548848,-0.97249,0.999957,0.691899,0.465961,-0.920349,0.999944,0.712881,...,-0.015332,0.0,0.70185,0.511377,0.001477,0.0,0.709102,0.502514,0.001924,0.0
316,bad,0.645077,0.557957,-1.16917,0.999961,0.688222,0.475319,-1.118387,0.999948,0.707565,...,-0.017333,0.0,0.696197,0.505952,-0.000839,0.0,0.703173,0.497874,-0.000593,0.0
317,bad,0.639958,0.562235,-1.189116,0.999964,0.681596,0.480815,-1.131071,0.999953,0.700514,...,-0.01464,0.0,0.692886,0.502481,0.004465,0.0,0.699666,0.495526,0.004872,0.0
318,bad,0.634562,0.567011,-1.10884,0.999959,0.674287,0.485429,-1.051151,0.999944,0.693233,...,-0.015741,0.0,0.6986,0.502525,0.009799,0.0,0.704586,0.495026,0.010743,0.0


In [24]:
df[df['class']=='Sad']

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501


In [27]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

In [29]:
y_test

73     Happy
7      Happy
248     Good
268     Good
95     Happy
       ...  
92     Happy
314      bad
110    Happy
5      Happy
166      sad
Name: class, Length: 96, dtype: object

## 3.2 Train Machine Learning Classification Model

In [None]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [None]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [None]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

In [None]:
fit_models

In [None]:
fit_models['rc'].predict(X_test)

## 3.3 Evaluate and Serialize Model 

In [34]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [35]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

lr 0.9895833333333334
rc 1.0
rf 0.9791666666666666


In [36]:
fit_models['rf'].predict(X_test)

array(['Happy', 'Happy', 'Good', 'Good', 'Happy', 'sad', 'sad', 'Happy',
       'Good', 'Good', 'Good', 'sad', 'sad', 'Happy', 'Happy', 'Happy',
       'Happy', 'bad', 'bad', 'sad', 'sad', 'Good', 'Good', 'Happy',
       'Good', 'Good', 'Happy', 'sad', 'sad', 'Happy', 'Happy', 'sad',
       'Happy', 'sad', 'Good', 'bad', 'Good', 'Good', 'sad', 'Happy',
       'Happy', 'Good', 'Happy', 'Happy', 'sad', 'sad', 'sad', 'Happy',
       'Happy', 'Happy', 'Happy', 'Happy', 'sad', 'Happy', 'bad', 'sad',
       'Happy', 'bad', 'Good', 'Happy', 'sad', 'bad', 'Happy', 'sad',
       'bad', 'Good', 'sad', 'Happy', 'Happy', 'sad', 'sad', 'Happy',
       'sad', 'Happy', 'Happy', 'Happy', 'sad', 'sad', 'bad', 'sad',
       'Happy', 'Good', 'Happy', 'Happy', 'sad', 'Happy', 'Happy', 'Good',
       'bad', 'sad', 'sad', 'Happy', 'bad', 'Happy', 'Happy', 'sad'],
      dtype=object)

In [37]:
y_test

73     Happy
7      Happy
248     Good
268     Good
95     Happy
       ...  
92     Happy
314      bad
110    Happy
5      Happy
166      sad
Name: class, Length: 96, dtype: object

In [38]:
with open('body_language.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)

# 4. Make Detections with Model

In [39]:
with open('body_language.pkl', 'rb') as f:
    model = pickle.load(f)

In [40]:
model

In [None]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                   mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                   mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                  )
        
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Extract Face landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())
            
            # Concate rows
            row = pose_row+face_row
            
#             # Append class name 
#             row.insert(0, class_name)
            
#             # Export to CSV
#             with open('coords.csv', mode='a', newline='') as f:
#                 csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#                 csv_writer.writerow(row) 

            # Make Detections
            X = pd.DataFrame([row])
            body_language_class = model.predict(X)[0]
            body_language_prob = model.predict_proba(X)[0]
            print(body_language_class, body_language_prob)
            
            # Grab ear coords
            coords = tuple(np.multiply(
                            np.array(
                                (results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
                                 results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y))
                        , [640,480]).astype(int))
            
            cv2.rectangle(image, 
                          (coords[0], coords[1]+5), 
                          (coords[0]+len(body_language_class)*20, coords[1]-30), 
                          (245, 117, 16), -1)
            cv2.putText(image, body_language_class, coords, 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Get status box
            cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
            
            # Display Class
            cv2.putText(image, 'CLASS'
                        , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, body_language_class.split(' ')[0]
                        , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Display Probability
            cv2.putText(image, 'PROB'
                        , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                        , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
        except:
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()



Happy [0.04 0.68 0.1  0.18]




Happy [0.03 0.65 0.11 0.21]




Happy [0.03 0.67 0.14 0.16]




Happy [0.02 0.64 0.12 0.22]




Happy [0.03 0.63 0.13 0.21]




In [None]:
tuple(np.multiply(np.array((results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y)), [640,480]).astype(int))