## Import dependencies

In [1]:
!pip install mediapipe opencv-python pandas scikit-learn



###### MediaPipe Holistic
The MediaPipe Holistic pipeline integrates separate models for pose, face and hand components, each of which are optimized for their particular domain. However, because of their different specializations, the input to one component is not well-suited for the others. The pose estimation model, for example, takes a lower, fixed resolution video frame (256x256) as input. But if one were to crop the hand and face regions from that image to pass to their respective models, the image resolution would be too low for accurate articulation. Therefore, we designed MediaPipe Holistic as a multi-stage pipeline, which treats the different regions using a region appropriate image resolution.

In [1]:
import mediapipe as mp
import cv2

##### MediaPipe Holistic reference
https://google.github.io/mediapipe/solutions/holistic.html

In [2]:
mp_drawing = mp.solutions.drawing_utils ## drawing helpers
mp_holistic = mp.solutions.holistic ## mediapipe solutions

In [4]:
cap = cv2.VideoCapture(0)

# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) ## for MediaPipe to work, image format is RGB
        image.flags.writeable = False
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                 mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                 )
        
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )
        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
                        
        cv2.imshow('Body_Language', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

In [5]:
# results.pose_landmarks

In [6]:
# results.pose_landmarks.landmark

In [7]:
results.face_landmarks.landmark

[x: 0.584132
y: 0.6426987
z: -0.038019065
, x: 0.57989323
y: 0.5777148
z: -0.07429888
, x: 0.58119833
y: 0.59822696
z: -0.039106734
, x: 0.56325465
y: 0.51387376
z: -0.056253623
, x: 0.578504
y: 0.5572731
z: -0.07918175
, x: 0.57665443
y: 0.53072536
z: -0.07401748
, x: 0.5723442
y: 0.46799082
z: -0.03706833
, x: 0.47276515
y: 0.48210505
z: 0.01804843
, x: 0.56921697
y: 0.420636
z: -0.028476536
, x: 0.56766695
y: 0.39319426
z: -0.031477336
, x: 0.5618825
y: 0.30040693
z: -0.017875351
, x: 0.58464605
y: 0.6518899
z: -0.035757367
, x: 0.58498514
y: 0.65793806
z: -0.03102833
, x: 0.58514464
y: 0.659403
z: -0.024745584
, x: 0.5854406
y: 0.66149354
z: -0.02400607
, x: 0.5859345
y: 0.667817
z: -0.026407488
, x: 0.5864807
y: 0.6765351
z: -0.030003896
, x: 0.5870716
y: 0.686956
z: -0.029239867
, x: 0.5883799
y: 0.70996207
z: -0.018116975
, x: 0.5805897
y: 0.5883895
z: -0.06707105
, x: 0.566815
y: 0.58847755
z: -0.048081785
, x: 0.41381177
y: 0.41461
z: 0.08450568
, x: 0.51420873
y: 0.4931257
z:

In [8]:
results.face_landmarks.landmark[0].x

0.5841320157051086

<img src= "https://google.github.io/mediapipe/images/mobile/pose_tracking_full_body_landmarks.png">

## Capture Landmarks & Export to csv

In [3]:
import csv
import os
import numpy as np

In [4]:
num_coords = len(results.face_landmarks.landmark) + len(results.pose_landmarks.landmark)
num_coords

NameError: name 'results' is not defined

In [11]:
landmarks = ['class']
for val in range(1, num_coords+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]


In [12]:
# landmarks

In [13]:
with open('coordinates.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [14]:
# class_name = "Happy"
# class_name = "Sad"
# class_name = "Victorious"
class_name = "Wakanda Forever"

In [15]:
class_name

'Wakanda Forever'

In [16]:
cap = cv2.VideoCapture(0)

# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) ## for MediaPipe to work, image format is RGB
        image.flags.writeable = False
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                 mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                 )
        
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )
        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Extract Face landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())
            
            # Concate rows
            row = pose_row+face_row
            
            # Append class name 
            row.insert(0, class_name)
            
            # Export to CSV
            with open('coordinates.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(row) 
            
        except:
            pass
                        
        cv2.imshow('Body_Language', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

In [17]:
# face = results.face_landmarks.landmark
# np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten()

In [18]:
# pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
# face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())

In [19]:
row = pose_row + face_row
row.insert(0, class_name)
len(row)

2005

In [20]:
# row

# Train Custom Model Using Scikit Learn

### Read in collected data and process

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [14]:
df = pd.read_csv('coordinates.csv')

In [15]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,Wakanda Forever,0.604562,0.549794,-1.604311,0.999884,0.639117,0.459369,-1.523109,0.999698,0.663767,...,-0.010968,0.0,0.713174,0.466395,0.015452,0.0,0.720329,0.457706,0.016266,0.0
1,Wakanda Forever,0.604693,0.551036,-1.825083,0.999878,0.64066,0.459572,-1.765753,0.999671,0.665995,...,-0.009489,0.0,0.713181,0.468825,0.015444,0.0,0.720374,0.459863,0.016151,0.0
2,Wakanda Forever,0.604403,0.551772,-1.761126,0.999878,0.640867,0.459755,-1.700793,0.99966,0.66664,...,-0.009617,0.0,0.714964,0.467071,0.01509,0.0,0.722107,0.458694,0.015679,0.0
3,Wakanda Forever,0.604453,0.551694,-1.655979,0.999879,0.641371,0.45974,-1.593346,0.999657,0.667202,...,-0.009605,0.0,0.716253,0.466882,0.015598,0.0,0.723394,0.458605,0.016232,0.0
4,Wakanda Forever,0.604469,0.553217,-1.744007,0.999869,0.6424,0.460934,-1.67459,0.999621,0.668253,...,-0.01059,0.0,0.718833,0.468876,0.015103,0.0,0.725911,0.461101,0.01568,0.0


In [16]:
df.tail()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
20,Wakanda Forever,0.596547,0.556396,-1.917839,0.9999,0.633484,0.460597,-1.855471,0.999705,0.661065,...,-0.008736,0.0,0.715475,0.464694,0.01849,0.0,0.722264,0.456204,0.019323,0.0
21,Wakanda Forever,0.603206,0.555517,-1.792723,0.999898,0.64041,0.460295,-1.711096,0.999703,0.666012,...,-0.007568,0.0,0.726784,0.4608,0.022349,0.0,0.733563,0.452301,0.023379,0.0
22,Wakanda Forever,0.604228,0.553445,-1.778987,0.999892,0.642911,0.459728,-1.695075,0.999679,0.667975,...,-0.0106,0.0,0.72793,0.453549,0.018764,0.0,0.73476,0.445573,0.019575,0.0
23,Wakanda Forever,0.602397,0.550017,-1.749198,0.999889,0.642537,0.457622,-1.677119,0.999667,0.667901,...,-0.009109,0.0,0.721648,0.452587,0.020599,0.0,0.728624,0.444082,0.021533,0.0
24,Wakanda Forever,0.602519,0.550001,-1.924334,0.99989,0.642569,0.457609,-1.856163,0.999667,0.668034,...,-0.010537,0.0,0.70855,0.451049,0.018477,0.0,0.715496,0.441799,0.019348,0.0


In [17]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

In [18]:
X

Unnamed: 0,x1,y1,z1,v1,x2,y2,z2,v2,x3,y3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,0.604562,0.549794,-1.604311,0.999884,0.639117,0.459369,-1.523109,0.999698,0.663767,0.458371,...,-0.010968,0.0,0.713174,0.466395,0.015452,0.0,0.720329,0.457706,0.016266,0.0
1,0.604693,0.551036,-1.825083,0.999878,0.64066,0.459572,-1.765753,0.999671,0.665995,0.458402,...,-0.009489,0.0,0.713181,0.468825,0.015444,0.0,0.720374,0.459863,0.016151,0.0
2,0.604403,0.551772,-1.761126,0.999878,0.640867,0.459755,-1.700793,0.99966,0.66664,0.458437,...,-0.009617,0.0,0.714964,0.467071,0.01509,0.0,0.722107,0.458694,0.015679,0.0
3,0.604453,0.551694,-1.655979,0.999879,0.641371,0.45974,-1.593346,0.999657,0.667202,0.458381,...,-0.009605,0.0,0.716253,0.466882,0.015598,0.0,0.723394,0.458605,0.016232,0.0
4,0.604469,0.553217,-1.744007,0.999869,0.6424,0.460934,-1.67459,0.999621,0.668253,0.459368,...,-0.01059,0.0,0.718833,0.468876,0.015103,0.0,0.725911,0.461101,0.01568,0.0
5,0.604594,0.553147,-1.736047,0.999867,0.643204,0.46084,-1.66961,0.999609,0.668988,0.459191,...,-0.011178,0.0,0.719831,0.466167,0.014602,0.0,0.726766,0.458911,0.015113,0.0
6,0.604587,0.552502,-1.773835,0.999869,0.64343,0.46019,-1.708966,0.99961,0.669217,0.458447,...,-0.009652,0.0,0.720878,0.466576,0.01581,0.0,0.727951,0.459241,0.0163,0.0
7,0.604027,0.552439,-1.677018,0.999873,0.643336,0.46006,-1.613568,0.999623,0.669212,0.458215,...,-0.010595,0.0,0.717438,0.46444,0.014207,0.0,0.724458,0.457217,0.014599,0.0
8,0.603938,0.552394,-1.688037,0.99988,0.643395,0.459977,-1.623328,0.999641,0.66931,0.458111,...,-0.011151,0.0,0.716997,0.458815,0.014185,0.0,0.723724,0.452692,0.014405,0.0
9,0.603119,0.552108,-1.883326,0.999887,0.642602,0.459599,-1.825844,0.999662,0.668991,0.457533,...,-0.011169,0.0,0.710234,0.455909,0.013345,0.0,0.717055,0.449236,0.013587,0.0


In [19]:
y

0     Wakanda Forever
1     Wakanda Forever
2     Wakanda Forever
3     Wakanda Forever
4     Wakanda Forever
5     Wakanda Forever
6     Wakanda Forever
7     Wakanda Forever
8     Wakanda Forever
9     Wakanda Forever
10    Wakanda Forever
11    Wakanda Forever
12    Wakanda Forever
13    Wakanda Forever
14    Wakanda Forever
15    Wakanda Forever
16    Wakanda Forever
17    Wakanda Forever
18    Wakanda Forever
19    Wakanda Forever
20    Wakanda Forever
21    Wakanda Forever
22    Wakanda Forever
23    Wakanda Forever
24    Wakanda Forever
Name: class, dtype: object

#### Split dataset

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=123)

In [21]:
y_train

9     Wakanda Forever
16    Wakanda Forever
12    Wakanda Forever
20    Wakanda Forever
24    Wakanda Forever
8     Wakanda Forever
21    Wakanda Forever
7     Wakanda Forever
14    Wakanda Forever
18    Wakanda Forever
3     Wakanda Forever
0     Wakanda Forever
5     Wakanda Forever
4     Wakanda Forever
15    Wakanda Forever
22    Wakanda Forever
11    Wakanda Forever
Name: class, dtype: object

### Train Machine learning Classification Model

In [22]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [23]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [24]:
list(pipelines.values())[1]

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('ridgeclassifier', RidgeClassifier())])

In [25]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train.values, y_train.values)
    fit_models[algo] = model

ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 'Wakanda Forever'

In [None]:
fit_models

In [None]:
# fit_models['rc'].predict(X_test)

### Evaluate and Serialize Model

In [8]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [9]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

In [None]:
# fit_models["rf"].predict(X_test)

In [None]:
# y_test

In [None]:
with open('body_language_rf.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)

## Make Detection With Model

In [10]:
with open('body_language_rf.pkl', 'rb') as f:
    model = pickle.load(f)

In [None]:
model

In [11]:
cap = cv2.VideoCapture(0)

# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) ## for MediaPipe to work, image format is RGB
        image.flags.writeable = False
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                 mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                 )
        
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )
        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Extract Face landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())
            
            # Concate rows
            row = pose_row+face_row
            
#             # Append class name 
#             row.insert(0, class_name)
            
#             # Export to CSV
#             with open('coordinates.csv', mode='a', newline='') as f:
#                 csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#                 csv_writer.writerow(row) 

            # Make Detections
            X = pd.DataFrame([row])
            body_language_class = model.predict(X)[0]
            body_language_prob = model.predict_proba(X)[0]
            print(body_language_class, body_language_prob)
            
            
            # Grab ear coords
            coords = tuple(np.multiply(
                            np.array(
                                (results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
                                 results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y))
                        , [640,480]).astype(int))
            
            cv2.rectangle(image, 
                          (coords[0], coords[1]+5), 
                          (coords[0]+len(body_language_class)*20, coords[1]-30), 
                          (245, 117, 16), -1)
            cv2.putText(image, body_language_class, coords, 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Get status box
            cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
            
            # Display Class
            cv2.putText(image, 'CLASS'
                        , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, body_language_class.split(' ')[0]
                        , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Display Probability
            cv2.putText(image, 'PROB'
                        , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                        , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            
        except:
            pass
                        
        cv2.imshow('Body_Language', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

In [None]:
tuple(np.multiply(np.array((results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y)), [640,480]).astype(int))