# 0. Install and Import Dependencies

In [2]:
%pip install mediapipe opencv-python pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'd:\Python 3.10.4\python.exe -m pip install --upgrade pip' command.


# 1. Make Some Detections

In [1]:
import holistic_interface as hi

In [6]:
# let's make a train csv with 5 gestures: happy, sad, insecure, closed, open
hi.process_video_to_coordinates_csv(output_file="train_set.csv", class_name="happy") # input_file = 0 (using camera)
hi.process_video_to_coordinates_csv(output_file="train_set.csv", class_name="sad")
hi.process_video_to_coordinates_csv(output_file="train_set.csv", class_name="insecure")
hi.process_video_to_coordinates_csv(output_file="train_set.csv", class_name="closed")
hi.process_video_to_coordinates_csv(output_file="train_set.csv", class_name="open")

# 2. Capture Landmarks & Export to CSV
<!--<img src="https://i.imgur.com/8bForKY.png">-->
<!--<img src="https://i.imgur.com/AzKNp7A.png">-->

In [21]:
import csv
import os
import numpy as np

# 3. Train Custom Model Using Scikit Learn

## 3.1 Read in Collected Data and Process

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [22]:
df = pd.read_csv('output_csv_directory/train_set.csv')

In [23]:
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
0,happy,0.391849,0.516923,-0.894455,0.999691,0.427333,0.458827,-0.852394,0.999239,0.44819,...,-0.000998,0.0,0.459802,0.451261,-0.002874,0.0,0.465382,0.446142,-0.003864,0.0
1,happy,0.393469,0.518781,-0.786989,0.999701,0.429914,0.459826,-0.757943,0.999261,0.451027,...,-0.000832,0.0,0.463029,0.465176,-0.00085,0.0,0.468475,0.459475,-0.001631,0.0
2,happy,0.392925,0.521281,-0.754773,0.999654,0.429368,0.462215,-0.735322,0.999171,0.450317,...,-0.003051,0.0,0.454455,0.474659,-0.004697,0.0,0.460101,0.468695,-0.005572,0.0
3,happy,0.392943,0.523102,-0.856822,0.999607,0.429515,0.46314,-0.824333,0.999099,0.450395,...,-0.003357,0.0,0.453771,0.472003,-0.005598,0.0,0.459487,0.465917,-0.00666,0.0
4,happy,0.393619,0.524245,-0.904315,0.999538,0.429875,0.464322,-0.852316,0.998959,0.450752,...,-0.00315,0.0,0.451178,0.473983,-0.004575,0.0,0.456644,0.467928,-0.005507,0.0


In [24]:
df.tail()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
174,bored,0.47122,0.460938,-0.946258,0.999734,0.498142,0.430002,-0.891383,0.999348,0.515607,...,0.01514,0.0,0.539257,0.440018,0.026564,0.0,0.544518,0.433301,0.027442,0.0
175,bored,0.472218,0.461757,-0.932575,0.999746,0.499847,0.430018,-0.87576,0.999362,0.517517,...,0.016118,0.0,0.541491,0.439133,0.028052,0.0,0.546566,0.432126,0.029014,0.0
176,bored,0.475249,0.462586,-0.930968,0.999759,0.503607,0.430034,-0.871068,0.999387,0.521546,...,0.015645,0.0,0.546675,0.440951,0.026197,0.0,0.551901,0.434144,0.027046,0.0
177,bored,0.47794,0.464975,-0.94241,0.999776,0.506738,0.431039,-0.877395,0.999421,0.524879,...,0.015817,0.0,0.546669,0.439357,0.02545,0.0,0.551743,0.432256,0.026243,0.0
178,bored,0.479654,0.464989,-0.886778,0.999787,0.508909,0.430819,-0.839502,0.999447,0.527222,...,0.014984,0.0,0.547394,0.438123,0.02452,0.0,0.552434,0.431336,0.025211,0.0


In [25]:
df[df['class']=='sad'].head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z499,v499,x500,y500,z500,v500,x501,y501,z501,v501
45,sad,0.513177,0.603558,-0.901145,0.999456,0.543913,0.538873,-0.843799,0.999229,0.559623,...,-0.017551,0.0,0.57438,0.544626,-0.012105,0.0,0.579595,0.540386,-0.013213,0.0
46,sad,0.513745,0.59295,-0.901265,0.999486,0.545287,0.527699,-0.845783,0.999261,0.561797,...,-0.016065,0.0,0.575444,0.512974,-0.00821,0.0,0.580036,0.509803,-0.009208,0.0
47,sad,0.514274,0.576734,-0.903625,0.99951,0.5461,0.51133,-0.850446,0.999286,0.562914,...,-0.014521,0.0,0.574565,0.501767,-0.006952,0.0,0.579541,0.499195,-0.007927,0.0
48,sad,0.51379,0.517526,-0.904559,0.999536,0.545001,0.455008,-0.852169,0.999298,0.562368,...,-0.015055,0.0,0.567047,0.425345,-0.003022,0.0,0.572251,0.421284,-0.003595,0.0
49,sad,0.512637,0.481048,-0.91653,0.999562,0.542094,0.417763,-0.863326,0.999322,0.560232,...,-0.0157,0.0,0.559143,0.396765,-0.002202,0.0,0.563664,0.393208,-0.00258,0.0


In [26]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

In [31]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

In [32]:
y_test

138    bored
144    bored
168    bored
130    bored
145    bored
95       sad
164    bored
74       sad
91       sad
39     happy
77       sad
102      sad
31     happy
153    bored
117    bored
40     happy
9      happy
54       sad
141    bored
79       sad
49       sad
43     happy
122    bored
176    bored
121    bored
63       sad
25     happy
59       sad
6      happy
51       sad
29     happy
165    bored
148    bored
142    bored
111    bored
48       sad
105      sad
65       sad
58       sad
27     happy
88       sad
92       sad
32     happy
99       sad
108    bored
159    bored
132    bored
11     happy
61       sad
171    bored
131    bored
17     happy
166    bored
83       sad
Name: class, dtype: object

## 3.2 Train Machine Learning Classification Model

In [33]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [34]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [35]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

In [32]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [36]:
fit_models['rc'].predict(X_test)

array(['bored', 'bored', 'bored', 'bored', 'bored', 'sad', 'bored', 'sad',
       'sad', 'happy', 'sad', 'sad', 'happy', 'bored', 'bored', 'happy',
       'happy', 'sad', 'bored', 'sad', 'sad', 'happy', 'bored', 'bored',
       'bored', 'sad', 'happy', 'sad', 'happy', 'sad', 'happy', 'bored',
       'bored', 'bored', 'bored', 'sad', 'sad', 'sad', 'sad', 'happy',
       'sad', 'sad', 'happy', 'sad', 'bored', 'bored', 'bored', 'happy',
       'sad', 'bored', 'bored', 'happy', 'bored', 'sad'], dtype='<U5')

## 3.3 Evaluate and Serialize Model 

In [34]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [35]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

lr 0.4166666666666667
rc 0.375
rf 0.375
gb 0.375


In [36]:
fit_models['rf'].predict(X_test)

array(['sad', 'happy', 'sad', 'happy', 'happy', 'happy', 'sad', 'happy',
       'sad', 'happy', 'happy', 'happy', 'happy', 'sad', 'happy', 'sad',
       'sad', 'happy', 'happy', 'sad', 'happy', 'sad', 'happy', 'sad'],
      dtype=object)

In [37]:
y_test

22    happy
33    happy
31    happy
72      sad
40    happy
35    happy
4     happy
51      sad
25    happy
52      sad
21    happy
73      sad
45      sad
55      sad
42    happy
10    happy
18    happy
6     happy
27    happy
8     happy
64      sad
67      sad
66      sad
29    happy
Name: class, dtype: object

In [38]:
with open('body_language.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)

# 4. Make Detections with Model

In [None]:
with open('body_language.pkl', 'rb') as f:
    model = pickle.load(f)

In [None]:
model

In [None]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                                 mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                                 )

        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Extract Face landmarks
            face = results.face_landmarks.landmark
            face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())
            
            # Concate rows
            row = pose_row+face_row
            
#             # Append class name 
#             row.insert(0, class_name)
            
#             # Export to CSV
#             with open('coords.csv', mode='a', newline='') as f:
#                 csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
#                 csv_writer.writerow(row) 

            # Make Detections
            X = pd.DataFrame([row])
            body_language_class = model.predict(X)[0]
            body_language_prob = model.predict_proba(X)[0]
            print(body_language_class, body_language_prob)
            
            # Grab ear coords
            coords = tuple(np.multiply(
                            np.array(
                                (results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
                                 results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y))
                        , [640,480]).astype(int))
            
            cv2.rectangle(image, 
                          (coords[0], coords[1]+5), 
                          (coords[0]+len(body_language_class)*20, coords[1]-30), 
                          (245, 117, 16), -1)
            cv2.putText(image, body_language_class, coords, 
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Get status box
            cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
            
            # Display Class
            cv2.putText(image, 'CLASS'
                        , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, body_language_class.split(' ')[0]
                        , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Display Probability
            cv2.putText(image, 'PROB'
                        , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                        , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
        except:
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

In [None]:
tuple(np.multiply(np.array((results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].x, 
results.pose_landmarks.landmark[mp_holistic.PoseLandmark.LEFT_EAR].y)), [640,480]).astype(int))

AttributeError: 'NoneType' object has no attribute 'landmark'