# 0. Install and Import Dependencies

In [1]:
# !pip install mediapipe opencv-python pandas scikit-learn

In [9]:
import mediapipe as mp
import cv2
import numpy as np
import csv
import pandas as pd

mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# 1. Make Some Detections

In [10]:
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )

        # Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
# cv2.destroyAllWindows()

I0000 00:00:1743323734.044716  527758 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
W0000 00:00:1743323734.132014  538176 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743323734.150471  538179 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743323734.152621  538175 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743323734.152769  538181 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743323734.153560  538177 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling suppor

In [11]:
results.pose_landmarks

landmark {
  x: 0.605693758
  y: 0.712444782
  z: -1.28636718
  visibility: 0.999649
}
landmark {
  x: 0.62649852
  y: 0.637136161
  z: -1.26711476
  visibility: 0.99898988
}
landmark {
  x: 0.639130712
  y: 0.635490119
  z: -1.26701796
  visibility: 0.998989105
}
landmark {
  x: 0.651958108
  y: 0.634259224
  z: -1.2673322
  visibility: 0.99870652
}
landmark {
  x: 0.581974924
  y: 0.630818188
  z: -1.28628874
  visibility: 0.999275267
}
landmark {
  x: 0.562523544
  y: 0.624185741
  z: -1.28605664
  visibility: 0.999406278
}
landmark {
  x: 0.543193698
  y: 0.61935997
  z: -1.28627706
  visibility: 0.999373376
}
landmark {
  x: 0.662966847
  y: 0.63908124
  z: -0.938956201
  visibility: 0.998913229
}
landmark {
  x: 0.505679846
  y: 0.621448159
  z: -1.01589513
  visibility: 0.99964273
}
landmark {
  x: 0.620621383
  y: 0.780305266
  z: -1.1435113
  visibility: 0.999538362
}
landmark {
  x: 0.568918586
  y: 0.784213483
  z: -1.16591501
  visibility: 0.999790668
}
landmark {
  x: 0.73

In [12]:
type(results.pose_landmarks), type(results.pose_landmarks.landmark)

(mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList,
 google._upb._message.RepeatedCompositeContainer)

# 2. Capture Landmarks & Export to CSV
<!--<img src="https://i.imgur.com/8bForKY.png">-->
<!--<img src="https://i.imgur.com/AzKNp7A.png">-->

In [13]:
from datetime import datetime, timedelta
import time

In [14]:
num_coords = len(results.pose_landmarks.landmark)

In [33]:
class_name = "squat_bad_side"

In [34]:

# delay for X seconds to have the person walk back to camera, ready in setup position
time.sleep(3)

cap = cv2.VideoCapture(0)
pose_data = []
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)
        # print(results.face_landmarks)
        
        # face_landmarks, pose_landmarks, left_hand_landmarks, right_hand_landmarks
        
        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        
        # 1. Draw face landmarks
        # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS, 
        #                          mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
        #                          mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
        #                          )
        
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            now = datetime.now()
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            pose_data.append([now] + pose_row)
            
        except Exception as e:
            print(e)
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1743324038.008501  527758 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
W0000 00:00:1743324038.094169  544712 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324038.108018  544719 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324038.109486  544713 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324038.109510  544718 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324038.110154  544719 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling suppor

'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'
'NoneType' object has no attribute 'landmark'


In [35]:
# write data
cols = ['timestamp']
for val in range(1, num_coords+1):
    cols += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]
df = pd.DataFrame(pose_data, columns=cols)
df['class'] = class_name

In [36]:
df

Unnamed: 0,timestamp,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,v31,x32,y32,z32,v32,x33,y33,z33,v33,class
0,2025-03-30 16:40:38.200707,0.476733,0.213201,-0.143098,0.999976,0.480921,0.195551,-0.150538,0.999980,0.483393,...,0.676951,0.479345,0.928843,-0.216223,0.971602,0.472859,0.891291,0.196392,0.667706,squat_bad_side
1,2025-03-30 16:40:38.288971,0.470751,0.221787,-0.029945,0.999960,0.474864,0.205590,-0.047526,0.999970,0.477077,...,0.665976,0.479645,0.930837,-0.204398,0.969924,0.473553,0.894434,0.116643,0.656240,squat_bad_side
2,2025-03-30 16:40:38.355052,0.463858,0.231111,-0.003770,0.999937,0.467117,0.213724,-0.023871,0.999955,0.468882,...,0.653222,0.472313,0.931134,-0.205569,0.967826,0.465123,0.894548,0.109633,0.643923,squat_bad_side
3,2025-03-30 16:40:38.418151,0.458250,0.236352,-0.022255,0.999932,0.460840,0.218845,-0.039493,0.999950,0.462594,...,0.645347,0.463976,0.939108,-0.251516,0.968009,0.463564,0.894905,0.142721,0.639971,squat_bad_side
4,2025-03-30 16:40:38.482994,0.452704,0.240680,-0.005211,0.999901,0.454494,0.223020,-0.026544,0.999919,0.456025,...,0.644175,0.456787,0.950811,-0.246295,0.966397,0.469829,0.894821,0.162836,0.639988,squat_bad_side
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,2025-03-30 16:41:11.930535,0.021043,0.489848,-1.758883,0.980962,0.047294,0.336898,-1.797445,0.978538,0.066236,...,0.055428,0.366944,3.924439,0.507810,0.076339,0.152070,3.948274,0.953310,0.053570,squat_bad_side
498,2025-03-30 16:41:11.999052,0.026148,0.488038,-1.892488,0.982060,0.050928,0.334679,-1.927807,0.979790,0.070094,...,0.049890,0.387726,3.919297,0.551864,0.068711,0.175237,3.947071,1.026092,0.048221,squat_bad_side
499,2025-03-30 16:41:12.066242,0.023907,0.486335,-1.795414,0.983072,0.050070,0.332368,-1.832239,0.980890,0.069235,...,0.044906,0.354684,3.903131,0.539094,0.061848,0.150660,3.932456,0.967336,0.043405,squat_bad_side
500,2025-03-30 16:41:12.131242,0.024787,0.456409,-1.715642,0.984011,0.051351,0.307821,-1.731259,0.982009,0.070295,...,0.040425,0.336297,3.925291,0.321882,0.055669,0.126039,3.940802,0.952583,0.039072,squat_bad_side


In [37]:
# remove data of last X seconds of frames - as it trace pose of person walking back to camera & quit app

clean_df = df[df['timestamp'] < df['timestamp'].max() - timedelta(seconds=3)]
clean_df.to_csv(f'{class_name}.csv', index=False)

In [38]:
clean_df.shape

(465, 134)

# 3. Train Custom Model Using Scikit Learn

## 3.1 Read in Collected Data and Process

In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.DataFrame()
poses = ['squat_bad_side', 'squat_good_side', 'squat_bad_front', 'squat_good_front']
for pose in poses:
    tmp = pd.read_csv(f'{pose}.csv')
    df = pd.concat([df, tmp])

In [40]:
df['class'].value_counts()

class
squat_good_front    486
squat_bad_side      465
squat_bad_front     443
squat_good_side     441
Name: count, dtype: int64

In [41]:
df.shape

(1835, 134)

In [42]:
del df['timestamp']

In [43]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234, shuffle=True)

In [45]:
y_test

220      squat_bad_side
200     squat_bad_front
168    squat_good_front
414     squat_good_side
78       squat_bad_side
             ...       
281    squat_good_front
267    squat_good_front
167     squat_good_side
92     squat_good_front
249    squat_good_front
Name: class, Length: 551, dtype: object

## 3.2 Train Machine Learning Classification Model

In [46]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [47]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression()),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [48]:
fit_models = {}
for algo, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

In [49]:
fit_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression())]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [50]:
# fit_models['rc'].predict(X_test)

## 3.3 Evaluate and Serialize Model 

In [51]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [52]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

lr 0.9981851179673321
rc 0.9945553539019963
rf 1.0
gb 1.0


In [53]:
fit_models['gb'].predict(X_test)

array(['squat_bad_side', 'squat_bad_front', 'squat_good_front',
       'squat_good_side', 'squat_bad_side', 'squat_bad_front',
       'squat_good_front', 'squat_good_front', 'squat_bad_front',
       'squat_bad_side', 'squat_good_side', 'squat_good_side',
       'squat_bad_side', 'squat_bad_front', 'squat_bad_front',
       'squat_good_side', 'squat_good_front', 'squat_good_front',
       'squat_bad_side', 'squat_bad_side', 'squat_bad_front',
       'squat_bad_side', 'squat_good_side', 'squat_good_front',
       'squat_bad_front', 'squat_good_side', 'squat_bad_side',
       'squat_bad_side', 'squat_good_front', 'squat_bad_side',
       'squat_good_front', 'squat_bad_side', 'squat_good_side',
       'squat_good_side', 'squat_bad_front', 'squat_good_side',
       'squat_good_side', 'squat_bad_side', 'squat_bad_front',
       'squat_bad_side', 'squat_good_front', 'squat_good_side',
       'squat_good_side', 'squat_good_side', 'squat_good_side',
       'squat_bad_front', 'squat_good_front'

In [55]:
y_test

220      squat_bad_side
200     squat_bad_front
168    squat_good_front
414     squat_good_side
78       squat_bad_side
             ...       
281    squat_good_front
267    squat_good_front
167     squat_good_side
92     squat_good_front
249    squat_good_front
Name: class, Length: 551, dtype: object

In [57]:
with open('rf.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)

# 4. Make Detections with Model

In [62]:
with open('gb.pkl', 'rb') as f:
    model = pickle.load(f)

In [63]:
model

In [64]:
import time

In [67]:
time.sleep(1)
cap = cv2.VideoCapture(0)
# Initiate holistic model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        # Recolor Feed
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        
        # Make Detections
        results = holistic.process(image)

        # Recolor image back to BGR for rendering
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                
        # 2. Right hand
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                 )

        # 3. Left Hand
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                 )

        # 4. Pose Detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        # Export coordinates
        try:
            # Extract Pose landmarks
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            # Concate rows
            row = pose_row

            # Make Detections
            X = pd.DataFrame([row])
            body_language_class = model.predict(X)[0]
            body_language_prob = model.predict_proba(X)[0]
            # print(body_language_class, body_language_prob)
            
            # Get status box
            cv2.rectangle(image, (0,0), (350, 80), (245, 117, 16), -1)

            # Display Probability
            cv2.putText(image, 'Prob'
                        , (15,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
            cv2.putText(image, str(round(body_language_prob[np.argmax(body_language_prob)],2))
                        , (15,60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            # Display Class
            cv2.putText(image, 'Class'
                        , (100,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
            cv2.putText(image, body_language_class.split(' ')[0]
                        , (100,60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
        except:
            pass
                        
        cv2.imshow('Raw Webcam Feed', image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1743324638.356580  527758 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2 Pro
W0000 00:00:1743324638.442209  559129 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324638.456791  559139 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324638.458138  559131 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324638.458141  559139 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1743324638.458151  559136 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling suppor