In [1]:
#Install the nessasary packages from pip
!pip install mediapipe opencv-python pandas scikit-learn --user



In [84]:
#Import all the nessacary libraries from the packages

#Import mediapipe to be use as the model
import mediapipe as mp
#Import opencv for rendaring and drawing capabilities
import cv2

import numpy as np #Handle numpy arrays
import pandas as pd #Handle tabular data
import csv #Handle csv files
import os #Handle folder structure
import glob
import pickle #Save and oad ML model

from sklearn.model_selection import train_test_split #Partition the data into training and testing

from sklearn.pipeline import make_pipeline #Creates a pipeline
from sklearn.preprocessing import StandardScaler #Standadize data 

#Classification algorithms
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from sklearn.metrics import accuracy_score #Evaluate model through accuracy

In [85]:
#Helper to draw the landmarks and provide the landmark detection models
draw_helpers = mp.solutions.drawing_utils 
holistic_model = mp.solutions.holistic
# mp_pose = mp.solutions.pose

In [57]:
#Number of landmarks considered.
num_landmarks = 12

In [58]:
#Save the landmarks to a table to be exported as a csv file

#0th column of the table
table_columns = ['class']
#Add columns to the table according to the no.of landmarks
for num in range(1, num_landmarks + 1):
    table_columns += ['x{}'.format(num), 'y{}'.format(num), 'z{}'.format(num), 'v{}'.format(num)]
    
#Display columns of the table
table_columns

['class',
 'x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12']

# Creating the csv file for store extracted data

In [59]:
csv_file_pth = 'data.csv'

#Write to the csv file
with open(csv_file_pth, mode='w', newline='') as f:
    #Define the csv writer
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(table_columns)
    print("File Created")

File Created


# Function to extract data from video and store inside the CSV

In [60]:

def video_processor(class_name, video_pth):
    #Connect the sample video from the device
    sample_video = cv2.VideoCapture(video_pth)
    processed = False


    #Load the holistic model
    with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        #Loop through each frame of the video 
        while sample_video.isOpened():
            #Returns the status of the read and the frame as an image
            status, frame = sample_video.read()

            #If frame is read correctly, status is true
            if status == False:
                break

            #Recolor the captured frame from BGR to RGB (Medipipe requies frames to be in RGB format)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            #Prevent writing and copying frame data to improve performance while making the detection
            rgb_frame.flags.writeable = False        

            #Use holistic model to make detections
            result_frame = holistic.process(rgb_frame)

            #Set frame back to writable format after detection
            rgb_frame.flags.writeable = True   

            #Recolor the captured frame from BGR for rendering with opencv
            bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)

            #Use pose model to detect only the landmarks of the the body and not the landmarks of the face and hand
            draw_helpers.draw_landmarks(bgr_frame, result_frame.pose_landmarks, holistic_model.POSE_CONNECTIONS, 
                                 draw_helpers.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 draw_helpers.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))
         

            #Export the coordinates of the landmarks to the csv file
            try:
                pose_landmarks_array = result_frame.pose_landmarks.landmark
                # Filter out only the upper body landmarks
                upper_body_landmarks = [pose_landmarks_array[i] for i in [11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28]]
                # Format the upper body landmarks into a numpy array for better structuring and collapse the array to 1 dimension
                pose_landmarks_nparray = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in upper_body_landmarks]).flatten() 
                              if result_frame.pose_landmarks else np.zeros(12*4))


                #Append class name as the Oth element
                pose_landmarks_nparray.insert(0, class_name)

                #Append the data to table in the csv file
                with open(csv_file_pth, mode='a', newline='') as f:
                    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                    csv_writer.writerow(pose_landmarks_nparray) 
                
                processed = True
            except:
                pass
            
            #Display the frames    
            cv2.imshow('Results Feed', bgr_frame)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    
    
    sample_video.release()
    cv2.destroyAllWindows()
    
    #If the try block is processed without any error
    if processed:
        print("Processed")

In [63]:
#Specify the location to the datasets
valid_path = "./datasets/valid/*.mp4"
invalid_path = "./datasets/invalid/*.mp4"

#Displat the no of files in the datasets
print("Valid Video Count: ", len(glob.glob(valid_path)))
print("Invalid Video Count: ", len(glob.glob(invalid_path)))

Valid Video Count:  1
Invalid Video Count:  6


In [64]:
#Adding landmarks of the invalid dataset to the csv
class_name = "correct"
dir_size = len(glob.glob(valid_path))
for i in range (1, dir_size + 1):
    video_pth = "./datasets/valid/" + str(i) + ".mp4"
    print("Video: ", str(i), "/", str(dir_size))
    video_processor(class_name, video_pth)

Video:  1 / 1
Processed


In [65]:
#Adding landmarks of the invalid dataset to the csv
class_name = "Incorrect"
dir_size = len(glob.glob(invalid_path))
for i in range (1, dir_size + 1):
    video_pth = "./datasets/invalid/" + str(i) + ".mp4"
    print("Video: ", str(i), "/", str(dir_size))
    video_processor(class_name, video_pth)

Video:  1 / 6
Processed
Video:  2 / 6
Processed
Video:  3 / 6
Processed
Video:  4 / 6
Processed
Video:  5 / 6
Processed
Video:  6 / 6
Processed


# Customize data read from the csv file

In [86]:
#Import dataframe
df = pd.read_csv(csv_file_pth)

In [87]:
#Display the first 5 rows in the dataframe
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z10,v10,x11,y11,z11,v11,x12,y12,z12,v12
0,correct,0.619047,0.375558,-0.243535,0.999884,0.409617,0.380511,-0.254297,0.999892,0.782678,...,-0.680581,0.990491,0.583658,0.71051,-0.128898,0.977198,0.469821,0.709493,-0.218497,0.98667
1,correct,0.618967,0.37571,-0.256573,0.999891,0.410921,0.379863,-0.289064,0.999891,0.782869,...,-0.680569,0.990081,0.583643,0.710109,-0.221347,0.976645,0.470897,0.706798,-0.231344,0.985518
2,correct,0.618902,0.375714,-0.26178,0.999896,0.411287,0.379051,-0.298899,0.999891,0.78343,...,-0.657295,0.989982,0.583918,0.709901,-0.188261,0.976416,0.472224,0.706341,-0.222894,0.985131
3,correct,0.618808,0.37572,-0.274036,0.999902,0.411282,0.378483,-0.307344,0.999891,0.783702,...,-0.649408,0.989829,0.584304,0.709735,-0.175713,0.976184,0.472952,0.706434,-0.209016,0.984776
4,correct,0.618726,0.375544,-0.273183,0.999907,0.411074,0.378134,-0.306599,0.999893,0.783683,...,-0.651564,0.989785,0.584698,0.709414,-0.173179,0.975898,0.47321,0.706502,-0.208052,0.984446


In [88]:
#Display the last 5 rows in the dataframe
df.tail()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z10,v10,x11,y11,z11,v11,x12,y12,z12,v12
15251,Incorrect,0.620059,0.382603,-0.098221,0.99992,0.419419,0.380915,-0.11175,0.999925,0.770806,...,-0.928691,0.991005,0.665776,0.705403,-0.594797,0.970443,0.453099,0.703013,-0.708427,0.979679
15252,Incorrect,0.620094,0.382671,-0.096046,0.999922,0.419462,0.381014,-0.11174,0.999927,0.770414,...,-0.928643,0.991167,0.665777,0.705456,-0.595812,0.970722,0.453053,0.703011,-0.71089,0.979996
15253,Incorrect,0.620277,0.382685,-0.096509,0.999924,0.419675,0.381073,-0.112089,0.999927,0.770315,...,-0.925444,0.991313,0.665885,0.705477,-0.595897,0.971116,0.453051,0.703103,-0.710106,0.98036
15254,Incorrect,0.620493,0.382726,-0.09692,0.999925,0.419704,0.381218,-0.110789,0.999927,0.770307,...,-0.919223,0.991412,0.66596,0.705476,-0.595992,0.971434,0.45309,0.703207,-0.709133,0.980613
15255,Incorrect,0.620998,0.382751,-0.094447,0.999926,0.419879,0.381275,-0.106553,0.999927,0.770302,...,-0.91841,0.991522,0.665993,0.705455,-0.59612,0.971721,0.453107,0.703527,-0.709076,0.980894


In [89]:
#Remove the class column so the dataframe only contains features
X = df.drop('class', axis=1)
#Use the class as the target value
Y = df['class'] 

In [90]:
#Split the data with 30% for testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1234)

# Make predictions and select the best classifier

In [91]:
#Setup the machine learning model pipelines
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression(max_iter=20000)),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [92]:
#Dictionary to store the label of the model and model after training
train_models = {}
for label, pipeline in pipelines.items():
    model = pipeline.fit(X_train.values, Y_train.values)
    train_models[label] = model

In [93]:
train_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression(max_iter=20000))]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [94]:
#Test the accuracies of the model to choose the best classifier
for label, model in train_models.items():
    output_class = model.predict(X_test.values)
    print(label, accuracy_score(Y_test.values, output_class))

lr 0.9886388464059428
rc 0.9573956740222853
rf 1.0
gb 1.0


In [95]:
#Select and dump the classifier into a pickle file
model = train_models['rf']
model

In [96]:
#Save the model as a binary file
with open('shoulder_press.pkl', 'wb') as f:
    pickle.dump(model, f)

In [97]:
#Import the model from the binary file
with open('shoulder_press.pkl', 'rb') as f:
    model = pickle.load(f)

In [98]:
model

In [100]:
#PREDICT AND DISPLAY THE RESULTS OF THE MODEL BY PASSING THE TEST VIDEO

#Connect the test video from the device
sample_video = cv2.VideoCapture('datasets/IMG_0126.MOV')

#Load the holistic model
with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    #Loop through each frame of the video 
    while sample_video.isOpened():
        #Returns the status of the read and the frame as an image
        status, frame = sample_video.read()
        
        #If frame is read correctly, status is true
        if status == False:
            print("Done")
            break
          
        #Recolor the captured frame from BGR to RGB (Medipipe requies frames to be in RGB format)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        #Prevent writing and copying frame data to improve performance while making the detection
        rgb_frame.flags.writeable = False        
        
        #Use holistic model to make detections
        result_frame = holistic.process(rgb_frame)
        
        #Set frame back to writable format after detection
        rgb_frame.flags.writeable = True   
        
        #Recolor the captured frame from BGR for rendering with opencv
        bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)

        #Use pose model to detect only the landmarks of the the body and not the landmarks of the face and hand
        draw_helpers.draw_landmarks(bgr_frame, result_frame.pose_landmarks, holistic_model.POSE_CONNECTIONS, 
                                 draw_helpers.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 draw_helpers.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        
        #Predict the coordinates of the landmarks (resulrs screen)
        try:
            pose_landmarks_array = result_frame.pose_landmarks.landmark
            # Filter out only the upper body landmarks
            upper_body_landmarks = [pose_landmarks_array[i] for i in [11, 12, 13, 14, 15, 16, 23, 24, 25, 26, 27, 28]]
            # Format the upper body landmarks into a numpy array for better structuring and collapse the array to 1 dimension
            pose_landmarks_nparray = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in upper_body_landmarks]).flatten() 
                              if result_frame.pose_landmarks else np.zeros(12*4))
            #Pass the numpy array into a data frame
            features = pd.DataFrame([pose_landmarks_nparray])
            
            #Store the top class of the prediction
            pose_class_status = model.predict(features.values)[0]
            #Store the probability of the prediction
            pose_class_status_prob = model.predict_proba(features.values)[0]
            
            print("Class:", pose_class_status)
            print(pose_class_status_prob)
            
            #Set a rectangle box to display the results of the prediction in the video frame
            #rectangle(container, top_coord, bottom_coord, color, line_thickness)
            cv2.rectangle(bgr_frame, (0,0), (250, 60), (245, 117, 16), -1)
            
            #Display the class label inside the rectangle box
            cv2.putText(bgr_frame, 'Class'
                        , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            
            #Extract =and display the top class of the prediction
            cv2.putText(bgr_frame, pose_class_status.split(' ')[0]
                        , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            #Display the class probability inside the rectangle box
            cv2.putText(bgr_frame, 'Probability'
                        , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            
            #Extract and dispthe maximum probability
            cv2.putText(bgr_frame, str(round(pose_class_status_prob[np.argmax(pose_class_status_prob)],2))
                        , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
 
        except:
            pass
                        
        #Display the frames    
        cv2.imshow('Results Feed', bgr_frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

sample_video.release()
cv2.destroyAllWindows()

Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.8 0.2]
Class: Incorrect
[0.81 0.19]
Class: Incorrect
[0.8 0.2]
Class: Incorrect
[0.8 0.2]
Class: Incorrect
[0.8 0.2]
Class: Incorrect
[0.8 0.2]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.82 0.18]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0.17]
Class: Incorrect
[0.83 0

KeyboardInterrupt: 