In [1]:
#Install the nessasary packages from pip
!pip install mediapipe opencv-python pandas scikit-learn --user



In [1]:
#Import all the nessacary libraries from the packages

#Import mediapipe to be use as the model
import mediapipe as mp
#Import opencv for rendaring and drawing capabilities
import cv2

import numpy as np #Handle numpy arrays
import pandas as pd #Handle tabular data
import csv #Handle csv files
import os #Handle folder structure
import glob
import pickle #Save and oad ML model

from sklearn.model_selection import train_test_split #Partition the data into training and testing

from sklearn.pipeline import make_pipeline #Creates a pipeline
from sklearn.preprocessing import StandardScaler #Standadize data 

#Classification algorithms
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

from sklearn.metrics import accuracy_score #Evaluate model through accuracy

In [2]:
#Helper to draw the landmarks and provide the landmark detection models
draw_helpers = mp.solutions.drawing_utils 
holistic_model = mp.solutions.holistic 

# Initialize the csv file 

In [5]:
#DETECT AND SAVE THE LANDMARKS COORDS AS COLUMNS FROM THE SAMPLE VIDEO

#Connect the sample video from the device
sample_video = cv2.VideoCapture('datasets/landmark_extractor/sample_video.mp4')

#Load the holistic model
with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    #Loop through each frame of the video 
    while sample_video.isOpened():
        #Returns the status of the read and the frame as an image
        status, frame = sample_video.read()
        
        #If frame is read correctly, status is true
        if status == False:
            print("Done")
            break
          
        #Recolor the captured frame from BGR to RGB (Medipipe requies frames to be in RGB format)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        #Prevent writing and copying frame data to improve performance while making the detection
        rgb_frame.flags.writeable = False        
        
        #Use holistic model to make detections
        result_frame = holistic.process(rgb_frame)
        
        #Set frame back to writable format after detection
        rgb_frame.flags.writeable = True   
        
        #Recolor the captured frame from BGR for rendering with opencv
        bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)

        #Use pose model to detect only the landmarks of the the body and not the landmarks of the face and hand
        draw_helpers.draw_landmarks(bgr_frame, result_frame.pose_landmarks, holistic_model.POSE_CONNECTIONS, 
                                 draw_helpers.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 draw_helpers.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
         
        #Display the frames    
        cv2.imshow('Landmarks Extractor', bgr_frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
        
sample_video.release()
cv2.destroyAllWindows()

Cannot receive frame!


In [6]:
#Store the pose landmarks in an array 
landmarks_array = result_frame.pose_landmarks.landmark

#Save the no of pose landmarks in the Array 
num_landmarks = len(landmarks_array)
#Display landmarks
num_landmarks

33

In [7]:
#Save the landmarks to a table to be exported as a csv file

#0th column of the table
table_columns = ['class']
#Add columns to the table according to the no.of landmarks
for num in range(1, num_landmarks + 1):
    table_columns += ['x{}'.format(num), 'y{}'.format(num), 'z{}'.format(num), 'v{}'.format(num)]
    
#Display columns of the table
table_columns

['class',
 'x1',
 'y1',
 'z1',
 'v1',
 'x2',
 'y2',
 'z2',
 'v2',
 'x3',
 'y3',
 'z3',
 'v3',
 'x4',
 'y4',
 'z4',
 'v4',
 'x5',
 'y5',
 'z5',
 'v5',
 'x6',
 'y6',
 'z6',
 'v6',
 'x7',
 'y7',
 'z7',
 'v7',
 'x8',
 'y8',
 'z8',
 'v8',
 'x9',
 'y9',
 'z9',
 'v9',
 'x10',
 'y10',
 'z10',
 'v10',
 'x11',
 'y11',
 'z11',
 'v11',
 'x12',
 'y12',
 'z12',
 'v12',
 'x13',
 'y13',
 'z13',
 'v13',
 'x14',
 'y14',
 'z14',
 'v14',
 'x15',
 'y15',
 'z15',
 'v15',
 'x16',
 'y16',
 'z16',
 'v16',
 'x17',
 'y17',
 'z17',
 'v17',
 'x18',
 'y18',
 'z18',
 'v18',
 'x19',
 'y19',
 'z19',
 'v19',
 'x20',
 'y20',
 'z20',
 'v20',
 'x21',
 'y21',
 'z21',
 'v21',
 'x22',
 'y22',
 'z22',
 'v22',
 'x23',
 'y23',
 'z23',
 'v23',
 'x24',
 'y24',
 'z24',
 'v24',
 'x25',
 'y25',
 'z25',
 'v25',
 'x26',
 'y26',
 'z26',
 'v26',
 'x27',
 'y27',
 'z27',
 'v27',
 'x28',
 'y28',
 'z28',
 'v28',
 'x29',
 'y29',
 'z29',
 'v29',
 'x30',
 'y30',
 'z30',
 'v30',
 'x31',
 'y31',
 'z31',
 'v31',
 'x32',
 'y32',
 'z32',
 'v32',
 '

In [23]:
csv_file_pth = 'data.csv'

#Write to the csv file
with open(csv_file_pth, mode='w', newline='') as f:
    #Define the csv writer
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(table_columns)
    print("File Created")

# Get the coords of the landmarks in each training video 

In [21]:

def video_processor(class_name, video_pth, file_pth):
    #Connect the sample video from the device
    sample_video = cv2.VideoCapture(video_pth)
    processed = False


    #Load the holistic model
    with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        #Loop through each frame of the video 
        while sample_video.isOpened():
            #Returns the status of the read and the frame as an image
            status, frame = sample_video.read()

            #If frame is read correctly, status is true
            if status == False:
                break

            #Recolor the captured frame from BGR to RGB (Medipipe requies frames to be in RGB format)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            #Prevent writing and copying frame data to improve performance while making the detection
            rgb_frame.flags.writeable = False        

            #Use holistic model to make detections
            result_frame = holistic.process(rgb_frame)

            #Set frame back to writable format after detection
            rgb_frame.flags.writeable = True   

            #Recolor the captured frame from BGR for rendering with opencv
            bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)

         

            #Export the coordinates of the landmarks to the csv file
            try:
                #Extracting all the landmarks of the pose as an array
                pose_landmarks_array = result_frame.pose_landmarks.landmark
                #Format landmarks in to a numpy array for better structuring(removing keys) and collapse array to 1 dimesnsion
                pose_landmarks_nparray = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose_landmarks_array]).flatten() 
                                              if result_frame.pose_landmarks else np.zeros(33*4))

                #Append class name as the Oth element
                pose_landmarks_nparray.insert(0, class_name)

                #Append the data to table in the csv file
                with open(file_pth, mode='a', newline='') as f:
                    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                    csv_writer.writerow(pose_landmarks_nparray) 
                
                processed = True
            except:
                pass
    
    
    sample_video.release()
    cv2.destroyAllWindows()
    
    #If the try block is processed without any error
    if processed:
        print("Processed")

In [14]:
#Specify the location to the datasets
valid_path = "./datasets/train/valid/*.valid.mp4"
invalid_path = "./datasets/train/invalid/*.invalid.mp4"

#Displat the no of files in the datasets
print("Valid Video Count: ", len(glob.glob(valid_path)))
print("Invalid Video Count: ", len(glob.glob(invalid_path)))

Valid Video Count:  16
Invalid Video Count:  14


In [None]:
#Adding landmarks of the invalid dataset to the csv
class_name = "correct"
dir_size = len(glob.glob(valid_path))
for i in range (1, dir_size + 1):
    video_pth = "./datasets/train/valid/" + str(i) + ".valid.mp4"
    print("Video: ", str(i), "/", str(dir_size))
    video_processor(class_name, video_pth, csv_file_pth)

In [None]:
#Adding landmarks of the invalid dataset to the csv
class_name = "Incorrect"
dir_size = len(glob.glob(invalid_path))
for i in range (1, dir_size + 1):
    video_pth = "./datasets/train/invalid/" + str(i) + ".invalid.mp4"
    print("Video: ", str(i), "/", str(dir_size))
    video_processor(class_name, video_pth, csv_file_pth)

# Customize data read from the csv file

In [26]:
#Import dataframe
df = pd.read_csv(csv_file_pth)

In [27]:
#Display the first 5 rows in the dataframe
df.head()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
0,Correct,0.536316,0.339423,-0.768718,0.999999,0.543914,0.327558,-0.720017,0.999999,0.550713,...,-0.532458,0.935829,0.648078,0.759154,-0.784168,0.980342,0.403833,0.762923,-0.829206,0.981134
1,Correct,0.535812,0.339426,-0.640492,0.999999,0.543273,0.327564,-0.592675,0.999999,0.550037,...,-0.558671,0.936197,0.64915,0.760126,-0.811137,0.980162,0.403762,0.76285,-0.840826,0.980871
2,Correct,0.535765,0.33952,-0.611194,0.999999,0.5432,0.327683,-0.563223,0.999999,0.549958,...,-0.582028,0.937019,0.649824,0.760489,-0.835859,0.980106,0.403667,0.762687,-0.861377,0.980782
3,Correct,0.535595,0.339544,-0.57045,0.999999,0.542992,0.327735,-0.522471,0.999999,0.549718,...,-0.585186,0.938727,0.650623,0.760608,-0.833408,0.980356,0.403167,0.762551,-0.863657,0.981052
4,Correct,0.534989,0.339545,-0.525364,0.999999,0.542532,0.32775,-0.480479,0.999999,0.549212,...,-0.565891,0.940532,0.651047,0.760766,-0.791081,0.980645,0.402748,0.762494,-0.823742,0.981383


In [28]:
#Display the last 5 rows in the dataframe
df.tail()

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z31,v31,x32,y32,z32,v32,x33,y33,z33,v33
3083,Incorrect,0.523471,0.270879,-0.649942,0.999959,0.528278,0.257377,-0.593772,0.999927,0.534015,...,-0.235254,0.809068,0.5587,0.697302,-0.398288,0.945025,0.513337,0.697991,-0.45932,0.94556
3084,Incorrect,0.523792,0.270879,-0.674124,0.999962,0.528367,0.257384,-0.61862,0.999931,0.534064,...,-0.219369,0.813985,0.55864,0.697023,-0.356795,0.945733,0.513335,0.698377,-0.437888,0.947244
3085,Incorrect,0.523217,0.27101,-0.673676,0.999963,0.527967,0.257568,-0.617763,0.999932,0.533492,...,-0.228799,0.816002,0.558991,0.69689,-0.381392,0.945157,0.513455,0.698255,-0.449284,0.947913
3086,Incorrect,0.523127,0.270961,-0.60362,0.999963,0.527951,0.257544,-0.548521,0.999932,0.53348,...,-0.249267,0.820218,0.558105,0.697246,-0.392599,0.944632,0.511086,0.69845,-0.46807,0.948643
3087,Incorrect,0.522498,0.27017,-0.589493,0.999964,0.527753,0.25707,-0.534282,0.999933,0.533286,...,-0.263217,0.823932,0.557408,0.698341,-0.4324,0.944821,0.510332,0.698623,-0.48365,0.949252


In [29]:
#Remove the class column so the dataframe only contains features
X = df.drop('class', axis=1)
#Use the class as the target value
Y = df['class'] 

In [30]:
#Split the data with 30% for testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1234)

# Make predictions and select the best classifier

In [44]:
#Setup the machine learning model pipelines
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression(max_iter=20000)),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [64]:
#Dictionary to store the label of the model and model after training
train_models = {}
for label, pipeline in pipelines.items():
    model = pipeline.fit(X_train.values, Y_train.values)
    train_models[label] = model

In [65]:
train_models

{'lr': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression', LogisticRegression(max_iter=20000))]),
 'rc': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('ridgeclassifier', RidgeClassifier())]),
 'rf': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier', RandomForestClassifier())]),
 'gb': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('gradientboostingclassifier', GradientBoostingClassifier())])}

In [66]:
#Test the accuracies of the model to choose the best classifier
for label, model in train_models.items():
    output_class = model.predict(X_test.values)
    print(label, accuracy_score(Y_test.values, output_class))

lr 1.0
rc 1.0
rf 1.0
gb 1.0


In [68]:
#Select and dump the classifier into a pickle file
model = train_models['rf']
model

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier', RandomForestClassifier())])

In [71]:
#Save the model as a binary file
with open('rf_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [None]:
#Import the model from the binary file
with open('rf_model.pkl', 'rb') as f:
    model = pickle.load(f)

In [54]:
model

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier', RandomForestClassifier())])

In [69]:
#PREDICT AND DISPLAY THE RESULTS OF THE MODEL BY PASSING THE TEST VIDEO

#Connect the test video from the device
sample_video = cv2.VideoCapture('datasets/test/valid/5.valid.mp4')

#Load the holistic model
with holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    #Loop through each frame of the video 
    while sample_video.isOpened():
        #Returns the status of the read and the frame as an image
        status, frame = sample_video.read()
        
        #If frame is read correctly, status is true
        if status == False:
            print("Done")
            break
          
        #Recolor the captured frame from BGR to RGB (Medipipe requies frames to be in RGB format)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        #Prevent writing and copying frame data to improve performance while making the detection
        rgb_frame.flags.writeable = False        
        
        #Use holistic model to make detections
        result_frame = holistic.process(rgb_frame)
        
        #Set frame back to writable format after detection
        rgb_frame.flags.writeable = True   
        
        #Recolor the captured frame from BGR for rendering with opencv
        bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)

        #Use pose model to detect only the landmarks of the the body and not the landmarks of the face and hand
        draw_helpers.draw_landmarks(bgr_frame, result_frame.pose_landmarks, holistic_model.POSE_CONNECTIONS, 
                                 draw_helpers.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                 draw_helpers.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )
        
         #Predict the coordinates of the landmarks (resulrs screen)
        try:
            #Extracting all the landmarks of the pose as an array
            pose_landmarks_array = result_frame.pose_landmarks.landmark
            #Format landmarks in to a numpy array for better structuring(removing keys) and collapse array to 1 dimesnsion
            pose_landmarks_nparray = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose_landmarks_array]).flatten())

            #Pass the numpy array into a data frame
            features = pd.DataFrame([pose_landmarks_nparray])
            
            #Store the top class of the prediction
            pose_class_status = model.predict(features.values)[0]
            #Store the probability of the prediction
            pose_class_status_prob = model.predict_proba(features.values)[0]
            
            print("Class:", pose_class_status)
            print(pose_class_status_prob)
            
            #Set a rectangle box to display the results of the prediction in the video frame
            #rectangle(container, top_coord, bottom_coord, color, line_thickness)
            cv2.rectangle(bgr_frame, (0,0), (250, 60), (245, 117, 16), -1)
            
            #Display the class label inside the rectangle box
            cv2.putText(bgr_frame, 'Class'
                        , (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            
            #Extract =and display the top class of the prediction
            cv2.putText(bgr_frame, pose_class_status.split(' ')[0]
                        , (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
            #Display the class probability inside the rectangle box
            cv2.putText(bgr_frame, 'Probability'
                        , (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            
            #Extract and dispthe maximum probability
            cv2.putText(bgr_frame, str(round(pose_class_status_prob[np.argmax(pose_class_status_prob)],2))
                        , (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
 
        except:
            pass
                        
        #Display the frames    
        cv2.imshow('Results Feed', bgr_frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

sample_video.release()
cv2.destroyAllWindows()

Class: Incorrect
[0.41 0.59]
Class: Incorrect
[0.42 0.58]
Class: Incorrect
[0.42 0.58]
Class: Incorrect
[0.43 0.57]
Class: Incorrect
[0.46 0.54]
Class: Incorrect
[0.42 0.58]
Class: Incorrect
[0.41 0.59]
Class: Incorrect
[0.41 0.59]
Class: Incorrect
[0.42 0.58]
Class: Incorrect
[0.44 0.56]
Class: Incorrect
[0.42 0.58]
Class: Incorrect
[0.44 0.56]
Class: Incorrect
[0.44 0.56]
Class: Incorrect
[0.44 0.56]
Class: Incorrect
[0.44 0.56]
Class: Incorrect
[0.46 0.54]
Class: Correct
[0.57 0.43]
Class: Correct
[0.6 0.4]
Class: Correct
[0.61 0.39]
Class: Correct
[0.59 0.41]
Class: Correct
[0.54 0.46]
Class: Correct
[0.6 0.4]
Class: Correct
[0.59 0.41]
Class: Correct
[0.58 0.42]
Class: Correct
[0.53 0.47]
Class: Correct
[0.53 0.47]
Class: Correct
[0.53 0.47]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
Class: Correct
[0.52 0.48]
