# Human Skeleton Pose prediction with a webcam

In [None]:
# Institution: Carleton University
# Course: OSS4009 Computer Vision 
# Term: W23
#
# Filename: 1 - MAIN - Dagerous_Skeleton_Pose_Detection.ipynb
#
# Students: Adam Thompson, Philippe Beaulieu
# Professor:  Dr. Marzieh Amini
#
# Description: This program loads the MediaPipe landmark .csv dataset (used to fit a KNN classifier) and the
#              light CNN model. If the CNN prediction confidence falls under the threshold (here 60%), the
#              MediaPipe pose-landmark (KNN) result is used instead.
#
#              In this project we use MediaPipe as support when the CNN model goes below a set threshold.
#     

setup

In [1]:
import pathlib
import os
import json

import cv2

#import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
import mediapipe as mp

import time
import pandas as pd

#from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier # KNN

#from tensorflow import keras
#from tensorflow.keras import layers
#from tensorflow.keras.models import Sequential

import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Image height and width (in pixels) shared by the cells below.
img_height, img_width = 270, 480

Load the trained CNN model.tflite

In [2]:
# Path to the saved TensorFlow Lite model (default export name).
TF_MODEL_FILE_PATH = 'model.tflite'

# Build the TFLite interpreter for the trained CNN.
interpreter = tf.lite.Interpreter(model_path=TF_MODEL_FILE_PATH)
interpreter.allocate_tensors()  # Needed before execution!

# The signature list names the model's inputs and outputs; those names are
# used later as the keyword argument and result key of the signature runner.
signatures = interpreter.get_signature_list()
print(signatures)


{'serving_default': {'inputs': ['rescaling_input'], 'outputs': ['dense_3']}}


Load the MediaPipe landmark CSV dataset, fit the KNN model on it, and load the class names

In [3]:
# Handles to the MediaPipe pose solution and its drawing helpers.
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Landmark dataset: the first 132 columns are the features and 'target' is the label.
# NOTE(review): 132 presumably equals 33 pose landmarks x (x, y, z, visibility) - confirm
# against the script that generated dataset3.csv.
data = pd.read_csv("dataset3.csv", index_col=0)
feat = data.iloc[:, :132]
label = data['target']

# K-nearest-neighbours classifier with default settings (original note: good on
# standing and sitting). An RBF-kernel SVC was the alternative tried here.
model = KNeighborsClassifier()
model.fit(feat, label)

# Class names, indexed by the integer class id.
with open("class_name.json", 'r') as f:
    class_name = json.load(f)

print(class_name)

['shooting', 'sitting', 'standing']


Using the webcam input for images

In [8]:
# Live demo: grab webcam frames, classify each one with both the KNN (on MediaPipe
# pose landmarks) and the TFLite CNN (on the annotated frame), and overlay the CNN
# label -- or the KNN label when the CNN confidence is below the threshold.
# Press ESC in the preview window to stop.

# Runtime settings.
CAMERA_INDEX = 3               # my webcam is 3, yours might be 0 or another number; start at 0 and try until it works.
CNN_CONFIDENCE_THRESHOLD = 60  # percent; below this the KNN label replaces the CNN label
ESC_KEY = 27                   # key code that ends the loop

# Label variables.
asan  = ""   # text drawn on the frame
asan2 = ""   # most recent KNN label

# Colors (BGR order, as expected by the OpenCV drawing calls).
blue = (255, 127, 0)
red = (50, 50, 255)
colors = blue
color2 = blue

# The signature runner does not change between frames, so fetch it once.
# Look at the CNN signature printed above for the input and output names.
classify_lite = interpreter.get_signature_runner('serving_default')

# For webcam input:
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    print("Could not open camera index {}.".format(CAMERA_INDEX))

try:
    with mp_pose.Pose(min_detection_confidence=0.6, min_tracking_confidence=0.6) as pose:
        while cap.isOpened():

            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            # MediaPipe expects RGB input while OpenCV delivers BGR frames.
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            # KNN prediction from the flattened landmark vector (x, y, z, visibility per landmark).
            knn_available = False  # True only if a KNN label was produced for THIS frame
            if results.pose_landmarks:
                ttemp = []
                for landmark in results.pose_landmarks.landmark:
                    ttemp.extend((landmark.x, landmark.y, landmark.z, landmark.visibility))

                # predict() returns a 1-element array; index it explicitly instead of
                # relying on the deprecated implicit array -> scalar conversion.
                knn_class = int(model.predict([ttemp])[0])
                asan2 = 'KNN ' + class_name[knn_class]
                color2 = red if knn_class == 0 else blue
                knn_available = True

            # Draw the pose annotation on the image.
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                      landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

            # CNN prediction on the annotated frame.
            # cv2.resize takes dsize as (width, height); the resulting array is
            # (img_height, img_width, channels).
            # NOTE(review): assumes the CNN was trained on img_height x img_width inputs - confirm.
            resized = cv2.resize(image, (img_width, img_height))
            img_array = tf.keras.utils.img_to_array(resized)
            img_array = tf.expand_dims(img_array, 0)  # Create a batch
            predictions_lite = classify_lite(rescaling_input=img_array)['dense_3']
            score_lite = tf.nn.softmax(predictions_lite)

            cnn_class = int(np.argmax(score_lite))
            cnn_confidence = 100 * np.max(score_lite)

            # Class 0 is highlighted in red, every other class in blue.
            colors = red if cnn_class == 0 else blue
            asan = 'CNN ' + class_name[cnn_class]

            # Fall back to the KNN result only when the CNN is unsure AND a KNN
            # prediction exists for the current frame (never reuse a stale label).
            if cnn_confidence < CNN_CONFIDENCE_THRESHOLD and knn_available:
                asan = asan2
                colors = color2

            # Print the class on the image and show it.
            cv2.putText(image, asan, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, colors, 3)
            cv2.imshow('Human Pose', image)

            if cv2.waitKey(5) & 0xFF == ESC_KEY:
                break
finally:
    # Always free the camera and close the preview window, even after an
    # exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()
