In [None]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.8.7.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.4 MB)
[K     |████████████████████████████████| 30.4 MB 77 kB/s 
Installing collected packages: mediapipe
Successfully installed mediapipe-0.8.7.3


In [None]:

# Computer Vision features
import cv2
from google.colab.patches import cv2_imshow
from keras.preprocessing import image
from keras.utils import np_utils
import mediapipe as mp

# Data processing
import math
import numpy as np
import pandas as pd

# Data visualization
import matplotlib.pyplot as plt

# File path, temporal processes & warning control
import os
import time
import warnings

# Modeling
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [None]:
# Module-level shortcuts to the MediaPipe solutions used by the cells below:
# the holistic model and the landmark drawing utilities.
mp_holistic, mp_drawing = mp.solutions.holistic, mp.solutions.drawing_utils

In [None]:
def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame together with the results.

    Args:
        image: Frame in BGR channel order (it is converted with COLOR_BGR2RGB).
        model: Object exposing a `process(rgb_image)` method.

    Returns:
        Tuple `(bgr_frame, results)`: the frame converted back to BGR and
        whatever `model.process` returned.
    """
    # The model is fed an RGB copy of the frame.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the RGB frame read-only for the duration of the prediction,
    # then make it writeable again.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand the caller a BGR frame again.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [None]:
# draw_landmarks: Overlays the left-hand, right-hand and pose landmarks from
# results onto the given frame.
def draw_landmarks(image, results):
    """Draw hand and pose landmarks from `results` onto `image`.

    Args:
        image: Frame passed to `mp_drawing.draw_landmarks` as the drawing target.
        results: Object exposing `left_hand_landmarks`, `right_hand_landmarks`
            and `pose_landmarks`.
    """
    # (attribute on results, connection set) in drawing order:
    # left hand, right hand, then pose.
    overlays = (
        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS),
        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS),
        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS),
    )
    for attr_name, connections in overlays:
        mp_drawing.draw_landmarks(image, getattr(results, attr_name), connections)

In [None]:
# extract_keypoints: Flattens the pose, left-hand and right-hand landmark coordinates
# of one frame into a single 1-D array.
def extract_keypoints(results):
    """Return the frame's keypoints as one concatenated 1-D numpy array.

    Layout of the returned array: pose (x, y, z, visibility per landmark),
    then left hand (x, y, z per landmark), then right hand (x, y, z per
    landmark). A landmark group that is missing (falsy) on `results` is
    replaced by zeros: 33*4 values for the pose, 21*3 for each hand.

    Args:
        results: Object exposing `pose_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks`; each is either falsy or has a `.landmark`
            iterable of points.

    Returns:
        1-D numpy array: pose values, left-hand values, right-hand values.
    """
    def _flatten(group, attrs, n_landmarks):
        # Zero placeholder keeps the slot for a group that was not detected.
        if not group:
            return np.zeros(n_landmarks * len(attrs))
        rows = [[getattr(point, attr) for attr in attrs] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    pose = _flatten(results.pose_landmarks, xyz + ("visibility",), 33)
    lh = _flatten(results.left_hand_landmarks, xyz, 21)
    rh = _flatten(results.right_hand_landmarks, xyz, 21)

    return np.concatenate([pose, lh, rh])

In [None]:
# Set the folder path for the numpy arrays.
DATA_PATH = os.path.join('/content/Data')

# Names of the gestures to classify. Later cells build the label map and size the
# model's output layer from this array, so it must be defined for the notebook to
# run from a fresh kernel.
# TODO: extract the gesture names from the dataset dataframe instead, e.g.
# gestures = np.array(getGestureNameList(dataFrame, columnNumber))
gestures = np.array(['thumbsup'])

# Number of videos contained in the dataset (currently unused).
# TODO: extract the number of rows from the dataset.
#no_sequences = 4

Takes the video, performs keypoint extraction frame by frame, and saves the resulting numpy arrays to a folder. **Note: the `numpy_array_path` still needs some tweaking.**

In [None]:
# extractKPFromVid: Performs keypoints extraction on sampled frames of the input video
# and saves the keypoints to a numpy array folder.
def extractKPFromVid(videoMP4, action, folderNum, frame_step=4, max_frames=30):
    """Extract keypoints from every `frame_step`-th frame of a video.

    Each sampled frame is run through the MediaPipe holistic model, its
    keypoints are saved to `DATA_PATH/<action>/<folderNum>/<count>.npy`
    (count starts at 1), and the first `max_frames` keypoint arrays are
    collected and returned.

    Args:
        videoMP4: Path of the video file, including the extension.
        action: Gesture name; used as the first sub-folder under DATA_PATH.
        folderNum: Identifier of this video; used as the second sub-folder.
        frame_step: Keep only frames whose index is divisible by this value.
        max_frames: Maximum number of keypoint arrays to return. Every sampled
            frame is still saved to disk, even beyond this limit.

    Returns:
        List of 1-D numpy arrays (one per sampled frame, at most `max_frames`).
        The list is empty if the video could not be opened or has no frames.

    Raises:
        ValueError: If `frame_step` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError("frame_step must be >= 1, got {}".format(frame_step))

    # np.save does not create missing directories, so make sure the target exists.
    out_dir = os.path.join(DATA_PATH, action, str(folderNum))
    os.makedirs(out_dir, exist_ok=True)

    numpyList = []
    count = 0

    cap = cv2.VideoCapture(videoMP4)
    try:
        if not cap.isOpened():
            print("Warning: could not open video '{}'.".format(videoMP4))

        with mp_holistic.Holistic(min_detection_confidence=0.5,
                                  min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                # Index of the frame that the next read() will return.
                frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)

                ret, frame = cap.read()

                # No more frames: stop capturing.
                if not ret:
                    break

                # Only every frame_step-th frame is processed.
                if frameId % frame_step != 0:
                    continue

                # Keypoints detection; the converted frame itself is not needed.
                _, results = mediapipe_detection(frame, holistic)
                count += 1
                keypoints = extract_keypoints(results)

                # Data/<Gesture>/<GestureVideo#>/<count>; np.save appends ".npy".
                numpy_array_path = os.path.join(out_dir, str(count))
                print(numpy_array_path)
                np.save(numpy_array_path, keypoints)

                # Collect the in-memory array directly instead of re-reading it
                # from disk (the old reload used a different file name than the save).
                if count <= max_frames:
                    numpyList.append(keypoints)
    finally:
        # Always free the capture, even if detection or saving fails.
        # No OpenCV windows are opened here, so there is nothing else to destroy.
        cap.release()

    # Output finish message.
    print ("Frame Capture Finished.")

    # Return the list of numpy arrays.
    return numpyList

In [None]:
# Read in the csv file to a dataframe.

# Get the labels and ids & set to a map.




Video keypoint Extraction

In [None]:
# TODO: build videoGestures (file names) and the gesture name of each video from
# the dataset dataframe instead of hard-coding them here.
videoGestures = [
    'thumbsup01.mp4',
    'thumbsup02.mp4',
    'thumbsup03.mp4',
    'thumbsup04.mp4',
]

# Map each gesture name to its integer class index.
gestureLabelMap = dict(zip(gestures, range(len(gestures))))

# One keypoint sequence and one class label per video.
sequences = []
labels = []

# window holds the keypoint list of the most recently processed video.
window = []

# Videos are stored in consecutively numbered sub-folders, starting at 1.
folderCount = 1
for vid in videoGestures:
    # Extract the keypoints of the sampled frames of this video.
    window = extractKPFromVid(vid, 'thumbsup', str(folderCount))

    sequences.append(window)
    labels.append(gestureLabelMap['thumbsup'])

    folderCount += 1


In [None]:
# Inspect the last extracted window: container type, element type and array shape.
print(type(window), type(window[0]), np.array(window).shape, sep='\n')

<class 'list'>
<class 'numpy.ndarray'>
(30, 258)


In [None]:
# Stack the sequences into the feature array and report the shapes of the
# sequences, the labels, and the resulting feature array.
X = np.array(sequences)
for shape in (X.shape, np.array(labels).shape, X.shape):
    print(shape)

(4, 30, 258)
(4,)
(4, 30, 258)


In [None]:
# Silence NumPy's VisibleDeprecationWarning before building the array.
# The filter goes through the standard-library `warnings` module because the
# `np.warnings` alias is not available in newer NumPy releases. The warning class
# is looked up in `np.exceptions` when that module exists and on `np` otherwise,
# so the cell works on both old and new NumPy versions.
warnings.filterwarnings(
    'ignore',
    category=getattr(getattr(np, 'exceptions', np), 'VisibleDeprecationWarning'),
)
np.array(sequences).shape

(4, 30, 258)

In [None]:
# Shape of the label vector (one entry per video).
np.shape(labels)

(4,)

In [None]:
# Feature array built from the list of per-video keypoint sequences.
X = np.asarray(sequences)
X.shape

(4, 30, 258)

In [None]:
# One-hot encode the labels. num_classes is pinned to the number of gestures so the
# label width always matches the model's softmax layer (sized from `gestures`), even
# when the highest-indexed gesture has no video in `labels`.
y = to_categorical(labels, num_classes=len(gestures)).astype(int)

Train Test Split

In [None]:
# Hold out 5% of the samples for testing. random_state is fixed so that the split
# (and everything trained on it) is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=42
)

Model Training

In [None]:
# TensorBoard callback that writes the training logs to the 'Logs' directory.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
# Sequence classifier: three stacked LSTM layers followed by three Dense layers.
# The input is a (30, 258) sequence; the softmax output has one unit per gesture.
model = Sequential([
    # LSTM layers: the first two return full sequences so the next LSTM can
    # consume them; the last one returns only its final output.
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 258)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    # Dense layers.
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(gestures.shape[0], activation='softmax'),
])

In [None]:
# Compile the model. Categorical cross-entropy is used because this is a
# multiclass classification model with one-hot labels.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# Train on the training split for 1000 epochs, logging to TensorBoard.
model.fit(
    x=X_train,
    y=y_train,
    epochs=1000,
    callbacks=[tb_callback],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f1c07912690>