# 1. Import and Install Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from ultralytics import YOLO
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
mp_pose = mp.solutions.pose

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    # mp_image = mp.Image.create_from_file('/path/to/image')
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)# Image is no longer writeable
    results = model.detect(mp_image)                 # Make prediction      # Image is now writeable
    return results

In [4]:
from mediapipe.framework.formats import landmark_pb2
def draw_landmarks_on_image(rgb_image, detection_result):
  pose_landmarks_list = detection_result.pose_landmarks
  annotated_image = np.copy(rgb_image)

  # Loop through the detected poses to visualize.
  for idx in range(len(pose_landmarks_list)):
    pose_landmarks = pose_landmarks_list[idx]

    # Draw the pose landmarks.
    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      pose_landmarks_proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())
  return annotated_image

In [5]:
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

model_file = open('pose_landmarker_lite.task', "rb")
model_data = model_file.read()
model_file.close()
    
base_options = python.BaseOptions(model_asset_buffer=model_data)
options = PoseLandmarkerOptions(
    base_options=base_options,
    running_mode=VisionRunningMode.IMAGE,
    num_poses =10)

In [6]:
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

model_file = open('pose_landmarker_lite.task', "rb")
model_data = model_file.read()
model_file.close()
    
base_options = python.BaseOptions(model_asset_buffer=model_data)

cap = cv2.VideoCapture("C://Users/user/PycharmProjects/Stavropol/train_dataset_train/videos/sword_exercise/How_to_Fence_-_How_to_do_the_Balestra_sword_exercise_f_nm_np1_le_bad_0.avi")
# Set mediapipe model
options = PoseLandmarkerOptions(
    base_options=base_options,
    running_mode=VisionRunningMode.IMAGE,
    num_poses =10)
frame_2 = None
results_2 = None
with PoseLandmarker.create_from_options(options) as landmarker:

    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        frame_2 = frame
        if not ret:
            break
        
        results = mediapipe_detection(frame, landmarker)
        results_2 = results
        # Draw landmarks
        annotated_image = draw_landmarks_on_image(frame, results)

        print(results.pose_landmarks)
        # Show to screen
        cv2.imshow('OpenCV Feed', annotated_image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

In [6]:
# Specify the height and width to which each video frame will be resized in our dataset.
IMAGE_HEIGHT , IMAGE_WIDTH = 256, 256

# Specify the number of frames of a video that will be fed to the model as one sequence.
SEQUENCE_LENGTH = 20

# Specify the directory containing the UCF50 dataset. 

# Specify the list containing the names of the classes used for training. Feel free to choose any set of classes.

In [7]:

def frames_extraction(video_path):
    '''
    This function will extract the required frames from a video after resizing and normalizing them.
    Args:
        video_path: The path of the video in the disk, whose frames are to be extracted.
    Returns:
        frames_list: A list containing the resized and normalized frames of the video.
    '''

    # Declare a list to store video frames.
    frames_list = []
    
    # Read the Video File using the VideoCapture object.
    video_reader = cv2.VideoCapture(video_path)

    # Get the total number of frames in the video.
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the the interval after which frames will be added to the list.
    skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH), 1)

    # Iterate through the Video Frames.
    for frame_counter in range(SEQUENCE_LENGTH):

        # Set the current frame position of the video.
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

        # Reading the frame from the video. 
        success, frame = video_reader.read() 

        # Check if Video frame is not successfully read then break the loop
        if not success:
            break

        # Resize the Frame to fixed height and width.
        resized_frame = cv2.resize(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))

        # Append the normalized frame into the frames list
        frames_list.append(resized_frame)
    
    # Release the VideoCapture object. 
    video_reader.release()

    # Return the frames list.
    return frames_list

# 3. Extract Keypoint Values

In [8]:
def extract_keypoints(pose_landmarks):
    pose =[]
    # if(len(results.pose_landmarks) == 2):
    #     print(pose_landmarks_list[0])
    #     print(pose_landmarks_list[1])
    pose = np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose_landmarks]).flatten() if pose_landmarks else np.zeros(33*4)
    # pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    # face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    # lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    # rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose])

# 4. Setup Folders for Collection

In [9]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('train_dataset_train\\videos')

# Actions that we try to detect
# actions = np.array(['cartwheel', 'catch', 'clap', 'climb'])
actions = np.array(['cartwheel', 'catch', 'clap', 'climb', 'dive', 'draw_sword', 'dribble', 'fencing',
                    'flic_flac', 'golf', 'handstand', 'hit', 'jump',
                    'pick', 'pour', 'pullup', 'push', 'pushup', 'shoot_ball', 'sit', 
                    'situp', 'swing_baseball', 'sword_exercise', 'throw'])
# actions = np.array(['cartwheel', 'catch'])

In [10]:
import pandas as pd 
with PoseLandmarker.create_from_options(options) as landmarker:
    features = []
    
    a = 0
    for action in actions:
        # Loop through sequences aka videos

        for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))):
            if os.path.isfile(os.path.join(DATA_PATH, action, sequence)):
                a = a+1
                video_file_path = os.path.join(DATA_PATH, action, sequence)
                # Loop through video length aka sequence length
                # for frame_num in range(sequence_length):
                frames_ = []
                frames = frames_extraction(video_file_path)
                if len(frames) == SEQUENCE_LENGTH:
                    for frame in frames:
                        frame_n = np.asarray(frame)
                        frame_n = frame_n.flatten()
                        frames_.append(frame_n)
                    features.append(frames_)
    print(a)
            # if len(frames) == SEQUENCE_LENGTH:
            #     cap = cv2.VideoCapture(os.path.join(DATA_PATH, action,sequence))
            #     for i in range(20):
            #     # Read feed
            #         # Make detections
            #         results = mediapipe_detection(frames[i], landmarker)
            #         # Draw landmarks
            #         # image = draw_landmarks_on_image(frames[i], results)
            #         # NEW Apply wait logic
            #         for idx in range(len(results.pose_landmarks)):
            #             pose_landmarks = results.pose_landmarks[idx]
            #             keypoints = extract_keypoints(pose_landmarks)
            #             if os.path.exists(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]))) == False:
            #                 os.mkdir(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]))) 
            #             npy_path = os.path.join(DATA_PATH, action, str (sequence.split('.')[0]), str(i))
            #             np.save(npy_path, keypoints)

            #     # Break gracefully
            #         if cv2.waitKey(10) & 0xFF == ord('q'):
            #             break

    # cap.release()
    # cv2.destroyAllWindows()

44


In [11]:
sequences, labels = [], []

with PoseLandmarker.create_from_options(options) as landmarker:
    

    for action in actions:
        # Loop through sequences aka videos
        window = []
        for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))):
            video_file_path = os.path.join(DATA_PATH, action, sequence)
            # Loop through video length aka sequence length
            # for frame_num in range(sequence_length):

            frames = frames_extraction(video_file_path)
            if len(frames) == SEQUENCE_LENGTH:
                cap = cv2.VideoCapture(os.path.join(DATA_PATH, action,sequence))
                for i in range(20):
                # Read feed
                    # Make detections
                    results = mediapipe_detection(frames[i], landmarker)
                    # Draw landmarks
                    # image = draw_landmarks_on_image(frames[i], results)
                    # NEW Apply wait logic
                    for idx in range(len(results.pose_landmarks)):
                        pose_landmarks = results.pose_landmarks[idx]
                        keypoints = extract_keypoints(pose_landmarks)
                        if os.path.exists(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]))) == False:
                            os.mkdir(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]))) 
                        npy_path = os.path.join(DATA_PATH, action, str (sequence.split('.')[0]), str(i))
                        
                        np.save(npy_path, keypoints)

                # Break gracefully
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        break

    cap.release()
    cv2.destroyAllWindows()

In [93]:
features.shape

(54, 20, 196608)

In [92]:
features = np.asarray(features)

In [66]:
features = features.flatten()

# 6. Preprocess Data and Create Labels and Features

In [10]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [11]:
label_map = {label:num for num, label in enumerate(actions)}

In [12]:
label_map

{'cartwheel': 0,
 'catch': 1,
 'clap': 2,
 'climb': 3,
 'dive': 4,
 'draw_sword': 5,
 'dribble': 6,
 'fencing': 7,
 'flic_flac': 8,
 'golf': 9,
 'handstand': 10,
 'hit': 11,
 'jump': 12,
 'pick': 13,
 'pour': 14,
 'pullup': 15,
 'push': 16,
 'pushup': 17,
 'shoot_ball': 18,
 'sit': 19,
 'situp': 20,
 'swing_baseball': 21,
 'sword_exercise': 22,
 'throw': 23}

In [12]:
sequences, labels = [], []

for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(str):
        if os.path.isdir(os.path.join(DATA_PATH, action,sequence)):
            window = []
            for frame_num in range(20):
                
                if os.path.exists(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]), "{}.npy".format(frame_num))):
                    res = np.load(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]), "{}.npy".format(frame_num)))
                else:
                    res = np.zeros(shape = (132,))
                window.append(res)
            sequences.append(window)
            labels.append(label_map[action])

In [13]:
X = np.asarray(sequences)

In [16]:
features = np.asarray(features)

In [17]:
X_f = features

In [17]:
print(labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 

In [14]:
y = to_categorical(labels).astype(int)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [63]:
np.save("X_train",X_train)

In [67]:
np.save("X_test",X_test)

In [68]:
np.save("y_train",y_train)
np.save("y_test",y_test)

In [65]:
X_test = X_test.reshape(X_test.shape[0], -1) 

In [20]:
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_f, y, test_size=0.05)

In [18]:
X_test.shape

(91, 20, 132)

In [54]:
print(y)
np.savetxt("y.txt", y)

[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]]


In [29]:
X_train_2.shape

(41, 20, 196608)

# 7. Build and Train LSTM Neural Network

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [20]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [67]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(20,132)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [24]:
model = tf.keras.models.Model()
input1 = tf.keras.Input(shape=(20,132), name = 'I1')
input2 = tf.keras.Input(shape=(20,196608), name = 'I2')
hidden1 = tf.keras.layers.LSTM(units = 64)(input1)
hidden2 = tf.keras.layers.LSTM(units = 64)(input2)
# hidden12 = tf.keras.layers.LSTM(units = 64)(hidden1)
# hidden22 = tf.keras.layers.LSTM(units = 64)(hidden2)
# hidden13 = tf.keras.layers.LSTM(units = 64)(hidden12)
# hidden23 = tf.keras.layers.LSTM(units = 64)(hidden22)
merge = tf.keras.layers.concatenate([hidden1, hidden2])
hidden3 = tf.keras.layers.Dense(units = 64, activation='relu')(merge)
output1 = tf.keras.layers.Dense(actions.shape[0], activation='softmax', name ='O1')(hidden3)

In [68]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
early_stopping_callback = EarlyStopping(monitor = 'loss', patience = 15, mode = 'min', restore_best_weights = True)
 
model.compile(optimizer='Adam', loss = 'categorical_crossentropy', metrics = ["accuracy"])


In [69]:
model.fit(X_train, y_train, epochs=1000, batch_size = 64, shuffle = True, validation_split = 0.2, callbacks = [early_stopping_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x252f243a1d0>

In [38]:
model.fit(X_train, y_train, epochs=1000, batch_size = 32, callbacks = [early_stopping_callback])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x252ef2dce20>

In [30]:
model = tf.keras.models.Model(inputs = [input1,input2], outputs = [output1])
    
model.compile(optimizer='Adam',
                loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [41]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [31]:
model.fit(x = {'I1':X_train, 'I2':X_train_2}, 
    y = {'O1':y_train},
    epochs = 10,
    callbacks = None)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2d27371de40>

In [42]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

early_stop = EarlyStopping(monitor='val_acc', min_delta=0.001,
                           patience=10, verbose=1, mode='auto')
chkpt = ModelCheckpoint("final_model", 
                        monitor='val_loss', 
                        verbose=1, 
                        save_best_only=True, 
                        mode='auto')
callbacks = [early_stop, chkpt]

In [43]:
model.fit(X_train, y_train, epochs=180, callbacks=callbacks)

Epoch 1/180
Epoch 2/180
Epoch 3/180
Epoch 4/180
Epoch 5/180
Epoch 6/180
Epoch 7/180
Epoch 8/180
Epoch 9/180
Epoch 10/180
Epoch 11/180
Epoch 12/180
Epoch 13/180
Epoch 14/180
Epoch 15/180
Epoch 16/180
Epoch 17/180
Epoch 18/180
Epoch 19/180
Epoch 20/180
Epoch 21/180
Epoch 22/180
Epoch 23/180
Epoch 24/180
Epoch 25/180
Epoch 26/180
Epoch 27/180
Epoch 28/180
Epoch 29/180
Epoch 30/180
Epoch 31/180
Epoch 32/180
Epoch 33/180
Epoch 34/180
Epoch 35/180
Epoch 36/180
Epoch 37/180
Epoch 38/180
Epoch 39/180
Epoch 40/180
Epoch 41/180
Epoch 42/180
Epoch 43/180
Epoch 44/180
Epoch 45/180
Epoch 46/180
Epoch 47/180
Epoch 48/180
Epoch 49/180
Epoch 50/180
Epoch 51/180
Epoch 52/180
Epoch 53/180
Epoch 54/180
Epoch 55/180
Epoch 56/180
Epoch 57/180
Epoch 58/180
Epoch 59/180
Epoch 60/180
Epoch 61/180
Epoch 62/180
Epoch 63/180
Epoch 64/180
Epoch 65/180
Epoch 66/180
Epoch 67/180
Epoch 68/180
Epoch 69/180
Epoch 70/180
Epoch 71/180
Epoch 72/180
Epoch 73/180
Epoch 74/180
Epoch 75/180
Epoch 76/180
Epoch 77/180
Epoch 78

<keras.callbacks.History at 0x21a70daa770>

In [22]:
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Dropout, Flatten
def create_LRCN_model():
    '''
    This function will construct the required LRCN model.
    Returns:
        model: It is the required constructed LRCN model.
    '''

    # We will use a Sequential model for model construction.
    model = Sequential()
    
    # Define the Model Architecture.
    ########################################################################################################################
    
    model.add(TimeDistributed(Conv2D(16, (3, 3), padding='same',activation = 'relu'),
                              input_shape = (20, 256, 256, 3)))
    
    model.add(TimeDistributed(MaxPooling2D((4, 4)))) 
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(32, (3, 3), padding='same',activation = 'relu')))
    model.add(TimeDistributed(MaxPooling2D((4, 4))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same',activation = 'relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    model.add(TimeDistributed(Dropout(0.25)))
    
    model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same',activation = 'relu')))
    model.add(TimeDistributed(MaxPooling2D((2, 2))))
    #model.add(TimeDistributed(Dropout(0.25)))
                                      
    model.add(TimeDistributed(Flatten()))
                                      
    model.add(LSTM(32))
                                      
    model.add(Dense(len(actions), activation = 'softmax'))

    ########################################################################################################################

    # Display the models summary.
    model.summary()
    
    # Return the constructed LRCN model.
    return model

In [20]:
def create_dataset():
    '''
    This function will extract the data of the selected classes and create the required dataset.
    Returns:
        features:          A list containing the extracted frames of the videos.
        labels:            A list containing the indexes of the classes associated with the videos.
        video_files_paths: A list containing the paths of the videos in the disk.
    '''

    # Declared Empty Lists to store the features, labels and video file path values.
    features = []
    labels = []
    video_files_paths = []
    
    # Iterating through all the classes mentioned in the classes list
    for class_index, class_name in enumerate(actions):
        
        # Display the name of the class whose data is being extracted.
        print(f'Extracting Data of Class: {class_name}')
        
        # Get the list of video files present in the specific class name directory.
        files_list = os.listdir(os.path.join(DATA_PATH, class_name))
        
        # Iterate through all the files present in the files list.
        for file_name in files_list:
            
            # Get the complete video path.
            video_file_path = os.path.join(DATA_PATH, class_name, file_name)

            # Extract the frames of the video file.
            frames = frames_extraction(video_file_path)

            # Check if the extracted frames are equal to the SEQUENCE_LENGTH specified above.
            # So ignore the vides having frames less than the SEQUENCE_LENGTH.
            if len(frames) == SEQUENCE_LENGTH:
                
                # Append the data to their repective lists.
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)

    # Converting the list to numpy arrays
    features = np.asarray(features)
    labels = np.array(labels)  
    
    # Return the frames, class index, and video file path.
    return features, labels, video_files_paths

In [21]:
# Create the dataset.
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: cartwheel
Extracting Data of Class: catch
Extracting Data of Class: clap
Extracting Data of Class: climb
Extracting Data of Class: dive
Extracting Data of Class: draw_sword
Extracting Data of Class: dribble
Extracting Data of Class: fencing
Extracting Data of Class: flic_flac
Extracting Data of Class: golf
Extracting Data of Class: handstand
Extracting Data of Class: hit
Extracting Data of Class: jump
Extracting Data of Class: pick
Extracting Data of Class: pour
Extracting Data of Class: pullup
Extracting Data of Class: push
Extracting Data of Class: pushup
Extracting Data of Class: shoot_ball
Extracting Data of Class: sit
Extracting Data of Class: situp
Extracting Data of Class: swing_baseball
Extracting Data of Class: sword_exercise
Extracting Data of Class: throw


In [24]:
# Using Keras's to_categorical method to convert labels into one-hot-encoded vectors
one_hot_encoded_labels = to_categorical(labels)

In [25]:
seed_constant = 27
# Split the Data into Train ( 75% ) and Test Set ( 25% ).
features_train, features_test, labels_train, labels_test = train_test_split(features, one_hot_encoded_labels,
                                                                            test_size = 0.25, shuffle = True,
                                                                            random_state = seed_constant)

In [23]:
# Construct the required LRCN model.
LRCN_model = create_LRCN_model()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_12 (TimeDi  (None, 20, 256, 256, 16)  448      
 stributed)                                                      
                                                                 
 time_distributed_13 (TimeDi  (None, 20, 64, 64, 16)   0         
 stributed)                                                      
                                                                 
 time_distributed_14 (TimeDi  (None, 20, 64, 64, 16)   0         
 stributed)                                                      
                                                                 
 time_distributed_15 (TimeDi  (None, 20, 64, 64, 32)   4640      
 stributed)                                                      
                                                                 
 time_distributed_16 (TimeDi  (None, 20, 16, 16, 32)  

In [27]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
# Create an Instance of Early Stopping Callback.
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 15, mode = 'min', restore_best_weights = True)
 
# Compile the model and specify loss function, optimizer and metrics to the model.
LRCN_model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics = ["accuracy"])

# Start training the model.
LRCN_model_training_history = LRCN_model.fit(x = features_train, y = labels_train, epochs = 30, batch_size = 4 ,
                                             shuffle = True, validation_split = 0.2, callbacks = [early_stopping_callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30


In [48]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 20, 64)            50432     
                                                                 
 lstm_9 (LSTM)               (None, 20, 128)           98816     
                                                                 
 lstm_10 (LSTM)              (None, 64)                49408     
                                                                 
 dense_8 (Dense)             (None, 64)                4160      
                                                                 
 dense_9 (Dense)             (None, 32)                2080      
                                                                 
 dense_10 (Dense)            (None, 24)                792       
                                                                 
Total params: 205,688
Trainable params: 205,688
Non-tr

# 8. Make Predictions

In [30]:
res = LRCN_model.predict(X_test)

ValueError: in user code:

    File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 2169, in predict_function  *
        return step_function(self, iterator)
    File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 2155, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 2143, in run_step  **
        outputs = model.predict_step(data)
    File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 2111, in predict_step
        return self(x, training=False)
    File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 20, 256, 256, 3), found shape=(None, 20, 132)


In [32]:
print(type(res[1]))

<class 'numpy.ndarray'>


In [None]:
import numpy as np 
from PIL import Image as im 
data = im.fromarray(res[4]) 
data.save('test.jpg')  

In [33]:
actions[np.argmax(res[2])]

'dribble'

In [34]:
actions[np.argmax(y_test[2])]

'golf'

# 9. Save Weights

In [74]:
model.save('modern_model_64.h5')

In [None]:
del model

In [156]:
model.load_weights('action.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [49]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [70]:
yhat = model.predict(X_test)



In [71]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [72]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[86,  1],
        [ 2,  2]],

       [[88,  0],
        [ 2,  1]],

       [[83,  1],
        [ 5,  2]],

       [[87,  2],
        [ 2,  0]],

       [[86,  3],
        [ 1,  1]],

       [[84,  1],
        [ 2,  4]],

       [[81,  4],
        [ 3,  3]],

       [[84,  4],
        [ 1,  2]],

       [[87,  1],
        [ 2,  1]],

       [[81,  2],
        [ 5,  3]],

       [[79,  5],
        [ 3,  4]],

       [[83,  5],
        [ 2,  1]],

       [[86,  4],
        [ 1,  0]],

       [[82,  4],
        [ 3,  2]],

       [[87,  1],
        [ 1,  2]],

       [[89,  0],
        [ 2,  0]],

       [[84,  5],
        [ 1,  1]],

       [[89,  0],
        [ 1,  1]],

       [[81,  2],
        [ 6,  2]],

       [[85,  2],
        [ 3,  1]],

       [[87,  1],
        [ 2,  1]],

       [[85,  5],
        [ 1,  0]],

       [[84,  3],
        [ 3,  1]],

       [[89,  0],
        [ 2,  0]]], dtype=int64)

In [73]:
accuracy_score(ytrue, yhat)

0.38461538461538464

# 11. Test in Real Time

In [25]:
from scipy import stats

In [26]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        print(num)
        print(prob)
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame

In [None]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

In [157]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture("C://Users/user/PycharmProjects/Stavropol/train_dataset_train/videos/sword_exercise/How_to_Fence_-_How_to_do_the_Balestra_sword_exercise_f_nm_np1_le_bad_0.avi")
# Set mediapipe model
with PoseLandmarker.create_from_options(options) as landmarker:
    while cap.isOpened():
        # video_file_path = os.path.join(DATA_PATH, action, sequence)
            # # Loop through video length aka sequence length
            # # for frame_num in range(sequence_length):

            # frames = frames_extraction(video_file_path)
            # if len(frames) == SEQUENCE_LENGTH:
            #     cap = cv2.VideoCapture(os.path.join(DATA_PATH, action,sequence))
            #     for i in range(20):
            #     # Read feed
            #         # Make detections
            #         results = mediapipe_detection(frames[i], landmarker)
            #         # Draw landmarks
            #         # image = draw_landmarks_on_image(frames[i], results)
            #         # NEW Apply wait logic
            #         for idx in range(len(results.pose_landmarks)):
            #             pose_landmarks = results.pose_landmarks[idx]
            #             keypoints = extract_keypoints(pose_landmarks)
            #             if os.path.exists(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]))) == False:
            #                 os.mkdir(os.path.join(DATA_PATH, action, str (sequence.split('.')[0]))) 
            #             npy_path = os.path.join(DATA_PATH, action, str (sequence.split('.')[0]), str(i))
            #             np.save(npy_path, keypoints)

            #     # Break gracefully
            #         if cv2.waitKey(10) & 0xFF == ord('q'):
            #             break
        # Read feed
        ret, frame = cap.read()
        if not ret:
            break

        # Make detections
        results = mediapipe_detection(frame, landmarker)
        print(results)

        # Draw landmarks
        annotated_image = draw_landmarks_on_image(frame, results)

        # 2. Prediction logic
        
        for idx in range(len(results.pose_landmarks)):
            pose_landmarks = results.pose_landmarks[idx]
            keypoints = extract_keypoints(pose_landmarks)
            
        sequence.append(keypoints)
        sequence = sequence[-20:]

        if len(sequence) == 20:
            
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            action = actions[np.argmax(res)]
            # predictions.append(np.argmax(res))
            cv2.rectangle(annotated_image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(annotated_image, ' '.join(action), (3,30),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)


        # #3. Viz logic
        # if np.unique(predictions[-10:])[0]==np.argmax(res):
        #     if res[np.argmax(res)] > threshold:

        #         if len(sentence) > 0:
        #             if actions[np.argmax(res)] != sentence[-1]:
        #                 sentence.append(actions[np.argmax(res)])
        #         else:
        #             sentence.append(actions[np.argmax(res)])

        # if len(sentence) > 5:
        #     sentence = sentence[-5:]

        # # Viz probabilities
        # image = prob_viz(res, actions, frame, colors)

        # Show to screen
        cv2.imshow('OpenCV Feed', annotated_image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()