### Prediction

In [1]:
import numpy as np
import os
import yaml
import cv2
import mediapipe as mp
import constants as cs
import globali as gg
import config_loader as cl
import detect_keypoints as dk
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2

### Load configuration data

In [4]:
def read_yaml_fields():
    with open(cs.YAMLFILE, 'r') as ff:
        content = yaml.safe_load(ff)
        a = set(content.get('actions', []))
        c = content.get('counter', {})
        return a, c

# Load actions and counters
gg.actions, gg.counters = read_yaml_fields()

In [5]:
label_map = {label:num for num, label in enumerate(gg.actions)}
len(label_map)

58

In [6]:
sequences, labels = [], []

# get all the keypoints
for action in tqdm(gg.actions, desc="Processing Actions"):
    for sequence in range(gg.counters[action]):
        window = []
        for frame_num in range(cs.NUM_FRAME):
            res = np.load(os.path.join(cs.KEYPOINTS_FOLDER, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

Processing Actions: 100%|██████████| 58/58 [01:44<00:00,  1.80s/it]


In [7]:
X = np.array(sequences)
X.shape

(4787, 30, 1662)

In [8]:
y = to_categorical(labels).astype(int)
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)
print(y_train.shape)
print(y_test.shape)

(4547, 58)
(240, 58)


In [10]:
# create model
np_actions = np.array(list(gg.actions))
print(np_actions.shape[0])
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='tanh', input_shape=(30, 1662)))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True, activation='tanh'))
model.add(Dropout(0.2))
model.add(LSTM(64, return_sequences=False, activation='tanh'))
model.add(BatchNormalization())
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(np_actions.shape[0], activation='softmax'))

58


In [11]:
# Load trained model weights
model.load_weights('models/model_0/best_model.h5')

### Test in Real Time

In [13]:
mp_holistic = mp.solutions.holistic                                     # Holistic model
mp_drawing = mp.solutions.drawing_utils                                 # Drawing utilities

In [18]:
sequence = []
sentence = []
threshold = 0.8
actions_list = list(gg.actions)   

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        
        # Read feed
        ret, frame = cap.read()
        
        # Make detections
        image, results = dk.mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        dk.draw_styled_landmarks(image, results)
        
        # Prediction logic
        keypoints = dk.extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            # print(gg.actions[np.argmax(res)])
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if actions_list[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions_list[np.argmax(res)])
                else:
                    sentence.append(actions_list[np.argmax(res)])  
            if len(sentence) > 5: 
                sentence = sentence[-5:]
                
            print("Prediction: ", actions_list[np.argmax(res)])

        cv2.imshow("OpenCV Feed", image)
            
        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti