In [26]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import mediapipe as mp


In [27]:
from tensorflow.keras.models import load_model

In [28]:
# Load the saved Keras model from 'model.h5' (path is relative to the notebook's working directory).
# This module-level `model` is what preidct_action() calls for predictions.
model = load_model('model.h5')

In [29]:
# Short alias for MediaPipe's holistic solution module; preidct_action() builds its
# `Holistic` context manager from it.
mp_holistic  = mp.solutions.holistic

In [30]:
import numpy as np

In [31]:
def landmark_to_numpy(result):
    """Flatten the pose and hand landmarks of a holistic result into one flat list.

    Args:
        result: Object exposing ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``. Each is either falsy (nothing detected)
            or has a ``.landmark`` iterable of points.

    Returns:
        list[float]: pose values (x, y, z, visibility per point), followed by
        left-hand and right-hand values (x, y, z per point). A missing group is
        replaced by zeros of length 132, 63 and 63 respectively.
    """
    def _flatten(group, fields, fallback_size):
        # A falsy group means "not detected": substitute a zero vector of fixed size.
        if not group:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    hand_fields = ("x", "y", "z")
    pose_vec = _flatten(result.pose_landmarks, ("x", "y", "z", "visibility"), 132)
    left_vec = _flatten(result.left_hand_landmarks, hand_fields, 63)
    right_vec = _flatten(result.right_hand_landmarks, hand_fields, 63)
    return np.concatenate([pose_vec, left_vec, right_vec]).tolist()

In [32]:
def captureLandmarks(cv2image, model):
    """Run ``model.process`` on an RGB conversion of a BGR frame.

    Args:
        cv2image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before processing).
        model: Any object with a ``process(image)`` method; the caller in this
            notebook passes a MediaPipe ``Holistic`` instance.

    Returns:
        tuple: ``(bgr_image, results)`` where ``bgr_image`` is the processed
        frame converted back to BGR and ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(cv2image, cv2.COLOR_BGR2RGB)

    # The array is flagged read-only only for the duration of the model call,
    # then made writeable again before converting back.
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections


In [33]:
# Class labels: preidct_action() indexes this list with the argmax of the model output,
# so the order presumably has to match the label order used at training time — TODO confirm.
actions = ['back', 'forward', 'left', 'right', 'wave']

In [46]:
#predicting the actions using lstm model
def preidct_action(sequence_length=30, threshold=0.5, camera_index=0):
    """Run live action recognition on webcam frames until 'q' is pressed.

    Each frame is passed through MediaPipe Holistic, flattened with
    ``landmark_to_numpy`` and pushed into a sliding window. Whenever the window
    holds ``sequence_length`` frames, the module-level ``model`` predicts on it,
    and the label from ``actions`` is drawn on the frame if its score exceeds
    ``threshold``. The oldest frame is then dropped from the window.

    The (misspelled) function name is kept so existing callers keep working.

    Args:
        sequence_length: Number of consecutive frames fed to the model at once.
        threshold: Minimum score of the top class for its label to be drawn.
        camera_index: Device index passed to ``cv2.VideoCapture``.

    Returns:
        numpy.ndarray: The landmark vectors still in the sliding window when the
        loop ended (at most ``sequence_length - 1`` rows once the window has
        filled; empty if no frame was read).

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    # Sliding window of per-frame landmark vectors.
    action_frames = []

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        cap = cv2.VideoCapture(camera_index)
        try:
            while cap.isOpened():
                grabbed, frame = cap.read()
                if not grabbed:
                    # A failed grab yields frame=None, which would crash cvtColor; stop cleanly.
                    break

                _, results = captureLandmarks(frame, holistic)
                action_frames.append(landmark_to_numpy(results))

                if len(action_frames) == sequence_length:
                    # Add the batch axis: (1, sequence_length, features).
                    input_array = np.array(action_frames).reshape(1, sequence_length, -1)
                    # verbose=0 stops Keras printing a progress bar for every frame.
                    result = model.predict(input_array, verbose=0)
                    action_index = np.argmax(result)
                    if result[0][action_index] > threshold:
                        predicted_action = actions[action_index]
                        cv2.putText(frame, predicted_action, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                    # Drop the oldest frame so the next append refills the window.
                    action_frames.pop(0)

                cv2.imshow('Webcam Feed', frame)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera and close the window, even if a step above raised.
            cap.release()
            cv2.destroyAllWindows()
    return np.array(action_frames)

In [47]:
# Start the live webcam demo; press 'q' in the 'Webcam Feed' window to stop.
# The returned landmark array is the last expression, so it is echoed as the cell output.
preidct_action()



array([[ 0.62323606,  0.50408149, -0.97352183, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.61570561,  0.50488746, -0.97872335, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.61476481,  0.50123483, -0.9344551 , ...,  0.61759186,
         0.7908107 , -0.02854566],
       ...,
       [ 0.61318576,  0.49062583, -1.11915755, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.60701203,  0.49007639, -0.99303025, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.6006549 ,  0.49506456, -1.00214934, ...,  0.        ,
         0.        ,  0.        ]])

In [23]:
# Write a diagram of the loaded model to 'my_model.png', including layer names and shapes.
# plot_model needs pydot and graphviz installed; without them it only reports an
# installation hint (see the message captured below this cell).
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='my_model.png', show_shapes=True, show_layer_names=True, rankdir='TB')

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
