In [1]:
import cv2
import numpy as np
from collections import deque
import tensorflow as tf




In [2]:
# Target size (in pixels) that every video frame is resized to before it is fed to the model.
IMAGE_HEIGHT = 128
IMAGE_WIDTH = 128

# Number of consecutive frames that make up one input sequence for the model.
SEQUENCE_LENGTH = 20

# Directory holding the dataset.
# NOTE(review): the previous comment called this the UCF50 dataset, but the directory
# name and the class list below look like a custom dataset — confirm.
DATASET_DIR = "Final_Dataset"

# Names of the action classes; the order is used to label the model's output scores.
CLASSES_LIST = [
    "Idle",
    "Block",
    "Kicking",
    "Punching",
]


In [15]:
def predict_on_webcam(SEQUENCE_LENGTH, convlstm_model):
    '''
    Run live action recognition on the default webcam and overlay the per-class scores.

    Frames are read from webcam 0, resized to IMAGE_WIDTH x IMAGE_HEIGHT, scaled to
    the [0, 1] range and kept in a sliding window holding the most recent
    SEQUENCE_LENGTH frames. Once the window is full, the model is queried on every
    new frame and the score of each class in CLASSES_LIST is drawn on the frame
    shown in the preview window. Press 'q' in the preview window to stop.

    Relies on the module-level IMAGE_HEIGHT, IMAGE_WIDTH and CLASSES_LIST.

    Args:
    SEQUENCE_LENGTH:  The fixed number of frames passed to the model as one sequence.
    convlstm_model:   The pre-trained model used for prediction. Its predict() is called
                      with a batch of one sequence, i.e. an array of shape
                      (1, SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, channels), and
                      with verbose=0 (assumes a Keras-style predict()).
    '''

    # Initialize the VideoCapture object to read from the default webcam (index 0).
    video_reader = cv2.VideoCapture(0)

    # Sliding window of pre-processed frames; the oldest frame is dropped automatically.
    frames_queue = deque(maxlen=SEQUENCE_LENGTH)

    # Scores shown before the first prediction is available: one zero per class.
    predicted_labels_probabilities = [0] * len(CLASSES_LIST)

    try:
        # Iterate for as long as the webcam stays open.
        while video_reader.isOpened():
            ok, frame = video_reader.read()

            # Stop when a frame cannot be read.
            if not ok:
                break

            # cv2.resize expects the target size as (width, height).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))

            # Scale pixel values to [0, 1] and push the frame into the window.
            frames_queue.append(resized_frame / 255.0)

            # Predict only once a full sequence has been collected.
            if len(frames_queue) == SEQUENCE_LENGTH:
                batch = np.expand_dims(np.asarray(list(frames_queue)), axis=0)
                # verbose=0 keeps the per-frame progress bar out of the notebook output.
                predicted_labels_probabilities = convlstm_model.predict(batch, verbose=0)[0]

            # Draw one "<class> <score>" line per class, 30 px apart, starting at y=30.
            for row, (class_name, probability) in enumerate(
                    zip(CLASSES_LIST, predicted_labels_probabilities), start=1):
                cv2.putText(frame, f'{class_name} {probability}', (10, 30 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Display the annotated frame.
            cv2.imshow('Webcam Action Recognition', frame)

            # Exit the loop when the 'q' key is pressed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even if prediction or
        # drawing raised, so the webcam is not left locked by the kernel.
        video_reader.release()
        cv2.destroyAllWindows()

In [None]:
from tensorflow.keras.layers import ConvLSTM2D

# First attempt: load the saved checkpoint with ConvLSTM2D passed explicitly as a
# custom object. If that raises ValueError, report it and retry the same file with
# the default loader. Note that the cell loading 'modelv5.h5' rebinds `model`.
try:
    model = tf.keras.models.load_model(
        'convlstm_model___Date_Time_2024_07_07__05_16_40___Loss_1.3186445236206055___Accuracy_0.48148149251937866.h5',
        custom_objects={'ConvLSTM2D': ConvLSTM2D},
    )
except ValueError as load_error:
    print(f"Error loading model: {load_error}")
    model = tf.keras.models.load_model(
        'convlstm_model___Date_Time_2024_07_07__05_16_40___Loss_1.3186445236206055___Accuracy_0.48148149251937866.h5'
    )

In [5]:
# Load the saved Keras model from 'modelv5.h5' (path is relative to the working
# directory) and bind it to `model`, which the webcam demo call below receives.
model = tf.keras.models.load_model('modelv5.h5')

In [None]:
# Start the live webcam demo; use the configured sequence length instead of
# repeating the literal so the call stays in sync with the config cell.
predict_on_webcam(SEQUENCE_LENGTH, model)