In [1]:
import os
import cv2
import random
import numpy as np
import datetime as dt
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt
 
from moviepy.editor import *
%matplotlib inline
 
from sklearn.model_selection import train_test_split
 
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

In [2]:
# Pin the random number generators of Python, NumPy and TensorFlow to one
# shared seed so that repeated runs of the notebook are as reproducible as possible.
seed_constant = 27
random.seed(seed_constant)
np.random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [3]:
# Spatial size (in pixels) that every extracted frame is resized to.
Image_height = 64
Image_width = 64

# Number of frames sampled from each video and fed to the model as one sequence.
sequence_length = 30

# Root directory of the dataset; it is joined with a class name to find that class's videos.
Dataset_dir = "UCF50"

# Activities the model will learn to predict (the position in this list is the label index).
class_list = [
    "Basketball",
    "Biking",
    "WalkingWithDog",
    "HorseRace",
    "PlayingGuitar",
    "PlayingPiano",
    "Punch",
    "Skiing",
    "GolfSwing",
]

In [27]:
def frames_extraction(video_path):
    """Sample, resize and normalize up to ``sequence_length`` frames of a video.

    Frames are sampled at a regular stride of
    ``max(total_frames // sequence_length, 1)`` starting from frame 0. Every
    sampled frame is resized to ``Image_width`` x ``Image_height`` pixels and
    divided by 255 (which maps 8-bit pixel values into the 0-1 range).

    Args:
        video_path: Path of the video file, opened with ``cv2.VideoCapture``.

    Returns:
        A list with the processed frames in sampling order. The list holds
        fewer than ``sequence_length`` entries (possibly none) when a frame
        could not be read, so callers must check its length.
    """
    # list to store the processed frames of the video
    frames_list = []

    # read the video
    video_reader = cv2.VideoCapture(video_path)

    try:
        # total number of frames in the video
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # interval after which a frame is added to the list (never below 1)
        skip_frames_window = max(video_frames_count // sequence_length, 1)

        for frame_counter in range(sequence_length):

            # jump to the next frame position to sample
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

            success, frame = video_reader.read()

            # stop at the first unreadable frame (e.g. the video is too short)
            if not success:
                break

            # cv2.resize expects dsize as (width, height); the resulting array
            # is (Image_height, Image_width, channels), matching the model input.
            resized_frame = cv2.resize(frame, (Image_width, Image_height))

            # normalize the resized frame and keep it
            frames_list.append(resized_frame / 255)
    finally:
        # always free the capture handle, even if processing a frame failed
        video_reader.release()

    return frames_list


In [28]:
def create_dataset():
    """Extract the frames of every video of the selected classes.

    For each class name in ``class_list`` the files found in
    ``Dataset_dir/<class name>`` are passed to ``frames_extraction``. A video
    is kept only when exactly ``sequence_length`` frames were extracted.

    Files are visited in sorted order: ``os.listdir`` returns entries in
    arbitrary order, which would make the sample order (and therefore the
    seeded train/test split) differ between machines.

    Returns:
        A tuple ``(features, labels, video_files_paths)``:
        features:          numpy array built from the extracted frames of the kept videos.
        labels:            numpy array with the ``class_list`` index of each kept video.
        video_files_paths: list with the path of each kept video on disk.
    """
    # Lists collecting the features, labels and video file paths.
    features = []
    labels = []
    video_files_paths = []

    # Iterate through all the classes mentioned in the classes list.
    for class_index, class_name in enumerate(class_list):

        # Display the name of the class whose data is being extracted.
        print(f'Extracting Data of Class: {class_name}')

        # Sorted so that the dataset order does not depend on the file system.
        files_list = sorted(os.listdir(os.path.join(Dataset_dir, class_name)))

        for file_name in files_list:

            # Get the complete video path.
            video_file_path = os.path.join(Dataset_dir, class_name, file_name)

            # Extract the frames of the video file.
            frames = frames_extraction(video_file_path)

            # Ignore videos that yielded fewer frames than sequence_length.
            if len(frames) == sequence_length:
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)

    # Convert the lists to numpy arrays.
    features = np.asarray(features)
    labels = np.array(labels)

    # Return the frames, class indexes and video file paths.
    return features, labels, video_files_paths

In [29]:
# Create the dataset: extract the frames of every class in class_list and get
# back the feature array, the class-index labels and the source video paths.
features, labels, video_files_paths = create_dataset()

Extracting Data of Class: Basketball
Extracting Data of Class: Biking
Extracting Data of Class: WalkingWithDog
Extracting Data of Class: HorseRace
Extracting Data of Class: PlayingGuitar
Extracting Data of Class: PlayingPiano
Extracting Data of Class: Punch
Extracting Data of Class: Skiing
Extracting Data of Class: GolfSwing


In [30]:
# Convert the class indexes into one-hot-encoded vectors with Keras's to_categorical.
# num_classes is passed explicitly: otherwise it is inferred from the largest label
# present, which would yield vectors narrower than the model's len(class_list)
# outputs whenever the last classes contributed no usable videos.
one_hot_encoded_labels = to_categorical(labels, num_classes = len(class_list))

In [31]:
# Hold out 25% of the samples as the test set; the remaining 75% form the
# training set. The split is shuffled with the shared seed.
features_train, features_test, labels_train, labels_test = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size = 0.25,
    shuffle = True,
    random_state = seed_constant,
)

In [56]:
def create_LRCN_model():
    """Build the LRCN network, print its summary and return it.

    The model applies four time-distributed Conv2D + MaxPooling2D stages
    (the first three followed by Dropout(0.25)) to every frame, flattens
    each frame's feature maps, feeds the resulting sequence to an LSTM with
    32 units and ends with a softmax Dense layer with one output per entry
    of ``class_list``.

    Returns:
        The constructed (not yet compiled) Sequential model.
    """
    # One entry per convolutional stage:
    # (number of filters, pooling window, whether dropout follows the pooling)
    conv_stages = (
        (16, (4, 4), True),
        (32, (4, 4), True),
        (64, (2, 2), True),
        (64, (2, 2), False),
    )

    model = Sequential()

    for stage_index, (filters, pool_size, use_dropout) in enumerate(conv_stages):
        # Only the very first layer declares the shape of the input sequence.
        first_layer_kwargs = {}
        if stage_index == 0:
            first_layer_kwargs['input_shape'] = (sequence_length, Image_height, Image_width, 3)

        convolution = Conv2D(filters, (3, 3), padding='same', activation='relu')
        model.add(TimeDistributed(convolution, **first_layer_kwargs))
        model.add(TimeDistributed(MaxPooling2D(pool_size)))

        if use_dropout:
            model.add(TimeDistributed(Dropout(0.25)))

    # Flatten the feature maps of each frame so the LSTM receives one vector per time step.
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(32))

    # One softmax output per activity class.
    model.add(Dense(len(class_list), activation='softmax'))

    model.summary()

    return model

In [57]:
# Construct the required LRCN model; create_LRCN_model() also prints the layer summary.
LRCN_model = create_LRCN_model()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_18 (TimeDis (None, 30, 64, 64, 16)    448       
_________________________________________________________________
time_distributed_19 (TimeDis (None, 30, 16, 16, 16)    0         
_________________________________________________________________
time_distributed_20 (TimeDis (None, 30, 16, 16, 16)    0         
_________________________________________________________________
time_distributed_21 (TimeDis (None, 30, 16, 16, 32)    4640      
_________________________________________________________________
time_distributed_22 (TimeDis (None, 30, 4, 4, 32)      0         
_________________________________________________________________
time_distributed_23 (TimeDis (None, 30, 4, 4, 32)      0         
_________________________________________________________________
time_distributed_24 (TimeDis (None, 30, 4, 4, 64)     

In [58]:
# Attach the loss function, optimizer and reported metric to the model.
LRCN_model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'Adam',
    metrics = ["accuracy"],
)

# Stop training once the validation loss has not improved for 15 epochs and
# roll the model back to the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor = 'val_loss',
    patience = 15,
    mode = 'min',
    restore_best_weights = True,
)

# Train on the training set, keeping 20% of it aside for validation.
LRCN_model_training_history = LRCN_model.fit(
    x = features_train,
    y = labels_train,
    epochs = 70,
    batch_size = 4,
    shuffle = True,
    validation_split = 0.2,
    callbacks = [early_stopping_callback],
)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70


In [59]:
# Measure the trained model on the held-out test set.
model_evaluation_history = LRCN_model.evaluate(x = features_test, y = labels_test)



In [61]:
# Unpack the two values returned by the evaluation: the loss and the accuracy.
model_evaluation_loss, model_evaluation_accuracy = model_evaluation_history

# Today's date, formatted as year_month_day, becomes part of the file name.
date_time_format = '%Y_%m_%d'
current_date_time_dt = dt.datetime.now()
current_date_time_string = current_date_time_dt.strftime(date_time_format)

# Encode the date, loss and accuracy in the file name so that several saved
# models are easy to tell apart.
model_file_name = (
    f'LRCN_model_Date_Time_{current_date_time_string}'
    f'_Loss_{model_evaluation_loss}'
    f'_Accuracy_{model_evaluation_accuracy}.h5'
)

# Save the model.
LRCN_model.save(model_file_name)

In [6]:
from tensorflow import keras

# Restore a previously saved model from disk.
# NOTE(review): the file name is hard-coded from an earlier training run — confirm it exists before running.
LRCN_model = keras.models.load_model(
    "LRCN_model_Date_Time_2022_05_01_Loss_0.4112197756767273_Accuracy_0.8713826537132263.h5"
)

In [5]:
def predict_single_action(video_file_path, sequence_length):
    """Predict the action performed in a video and print the result.

    ``sequence_length`` frames are sampled at a regular stride from the
    video, resized to ``Image_width`` x ``Image_height`` pixels, divided by
    255 and passed as a single sequence to the global ``LRCN_model``. The
    predicted class name and its probability are printed when the
    probability exceeds 0.5; otherwise a "not detected" message is printed.

    Args:
        video_file_path: Path of the video file to classify.
        sequence_length: Number of frames to sample and feed to the model.

    Raises:
        ValueError: If fewer than ``sequence_length`` frames could be read,
            since the model cannot be fed an incomplete sequence.
    """
    # Pre-processed frames extracted from the video.
    frames_list = []

    # Initialize the VideoCapture object to read from the video file.
    video_reader = cv2.VideoCapture(video_file_path)

    try:
        # Get the number of frames in the video.
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Interval after which a frame is added to the list (never below 1).
        skip_frames_window = max(video_frames_count // sequence_length, 1)

        for frame_counter in range(sequence_length):

            # Set the current frame position of the video.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

            success, frame = video_reader.read()

            # Stop at the first frame that cannot be read.
            if not success:
                break

            # cv2.resize expects dsize as (width, height); the resulting array
            # is (Image_height, Image_width, channels), matching the model input.
            resized_frame = cv2.resize(frame, (Image_width, Image_height))

            # Normalize the resized frame by dividing it by 255 and keep it.
            frames_list.append(resized_frame / 255)
    finally:
        # Release the VideoCapture object even if reading or predicting fails.
        video_reader.release()

    # An incomplete sequence would only fail later with an opaque shape error.
    if len(frames_list) != sequence_length:
        raise ValueError(
            f"Could only read {len(frames_list)} of the {sequence_length} "
            f"required frames from '{video_file_path}'."
        )

    # Pass the frames as a batch of one sequence and get the class probabilities.
    predicted_labels_probabilities = LRCN_model.predict(np.expand_dims(frames_list, axis = 0))[0]

    # Get the index of the class with the highest probability.
    predicted_label = np.argmax(predicted_labels_probabilities)

    # Get the class name using the retrieved index.
    predicted_class_name = class_list[predicted_label]

    # Display the predicted action along with the prediction confidence.
    if predicted_labels_probabilities[predicted_label] > 0.5:
        print(f'Action Predicted: {predicted_class_name}\nConfidence: {predicted_labels_probabilities[predicted_label]}')
    else:
        print("Couldn't detected")

In [7]:
# Video to classify.
input_video_file_path = 'test_videos/fazılsay.mp4'

# Run a single prediction on the test video and print the outcome.
predict_single_action(input_video_file_path, sequence_length)

Action Predicted: PlayingPiano
Confidence: 0.9897225499153137
