<a href="https://colab.research.google.com/github/Parsa-Baniamerian/Human_Activity_Recognition/blob/main/Human_Activity_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import random
import datetime as dt
import cv2

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import EarlyStopping

# **Download and Visualize the Data**

In [4]:
%%capture
# The %%capture cell magic above silences the (very long) wget/unrar console output.

# Download the UCF50 Dataset
# NOTE(review): --no-check-certificate disables TLS certificate verification for this
# download; confirm it is still required for this host before keeping it.
!wget --no-check-certificate https://www.crcv.ucf.edu/data/UCF50.rar

# Extract the Dataset
# The later cells read from a "UCF50" directory in the working directory, which is
# presumably what this archive unpacks to - verify after extraction.
!unrar x UCF50.rar

In [5]:
# Show the first frame of one randomly chosen video per class, labelled with the class name.
plt.figure(figsize=(20,20))

# Keep only class directories, sorted so the panel order is stable between runs
all_classes_names = sorted(
  entry for entry in os.listdir("UCF50")
  if os.path.isdir(os.path.join("UCF50", entry))
)

# Size the subplot grid from the number of classes instead of assuming exactly 50
# (5 columns x 10 rows for the full UCF50 dataset)
grid_columns = 5
grid_rows = max(math.ceil(len(all_classes_names) / grid_columns), 1)

for i, selected_class_name in enumerate(all_classes_names):

  # Retrieve the list of all the video files present in the selected class directory
  video_files_names_list = os.listdir(f"UCF50/{selected_class_name}")

  # Nothing to show for an empty class directory (random.choice would raise on it)
  if not video_files_names_list:
    continue

  # Randomly select a video file from the list retrieved from the selected class directory
  selected_video_file_name = random.choice(video_files_names_list)

  # Initialize a VideoCapture object to read from the video file
  video_reader = cv2.VideoCapture(f"UCF50/{selected_class_name}/{selected_video_file_name}")

  # Read the first frame of the video file
  frame_was_read, bgr_frame = video_reader.read()

  # Release the VideoCapture object
  video_reader.release()

  # Skip files that could not be decoded; bgr_frame is None in that case
  if not frame_was_read:
    continue

  # Convert BGR to RGB
  rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)

  # Write the class name on the video frame
  cv2.putText(rgb_frame, selected_class_name, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)

  # Display the frame
  plt.subplot(grid_rows, grid_columns, i+1)
  plt.imshow(rgb_frame)
  plt.axis("off")

KeyboardInterrupt: 

<Figure size 2000x2000 with 0 Axes>

# **Preprocess the Dataset**

In [6]:
# Specify the height and width to which each video frame will be resized in the dataset
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64

# Specify the number of frames of a video that will be fed to the model as one sequence
SEQUENCE_LENGTH = 30

# Root directory of the extracted dataset; it holds one sub-directory per class
DATASET_DIR = "UCF50"

# Classes used for training. Every name must match a sub-directory of DATASET_DIR
# exactly, because create_dataset() lists DATASET_DIR/<class name>.
CLASSES_LIST = ["PlayingViolin", "Skiing", "HorseRiding", "Diving", "Biking"]

In [None]:
def frame_extraction(video_path):
  """
  Extract up to SEQUENCE_LENGTH evenly spaced frames from a video, resized and normalized.
  Args:
    video_path: The path of the video in the disk, whose frames are to be extracted.
  Returns:
    frames_list: A list containing the resized and normalized frames of the video.
                 It holds fewer than SEQUENCE_LENGTH frames (possibly none) when a
                 frame could not be read, e.g. for a short or unreadable video.

  """

  frames_list = []

  video_reader = cv2.VideoCapture(video_path)

  # try/finally guarantees the capture is released even if decoding or resizing raises
  try:
    # Get the total number of frames in the video
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the interval after which frames will be added to the list
    # (at least 1, so short videos are read frame by frame)
    skip_frames_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

    # Loop through the video frames
    for frame_counter in range(SEQUENCE_LENGTH):

      # Set the current frame position of the video
      video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

      # Reading the frame from the video
      res, frame = video_reader.read()

      # Stop as soon as a frame cannot be read; the caller discards incomplete sequences
      if not res:
        break

      # Resize the frame to a fixed size. cv2.resize expects dsize as (width, height)
      resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))

      # Normalize the resized frame by dividing it by 255 so that each pixel value lies between 0 and 1
      normalized_frame = resized_frame / 255

      frames_list.append(normalized_frame)
  finally:
    video_reader.release()

  return frames_list

In [None]:
def create_dataset():
  """
  This function will extract the data of the selected classes and create the required dataset.
  Videos are visited in sorted file-name order within each class so that the sample
  order (and therefore any seeded split made from it) is the same on every run.
  Returns:
    features:     A numpy array containing the extracted frames from the videos.
    labels:       A numpy array containing the indexes of the classes associated with the videos.
    videos_paths: A list containing the paths of the videos in the disk.

  """

  features = []
  labels = []
  videos_paths = []

  for class_index, class_name in enumerate(CLASSES_LIST):
    print(f"Extracting data of class: {class_name}")

    # os.listdir returns entries in arbitrary order; sort them for reproducibility
    files_list = sorted(os.listdir(os.path.join(DATASET_DIR, class_name)))
    for file_name in files_list:
      video_path = os.path.join(DATASET_DIR, class_name, file_name)
      frames = frame_extraction(video_path)

      # Ignore the videos that yielded fewer frames than SEQUENCE_LENGTH
      if len(frames) == SEQUENCE_LENGTH:
        features.append(frames)
        labels.append(class_index)
        videos_paths.append(video_path)

  features = np.asarray(features)
  labels = np.array(labels)

  return features, labels, videos_paths

In [9]:
# Create dataset
features, labels, videos_paths = create_dataset()

# Convert labels into one-hot encoded vectors.
# num_classes is pinned to len(CLASSES_LIST): without it to_categorical infers the width
# from the largest label present, which is too narrow for the model's softmax output
# if the last class contributed no usable videos.
one_hot_labels = to_categorical(labels, num_classes=len(CLASSES_LIST))

# Split data into train (75%) and test (25%) sets; random_state fixes the shuffle
features_train, features_test, labels_train, labels_test = train_test_split(
  features, one_hot_labels, test_size=0.25, shuffle=True, random_state=30
)

SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='? (<ipython-input-9-1fc01ea52822>, line 8)

# **ConvLSTM model**

In [13]:
def create_convlstm_model():
  """
  Build the ConvLSTM classifier used for activity recognition.

  The network stacks four identical stages (ConvLSTM2D -> MaxPooling3D -> Dropout)
  with 4, 8, 14 and 16 filters, then flattens the result into a softmax layer with
  one unit per entry of CLASSES_LIST. The model summary is printed before returning.
  Returns:
    model: It is the required constructed convlstm model.

  """

  # One input sample is a sequence of SEQUENCE_LENGTH RGB frames
  sequence_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

  network = Sequential()

  for stage_index, filter_count in enumerate((4, 8, 14, 16)):
    conv_options = dict(
      filters=filter_count,
      kernel_size=(3,3),
      activation="tanh",
      data_format="channels_last",
      recurrent_dropout=0.2,
      return_sequences=True,
    )

    # Only the very first layer declares the input shape
    if stage_index == 0:
      conv_options["input_shape"] = sequence_shape

    network.add(ConvLSTM2D(**conv_options))
    # Pool size 1 on the first axis leaves the sequence length untouched
    network.add(MaxPooling3D(pool_size=(1,2,2), padding="same", data_format="channels_last"))
    network.add(TimeDistributed(Dropout(0.2)))

  # Classification head
  network.add(Flatten())
  network.add(Dense(len(CLASSES_LIST), activation="softmax"))

  network.summary()

  return network

In [16]:
# Construct the model (create_convlstm_model also prints the layer summary)
convlstm_model = create_convlstm_model()
# Optional: uncomment the next line to also save a diagram of the model structure to a PNG file
#plot_model(convlstm_model, to_file ="convlstm_model_structure_plot.png", show_shapes=True, show_layer_names=True)