In [None]:
# Imports

%cd /content/drive/MyDrive/video_classification_rnn

import os
import numpy as np
import random
import cv2
from google.colab.patches import cv2_imshow
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, SimpleRNN, GRU, LSTM, Dense, Flatten, TimeDistributed
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from models import RNN_model, GRU_model, LSTM_model
from load_video import load_video

In [31]:
# Parameters setting

# Target frame size; passed to load_video as (frame_height, frame_width) and used to size the padded frame arrays.
frame_width = 112
frame_height = 112

# NOTE(review): batch_size is not referenced by the visible fit() calls, which hard-code batch_size=3 — confirm which value is intended.
batch_size = 1
# Number of training epochs passed to each model's fit() call.
epochs = 10

# Dataset root: listed as one sub-directory per class label, each containing that class's video files.
dataset_path = "/content/drive/MyDrive/dataset/joon_del"


In [32]:
# Create directories

# exist_ok=True keeps this cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("weights", exist_ok=True)

### Preparing Data

In [33]:
# Count maximum number of frames in all videos

# NOTE: despite its name, num_classes holds the list of class directory names.
num_classes = os.listdir(dataset_path)
count_frames = []

for class_label in num_classes:
  videos = os.listdir(os.path.join(dataset_path, class_label))

  for video in videos:
    video_path = os.path.join(dataset_path, class_label, video)
    capture = cv2.VideoCapture(video_path)

    try:
      # Frame count as reported by OpenCV for this file.
      # NOTE(review): this may differ from the number of frames load_video
      # actually decodes — confirm against load_video.
      total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
      # Release the handle so one open capture is not leaked per video.
      capture.release()

    count_frames.append(total_frames)

# Longest video (in frames) defines the padded sequence length;
# the number of collected counts is the number of samples.
max_seq_len = np.max(count_frames)
num_sampels = len(count_frames)

In [34]:
def preparing_data():
  """Load every video under ``dataset_path`` into zero-padded, normalized arrays.

  Reads the notebook globals ``dataset_path``, ``num_sampels``, ``max_seq_len``,
  ``frame_height`` and ``frame_width``.

  Returns:
    ([frames_dataset, frame_masks], labels) where
      frames_dataset: float32 array, one entry per video, each of shape
        (max_seq_len, frame_height, frame_width, 3) with pixel values
        divided by 255 and unused trailing frames left at zero.
      frame_masks: bool array of shape (num_sampels, max_seq_len); True for
        the first min(max_seq_len, num_frames) positions of each video.
      labels: int array of shape (num_videos, 1); 1 when the class directory
        is named "1", otherwise 0.
  """
  labels = []
  frames_dataset = []

  class_labels = os.listdir(dataset_path)
  frame_masks = np.zeros(shape=(num_sampels, max_seq_len), dtype="bool")
  video_count = 0

  for class_label in class_labels:
    videos = os.listdir(os.path.join(dataset_path, class_label))

    for video in videos:
      video_path = os.path.join(dataset_path, class_label, video)
      frames, num_frames = load_video(video_path, (frame_height, frame_width))

      padded_frames = np.zeros(shape=(max_seq_len, frame_height, frame_width, 3), dtype="float32")
      current_video_seq_len = min(max_seq_len, num_frames)

      # Normalize video frames. Clip to max_seq_len: that length comes from
      # container metadata, so a video that decodes to more frames would
      # otherwise index past the end of padded_frames.
      for i, frame in enumerate(np.array(frames)[:max_seq_len]):
        padded_frames[i, :] = frame / 255.

      frames_dataset.append(padded_frames)
      frame_masks[video_count, :current_video_seq_len] = 1

      video_count += 1

      # Create labels: directory "1" is the positive class, anything else is 0.
      labels.append(1 if class_label == "1" else 0)

  # Convert to np.array
  frames_dataset = np.array(frames_dataset)
  labels = np.array(labels)

  # Reshape labels to a column: (num_videos,) -> (num_videos, 1)
  labels = labels[..., np.newaxis]

  return [frames_dataset, frame_masks], labels

In [35]:
# Call data preparing function
# X is the list [frames_dataset, frame_masks]; Y is the label array with a trailing axis of length 1.

X, Y = preparing_data()

In [7]:
# Split data

# Frames (X[0]), masks (X[1]) and labels are split together so rows stay aligned.
# A fixed random_state makes the train/validation split reproducible across kernel restarts.
X_train, X_val, mask_train, mask_val, Y_train, Y_val = train_test_split(X[0], X[1], Y, test_size = 0.2, random_state = 42)
X_train.shape, Y_train.shape, mask_train.shape

((17, 300, 112, 112, 3), (17, 1), (17, 300))

### Define Models, Compile and fit

#### 01- RNN

In [8]:
# Build the RNN classifier for sequences padded to max_seq_len frames.
rnn_model = RNN_model(max_seq_len)

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
rnn_model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

# validation_data is passed as a tuple (inputs, targets), the form documented for Model.fit.
# NOTE(review): batch_size=3 is hard-coded here while the parameters cell defines batch_size = 1 — confirm which is intended.
rnn_model.fit([X_train, mask_train], Y_train, validation_data=([X_val, mask_val], Y_val), batch_size=3, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f212804ae50>

In [9]:
# Save the trained RNN weights as an .h5 file in the project's weights directory on Drive.
rnn_model.save_weights('/content/drive/MyDrive/video_classification_rnn/weights/rnn_model.h5')

In [None]:
# Print the RNN model's layer summary.
rnn_model.summary()

#### 02- GRU

In [15]:
# Build the GRU classifier for sequences padded to max_seq_len frames of the configured frame size.
gru_model = GRU_model(max_seq_len, frame_height, frame_width)

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
gru_model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

# validation_data is passed as a tuple (inputs, targets), the form documented for Model.fit.
# NOTE(review): batch_size=3 is hard-coded here while the parameters cell defines batch_size = 1 — confirm which is intended.
gru_model.fit([X_train, mask_train], Y_train, validation_data=([X_val, mask_val], Y_val), batch_size=3, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f09b432b5d0>

In [None]:
# Print the GRU model's layer summary.
gru_model.summary()

In [18]:
# Save the trained GRU weights as an .h5 file in the project's weights directory on Drive.
gru_model.save_weights('/content/drive/MyDrive/video_classification_rnn/weights/gru_model.h5')

#### 03- LSTM

In [16]:
# Build the LSTM classifier for sequences padded to max_seq_len frames of the configured frame size.
lstm_model = LSTM_model(max_seq_len, frame_height, frame_width)

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
lstm_model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

# validation_data is passed as a tuple (inputs, targets), the form documented for Model.fit.
# NOTE(review): batch_size=3 is hard-coded here while the parameters cell defines batch_size = 1 — confirm which is intended.
lstm_model.fit([X_train, mask_train], Y_train, validation_data=([X_val, mask_val], Y_val), batch_size=3, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f09d5d37990>

In [None]:
# Print the LSTM model's layer summary.
lstm_model.summary()

In [19]:
# Save the trained LSTM weights as an .h5 file in the project's weights directory on Drive.
lstm_model.save_weights('/content/drive/MyDrive/video_classification_rnn/weights/lstm_model.h5')

### Inference

In [27]:
def preparing_data(video_path):
  """Load one video and shape it as a single-sample batch for inference.

  NOTE(review): this reuses the name of the training-time ``preparing_data()``
  defined earlier in the notebook and shadows it once this cell has run.

  Args:
    video_path: path of the video file handed to ``load_video``.

  Returns:
    (frames, frames_masks, num_frames) where
      frames: float32 array of the loaded frames divided by 255, with a
        leading axis of length 1 added.
      frames_masks: bool array of shape (1, num_frames), all True (no
        padding is applied, so every position is valid).
      num_frames: the frame count returned by ``load_video``.
  """
  frames, num_frames = load_video(video_path, (frame_height, frame_width))

  # Normalize video frames and add the leading batch axis
  frames = np.array(frames).astype("float32") / 255.
  frames = frames[np.newaxis, ...]

  # Every frame of the single video is real data, so the mask is all True.
  frames_masks = np.ones(shape=(1, num_frames), dtype="bool")

  return frames, frames_masks, num_frames



In [28]:
# Prepare one sample video for inference; the trailing tuple is shown as the cell output to check the shapes.
input_frames, input_masks, num_frames = preparing_data("/content/drive/MyDrive/dataset/joon_del/1/009.mp4")
input_frames.shape, input_masks.shape, num_frames

((1, 90, 112, 112, 3), (1, 90), 90)

In [29]:
# Display names looked up by predicted class index (index 0, index 1).
class_name = ["حالت معمولی", "الهی صد هزار مرتبه شکر"]

# Rebuild the RNN with this video's frame count as the sequence length, then load the saved weights.
model = RNN_model(num_frames)
model.load_weights("/content/drive/MyDrive/video_classification_rnn/weights/rnn_model.h5")
pred = model.predict([input_frames, input_masks])

# np.argmax without an axis operates on the flattened prediction array.
# NOTE(review): assumes pred holds a single row of per-class scores — confirm against the model output.
predicted_class = np.argmax(pred)
label = class_name[predicted_class]
label

'الهی صد هزار مرتبه شکر'