In [1]:
# Imports

%cd /content/drive/MyDrive/video_classification_rnn

import os
import numpy as np
import random
import cv2
from google.colab.patches import cv2_imshow
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, SimpleRNN, GRU, LSTM, Dense, Flatten, TimeDistributed
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from models import RNN_model, GRU_model, LSTM_model
from load_video import load_video

/content/drive/MyDrive/video_classification_rnn


In [2]:
# Parameter settings

# Spatial size (in pixels) that every video frame is resized to.
frame_width = 112
frame_height = 112

# NOTE(review): the fit() cells below hard-code batch_size=3, so this value
# is not currently used by training.
batch_size = 1
# Number of epochs passed to every fit() call.
epochs = 10

# Dataset root: one sub-directory per class label, each holding that class's videos.
dataset_path = "/content/drive/MyDrive/dataset/joon_del"


In [3]:
# Create directories

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("weights", exist_ok=True)

### Preparing Data

In [4]:
# Count maximum number of frames in all videos

# One sub-directory per class label; sorted so the scan order is reproducible.
num_classes = sorted(os.listdir(dataset_path))
count_frames = []

for class_label in num_classes:
  class_dir = os.path.join(dataset_path, class_label)

  for video in sorted(os.listdir(class_dir)):
    video_path = os.path.join(class_dir, video)
    capture = cv2.VideoCapture(video_path)

    try:
      # Frame count as reported by the container metadata (nothing is decoded here).
      total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
      # Release the decoder handle so handles do not pile up across videos.
      capture.release()

    count_frames.append(total_frames)

if not count_frames:
  # np.max([]) would fail with an opaque message; say what is actually wrong.
  raise ValueError(f"No videos found under {dataset_path!r}")

# The longest video (in frames) defines the padded sequence length.
max_seq_len = int(np.max(count_frames))
# NOTE: the misspelled name is kept because later cells read `num_sampels`.
num_sampels = len(count_frames)

In [13]:
def load_video(path, resize):
    """Decode every frame of a video into resized RGB frames.

    Args:
        path: Path of the video file to read.
        resize: Target size forwarded to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``.

    Returns:
        A ``(frames, num_frames)`` tuple. ``frames`` is the list of decoded
        frames (resized, converted from BGR to RGB) and ``num_frames`` is the
        number of frames in that list. Both are empty/zero when nothing could
        be decoded.
    """
    import cv2

    cap = cv2.VideoCapture(path)
    frames = []

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, resize)
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Always free the decoder, even if resizing/conversion raised.
        cap.release()

    # Report the decoded count rather than CAP_PROP_FRAME_COUNT: the metadata
    # value can differ from what read() actually yields, and callers size
    # their masks from this number.
    return frames, len(frames)

In [5]:
def preparing_data():
  """Load every video under ``dataset_path`` into padded training arrays.

  Reads the module-level ``dataset_path``, ``num_sampels``, ``max_seq_len``,
  ``frame_height`` and ``frame_width``.

  Returns:
    ``([frames_dataset, frame_masks], labels)`` where

    * ``frames_dataset`` is a float32 array of shape
      ``(num_videos, max_seq_len, frame_height, frame_width, 3)`` with pixel
      values scaled to [0, 1] and zero padding after the last real frame,
    * ``frame_masks`` is a boolean array of shape
      ``(num_sampels, max_seq_len)`` that is True for real frames,
    * ``labels`` is an integer array of shape ``(num_videos, 1)``: 1 for
      videos in the class directory named "1", 0 otherwise.
  """
  labels = []
  frames_dataset = []

  # Sorted so the sample order is reproducible across runs.
  class_labels = sorted(os.listdir(dataset_path))
  frame_masks = np.zeros(shape=(num_sampels, max_seq_len), dtype="bool")
  video_count = 0

  for class_label in class_labels:
    class_dir = os.path.join(dataset_path, class_label)

    for video in sorted(os.listdir(class_dir)):
      video_path = os.path.join(class_dir, video)
      # cv2.resize takes its target size as (width, height).
      frames, _ = load_video(video_path, (frame_width, frame_height))

      padded_frames = np.zeros(shape=(max_seq_len, frame_height, frame_width, 3), dtype="float32")
      # Use the number of frames actually decoded, clipped to max_seq_len, so
      # the copy below can never index past the padded buffer and the mask
      # always matches the data.
      current_video_seq_len = min(max_seq_len, len(frames))

      # Padding video frames (skipped for an unreadable/empty video, which
      # stays all-zero with an all-False mask).
      if current_video_seq_len:
        padded_frames[:current_video_seq_len] = (
            np.asarray(frames[:current_video_seq_len], dtype="float32") / 255.
        )

      frames_dataset.append(padded_frames)
      frame_masks[video_count, :current_video_seq_len] = True

      video_count += 1

      # Create labels
      labels.append(1 if class_label == "1" else 0)

  # Convert to np.array
  frames_dataset = np.array(frames_dataset)
  labels = np.array(labels)

  # Reshape labels to (num_videos, 1)
  labels = labels[..., np.newaxis]

  return [frames_dataset, frame_masks], labels

In [6]:
# Call the data-preparation function.
# X is the list [frames_dataset, frame_masks]; Y is the label array.

X, Y = preparing_data()

In [7]:
# Split data

# Frames, masks and labels are split in one call so their rows stay aligned.
# random_state pins the shuffle so the train/validation split is reproducible.
X_train, X_val, mask_train, mask_val, Y_train, Y_val = train_test_split(
    X[0], X[1], Y, test_size=0.2, random_state=42)
X_train.shape, Y_train.shape, mask_train.shape

((17, 300, 112, 112, 3), (17, 1), (17, 300))

In [9]:
# Plot

# for frame in X_train[8]:
#     plt.subplot(2, 2, 1)
#     plt.imshow(frame[:, :, :], cmap="gray")
#     plt.show()

### Define Models, Compile and fit

#### 01- RNN

In [8]:
# Build the RNN classifier for sequences padded to max_seq_len frames.
rnn_model = RNN_model(max_seq_len)

# Labels are integer class ids, hence the sparse categorical loss.
rnn_model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

# validation_data is passed as an (inputs, targets) tuple, the form
# documented for Model.fit; the model takes [frames, masks] as its inputs.
rnn_model.fit([X_train, mask_train], Y_train,
              validation_data=([X_val, mask_val], Y_val),
              batch_size=3, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f212804ae50>

In [9]:
# Persist the trained RNN weights to the project's weights/ directory (.h5 file).
rnn_model.save_weights('/content/drive/MyDrive/video_classification_rnn/weights/rnn_model.h5')

In [None]:
# Print the RNN model's layer summary.
rnn_model.summary()

### Inference

In [27]:
def preparing_data(video_path):
  """Prepare a single video as a batch of one for inference.

  NOTE: this reuses the name of the training-time ``preparing_data()`` defined
  earlier in the notebook and replaces it once this cell has run.

  Reads the module-level ``frame_height`` and ``frame_width``.

  Args:
    video_path: Path of the video file to classify.

  Returns:
    ``(frames, frame_masks, num_frames)`` where ``frames`` is a float32 array
    of shape ``(1, num_frames, frame_height, frame_width, 3)`` scaled to
    [0, 1], ``frame_masks`` is an all-True boolean array of shape
    ``(1, num_frames)``, and ``num_frames`` is the number of decoded frames.

  Raises:
    ValueError: If no frame could be decoded from ``video_path``.
  """
  # cv2.resize takes its target size as (width, height).
  frames, _ = load_video(video_path, (frame_width, frame_height))

  # Size everything from the frames actually decoded so that the frame array
  # and the mask always agree.
  num_frames = len(frames)
  if num_frames == 0:
    raise ValueError(f"No frames could be read from {video_path!r}")

  # Scale to [0, 1] and add the leading batch axis.
  frames = np.asarray(frames, dtype="float32") / 255.
  frames = frames[np.newaxis, ...]

  # No padding at inference time: every frame is a real frame.
  frame_masks = np.ones(shape=(1, num_frames), dtype="bool")

  return frames, frame_masks, num_frames



In [28]:
# Preprocess one sample clip from the class "1" folder and display the resulting shapes.
input_frames, input_masks, num_frames = preparing_data("/content/drive/MyDrive/dataset/joon_del/1/009.mp4")
input_frames.shape, input_masks.shape, num_frames

((1, 90, 112, 112, 3), (1, 90), 90)

In [29]:
# Display names indexed by class id (0, 1).
class_name = ["حالت معمولی", "الهی صد هزار مرتبه شکر"]

# Rebuild the network for this clip's frame count, then restore the trained weights.
model = RNN_model(num_frames)
model.load_weights("/content/drive/MyDrive/video_classification_rnn/weights/rnn_model.h5")
pred = model.predict([input_frames, input_masks])

# The batch holds exactly one clip: take the arg-max over that clip's class
# scores instead of over the flattened batch.
predicted_class = int(np.argmax(pred[0]))
label = class_name[predicted_class]
label

'الهی صد هزار مرتبه شکر'

In [None]:
# Text colour, in OpenCV's BGR channel order.
color = (255, 255, 0)
video = cv2.VideoCapture("/content/drive/MyDrive/dataset/joon_del/1/009.mp4")

height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
print(height, width)

# The frames written below are 3-channel BGR images, so the writer must be a
# colour writer (isColor defaults to True); a grayscale writer (isColor=0)
# does not match the frames it is given.
video_writer = cv2.VideoWriter('/content/drive/MyDrive/video_classification_rnn/out_put.avi', cv2.VideoWriter_fourcc(*'MJPG'), 10, (width, height))

try:
    while True:
        ret, frame = video.read()
        if not ret:
            break

        # NOTE(review): OpenCV's Hershey fonts cannot render non-ASCII text
        # (unsupported symbols are drawn as question marks), so a Persian
        # `label` will not be legible here — consider an ASCII label.
        cv2.putText(frame, label, (width // 12, height // 12), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 1,
                    cv2.LINE_AA)

        video_writer.write(frame)

        # Matplotlib expects RGB while OpenCV decodes to BGR.
        plt.subplot(2, 2, 1)
        plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        plt.show()
finally:
    # Release both handles even if drawing or writing fails midway, so the
    # output file is finalized.
    video.release()
    video_writer.release()

In [20]:
# Switch to the project directory (IPython automagic form of %cd) so that inference.py resolves.
cd /content/drive/MyDrive/video_classification_rnn/

/content/drive/MyDrive/video_classification_rnn


In [23]:
# Run the standalone inference script on a single clip.
# NOTE(review): this path lies outside dataset_path (".../dataset/joon_del") — confirm the file exists.
!python3 inference.py --input_path /content/drive/MyDrive/dataset/1/015.mp4

2022-04-23 10:02:04.852772: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-04-23 10:02:05.500628: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 983851008 exceeds 10% of free system memory.
2022-04-23 10:02:05.861094: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 5247205376 exceeds 10% of free system memory.
tcmalloc: large alloc 5247205376 bytes == 0x55723c942000 @  0x7f23bdc28b6b 0x7f23bdc48379 0x7f237bf24257 0x7f236a3ae30f 0x7f236a44ad2b 0x7f236a25ed97 0x7f236a25f600 0x7f236a25f708 0x7f237516cc3b 0x7f236a5f1228 0x7f236a580843 0x7f236a581918 0x7f236ffb7ce1 0x7f236ffb49a3 0x7f236aca98d5 0x7f23bd9fb6db 0x7f23bcb3061f
tcmalloc: large alloc 8854691840 bytes == 0x557375d62000 @  0x7f23bdc461e7 0x7f23721b3f3f 0x7f2374cb6eca 0x7f2375114464 0x7f237516445e 0x7f23751656f7 0x7f23751666fa 0x7f23751684ee 0x7f237516c842 0x7f237516cd2f 0x7f236a5f1228 0x7f236a580843 

#### 02- GRU

In [15]:
# Build the GRU classifier for sequences padded to max_seq_len frames.
gru_model = GRU_model(max_seq_len, frame_height, frame_width)

# Labels are integer class ids, hence the sparse categorical loss.
gru_model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

# validation_data is passed as an (inputs, targets) tuple, the form
# documented for Model.fit; the model takes [frames, masks] as its inputs.
gru_model.fit([X_train, mask_train], Y_train,
              validation_data=([X_val, mask_val], Y_val),
              batch_size=3, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f09b432b5d0>

In [None]:
# Print the GRU model's layer summary.
gru_model.summary()

In [18]:
# Persist the trained GRU weights to the project's weights/ directory (.h5 file).
gru_model.save_weights('/content/drive/MyDrive/video_classification_rnn/weights/gru_model.h5')

#### 03- LSTM

In [16]:
# Build the LSTM classifier for sequences padded to max_seq_len frames.
lstm_model = LSTM_model(max_seq_len, frame_height, frame_width)

# Labels are integer class ids, hence the sparse categorical loss.
lstm_model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])

# validation_data is passed as an (inputs, targets) tuple, the form
# documented for Model.fit; the model takes [frames, masks] as its inputs.
lstm_model.fit([X_train, mask_train], Y_train,
               validation_data=([X_val, mask_val], Y_val),
               batch_size=3, epochs=epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f09d5d37990>

In [None]:
# Print the LSTM model's layer summary.
lstm_model.summary()

In [19]:
# Persist the trained LSTM weights to the project's weights/ directory (.h5 file).
lstm_model.save_weights('/content/drive/MyDrive/video_classification_rnn/weights/lstm_model.h5')

### Inference

In [None]:
# References
# - https://machinelearningmastery.com/use-different-batch-sizes-training-predicting-python-keras/
# - https://keras.io/api/layers/recurrent_layers/time_distributed/#:~:text=TimeDistributed(layer%2C%20**kwargs),to%20be%20the%20temporal%20dimension.
# - ffmpeg (used in the next cell to cut a clip)
# - https://keras.io/examples/vision/video_classification/

In [None]:
# Cut the first 8 seconds (-ss 00:00:00, -t 00:00:08) of 0117.mp4 into 11.mp4.
!ffmpeg -i /content/drive/MyDrive/dataset/0117.mp4 -ss 00:00:00 -t 00:00:08 -async 1 /content/drive/MyDrive/dataset/11.mp4