In [None]:
# NOTE(review): in a notebook, `%pip install` targets the running kernel's environment;
# consider using it and pinning a wandb version so the run is reproducible.
!pip install wandb


In [2]:
from google.colab import drive
# Mount Google Drive; the project code and the dataset are read from /content/drive below.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Imports

%cd /content/drive/MyDrive/video_classification_rnn

import numpy as np
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow import keras
from imutils import paths
import pandas as pd
import os 

from load_video import load_video
from models import Models
import wandb
from wandb.keras import WandbCallback

/content/drive/MyDrive/video_classification_rnn


In [None]:
# Download ucf101_top5 dataset

# !pip install -q git+https://github.com/tensorflow/docs
# !wget -q https://git.io/JGc31 -O /content/drive/MyDrive/dataset/ucf101_top5.tar.gz

In [4]:
# Parameter settings for the ucf101_top5 dataset


# NOTE(review): left empty and not referenced by the visible cells — confirm it is still needed.
ucf101_top5_data_path = ""
# Side length (pixels) of the square frames produced when videos are loaded.
frame_height = frame_width = 112
# NOTE(review): the training cells use the lowercase `batch_size` / `epochs`;
# the uppercase BATCH_SIZE / EPOCHS are not referenced by the visible cells — confirm which pair is intended.
BATCH_SIZE = 70
batch_size = 10
EPOCHS = 10
epochs = 15

# Number of frames kept per video; shorter videos are zero-padded and masked.
max_seq_len = 20
# NOTE(review): not referenced by the visible cells — confirm it is still needed.
NUM_FEATURES = 2048

In [7]:
# Create directories

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs("weights", exist_ok=True)

## Preparing data for ucf101_top5 dataset

In [5]:
# Switch to the dataset folder so the relative paths used below resolve there.
%cd /content/drive/MyDrive/dataset/ucf101_top5


# Each CSV has one row per video; later cells read the "video_name" and "tag" columns.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")

# Show a random sample of rows as a quick sanity check.
train_df.sample(10)

/content/drive/MyDrive/dataset/ucf101_top5
Total videos for training: 594
Total videos for testing: 224


Unnamed: 0,video_name,tag
493,v_TennisSwing_g10_c04.avi,TennisSwing
377,v_ShavingBeard_g10_c05.avi,ShavingBeard
43,v_CricketShot_g14_c02.avi,CricketShot
42,v_CricketShot_g14_c01.avi,CricketShot
580,v_TennisSwing_g23_c05.avi,TennisSwing
453,v_ShavingBeard_g22_c04.avi,ShavingBeard
59,v_CricketShot_g16_c04.avi,CricketShot
123,v_PlayingCello_g08_c06.avi,PlayingCello
45,v_CricketShot_g14_c04.avi,CricketShot
534,v_TennisSwing_g16_c03.avi,TennisSwing


In [6]:
# The following two methods are taken from this tutorial:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub


def crop_center_square(frame):
    """Return the largest centered square region of ``frame``.

    ``frame`` is indexed as (rows, columns, ...). The square's side equals the
    smaller of the two leading dimensions, and the result is obtained by
    slicing the input.
    """
    height, width = frame.shape[:2]
    side = min(height, width)
    half = side // 2
    top = height // 2 - half
    left = width // 2 - half
    return frame[top : top + side, left : left + side]


def load_ucf101_video(path, max_frames=0, resize=(frame_width, frame_height)):
    """Decode a video into center-cropped, resized RGB frames.

    Args:
        path: Path of the video file handed to ``cv2.VideoCapture``.
        max_frames: Stop after this many frames have been collected. With the
            default ``0`` the limit never triggers and the whole stream is read.
        resize: Target size passed to ``cv2.resize``, which expects
            ``(width, height)``; the default therefore lists the width first.

    Returns:
        A tuple ``(frames, num_frames)``. ``frames`` is the list of processed
        frames. ``num_frames`` is the frame count reported by the capture
        (``cv2.CAP_PROP_FRAME_COUNT``); it is not derived from ``frames`` and
        need not equal ``len(frames)``.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        # Queried inside the try block so the capture is released even if this fails.
        num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            # OpenCV returns channels in BGR order; reorder them to RGB.
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return frames, num_frames

In [7]:
# Lookup layer that maps each class-name string found in the training tags to an
# integer id; num_oov_indices=0 reserves no index for out-of-vocabulary labels.
label_processor = keras.layers.StringLookup(
    vocabulary=np.unique(train_df["tag"]),
    num_oov_indices=0,
)
print(label_processor.get_vocabulary())

['CricketShot', 'PlayingCello', 'Punch', 'ShavingBeard', 'TennisSwing']


In [8]:
def prepare_all_videos(df, root_dir):
    """Load every video listed in ``df`` into fixed-length, zero-padded arrays.

    Args:
        df: DataFrame with a ``video_name`` column (file names joined onto
            ``root_dir``) and a ``tag`` column (class labels).
        root_dir: Directory that contains the video files.

    Returns:
        ``([frames_dataset, frame_masks], labels)`` where

        * ``frames_dataset`` is a float32 array of shape
          ``(len(df), max_seq_len, frame_height, frame_width, 3)`` holding the
          frame pixel values divided by 255, zero-padded along the time axis;
        * ``frame_masks`` is a boolean array of shape ``(len(df), max_seq_len)``
          that is True for positions holding a decoded frame and False for padding;
        * ``labels`` is the output of ``label_processor`` for the ``tag`` column.
    """
    video_paths = df["video_name"].values.tolist()
    num_samples = len(video_paths)

    labels = label_processor(df["tag"].values[..., None]).numpy()

    # Preallocate the outputs once instead of collecting per-video arrays in a
    # list and copying them into a second array afterwards.
    frames_dataset = np.zeros(
        shape=(num_samples, max_seq_len, frame_height, frame_width, 3), dtype="float32"
    )
    frame_masks = np.zeros(shape=(num_samples, max_seq_len), dtype="bool")

    for idx, path in enumerate(video_paths):
        # Only the first max_seq_len frames are kept, so stop decoding there.
        frames, _ = load_ucf101_video(os.path.join(root_dir, path), max_frames=max_seq_len)
        frames = frames[:max_seq_len]

        # Build the mask from the frames that were actually decoded; the frame
        # count reported by the container is metadata and may not match.
        seq_len = len(frames)
        if seq_len:
            frames_dataset[idx, :seq_len] = np.asarray(frames) / 255.
        frame_masks[idx, :seq_len] = True

    return [frames_dataset, frame_masks], labels


# Video file names from each CSV are joined onto the "train" / "test" directory,
# which are relative paths and therefore resolved against the current working directory.
train_data, train_labels = prepare_all_videos(train_df, "train")
test_data, test_labels = prepare_all_videos(test_df, "test")


In [9]:
# Split data: hold out 20% of the training videos for validation (fixed seed).

X_train, X_val, mask_train, mask_val, Y_train, Y_val = train_test_split(
    train_data[0],
    train_data[1],
    train_labels,
    test_size=0.2,
    random_state=42,
)

print(
    "train data shape: ", X_train.shape, Y_train.shape, mask_train.shape,
    "validation data shape: ", X_val.shape, mask_val.shape, Y_val.shape,
)

train data shape:  (475, 20, 112, 112, 3) (475, 1) (475, 20) validation data shape:  (119, 20, 112, 112, 3) (119, 20) (119, 1)


### Define Models, Compile and fit

#### 01- RNN

In [None]:
# Start a Weights & Biases run; the training cells attach WandbCallback to report to it.
wandb.init(project='Video Classification-CRNN')

In [11]:
# Hyperparameters are stored on the wandb config object.
config = wandb.config
# Learning rate passed to the Adam optimizer in the training cells.
config.learning_rate = 0.001

In [12]:
# Models comes from the project-local models.py; it is constructed with the frame
# size and the sequence length (presumably the model input shape — confirm in models.py).
models = Models((frame_height, frame_width), max_seq_len)
rnn_model = models.RNN_model()

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
rnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

# Keras documents validation_data as a tuple (x_val, y_val), so pass a tuple
# rather than a list; the inputs themselves stay a list of [frames, mask].
rnn_model.fit(
    [X_train, mask_train],
    Y_train,
    validation_data=([X_val, mask_val], Y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[WandbCallback()],
)



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f584a85e8d0>

#### 02- GRU

In [14]:
# NOTE(review): this cell sits under the "02- GRU" heading but still builds the model
# with RNN_model(), exactly like the previous cell. If Models provides a GRU builder it
# was presumably intended here — confirm against models.py and switch the call.
models = Models((frame_height, frame_width), max_seq_len)
rnn_model = models.RNN_model()

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
rnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

# Keras documents validation_data as a tuple (x_val, y_val), so pass a tuple
# rather than a list; the inputs themselves stay a list of [frames, mask].
rnn_model.fit(
    [X_train, mask_train],
    Y_train,
    validation_data=([X_val, mask_val], Y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[WandbCallback()],
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f585dc23f50>