<a href="https://colab.research.google.com/github/Eddy-Emmanuel/Video-Classification/blob/main/Video_Classfication.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call while downloading.
CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'ucf101:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F841381%2F1436057%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20241007%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20241007T014737Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D7e178e64a3c1087d5ac4f8b29edb3a20fc7459add0a5f21f61b2e9fe6007c89b4ae405da88fbdbb608f737cba242b984bc49814eeae678734a9d1b279a17d2fdd5c75317889f08dbc305a1ff72420c52c331c0d489d92438dac4b3acb77fe717c45f1346ed4d81cdd0102d77708480ec5284fa0bb1ce0fc57afbcf13e3fe83ad98363e09980d738b3bcbb72d16c97f7975dbe682fc409a5db29590cf9cf14e97aad3cf82c2bb4750f9c7e674f111accadb74a246512a1ea3bc20cdb73217bc0432eca7f9b0983b1fe29a9660f3252f4963056b7100f266f3c8bca5fa6add915bf489b51ea358303360c56bbbc151c5d4be03699d2865a0c5912a1479cb0cf860'

# Root directory that each data source is extracted into (one sub-directory per source).
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created next to the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced by any code visible in this notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
# Start from an empty input tree, then make sure both Kaggle-style
# directories exist.
shutil.rmtree('/kaggle/input', ignore_errors=True)
for kaggle_dir in (KAGGLE_INPUT_PATH, KAGGLE_WORKING_PATH):
    os.makedirs(kaggle_dir, 0o777, exist_ok=True)

# Expose the directories as ../input and ../working; a link that already
# exists is left untouched.
for link_target, link_name in ((KAGGLE_INPUT_PATH, 'input'),
                               (KAGGLE_WORKING_PATH, 'working')):
    try:
        os.symlink(link_target, os.path.join("..", link_name), target_is_directory=True)
    except FileExistsError:
        pass

# Download every "<directory>:<percent-encoded URL>" entry of
# DATA_SOURCE_MAPPING and unpack it into KAGGLE_INPUT_PATH/<directory>.
# Failures are reported and the loop moves on to the next entry.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only so a literal ':' in the URL part cannot
    # break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it the progress bar
            # cannot be scaled, so it simply stays empty.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            while True:
                data = fileres.read(CHUNK_SIZE)
                if not data:
                    break
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch reopens the file
            # by name and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would
                # replace the module and break the next iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# This is needed to be able to build the model
# Upgrade TensorFlow and Keras to specific versions
!pip install --upgrade tensorflow==2.17.0
!pip install --upgrade keras==3.5.0

In [None]:
from warnings import filterwarnings
filterwarnings(action="ignore")

import cv2
import imageio
from IPython.display import Image, display

import numpy as np
import pandas as pd
from glob import glob

from tqdm.auto import tqdm
tqdm.pandas()

import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras import Input, layers, Sequential, Model, callbacks, optimizers

import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use("ggplot")
%matplotlib inline

In [None]:
class Config:
    """Notebook-wide settings, exposed as plain class attributes."""

    # Directory the UCF101 archive is extracted into.
    data_path = "/kaggle/input/ucf101"
    # Seed handed to the train/test split.
    random_state = 2024
    # Fraction of the samples held out as the test set.
    test_size = 0.3
    # Mini-batch size used when fitting the model.
    batch_size = 32

In [None]:
# Shared settings instance read by the later cells (data path, split and batch settings).
project_config = Config()

In [None]:
def GetVideoDf(path:str):
    """Index the ``.avi`` files found three directory levels below ``path``.

    ``glob`` is called without ``recursive=True``, so every ``**`` in the
    pattern matches exactly one path component.

    Args:
        path: Root directory to search.

    Returns:
        A DataFrame with a ``file_path`` column and a ``label`` column,
        where the label is the name of the directory that directly
        contains the file (second-to-last ``/``-separated component).
    """
    video_paths = list(tqdm(glob(path+"/**/**/**/**.avi")))
    return pd.DataFrame({
        "file_path": video_paths,
        "label": [video_path.split("/")[-2] for video_path in video_paths],
    })

In [None]:
# Build the file/label index for every video under the dataset directory.
df = GetVideoDf(path=project_config.data_path)

# (rows, columns) of the index.
print(df.shape)

# Preview the first rows.
df.head()

In [None]:
# Number of videos per label, shown as a plain dict.
df["label"].value_counts().to_dict()

## Keep only 5 video classes to classify, due to limited computational resources

In [None]:
# Keep only the rows whose label is one of the five selected classes and
# renumber the index from 0.
filtered_df = df[df["label"].isin(["CricketShot",
                                   "TennisSwing",
                                   "PlayingDhol",
                                   "HorseRiding",
                                   "PlayingCello"
                                   ])].reset_index(drop=True)

print(filtered_df.shape)

filtered_df.head()

In [None]:
# Bar chart of the number of videos per selected class.
target_dist = filtered_df["label"].value_counts().plot(kind="bar",
                                                       figsize=(20, 6),
                                                       xlabel="Label",
                                                       ylabel="Count",
                                                       title="Target Distribution")

# Write each bar's count just above it, centred horizontally on the bar.
for p in target_dist.patches:
    height = p.get_height()
    target_dist.text(p.get_x() + p.get_width() / 2,
                     height,
                     f'{int(height)}',
                     ha='center',
                     va='bottom')

# Re-apply the existing tick labels without rotation.
target_dist.set_xticklabels(labels=target_dist.get_xticklabels(), rotation=0)

plt.show()

In [None]:
# Encoder used to turn the label strings into integer class codes.
encoder = LabelEncoder()

In [None]:
# Fit the encoder on the labels and store the resulting integer codes in a new column.
filtered_df["encoded_label"] = encoder.fit_transform(filtered_df["label"])

In [None]:
# Display the filtered DataFrame.
filtered_df

In [None]:
# Lookup from integer class code to class name, in the order the encoder
# stores its classes.
label_index = dict(enumerate(encoder.classes_))

label_index

In [None]:
def GetFrames(file_paths: list, fps: int):
    """Sample, resize and normalise a fixed number of frames from each video.

    Args:
        file_paths: Iterable of video file paths to open with
            ``cv2.VideoCapture``.
        fps: Number of frames to sample per video. Despite the name this is
            a frame count, not a rate: one frame is taken every
            ``total_frames // fps`` frames until ``fps`` frames are collected.

    Returns:
        ``np.array`` built from the per-video lists of frames. Each frame is
        converted from BGR to RGB, resized to 64x64 and passed through
        ``A.Normalize``.

    Note:
        Videos that cannot be opened, or whose reported frame count is
        smaller than ``fps`` (or any video when ``fps <= 0``), are reported
        and skipped. The result can therefore hold fewer entries than
        ``file_paths``; callers pairing it with per-file labels must account
        for that.
    """
    # Built once: the pipeline is the same for every frame of every video.
    transform = A.Compose([
        A.Resize(height=64, width=64, p=1),
        A.Normalize(p=1)
    ])

    frames_per_video = []

    for path in tqdm(file_paths):
        video = cv2.VideoCapture(path)

        # try/finally guarantees the capture is released on every exit path,
        # including the early `continue`s below.
        try:
            if not video.isOpened():
                print(f"Error loading {path}")
                continue

            total_frames_per_video = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            strides = total_frames_per_video // fps if fps > 0 else 0

            if strides == 0:
                print(f"Not enough frames in: {path}, total_frames_per_video: {total_frames_per_video}")
                continue

            extracted_frames = []
            frame_count = 0
            while True:
                suc, frame = video.read()

                if not suc:
                    break

                # Keep every `strides`-th frame, starting with the first one.
                if (frame_count % strides) == 0:
                    bgr_2_rbg = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    augmented_image = transform(image=bgr_2_rbg)["image"]
                    extracted_frames.append(augmented_image)

                frame_count += 1

                # Stop decoding as soon as the requested number is reached.
                if len(extracted_frames) >= fps:
                    break

            frames_per_video.append(extracted_frames)
        finally:
            video.release()

    return np.array(frames_per_video)

In [None]:
# Sample 30 frames from every selected video.
# NOTE(review): GetFrames skips videos it cannot read, so X can end up with
# fewer entries than filtered_df — confirm the lengths match before pairing
# X with the labels.
X = GetFrames(filtered_df["file_path"], 30)

In [None]:
# Inspect the shape of the extracted frame array.
X.shape

In [None]:
def ShowVideo(frames, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Render a clip of normalised frames as an inline GIF.

    The frames produced by ``GetFrames`` were standardised by
    ``A.Normalize``, so they must be de-standardised (``x * std + mean``)
    before scaling to 0-255; multiplying the standardised values by 255
    directly gives distorted colours.

    Args:
        frames: Array of frames for one clip, channels last.
        mean: Per-channel mean used when the frames were normalised.
            Defaults to the ImageNet values that ``A.Normalize`` uses.
        std: Per-channel standard deviation used when the frames were
            normalised. Defaults to the ImageNet values of ``A.Normalize``.

    Side effects:
        Writes ``video.gif`` to the current directory and displays it.
    """
    frames = np.asarray(frames)
    # Undo the standardisation to get back to the [0, 1] range.
    restored = frames * np.asarray(std) + np.asarray(mean)
    clipped_frame = np.clip(restored * 255, 0, 255).astype(np.uint8)
    imageio.mimsave("video.gif", clipped_frame, fps=frames.shape[0])
    display(Image(filename="./video.gif", embed=True, height=400, width=400))

In [None]:
# Preview the clip stored at index 30 as a GIF.
ShowVideo(X[30])

In [None]:
# Integer class codes used as the classification targets.
y = filtered_df["encoded_label"]

In [None]:
# Shuffled, stratified train/test split using the fraction and seed from the config.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y,
                                                test_size=project_config.test_size,
                                                stratify=y,
                                                shuffle=True,
                                                random_state=project_config.random_state)

print(Xtrain.shape, Xtest.shape, ytrain.shape, ytest.shape)

In [None]:
# One-hot encode the integer targets, as needed by the categorical
# cross-entropy loss the model is compiled with.
ytrain = to_categorical(ytrain)
ytest = to_categorical(ytest)

In [None]:
def BuildModel(input_shape: tuple, num_classes: int):
    """Assemble the ResNet50 + LSTM + attention video classifier.

    Args:
        input_shape: Shape of one sample. Its first entry is dropped to get
            the shape handed to ResNet50, so the leading axis is the one the
            ``TimeDistributed`` wrappers iterate over.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled Keras ``Model``. Its summary is printed as a side
        effect.
    """
    # ImageNet-initialised convolutional backbone, applied to single frames.
    frame_shape = input_shape[1:]
    backbone = ResNet50(weights='imagenet',
                        include_top=False,
                        input_shape=frame_shape)

    # Run the backbone on every frame, then flatten each frame's feature map.
    clip_input = Input(shape=input_shape)
    per_frame_maps = layers.TimeDistributed(backbone)(clip_input)
    per_frame_vectors = layers.TimeDistributed(layers.Flatten())(per_frame_maps)

    # Sequence model over the per-frame feature vectors.
    sequence = layers.LSTM(64, return_sequences=True)(per_frame_vectors)
    sequence = layers.Dropout(0.2)(sequence)

    # Self-attention over the LSTM outputs (query and value are the same
    # sequence), multiplied element-wise back onto that sequence.
    self_attention = layers.Attention()([sequence, sequence])
    gated_sequence = layers.Multiply()([sequence, self_attention])

    # Average over the sequence axis for a fixed-size vector, then classify.
    pooled = layers.GlobalAveragePooling1D()(gated_sequence)
    class_probs = layers.Dense(num_classes, activation='softmax')(pooled)

    model = Model(inputs=clip_input, outputs=class_probs)
    model.summary()

    return model

In [None]:
# X.shape[1:] drops the sample axis; one output unit per entry of label_index.
attention_model = BuildModel(input_shape=X.shape[1:], num_classes=len(label_index))

In [None]:
# Save a diagram of the model to model.png, annotated with shapes, dtypes,
# layer names, activations and trainable flags.
plot_model(attention_model,
           to_file='model.png',
           show_shapes=True,
           show_dtype=True,
           show_layer_names=True,
           show_layer_activations=True,
           show_trainable=True,)

In [None]:
def RecallScore(y_true, y_pred):
    """Recall computed over the tensors passed in.

    Both the product ``y_true * y_pred`` and ``y_true`` are clipped to
    [0, 1] and rounded before summing, so only predictions that round to 1
    count as hits. ``K.epsilon()`` guards against division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def PrecisionScore(y_true, y_pred):
    """Precision computed over the tensors passed in.

    Both the product ``y_true * y_pred`` and ``y_pred`` are clipped to
    [0, 1] and rounded before summing, so only predictions that round to 1
    count as predicted positives. ``K.epsilon()`` guards against division
    by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positives + K.epsilon())

def F1Score(y_true, y_pred):
    """Harmonic mean of ``PrecisionScore`` and ``RecallScore``.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = PrecisionScore(y_true, y_pred)
    r = RecallScore(y_true, y_pred)
    ratio = (p*r)/(p+r+K.epsilon())
    return 2*ratio

In [None]:
# Adam with a learning rate of 1e-5, categorical cross-entropy loss, and
# accuracy plus the three custom metrics defined in this notebook.
attention_model.compile(optimizer=optimizers.Adam(learning_rate=1e-5),
                        loss='categorical_crossentropy',
                        metrics=['accuracy',RecallScore, PrecisionScore, F1Score])

In [None]:
# Check if a GPU is available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"Available GPU(s): {gpus}")
else:
    print("No GPU available.")

In [None]:
# Train for up to 50 epochs, holding out 20% of the training data for validation.
# EarlyStopping stops after 10 epochs without improvement of its default
# monitored quantity. The scheduler keeps the learning rate unchanged for
# epochs 0-9 and multiplies it by exp(-0.1) on every later epoch.
history = attention_model.fit(Xtrain,ytrain,
                              validation_split=.2,
                              batch_size=project_config.batch_size,
                              epochs=50,
                              callbacks=[callbacks.EarlyStopping(patience=10),
                                         callbacks.LearningRateScheduler(lambda epoch, lr: lr if epoch < 10 else float(lr * K.exp(-0.1)))])

In [None]:
# Per-epoch training history as a DataFrame (one row per epoch).
result = pd.DataFrame(history.history)


result

In [None]:
# Training and validation metric curves per epoch.
# NOTE(review): these column names assume Keras reports the custom metric
# functions under snake_case names — confirm against history.history keys.
result[["accuracy", "f1_score", "precision_score", "recall_score",
        "val_accuracy", "val_f1_score", "val_precision_score", "val_recall_score"]].plot(figsize=(15, 5),
                                                                                         title="Model Evaluation",
                                                                                         xlabel="Epoch",
                                                                                         ylabel="Score")

plt.show()

In [None]:
# Training and validation loss per epoch.
result[["loss", "val_loss"]].plot(figsize=(15, 5),
                                  title="Model loss evaluation",
                                  xlabel="Epoch",
                                  ylabel="Score")

plt.show()

In [None]:
# Learning rate recorded for each epoch.
result["learning_rate"].plot(figsize=(15, 5),
                             title="Learning rate per epoch",
                             xlabel="Epoch",
                             ylabel="Learning rate")

plt.show()

### Prediction on the test set

In [None]:
# Predicted class code per test clip: index of the largest model output along axis 1.
test_pred = np.argmax(attention_model.predict(Xtest), axis=1)

test_pred

In [None]:
# Recover the integer class codes from the one-hot encoded test targets.
ytrue = np.argmax(ytest, axis=1)

ytrue

## Classification report

In [None]:
# Per-class report on the test set, keyed by integer class code.
print(metrics.classification_report(ytrue, test_pred))

## Confusion Matrix

In [None]:
# Confusion matrix of the test-set predictions.
plt.figure(figsize=(15, 10))
# Pin the label order to label_index so the tick labels line up with the
# matrix rows/columns even if a class is missing from the test predictions.
cm = metrics.confusion_matrix(ytrue, test_pred, labels=list(label_index))
sns.heatmap(cm,
            annot=True,
            fmt="d",  # counts are integers; avoid scientific notation
            xticklabels=list(label_index.values()),
            yticklabels=list(label_index.values()))

# sklearn puts the true labels on the rows (heatmap y-axis) and the
# predictions on the columns (heatmap x-axis).
plt.xlabel("Prediction")
plt.ylabel("True value")
plt.show()

## Eddy