In [1]:
   # STEP 1: IMPORTS AND SETUP
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (ConvLSTM2D, BatchNormalization, MaxPooling3D,
                                     TimeDistributed, Dropout, Flatten, Dense, GlobalAveragePooling2D)
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator

np.random.seed(42)
tf.random.set_seed(42)

2025-05-15 19:12:20.086114: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747336340.340696      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747336340.416504      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# STEP 2: PARAMETERS
FRAME_HEIGHT = 112
FRAME_WIDTH = 112
SEQUENCE_LENGTH = 16
BATCH_SIZE = 8
EPOCHS = 15
NUM_CLASSES = 5

In [3]:
# STEP 3: FEATURE EXTRACTION

def feature_extraction(video_path):
    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    skip_interval = max(int(frame_count / SEQUENCE_LENGTH), 1)

    for counter in range(SEQUENCE_LENGTH):
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, counter * skip_interval)
        ret, frame = video_reader.read()
        if not ret:
            break
        frame = cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT))
        frame = frame.astype('float32') / 255.0
        frames_list.append(frame)

    video_reader.release()

    while len(frames_list) < SEQUENCE_LENGTH:
        frames_list.append(np.zeros((FRAME_HEIGHT, FRAME_WIDTH, 3)))

    return np.array(frames_list)

In [4]:
# STEP 4: LOAD DATA

def load_video_data(paths, augment=False):
    datagen = ImageDataGenerator(rotation_range=15, width_shift_range=0.1,
                                 height_shift_range=0.1, horizontal_flip=True, zoom_range=0.1)
    features, labels = [], []
    label_index = 0

    for folder in paths:
        video_files = [f for f in os.listdir(folder) if f.endswith('.avi')][:100]
        for file in video_files:
            video_path = os.path.join(folder, file)
            frames = feature_extraction(video_path)
            if augment:
                frames = np.array([datagen.random_transform(f) for f in frames])
            features.append(frames)
            labels.append(label_index)
        label_index += 1

    return np.array(features, dtype='float32'), tf.keras.utils.to_categorical(np.array(labels), NUM_CLASSES)

In [5]:
# STEP 5: PATHS AND LABELS
class_labels = pd.read_csv("../input/ucf101/UCF101TrainTestSplits-RecognitionTask/ucfTrainTestlist/classInd.txt", sep=' ', header=None)
class_labels.columns = ['index', 'label']
paths = [f"../input/ucf101/UCF101/UCF-101/{label}/" for label in class_labels.label.values[:5]]

In [6]:
# STEP 6: LOAD DATA
X, y = load_video_data(paths, augment=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# STEP 7: CLASS WEIGHTING
y_labels = np.argmax(y_train, axis=1)
class_weights = dict(enumerate(compute_class_weight('balanced', classes=np.unique(y_labels), y=y_labels)))

In [8]:
# STEP 8: MODEL BUILDING
from keras.layers import Input
from keras.models import Model, Sequential


def build_model(input_shape, num_classes):
    base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(FRAME_HEIGHT, FRAME_WIDTH, 3))
    feature_extractor = Model(inputs=base_model.input, outputs=base_model.get_layer("block4a_expand_activation").output)

    model = Sequential([
        Input(shape=input_shape),
        TimeDistributed(feature_extractor),
        TimeDistributed(BatchNormalization()),

        ConvLSTM2D(filters=64, kernel_size=(3, 3), padding='same', return_sequences=True),
        BatchNormalization(),
        MaxPooling3D(pool_size=(1, 2, 2)),
        Dropout(0.3),

        ConvLSTM2D(filters=128, kernel_size=(3, 3), padding='same', return_sequences=False),
        BatchNormalization(),
        Dropout(0.3),

        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    return model


      


input_shape = (SEQUENCE_LENGTH, FRAME_HEIGHT, FRAME_WIDTH, 3)
model = build_model(input_shape, NUM_CLASSES)
model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

2025-05-15 19:16:11.832315: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [9]:
import tensorflow as tf
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available: 0


In [10]:
# STEP 9: TRAINING
callbacks = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.1, patience=5),
    ModelCheckpoint('best_model.keras', save_best_only=True)
]

model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    class_weight=class_weights,
    callbacks=callbacks
) 

Epoch 1/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m376s[0m 4s/step - accuracy: 0.3472 - loss: 2.1438 - val_accuracy: 0.6600 - val_loss: 1.1845 - learning_rate: 1.0000e-04
Epoch 2/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 4s/step - accuracy: 0.7438 - loss: 0.5924 - val_accuracy: 0.7800 - val_loss: 0.8167 - learning_rate: 1.0000e-04
Epoch 3/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 4s/step - accuracy: 0.8645 - loss: 0.4224 - val_accuracy: 0.8600 - val_loss: 0.5292 - learning_rate: 1.0000e-04
Epoch 4/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 4s/step - accuracy: 0.8759 - loss: 0.2624 - val_accuracy: 0.9100 - val_loss: 0.3221 - learning_rate: 1.0000e-04
Epoch 5/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 4s/step - accuracy: 0.9442 - loss: 0.1557 - val_accuracy: 0.8900 - val_loss: 0.2500 - learning_rate: 1.0000e-04
Epoch 6/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7a8c603f9090>

In [11]:
# STEP 10: EVALUATION
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4s/step - accuracy: 0.9166 - loss: 0.1800
Test Accuracy: 94.00%


In [12]:
# STEP 11: SAVE MODEL
model.save("video_classifier.h5")

In [13]:
# STEP 12: VIDEO PREDICTION FUNCTION
def predict_video_class(video_path, model_path="video_classifier.h5"):
    model = load_model(model_path)
    frames = feature_extraction(video_path)
    input_frames = np.expand_dims(frames, axis=0)  # shape (1, 16, 112, 112, 3)
    predictions = model.predict(input_frames)
    predicted_index = np.argmax(predictions[0])
    predicted_class = class_labels.label.values[30:][predicted_index]  # offset due to class slicing
    confidence = predictions[0][predicted_index]
    print(f"Predicted Class: {predicted_class} ({confidence*100:.2f}%)")
    return predicted_class, confidence