In [None]:
import cv2
import os
import numpy as np

def extract_frames(video_path, num_frames=16, frame_size=(224, 224)):
    """Sample a fixed number of evenly spaced, resized frames from a video.

    Frames are read at indices ``0, k, 2k, ...`` with
    ``k = max(total_frames // num_frames, 1)`` and each one is resized to
    ``frame_size``. If the video yields fewer than ``num_frames`` frames
    (a short clip, or a read that fails partway through), the last frame that
    was read is repeated so the result always has ``num_frames`` entries and
    clips can be stacked into a single batch array.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        num_frames: Number of frames to return; must be at least 1.
        frame_size: ``(width, height)`` target passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` whose first axis has length ``num_frames``; the
        remaining axes are those of the resized frames.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1, or if not a single
            frame could be read from ``video_path``.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    frames = []
    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        # Clamp to 1 so clips shorter than num_frames are still read
        # frame by frame instead of re-reading frame 0.
        frame_interval = max(total_frames // num_frames, 1)

        for i in range(num_frames):
            video.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
            ret, frame = video.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, frame_size))
    finally:
        # Release the capture even when decoding or resizing raises.
        video.release()

    if not frames:
        raise ValueError(f"Could not read any frames from {video_path!r}")

    # Pad short clips with copies of the last good frame to keep the length fixed.
    frames.extend([frames[-1]] * (num_frames - len(frames)))
    return np.array(frames)


In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the model
def create_rnn_model(input_shape, num_classes):
    """Build a Conv3D + LSTM classifier for fixed-length video clips.

    The network stacks three ``Conv3D`` / ``MaxPooling3D`` stages (32, 64 and
    128 filters, each pooled by ``(2, 2, 2)``), flattens every remaining time
    step, runs two 128-unit LSTM layers over the resulting sequence and ends in
    a softmax layer with ``num_classes`` outputs.

    The input is declared with an explicit ``Input`` layer rather than an
    ``input_shape=`` argument on the first convolution, which recent Keras
    versions warn against in ``Sequential`` models.

    Args:
        input_shape: Shape of one clip without the batch axis, e.g.
            ``(num_frames, height, width, channels)``.
        num_classes: Number of output classes.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    return models.Sequential([
        layers.Input(shape=input_shape),

        # Conv3D stages extract features from the stack of frames
        layers.Conv3D(32, kernel_size=(3, 3, 3), activation='relu', padding='same'),
        layers.MaxPooling3D(pool_size=(2, 2, 2)),

        layers.Conv3D(64, kernel_size=(3, 3, 3), activation='relu', padding='same'),
        layers.MaxPooling3D(pool_size=(2, 2, 2)),

        layers.Conv3D(128, kernel_size=(3, 3, 3), activation='relu', padding='same'),
        layers.MaxPooling3D(pool_size=(2, 2, 2)),

        # Flatten each remaining time step so the LSTMs receive one vector per step
        layers.TimeDistributed(layers.Flatten()),
        layers.LSTM(128, return_sequences=True),
        layers.LSTM(128),

        # Fully connected classification head
        layers.Dense(num_classes, activation='softmax'),
    ])

# Label space and clip geometry
num_classes = 51  # HMDB51 action categories
input_shape = (16, 224, 224, 3)  # Example shape: (num_frames, height, width, channels)

# Build the network, then configure it for integer-encoded labels
model = create_rnn_model(input_shape=input_shape, num_classes=num_classes)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Example data preparation
def prepare_data(video_paths, labels):
    """Decode every video into a frame stack and pair it with its label.

    Args:
        video_paths: Iterable of video paths, each passed to ``extract_frames``.
        labels: Iterable of labels, one per entry of ``video_paths``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the per-video frame
        arrays along a new leading axis, ``y`` holds the labels in the same
        order.

    Raises:
        ValueError: If the number of paths and labels differ (``zip`` would
            otherwise drop the surplus silently), or if a video produces a
            frame array whose shape differs from the first one, which would
            prevent stacking into a single batch.
    """
    # Materialise both inputs so generators are supported and lengths can be compared.
    video_paths = list(video_paths)
    labels = list(labels)
    if len(video_paths) != len(labels):
        raise ValueError(
            f"Got {len(video_paths)} video paths but {len(labels)} labels"
        )

    X = []
    for path in video_paths:
        frames = np.asarray(extract_frames(path))
        # Fail with the offending path rather than an opaque NumPy error later.
        if X and frames.shape != X[0].shape:
            raise ValueError(
                f"Frames from {path!r} have shape {frames.shape}, "
                f"expected {X[0].shape}"
            )
        X.append(frames)

    return np.array(X), np.array(labels)

# Placeholder inputs -- swap in real data
train_video_paths = [
    'path_to_video1',
    'path_to_video2',
]  # Add your video paths
train_labels = [0, 1]  # Corresponding labels for the videos

X_train, y_train = prepare_data(video_paths=train_video_paths, labels=train_labels)

# Fit the model, holding out 20% of the samples for validation
model.fit(
    x=X_train,
    y=y_train,
    batch_size=2,
    epochs=10,
    validation_split=0.2,
)


In [None]:
# Example evaluation on placeholder test clips
test_video_paths = [
    'path_to_test_video1',
    'path_to_test_video2',
]  # Add your test video paths
test_labels = [0, 1]  # Corresponding labels for the test videos

X_test, y_test = prepare_data(video_paths=test_video_paths, labels=test_labels)

# Score the model on the test set and report its accuracy
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test accuracy: {test_accuracy:.4f}")
