In [1]:
import cv2
import os
import numpy as np

In [3]:
def extract_frames_from_videos(video_dir, output_dir, frame_rate=5):
    """Sample frames from every ``.mp4`` in each class folder and save them as JPEGs.

    For each class in ``['Runouts', 'Wickets']`` the videos found in
    ``<video_dir>/<class>`` are decoded, and every ``frame_rate``-th frame
    (frame 0, ``frame_rate``, ``2 * frame_rate``, ...) is resized to 224x224
    and written to ``<output_dir>/extracted_frames/<class>/`` as
    ``<video name>_frame_<frame index>.jpg``.

    Args:
        video_dir: Directory containing one sub-folder per class.
        output_dir: Root directory under which ``extracted_frames/<class>``
            is created (existing folders are reused).
        frame_rate: Sampling stride in frames (not frames per second).
            Must be positive.

    Raises:
        ValueError: If ``frame_rate`` is not positive.
        FileNotFoundError: If a class folder does not exist (from ``os.listdir``).
    """
    # Fail early instead of hitting a ZeroDivisionError in the middle of a video.
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be a positive number of frames, got {frame_rate!r}")

    class_names = ['Runouts', 'Wickets']  # Define class labels

    for class_name in class_names:
        class_folder = os.path.join(video_dir, class_name)
        output_class_dir = os.path.join(output_dir, 'extracted_frames', class_name)
        os.makedirs(output_class_dir, exist_ok=True)  # Create the output directory if it doesn't exist

        # os.listdir returns entries in arbitrary order; sort for repeatable runs.
        for video_file in sorted(os.listdir(class_folder)):
            # Case-insensitive so that e.g. "clip.MP4" is not silently skipped.
            if not video_file.lower().endswith('.mp4'):
                continue

            video_path = os.path.join(class_folder, video_file)
            video = cv2.VideoCapture(video_path)
            try:
                if not video.isOpened():
                    print(f"Skipping video (could not be opened): {video_file}")
                    continue

                # Video properties are only reported; they do not affect sampling.
                fps = video.get(cv2.CAP_PROP_FPS)  # Frames per second of the video
                total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))  # Total number of frames

                print(f"Processing video: {video_file}, FPS: {fps}, Total Frames: {total_frames}")

                video_stem = os.path.splitext(video_file)[0]
                frame_count = 0
                success, image = video.read()

                while success:
                    # Save frame every 'frame_rate' frames
                    if frame_count % frame_rate == 0:
                        frame_filename = f"{video_stem}_frame_{frame_count}.jpg"
                        frame_output_path = os.path.join(output_class_dir, frame_filename)

                        image_resized = cv2.resize(image, (224, 224))  # Resize for CNN
                        # imwrite reports failure through its return value, not an exception.
                        if not cv2.imwrite(frame_output_path, image_resized):
                            print(f"Warning: could not write frame: {frame_output_path}")

                    success, image = video.read()
                    frame_count += 1
            finally:
                # Always free the decoder, even if reading/resizing/writing raised.
                video.release()

            print(f"Finished processing video: {video_file}")

In [4]:
# extract_frames_from_videos reads the videos from <video_dir>/Runouts and <video_dir>/Wickets.
video_dir = 'RunOut/'  # Path to the dataset directory (relative to the working directory)
# Frames are written to, and later loaded from, <output_dir>/extracted_frames/<class>/.
output_dir = 'RunOut/data/'  # Root directory where all derived data is saved

In [5]:
# Run the extraction with the default stride (every 5th frame of each video is saved).
extract_frames_from_videos(video_dir, output_dir)

Processing video: #IndvsAus #ViratKohli .mp4, FPS: 30.0, Total Frames: 1627
Finished processing video: #IndvsAus #ViratKohli .mp4
Processing video: Australia penalised five runs for running on the pitch .mp4, FPS: 25.0, Total Frames: 2118
Finished processing video: Australia penalised five runs for running on the pitch .mp4
Processing video: Best Run-Outs in HBLPSL History (1).mp4, FPS: 59.94005994005994, Total Frames: 30546
Finished processing video: Best Run-Outs in HBLPSL History (1).mp4
Processing video: Best Run-Outs in HBLPSL History.mp4, FPS: 59.94005994005994, Total Frames: 30546
Finished processing video: Best Run-Outs in HBLPSL History.mp4
Processing video: Direct Hit! Some of the best run-outs in recent years.mp4, FPS: 25.0, Total Frames: 9732
Finished processing video: Direct Hit! Some of the best run-outs in recent years.mp4
Processing video: Indian Fielders 10 Best Run-Outs In Cricket 💀.mp4, FPS: 29.97002997002997, Total Frames: 5881
Finished processing video: Indian Fiel

In [10]:
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np
import os
import cv2
from keras.utils import to_categorical  # For one-hot encoding

In [11]:
def load_data_from_frames(output_dir):
    """Load the extracted frames of every class and prepare them for training.

    Reads all ``.jpg`` files in ``<output_dir>/extracted_frames/<class>/`` for
    the classes ``['Runouts', 'Wickets']``. Within a class, frames are loaded
    grouped by video and in increasing frame index (parsed from the
    ``<video>_frame_<index>.jpg`` file name), so consecutive rows of the
    result are consecutive sampled frames of the same video.

    Args:
        output_dir: Root directory that contains ``extracted_frames/<class>``.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is a float32 array of the
        frames scaled to ``[0, 1]`` and ``labels`` is the one-hot encoding of
        the class index (0 = Runouts, 1 = Wickets) of each frame.

    Raises:
        FileNotFoundError: If a class folder does not exist (from ``os.listdir``).
    """
    class_names = ['Runouts', 'Wickets']  # Define your class labels
    data = []
    labels = []

    def _frame_sort_key(filename):
        # "<video>_frame_<n>.jpg" -> (video, n): numeric order, so frame 10 follows frame 5
        # (a plain string sort would put "_frame_10" before "_frame_5").
        stem = os.path.splitext(filename)[0]
        video_name, separator, index = stem.rpartition('_frame_')
        if separator and index.isdecimal():
            return (video_name, int(index), filename)
        return (stem, -1, filename)

    for class_id, class_name in enumerate(class_names):
        class_folder = os.path.join(output_dir, 'extracted_frames', class_name)
        frame_files = [name for name in os.listdir(class_folder) if name.endswith('.jpg')]

        for frame_file in sorted(frame_files, key=_frame_sort_key):
            frame_path = os.path.join(class_folder, frame_file)
            image = cv2.imread(frame_path)  # Returns None when the file cannot be decoded

            if image is None:
                print(f"Warning: could not read frame, skipping: {frame_path}")
                continue

            data.append(image)
            labels.append(class_id)  # Assign the corresponding class label

    # Convert straight to float32 and scale in place: this avoids both an
    # intermediate uint8 array and a second full-size float32 temporary.
    data = np.array(data, dtype='float32')
    data /= 255.0

    # One-hot encode the labels
    labels = to_categorical(np.array(labels), num_classes=len(class_names))

    print(f"Loaded {len(data)} frames.")
    print(f"Data shape: {data.shape}")
    print(f"Labels shape: {labels.shape}")

    return data, labels


In [12]:
# Load every extracted frame of both classes into memory as normalised float32 arrays,
# together with one-hot labels. Reads from <output_dir>/extracted_frames/<class>/.
frames_data, labels_data = load_data_from_frames(output_dir)

Loaded 14930 frames.
Data shape: (14930, 224, 224, 3)
Labels shape: (14930, 2)


In [13]:
# Split the data into training and testing sets (80/20, fixed seed for repeatability).
# NOTE(review): train_test_split shuffles by default and is applied here to individual
# frames. The 10-frame windows built later by create_frame_sequences are therefore made
# of frames in shuffled order (possibly from different videos and classes) rather than
# consecutive frames of one clip, and frames of the same video can land in both splits.
# Consider building per-video sequences first and splitting those (stratified) instead.
X_train, X_test, y_train, y_test = train_test_split(frames_data, labels_data, test_size=0.2, random_state=42)

In [14]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, TimeDistributed, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [15]:
def create_cnn_lstm_model(input_shape, num_classes=2):
    """Build and compile a CNN + LSTM classifier for short frame sequences.

    The same small CNN (three Conv2D -> BatchNormalization -> MaxPooling2D
    stages with 32, 64 and 128 filters) is applied to every frame through
    ``TimeDistributed``; the flattened per-frame features are summarised by an
    LSTM and classified by a dense softmax head.

    Args:
        input_shape: Shape of one sample without the batch axis, i.e.
            ``(frames, height, width, channels)``.
        num_classes: Number of output classes. Defaults to 2
            (Runouts, Wickets).

    Returns:
        The compiled ``Sequential`` model (Adam, learning rate 1e-4,
        categorical cross-entropy, accuracy metric).
    """
    # Imported locally because the notebook's layer import list does not include Input.
    from tensorflow.keras.layers import Input

    model = Sequential()

    # Declare the input with an explicit Input layer rather than passing
    # `input_shape=` to the first layer, which newer Keras versions warn about.
    model.add(Input(shape=input_shape))

    # CNN part wrapped in TimeDistributed (to apply the CNN to each frame)
    for filters in (32, 64, 128):
        model.add(TimeDistributed(Conv2D(filters, (3, 3), activation='relu')))
        model.add(TimeDistributed(BatchNormalization()))
        model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

    model.add(TimeDistributed(Flatten()))

    # LSTM part: only the final state is kept (one vector per sequence)
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.5))

    # Fully connected layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))

    # Output layer: one softmax unit per class
    model.add(Dense(num_classes, activation='softmax'))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [16]:
# Input shape of one sample: (frames per sequence, height, width, colour channels).
# The leading 10 must equal the sequence_length used when the frame sequences are built,
# and 224x224 matches the size the frames are resized to during extraction.
input_shape = (10, 224, 224, 3)
model = create_cnn_lstm_model(input_shape)

  super().__init__(**kwargs)


In [17]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [18]:
# Function to create a data generator for real-time augmentation during training
def augment_data_with_generator(X_train, y_train, batch_size=32):
    """Return an iterator that yields augmented ``(images, labels)`` batches.

    Uses the module-level ``datagen`` (an ``ImageDataGenerator``), which must
    be defined before this function is called. Augmentation is applied on
    the fly, so the augmented data is never materialised as a whole.

    Args:
        X_train: Batch of images accepted by ``datagen.flow``
            (presumably a rank-4 array ``(n, height, width, channels)``, not
            5-D frame sequences - TODO confirm against the Keras docs).
        y_train: Labels aligned with ``X_train``.
        batch_size: Number of samples per yielded batch.

    Returns:
        The iterator returned by ``datagen.flow``.
    """
    # `fit` only computes dataset statistics used by the featurewise/ZCA
    # options; when none of them is enabled it is a wasted full pass over the
    # data, so it is skipped.
    needs_fit = any(
        getattr(datagen, option, False)
        for option in ('featurewise_center', 'featurewise_std_normalization', 'zca_whitening')
    )
    if needs_fit:
        datagen.fit(X_train)

    # Create a data generator that will yield batches of augmented data and corresponding labels
    return datagen.flow(X_train, y_train, batch_size=batch_size)


In [19]:
# Data augmentation configuration, shared by augment_frames and augment_data_with_generator
# (both read this module-level `datagen` when they are called, so this cell must run first).
datagen = ImageDataGenerator(
    rotation_range=10,        # random rotation range, in degrees
    width_shift_range=0.1,    # random horizontal shift, as a fraction of the width
    height_shift_range=0.1,   # random vertical shift, as a fraction of the height
    zoom_range=0.1,           # random zoom range
    horizontal_flip=True      # randomly mirror images left/right
)

In [20]:
# Apply augmentation frame by frame before creating sequences
def augment_frames(X_train, seed=None):
    """Apply an independent random transform from ``datagen`` to every frame.

    Each frame gets its own randomly drawn transform, so neighbouring frames
    are not transformed consistently with each other. The input is not
    modified; a new array is returned.

    Args:
        X_train: Sequence/array of frames, each accepted by
            ``datagen.random_transform``.
        seed: Optional integer. When given, frame ``i`` is transformed with
            seed ``seed + i``, which makes the augmentation repeatable.
            ``None`` (default) keeps the unseeded behaviour.

    Returns:
        np.ndarray: The augmented frames, in the same order as ``X_train``
        (``np.array([])`` for empty input).
    """
    # Generators and other unsized iterables are materialised so they can be counted.
    if not hasattr(X_train, '__len__'):
        X_train = list(X_train)

    augmented_frames = None
    for index, frame in enumerate(X_train):
        if seed is None:
            augmented_frame = datagen.random_transform(frame)
        else:
            # A distinct seed per frame keeps the transforms varied but repeatable.
            augmented_frame = datagen.random_transform(frame, seed=seed + index)

        if augmented_frames is None:
            # Allocate the output once, from the first result, instead of
            # collecting a list and copying it: halves the peak memory use.
            augmented_frame = np.asarray(augmented_frame)
            augmented_frames = np.empty(
                (len(X_train),) + augmented_frame.shape, dtype=augmented_frame.dtype
            )
        augmented_frames[index] = augmented_frame

    if augmented_frames is None:
        return np.array([])
    return augmented_frames


In [21]:
def create_frame_sequences(data, labels, sequence_length=10):
    """Group frames into consecutive, non-overlapping fixed-length sequences.

    Frames are taken in the order given: sequence ``k`` holds
    ``data[k * sequence_length:(k + 1) * sequence_length]`` and is labelled
    with the label of its last frame. Trailing frames that do not fill a
    whole sequence are dropped.

    Args:
        data: Array-like of frames; the first axis indexes frames.
        labels: Array-like of per-frame labels aligned with ``data``.
        sequence_length: Number of frames per sequence. Must be positive.

    Returns:
        tuple: ``(sequences, sequence_labels)``. ``sequences`` has shape
        ``(n_sequences, sequence_length, *frame_shape)`` and may share memory
        with ``data`` (no copy is made when ``data`` is a contiguous array).

    Raises:
        ValueError: If ``sequence_length`` is not positive.
        IndexError: If ``labels`` is shorter than the frames that are used.
    """
    if sequence_length <= 0:
        raise ValueError(f"sequence_length must be a positive integer, got {sequence_length!r}")

    data = np.asarray(data)
    labels = np.asarray(labels)

    # Every complete window counts, including the final one when len(data) is
    # an exact multiple of sequence_length.
    num_sequences = len(data) // sequence_length
    used_frames = num_sequences * sequence_length

    if len(labels) < used_frames:
        raise IndexError(
            f"labels has {len(labels)} entries but {used_frames} frames are grouped into sequences"
        )

    # Reshaping the leading axis avoids building a Python list of slices and
    # copying the whole dataset a second time.
    sequences = data[:used_frames].reshape((num_sequences, sequence_length) + data.shape[1:])
    # Use the label of the last frame in each sequence
    sequence_labels = labels[sequence_length - 1:used_frames:sequence_length].copy()

    return sequences, sequence_labels

In [22]:
# Function to create frame sequences for the model
def create_frame_sequences(data, labels, sequence_length=10):
    """Group frames into consecutive, non-overlapping fixed-length sequences.

    Frames are taken in the order given: sequence ``k`` holds
    ``data[k * sequence_length:(k + 1) * sequence_length]`` and is labelled
    with the label of its last frame. Trailing frames that do not fill a
    whole sequence are dropped.

    Args:
        data: Array-like of frames; the first axis indexes frames.
        labels: Array-like of per-frame labels aligned with ``data``.
        sequence_length: Number of frames per sequence. Must be positive.

    Returns:
        tuple: ``(sequences, sequence_labels)``. ``sequences`` has shape
        ``(n_sequences, sequence_length, *frame_shape)`` and may share memory
        with ``data`` (no copy is made when ``data`` is a contiguous array).

    Raises:
        ValueError: If ``sequence_length`` is not positive.
        IndexError: If ``labels`` is shorter than the frames that are used.
    """
    # NOTE(review): the previous cell defines a function with this same name;
    # whichever cell runs last wins. Keep a single definition.
    if sequence_length <= 0:
        raise ValueError(f"sequence_length must be a positive integer, got {sequence_length!r}")

    data = np.asarray(data)
    labels = np.asarray(labels)

    # Every complete window counts, including the final one when len(data) is
    # an exact multiple of sequence_length.
    num_sequences = len(data) // sequence_length
    used_frames = num_sequences * sequence_length

    if len(labels) < used_frames:
        raise IndexError(
            f"labels has {len(labels)} entries but {used_frames} frames are grouped into sequences"
        )

    # Reshaping the leading axis avoids building a Python list of slices and
    # copying the whole dataset a second time.
    sequences = data[:used_frames].reshape((num_sequences, sequence_length) + data.shape[1:])
    # Use the label of the last frame in each sequence
    sequence_labels = labels[sequence_length - 1:used_frames:sequence_length].copy()

    return sequences, sequence_labels

In [23]:
# Augment the training frames one by one with the random transforms configured in `datagen`.
# The result is a new array with one augmented frame per frame of X_train (X_train itself is
# not modified), so both arrays are held in memory from here on. Test frames are not augmented.
X_train_augmented = augment_frames(X_train)

In [24]:
# Group the frames into sequences of 10 frames each; sequence_length must match the
# first entry of the model's input_shape. Each sequence takes the label of its last frame.
# Training sequences come from the augmented frames, test sequences from the raw test frames.
X_train_seq, y_train_seq = create_frame_sequences(X_train_augmented, y_train, sequence_length=10)
X_test_seq, y_test_seq = create_frame_sequences(X_test, y_test, sequence_length=10)

In [25]:
# Define learning rate scheduler and checkpoint, both driven by the validation loss.
# checkpoint: writes 'run_out_model.keras' only when val_loss improves (best model so far).
checkpoint = ModelCheckpoint('run_out_model.keras', monitor='val_loss', save_best_only=True, mode='min', verbose=1)
# reduce_lr: multiplies the learning rate by 0.2 after 5 epochs without improvement, down to 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1)

In [26]:
# Train the model using the sequences of frames; the test sequences double as validation data.
# NOTE(review): the recorded run of this cell ended with
#   MemoryError: Unable to allocate 6.70 GiB for an array with shape (1194, 10, 224, 224, 3)
# i.e. an array the size of X_train_seq could not be allocated. Presumably a further
# full-size copy of the training data was requested while the earlier arrays (frames_data,
# X_train, X_train_augmented) were still alive - TODO confirm where the allocation happens.
# Consider feeding batches from a generator and deleting intermediates that are no longer needed.
history = model.fit(X_train_seq, y_train_seq,
                    epochs=50,
                    batch_size=4,
                    validation_data=(X_test_seq, y_test_seq),
                    callbacks=[checkpoint, reduce_lr])

MemoryError: Unable to allocate 6.70 GiB for an array with shape (1194, 10, 224, 224, 3) and data type float32