# Video Recognition

A video recognition project using the HMDB51 dataset (https://serre.lab.brown.edu/hmdb51.html). Special focus is given to the efficiency of training.

In [None]:
# !pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl (39.0 MB)
   ---------------------------------------- 39.0/39.0 MB 1.9 MB/s  0:00:20
Installing collected packages: opencv-python
Successfully installed opencv-python-4.12.0.88




In [1]:
import os
import glob
import cv2
import numpy as np

# Root folder of the dataset. VideoLoader (defined below) treats every
# subfolder of this directory as a class label and collects the "*.avi"
# files found directly inside it.
dataset_path = "./dataset"

class VideoLoader:
    """Lazy, index-based loader for a folder-per-class video dataset.

    ``root_dir`` is expected to contain one subfolder per class; every
    ``*.avi`` file directly inside a subfolder becomes one sample labelled
    with that subfolder's name. Only paths and labels are held in memory;
    frames are decoded on demand when a sample is indexed.
    """

    def __init__(self, root_dir):
        """Index the dataset located at ``root_dir``.

        Args:
            root_dir: Path of the directory holding one subfolder per class.
        """
        self.root_dir = root_dir
        self.samples = []  # (video_path, label_string) tuples, in sorted order
        self.classes = []  # Sorted list of class (subfolder) names

        self._prepare_dataset()

    def _prepare_dataset(self):
        """Scan the class subfolders and (re)build the sample list.

        Prints an error and leaves the loader unchanged when ``root_dir`` is
        not an existing directory.
        """
        # isdir (rather than exists) also rejects a regular file, which would
        # otherwise make os.listdir raise below.
        if not os.path.isdir(self.root_dir):
            print(f"Error: Directory '{self.root_dir}' not found.")
            return

        # Subfolders are the labels. os.listdir returns entries in arbitrary
        # order, so sort to keep class order reproducible across machines.
        self.classes = sorted(
            d for d in os.listdir(self.root_dir)
            if os.path.isdir(os.path.join(self.root_dir, d))
        )

        print(f"Found {len(self.classes)} classes: {self.classes}")

        # Build into a fresh list so calling this method again does not
        # duplicate entries.
        samples = []
        for label in self.classes:
            folder_path = os.path.join(self.root_dir, label)
            # glob order is arbitrary as well; sort for a stable sample order.
            video_files = sorted(glob.glob(os.path.join(folder_path, "*.avi")))
            samples.extend((video_file, label) for video_file in video_files)
        self.samples = samples

        print(f"Total videos found: {len(self.samples)}")

    def load_video_frames(self, video_path, resize=(224, 224)):
        """Decode one video file into a numpy array of RGB frames.

        Args:
            video_path: Path of the video file to read.
            resize: ``(width, height)`` passed to ``cv2.resize`` for every
                frame, or a falsy value to keep the native resolution.

        Returns:
            Array of shape ``(frames, height, width, 3)``. When no frame can
            be read and ``resize`` is set, an empty ``uint8`` array of shape
            ``(0, height, width, 3)`` is returned so the rank stays
            consistent; without ``resize`` the frame size is unknown and an
            empty 1-D array is returned.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV decodes to BGR; convert to RGB.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if resize:
                    frame = cv2.resize(frame, resize)
                frames.append(frame)
        finally:
            # Release the capture handle even if decoding raises.
            cap.release()

        if not frames and resize:
            # cv2.resize takes (width, height); array layout is (height, width).
            width, height = resize
            return np.empty((0, height, width, 3), dtype=np.uint8)
        return np.array(frames)

    def __len__(self):
        """Return the number of indexed videos."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Decode and return ``(video_frames, label)`` for the sample at ``idx``.

        Raises:
            IndexError: If ``idx`` is out of range for the sample list.
        """
        video_path, label = self.samples[idx]
        video_data = self.load_video_frames(video_path)
        return video_data, label

# --- USAGE ---

# 1. Initialize the loader (this only indexes paths and labels; no video is decoded yet)
dataset = VideoLoader(dataset_path)

# 2. Iterate through the data (safe for memory)
# Only one video is decoded at a time here
if len(dataset) > 0:
    print("\n--- Testing Data Loading ---")

    # Load the very first video
    video_data, label = dataset[0]

    print(f"Label: {label}")
    print(f"Video Shape: {video_data.shape} (Frames, Height, Width, Channels)")

    # Preview up to the first 3 samples. The range is capped at the dataset
    # size so a dataset with fewer than 3 videos does not raise IndexError.
    for i in range(min(3, len(dataset))):
        vid, lbl = dataset[i]
        print(f"Sample {i}: {lbl} - {vid.shape}")

Found 10 classes: ['catch', 'dribble', 'fall_floor', 'hit', 'jump', 'kick_ball', 'push', 'run', 'shoot_ball', 'walk']
Total videos found: 1816

--- Testing Data Loading ---
Label: catch
Video Shape: (62, 224, 224, 3) (Frames, Height, Width, Channels)
Sample 0: catch - (62, 224, 224, 3)
Sample 1: catch - (53, 224, 224, 3)
Sample 2: catch - (37, 224, 224, 3)
