In [26]:
import os
import cv2
import json
import matplotlib.pyplot as plt
import numpy as np
import random
import time
import torch
import torch.nn as nn
import torch.optim as optim

from datetime import datetime
from pathlib import Path
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from typing import Tuple, List, Dict

In [27]:
class UCF11Dataset(Dataset):
    """
    Dataset class for UCF11 action recognition dataset.

    The expected on-disk layout is ``root_dir/<category>/<group>/<video>.mpg``.
    Every sub-directory of ``root_dir`` is treated as an action category and
    mapped to an integer label following the alphabetical order of the
    category names.

    Each item is a ``(frames, label)`` pair where ``frames`` is a float32
    tensor laid out as ``[T, C, H, W]`` with values scaled by 1/255 and
    ``label`` is a scalar ``torch.long`` tensor.
    """

    # Fixed seed so the train and validation instances derive complementary
    # subsets from the same permutation.
    _SPLIT_SEED = 42

    def __init__(self,
                 root_dir: str,
                 frames_per_clip: int = 30,
                 frame_size: Tuple[int, int] = (64, 64),
                 train: bool = True,
                 train_split: float = 0.8,
                 transform=None):
        """
        Args:
            root_dir (str): Root directory of UCF11 dataset
            frames_per_clip (int): Number of frames taken from the start of
                each video (shorter videos are padded with their last frame)
            frame_size (tuple): Size to resize frames to. It is passed to
                ``cv2.resize`` as ``dsize``, which OpenCV documents as
                (width, height); irrelevant for square sizes.
            train (bool): Whether this is training or validation set
            train_split (float): Fraction of data to use for training
            transform: Optional callable applied to the whole ``[T, C, H, W]``
                clip tensor
        """
        self.root_dir = Path(root_dir)
        self.frames_per_clip = frames_per_clip
        self.frame_size = frame_size
        self.transform = transform

        # Get all action categories (folders)
        self.action_categories = sorted(
            d for d in os.listdir(self.root_dir)
            if os.path.isdir(self.root_dir / d)
        )

        # Create class to index mapping
        self.class_to_idx = {cls_name: idx
                             for idx, cls_name in enumerate(self.action_categories)}

        # Collect all video paths and their corresponding labels
        self.video_paths = []
        self.labels = []

        for category in self.action_categories:
            category_path = self.root_dir / category
            # os.listdir returns entries in arbitrary order; sort so that the
            # seeded shuffle below yields the same split on every machine.
            group_folders = sorted(
                d for d in os.listdir(category_path)
                if os.path.isdir(category_path / d)
            )

            for group in group_folders:
                group_path = category_path / group
                videos = sorted(f for f in os.listdir(group_path)
                                if f.endswith('.mpg'))

                for video in videos:
                    self.video_paths.append(str(group_path / video))
                    self.labels.append(self.class_to_idx[category])

        # Split into train and validation sets. A private Random instance is
        # used so that building a dataset does not reseed the global `random`
        # module behind the caller's back.
        num_videos = len(self.video_paths)
        indices = list(range(num_videos))
        random.Random(self._SPLIT_SEED).shuffle(indices)
        split = int(train_split * num_videos)

        if train:
            self.indices = indices[:split]
        else:
            self.indices = indices[split:]

        print(f"{'Training' if train else 'Validation'} set contains {len(self.indices)} videos")

    def __len__(self) -> int:
        """Number of videos in this (train or validation) subset."""
        return len(self.indices)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Load the clip at position ``idx`` of this subset.

        Returns:
            (frames, label): ``frames`` is float32 ``[T, C, H, W]``; ``label``
            is a scalar ``torch.long`` tensor.

        Raises:
            RuntimeError: If no frame can be decoded from the video file.
        """
        # Get the actual index from our train/val split
        actual_idx = self.indices[idx]

        # Load video and label
        video_path = self.video_paths[actual_idx]
        label = self.labels[actual_idx]

        # Load and preprocess video frames
        frames = self.load_video(video_path)

        # Convert to a float32 tensor and adjust dimensions
        frames = torch.as_tensor(frames, dtype=torch.float32)
        frames = frames.permute(0, 3, 1, 2)  # [T, H, W, C] -> [T, C, H, W]

        if self.transform:
            frames = self.transform(frames)

        return frames, torch.tensor(label, dtype=torch.long)

    def load_video(self, path: str) -> np.ndarray:
        """Load video and return preprocessed frames.

        Reads up to ``frames_per_clip`` frames from the beginning of the
        video. If the video ends early, the last decoded frame is repeated
        until the clip has the requested length.

        Returns:
            Array of stacked frames laid out as ``[T, H, W, C]``.

        Raises:
            RuntimeError: If frames were requested but none could be read
                (missing file, unsupported codec, empty video).
        """
        cap = cv2.VideoCapture(path)
        frames = []

        try:
            while len(frames) < self.frames_per_clip:
                ret, frame = cap.read()
                if not ret:
                    break

                frames.append(self.preprocess_frame(frame))
        finally:
            cap.release()

        # Handle videos shorter than desired length
        if len(frames) < self.frames_per_clip:
            if not frames:
                # Without this guard `frames[-1]` below would raise a bare
                # IndexError that hides which file is broken.
                raise RuntimeError(f"Could not read any frames from video: {path}")
            # Duplicate last frame if video is too short
            frames.extend([frames[-1]] * (self.frames_per_clip - len(frames)))

        return np.array(frames)

    def preprocess_frame(self, frame: np.ndarray) -> np.ndarray:
        """Preprocess a single frame: center-crop, resize, BGR->RGB, scale by 1/255."""
        # Center crop
        frame = self.crop_center_square(frame)
        # Resize
        frame = cv2.resize(frame, self.frame_size)
        # Convert BGR to RGB by reversing the channel axis
        frame = frame[:, :, [2, 1, 0]]
        # Normalize
        frame = frame / 255.0

        return frame

    @staticmethod
    def crop_center_square(frame: np.ndarray) -> np.ndarray:
        """Crop the largest centered square from a frame (first two axes are rows, cols)."""
        y, x = frame.shape[0:2]
        min_dim = min(y, x)
        start_x = (x // 2) - (min_dim // 2)
        start_y = (y // 2) - (min_dim // 2)
        return frame[start_y:start_y + min_dim, start_x:start_x + min_dim]

    def get_class_names(self) -> List[str]:
        """Return list of class names, ordered by their label index."""
        return self.action_categories

In [28]:
def create_dataloaders(root_dir: str, 
                      batch_size: int = 32,
                      frames_per_clip: int = 30,
                      frame_size: Tuple[int, int] = (64, 64),
                      num_workers: int = 4) -> Tuple[DataLoader, DataLoader]:
    """
    Build the training and validation dataloaders for UCF11.

    Two UCF11Dataset instances are created over the same directory (one with
    ``train=True``, one with ``train=False``) and each is wrapped in a
    DataLoader. Only the training loader shuffles; both pin memory.

    Args:
        root_dir: Root directory of UCF11 dataset
        batch_size: Batch size used by both loaders
        frames_per_clip: Number of frames taken from each video
        frame_size: Size to resize frames to
        num_workers: Number of worker processes per loader

    Returns:
        train_loader, val_loader: Training and validation dataloaders
    """
    # Settings shared by both subsets / both loaders.
    dataset_options = dict(
        root_dir=root_dir,
        frames_per_clip=frames_per_clip,
        frame_size=frame_size,
    )
    loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
    )

    # Training subset first, then validation (matches the printed order).
    train_dataset = UCF11Dataset(train=True, **dataset_options)
    val_dataset = UCF11Dataset(train=False, **dataset_options)

    train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

    return train_loader, val_loader

In [29]:
class ConvLSTM(nn.Module):
    """Clip classifier: one 3D-conv block, temporal mean, two dense layers.

    Despite the class name, this module contains no recurrent/LSTM cell: time
    is handled by a 3D convolution (temporal kernel of 3) followed by an
    average over the time axis. Dropout is applied after the temporal mean
    and between the two linear layers.

    NOTE(review): there is no activation between ``fc1`` and ``fc2``, so apart
    from dropout the classification head is a linear map. Left unchanged so
    that existing results/checkpoints keep their meaning.

    Args:
        input_channels: Number of channels per frame.
        hidden_channels: Number of feature maps produced by the convolution.
        kernel_size: Spatial kernel size (the temporal kernel is fixed at 3).
        num_classes: Number of output logits.
        frame_size: (height, width) of the input frames; determines the input
            width of ``fc1``.
    """

    def __init__(self, input_channels: int = 3, hidden_channels: int = 64, kernel_size: int = 3,
                 num_classes: int = 11, frame_size: Tuple[int, int] = (64, 64)):
        super().__init__()
        spatial_pad = kernel_size // 2
        self.conv_lstm = nn.Sequential(
            nn.Conv3d(input_channels, hidden_channels,
                      kernel_size=(3, kernel_size, kernel_size),
                      padding=(1, spatial_pad, spatial_pad)),
            nn.BatchNorm3d(hidden_channels),
            nn.ReLU(inplace=True)
        )

        # Spatial size after the stride-1 convolution. For odd kernels this
        # equals the input size; for even kernels the padding adds one pixel.
        height, width = frame_size
        out_h = height + 2 * spatial_pad - kernel_size + 1
        out_w = width + 2 * spatial_pad - kernel_size + 1

        self.dropout1 = nn.Dropout(0.2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(hidden_channels * out_h * out_w, 256)
        self.dropout2 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map clips ``[batch, time, channels, height, width]`` to logits ``[batch, num_classes]``."""
        # Conv3d expects channels before time.
        x = x.permute(0, 2, 1, 3, 4)  # [batch_size, channels, time_steps, height, width]
        x = self.conv_lstm(x)
        x = x.mean(dim=2)  # Average over temporal dimension
        x = self.dropout1(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

In [30]:
class ConvFF(nn.Module):
    """Feed-forward clip classifier: one 3D-conv block, temporal mean, two dense layers.

    The layer stack is a Conv3d -> BatchNorm3d -> ReLU block, an average over
    the time axis, and two linear layers; no dropout is used.

    NOTE(review): there is no activation between ``fc1`` and ``fc2``, so the
    classification head is a linear map. Left unchanged so that existing
    results/checkpoints keep their meaning.

    Args:
        input_channels: Number of channels per frame.
        hidden_channels: Number of feature maps produced by the convolution.
        kernel_size: Spatial kernel size (the temporal kernel is fixed at 3).
        num_classes: Number of output logits.
        frame_size: (height, width) of the input frames; determines the input
            width of ``fc1``.
    """

    def __init__(self, input_channels: int = 3, hidden_channels: int = 64, kernel_size: int = 3,
                 num_classes: int = 11, frame_size: Tuple[int, int] = (64, 64)):
        super().__init__()
        spatial_pad = kernel_size // 2
        self.conv = nn.Sequential(
            nn.Conv3d(input_channels, hidden_channels,
                      kernel_size=(3, kernel_size, kernel_size),
                      padding=(1, spatial_pad, spatial_pad)),
            nn.BatchNorm3d(hidden_channels),
            nn.ReLU(inplace=True)
        )

        # Spatial size after the stride-1 convolution. For odd kernels this
        # equals the input size; for even kernels the padding adds one pixel.
        height, width = frame_size
        out_h = height + 2 * spatial_pad - kernel_size + 1
        out_w = width + 2 * spatial_pad - kernel_size + 1

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(hidden_channels * out_h * out_w, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map clips ``[batch, time, channels, height, width]`` to logits ``[batch, num_classes]``."""
        # Conv3d expects channels before time.
        x = x.permute(0, 2, 1, 3, 4)  # [batch_size, channels, time_steps, height, width]
        x = self.conv(x)
        x = x.mean(dim=2)  # Average over temporal dimension
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [31]:
class Trainer:
    """Train/validate a classifier, tracking metrics, checkpoints and plots.

    Per-epoch loss and accuracy for both splits are kept in lists on the
    instance, written to ``metrics.json`` and plotted to
    ``learning_curves.png`` inside a timestamped run directory under
    ``save_dir``. The checkpoint with the best validation accuracy is saved in
    the same directory.
    """

    def __init__(self,
                 model: nn.Module,
                 train_loader: DataLoader,
                 val_loader: DataLoader,
                 criterion: nn.Module = None,
                 optimizer: optim.Optimizer = None,
                 lr: float = 0.001,
                 num_epochs: int = 30,
                 device: str = None,
                 model_name: str = "model",
                 save_dir: str = "checkpoints"):
        """
        Initialize the trainer
        
        Args:
            model: PyTorch model to train
            train_loader: Training data loader
            val_loader: Validation data loader
            criterion: Loss function (default: CrossEntropyLoss)
            optimizer: Optimizer (default: Adam)
            lr: Learning rate (only used when the default optimizer is built)
            num_epochs: Number of epochs to train
            device: Device to train on (default: auto-detect)
            model_name: Name for saving model checkpoints
            save_dir: Directory to save checkpoints and logs
        """
        self.device = device if device else ('cuda' if torch.cuda.is_available() else 'cpu')

        # Move the model first so a default optimizer is built over the
        # parameters as they live on the target device.
        self.model = model.to(self.device)
        self.train_loader = train_loader
        self.val_loader = val_loader

        # `is None` rather than truthiness: container modules define __len__,
        # so an empty one would be falsy and silently replaced.
        self.criterion = criterion if criterion is not None else nn.CrossEntropyLoss()
        self.optimizer = (optimizer if optimizer is not None
                          else optim.Adam(self.model.parameters(), lr=lr))
        self.num_epochs = num_epochs
        self.model_name = model_name
        self.save_dir = Path(save_dir)
        # parents=True so nested paths such as "runs/exp1" work.
        self.save_dir.mkdir(parents=True, exist_ok=True)

        # Initialize tracking variables
        self.train_losses = []
        self.train_accuracies = []
        self.val_losses = []
        self.val_accuracies = []
        self.best_val_acc = 0.0

        # Create log directory with timestamp
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.log_dir = self.save_dir / f"{self.model_name}_{self.timestamp}"
        self.log_dir.mkdir(parents=True, exist_ok=True)

        self.log_every_n_batches = 40
        self.gradient_accumulation_steps = 4

    def train_epoch(self) -> Tuple[float, float]:
        """Run one pass over the training loader.

        Gradients are accumulated over ``gradient_accumulation_steps`` batches
        before each optimizer step. A trailing, incomplete group is also
        applied on the last batch instead of being thrown away.

        Returns:
            (epoch_loss, epoch_acc): sample-weighted mean loss and accuracy.
            The loss average assumes the criterion returns a per-batch mean
            (the default CrossEntropyLoss does).
        """
        self.model.train()
        loss_sum = 0.0
        correct = 0
        seen = 0
        total_batches = len(self.train_loader)

        # Reset gradients
        self.optimizer.zero_grad()

        for batch_idx, (inputs, labels) in enumerate(self.train_loader):
            # Move data to device
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            n_samples = labels.size(0)

            # Forward pass; scale so accumulated gradients average over the group
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels) / self.gradient_accumulation_steps

            # Backward pass
            loss.backward()

            # Optimize every n steps, and on the final batch so that leftover
            # gradients are not wiped by the next epoch's zero_grad().
            step_no = batch_idx + 1
            if step_no % self.gradient_accumulation_steps == 0 or step_no == total_batches:
                self.optimizer.step()
                self.optimizer.zero_grad()

            # Calculate accuracy
            with torch.no_grad():
                predicted = outputs.argmax(dim=1)
                batch_correct = (predicted == labels).sum().item()
            acc = batch_correct / n_samples
            batch_loss = loss.item() * self.gradient_accumulation_steps

            # Update running metrics, weighted by batch size so a smaller
            # final batch does not skew the epoch averages.
            loss_sum += batch_loss * n_samples
            correct += batch_correct
            seen += n_samples

            # Print progress less frequently
            if step_no % self.log_every_n_batches == 0:
                print(f'Batch [{step_no}/{total_batches}] - '
                      f'Loss: {batch_loss:.4f} - Acc: {acc:.4f}')

            # Clear cache periodically
            # NOTE(review): frequent empty_cache() calls presumably cost
            # throughput; kept to preserve the original memory behaviour.
            if step_no % 10 == 0:
                torch.cuda.empty_cache()

        denom = max(seen, 1)  # avoid ZeroDivisionError on an empty loader
        return loss_sum / denom, correct / denom

    @torch.no_grad()
    def validate(self) -> Tuple[float, float]:
        """Validate the model.

        Returns:
            (val_loss, val_acc): sample-weighted mean loss and accuracy over
            the validation loader.
        """
        self.model.eval()
        loss_sum = 0.0
        correct = 0
        seen = 0

        for inputs, labels in self.val_loader:
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            n_samples = labels.size(0)

            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)

            predicted = outputs.argmax(dim=1)

            loss_sum += loss.item() * n_samples
            correct += (predicted == labels).sum().item()
            seen += n_samples

        denom = max(seen, 1)  # avoid ZeroDivisionError on an empty loader
        return loss_sum / denom, correct / denom

    def train(self):
        """Train the model for the specified number of epochs.

        After every epoch the metrics file and learning-curve plot are
        rewritten, and a checkpoint is saved whenever validation accuracy
        improves on the best seen so far.
        """
        print(f"Training on {self.device}")
        print(f"Model: {self.model_name}")

        for epoch in range(self.num_epochs):
            print(f"\nEpoch {epoch + 1}/{self.num_epochs}")
            print("-" * 20)

            # Training phase (timing covers training only, not validation)
            epoch_start = time.time()
            train_loss, train_acc = self.train_epoch()
            epoch_time = time.time() - epoch_start

            # Validation phase
            val_loss, val_acc = self.validate()

            # Store metrics
            self.train_losses.append(train_loss)
            self.train_accuracies.append(train_acc)
            self.val_losses.append(val_loss)
            self.val_accuracies.append(val_acc)

            # Print epoch summary
            print(f"\nEpoch Summary:")
            print(f"Time taken: {epoch_time:.2f}s")
            print(f"Train Loss: {train_loss:.4f} - Train Acc: {train_acc:.4f}")
            print(f"Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.4f}")

            # Save best model
            if val_acc > self.best_val_acc:
                self.best_val_acc = val_acc
                self.save_checkpoint(f"best_{self.model_name}.pth")

            # Save metrics
            self.save_metrics()

            # Plot and save learning curves
            self.plot_learning_curves()

    def save_checkpoint(self, filename: str):
        """Save model/optimizer state and metric history to ``log_dir / filename``."""
        checkpoint = {
            'epoch': len(self.train_losses),
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'train_losses': self.train_losses,
            'train_accuracies': self.train_accuracies,
            'val_losses': self.val_losses,
            'val_accuracies': self.val_accuracies,
            'best_val_acc': self.best_val_acc
        }
        torch.save(checkpoint, self.log_dir / filename)

    def save_metrics(self):
        """Save training metrics to ``log_dir / 'metrics.json'`` (overwritten each call)."""
        metrics = {
            'train_losses': self.train_losses,
            'train_accuracies': self.train_accuracies,
            'val_losses': self.val_losses,
            'val_accuracies': self.val_accuracies,
            'best_val_acc': self.best_val_acc
        }
        with open(self.log_dir / 'metrics.json', 'w') as f:
            json.dump(metrics, f, indent=4)

    def plot_learning_curves(self):
        """Plot loss and accuracy histories and save them to ``log_dir / 'learning_curves.png'``."""
        plt.figure(figsize=(12, 5))

        # Plot loss
        plt.subplot(1, 2, 1)
        plt.plot(self.train_losses, label='Train Loss')
        plt.plot(self.val_losses, label='Val Loss')
        plt.title('Loss vs. Epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Plot accuracy
        plt.subplot(1, 2, 2)
        plt.plot(self.train_accuracies, label='Train Acc')
        plt.plot(self.val_accuracies, label='Val Acc')
        plt.title('Accuracy vs. Epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.tight_layout()
        plt.savefig(self.log_dir / 'learning_curves.png')
        plt.close()  # free the figure; this runs once per epoch

    @staticmethod
    def plot_separate_curves(model_name, metrics, save_dir):
        """Plot accuracy and loss curves for a single model.

        Args:
            model_name: Label used in the titles and the output file name.
            metrics: Mapping with 'train_accuracies', 'val_accuracies',
                'train_losses' and 'val_losses' sequences.
            save_dir: Directory (str or Path) receiving
                ``<model_name>_curves.png``.
        """
        plt.figure(figsize=(12, 5))

        # Plot accuracy
        plt.subplot(1, 2, 1)
        plt.plot(metrics['train_accuracies'], label='Train')
        plt.plot(metrics['val_accuracies'], label='Validation')
        plt.title(f'{model_name} - Accuracy vs. Epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()

        # Plot loss
        plt.subplot(1, 2, 2)
        plt.plot(metrics['train_losses'], label='Train')
        plt.plot(metrics['val_losses'], label='Validation')
        plt.title(f'{model_name} - Loss vs. Epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        plt.tight_layout()
        # Path(...) so a plain string directory is accepted as well.
        plt.savefig(Path(save_dir) / f'{model_name}_curves.png')
        plt.close()

    @staticmethod
    def plot_combined_curves(metrics_dict, save_dir):
        """Plot accuracy and loss curves of several models on shared axes.

        Args:
            metrics_dict: Mapping of model name -> metrics mapping (same keys
                as accepted by ``plot_separate_curves``).
            save_dir: Directory (str or Path) receiving ``combined_curves.png``.
        """
        plt.figure(figsize=(12, 5))

        # Plot accuracy
        plt.subplot(1, 2, 1)
        for model_name, metrics in metrics_dict.items():
            plt.plot(metrics['train_accuracies'], label=f'{model_name} Train')
            plt.plot(metrics['val_accuracies'], label=f'{model_name} Val')
        plt.title('Combined Accuracy vs. Epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()

        # Plot loss
        plt.subplot(1, 2, 2)
        for model_name, metrics in metrics_dict.items():
            plt.plot(metrics['train_losses'], label=f'{model_name} Train')
            plt.plot(metrics['val_losses'], label=f'{model_name} Val')
        plt.title('Combined Loss vs. Epoch')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        plt.tight_layout()
        # Path(...) so a plain string directory is accepted as well.
        plt.savefig(Path(save_dir) / 'combined_curves.png')
        plt.close()

In [32]:
root_dir = "/kaggle/input/realistic-action-recognition-ucf50-dataset/UCF11_updated_mpg"
batch_size = 32
frames_per_clip = 30
frame_size = (64, 64)
num_epochs = 100
all_metrics = {}

# Seed torch so weight initialisation and loader shuffling are repeatable
# across Restart & Run All.
torch.manual_seed(42)

plots_dir = Path("plots_comparison")
plots_dir.mkdir(exist_ok=True)

# Create dataloaders
train_loader, val_loader = create_dataloaders(
    root_dir=root_dir,
    batch_size=batch_size // 2,  # Reduce batch size to save memory
    frames_per_clip=frames_per_clip,
    frame_size=frame_size,
    num_workers=2  # Reduce number of workers
)


# Print dataset information
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of validation batches: {len(val_loader)}")

# Get class names
class_names = train_loader.dataset.get_class_names()
print("\nAction categories:")
for idx, name in enumerate(class_names):
    print(f"{idx}: {name}")


def train_and_record(display_name, model, model_name):
    """Train `model`, store its metric history in `all_metrics`, plot its curves.

    Args:
        display_name: Key used in `all_metrics` and in plot titles/file names.
        model: The nn.Module to train.
        model_name: Name used by Trainer for its run directory and checkpoint.

    Returns:
        The Trainer instance after training has finished.
    """
    trainer = Trainer(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        num_epochs=num_epochs,
        model_name=model_name,
        save_dir="model_comparison"
    )
    trainer.train()

    all_metrics[display_name] = {
        'train_losses': trainer.train_losses,
        'train_accuracies': trainer.train_accuracies,
        'val_losses': trainer.val_losses,
        'val_accuracies': trainer.val_accuracies
    }
    Trainer.plot_separate_curves(display_name, all_metrics[display_name], plots_dir)
    return trainer


# Train ConvLSTM
print("Training ConvLSTM model...")
convlstm_model = ConvLSTM()
convlstm_trainer = train_and_record('ConvLSTM', convlstm_model, "convlstm")

# Drop the first model before building the second to free GPU memory
del convlstm_model, convlstm_trainer
torch.cuda.empty_cache()

# Train ConvFF
print("\nTraining ConvFF model...")
convff_model = ConvFF()
convff_trainer = train_and_record('ConvFF', convff_model, "convff")

# Plot combined curves
Trainer.plot_combined_curves(all_metrics, plots_dir)

# Plots are written to plots_dir; checkpoints and per-run metrics go to
# 'model_comparison' via the Trainer's save_dir.
print(f"\nTraining complete! Plots have been saved to the '{plots_dir}' directory.")

Training set contains 1280 videos
Validation set contains 320 videos
Number of training batches: 80
Number of validation batches: 20

Action categories:
0: basketball
1: biking
2: diving
3: golf_swing
4: horse_riding
5: soccer_juggling
6: swing
7: tennis_swing
8: trampoline_jumping
9: volleyball_spiking
10: walking
Training ConvLSTM model...
Training on cuda
Model: convlstm

Epoch 1/100
--------------------
Batch [40/80] - Loss: 125.8959 - Acc: 0.2500
Batch [80/80] - Loss: 63.2646 - Acc: 0.3750

Epoch Summary:
Time taken: 11.63s
Train Loss: 105.1894 - Train Acc: 0.2539
Val Loss: 43.8414 - Val Acc: 0.3531

Epoch 2/100
--------------------
Batch [40/80] - Loss: 21.7685 - Acc: 0.6250
Batch [80/80] - Loss: 17.7790 - Acc: 0.5625

Epoch Summary:
Time taken: 11.61s
Train Loss: 26.9333 - Train Acc: 0.5391
Val Loss: 20.5713 - Val Acc: 0.5188

Epoch 3/100
--------------------
Batch [40/80] - Loss: 7.9154 - Acc: 0.6875
Batch [80/80] - Loss: 0.7416 - Acc: 0.8750

Epoch Summary:
Time taken: 12.20s
