In [1]:
# import kagglehub
import os
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
import torchvision.transforms as T
import random
import optuna
from optuna.trial import TrialState

In [2]:
# Dataset locations, relative to the notebook. Every directory string below
# keeps its trailing slash so it can be extended by plain concatenation.
data_dir = "../data"
images_dir = f"{data_dir}/dataset/"
train_dir = f"{images_dir}train/"

# One training folder per sport.
badminton_train_dir = f"{train_dir}Badminton/"
tennis_train_dir = f"{train_dir}Tennis/"
cricket_train_dir = f"{train_dir}Cricket/"
soccer_train_dir = f"{train_dir}Soccer/"
swimming_train_dir = f"{train_dir}Swimming/"
karate_train_dir = f"{train_dir}Karate/"
wrestling_train_dir = f"{train_dir}Wrestling/"

test_dir = f"{images_dir}test/"

# One test folder per sport.
badminton_test_dir = f"{test_dir}Badminton/"
tennis_test_dir = f"{test_dir}Tennis/"
cricket_test_dir = f"{test_dir}Cricket/"
soccer_test_dir = f"{test_dir}Soccer/"
swimming_test_dir = f"{test_dir}Swimming/"
karate_test_dir = f"{test_dir}Karate/"
wrestling_test_dir = f"{test_dir}Wrestling/"

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Seed every RNG this notebook imports, not just torch: the datasets call
# `random` and numpy is imported, so all three are pinned for reproducibility.
# torch.manual_seed stays last so the cell still displays the torch Generator.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x1984d59e4f0>

In [4]:
# Class sub-folder names. The datasets below use each name's position in this
# list as its integer label, so the order matters.
classes = [
    'Badminton',
    'Cricket',
    'Tennis',
    'Swimming',
    'Soccer',
    'Wrestling',
    'Karate',
]

In [5]:
class ImageDataset1(Dataset):
    """Image dataset read from a directory holding one sub-folder per class.

    Every image file found under ``root_dir/<class name>/`` becomes one sample
    whose label is the index of that class name in ``classes``.
    """

    # Accepted file suffixes, matched case-insensitively. The leading dot is
    # required so a name such as "fakejpg" is not mistaken for an image.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, classes, transform=None, is_train=True):
        """
        Args:
            root_dir (str): Directory with all the class folders
            classes (list): List of class names (subfolder names)
            transform (callable, optional): Optional transform to be applied on a sample
            is_train (bool): Whether this is training data or not. Stored on the
                instance; it does not change the default transform of this class.
        """
        self.root_dir = root_dir
        self.classes = classes
        self.transform = transform
        self.is_train = is_train
        self.class_to_idx = {cls: idx for idx, cls in enumerate(classes)}
        self.samples = []

        # Default transform if none provided. Training and evaluation share the
        # same pipeline here: shorter side resized to 224, 224x224 center crop,
        # then conversion to a tensor.
        if self.transform is None:
            self.transform = T.Compose([
                T.Resize(224),
                T.CenterCrop(224),
                T.ToTensor(),
            ])

        for idx, cls in enumerate(classes):
            class_folder = os.path.join(root_dir, cls)
            if not os.path.isdir(class_folder):
                # Class folders that do not exist contribute no samples.
                continue
            # os.listdir order is unspecified; sort so sample order is reproducible.
            for img_name in sorted(os.listdir(class_folder)):
                if img_name.lower().endswith(self._IMAGE_EXTENSIONS):
                    img_path = os.path.join(class_folder, img_name)
                    self.samples.append((img_path, idx))

    def __len__(self):
        """Return the number of (image path, label) samples."""
        return len(self.samples)

    def __getitem__(self, idx, retry=0):
        """Load sample ``idx`` and return ``(transformed image, label)``.

        If loading or transforming fails, the error is printed and a randomly
        chosen other sample is returned instead (best effort, up to 3
        substitutions).

        Raises:
            RuntimeError: when the initial load and all 3 substitutes fail;
                the last underlying error is chained as the cause.
        """
        img_path, label = self.samples[idx]
        try:
            # Context manager closes the file handle; convert() returns an
            # independent in-memory image, so it stays usable afterwards.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading image {img_path}: {str(e)}")
            if retry < 3:
                return self.__getitem__(random.randint(0, len(self)-1), retry=retry+1)
            raise RuntimeError("Too many failed image loads.") from e

In [6]:
class ImageDataset2(Dataset):
    """Train/validation view over a single directory of per-class image folders.

    All image files under ``root_dir/<class name>/`` are collected, shuffled
    with a seeded RNG and cut at ``split_ratio``. An instance built with
    ``is_train=True`` keeps the first part, one built with ``is_train=False``
    keeps the remainder. Two instances created with the same ``root_dir``,
    ``classes``, ``split_ratio`` and ``seed`` therefore form a disjoint split.
    """

    # Accepted file suffixes, matched case-insensitively. The leading dot is
    # required so a name such as "fakejpg" is not mistaken for an image.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, classes, transform=None, is_train=True, split_ratio=0.8, seed=42):
        """
        Args:
            root_dir (str): Directory with all the class folders
            classes (list): List of class names (subfolder names)
            transform (callable, optional): Optional transform to be applied on a sample
            is_train (bool): Whether this is training data or not
            split_ratio (float): Ratio for training data (default is 0.8)
            seed (int): Seed for reproducibility
        """
        self.root_dir = root_dir
        self.classes = classes
        self.transform = transform
        self.is_train = is_train
        self.class_to_idx = {cls: idx for idx, cls in enumerate(classes)}
        self.samples = []

        all_samples = []
        for idx, cls in enumerate(classes):
            class_folder = os.path.join(root_dir, cls)
            if not os.path.isdir(class_folder):
                # Class folders that do not exist contribute no samples.
                continue
            # os.listdir order is unspecified; sorting makes the pre-shuffle
            # order, and therefore the split, identical on every construction.
            for img_name in sorted(os.listdir(class_folder)):
                if img_name.lower().endswith(self._IMAGE_EXTENSIONS):
                    img_path = os.path.join(class_folder, img_name)
                    all_samples.append((img_path, idx))

        # Shuffle and split once. A private Random instance yields the same
        # permutation as seeding the module-level RNG, without resetting the
        # global `random` state for the rest of the program.
        random.Random(seed).shuffle(all_samples)
        split_point = int(len(all_samples) * split_ratio)
        if is_train:
            self.samples = all_samples[:split_point]
        else:
            self.samples = all_samples[split_point:]

        # Default transform if none provided. Training and evaluation share the
        # same pipeline here: shorter side resized to 224, 224x224 center crop,
        # then conversion to a tensor.
        if self.transform is None:
            self.transform = T.Compose([
                T.Resize(224),
                T.CenterCrop(224),
                T.ToTensor(),
            ])

    def __len__(self):
        """Return the number of samples in this part of the split."""
        return len(self.samples)

    def __getitem__(self, idx, retry=0):
        """Load sample ``idx`` and return ``(transformed image, label)``.

        If loading or transforming fails, the error is printed and a randomly
        chosen other sample from this split is returned instead (best effort,
        up to 3 substitutions).

        Raises:
            RuntimeError: when the initial load and all 3 substitutes fail;
                the last underlying error is chained as the cause.
        """
        img_path, label = self.samples[idx]
        try:
            # Context manager closes the file handle; convert() returns an
            # independent in-memory image, so it stays usable afterwards.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading image {img_path}: {str(e)}")
            if retry < 3:
                return self.__getitem__(random.randint(0, len(self)-1), retry=retry+1)
            raise RuntimeError("Too many failed image loads.") from e

In [7]:
# Same class list as defined in the earlier cell, repeated here. The order
# fixes the integer label assigned to each class by the datasets.
classes = [
    'Badminton',
    'Cricket',
    'Tennis',
    'Swimming',
    'Soccer',
    'Wrestling',
    'Karate',
]

In [8]:
def freeze_all_but_last_n(model, n=2):
    """Freeze every parameter of ``model`` except those of its last ``n`` layers.

    A "layer" is a module that owns parameters directly (for example a
    convolution, batch-norm or linear module). Container modules are not
    counted, so ``n`` always refers to that many distinct parameterised layers,
    taken from the end of ``model.modules()`` order.

    Args:
        model (nn.Module): Model to modify in place.
        n (int): Number of trailing parameterised layers to leave trainable.
            ``n <= 0`` leaves the whole model frozen.

    Returns:
        nn.Module: The same ``model`` object, for call chaining.
    """
    for param in model.parameters():
        param.requires_grad = False

    # Guard explicitly: a slice of [-0:] would select *every* layer and
    # silently unfreeze the whole model.
    if n <= 0:
        return model

    # Modules that own at least one parameter themselves (children excluded).
    layers_with_params = [
        module for module in model.modules()
        if next(module.parameters(recurse=False), None) is not None
    ]

    # Unfreeze the parameters owned by the last n such layers.
    for module in layers_with_params[-n:]:
        for param in module.parameters(recurse=False):
            param.requires_grad = True

    return model


def print_trainable_params(model):
    """Print how many parameter elements of ``model`` are trainable.

    Only parameters with ``requires_grad`` set are counted; the total is the
    sum of their element counts. Nothing is returned.
    """
    print("Trainable Parameters:")
    trainable_count = sum(
        tensor.numel()
        for _, tensor in model.named_parameters()
        if tensor.requires_grad
    )
    print(f"Total Trainable Parameters: {trainable_count}")


In [9]:
def load_model(model_name, num_classes=7):
    """Build a torchvision model with default weights and a new classifier head.

    The model is created with ``weights='DEFAULT'``, its final linear layer is
    replaced by one with ``num_classes`` outputs, every parameter except those
    of the last few layers is frozen via ``freeze_all_but_last_n``, and the
    model is moved to the notebook-level ``device``.

    Args:
        model_name (str): One of "resnet18", "resnet34", "resnet50",
            "resnet101", "resnet152", "vgg16", "alexnet", "googlenet".
        num_classes (int): Output size of the replacement head (default 7).

    Returns:
        nn.Module: The prepared model on ``device``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # model name -> (where the final linear layer lives, layers left trainable)
    head_specs = {
        "resnet18": ("fc", 2),
        "resnet34": ("fc", 2),
        "resnet50": ("fc", 2),
        "resnet101": ("fc", 2),
        "resnet152": ("fc", 2),
        "vgg16": ("classifier", 1),
        "alexnet": ("classifier", 1),
        "googlenet": ("fc", 2),
    }
    if not isinstance(model_name, str) or model_name not in head_specs:
        raise ValueError(f"Model {model_name} not recognized. Please choose a valid model name.")

    head_location, trainable_layers = head_specs[model_name]
    # The torchvision constructor has the same name as the model.
    model = getattr(models, model_name)(weights='DEFAULT')

    # Change the output layer to match the number of classes.
    if head_location == "fc":
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    else:
        # VGG/AlexNet keep their final linear layer at classifier[6].
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

    model = freeze_all_but_last_n(model, trainable_layers)
    return model.to(device)

In [10]:
def get_dataloaders(config, transform=None):
    """Create the training and validation DataLoaders described by ``config``.

    Args:
        config (dict): Must contain "dataset_class" and "batch_size". May
            contain "seed" (default 42), which controls the train/validation
            split when the split dataset is used.
            - dataset_class == "ImageClass1": train on ``train_dir`` and
              validate on ``test_dir`` using ImageDataset1.
            - any other value: split ``train_dir`` 80/20 into training and
              validation parts using ImageDataset2.
        transform (callable, optional): Passed to both datasets. When None,
            each dataset falls back to its own default transform.

    Returns:
        tuple: ``(train_loader, val_loader)``. Only the training loader shuffles.
    """
    dataset_type = config["dataset_class"]
    batch_size = config["batch_size"]
    # Same default as before; configs may now override the split seed.
    seed = config["seed"] if "seed" in config else 42

    if dataset_type == "ImageClass1":  # using train and test directories
        train_dataset = ImageDataset1(root_dir=train_dir, transform=transform, classes=classes, is_train=True)
        val_dataset = ImageDataset1(root_dir=test_dir, transform=transform, classes=classes, is_train=False)
    else:  # Splitting train into train and validation sets
        # Both halves must share every argument except is_train so that they
        # are cut from the same shuffled list.
        split_kwargs = dict(
            root_dir=train_dir,
            classes=classes,
            transform=transform,
            split_ratio=0.8,
            seed=seed,
        )
        train_dataset = ImageDataset2(is_train=True, **split_kwargs)
        val_dataset = ImageDataset2(is_train=False, **split_kwargs)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    return train_loader, val_loader

def validate_model(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    The model is switched to eval mode and is left in eval mode on return.
    Batches are moved to the device of the model's first parameter.

    Args:
        model (nn.Module): Model to evaluate.
        val_loader (iterable): Yields ``(inputs, labels)`` batches.
        criterion (callable): Loss applied to ``(outputs, labels)``.

    Returns:
        tuple: ``(mean of the per-batch losses, accuracy in percent)``.

    Raises:
        ValueError: If ``val_loader`` yields no samples, instead of failing
            with an uninformative division by zero.
    """
    model.eval()
    device = next(model.parameters()).device
    val_loss = 0.0
    correct = 0
    total = 0
    # Counted while iterating so loaders without __len__ also work.
    num_batches = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)  # index of the largest logit per sample
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            num_batches += 1

    if num_batches == 0 or total == 0:
        raise ValueError("val_loader produced no samples; cannot compute validation metrics.")

    return val_loss / num_batches, 100. * correct / total

In [11]:
def initialize_weights(model, method):
    """Re-initialise the weights of every Conv2d and Linear layer in ``model``.

    Args:
        model (nn.Module): Model whose layers are modified in place.
        method (str): "xavier" applies Xavier-uniform, "kaiming" applies
            Kaiming-uniform for ReLU. Any other value leaves the model as is.

    Only ``weight`` tensors are touched; biases keep their current values.
    """
    target_layers = [
        layer for layer in model.modules()
        if isinstance(layer, (nn.Conv2d, nn.Linear))
    ]

    if method == "xavier":
        for layer in target_layers:
            nn.init.xavier_uniform_(layer.weight)
    elif method == "kaiming":
        for layer in target_layers:
            nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')

def should_initialize(model_type):
    """Return True only for the "scratch" model type.

    Custom weight initialisation is meant for scratch models only; any other
    model type yields False.
    """
    is_scratch = (model_type == "scratch")
    return is_scratch


In [16]:
from datetime import datetime

def train_model(config):
    """Train one model as described by ``config`` and report its best validation accuracy.

    Args:
        config (dict): Run description with the keys
            - "model_choice": ``(model_type, model_name)``; ``model_name`` is
              passed to ``load_model``, ``model_type`` selects the checkpoint
              interval and whether custom weight initialisation may run.
            - "optimizer": "adam" (with weight decay 1e-3), "sgd", or any
              other value for RMSprop.
            - "lr": learning rate.
            - "batch_size", "dataset_class": consumed by ``get_dataloaders``.
            - "epochs": maximum number of epochs, at least 1.
            - "init_method": "default", or a method name for ``initialize_weights``.

    Side effects:
        Writes periodic ``epoch_<k>.pt`` checkpoints, ``metrics.pt`` (per-epoch
        loss/accuracy lists) and ``final_model.pt`` under
        ``logs_224_best_models_weight_decay/checkpoints/<run name>/``.

    Returns:
        float: Highest validation accuracy (percent) seen over the epochs run.

    Raises:
        ValueError: If ``config["epochs"]`` is smaller than 1.
    """
    model_type, model_name = config["model_choice"]
    epochs = config["epochs"]
    # Fail fast: with no epoch there is no validation accuracy to report, and
    # the run would otherwise only fail after writing its output files.
    if epochs < 1:
        raise ValueError(f"config['epochs'] must be at least 1, got {epochs}")

    train_loader, val_loader = get_dataloaders(config)
    init_method = config["init_method"]

    model = load_model(model_name)
    model.to(device)
    # NOTE(review): the timestamp has hour resolution, so two runs of the same
    # config within one hour write into (and overwrite) the same directory.
    time_stamp = datetime.now().strftime("%Y%m%d_%H")
    unique_config = f"{model_name}_{config['dataset_class']}_{config['optimizer']}_{config['init_method']}_{config['batch_size']}_{config['lr']}_time_{time_stamp}"

    if should_initialize(model_type) and init_method != "default":
        initialize_weights(model, init_method)

    # Optimizer: hand over only the parameters that can receive gradients.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    if config["optimizer"] == "adam":
        optimizer = torch.optim.Adam(trainable_params, lr=config["lr"], weight_decay=1e-3)
    elif config["optimizer"] == "sgd":
        optimizer = torch.optim.SGD(trainable_params, lr=config["lr"])
    else:
        optimizer = torch.optim.RMSprop(trainable_params, lr=config["lr"])

    criterion = nn.CrossEntropyLoss()
    # Lower the learning rate when the validation loss stops decreasing.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

    early_stop_patience = 20  # epochs without a new best validation loss
    best_val_loss = float('inf')
    patience_counter = 0
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    save_interval = 2 if model_type == "pretrained" else 50
    save_dir = os.path.join("logs_224_best_models_weight_decay", "checkpoints")
    os.makedirs(save_dir, exist_ok=True)
    run_dir = os.path.join(save_dir, unique_config)

    total_batches = len(train_loader)
    total_steps = epochs * total_batches
    progress_bar = tqdm(total=total_steps, dynamic_ncols=True, desc="Training")

    try:
        for epoch in range(epochs):
            model.train()
            running_loss, correct, total = 0.0, 0, 0
            for i, (inputs, labels) in enumerate(train_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                # Running averages over the batches seen so far in this epoch.
                train_loss = running_loss / (i + 1)
                train_acc = 100. * correct / total
                progress_bar.update(1)
                progress_bar.set_postfix({
                    "Epoch": f"{epoch+1}/{epochs}",
                    "Train Loss": f"{train_loss:.4f}",
                    "Train Acc": f"{train_acc:.2f}%"
                })

            # Validation phase
            val_loss, val_acc = validate_model(model, val_loader, criterion)
            scheduler.step(val_loss)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accs.append(train_acc)
            val_accs.append(val_acc)

            # Save model checkpoint
            if (epoch + 1) % save_interval == 0:
                os.makedirs(run_dir, exist_ok=True)
                torch.save(model.state_dict(), os.path.join(run_dir, f"epoch_{epoch+1}.pt"))

            # Early stopping on the validation loss
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= early_stop_patience:
                    progress_bar.set_description("Early Stopping")
                    break

            progress_bar.set_postfix({"Epoch": f"{epoch+1}/{epochs}", "Train Loss": f"{train_loss:.4f}", "Train Acc": f"{train_acc:.2f}%", "Val Loss": f"{val_loss:.4f}", "Val Acc": f"{val_acc:.2f}%"})
    finally:
        # Close the bar even when training is interrupted or raises.
        progress_bar.close()

    # Save metrics
    os.makedirs(run_dir, exist_ok=True)
    torch.save({
        "train_losses": train_losses,
        "val_losses": val_losses,
        "train_accs": train_accs,
        "val_accs": val_accs
    }, os.path.join(run_dir, "metrics.pt"))

    # NOTE(review): these are the last-epoch weights, which need not be the
    # epoch that produced the accuracy returned below.
    torch.save(model.state_dict(), os.path.join(run_dir, "final_model.pt"))

    return max(val_accs)


In [13]:
# Run description consumed by train_model / get_dataloaders.
config7 = dict(
    model_choice=("pretrained", "resnet50"),  # (model_type, model_name)
    optimizer="adam",
    lr=0.01,
    batch_size=32,
    dataset_class="ImageClass2",  # anything other than "ImageClass1" selects the 80/20 split of the train folder
    epochs=7,
    init_method="default",
)

In [14]:
# Train with config7. The number displayed under the progress bar is
# train_model's return value: the highest per-epoch validation accuracy (%).
train_model(config7)

Training: 100%|██████████| 1442/1442 [10:54<00:00,  2.20it/s, Epoch=7/7, Train Loss=0.0475, Train Acc=98.62%, Val Loss=0.1348, Val Acc=96.05%] 


96.05103280680437

In [15]:
class ImageDataset2(Dataset):
    """Train/validation view over a single directory of per-class image folders.

    This definition replaces the earlier ImageDataset2 in the notebook; it
    differs in the default training transform, which is a random resized crop
    instead of a center crop.

    All image files under ``root_dir/<class name>/`` are collected, shuffled
    with a seeded RNG and cut at ``split_ratio``. An instance built with
    ``is_train=True`` keeps the first part, one built with ``is_train=False``
    keeps the remainder. Two instances created with the same ``root_dir``,
    ``classes``, ``split_ratio`` and ``seed`` therefore form a disjoint split.
    """

    # Accepted file suffixes, matched case-insensitively. The leading dot is
    # required so a name such as "fakejpg" is not mistaken for an image.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, classes, transform=None, is_train=True, split_ratio=0.8, seed=42):
        """
        Args:
            root_dir (str): Directory with all the class folders
            classes (list): List of class names (subfolder names)
            transform (callable, optional): Optional transform to be applied on a sample
            is_train (bool): Whether this is training data or not
            split_ratio (float): Ratio for training data (default is 0.8)
            seed (int): Seed for reproducibility
        """
        self.root_dir = root_dir
        self.classes = classes
        self.transform = transform
        self.is_train = is_train
        self.class_to_idx = {cls: idx for idx, cls in enumerate(classes)}
        self.samples = []

        all_samples = []
        for idx, cls in enumerate(classes):
            class_folder = os.path.join(root_dir, cls)
            if not os.path.isdir(class_folder):
                # Class folders that do not exist contribute no samples.
                continue
            # os.listdir order is unspecified; sorting makes the pre-shuffle
            # order, and therefore the split, identical on every construction.
            for img_name in sorted(os.listdir(class_folder)):
                if img_name.lower().endswith(self._IMAGE_EXTENSIONS):
                    img_path = os.path.join(class_folder, img_name)
                    all_samples.append((img_path, idx))

        # Shuffle and split once. A private Random instance yields the same
        # permutation as seeding the module-level RNG, without resetting the
        # global `random` state for the rest of the program.
        random.Random(seed).shuffle(all_samples)
        split_point = int(len(all_samples) * split_ratio)
        if is_train:
            self.samples = all_samples[:split_point]
        else:
            self.samples = all_samples[split_point:]

        # Set default transforms if not provided
        if self.transform is None:
            if is_train:
                # Training: random crop rescaled to 224x224 as augmentation.
                self.transform = T.Compose([
                    T.RandomResizedCrop(224),
                    T.ToTensor(),
                ])
            else:
                # Validation: deterministic resize plus 224x224 center crop.
                self.transform = T.Compose([
                    T.Resize(224),
                    T.CenterCrop(224),
                    T.ToTensor(),
                ])

    def __len__(self):
        """Return the number of samples in this part of the split."""
        return len(self.samples)

    def __getitem__(self, idx, retry=0):
        """Load sample ``idx`` and return ``(transformed image, label)``.

        If loading or transforming fails, the error is printed and a randomly
        chosen other sample from this split is returned instead (best effort,
        up to 3 substitutions).

        Raises:
            RuntimeError: when the initial load and all 3 substitutes fail;
                the last underlying error is chained as the cause.
        """
        img_path, label = self.samples[idx]
        try:
            # Context manager closes the file handle; convert() returns an
            # independent in-memory image, so it stays usable afterwards.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"Error loading image {img_path}: {str(e)}")
            if retry < 3:
                return self.__getitem__(random.randint(0, len(self)-1), retry=retry+1)
            raise RuntimeError("Too many failed image loads.") from e

In [17]:
# Repeat the config7 run now that ImageDataset2 has been redefined above with
# RandomResizedCrop as its default training transform (train_model passes no
# transform, so the dataset defaults apply).
train_model(config7)

Training: 100%|██████████| 1442/1442 [08:52<00:00,  2.71it/s, Epoch=7/7, Train Loss=0.2861, Train Acc=90.34%, Val Loss=0.1605, Val Acc=94.78%] 


94.95747266099636