In [1]:
import os
import math
import torch
import random
import pandas as pd
from PIL import Image
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
from torchviz import make_dot
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,Dataset
from torch.utils.tensorboard import SummaryWriter
from utils.yaml import Config
# from utils.optuna import run_optuna_study
from functools import partial
import optuna

# Constants

In [2]:
# Read the notebook settings through the project's Config helper.
cfg = Config.load()

In [3]:
# Training constants. Batch size and learning rate are no longer fixed here
# (the old assignments are kept below, commented out, for reference).
# BATCH_SIZE = 32
# LEARNING_RATE = 0.001
EPOCHS = 100

# Values supplied by the loaded configuration.
NUM_CLASSES = cfg['NUM_CLASSES']
BETAS = cfg['BETAS']

In [4]:
# Each run name follows the pattern:
#   Model_trainingmethod_optimizer_lr_scheduler_batch_size_lossfn_weightdecay
# The list itself is read from the loaded configuration and displayed below.
run_names = cfg['run_names']
run_names

['Custom_Naive_adam_0.001_None_32_crossentropy_wd0',
 'Custom_Naive_adam_0.001_None_32_KLDivLoss_wd0',
 'Custom_Naive_adam_0.001_None_32_svm_wd0',
 'Custom_Naive_sgd_0.1_step_32_crossentropy_wd0',
 'Custom_Naive_sgd_0.1_cosine_32_crossentropy_wd0',
 'Custom_Naive_rmsprop_0.1_None_32_crossentropy_wd0',
 'Custom_Naive_rmsprop_0.01_None_32_crossentropy_wd0',
 'Custom_Naive_rmsprop_0.1_None_16_crossentropy_wd0',
 'Custom_Naive_rmsprop_0.01_None_16_crossentropy_wd0',
 'Custom_Naive_adagrad_0.01_None_32_crossentropy_wd1e-6',
 'Custom_Naive_adagrad_0.01_None_32_crossentropy_wd1e-7',
 'ResNet_Transfer_adam_0.01_None_32_crossentropy_wd0',
 'ResNet_Naive_adam_0.01_None_32_crossentropy_wd0',
 'ResNet18_Transfer_adam_0.01_None_32_crossentropy_wd0',
 'ResNet34_Transfer_adam_0.01_None_32_crossentropy_wd0',
 'ResNet101_Transfer_adam_0.01_None_32_crossentropy_wd0',
 'ResNet152_Transfer_adam_0.01_None_32_crossentropy_wd0',
 'AlexNet_Transfer_adam_0.01_None_32_crossentropy_wd0',
 'AlexNet_Naive_adam_0.0

In [None]:
# Display the run name stored at index 13 of the configured list.
run_names[13]

'ResNet18_Transfer_adam_0.01_None_32_crossentropy_wd0'

In [5]:
# The dataset root is read from the configuration; the previously hard-coded
# location is kept below for reference only.
# dataset_path = '../data/sidharkal-sports-image-classification/dataset'
dataset_path = cfg['dataset_path']

# Dataset Class

In [6]:
class SportsDataset(Dataset):
    """Image-classification dataset driven by a CSV of (file name, label) rows.

    Column 0 of the CSV is used as the image file name and column 1 as the
    string label. Images are always read from ``<file_path>/train``; the
    ``split`` argument is stored on the instance but does not change the
    image directory.

    Args:
        csv_file: Path or buffer accepted by ``pandas.read_csv``.
        file_path: Directory that contains the ``train`` image folder.
        split: Stored as ``self.split``; not otherwise used by this class.
        transform: Optional callable applied to the RGB PIL image.
        class_to_idx: Optional ready-made ``{label: index}`` mapping. When
            omitted, the mapping is built from the sorted unique labels of
            this CSV. Pass the training mapping to a validation dataset so
            both use the same indices even if a class is missing from one CSV.
    """

    def __init__(self, csv_file, file_path, split='train', transform=None, class_to_idx=None):
        self.data_info = pd.read_csv(csv_file)
        # self.root_dir = os.path.join(file_path, split)
        self.root_dir = os.path.join(file_path, 'train')
        self.transform = transform
        self.split = split

        if class_to_idx is None:
            # Build the string -> index mapping from every label in this CSV.
            label_column = self.data_info.iloc[:, 1]  # assuming 2nd column is the label
            classes = sorted(label_column.unique())
            class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}
        self.class_to_idx = dict(class_to_idx)
        print(f"Classes: {self.class_to_idx}")

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data_info)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``image`` is the RGB image after ``transform`` (if any); ``label`` is a
        0-dim ``torch.long`` tensor holding the class index.

        Raises:
            KeyError: If the row's label is not present in ``class_to_idx``.
        """
        # 1) Load the image; the context manager releases the file handle
        #    as soon as the RGB copy has been made.
        img_name = os.path.join(self.root_dir, self.data_info.iloc[idx, 0])
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')

        # 2) Apply transforms
        if self.transform:
            image = self.transform(image)

        # 3) String label -> integer index
        label_str = self.data_info.iloc[idx, 1]
        label_idx = self.class_to_idx[label_str]

        # 4) Class-index targets must be int64 for nn.CrossEntropyLoss and
        #    friends; int8 is rejected there and overflows above 127 classes.
        return image, torch.tensor(label_idx, dtype=torch.long)


In [7]:
# Label CSVs that sit directly inside the dataset directory.
train_csv_path, test_csv_path = (
    os.path.join(dataset_path, csv_name) for csv_name in ('train.csv', 'test.csv')
)

In [8]:
# Load the training CSV into a DataFrame.
data_csv = pd.read_csv(train_csv_path)

# Data Splitting

# Data Preprocessing

In [9]:
# Training-time augmentation followed by tensor conversion and normalisation.
#
# The torchvision module is imported under the local alias `T` because this
# cell rebinds the name `transforms` to the composed pipeline. Building the
# pipeline through `transforms.<Something>` made the cell fail with
# AttributeError on any re-run (the name no longer pointed at the module).
import torchvision.transforms as T

train_transforms = T.Compose([
    # Random crop covering 80-100% of the image, resized to 224x224, so no
    # separate Resize step is needed.
    T.RandomResizedCrop(224, scale=(0.8, 1.0), ratio=(0.75, 1.3333)),

    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip(),
    T.RandomRotation(10),

    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),

    T.ToTensor(),

    # Per-channel mean/std; these are the values commonly used with
    # ImageNet-pretrained torchvision models.
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Later cells may refer to the pipeline as `transforms`, so that name is kept.
# NOTE: from here on `transforms` is the Compose object, not the torchvision
# module; use `T` (or `train_transforms`) in new code.
transforms = train_transforms

In [17]:
class Block(nn.Module):
    """Strided conv -> optional norm -> activation, with optional pool/dropout.

    The convolution uses kernel 4, stride 2, padding 1, so an even spatial
    size is halved. When ``pool`` is true a 2x2/stride-2 max pool halves it
    again; when ``drop`` is true a ``Dropout(0.5)`` is applied last.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution.
        act: ``'relu'`` selects ``nn.ReLU``; any other value selects
            ``nn.LeakyReLU(0.2)``.
        drop: Apply dropout to the block output.
        Bnorm: Insert ``nn.BatchNorm2d`` after the convolution. Takes
            precedence over ``Inorm``.
        Inorm: Insert ``nn.InstanceNorm2d`` (only when ``Bnorm`` is false).
        pool: Apply max pooling after the activation.
    """

    def __init__(self,in_channels,out_channels,act='relu',drop=False,Bnorm=True,Inorm=False,pool=False):
        super().__init__()
        self.drop=drop
        self.Bnorm=Bnorm
        self.Inorm=Inorm
        self.pool=pool
        self.dropout=nn.Dropout(0.5)

        # Layer order (conv, [norm], activation) matches the original
        # Sequential layouts, so state_dict keys are unchanged.
        layers=[nn.Conv2d(in_channels,out_channels,4,2,1)]
        if Bnorm:
            layers.append(nn.BatchNorm2d(out_channels))
        elif Inorm:
            layers.append(nn.InstanceNorm2d(out_channels))
        layers.append(nn.ReLU() if act=='relu' else nn.LeakyReLU(0.2))
        self.block=nn.Sequential(*layers)

        # Parameter-free, so registering it does not alter checkpoints.
        self.maxpool=nn.MaxPool2d(kernel_size=2,stride=2)

    def forward(self,x):
        """Run the block on ``x`` (N, C, H, W) and return the feature map."""
        x=self.block(x)
        if self.pool:
            # Apply the pooling module to the activation. The previous code
            # passed `x` to the nn.MaxPool2d constructor, which raised
            # TypeError (multiple values for `kernel_size`).
            x=self.maxpool(x)
        return self.dropout(x) if self.drop else x

In [18]:
class CustomCNN(nn.Module):
    """Four strided-conv blocks followed by a two-layer classifier head.

    With the defaults (``pool=True``, 224x224 input, ``hidden_dim=64``) the
    feature map evolves as::

        3x224x224 -block1-> 64x112x112 -maxpool1-> 64x56x56
                  -block2-> 64x28x28   -maxpool2-> 64x14x14
                  -block3-> 128x7x7    -maxpool3-> 128x3x3
                  -block4-> 128x1x1

    A final global max pool collapses whatever spatial extent is left to 1x1,
    so the head always receives ``hidden_dim*2`` features, including when
    ``pool=False`` or the input is not 224x224.

    Args:
        num_classes: Size of the output logit vector.
        input_channels: Channels of the input image.
        dropout: Dropout probability used in the classifier head.
        hidden_dim: Channels of blocks 1-2; blocks 3-4 use ``hidden_dim*2``.
        Bnorm: Use batch norm inside each block.
        Inorm: Use instance norm inside each block (only if ``Bnorm`` is false).
        pool: Apply a 2x2 max pool after blocks 1-3.

    ``forward`` returns raw logits of shape ``(N, num_classes)``; no softmax
    or sigmoid is applied.
    """

    def __init__(self, num_classes=7, input_channels=3, dropout=0.5,hidden_dim=64,Bnorm=True, Inorm=False,pool=True):
        super(CustomCNN, self).__init__()
        self.Bnorm=Bnorm
        self.Inorm=Inorm
        self.pool=pool

        # Pooling is done by this class (maxpool1..3 below), so the blocks are
        # built with pool=False. Letting each block pool as well would shrink
        # the map twice per stage and leave block4 pooling a 1x1 map.
        block_kwargs = dict(act='relu', drop=False, Bnorm=self.Bnorm, pool=False, Inorm=self.Inorm)
        self.block1 = Block(input_channels, hidden_dim, **block_kwargs)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.block2 = Block(hidden_dim, hidden_dim, **block_kwargs)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.block3 = Block(hidden_dim, hidden_dim*2, **block_kwargs)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.block4 = Block(hidden_dim*2, hidden_dim*2, **block_kwargs)
        # Global pool: identity on a 1x1 map, otherwise reduces HxW to 1x1.
        self.maxpool4 = nn.AdaptiveMaxPool2d(1)

        # Head input follows the channel count of block4 (128 for the default
        # hidden_dim=64, matching the previous hard-coded value).
        self.fc1 = nn.Linear(hidden_dim*2, 128)
        self.relu=nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(128, num_classes) # 128 -> num_classes

        # NOTE(review): `sigmoid` and `fc3` are not used in forward(). They are
        # kept so attribute names and state_dict keys stay the same; remove
        # them once no saved checkpoint depends on them.
        self.sigmoid = nn.Sigmoid()
        self.fc3 = nn.Linear(128*3*3, 128)

    def forward(self, x):
        """Map a batch of images ``(N, C, H, W)`` to logits ``(N, num_classes)``."""
        stages = (
            (self.block1, self.maxpool1),
            (self.block2, self.maxpool2),
            (self.block3, self.maxpool3),
        )
        for block, pool_layer in stages:
            x = block(x)
            if self.pool:
                x = pool_layer(x)
        x = self.block4(x)
        x = self.maxpool4(x)
        x = x.flatten(1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [19]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [None]:
# Build the custom CNN (batch norm on, instance norm off, pooling on) and
# move it to the selected device.
model = CustomCNN(
    num_classes=NUM_CLASSES,
    Bnorm=True,
    Inorm=False,
    pool=True,
).to(device)