In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim 
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x1eabe461990>

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using Device:", device)

Using Device: cuda


In [4]:
df = pd.read_csv("E:\\Notes\\Python'\\PyTorch\\Datasets\\fashion_mnist_small.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [5]:
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
X_train = X_train/255.0
X_test = X_test/255.0

In [7]:
# Data Augmentation 

from torchvision import transforms

train_transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert numpy/tensor array to PIL Image
    transforms.RandomRotation(15),  # Randomly rotate images by up to 15 degrees
    transforms.RandomHorizontalFlip(p=0.5), # Randomly flip images horizontally with a probability of 0.5
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Randomly translate images by up to 10% of their size
    transforms.ToTensor() # Convert PIL Image back to tensor
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

In [8]:
class CustomDataset(Dataset):

    def __init__(self, features, labels, transform=None):
        self.features = torch.tensor(features, dtype=torch.float32).reshape(-1,1,28,28)
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.transform = transform

    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        feature, label = self.features[index], self.labels[index]

        if self.transform:
            feature = self.transform(feature.squeeze(0).numpy())   # Apply the transformation
        return feature, label    

In [9]:
# Using Augmentations in Training and Testing Datasets
train_dataset = CustomDataset(X_train, y_train, transform=train_transform)
test_dataset = CustomDataset(X_test, y_test, transform=test_transform)

In [10]:
# Define the Dynamic CNN Model Class 
class DynamicCNN(nn.Module):

    def __init__(self, num_conv_layers, num_filters, kernel_size, num_fc_layer_size, fc_layer_size, dropout_rate):
        super().__init__()
        layers=[]
        in_channels=1   # Input channel for grayscale images

        # Convolutional Layers
        for _ in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, num_filters, kernel_size=kernel_size, padding="same"))
            layers.append(nn.BatchNorm2d(num_filters))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = num_filters

        self.features = nn.Sequential(*layers)

        # Fully Connected Layers
        fc_layers = [nn.Flatten()]
        input_size = num_filters * (28 // (2 ** num_conv_layers)) ** 2

        for _ in range(num_fc_layer_size):
            fc_layers.append(nn.Linear(input_size, fc_layer_size))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout_rate))
            input_size = fc_layer_size

        fc_layers.append(nn.Linear(fc_layer_size, 10))  # Output layer for 10 classes
        self.classifier = nn.Sequential(*fc_layers)

    
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


In [13]:
# Defining the Objective Function for Optuna

def Objective(trial):
    # Hyperparameters
    num_conv_layers = trial.suggest_int("num_conv_layers", 1, 3)
    num_filters = trial.suggest_categorical("num_filters", [16,32,64,128])
    kernel_size = trial.suggest_categorical("kernel_size", [3, 5])
    num_fc_layer_size = trial.suggest_int("num_fc_layer_size", 1, 3)
    fc_layer_size = trial.suggest_categorical("fc_layer_size", [64, 128, 256])
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.5, step=0.1)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    epochs = trial.suggest_int("epochs", 10, 50)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["SGD", "Adam", "RMSprop"])

    # Model Initialization
    model = DynamicCNN(num_conv_layers, num_filters, kernel_size, num_fc_layer_size, fc_layer_size, dropout_rate).to(device)

    # Data Loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=False)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()

    if optimizer_name=="Adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    elif optimizer_name=="SGD":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    elif optimizer_name=="RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    
    # Training Loop
    for epch in range(epochs):
        model.train()

        for batch_features, batch_labels in train_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    
    # Validation Loop
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            outputs = model(batch_features)
            _, predicted = torch.max(outputs, 1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total
    return accuracy


In [14]:
pruner = optuna.pruners.MedianPruner()
study = optuna.create_study(direction='maximize', pruner=pruner)
study.optimize(Objective, n_trials=20)

[I 2025-07-31 11:08:31,312] A new study created in memory with name: no-name-fbb6b015-e034-412d-ab7c-80a125f7d81a
[I 2025-07-31 11:09:21,789] Trial 0 finished with value: 0.675 and parameters: {'num_conv_layers': 2, 'num_filters': 64, 'kernel_size': 3, 'num_fc_layer_size': 1, 'fc_layer_size': 256, 'dropout_rate': 0.30000000000000004, 'learning_rate': 0.00016293156995198782, 'epochs': 48, 'batch_size': 128, 'weight_decay': 0.0007910011151204362, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.675.
[I 2025-07-31 11:09:53,833] Trial 1 finished with value: 0.7991666666666667 and parameters: {'num_conv_layers': 3, 'num_filters': 64, 'kernel_size': 3, 'num_fc_layer_size': 2, 'fc_layer_size': 64, 'dropout_rate': 0.5, 'learning_rate': 0.0019637324449583568, 'epochs': 26, 'batch_size': 64, 'weight_decay': 4.545207339170257e-05, 'optimizer': 'Adam'}. Best is trial 1 with value: 0.7991666666666667.
[I 2025-07-31 11:10:06,053] Trial 2 finished with value: 0.7491666666666666 and parameters: {'nu