### Import libraries

In [1]:
import pandas as pd
import numpy as np

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import KFold

### Loading datasets and data preprocessing

In [2]:
# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load and normalize the KMNIST dataset
# Data Augmentation and Normalization for the KMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = datasets.KMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.KMNIST(root='./data', train=False, download=True, transform=transform)

### Defining model

In [3]:
# Model Architecture
class SimpleANN(nn.Module):
    def __init__(self):
        super(SimpleANN, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10),
            nn.BatchNorm1d(10),
            nn.Softmax(dim=1)
        )
        self.init_weights()

    def init_weights(self):
        for module in self.model.modules():
            if isinstance(module, nn.Linear):
                nn.init.uniform_(module.weight, a=-0.1, b=0.1)   # Initializing the weights
                nn.init.constant_(module.bias, 0.0)

    def forward(self, x):
        return self.model(x)

### Hyperparameter tuning and cross validaiton

In [5]:
# Hyperparameter Tuning and Cross-Validation
def train_and_evaluate(optimizer_name, learning_rate, batch_size):
    # K-Fold Cross-Validation
    kf = KFold(n_splits=5)
    accuracy_scores = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(train_dataset)):
        print(f"Fold {fold + 1}/{kf.n_splits}")

        # Creating data loaders for cross-validation
        train_sampler = SubsetRandomSampler(train_idx)
        val_sampler = SubsetRandomSampler(val_idx)

        train_loader_cv = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
        val_loader_cv = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler)

        model = SimpleANN().to(device)  # Move model to GPU

        if optimizer_name == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        elif optimizer_name == 'rmsprop':
            optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
        elif optimizer_name == 'adamw':
            optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

        criterion = nn.MSELoss()

        # Training
        model.train() # Set model in training mode.
        for epoch in range(10):  # Using 10 epochs
            print(f"  Epoch {epoch + 1}/10")
            running_loss = 0.0
            train_correct = 0
            for batch_idx, (inputs, targets) in enumerate(train_loader_cv):
                # Move data to GPU.
                inputs, targets = inputs.to(device), targets.to(device)
                one_hot_targets = F.one_hot(targets, num_classes=10).float()

                # Zero the parameter gradients.
                optimizer.zero_grad()

                # Forward pass.
                outputs = model(inputs)
                loss = criterion(outputs, one_hot_targets)

                # Backward pass and update.
                loss.backward()
                optimizer.step()

                # Accumulate metrics.
                running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                train_correct += (predicted == targets).sum().item()
                if batch_idx % 10 == 0:
                    print(f"    Batch {batch_idx + 1}/{len(train_loader_cv)}, Loss: {running_loss / (batch_idx + 1):.4f}")

            train_loss = running_loss / len(train_loader_cv)
            train_accuracy = train_correct / len(train_sampler)
            
            # Evaluate the model on the test dataset. Identical to loop above but without
            # weight adjustment.
            # Validation
            model.eval() # Set model in inference mode.
            val_loss = 0.0
            val_correct = 0
            with torch.no_grad():
                for batch_idx, (inputs, targets) in enumerate(val_loader_cv):
                    inputs, targets = inputs.to(device), targets.to(device)
                    one_hot_targets = F.one_hot(targets, num_classes=10).float()
                    outputs = model(inputs)
                    loss = criterion(outputs, one_hot_targets)
                    val_loss += loss.item()
                    _, predicted = torch.max(outputs, 1)
                    val_correct += (predicted == targets).sum().item()
                    if batch_idx % 10 == 0:
                        print(f"    Validation Batch {batch_idx + 1}/{len(val_loader_cv)}")

            val_loss /= len(val_loader_cv)
            val_accuracy = val_correct / len(val_sampler)

            print(f"  Epoch {epoch + 1}/{10} - train_loss: {train_loss:.4f} - train_acc: {train_accuracy:.4f} - val_loss: {val_loss:.4f} - val_acc: {val_accuracy:.4f}")

        accuracy_scores.append(val_accuracy)
        print(f"  Fold {fold + 1} Accuracy: {val_accuracy:.4f}")

    return np.mean(accuracy_scores)

### Define hyperparameters

In [6]:
results = []

# Define hyperparameters
learning_rates = [1e-3, 1e-4, 1e-5]
batch_sizes = [32, 64, 128]

for optimizer_name in ['adam', 'rmsprop', 'adamw']:    # Loop through all the values to find the best combinations 
    for lr in learning_rates:
        for bs in batch_sizes:
            print(f"Testing {optimizer_name} with learning rate {lr} and batch size {bs}")
            accuracy = train_and_evaluate(optimizer_name, lr, bs)
            results.append((optimizer_name, lr, bs, accuracy))    # Append resuls in list for adding it in dataframe 

Testing adam with learning rate 0.001 and batch size 32
Fold 1/5
  Epoch 1/10
    Batch 1/1500, Loss: 0.1007
    Batch 11/1500, Loss: 0.0962
    Batch 21/1500, Loss: 0.0951
    Batch 31/1500, Loss: 0.0942
    Batch 41/1500, Loss: 0.0933
    Batch 51/1500, Loss: 0.0929
    Batch 61/1500, Loss: 0.0917
    Batch 71/1500, Loss: 0.0908
    Batch 81/1500, Loss: 0.0894
    Batch 91/1500, Loss: 0.0883
    Batch 101/1500, Loss: 0.0873
    Batch 111/1500, Loss: 0.0859
    Batch 121/1500, Loss: 0.0847
    Batch 131/1500, Loss: 0.0835
    Batch 141/1500, Loss: 0.0823
    Batch 151/1500, Loss: 0.0813
    Batch 161/1500, Loss: 0.0803
    Batch 171/1500, Loss: 0.0793
    Batch 181/1500, Loss: 0.0785
    Batch 191/1500, Loss: 0.0776
    Batch 201/1500, Loss: 0.0770
    Batch 211/1500, Loss: 0.0762
    Batch 221/1500, Loss: 0.0754
    Batch 231/1500, Loss: 0.0746
    Batch 241/1500, Loss: 0.0738
    Batch 251/1500, Loss: 0.0732
    Batch 261/1500, Loss: 0.0725
    Batch 271/1500, Loss: 0.0720
    Batch

### Evaluation and comparison

In [7]:
# Convert results to a DataFrame for easy comparison
results_df = pd.DataFrame(results, columns=['Optimizer', 'Learning Rate', 'Batch Size', 'Accuracy'])
best_results = results_df.loc[results_df.groupby('Optimizer')['Accuracy'].idxmax()]

print(best_results)

   Optimizer  Learning Rate  Batch Size  Accuracy
3       adam         0.0001          32  0.953050
20     adamw         0.0010         128  0.953900
10   rmsprop         0.0010          64  0.953617


### Displaying the results

In [8]:
results_df.sort_values(by='Accuracy', )

Unnamed: 0,Optimizer,Learning Rate,Batch Size,Accuracy
8,adam,1e-05,128,0.832633
17,rmsprop,1e-05,128,0.84655
26,adamw,1e-05,128,0.848033
16,rmsprop,1e-05,64,0.862617
25,adamw,1e-05,64,0.8669
7,adam,1e-05,64,0.866967
24,adamw,1e-05,32,0.878983
15,rmsprop,1e-05,32,0.885833
6,adam,1e-05,32,0.88725
5,adam,0.0001,128,0.940933


## Getting final test accuracy

In [9]:
# Training the final model on the entire training dataset
def train_final_model(optimizer_name, learning_rate, batch_size, num_epochs=100):

    model = SimpleANN().to(device) # Move model to GPU

    if optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'adamw':
        optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss()

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Training
    model.train()  # Set model in training mode.
    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        running_loss = 0.0
        train_correct = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader):

            # Move data to GPU.
            inputs, targets = inputs.to(device), targets.to(device)
            one_hot_targets = F.one_hot(targets, num_classes=10).float()

            # Zero the parameter gradients.
            optimizer.zero_grad()

            # Forward pass.
            outputs = model(inputs)
            loss = criterion(outputs, one_hot_targets)

            # Backward pass and update.
            loss.backward()
            optimizer.step()

            # Accumulate metrics.
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            train_correct += (predicted == targets).sum().item()
            if batch_idx % 10 == 0:
                print(f"  Batch {batch_idx + 1}/{len(train_loader)}, Loss: {running_loss / (batch_idx + 1):.4f}")

        train_loss = running_loss / len(train_loader)
        train_accuracy = train_correct / len(train_dataset)
        print(f"Epoch {epoch + 1}/{num_epochs} - train_loss: {train_loss:.4f} - train_acc: {train_accuracy:.4f}")

    return model   # Returning Model

# Evaluate the model on the test set
def evaluate_test_set(model, test_dataset, batch_size):
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    model.eval()  # Set model in inference mode.
    test_correct = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            test_correct += (predicted == targets).sum().item()

    test_accuracy = test_correct / len(test_dataset)
    return test_accuracy   # Returning Test Accuracy

### trying best values we got for getting Test Accuracy

In [10]:
result = []

best_optimizer = 'adam'
best_learning_rate = 0.0001 
best_batch_size = 32 

# Train final model
final_model = train_final_model(best_optimizer, best_learning_rate, best_batch_size)
test_accuracy = evaluate_test_set(final_model, test_dataset, best_batch_size)
result.append((best_optimizer, best_learning_rate, best_batch_size, test_accuracy))

Epoch 1/100
  Batch 1/1875, Loss: 0.1016
  Batch 11/1875, Loss: 0.0982
  Batch 21/1875, Loss: 0.0983
  Batch 31/1875, Loss: 0.0989
  Batch 41/1875, Loss: 0.0988
  Batch 51/1875, Loss: 0.0988
  Batch 61/1875, Loss: 0.0982
  Batch 71/1875, Loss: 0.0978
  Batch 81/1875, Loss: 0.0976
  Batch 91/1875, Loss: 0.0974
  Batch 101/1875, Loss: 0.0973
  Batch 111/1875, Loss: 0.0971
  Batch 121/1875, Loss: 0.0968
  Batch 131/1875, Loss: 0.0967
  Batch 141/1875, Loss: 0.0965
  Batch 151/1875, Loss: 0.0963
  Batch 161/1875, Loss: 0.0961
  Batch 171/1875, Loss: 0.0958
  Batch 181/1875, Loss: 0.0956
  Batch 191/1875, Loss: 0.0954
  Batch 201/1875, Loss: 0.0952
  Batch 211/1875, Loss: 0.0949
  Batch 221/1875, Loss: 0.0947
  Batch 231/1875, Loss: 0.0944
  Batch 241/1875, Loss: 0.0941
  Batch 251/1875, Loss: 0.0939
  Batch 261/1875, Loss: 0.0936
  Batch 271/1875, Loss: 0.0934
  Batch 281/1875, Loss: 0.0931
  Batch 291/1875, Loss: 0.0929
  Batch 301/1875, Loss: 0.0925
  Batch 311/1875, Loss: 0.0922
  Batch

In [11]:
best_optimizer = 'rmsprop' 
best_learning_rate = 0.0010 
best_batch_size = 64 

# Train final model
final_model = train_final_model(best_optimizer, best_learning_rate, best_batch_size)
test_accuracy = evaluate_test_set(final_model, test_dataset, best_batch_size)
result.append((best_optimizer, best_learning_rate, best_batch_size, test_accuracy))

Epoch 1/100
  Batch 1/938, Loss: 0.0955
  Batch 11/938, Loss: 0.0953
  Batch 21/938, Loss: 0.0923
  Batch 31/938, Loss: 0.0895
  Batch 41/938, Loss: 0.0859
  Batch 51/938, Loss: 0.0831
  Batch 61/938, Loss: 0.0807
  Batch 71/938, Loss: 0.0780
  Batch 81/938, Loss: 0.0758
  Batch 91/938, Loss: 0.0739
  Batch 101/938, Loss: 0.0720
  Batch 111/938, Loss: 0.0707
  Batch 121/938, Loss: 0.0695
  Batch 131/938, Loss: 0.0684
  Batch 141/938, Loss: 0.0674
  Batch 151/938, Loss: 0.0665
  Batch 161/938, Loss: 0.0655
  Batch 171/938, Loss: 0.0644
  Batch 181/938, Loss: 0.0635
  Batch 191/938, Loss: 0.0628
  Batch 201/938, Loss: 0.0621
  Batch 211/938, Loss: 0.0614
  Batch 221/938, Loss: 0.0608
  Batch 231/938, Loss: 0.0601
  Batch 241/938, Loss: 0.0595
  Batch 251/938, Loss: 0.0589
  Batch 261/938, Loss: 0.0584
  Batch 271/938, Loss: 0.0579
  Batch 281/938, Loss: 0.0573
  Batch 291/938, Loss: 0.0567
  Batch 301/938, Loss: 0.0563
  Batch 311/938, Loss: 0.0559
  Batch 321/938, Loss: 0.0554
  Batch 3

In [12]:
best_optimizer = 'adamw'
best_learning_rate = 0.0010 
best_batch_size = 128

# Train final model
final_model = train_final_model(best_optimizer, best_learning_rate, best_batch_size)
test_accuracy = evaluate_test_set(final_model, test_dataset, best_batch_size)
result.append((best_optimizer, best_learning_rate, best_batch_size, test_accuracy))

Epoch 1/100
  Batch 1/469, Loss: 0.0998
  Batch 11/469, Loss: 0.0957
  Batch 21/469, Loss: 0.0927
  Batch 31/469, Loss: 0.0892
  Batch 41/469, Loss: 0.0861
  Batch 51/469, Loss: 0.0836
  Batch 61/469, Loss: 0.0812
  Batch 71/469, Loss: 0.0790
  Batch 81/469, Loss: 0.0771
  Batch 91/469, Loss: 0.0751
  Batch 101/469, Loss: 0.0735
  Batch 111/469, Loss: 0.0717
  Batch 121/469, Loss: 0.0702
  Batch 131/469, Loss: 0.0691
  Batch 141/469, Loss: 0.0677
  Batch 151/469, Loss: 0.0665
  Batch 161/469, Loss: 0.0655
  Batch 171/469, Loss: 0.0644
  Batch 181/469, Loss: 0.0634
  Batch 191/469, Loss: 0.0624
  Batch 201/469, Loss: 0.0615
  Batch 211/469, Loss: 0.0606
  Batch 221/469, Loss: 0.0598
  Batch 231/469, Loss: 0.0590
  Batch 241/469, Loss: 0.0583
  Batch 251/469, Loss: 0.0576
  Batch 261/469, Loss: 0.0569
  Batch 271/469, Loss: 0.0563
  Batch 281/469, Loss: 0.0556
  Batch 291/469, Loss: 0.0550
  Batch 301/469, Loss: 0.0544
  Batch 311/469, Loss: 0.0539
  Batch 321/469, Loss: 0.0534
  Batch 3

### Displaying the results

In [13]:
tests_df = pd.DataFrame(result, columns=['Optimizer', 'Learning Rate', 'Batch Size', 'Test Accuracy'])
tests_df.head()

Unnamed: 0,Optimizer,Learning Rate,Batch Size,Test Accuracy
0,adam,0.0001,32,0.9179
1,rmsprop,0.001,64,0.9167
2,adamw,0.001,128,0.9134
