<a href="https://colab.research.google.com/github/GodishalaAshwith/MachineLearningLab/blob/main/MLAssignment2AdamResearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- Imports ---
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pandas as pd
import matplotlib.pyplot as plt
import random

# --- Reproducibility ---
# Seed every global RNG this script has in scope: Python's `random` module and
# PyTorch's CPU and CUDA generators.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
# Covers every visible GPU rather than only the current one; per the PyTorch
# docs the call is silently ignored when CUDA is not available.
torch.cuda.manual_seed_all(SEED)

# --- Transform with Data Augmentation ---
# Steps shared by both pipelines: convert the image to a tensor, then
# normalise with mean 0.5 and std 0.5.
_tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]

# Training pipeline: augmentation first, then the shared steps.
transform_train = transforms.Compose([
    transforms.RandomRotation(10),                     # small random rotation (degrees=10)
    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # small random shift, no extra rotation
    *_tensor_steps,
])

# Test pipeline: the shared steps only, no augmentation.
transform_test = transforms.Compose(list(_tensor_steps))

# --- Datasets ---
# MNIST train and test splits; both are stored under the same root directory
# and requested with download=True.
_DATA_ROOT = './data'
train_data = torchvision.datasets.MNIST(
    root=_DATA_ROOT,
    train=True,
    download=True,
    transform=transform_train,
)
test_data = torchvision.datasets.MNIST(
    root=_DATA_ROOT,
    train=False,
    download=True,
    transform=transform_test,
)

# --- Device ---
# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

# --- Improved Model Definition ---
class Net(nn.Module):
    """Fully connected classifier with three hidden layers and dropout.

    Layer widths are ``in_features -> hidden_units -> hidden_units ->
    hidden_units // 2 -> num_classes``. Each hidden layer is followed by ReLU
    and dropout; the output layer returns raw logits (no softmax).

    Args:
        hidden_units: Width of the first two hidden layers; the third hidden
            layer has ``hidden_units // 2`` units.
        dropout_rate: Probability passed to ``nn.Dropout``.
        in_features: Number of values per flattened sample (default 28 * 28).
        num_classes: Number of output logits (default 10).
    """

    def __init__(
        self,
        hidden_units: int = 128,
        dropout_rate: float = 0.2,
        in_features: int = 28 * 28,
        num_classes: int = 10,
    ) -> None:
        super().__init__()
        self.in_features = in_features
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, hidden_units // 2)
        self.fc4 = nn.Linear(hidden_units // 2, num_classes)
        # One dropout module is reused after every hidden layer.
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape ``(rows, num_classes)``.

        ``x`` may have any shape whose elements regroup into rows of
        ``in_features`` values, e.g. ``(batch, 1, 28, 28)`` with the defaults.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed tensors, copying only when a view is impossible.
        x = x.reshape(-1, self.in_features)
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = self.dropout(torch.relu(hidden_layer(x)))
        return self.fc4(x)

# --- Training Function with Scheduler ---
def train_and_eval(lr, batch_size, hidden_units, dropout_rate, optimizer_name, epochs=10):
    """Train a fresh ``Net`` on ``train_data`` and return its accuracy on ``test_data``.

    Args:
        lr: Initial learning rate handed to the optimizer.
        batch_size: Mini-batch size for training (evaluation always uses 128).
        hidden_units: Forwarded to ``Net``.
        dropout_rate: Forwarded to ``Net``.
        optimizer_name: ``'Adam'`` selects ``optim.AdamW``; any other value
            selects ``optim.SGD`` with momentum 0.9.
        epochs: Number of training epochs; also used as the cosine
            schedule's ``T_max``.

    Returns:
        Test-set accuracy as a percentage (``100 * correct / total``).
    """
    # Pinned host memory only helps when batches are copied to a GPU; asking
    # for it on a CPU-only run gives no benefit and makes PyTorch emit a warning.
    use_cuda = device.type == 'cuda'
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=use_cuda)
    test_loader = DataLoader(test_data, batch_size=128, shuffle=False, num_workers=2, pin_memory=use_cuda)

    model = Net(hidden_units, dropout_rate).to(device)
    criterion = nn.CrossEntropyLoss()

    # NOTE: the name 'Adam' maps to AdamW; every other name (including typos)
    # falls through to SGD with momentum.
    if optimizer_name == 'Adam':
        optimizer = optim.AdamW(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # Cosine annealing over the whole run; stepped once per epoch below.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            # non_blocking lets the copy from pinned memory overlap with compute.
            images = images.to(device, non_blocking=use_cuda)
            labels = labels.to(device, non_blocking=use_cuda)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        scheduler.step()
        # Reported value is the average of the per-batch losses for this epoch.
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {running_loss/len(train_loader):.4f}")

    # --- Evaluation ---
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device, non_blocking=use_cuda)
            labels = labels.to(device, non_blocking=use_cuda)
            # Predicted class = index of the largest logit in each row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# --- Set Best Hyperparameters from Random Search ---
# Tuple order: learning rate, batch size, hidden units, dropout rate, optimizer name.
best_params = (0.001, 64, 128, 0.3, 'Adam')
lr, batch, hidden, drop, opt = best_params
# Single source for the epoch count so the log line and the call cannot drift apart.
final_epochs = 15

# --- Retrain Full Model ---
print(f"\nRetraining Best Model with {best_params} for {final_epochs} epochs...\n")
final_acc = train_and_eval(lr, batch, hidden, drop, opt, epochs=final_epochs)
print(f"Final Accuracy after retraining: {final_acc:.2f}%")


100%|██████████| 9.91M/9.91M [00:00<00:00, 19.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 496kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.73MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 14.0MB/s]


Using device: cuda

Retraining Best Model with (0.001, 64, 128, 0.3, 'Adam') for 15 epochs...

Epoch [1/15] - Loss: 1.2458
Epoch [2/15] - Loss: 0.8018
Epoch [3/15] - Loss: 0.7020
Epoch [4/15] - Loss: 0.6368
Epoch [5/15] - Loss: 0.5994
Epoch [6/15] - Loss: 0.5690
Epoch [7/15] - Loss: 0.5458
Epoch [8/15] - Loss: 0.5291
Epoch [9/15] - Loss: 0.5057
Epoch [10/15] - Loss: 0.4837
Epoch [11/15] - Loss: 0.4680
Epoch [12/15] - Loss: 0.4557
Epoch [13/15] - Loss: 0.4507
Epoch [14/15] - Loss: 0.4433
Epoch [15/15] - Loss: 0.4358
Final Accuracy after retraining: 96.40%
