<a href="https://colab.research.google.com/github/ChinmayeeJuturu/DL_assignment_148/blob/main/dl_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader

In [None]:
# Load dataset using torchvision
def load_data(batch_size=32, root='./data', train_fraction=0.9, seed=None):
    """Build MNIST train/validation/test data loaders.

    Images are converted to tensors and normalised with mean 0.5 and std 0.5.
    The official MNIST training set is split into a training part and a
    validation part; the official test set is returned untouched.

    Args:
        batch_size: Number of samples per batch for all three loaders.
        root: Directory where the dataset is stored / downloaded to.
        train_fraction: Fraction of the official training set kept for
            training; the remainder becomes the validation set.
        seed: Optional integer seed for the train/validation split. With the
            default ``None`` the split uses PyTorch's global RNG (and thus
            differs between runs unless that RNG was seeded by the caller).

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles its data.
    """
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

    full_train_dataset = datasets.MNIST(root=root, train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(root=root, train=False, download=True, transform=transform)

    train_size = int(train_fraction * len(full_train_dataset))
    val_size = len(full_train_dataset) - train_size

    # A dedicated generator makes the split reproducible without touching the
    # global RNG that weight initialisation and shuffling rely on.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset = torch.utils.data.random_split(
        full_train_dataset, [train_size, val_size], **split_kwargs
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

In [None]:
# Define the Feedforward Neural Network class
class FeedforwardNN(nn.Module):
    """Fully connected network with a configurable stack of hidden layers.

    Every hidden layer is a ``nn.Linear`` followed by a fresh instance of
    ``activation_fn``; the output layer is a plain ``nn.Linear`` with no
    activation applied after it.

    Args:
        input_size: Number of input features.
        hidden_layers: Iterable with the width of each hidden layer, in order.
        output_size: Number of output features.
        activation_fn: Zero-argument callable (e.g. an ``nn.Module`` class)
            returning the activation module to insert after each hidden layer.
    """

    def __init__(self, input_size, hidden_layers, output_size, activation_fn=nn.ReLU):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_size, *hidden_layers]
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.extend((nn.Linear(fan_in, fan_out), activation_fn()))
        # Output projection from the last hidden width (or the input itself
        # when there are no hidden layers).
        stack.append(nn.Linear(widths[-1], output_size))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential layer stack and return the result."""
        return self.model(x)

In [None]:
# Training function
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10, device='cpu'):
    """Train ``model`` and print loss/accuracy after every epoch.

    Each batch of images is flattened to ``(batch, -1)`` before the forward
    pass. After every epoch the model is scored on ``val_loader`` via
    ``evaluate_model``.

    Args:
        model: Network to optimise; it is moved to ``device`` in place.
        train_loader: Iterable yielding ``(images, labels)`` training batches.
        val_loader: Iterable yielding ``(images, labels)`` validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.
        device: Device identifier the model and batches are moved to.

    Returns:
        The highest validation accuracy observed across all epochs
        (``0`` when ``epochs`` is ``0``).

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no batches.
    """
    model.to(device)
    best_val_acc = 0
    for epoch in range(epochs):
        model.train()
        total_loss, num_batches, correct, total = 0.0, 0, 0, 0
        for images, labels in train_loader:
            images, labels = images.view(images.size(0), -1).to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)

        # Report the mean loss per batch rather than the raw sum: the sum grows
        # with the number of batches, so it is not comparable between runs that
        # use different batch sizes or dataset sizes.
        avg_loss = total_loss / num_batches
        train_acc = correct / total
        val_acc = evaluate_model(model, val_loader, device)
        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")
        best_val_acc = max(best_val_acc, val_acc)
    return best_val_acc

In [None]:
# Evaluation function
def evaluate_model(model, data_loader, device='cpu'):
    """Return the classification accuracy of ``model`` over ``data_loader``.

    The model is switched to evaluation mode and gradients are disabled while
    scoring. Each batch of images is flattened to ``(batch, -1)`` before the
    forward pass, and the predicted class is the arg-max along dimension 1.

    Args:
        model: Network to evaluate (left in eval mode afterwards).
        data_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device identifier the batches are moved to.

    Returns:
        float: Fraction of samples whose predicted class equals the label.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_images, batch_labels in data_loader:
            flat_images = batch_images.view(batch_images.size(0), -1).to(device)
            targets = batch_labels.to(device)
            predictions = model(flat_images).argmax(1)
            n_correct += (predictions == targets).sum().item()
            n_seen += targets.size(0)
    return n_correct / n_seen

In [None]:
# Experimentation function: grid search over architecture, batch size, optimizer and activation
def run_experiments():
    """Grid-search hidden-layer widths, batch size, optimizer and activation.

    For every combination a fresh ``FeedforwardNN`` is trained for 5 epochs
    with ``train_model`` and scored by its best validation accuracy. The
    result of every run and the overall best configuration are printed.

    Returns:
        tuple | None: ``(hidden_layers, batch_size, optimizer_name,
        activation_name)`` of the run with the highest validation accuracy,
        or ``None`` if no run scored above 0.
    """
    input_size = 28 * 28
    output_size = 10
    batch_sizes = [16, 32, 64]
    hidden_layers_list = [[32, 32, 32], [64, 64, 64], [128, 128, 128]]
    activations = [nn.ReLU, nn.Sigmoid]
    learning_rate = 0.001

    # Optimizer name -> factory taking the model parameters. Insertion order
    # defines the order in which the optimizers are tried.
    optimizer_factories = {
        'sgd': lambda params: optim.SGD(params, lr=learning_rate),
        'momentum': lambda params: optim.SGD(params, lr=learning_rate, momentum=0.9),
        'nesterov': lambda params: optim.SGD(params, lr=learning_rate, momentum=0.9, nesterov=True),
        'rmsprop': lambda params: optim.RMSprop(params, lr=learning_rate),
        'adam': lambda params: optim.Adam(params, lr=learning_rate),
        'nadam': lambda params: optim.NAdam(params, lr=learning_rate),
    }

    best_config = None
    best_accuracy = 0

    # Load and split the data once, then build one pair of loaders per batch
    # size on top of the *same* train/validation split. Previously a single
    # loader pair with batch_size=32 was reused for every run, so the
    # batch-size dimension of the search had no effect on training.
    base_train_loader, base_val_loader, _ = load_data(batch_size=batch_sizes[0])
    train_set = base_train_loader.dataset
    val_set = base_val_loader.dataset
    loaders_by_batch_size = {
        size: (
            DataLoader(train_set, batch_size=size, shuffle=True),
            DataLoader(val_set, batch_size=size, shuffle=False),
        )
        for size in batch_sizes
    }

    for hidden_layers in hidden_layers_list:
        for batch_size in batch_sizes:
            train_loader, val_loader = loaders_by_batch_size[batch_size]
            for optimizer_name, make_optimizer in optimizer_factories.items():
                for activation_fn in activations:
                    model = FeedforwardNN(input_size, hidden_layers, output_size, activation_fn)
                    criterion = nn.CrossEntropyLoss()
                    optimizer = make_optimizer(model.parameters())

                    val_acc = train_model(model, train_loader, val_loader, optimizer, criterion, epochs=5)
                    print(f"Hidden Layers: {hidden_layers}, Batch Size: {batch_size}, Optimizer: {optimizer_name}, Activation: {activation_fn.__name__}, Val Acc: {val_acc:.4f}")

                    if val_acc > best_accuracy:
                        best_accuracy = val_acc
                        best_config = (hidden_layers, batch_size, optimizer_name, activation_fn.__name__)

    print("Best Configuration:", best_config)
    return best_config

# Run the full grid search. This trains one model per configuration in the
# grid, and keeps the best-scoring configuration tuple in `best_config`.
best_config = run_experiments()


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 52.1MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 2.18MB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 14.9MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 7.90MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Epoch 1, Loss: 3870.7463, Train Acc: 0.1679, Val Acc: 0.2208
Epoch 2, Loss: 3768.5873, Train Acc: 0.2578, Val Acc: 0.2797
Epoch 3, Loss: 3389.8802, Train Acc: 0.3727, Val Acc: 0.5247
Epoch 4, Loss: 2383.5792, Train Acc: 0.6037, Val Acc: 0.6988
Epoch 5, Loss: 1507.5685, Train Acc: 0.7506, Val Acc: 0.7942
Hidden Layers: [32, 32, 32], Batch Size: 16, Optimizer: sgd, Activation: ReLU, Val Acc: 0.7942
Epoch 1, Loss: 3897.6474, Train Acc: 0.1124, Val Acc: 0.1125
Epoch 2, Loss: 3884.7702, Train Acc: 0.1124, Val Acc: 0.1125
Epoch 3, Loss: 3884.0894, Train Acc: 0.1124, Val Acc: 0.1125
Epoch 4, Loss: 3883.9647, Train Acc: 0.1124, Val Acc: 0.1125
Epoch 5, Loss: 3883.8455, Train Acc: 0.1124, Val Acc: 0.1125
Hidden Layers: [32, 32, 32], Batch Size: 16, Optimizer: sgd, Activation: Sigmoid, Val Acc: 0.1125
Epoch 1, Loss: 2105.2878, Train Acc: 0.6014, Val Acc: 0.8407
Epoch 2, Loss: 688.8953, Train Acc: 0.8803, Val Acc: 0.9062
Epoch 3, Loss: 514.4595, Train Acc: 0.9088, Val Acc: 0.9148
Epoch 4, Loss: 4