In [16]:
#importing libraries
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

Now let's download the training and test data from the MNIST dataset.


In [24]:
# Arguments common to both MNIST splits: cache under ./data and fetch if missing.
mnist_common_args = {"root": "data", "download": True}

# Training split, with images converted to tensors by ToTensor().
train_data = datasets.MNIST(train=True, transform=ToTensor(), **mnist_common_args)

# Held-out test split, loaded the same way.
test_data = datasets.MNIST(train=False, transform=ToTensor(), **mnist_common_args)

In [25]:
# Seed torch's global RNG, then wrap both datasets in DataLoaders.
torch.manual_seed(42)
train_data_loader = DataLoader(dataset=train_data, batch_size=64)
# The test batch size stays the same for all experiments.
test_data_loader = DataLoader(dataset=test_data, batch_size=64)

# Peek at a single test batch to confirm the tensor shapes.
first_batch = next(iter(test_data_loader), None)
if first_batch is not None:
    X, y = first_batch
    print(f"The image is of shape: {X.shape}")
    print(f"The shape of the label: {y.shape}")

The image is of shape: torch.Size([64, 1, 28, 28])
The shape of the label: torch.Size([64])


In [26]:
# Pick the best available backend: CUDA GPU first, then Apple-silicon MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif (
    # Older torch builds have no `mps` backend module at all.
    getattr(torch.backends, "mps", None) is not None
    # is_available must be *called*: the bare function object is always truthy,
    # which would select "mps" on machines that only have a CPU.
    and torch.backends.mps.is_available()
):
    device = "mps"
else:
    device = "cpu"
print(f"Using {device}")

Using cuda


In [30]:
def train_and_evaluate(epochs, batch_size, learning_rate, optim: str):
    """Train a fresh MLP on MNIST and evaluate it on the test set after every epoch.

    Every call reseeds torch with 42 before building the model, so runs start
    from the same seed and the hyperparameters passed in are what is varied.

    Relies on the notebook-level globals ``train_data``, ``test_data_loader``
    and ``device``.

    Args:
        epochs: Number of passes over the training set; must be at least 1.
        batch_size: Mini-batch size of the shuffled training DataLoader.
        learning_rate: Learning rate handed to the optimizer.
        optim: Optimizer name; one of "Adagrad", "Adam", "RMSprop" or "SGD".

    Returns:
        Tuple ``(accuracy_percent, avg_test_loss)`` measured after the last epoch.

    Raises:
        ValueError: If ``optim`` is not a supported name or ``epochs`` is below 1.
    """
    optimizer_classes = {
        "Adagrad": torch.optim.Adagrad,
        "Adam": torch.optim.Adam,
        "RMSprop": torch.optim.RMSprop,
        "SGD": torch.optim.SGD,
    }

    # Validate up front: previously an unknown optimizer name (or epochs == 0)
    # only surfaced later as a confusing UnboundLocalError.
    if optim not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {optim!r}; expected one of {sorted(optimizer_classes)}"
        )
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs!r}")

    # Base model: flatten the 28x28 image, then a 784-512-512-10 MLP with ReLU.
    class SimpleNeuralNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.flatten = nn.Flatten()
            self.linear_relu_stack = nn.Sequential(
                nn.Linear(28*28,512),
                nn.ReLU(),
                nn.Linear(512,512),
                nn.ReLU(),
                nn.Linear(512,10),
            )
        def forward(self,x):
            x = self.flatten(x)
            logits = self.linear_relu_stack(x)
            return logits

    # Seed before building the model so every run starts from the same seed.
    torch.manual_seed(42)
    model = SimpleNeuralNetwork().to(device)

    # Create the train DataLoader with the given batch_size.
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    optimizer = optimizer_classes[optim](params=model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    train_size = len(train_loader.dataset)

    # Training and Testing Loop
    for epoch in range(epochs):
        print(f"Epoch: {epoch + 1}\n------------------")

        # Training Phase
        model.train()
        for batch, (X, y) in enumerate(train_loader):
            X, y = X.to(device), y.to(device)

            # Forward pass
            pred = model(X)
            loss = loss_fn(pred, y)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Progress line every 100 batches.
            if batch % 100 == 0:
                loss_value, current = loss.item(), (batch + 1) * len(X)
                print(f"loss: {loss_value:>7f}  [{current:>5d}/{train_size:>5d}]")

        # Testing Phase (run once per epoch)
        test_loss, correct = 0, 0
        num_batches = len(test_data_loader)
        model.eval()
        with torch.inference_mode():
            for X, y in test_data_loader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Loss is averaged per batch, accuracy per sample.
        test_loss /= num_batches
        correct /= len(test_data_loader.dataset)
        print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    # Return the final epoch's accuracy (in percent) and average loss for analysis.
    return 100 * correct, test_loss
    



In [31]:
# Experiment 1: sweep the learning rate
"""Here only learning rate varies but other remain fixed
Batch Size : 64
Optimizer : SGD
Epochs : 5
"""
learning_rates = [0.001, 0.01, 0.1, 0.5, 0.9]
results_learning_rate = []
for lr in learning_rates:
    print(f"For learning rate : {lr}")
    # Only the final-epoch test metrics come back from each run.
    accuracy, avg_loss = train_and_evaluate(
        epochs=5,
        batch_size=64,
        learning_rate=lr,
        optim="SGD",
    )
    record = {"Learning Rate": lr}
    record["Accuracy"] = accuracy
    record["Average Loss"] = avg_loss
    results_learning_rate.append(record)




For learning rate : 0.001
Epoch: 1
------------------
loss: 2.300794  [   64/60000]
loss: 2.300961  [ 6464/60000]
loss: 2.296847  [12864/60000]
loss: 2.290457  [19264/60000]
loss: 2.295826  [25664/60000]
loss: 2.283027  [32064/60000]
loss: 2.269765  [38464/60000]
loss: 2.265738  [44864/60000]
loss: 2.269464  [51264/60000]
loss: 2.268400  [57664/60000]
Test Error: 
 Accuracy: 44.0%, Avg loss: 2.257025 

Epoch: 2
------------------
loss: 2.265470  [   64/60000]
loss: 2.250584  [ 6464/60000]
loss: 2.233686  [12864/60000]
loss: 2.244142  [19264/60000]
loss: 2.225823  [25664/60000]
loss: 2.224452  [32064/60000]
loss: 2.218394  [38464/60000]
loss: 2.199941  [44864/60000]
loss: 2.213725  [51264/60000]
loss: 2.194031  [57664/60000]
Test Error: 
 Accuracy: 61.3%, Avg loss: 2.192519 

Epoch: 3
------------------
loss: 2.205599  [   64/60000]
loss: 2.185604  [ 6464/60000]
loss: 2.155325  [12864/60000]
loss: 2.153044  [19264/60000]
loss: 2.161771  [25664/60000]
loss: 2.152941  [32064/60000]
loss: 

In [33]:
# Tabulate the learning-rate sweep (one row per run) and save it as CSV.
df_learning_rate = pd.DataFrame(results_learning_rate)
# to_csv does not create missing parent directories, so make sure results/ exists.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)
df_learning_rate.to_csv(results_dir / "learning_rates.csv")

In [34]:
# Experiment 2: sweep the training batch size
"""Here only batch varies but other remain fixed
Learning Rate : 0.001
Optimizer : SGD
Epochs : 5
"""
batch_sizes = [28, 64, 128, 256, 512]
results_batch_size = []
for batch in batch_sizes:
    print(f"For Batch size : {batch}")
    # Only the final-epoch test metrics come back from each run.
    accuracy, avg_loss = train_and_evaluate(
        epochs=5,
        batch_size=batch,
        learning_rate=0.001,
        optim="SGD",
    )
    record = {"Batch": batch}
    record["Accuracy"] = accuracy
    record["Average Loss"] = avg_loss
    results_batch_size.append(record)

For Batch size : 28
Epoch: 1
------------------
loss: 2.289565  [   28/60000]
loss: 2.294887  [ 2828/60000]
loss: 2.293866  [ 5628/60000]
loss: 2.277419  [ 8428/60000]
loss: 2.282064  [11228/60000]
loss: 2.273333  [14028/60000]
loss: 2.276517  [16828/60000]
loss: 2.268580  [19628/60000]
loss: 2.271899  [22428/60000]
loss: 2.255890  [25228/60000]
loss: 2.265361  [28028/60000]
loss: 2.248645  [30828/60000]
loss: 2.258613  [33628/60000]
loss: 2.230397  [36428/60000]
loss: 2.240878  [39228/60000]
loss: 2.245522  [42028/60000]
loss: 2.204708  [44828/60000]
loss: 2.200049  [47628/60000]
loss: 2.176532  [50428/60000]
loss: 2.203431  [53228/60000]
loss: 2.162762  [56028/60000]
loss: 2.173733  [58828/60000]
Test Error: 
 Accuracy: 62.8%, Avg loss: 2.167475 

Epoch: 2
------------------
loss: 2.182208  [   28/60000]
loss: 2.193784  [ 2828/60000]
loss: 2.139925  [ 5628/60000]
loss: 2.168800  [ 8428/60000]
loss: 2.142201  [11228/60000]
loss: 2.126107  [14028/60000]
loss: 2.102616  [16828/60000]
lo

In [35]:
# Show the raw records from the batch-size sweep: one dict per batch size holding
# the final-epoch test accuracy (in percent) and average test loss.
results_batch_size

[{'Batch': 28, 'Accuracy': 86.28, 'Average Loss': 0.5580920605522812},
 {'Batch': 64, 'Accuracy': 72.02, 'Average Loss': 1.592876649965906},
 {'Batch': 128, 'Accuracy': 63.44, 'Average Loss': 2.14570140990482},
 {'Batch': 256, 'Accuracy': 50.94, 'Average Loss': 2.2432777410859517},
 {'Batch': 512,
  'Accuracy': 30.769999999999996,
  'Average Loss': 2.275238580764479}]

In [36]:
# Turn the list of per-run dicts into a table (one row per batch size) and
# display it as the cell's output.
df_batch_size = pd.DataFrame(results_batch_size)
df_batch_size

Unnamed: 0,Batch,Accuracy,Average Loss
0,28,86.28,0.558092
1,64,72.02,1.592877
2,128,63.44,2.145701
3,256,50.94,2.243278
4,512,30.77,2.275239


In [37]:
# Save the batch-size sweep as CSV. to_csv does not create missing parent
# directories, so make sure results/ exists first.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)
df_batch_size.to_csv(results_dir / "batch_size.csv")

In [38]:
# Experiment 3: compare optimizers
"""Here only optimizer varies but other remain fixed
Learning Rate : 0.001
Epoch : 5
Batch size : 64
"""
optimizers = ["SGD", "Adam", "RMSprop", "Adagrad"]

results_optimizer = []
for opt in optimizers:
    print(f"For optimizer: {opt}")
    # Only the final-epoch test metrics come back from each run.
    accuracy, avg_loss = train_and_evaluate(
        epochs=5,
        batch_size=64,
        learning_rate=0.001,
        optim=opt,
    )
    record = {"Optimizer": opt}
    record["Accuracy"] = accuracy
    record["Average Loss"] = avg_loss
    results_optimizer.append(record)

For optimizer: SGD
Epoch: 1
------------------
loss: 2.300794  [   64/60000]
loss: 2.300961  [ 6464/60000]
loss: 2.296847  [12864/60000]
loss: 2.290457  [19264/60000]
loss: 2.295826  [25664/60000]
loss: 2.283027  [32064/60000]
loss: 2.269765  [38464/60000]
loss: 2.265738  [44864/60000]
loss: 2.269464  [51264/60000]
loss: 2.268400  [57664/60000]
Test Error: 
 Accuracy: 44.0%, Avg loss: 2.257025 

Epoch: 2
------------------
loss: 2.265470  [   64/60000]
loss: 2.250584  [ 6464/60000]
loss: 2.233686  [12864/60000]
loss: 2.244142  [19264/60000]
loss: 2.225823  [25664/60000]
loss: 2.224452  [32064/60000]
loss: 2.218394  [38464/60000]
loss: 2.199941  [44864/60000]
loss: 2.213725  [51264/60000]
loss: 2.194031  [57664/60000]
Test Error: 
 Accuracy: 61.3%, Avg loss: 2.192519 

Epoch: 3
------------------
loss: 2.205599  [   64/60000]
loss: 2.185604  [ 6464/60000]
loss: 2.155325  [12864/60000]
loss: 2.153044  [19264/60000]
loss: 2.161771  [25664/60000]
loss: 2.152941  [32064/60000]
loss: 2.13122

In [39]:
# Tabulate the optimizer comparison (one row per optimizer) and save it as CSV.
df_optimizer = pd.DataFrame(results_optimizer)
# to_csv does not create missing parent directories, so make sure results/ exists.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)
df_optimizer.to_csv(results_dir / "optimizer.csv")

In [40]:
# Experiment 4: sweep the number of training epochs
"""Here only epoch count varies but other remain fixed
Learning Rate : 0.001
Optimizer : SGD
Batch size : 64
"""
epoch_count = [5, 10, 20, 50]
results_epoch = []
for epoch_counts in epoch_count:
    print(f"For Epoch count : {epoch_counts}")
    # Every run trains a new model for the full epoch budget.
    accuracy, avg_loss = train_and_evaluate(
        epochs=epoch_counts,
        batch_size=64,
        learning_rate=0.001,
        optim="SGD",
    )
    record = {"Epoch Count": epoch_counts}
    record["Accuracy"] = accuracy
    record["Average Loss"] = avg_loss
    results_epoch.append(record)

For Epoch count : 5
Epoch: 1
------------------
loss: 2.300794  [   64/60000]
loss: 2.300961  [ 6464/60000]
loss: 2.296847  [12864/60000]
loss: 2.290457  [19264/60000]
loss: 2.295826  [25664/60000]
loss: 2.283027  [32064/60000]
loss: 2.269765  [38464/60000]
loss: 2.265738  [44864/60000]
loss: 2.269464  [51264/60000]
loss: 2.268400  [57664/60000]
Test Error: 
 Accuracy: 44.0%, Avg loss: 2.257025 

Epoch: 2
------------------
loss: 2.265470  [   64/60000]
loss: 2.250584  [ 6464/60000]
loss: 2.233686  [12864/60000]
loss: 2.244142  [19264/60000]
loss: 2.225823  [25664/60000]
loss: 2.224452  [32064/60000]
loss: 2.218394  [38464/60000]
loss: 2.199941  [44864/60000]
loss: 2.213725  [51264/60000]
loss: 2.194031  [57664/60000]
Test Error: 
 Accuracy: 61.3%, Avg loss: 2.192519 

Epoch: 3
------------------
loss: 2.205599  [   64/60000]
loss: 2.185604  [ 6464/60000]
loss: 2.155325  [12864/60000]
loss: 2.153044  [19264/60000]
loss: 2.161771  [25664/60000]
loss: 2.152941  [32064/60000]
loss: 2.1312

In [41]:
# Tabulate the epoch-count sweep (one row per epoch budget) and save it as CSV.
df_epoch = pd.DataFrame(results_epoch)
# to_csv does not create missing parent directories, so make sure results/ exists.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)
df_epoch.to_csv(results_dir / "epochs.csv")