In [2]:
#importing libraries
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

Now let's download the training and test data from the MNIST dataset.


In [3]:
# Build both MNIST splits with identical settings; only the `train` flag differs
# (True -> training split, False -> test split). Files are stored under the
# "data" directory, fetched on first use, and every image is converted to a
# tensor by ToTensor() when it is read.
train_data, test_data = (
    datasets.MNIST(root="data", train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

In [4]:
# Wrap each split in a DataLoader that yields mini-batches of 32 samples.
train_data_loader = DataLoader(dataset=train_data, batch_size=32)
# The test loader keeps batch_size=32 for every experiment.
test_data_loader = DataLoader(dataset=test_data, batch_size=32)

# Peek at the first test batch to confirm the tensor shapes.
X, y = next(iter(test_data_loader))
print(f"The image is of shape: {X.shape}")
print(f"The shape of the label: {y.shape}")

The image is of shape: torch.Size([32, 1, 28, 28])
The shape of the label: torch.Size([32])


In [7]:
# Pick the best available backend: CUDA first, then Apple's Metal backend (MPS),
# and finally the CPU.
if torch.cuda.is_available():
    device = "cuda"
# NOTE: is_available must be *called*. The bare function object is always
# truthy, which would select "mps" on machines that have no MPS support and
# make the "cpu" branch unreachable.
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device}")

Using cuda


In [9]:
# create a base model
class SimpleNeuralNetwork(nn.Module):
    """Baseline fully connected classifier: 784 -> 512 -> 512 -> 10.

    Each input is flattened to a 28*28 = 784 element vector, passed through
    two 512-unit hidden layers with ReLU activations, and projected onto 10
    output scores. No softmax is applied; ``forward`` returns raw logits.
    """

    def __init__(self):
        super().__init__()
        in_features, hidden_units, num_classes = 28 * 28, 512, 10

        # Collapses every dimension after the batch dimension into one vector.
        self.flatten = nn.Flatten()

        layers = [
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        return self.linear_relu_stack(self.flatten(x))
# Instantiate the baseline network, then move its parameters to the selected device.
model = SimpleNeuralNetwork()
model = model.to(device)


In [27]:
def _train_one_epoch(train_loader, loss_fn, optimizer):
    """Run one optimisation pass of the module-level ``model`` over ``train_loader``."""
    size = len(train_loader.dataset)
    model.train()
    for batch, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)

        # forward pass and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # report progress every 100 batches
        if batch % 100 == 0:
            current = (batch + 1) * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")


def _evaluate(test_loader, loss_fn):
    """Print accuracy and mean per-batch loss of the module-level ``model`` on ``test_loader``."""
    num_batches = len(test_loader)
    size_test = len(test_loader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.inference_mode():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size_test
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


def train_and_evaluate(epochs: int, batch_size: int, learning_rate: float, optim: str) -> None:
    """Train the module-level ``model`` and report test metrics after every epoch.

    The evaluation pass runs once per epoch. It must not sit inside the
    per-batch loop: that would run a full test-set pass after every single
    training mini-batch and make an epoch take orders of magnitude longer.

    Args:
        epochs: Number of passes over the training set. With 0 nothing is
            trained or evaluated.
        batch_size: Mini-batch size for the training DataLoader. The test
            loader is the module-level ``test_data_loader`` and is not affected.
        learning_rate: Learning rate passed to the optimizer.
        optim: One of "Adagrad", "Adam" or "RMSprop"; any other value selects SGD.

    Note:
        ``model`` is trained in place and is never re-initialised here, so
        successive calls continue from the weights left by the previous call.
    """
    # defining the optimizer (unknown names fall back to SGD)
    optimizer_classes = {
        "Adagrad": torch.optim.Adagrad,
        "Adam": torch.optim.Adam,
        "RMSprop": torch.optim.RMSprop,
        "SGD": torch.optim.SGD,
    }
    optim_name = optim if optim in optimizer_classes else "SGD"
    optimizer = optimizer_classes[optim_name](params=model.parameters(), lr=learning_rate)
    print(f"Using {optim_name}")

    loss_fn = nn.CrossEntropyLoss()
    train_loader = DataLoader(train_data, batch_size=batch_size)

    for e in range(epochs):
        print(f"Epoch: {e+1}\n------------------ ")
        _train_one_epoch(train_loader, loss_fn, optimizer)
        _evaluate(test_data_loader, loss_fn)

           
            



In [28]:
# Baseline experiment: plain SGD for 50 epochs.
train_and_evaluate(
    epochs=50,
    batch_size=64,
    learning_rate=0.001,
    optim="SGD",
)

Using SGD
Epoch: 1
------------------ 
loss: 0.029320  [   64/60000]


KeyboardInterrupt: 

In [26]:
# Sanity check: number of samples in the dataset wrapped by the training loader.
len(train_data_loader.dataset)

60000