# 1- Importing necessary libs.

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# 2- loading MNIST (train+test)

In [2]:
training_data = datasets.MNIST(
    train=True, 
    root='data', 
    download=True, 
    transform=ToTensor(),
)

test_data = datasets.MNIST(
    train=False, 
    root='data', 
    download=True, 
    transform=ToTensor(),
)

In [3]:
len(training_data.classes)

10

# 3- Creating data loaders to tranform the data into batches iterable

In [4]:
batch_size=32

train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for x, y in test_dataloader:
    print(f"batch size -> {x.shape[0]}\n#channels -> {x.shape[1]}\nheight -> {x.shape[2]}\nwidth -> {x.shape[3]}")
    break

batch size -> 32
#channels -> 1
height -> 28
width -> 28


# 4- Defining a simple four layer model

In [5]:
device = 'cpu'

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.four_layer_stack = nn.Sequential(
            nn.Linear(28*28, 256), 
            nn.LeakyReLU(), 
            nn.Linear(256, 128), 
            nn.LeakyReLU(),
            nn.Linear(128,64), 
            nn.LeakyReLU(), 
            nn.Linear(64, 10)
        )
    def forward(self, x):
        x = self.flatten(x)
        logits = self.four_layer_stack(x)
        return logits
    
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (four_layer_stack): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=64, out_features=10, bias=True)
  )
)


In [6]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [7]:
# creating training loop....
def train(dataloader, model, optimizer, loss_fn):
    size = len(dataloader.dataset)
    model.train()
    for batch, (x, y) in enumerate(dataloader):
        X, y = x.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch %100 == 0:
            current_loss, current_batch = loss.item(), (batch+1) * len(X)
            print(f"loss -> {current_loss:>8f} , batch -> [{current_batch:>5d}/{size:>5d}]")


In [8]:
# create testing loop.....
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    model.eval()
    num_batches = len(dataloader)
    loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            preds = model(X)
            loss += loss_fn(preds, y).item()
            correct += (preds.argmax(1)==y).type(torch.float).sum().item()
    loss /= num_batches
    correct /= size
    print(f"Test Error:\nloss -> {loss:>8f}\t Accuracy -> {correct*100}")

# 5- training and evaluating our model

In [9]:
epochs = 10
for i in range(epochs):
    print(f"epoch {i+1}\n----------------")
    train(train_dataloader, model, optimizer, loss_fn)
    test(test_dataloader, model, loss_fn)
print("Done!")

epoch 1
----------------
loss -> 2.308447 , batch -> [   32/60000]
loss -> 0.504153 , batch -> [ 3232/60000]
loss -> 0.487113 , batch -> [ 6432/60000]
loss -> 0.207600 , batch -> [ 9632/60000]
loss -> 0.269628 , batch -> [12832/60000]
loss -> 0.270383 , batch -> [16032/60000]
loss -> 0.164871 , batch -> [19232/60000]
loss -> 0.102817 , batch -> [22432/60000]
loss -> 0.138588 , batch -> [25632/60000]
loss -> 0.161988 , batch -> [28832/60000]
loss -> 0.320695 , batch -> [32032/60000]
loss -> 0.229939 , batch -> [35232/60000]
loss -> 0.155359 , batch -> [38432/60000]
loss -> 0.206136 , batch -> [41632/60000]
loss -> 0.114463 , batch -> [44832/60000]
loss -> 0.159835 , batch -> [48032/60000]
loss -> 0.176960 , batch -> [51232/60000]
loss -> 0.063046 , batch -> [54432/60000]
loss -> 0.145418 , batch -> [57632/60000]
Test Error:
loss -> 0.142647	 Accuracy -> 95.64
epoch 2
----------------
loss -> 0.051772 , batch -> [   32/60000]
loss -> 0.276330 , batch -> [ 3232/60000]
loss -> 0.090728 , b

# 6- saving and loading the model

In [10]:
torch.save(model.state_dict(), 'simple_nn_model.pth')

In [11]:
model.load_state_dict(torch.load('simple_nn_model.pth', weights_only=True))

<All keys matched successfully>

In [12]:
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (four_layer_stack): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=64, out_features=10, bias=True)
  )
)


# 7- testing the loaded model on a sample

In [13]:
classes = training_data.classes

model.eval()
X, y = test_data[0][0], test_data[0][1]
pred = model(X)
prediction ,actual= classes[pred[0].argmax(0)], classes[y]
print(f"prediction: {prediction}\t actual: {actual}")

prediction: 7 - seven	 actual: 7 - seven


## Done!