In [8]:
#This is the same Neural Network which was implemented from Scratch in the other notebook but here we will be using Pytorch to implement it.
import torch # Main torch library
import torch.nn as nn  # For building neural network layers like Linear, Conv2d etc.
import torch.nn.functional as F # For using activation functions like ReLU, SoftMax etc.
import torch.optim as optim   # For optimizers like Adam
from torchvision import datasets, transforms # For preprocessing datasets and applying transformations
from torch.utils.data import DataLoader # For loading datasets in batches
import torch.nn.init as init

In [9]:
# This code snippet is the equivalent of the X = X/ 255.0 the resizing part, the train_test_split() part and the X_train.T, Y_train.T in batches
transforms = transforms.ToTensor() 
train_dataset = datasets.MNIST(root = "./data", train= True, transform= transforms, download=True)
test_dataset = datasets.MNIST(root = "./data", train=False, transform= transforms, download=True)

train_loader = DataLoader(dataset= train_dataset, batch_size = 64, shuffle = True)
test_loader = DataLoader(dataset = test_dataset, batch_size=64, shuffle=False)


In [10]:
# The shape is of the format [Batch Size, Channels, Height, Width]
# While building from scratch it was [70000, 784] currently it is [64, 1, 28, 28] 
data = iter(train_loader)
images, labels = next(data)
print(images.shape)
print(labels.shape)



torch.Size([64, 1, 28, 28])
torch.Size([64])


In [11]:
# No manual One Hot Encoding is needed here as Pytorch's CrossEntropyLoss function automatically handles it. It directly takes the class indices as labels.

In [12]:
# No manual train-test-split is needed as torch MNIST dataset has already been split into train and test datasets accordingly. 

In [13]:
import torch.nn.init as init

input_size = 784
hidden_size = 128
output_size = 10

class WeightInitialization(nn.Module):
    def __init__(self):
        super(WeightInitialization, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)  # Explicitly define the Input Layer (input_size, hidden_size) shape kind of W1 & b1 
        self.fc2 = nn.Linear(hidden_size, output_size) # Explicitly define the Hidden Layer (hidden_size, output_size) shape kind of W2 & b2

        init.kaiming_normal(self.fc1.weight, nonlinearity='relu') # He Initialization for W1
        init.zeros_(self.fc1.bias) # Initialize b1 to 0
        init.kaiming_normal(self.fc2.weight, nonlinearity='relu') # He Initialization for W2
        init.zeros_(self.fc2.bias) # Initiialize b2 to 0

model = WeightInitialization()

  init.kaiming_normal(self.fc1.weight, nonlinearity='relu') # He Initialization for W1
  init.kaiming_normal(self.fc2.weight, nonlinearity='relu') # He Initialization for W2


In [14]:
print(model.fc1.weight.shape)  # (128, 784)
print(model.fc1.bias.shape)    # (128)
print(model.fc2.weight.shape)  # (10, 128)
print(model.fc2.bias.shape)    # (10)

torch.Size([128, 784])
torch.Size([128])
torch.Size([10, 128])
torch.Size([10])


In [15]:
# PyTorch has already built in functions for activation functions such as ReLU, Softmax etc so we dont need to define them explicitly. 

In [16]:
# The Forward progpagation in Pytorch is done as follows:
import torch.nn.functional as F
class ForwardPropagation(nn.Module):
    def __init__(self, layers):
        super(ForwardPropagation, self).__init__()
        self.fc1 = layers.fc1
        self.fc2 = layers.fc2

    def forward(self, x):
            Z1 = self.fc1(x)
            A1 = F.relu(Z1)
            Z2 = self.fc2(A1)
            # IMPORTANT : Dont Apply SoftMax Activation Here as CrossEntropy Loss expects raw logits not softmaxed outputs
            return Z2
        

forwardpropagation = ForwardPropagation(model)






In [17]:
#All these have to be incorporated in the main training loop
from torch.optim import Adam
data = iter(train_loader)
images, labels = next(data) # images shape - [64, 1, 28, 28] labels shape - [64]
images = images.view(images.size(0), -1)  # Shape converted to [64, 784] for fully connected layers
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001) # The Main role comes in Backprop but here we clear older gradients for optimizing memory footprint.
optimizer.zero_grad()
outputs = forwardpropagation(images)
criterion = nn.CrossEntropyLoss()   
loss = criterion(outputs, labels) 
print("Loss:", loss.item())

Loss: 2.3792948722839355


In [None]:
# BackPropgation is just one line in Pytorch thats it All these have to be incorporated in the main training loop

loss.backward()

In [None]:
# The updation of parameters is also a single liner in pytorch That's it. All these have to be incorporated in the main training loop

optimizer.step()

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim

epochs = 30
learning_rate = 0.001
losses = []

def accuracy(y_true, y_pred):
    _, y_pred_labels = torch.max(y_pred, 1)
    return (y_pred_labels == y_true).float().mean() * 100

for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0
    forwardpropagation.train()

    for images, labels in train_loader:
        images = images.view(images.size(0), -1)

        # Forward propagation
        outputs = forwardpropagation(images)

        #Cross Entropy Loss
        criterion = nn.CrossEntropyLoss()   
        loss = criterion(outputs, labels) 

        #Backward propagation
        optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_accuracy += accuracy(labels, outputs)

    avg_loss = epoch_loss / len(train_loader)
    avg_accuracy = epoch_accuracy / len(train_loader)
    losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%")

        
        

Epoch 1/30 - Loss: 0.3064, Accuracy: 96.75%
Epoch 2/30 - Loss: 0.2933, Accuracy: 96.91%
Epoch 3/30 - Loss: 0.2813, Accuracy: 97.07%
Epoch 4/30 - Loss: 0.2862, Accuracy: 97.16%
Epoch 5/30 - Loss: 0.2827, Accuracy: 97.12%
Epoch 6/30 - Loss: 0.2752, Accuracy: 97.29%
Epoch 7/30 - Loss: 0.2763, Accuracy: 97.32%
Epoch 8/30 - Loss: 0.2828, Accuracy: 97.33%
Epoch 9/30 - Loss: 0.2695, Accuracy: 97.41%
Epoch 10/30 - Loss: 0.2742, Accuracy: 97.46%
Epoch 11/30 - Loss: 0.2714, Accuracy: 97.43%
Epoch 12/30 - Loss: 0.2769, Accuracy: 97.45%
Epoch 13/30 - Loss: 0.2759, Accuracy: 97.48%
Epoch 14/30 - Loss: 0.2806, Accuracy: 97.47%
Epoch 15/30 - Loss: 0.2755, Accuracy: 97.49%
Epoch 16/30 - Loss: 0.2870, Accuracy: 97.44%
Epoch 17/30 - Loss: 0.2935, Accuracy: 97.43%
Epoch 18/30 - Loss: 0.2893, Accuracy: 97.48%
Epoch 19/30 - Loss: 0.2957, Accuracy: 97.48%
Epoch 20/30 - Loss: 0.3012, Accuracy: 97.49%
Epoch 21/30 - Loss: 0.2983, Accuracy: 97.50%
Epoch 22/30 - Loss: 0.3034, Accuracy: 97.50%
Epoch 23/30 - Loss: