In [31]:
#%% Learning point: understand basic PyTorch concepts and how to train a simple MLP model

"""
Task: find and fix the bugs in the code below.
"""
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import classification_report
import numpy as np

# Seed PyTorch's global random number generator so repeated runs start from the same state
torch.manual_seed(seed=42)


<torch._C.Generator at 0x10d3f7db0>

In [32]:
#%%  

class NeuralNetwork(nn.Module):
    """Multi-layer perceptron that classifies 28x28 images into 10 classes.

    Architecture: Flatten -> Linear(784, 512) -> ReLU -> Linear(512, 512)
    -> ReLU -> Linear(512, 10) -> LogSoftmax.

    The network returns *log*-probabilities, not probabilities. Use
    ``output.exp()`` to recover probabilities and ``output.argmax(1)`` to get
    the predicted class. Log-probabilities are the natural input for
    ``nn.NLLLoss``; ``nn.CrossEntropyLoss`` also accepts them (re-normalising
    log-probabilities is a no-op) at the cost of a redundant log-softmax.
    """

    def __init__(self):
        super().__init__()
        # Collapses every dimension after the first into a single feature axis
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )
        # NOTE: despite the attribute name this is a *log*-softmax over the class axis
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Compute per-class log-probabilities.

        Args:
            x: Image tensor whose trailing dimensions hold 28 * 28 = 784
                values per sample, e.g. ``(N, 1, 28, 28)`` or ``(N, 28, 28)``.
                A single ``(28, 28)`` image is also accepted and is treated
                as a batch of one.

        Returns:
            Tensor of shape ``(N, 10)`` containing log-probabilities; each
            row exponentiates to a distribution that sums to 1.
        """
        # A bare (28, 28) image has no leading dimension for Flatten to keep,
        # so add one; otherwise the rows would be mistaken for 28 samples.
        if x.dim() == 2:
            x = x.unsqueeze(0)
        # Flatten the image for the input layer
        x = self.flatten(x)
        # Apply the linear layers of the MLP with ReLU activations
        logits = self.linear_relu_stack(x)
        # Normalise the logits into log-probabilities
        log_probabilities = self.softmax(logits)
        return log_probabilities


In [33]:
#%%  
# Load MNIST dataset
# Important: ToTensor yields pixel values in [0, 1]; Normalize with mean 0.5 and
# std 0.5 then maps them to [-1, 1] so the network sees zero-centred inputs.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Both splits share the same storage directory, download flag and preprocessing;
# only the `train` flag differs.
mnist_kwargs = dict(root="./data", download=True, transform=transform)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

In [34]:
#%%  
# Prepare the dataloaders and set the batch size.
# Batches are used to update the model weights because we can't pass the entire dataset at once.
BATCH_SIZE = 64

# Training batches are reshuffled every epoch so consecutive updates see a different sample order.
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation only reads the data, so shuffling adds nothing; a fixed order keeps
# the collected predictions/labels reproducible from run to run.
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [35]:
#%%  
# Initialize the model
# NeuralNetwork() takes no constructor arguments; printing the module lists its sub-layers.
model = NeuralNetwork()
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
  (softmax): LogSoftmax(dim=1)
)


In [36]:
#%%  
# MNIST dataset
# Look at the first training example; indexing the dataset yields an (image, label) pair.
test_tensor = train_data[0]
sample_image, sample_label = test_tensor
print("Image shape:", sample_image.shape, "Class:", sample_label)

Image shape: torch.Size([1, 28, 28]) Class: 5


In [37]:
#%%  
print("Test model forward pass")
# The sample image is [1, 28, 28]; Flatten keeps dim 0 and merges the rest, so the
# leading channel axis acts as a batch of one and the output is expected to be (1, 10).
sample_output = model(test_tensor[0])
assert sample_output.shape == (1, 10), "Model output shape is incorrect"

Test model forward pass


In [38]:
#%%  
# Optimisation hyper-parameters
learning_rate = 1e-3
epochs = 25
# The model's forward pass ends in LogSoftmax, i.e. it already emits log-probabilities.
# NLLLoss consumes log-probabilities directly; CrossEntropyLoss expects raw logits and
# would apply log-softmax a second time.
loss_fn = nn.NLLLoss()
# Plain stochastic gradient descent over every trainable parameter of the model
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


In [39]:
#%% Training 
# Number of training samples; it does not change between epochs, so compute it once.
size = len(train_dataloader.dataset)

for epoch in tqdm(range(epochs)):
    # Put the model in training mode at the start of every epoch. The evaluation
    # cell switches it to eval mode, so re-running this cell afterwards would
    # otherwise train in eval mode (no numerical effect for the current layers,
    # but it would matter as soon as dropout or batch-norm is added).
    model.train()
    for batch, (X, y) in enumerate(train_dataloader):
        # Forward pass and loss for this mini-batch
        pred = model(X)
        loss = loss_fn(pred, y)

        # Clear old gradients
        optimizer.zero_grad()
        # Compute derivatives
        loss.backward()
        # Update the weights of the model using the optimizer
        optimizer.step()

        # Report progress every 100 mini-batches
        if batch % 100 == 0:
            # Keep `loss` bound to the tensor; use a separate name for the Python float
            loss_value, current = loss.item(), batch * len(X)
            # tqdm.write prints the message without breaking the progress bar
            tqdm.write(f"Epoch: {epoch+1}, Loss: {loss_value:.6f}, Progress: [{current}/{size}]")


  0%|          | 0/25 [00:00<?, ?it/s]

Epoch: 1, Loss: 2.298006, Progress: [0/60000]
Epoch: 1, Loss: 2.283853, Progress: [6400/60000]
Epoch: 1, Loss: 2.256865, Progress: [12800/60000]
Epoch: 1, Loss: 2.228138, Progress: [19200/60000]
Epoch: 1, Loss: 2.197832, Progress: [25600/60000]
Epoch: 1, Loss: 2.154214, Progress: [32000/60000]
Epoch: 1, Loss: 2.112940, Progress: [38400/60000]
Epoch: 1, Loss: 2.113910, Progress: [44800/60000]
Epoch: 1, Loss: 2.031742, Progress: [51200/60000]
Epoch: 1, Loss: 2.039376, Progress: [57600/60000]


  4%|▍         | 1/25 [00:13<05:26, 13.59s/it]

Epoch: 2, Loss: 1.948175, Progress: [0/60000]


In [None]:
#%%  
# Test the model
# Switch the model to evaluation mode before running inference
model.eval()
# NOTE(review): y_pred / y_true are not read by any of the visible cells.
y_pred = []
y_true = []

# Inference needs no gradients, so skip autograd bookkeeping to save memory and compute
with torch.no_grad():
    all_preds = []
    all_labels = []

    for X, y in test_dataloader:
        batch_scores = model(X)
        # The highest-scoring entry along dim 1 is the predicted class
        predicted_classes = batch_scores.argmax(dim=1)
        all_preds.extend(predicted_classes.numpy())
        all_labels.extend(y.numpy())

In [None]:
#%%  
# Convert list to NumPy arrays for Scikit-Learn
# The names are rebound: after this cell all_preds / all_labels are arrays rather than lists.
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

In [None]:
#%%  
# Classification report
# One display name per digit class, "0" through "9"
class_names = [str(digit) for digit in range(10)]
# True labels come first, predictions second
report = classification_report(all_labels, all_preds, target_names=class_names)
print("Classification Report:\n", report)
