<a href="https://colab.research.google.com/github/Amal-Baby-Mathews/Pytorch_abm/blob/main/Pytorch_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# 🧠 STEP 1: Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# ✅ Reproducibility: seed PyTorch's global RNG so that weight initialisation and
#    DataLoader shuffling draw the same random numbers on every Restart & Run All
SEED = 42
torch.manual_seed(SEED)

# ✅ Use GPU if available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [4]:
# 🧠 STEP 2: Load the MNIST training split and wrap it in a shuffling DataLoader
# Preprocessing applied to every image, in order:
#   1. ToTensor  – PIL image → float tensor
#   2. Normalize – (x - 0.5) / 0.5, i.e. rescale pixels to [-1, 1]
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
]
transform = transforms.Compose(preprocessing_steps)

# Downloads the dataset into ./data on first use
train_data = datasets.MNIST(
    "./data",
    train=True,
    transform=transform,
    download=True,
)

# Mini-batches of 64 images, reshuffled each epoch, loaded by 2 worker processes
train_loader = DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)


100%|██████████| 9.91M/9.91M [00:14<00:00, 694kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 57.0kB/s]
100%|██████████| 1.65M/1.65M [00:11<00:00, 145kB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.73MB/s]


In [5]:
# 🧠 STEP 3: Define a simple MLP using nn.Module
class MLP(nn.Module):
    """Single-hidden-layer perceptron that maps flattened inputs to class scores.

    Args:
        in_features: Number of values per sample once flattened
            (default 784, i.e. a 28x28 image).
        hidden_features: Width of the hidden layer (default 256).
        num_classes: Number of output scores per sample (default 10).

    With the default arguments the network is the original fixed
    784 → 256 → 10 architecture, so ``MLP()`` behaves exactly as before.
    """

    def __init__(self, in_features: int = 784, hidden_features: int = 256,
                 num_classes: int = 10):
        super().__init__()
        # Layer order is kept stable so state-dict keys (model.1.*, model.3.*) do not change.
        self.model = nn.Sequential(
            nn.Flatten(),                             # (batch, ...) → (batch, in_features)
            nn.Linear(in_features, hidden_features),  # Input → Hidden
            nn.ReLU(),                                # Activation
            nn.Linear(hidden_features, num_classes),  # Hidden → Output (one score per class)
        )

    def forward(self, x):
        """Run ``x`` through the network and return the raw class scores.

        No softmax is applied here; the output has one row per sample and
        ``num_classes`` columns.
        """
        return self.model(x)

# Build the network, then move its parameters onto the selected device
model = MLP()
model = model.to(device)


In [6]:
# 🧠 STEP 4: Define loss and optimizer
LEARNING_RATE = 1e-3

# Cross-entropy on the raw class scores – the standard multi-class classification loss
criterion = nn.CrossEntropyLoss()

# Adam (adaptive optimizer) over every trainable parameter of the model
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


In [None]:
# 🧠 STEP 5: Training loop (runs `epochs` full passes over the training set)
epochs = 5
losses = []  # average per-sample training loss, one entry per epoch

# Put the model in training mode explicitly; this matters as soon as layers
# such as dropout or batch-norm are added to the network.
model.train()

for epoch in range(epochs):
    total_loss = 0.0  # sum of per-sample losses seen in this epoch
    num_samples = 0   # number of samples seen in this epoch

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # 🧹 1. Zero the gradients
        optimizer.zero_grad()

        # 🔁 2. Forward pass
        outputs = model(inputs)

        # 📉 3. Calculate loss
        loss = criterion(outputs, labels)

        # 🔙 4. Backward pass (calculate gradients)
        loss.backward()

        # ⬆️ 5. Update weights
        optimizer.step()

        # The criterion returns the mean loss over the batch, so weight it by
        # the batch size: the last batch can be smaller than the others and
        # would otherwise count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        total_loss += loss.item() * batch_size
        num_samples += batch_size

    avg_loss = total_loss / num_samples
    losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f}")


In [None]:
# 🧠 STEP 6: Plot training loss
# Plot against 1-based epoch numbers so the x-axis matches the "Epoch k/N"
# lines printed during training (a bare plot(losses) would start at x = 0
# and show fractional tick values).
epoch_numbers = list(range(1, len(losses) + 1))

fig, ax = plt.subplots()
ax.plot(epoch_numbers, losses, marker="o", label="Training Loss")
ax.set_xticks(epoch_numbers)  # one integer tick per epoch
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("MLP on MNIST – Training Loss")
ax.legend()
ax.grid(True)
plt.show()
