### Importing Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchinfo import summary

### Use GPU if available

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


### Transform for MNIST (flattened later)

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),  # converts to [0,1]
    transforms.Normalize((0.1307,), (0.3081,))  # mean & std for MNIST
])

### Load data

In [4]:
train_dataset = datasets.MNIST(root='mnist_train', train=True, download=True, transform=transform)
test_dataset  = datasets.MNIST(root='mnist_test', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, pin_memory=True)
test_loader  = DataLoader(test_dataset, batch_size=1000, pin_memory=True)

100.0%
100.0%
100.0%
100.0%
100.0%
100.0%
100.0%
100.0%


### Define Feedforward Neural Network

In [5]:
class FNN(nn.Module):
    def __init__(self):
        super(FNN, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten input
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)  # raw logits
        return x

### Initialize model

In [6]:
model = FNN().to(device)
summary(model, input_size=(64, 784), device=device.type)

Layer (type:depth-idx)                   Output Shape              Param #
FNN                                      [64, 10]                  --
├─Linear: 1-1                            [64, 256]                 200,960
├─ReLU: 1-2                              [64, 256]                 --
├─Linear: 1-3                            [64, 128]                 32,896
├─ReLU: 1-4                              [64, 128]                 --
├─Linear: 1-5                            [64, 10]                  1,290
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 15.05
Input size (MB): 0.20
Forward/backward pass size (MB): 0.20
Params size (MB): 0.94
Estimated Total Size (MB): 1.34

### Loss and optimizer

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Warm up the model by running a forward pass with dummy data

In [8]:
inputs, labels = next(iter(train_loader))

inputs = inputs.to(device)

with torch.no_grad():  
    outputs = model(inputs)
    print(outputs.cpu().numpy(),outputs.shape)

[[ 1.08514056e-01  2.14635432e-02 -6.17815629e-02 -3.51852551e-03
   8.77752081e-02 -3.23430449e-02 -2.05323517e-01 -7.18705952e-02
  -5.58719262e-02 -4.92296405e-02]
 [ 6.26947880e-02  1.11679591e-01 -4.87175584e-02  1.33010298e-02
   7.18920976e-02  1.17215000e-01 -2.23242313e-01  2.91676521e-02
  -4.85361591e-02 -1.18878603e-01]
 [ 6.46236539e-02  1.62372962e-02 -3.68760154e-02  5.14301769e-02
   1.06729411e-01  1.00280695e-01 -2.21421823e-01 -3.00173834e-02
  -1.29926801e-02 -7.86502659e-02]
 [ 1.20105036e-01 -2.12719664e-02 -9.00794268e-02  7.10431114e-02
   1.26499653e-01  1.63470924e-01 -2.14266866e-01 -1.62534893e-01
  -9.65756550e-02 -1.05313554e-01]
 [ 4.92532849e-02  1.13205455e-01 -6.54692128e-02 -1.54921897e-02
   1.05464682e-01  8.77187103e-02 -2.23258868e-01 -8.88154730e-02
  -3.01552191e-02 -8.44262093e-02]
 [ 7.32428282e-02  2.74685100e-02 -9.35202688e-02 -1.37343824e-01
   1.87939554e-01  1.13851644e-01 -1.93000436e-01  1.33819625e-01
  -5.43218702e-02 -1.69421867e-01

### Training loop

In [9]:
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}")

Epoch [1/10], Loss: 0.2310
Epoch [2/10], Loss: 0.0934
Epoch [3/10], Loss: 0.0641
Epoch [4/10], Loss: 0.0498
Epoch [5/10], Loss: 0.0383
Epoch [6/10], Loss: 0.0344
Epoch [7/10], Loss: 0.0269
Epoch [8/10], Loss: 0.0237
Epoch [9/10], Loss: 0.0258
Epoch [10/10], Loss: 0.0205


### Evaluate on test set

In [10]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 97.82%
