# Exercise-1: Training Deep Neural Network on MNIST 

Train a controlled deep neural network on the MNIST dataset. Set random seeds to 42.
Load and preprocess MNIST. Build the network using the following configuration:
* Flatten input images to 28 × 28 = 784 features
* 3 hidden layers, 64 neurons each
* ELU activation function
* He normal initialization
* Output layer: 10 neurons with softmax
* Optimizer: Nadam
* learning rate = 0.001, loss=sparse categorical crossentropy
* EarlyStopping callback: monitor validation loss, patience = 5, restore best weights
* epochs = 50, batch size = 32
* Use only the first 1000 training samples and first 200 test samples

In [18]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import random

In [19]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

In [20]:
# 1. Set seeds

SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [21]:
# 2. Load MNIST (only part)

transform = transforms.ToTensor()

train_dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
test_dataset  = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

train_subset = Subset(train_dataset, range(1000))   # first 1000 samples
test_subset  = Subset(test_dataset, range(200))     # first 200 samples

batch_size = 32
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

# 3. Build model

class MNISTModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 64),
            nn.ELU(),
            nn.Linear(64, 64),
            nn.ELU(),
            nn.Linear(64, 64),
            nn.ELU(),
            nn.Linear(64, 10)  # logits -> CrossEntropyLoss will softmax
        )
        self.init_weights()

    def init_weights(self):
        for layer in self.net:
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)  # He normal init
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        return self.net(x)

model = MNISTModel()

# 4. Optimizer & loss

optimizer = optim.NAdam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# 5. Training with Early Stopping

epochs = 50
patience = 5
best_loss = np.inf
patience_counter = 0
best_state = None

for epoch in range(epochs):
    model.train()
    train_loss = 0

    for x, y in train_loader:
        optimizer.zero_grad()
        preds = model(x)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # valdiation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x, y in val_loader:
            preds = model(x)
            loss = criterion(preds, y)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    train_loss /= len(train_loader)

    print(f"Epoch {epoch+1:02d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    # Early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_state = model.state_dict().copy()
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

# Restore best weights
model.load_state_dict(best_state)
print("Restored best model weights.")

Epoch 01 | Train Loss: 1.4510 | Val Loss: 0.8960
Epoch 02 | Train Loss: 0.5706 | Val Loss: 0.5384
Epoch 03 | Train Loss: 0.3566 | Val Loss: 0.4145
Epoch 04 | Train Loss: 0.2649 | Val Loss: 0.3699
Epoch 05 | Train Loss: 0.1881 | Val Loss: 0.3231
Epoch 06 | Train Loss: 0.1341 | Val Loss: 0.3018
Epoch 07 | Train Loss: 0.1017 | Val Loss: 0.3012
Epoch 08 | Train Loss: 0.0732 | Val Loss: 0.2884
Epoch 09 | Train Loss: 0.0528 | Val Loss: 0.2864
Epoch 10 | Train Loss: 0.0383 | Val Loss: 0.2985
Epoch 11 | Train Loss: 0.0275 | Val Loss: 0.3177
Epoch 12 | Train Loss: 0.0225 | Val Loss: 0.3262
Epoch 13 | Train Loss: 0.0171 | Val Loss: 0.3215
Epoch 14 | Train Loss: 0.0143 | Val Loss: 0.3109
Early stopping triggered.
Restored best model weights.


### Q1.1 Report the obtained test accuracy

In [22]:
correct = 0
total = 0
model.eval()
with torch.no_grad():
    for x, y in val_loader:
        preds = model(x).argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)

print(f"Accuracy: {correct/total:.2%}")


Accuracy: 92.00%


# Exercise-2: Training Deep Neural Network on CIFAR-10 
Train a controlled deep neural network on the CIFAR-10 dataset. Set random seeds to
42. Load and preprocess CIFAR-10. Build the network using the following configuration:
* Flatten input images to 32 × 32 × 3 = 3072 features
* 4 hidden layers, 256 neurons each
* ELU activation function
* He normal initialization
* Output layer: 10 neurons with softmax
* Optimizer: Nadam
* learning rate = 0.001, loss =′ sparse categorical crossentropy′
* EarlyStopping callback: monitor validation loss, patience = 5, restore best weights
* epochs = 50, batch size = 128
* Use only the first 5000 training samples and first 1000 test samples

In [23]:
# 2. Load CIFAR-10 (only part)
transform = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
test_dataset  = datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

train_subset = Subset(train_dataset, range(5000))   # first 5000 samples
test_subset  = Subset(test_dataset, range(1000))    # first 1000 samples

batch_size = 128
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

# 3. Build Model

class CIFAR10MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(3072, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
            nn.Linear(256, 10)  # logits (softmax done in CrossEntropy)
        )
        self.init_weights()

    def init_weights(self):
        for layer in self.model:
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)  # He init
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        return self.model(x)

model = CIFAR10MLP()

# 4. Optimizer & loss

optimizer = optim.NAdam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# 5. Train w/ Early Stopping

epochs = 50
patience = 5
best_loss = np.inf
patience_counter = 0
best_state = None

for epoch in range(epochs):
    model.train()
    train_loss = 0

    for x, y in train_loader:
        optimizer.zero_grad()
        preds = model(x)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x, y in val_loader:
            preds = model(x)
            loss = criterion(preds, y)
            val_loss += loss.item()

    train_loss /= len(train_loader)
    val_loss /= len(val_loader)

    print(f"Epoch {epoch+1:02d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    # Early stopping logic
    if val_loss < best_loss:
        best_loss = val_loss
        best_state = model.state_dict().copy()
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break

# Restore best weights
model.load_state_dict(best_state)
print("Restored best model weights.")

Epoch 01 | Train Loss: 3.2465 | Val Loss: 2.7205
Epoch 02 | Train Loss: 2.2015 | Val Loss: 2.4690
Epoch 03 | Train Loss: 2.0216 | Val Loss: 2.6111
Epoch 04 | Train Loss: 2.0074 | Val Loss: 3.0149
Epoch 05 | Train Loss: 1.9588 | Val Loss: 2.9991
Epoch 06 | Train Loss: 1.9131 | Val Loss: 2.5847
Epoch 07 | Train Loss: 1.8907 | Val Loss: 2.6296
Early stopping triggered.
Restored best model weights.


### Q2.1 Report the obtained the test accuracy.

In [24]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for x, y in val_loader:
        preds = model(x).argmax(dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)

print(f"Final Test Accuracy: {correct/total:.2%}")


Final Test Accuracy: 17.00%


# Exercise-3: Regularization with Alpha Dropout and MC Dropout
Using the MNIST dataset, extend the previously trained deep neural network by applying
Alpha Dropout. Then, without retraining, use Monte Carlo (MC) Dropout at inference
to estimate if you can achieve better accuracy. Set random seeds to 42. Use the following
configuration:
* Flatten input images to 28 × 28 = 784 features
* 3 hidden layers, 64 neurons each
* SELU activation function (required for Alpha Dropout)
* LeCun normal initialization
* Alpha Dropout rate: 0.1 in all hidden layers
* Output layer: 10 neurons with softmax
* Optimizer: Nadam
* learning rate = 0.001, loss=sparse categorical crossentropy
* epochs = 50, batch size = 32
* Use only the first 1000 training samples and first 200 test samples
* For MC Dropout, enable dropout during inference and average predictions over 20 stochastic forward passes