# Deep Learning - Exercise 2
## Emanuele Fontana

This notebook contains implementations of MLP from scratch using only NumPy and PyTorch.

## Import Libraries


In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import torch


## Load Data

In [9]:
# Load dataset
digits = load_digits()
X, y = digits.data, digits.target

# One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)
y_one_hot = encoder.fit_transform(y.reshape(-1, 1))

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Train shape:", X_train_scaled.shape)
print("Test shape:", X_test_scaled.shape)

Train shape: (1437, 64)
Test shape: (360, 64)


## Definition of useful functions

In [22]:
def leaky_relu(x, alpha=0.01):
    return torch.where(x > 0, x, alpha * x)

def leaky_relu_derivative(x, alpha=0.01):
    return torch.where(x > 0, torch.ones_like(x), alpha * torch.ones_like(x))

def softmax(x):
    exp_x = torch.exp(x - torch.max(x, dim=1, keepdim=True).values)
    return exp_x / torch.sum(exp_x, dim=1, keepdim=True)

def cross_entropy_loss(y_true, y_pred):
    n_samples = y_true.shape[0]
    log_preds = torch.log(y_pred + 1e-15)
    loss = -torch.sum(y_true * log_preds) / n_samples
    return loss




## Define MLP from Scratch

In [28]:
class MLP:
    def __init__(self, layer_sizes, alpha=0.01, lr=0.01, weight_decay=0.0,early_stopping=False):
        self.learning_rate = lr
        self.weight_decay = weight_decay
        self.early_stopping = early_stopping
        #We inizialize weights with He initialization
        self.W1 = torch.randn(layer_sizes[0], layer_sizes[1]) * np.sqrt(2. / layer_sizes[0])
        self.b1 = torch.zeros(1, layer_sizes[1])
        self.W2 = torch.randn(layer_sizes[1], layer_sizes[2]) * np.sqrt(2. / layer_sizes[1])
        self.b2 = torch.zeros(1, layer_sizes[2])
        self.best_loss = float('inf')
        self.no_improve_epochs = 0


    def forward(self, X):
        self.Z1 = X@self.W1 + self.b1
        self.A1 = leaky_relu(self.Z1)
        self.Z2 = self.A1@self.W2 + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y_true):
        n_samples = y_true.shape[0]
        dZ2 = self.A2 - y_true
        dW2 = self.A1.T @ dZ2 / n_samples
        db2 = torch.sum(dZ2, axis=0, keepdim=True) / n_samples

        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * leaky_relu_derivative(self.Z1)
        dW1 = X.T @ dZ1 / n_samples
        db1 = torch.sum(dZ1, axis=0, keepdim=True) / n_samples

        # Update weights and biases
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1

    def train(self, X, y, epochs=10000):
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = cross_entropy_loss(y, y_pred)

            self.backward(X, y)

            if self.early_stopping:
                if loss.item() < self.best_loss:
                    self.best_loss = loss.item()
                    self.no_improve_epochs = 0
                else:
                    self.no_improve_epochs += 1
                    if self.no_improve_epochs >= 10:
                        print(f"Early stopping at epoch {epoch}")
                        break

            if epoch % 5000 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item()}")

    def predict(self, X):
        y_pred = self.forward(X)
        return torch.argmax(y_pred, axis=1)

## Run Training and Evaluation

In [29]:
# Convert data to torch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
# Initialize and train MLP
mlp = MLP(layer_sizes=[64, 32, 10], lr=0.01, weight_decay=0.001, early_stopping=True)
mlp.train(X_train_tensor, y_train_tensor, epochs=50000)
# Evaluate on test set
y_test_pred = mlp.predict(X_test_tensor)
accuracy = torch.sum(y_test_pred == torch.argmax(y_test_tensor, axis=1)).item() / y_test_tensor.shape[0]
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Epoch 0, Loss: 3.132293701171875
Epoch 5000, Loss: 0.0541706383228302
Epoch 10000, Loss: 0.023854956030845642
Epoch 15000, Loss: 0.013831119053065777
Epoch 20000, Loss: 0.009229687973856926
Epoch 25000, Loss: 0.00677869189530611
Epoch 30000, Loss: 0.005285857245326042
Epoch 35000, Loss: 0.004288507625460625
Epoch 40000, Loss: 0.0035886240657418966
Epoch 45000, Loss: 0.003072054823860526
Test Accuracy: 97.22%


# Now let's use MNIST dataset

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Now let's use MNIST dataset
mnist_train = datasets.MNIST(root='./data', train=True, download=True)
mnist_test = datasets.MNIST(root='./data', train=False, download=True)

train_loader = DataLoader(mnist_train, batch_size=64, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=1000, shuffle=False)
# Convert data to torch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Initialize and train MLP
mlp = MLP(layer_sizes=[64, 32, 10], lr=0.01, weight_decay=0.001, early_stopping=True)
mlp.train(X_train_tensor, y_train_tensor, epochs=50000)
# Evaluate on test set
y_test_pred = mlp.predict(X_test_tensor)
accuracy = torch.sum(y_test_pred == torch.argmax(y_test_tensor, axis=1)).item() / y_test_tensor.shape[0]
print(f"Test Accuracy: {accuracy * 100:.2f}%")

RuntimeError: Dataset not found. You can use download=True to download it