In [1]:
#Start with a NN (MLP) to classify MNIST?

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the MLP architecture, we go with 784 -> 128 -> 10 
class MLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features per sample once the input is flattened.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are registered in forward order (fc1, relu, fc2).
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the output of the final linear layer (no softmax applied)."""
        # Keep the batch axis and merge every remaining axis into one.
        flat = x.view(x.shape[0], -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

# Hyperparameters
input_size = 28 * 28  # one flattened 28x28 image = 784 features
hidden_size = 128
num_classes = 10  # one output per digit
num_epochs = 5
batch_size = 64
learning_rate = 1e-3

# MNIST data: convert images to tensors, then normalise with a fixed
# mean/std pair (presumably the MNIST pixel statistics).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Train split is built first, then the test split; both live under ./data
# with downloading enabled.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training batches are shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Model, loss function and optimiser (GPU is used when one is available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Training: one optimiser step per mini-batch, repeated for num_epochs passes
for epoch in range(num_epochs):
    model.train()  # make sure the model is in training mode for this pass
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # drop gradients left over from the last step
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Progress is reported on every 100th batch only.
        if batch_idx % 100 != 0:
            continue
        print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}')

# Evaluation: percentage of test samples whose top-scoring class is correct
model.eval()  # switch off training-only behaviour
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for scoring
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        # The index of the largest score along dim 1 is the predicted class.
        predicted = torch.max(output, dim=1).indices
        total += target.size(0)
        correct += (predicted == target).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

Epoch 1/5, Batch 0/938, Loss: 2.3727
Epoch 1/5, Batch 100/938, Loss: 0.4027
Epoch 1/5, Batch 200/938, Loss: 0.3331
Epoch 1/5, Batch 300/938, Loss: 0.4029
Epoch 1/5, Batch 400/938, Loss: 0.1957
Epoch 1/5, Batch 500/938, Loss: 0.1277
Epoch 1/5, Batch 600/938, Loss: 0.3867
Epoch 1/5, Batch 700/938, Loss: 0.0953
Epoch 1/5, Batch 800/938, Loss: 0.0676
Epoch 1/5, Batch 900/938, Loss: 0.2638
Epoch 2/5, Batch 0/938, Loss: 0.3333
Epoch 2/5, Batch 100/938, Loss: 0.1034
Epoch 2/5, Batch 200/938, Loss: 0.0569
Epoch 2/5, Batch 300/938, Loss: 0.1372
Epoch 2/5, Batch 400/938, Loss: 0.0409
Epoch 2/5, Batch 500/938, Loss: 0.1662
Epoch 2/5, Batch 600/938, Loss: 0.2782
Epoch 2/5, Batch 700/938, Loss: 0.0737
Epoch 2/5, Batch 800/938, Loss: 0.2088
Epoch 2/5, Batch 900/938, Loss: 0.0454
Epoch 3/5, Batch 0/938, Loss: 0.0645
Epoch 3/5, Batch 100/938, Loss: 0.0228
Epoch 3/5, Batch 200/938, Loss: 0.1193
Epoch 3/5, Batch 300/938, Loss: 0.0524
Epoch 3/5, Batch 400/938, Loss: 0.0607
Epoch 3/5, Batch 500/938, Loss:

In [2]:
#Discuss problems with NN - How can we extract uncertainties? Monte Carlo dropout? Regularisation? How about when our training data is limited in size?

# Define the MLP architecture with dropout: the same 784 -> 128 -> 10 layout as the first cell, with dropout applied after the hidden ReLU
class MLP(nn.Module):
    """Single-hidden-layer perceptron with dropout on the hidden activations.

    Layer order: Linear -> ReLU -> Dropout -> Linear.

    Args:
        input_size: number of features per sample once the input is flattened.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores produced per sample.
        dropout_rate: probability passed to ``nn.Dropout`` (default 0.5).
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.5):
        super().__init__()
        # Layers are registered in forward order (fc1, relu, dropout, fc2).
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the output of the final linear layer (no softmax applied)."""
        # Keep the batch axis and merge every remaining axis into one.
        flat = x.view(x.shape[0], -1)
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)

# Monte Carlo Dropout prediction
def mc_dropout_predict(model, data, num_samples=100):
    """Estimate predictive mean and variance with Monte Carlo dropout.

    Runs ``num_samples`` forward passes with the model in training mode (so
    dropout stays active), applies softmax over dim 1 to each output, and
    summarises the stacked probabilities.

    The train/eval flag of every module is restored before returning, so a
    model passed in eval mode is handed back in eval mode.

    Args:
        model: ``nn.Module`` whose forward pass returns per-class scores with
            classes on dim 1.
        data: input batch accepted by ``model``.
        num_samples: number of stochastic forward passes. Use at least 2: the
            default (unbiased) variance of a single sample is NaN.

    Returns:
        Tuple ``(mean, variance)``: the softmax probabilities averaged over
        the passes and their variance across the passes.
    """
    # Snapshot each module's mode so it can be put back exactly as it was.
    previous_modes = [(module, module.training) for module in model.modules()]
    # Training mode keeps dropout sampling during inference. NOTE: it also
    # affects any other mode-dependent layer the model may contain.
    model.train()
    try:
        with torch.no_grad():
            samples = torch.stack(
                [torch.softmax(model(data), dim=1) for _ in range(num_samples)]
            )
    finally:
        # Restore the modes even if a forward pass raised.
        for module, was_training in previous_modes:
            module.training = was_training
    return samples.mean(dim=0), samples.var(dim=0)

# Fresh model, loss function and optimiser for the dropout variant
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Training: one optimiser step per mini-batch, repeated for num_epochs passes
for epoch in range(num_epochs):
    model.train()  # make sure the model is in training mode for this pass
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # drop gradients left over from the last step
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Progress is reported on every 100th batch only.
        if batch_idx % 100 != 0:
            continue
        print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}')

# Uncertainty estimation
def demonstrate_uncertainty(trained_model=None, num_display=5):
    """Print MC-dropout predictions and uncertainties for a few test samples.

    Evaluates an already-trained network rather than constructing a new one:
    a freshly initialised model would only report the behaviour of random
    weights, not of the network trained by the loop above.

    Args:
        trained_model: network to evaluate. When omitted, the module-level
            ``model`` is used.
        num_display: maximum number of samples to print; capped at the number
            of samples in the batch.
    """
    net = model if trained_model is None else trained_model

    # Take the first batch from the test loader.
    data, target = next(iter(test_loader))
    data, target = data.to(device), target.to(device)

    # MC Dropout
    mc_mean, mc_variance = mc_dropout_predict(net, data)
    # Collapse the per-class variances into one scalar score per sample.
    mc_uncertainty = mc_variance.sum(dim=1)

    # Print results for a few samples (never more than the batch holds).
    for i in range(min(num_display, data.size(0))):
        print(f"Sample {i+1}:")
        # .item() prints the plain integer instead of a tensor repr.
        print(f"True label: {target[i].item()}")
        print(f"MC Dropout - Predicted: {mc_mean[i].argmax().item()}, Uncertainty: {mc_uncertainty[i].item():.4f}")
        print()

demonstrate_uncertainty()

Epoch 1/5, Batch 0/938, Loss: 2.4139
Epoch 1/5, Batch 100/938, Loss: 0.3815
Epoch 1/5, Batch 200/938, Loss: 0.3073
Epoch 1/5, Batch 300/938, Loss: 0.2723
Epoch 1/5, Batch 400/938, Loss: 0.3718
Epoch 1/5, Batch 500/938, Loss: 0.2316
Epoch 1/5, Batch 600/938, Loss: 0.2661
Epoch 1/5, Batch 700/938, Loss: 0.2846
Epoch 1/5, Batch 800/938, Loss: 0.3185
Epoch 1/5, Batch 900/938, Loss: 0.2233
Epoch 2/5, Batch 0/938, Loss: 0.3223
Epoch 2/5, Batch 100/938, Loss: 0.1031
Epoch 2/5, Batch 200/938, Loss: 0.1814
Epoch 2/5, Batch 300/938, Loss: 0.2234
Epoch 2/5, Batch 400/938, Loss: 0.1090
Epoch 2/5, Batch 500/938, Loss: 0.3774
Epoch 2/5, Batch 600/938, Loss: 0.1912
Epoch 2/5, Batch 700/938, Loss: 0.1679
Epoch 2/5, Batch 800/938, Loss: 0.2529
Epoch 2/5, Batch 900/938, Loss: 0.2250
Epoch 3/5, Batch 0/938, Loss: 0.1861
Epoch 3/5, Batch 100/938, Loss: 0.1094
Epoch 3/5, Batch 200/938, Loss: 0.0856
Epoch 3/5, Batch 300/938, Loss: 0.1320
Epoch 3/5, Batch 400/938, Loss: 0.0669
Epoch 3/5, Batch 500/938, Loss:

In [None]:
#NN Regularisation - TBD

In [None]:
#Introduce BNN - what does it mean to have Bayesian NN? Why use it? (uncertainty, overfitting, smaller datasets)

In [None]:
#MCMC versus VI?

In [None]:
#Compare NN to BNN

In [None]:
#More advanced structures beyond an MLP?