Diana Covaci, 261 086 280

Nicholas Milin, 261 106 314

Viktor Allais, 261 148 866

In [2]:
!pip install -q numpy pandas matplotlib seaborn scikit-learn ucimlrepo
!pip install -q torchvision

import itertools
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns; sns.set()
import torch
from IPython.core.debugger import set_trace
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, random_split
from torchvision import datasets
from torchvision import transforms
from ucimlrepo import fetch_ucirepo

# Task 1: Acquire the data

In [3]:
# Fixed seed so the train/validation split and the training batch order are
# identical on every Restart & Run All.
SEED = 42

# find mean & std for train_dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
# One batch spanning the whole training set, so the statistics below are global.
loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=False)

loader_iter = iter(loader)
images, _ = next(loader_iter)
mean_train = images.mean().item()
std_train = images.std().item()
print((mean_train, std_train))

# normalize train_dataset & test_dataset
# The statistics come from the training images only and are reused for the test
# set. Normalize is documented to take per-channel sequences, hence the 1-tuples.
mlp_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mean_train,), (std_train,)),
    transforms.Lambda(lambda x: x.view(-1))  # flatten each image to a vector
])

full_train_dataset_normalized = datasets.FashionMNIST(root='./data', train=True, download=True, transform=mlp_transform)
test_dataset_normalized = datasets.FashionMNIST(root='./data', train=False, download=True, transform=mlp_transform)

# split full train set into train and validation sets (80/20)
train_size = int(0.8*len(full_train_dataset_normalized))
val_size = len(full_train_dataset_normalized) - train_size
train_dataset_normalized, val_dataset_normalized = random_split(
    full_train_dataset_normalized,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),  # reproducible split
)

# create dataloaders
batch_size = 64
train_loader = DataLoader(
    train_dataset_normalized,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),  # reproducible shuffling
)
validation_loader = DataLoader(val_dataset_normalized, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset_normalized, batch_size=batch_size, shuffle=False)

# sanity check
x, y = next(iter(train_loader))
print((x.shape, y.shape)) # expect [64, 784] and [64]
assert x.ndim == 2 and x.shape[0] == y.shape[0], "batches must be (N, features) with one label per row"
input_size = x.shape[1]
# number of distinct labels present in the full training set
output_size = len(set(full_train_dataset_normalized.targets.numpy()))
print(input_size, output_size)

100.0%
100.0%
100.0%
100.0%


(0.28604060411453247, 0.3530242443084717)
(torch.Size([64, 784]), torch.Size([64]))
784 10


# Task 2: Implement a Multilayer Perceptron

## Activation Functions

In [5]:
class relu:
  """Rectified linear unit: passes positive values through and zeroes the rest."""

  def activation(self, x):
    """Return the elementwise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)

  def derivative(self, x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere (including at 0)."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

class leakyRelu:
  """Leaky ReLU: identity for positive inputs, slope ``gamma`` otherwise."""

  def __init__(self, gamma=0.01):
    # Slope applied to inputs that are not strictly positive.
    self.gamma = gamma

  def activation(self, x):
    """Return ``x`` where ``x > 0`` and ``gamma * x`` elsewhere."""
    leaked = self.gamma * x
    return np.where(x > 0, x, leaked)

  def derivative(self, x):
    """Return 1 where ``x > 0`` and ``gamma`` elsewhere (including at 0)."""
    is_positive = x > 0
    return np.where(is_positive, 1, self.gamma)

class sigmoid:
  """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

  def activation(self, x):
    """Return the elementwise sigmoid of ``x``.

    Evaluated as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so that
    large negative inputs do not overflow ``exp`` the way the direct formula does.
    """
    return np.exp(-np.logaddexp(0, -x))

  def derivative(self, x):
    """Return the elementwise derivative ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = self.activation(x)  # evaluate once instead of twice
    return s * (1 - s)

class tanh:
  """Hyperbolic tangent activation."""

  def activation(self, x):
    """Return the elementwise hyperbolic tangent of ``x``."""
    return np.tanh(x)

  def derivative(self, x):
    """Return the elementwise derivative ``1 - tanh(x)^2``."""
    t = np.tanh(x)
    return 1 - np.square(t)

class softmax:
  """Row-wise softmax activation for 2-D inputs (normalises along axis 1).

  NOTE(review): a function also named ``softmax`` is defined later in this
  notebook; once that cell runs, the name no longer refers to this class.
  """

  def activation(self, x):
    """Return the softmax of each row of ``x``."""
    # Subtract the row maximum before exponentiating to avoid overflow.
    row_max = np.max(x, axis=1, keepdims=True)
    numerators = np.exp(x - row_max)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

  def derivative(self, x):
    """Return the elementwise term ``s * (1 - s)`` with ``s = softmax(x)``.

    This is only the per-element (diagonal) term; cross terms between the
    outputs of a row are not included.
    """
    s = self.activation(x)
    return s * (1 - s)


## MLP

In [6]:
def softmax(X):
  """Return the row-wise softmax of a 2-D array ``X`` (each row sums to 1).

  NOTE(review): this rebinds the name ``softmax``, which an earlier cell of
  this notebook uses for an activation class.
  """
  # Shift each row by its maximum so the exponentials cannot overflow.
  row_max = np.max(X, axis=1, keepdims=True)
  numerators = np.exp(X - row_max)
  denominators = np.sum(numerators, axis=1, keepdims=True)
  return numerators / denominators

# implementing the MLP class
class MLP:
  """Fully connected feed-forward classifier trained with mini-batch gradient descent.

  Hidden layers apply ``activation_function``; the output layer applies a row-wise
  softmax, and training minimises the mean cross-entropy against one-hot targets.

  Parameters
  ----------
  activation_function : object
      Exposes ``activation(z)`` and ``derivative(z)``; both are called with the
      pre-activation array of a hidden layer.
  num_hidden_layers : int
      Number of hidden layers; must equal ``len(units)``.
  units : sequence of int
      Width of each hidden layer, in order.
  input_dim, output_dim : int, optional
      Number of input features / output classes. When omitted, the notebook-level
      globals ``input_size`` / ``output_size`` are used.
  """

  def __init__(self, activation_function, num_hidden_layers, units,
               input_dim=None, output_dim=None):
    units = list(units)
    if len(units) != num_hidden_layers:
      raise ValueError(
          f"units has {len(units)} entries but num_hidden_layers is {num_hidden_layers}")

    self.activation_function = activation_function
    self.num_hidden_layers = num_hidden_layers
    self.units = units

    # Fall back to the notebook-level globals only when no explicit size is given.
    self.input_size = input_size if input_dim is None else input_dim
    self.output_size = output_size if output_dim is None else output_dim

    # initialize weights and biases
    layer_sizes = [self.input_size] + units + [self.output_size]
    self.L = num_hidden_layers + 1  # Number of layers (excluding input) (input is 0 indexed)
    self.weights = []
    self.biases = []
    self.activations = {}  # layer index -> activation; index 0 holds the input
    self.z_values = {}     # layer index (1..L) -> pre-activation

    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
      # Gaussian weights scaled by sqrt(2 / fan_in); biases start at zero.
      self.weights.append(np.random.randn(fan_in, fan_out) * np.sqrt(2 / fan_in))
      self.biases.append(np.zeros((1, fan_out)))

    self.grad_weights = {}
    self.grad_biases = {}

  @staticmethod
  def _softmax(Z):
    """Row-wise softmax, shifted by the row maximum to avoid overflow."""
    exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

  @staticmethod
  def _to_numpy(batch):
    """Return ``batch`` as a NumPy array; objects with ``.numpy()`` are converted through it."""
    return batch.numpy() if hasattr(batch, "numpy") else np.asarray(batch)

  def forward_propagation(self, X):
    """Run ``X`` (rows are samples) through the network and return the output probabilities.

    Pre-activations and activations of every layer are cached on the instance
    for use by ``backward_propagation``.
    """
    self.activations[0] = X
    previous_activation = X
    for l in range(self.L):
      Z = np.dot(previous_activation, self.weights[l]) + self.biases[l]
      if l == self.L - 1:
        # Output layer: softmax, so each row sums to 1.
        A = self._softmax(Z)
      else:
        A = self.activation_function.activation(Z)

      self.z_values[l+1] = Z
      self.activations[l+1] = A
      previous_activation = A

    return previous_activation

  def backward_propagation(self, AL, X, Y):
    """Compute batch-averaged gradients into ``grad_weights`` / ``grad_biases``.

    ``AL`` is the output of ``forward_propagation(X)`` and ``Y`` the one-hot
    targets. Relies on the caches written by that forward pass.
    """
    m = Y.shape[0]
    # Gradient of cross-entropy w.r.t. the softmax pre-activation.
    dZ = AL - Y

    for l in reversed(range(self.L)):
      previous_activation = X if l == 0 else self.activations[l]

      self.grad_weights[l] = np.dot(previous_activation.T, dZ) / m
      self.grad_biases[l] = np.sum(dZ, axis=0, keepdims=True) / m

      if l > 0:
        # Propagate to the layer below: through the weights, then through
        # the hidden activation evaluated at that layer's pre-activation.
        dA_prev = np.dot(dZ, self.weights[l].T)
        dZ = dA_prev * self.activation_function.derivative(self.z_values[l])

  def update_parameters(self):
    """Apply one gradient-descent step in place using ``self.learning_rate``."""
    for l in range(self.L):
      self.weights[l] -= self.learning_rate * self.grad_weights[l]
      self.biases[l] -= self.learning_rate * self.grad_biases[l]

  def compute_loss(self, AL, Y):
    """Compute cross-entropy loss"""
    m = Y.shape[0]

    # Clip probabilities to avoid log(0)
    epsilon = 1e-15
    AL = np.clip(AL, epsilon, 1 - epsilon)

    return -np.sum(Y * np.log(AL)) / m

  def compute_accuracy(self, true, pred):
    """Return the fraction of rows whose argmax in ``pred`` matches the argmax in ``true``."""
    predictions = np.argmax(pred, axis=1)
    labels = np.argmax(true, axis=1)
    return np.mean(predictions == labels)

  def fit(self, train_loader, learning_rate, epochs):
    """Train for ``epochs`` passes over ``train_loader``, printing mean loss and accuracy per epoch.

    ``train_loader`` yields ``(X_batch, y_batch)`` pairs: features as rows and
    integer class labels. Batches may be NumPy arrays or objects exposing
    ``.numpy()``. The printed metrics are averages of per-batch values computed
    before each parameter update.

    Raises
    ------
    ValueError
        If ``train_loader`` yields no batches.
    """
    self.learning_rate = learning_rate
    one_hot_rows = np.eye(self.output_size)  # row k is the one-hot vector of class k

    for epoch in range(epochs):
      epoch_loss = 0
      n_batches = 0
      epoch_accuracy = 0

      for X_batch, y_batch in train_loader:
        X_batch_np = self._to_numpy(X_batch)
        y_batch_onehot = one_hot_rows[self._to_numpy(y_batch)]  # One-hot encode labels

        # Forward propagation
        AL = self.forward_propagation(X_batch_np)

        # Compute loss and accuracy
        epoch_loss += self.compute_loss(AL, y_batch_onehot)
        epoch_accuracy += self.compute_accuracy(y_batch_onehot, AL)
        n_batches += 1

        # Backward propagation
        self.backward_propagation(AL, X_batch_np, y_batch_onehot)

        # Update parameters
        self.update_parameters()

      if n_batches == 0:
        raise ValueError("train_loader yielded no batches; nothing to train on")
      print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/n_batches:.4f}, Accuracy: {epoch_accuracy/n_batches:.4f}")

  def predict(self, X):
    """Make predictions"""
    AL = self.forward_propagation(X)
    return np.argmax(AL, axis=1)

## Accuracy Score

In [7]:
def evaluate_acc(true, pred):
  """Return the fraction of positions where ``pred`` equals ``true``.

  Both arguments are converted to NumPy arrays first, so plain Python lists are
  compared element by element rather than as whole objects.

  Raises
  ------
  ValueError
      If ``true`` and ``pred`` do not have the same shape.
  """
  true_arr = np.asarray(true)
  pred_arr = np.asarray(pred)
  if true_arr.shape != pred_arr.shape:
    raise ValueError(
        f"shape mismatch: true has shape {true_arr.shape}, pred has shape {pred_arr.shape}")
  return np.mean(true_arr == pred_arr)

# Task 3: Run the experiments and report

## 3.1: Three different models

### Model 1: No hidden layers

In [10]:
# Baseline: with zero hidden layers the network is a single softmax layer on the
# inputs, so the activation object passed here is never applied.
model1 = MLP(relu(), 0, [])
model1.fit(train_loader, learning_rate=0.01, epochs=20)

Epoch 1/20, Loss: 0.6661, Accuracy: 0.7695
Epoch 2/20, Loss: 0.5101, Accuracy: 0.8222
Epoch 3/20, Loss: 0.4785, Accuracy: 0.8342
Epoch 4/20, Loss: 0.4606, Accuracy: 0.8411
Epoch 5/20, Loss: 0.4486, Accuracy: 0.8450
Epoch 6/20, Loss: 0.4401, Accuracy: 0.8481
Epoch 7/20, Loss: 0.4330, Accuracy: 0.8498
Epoch 8/20, Loss: 0.4277, Accuracy: 0.8538
Epoch 9/20, Loss: 0.4233, Accuracy: 0.8551
Epoch 10/20, Loss: 0.4196, Accuracy: 0.8564
Epoch 11/20, Loss: 0.4159, Accuracy: 0.8571
Epoch 12/20, Loss: 0.4131, Accuracy: 0.8586
Epoch 13/20, Loss: 0.4106, Accuracy: 0.8593
Epoch 14/20, Loss: 0.4082, Accuracy: 0.8595
Epoch 15/20, Loss: 0.4058, Accuracy: 0.8598
Epoch 16/20, Loss: 0.4035, Accuracy: 0.8610
Epoch 17/20, Loss: 0.4018, Accuracy: 0.8618
Epoch 18/20, Loss: 0.4002, Accuracy: 0.8632
Epoch 19/20, Loss: 0.3982, Accuracy: 0.8630
Epoch 20/20, Loss: 0.3966, Accuracy: 0.8638


In [11]:
# Accuracy of model1 over every batch of train_loader (i.e. on the training split).
all_y_true, all_y_pred = [], []

for X_batch, y_batch in train_loader:
    X_batch_np, y_batch_np = X_batch.numpy(), y_batch.numpy()

    y_pred = model1.predict(X_batch_np)

    # Labels and predictions are appended in the same batch order, so they stay aligned.
    all_y_true += list(y_batch_np)
    all_y_pred += list(y_pred)

# Convert lists to numpy arrays for evaluation
all_y_true_np, all_y_pred_np = np.array(all_y_true), np.array(all_y_pred)

print(evaluate_acc(all_y_true_np, all_y_pred_np))

0.8643125


### Model 2: One hidden layer [256] ReLU

In [None]:
# One hidden layer of 256 ReLU units, trained with the same learning rate and epoch count as model1.
model2 = MLP(relu(), 1, [256])
model2.fit(train_loader, learning_rate=0.01, epochs=20)

Epoch 1/20, Loss: 0.6246, Accuracy: 0.7848
Epoch 2/20, Loss: 0.4570, Accuracy: 0.8398
Epoch 3/20, Loss: 0.4172, Accuracy: 0.8535
Epoch 4/20, Loss: 0.3941, Accuracy: 0.8621
Epoch 5/20, Loss: 0.3761, Accuracy: 0.8669
Epoch 6/20, Loss: 0.3612, Accuracy: 0.8731
Epoch 7/20, Loss: 0.3502, Accuracy: 0.8756
Epoch 8/20, Loss: 0.3397, Accuracy: 0.8794
Epoch 9/20, Loss: 0.3306, Accuracy: 0.8835
Epoch 10/20, Loss: 0.3230, Accuracy: 0.8856
Epoch 11/20, Loss: 0.3145, Accuracy: 0.8893
Epoch 12/20, Loss: 0.3084, Accuracy: 0.8905
Epoch 13/20, Loss: 0.3015, Accuracy: 0.8931
Epoch 14/20, Loss: 0.2958, Accuracy: 0.8944
Epoch 15/20, Loss: 0.2903, Accuracy: 0.8972
Epoch 16/20, Loss: 0.2853, Accuracy: 0.8984
Epoch 17/20, Loss: 0.2796, Accuracy: 0.9017
Epoch 18/20, Loss: 0.2752, Accuracy: 0.9029
Epoch 19/20, Loss: 0.2699, Accuracy: 0.9050
Epoch 20/20, Loss: 0.2656, Accuracy: 0.9052
0.9093125


In [None]:
# Accuracy of model2 over every batch of train_loader (i.e. on the training split).
all_y_true, all_y_pred = [], []

for X_batch, y_batch in train_loader:
    X_batch_np, y_batch_np = X_batch.numpy(), y_batch.numpy()

    y_pred = model2.predict(X_batch_np)

    # Labels and predictions are appended in the same batch order, so they stay aligned.
    all_y_true += list(y_batch_np)
    all_y_pred += list(y_pred)

# Convert lists to numpy arrays for evaluation
all_y_true_np, all_y_pred_np = np.array(all_y_true), np.array(all_y_pred)

print(evaluate_acc(all_y_true_np, all_y_pred_np))

### Model 3: Two hidden layers [256,256] ReLU

In [13]:
# Two hidden layers of 256 ReLU units each, trained with the same settings as the other models.
model3 = MLP(relu(), 2, [256, 256])
model3.fit(train_loader, learning_rate=0.01, epochs=20)

# Accuracy of model3 over every batch of train_loader (i.e. on the training split).
all_y_true, all_y_pred = [], []

for X_batch, y_batch in train_loader:
    X_batch_np, y_batch_np = X_batch.numpy(), y_batch.numpy()

    y_pred = model3.predict(X_batch_np)

    # Labels and predictions are appended in the same batch order, so they stay aligned.
    all_y_true += list(y_batch_np)
    all_y_pred += list(y_pred)

# Convert lists to numpy arrays for evaluation
all_y_true_np, all_y_pred_np = np.array(all_y_true), np.array(all_y_pred)

print(evaluate_acc(all_y_true_np, all_y_pred_np))

Epoch 1/20, Loss: 0.6057, Accuracy: 0.7884
Epoch 2/20, Loss: 0.4331, Accuracy: 0.8464
Epoch 3/20, Loss: 0.3900, Accuracy: 0.8627
Epoch 4/20, Loss: 0.3647, Accuracy: 0.8712
Epoch 5/20, Loss: 0.3446, Accuracy: 0.8775
Epoch 6/20, Loss: 0.3311, Accuracy: 0.8819
Epoch 7/20, Loss: 0.3176, Accuracy: 0.8866
Epoch 8/20, Loss: 0.3057, Accuracy: 0.8904
Epoch 9/20, Loss: 0.2956, Accuracy: 0.8941
Epoch 10/20, Loss: 0.2862, Accuracy: 0.8981
Epoch 11/20, Loss: 0.2779, Accuracy: 0.9008
Epoch 12/20, Loss: 0.2701, Accuracy: 0.9037
Epoch 13/20, Loss: 0.2616, Accuracy: 0.9055
Epoch 14/20, Loss: 0.2558, Accuracy: 0.9090
Epoch 15/20, Loss: 0.2488, Accuracy: 0.9112
Epoch 16/20, Loss: 0.2424, Accuracy: 0.9135
Epoch 17/20, Loss: 0.2351, Accuracy: 0.9164
Epoch 18/20, Loss: 0.2307, Accuracy: 0.9179
Epoch 19/20, Loss: 0.2238, Accuracy: 0.9207
Epoch 20/20, Loss: 0.2187, Accuracy: 0.9218
0.924625
