# Multi-Layer Perceptron (MLP)
This notebook builds an MLP for classification, in the same way as described in [Cepeda Humerez et al. (2019)](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1007290).

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import tqdm
from sympy import sqrt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

# Import everything from the modules in the 'src' directory so their functions can be used directly, without a module prefix
from ssa_simulation import *
from ssa_analysis import *
from ssa_classification import *
%load_ext autoreload
%autoreload 2

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define the MLP model
class MLP(nn.Module):
    """
    Multi-layer perceptron classifier with two hidden layers.

    Architecture: Linear -> ELU -> Dropout -> Linear -> ELU -> Dropout -> Linear.
    The final layer returns raw logits; `nn.CrossEntropyLoss` is used as the
    training criterion and Adam as the optimiser, both created in the constructor.

    Parameters
    ----------
    input_size : number of input features.
    hidden_size : list whose first two entries are the widths of the two hidden layers.
    output_size : number of output logits (classes).
    dropout_rate : dropout probability applied after each hidden activation.
    learning_rate : learning rate passed to Adam.
    device : device to run on (e.g. "cpu", "cuda", or a `torch.device`).
        When None, CUDA is used if available, otherwise the CPU.
    """

    def __init__(self, input_size, hidden_size:list, output_size, 
                 dropout_rate=0.3, learning_rate=0.01, 
                 device=None):
        # Call the parent class (nn.Module) constructor
        super(MLP, self).__init__() 

        # Honour an explicitly requested device; otherwise auto-detect GPU/CPU
        if device is not None:
            self.device = torch.device(device)
        else:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        num_gpus = torch.cuda.device_count()  # Count number of available GPUs
        print(f"🔄 Using device: {self.device} ({num_gpus} GPUs available)")

        # Define the architecture of the model
        self.fc1 = nn.Linear(input_size, hidden_size[0])  # 1st Fully Connected Layer
        self.fc2 = nn.Linear(hidden_size[0], hidden_size[1])  # 2nd Fully Connected Layer
        self.fc3 = nn.Linear(hidden_size[1], output_size)  # Output layer (maps 2nd hidden layer to logits)

        # Activation function: ELU with α=1, i.e. x for x > 0 and exp(x) - 1 otherwise.
        # nn.ELU is used instead of torch.where(x > 0, x, torch.exp(x) - 1) because
        # the latter evaluates exp(x) for every element; for large positive x it
        # overflows to inf and the backward pass yields 0 * inf = NaN gradients.
        # nn.ELU has no parameters, so the state_dict keys are unaffected.
        self.exp_activation = nn.ELU(alpha=1.0)

        # Dropout layer
        self.dropout = nn.Dropout(p=dropout_rate)
        
        # Move model to the selected device
        self.to(self.device)

        # initialise weights using He initialisation
        self.initialize_weights()

        # NOTE: the model must not be wrapped with `self = nn.DataParallel(self)` here.
        # Rebinding `self` only changes the local name: the criterion and optimiser
        # below would be attached to a discarded wrapper and the returned model
        # would have neither. Callers that want multi-GPU execution should wrap
        # the constructed model themselves.

        # loss function and optimiser
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
    
    def initialize_weights(self):
        """
        Initialise every Linear layer with He (Kaiming) normal weights and zero biases.

        He initialisation scales the weight variance by the layer's fan-in to keep
        activations and gradients at a stable magnitude across layers.
        The gain used here is the ReLU gain; this network uses ELU activations,
        for which the ReLU gain serves as an approximation.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Linear):  # Apply only to Linear layers
                nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)  # Set bias to zero


    def forward(self, x):
        '''
        Forward pass: run the input through both hidden layers (ELU + dropout)
        and return the output-layer logits (no softmax applied).
        '''
        x = self.exp_activation(self.fc1(x))  # ELU activation
        x = self.dropout(x)
        x = self.exp_activation(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)  # Output layer (logits)
        return x          # Return the final output

    def train_model(self, train_loader, val_loader=None, epochs=10, save_path=None):
        '''
        Train the model for `epochs` passes over `train_loader`.

        After each epoch the summed batch loss and the training accuracy are printed.
        If `val_loader` is given, validation accuracy is computed each epoch, and
        whenever it exceeds the best value seen so far the state_dict is written to
        `save_path` (only if `save_path` is not None).
        '''

        best_val_acc = 0.0

        for epoch in range(epochs):
            self.train()  # evaluate() switches to eval mode, so re-enable dropout every epoch
            total_loss, correct, total = 0, 0, 0

            for batch_X, batch_y in train_loader:
                # Move data to the model's device
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)

                self.optimizer.zero_grad()  # Reset gradients
                outputs = self(batch_X)  # Forward pass
                loss = self.criterion(outputs, batch_y)  # Compute loss
                loss.backward()  # Backpropagation
                self.optimizer.step()  # Update weights

                total_loss += loss.item()
                correct += (torch.argmax(outputs, dim=1) == batch_y).sum().item()
                total += batch_y.size(0)
            
            train_acc = correct / total
            # The reported loss is the sum of per-batch losses, not the per-batch average
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Train Acc: {train_acc:.4f}")

            if val_loader is not None:
                val_acc = self.evaluate(val_loader)
                print(f"Validation Acc: {val_acc:.4f}")

                # save the best model based on validation accuracy
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # only save the model if a save path is provided
                    if save_path is not None:
                        torch.save(self.state_dict(), save_path)
                        print(f"✅ Model saved at {save_path} (Best Validation Acc: {best_val_acc:.4f})")

        print("Training complete!")        
    
    def evaluate(self, data_loader):
        '''
        Evaluate the model on a dataset and return accuracy
        (fraction of samples whose arg-max logit equals the label).
        Leaves the model in evaluation mode.
        '''
        self.eval()  # Set model to evaluation mode, dropout disabled in eval mode. 
        correct, total = 0, 0
        
        with torch.no_grad():
            for batch_X, batch_y in data_loader:
                # Move data to the model's device
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
                outputs = self(batch_X)
                predicted = torch.argmax(outputs, dim=1)
                correct += (predicted == batch_y).sum().item()
                total += batch_y.size(0)

        return correct / total

    def predict(self, X):
        '''
        Return the predicted class index (arg-max over logits) for each row of X.
        The returned tensor lives on the model's device.
        '''
        self.eval()  # Ensure model is in evaluation mode
        with torch.no_grad():
            # Move data to the model's device
            X = X.to(self.device)
            outputs = self(X)
            return torch.argmax(outputs, dim=1)
        
    def load_model(self, file_path):
        '''
        Load a state_dict saved by `train_model` from `file_path` into this model
        and switch the model to evaluation mode.
        '''
        # Load model to the correct device
        self.load_state_dict(torch.load(file_path, map_location=self.device))  
        # Ensure model is on the correct device
        self.to(self.device)  
        self.eval()
        print(f"🔄 Model loaded from {file_path}")

In [31]:
# Example usage: smoke-test the MLP end to end on random (label-free) noise.
# No real signal exists in this data, so validation accuracy is expected to stay near chance.

# --- Hyperparameters ---
input_size = 200            # Number of features per sample; adjust based on dataset
hidden_size = [300, 200]    # Widths of the two hidden layers
output_size = 2             # Number of classes
dropout_rate, learning_rate = 0.3, 0.001
epochs, batch_size = 1000, 32

# --- Synthetic data (draw order kept: X_train, y_train, X_val, y_val) ---
X_train = torch.randn(1000, input_size)
y_train = torch.randint(low=0, high=output_size, size=(1000,))
X_val = torch.randn(200, input_size)
y_val = torch.randint(low=0, high=output_size, size=(200,))

# --- Wrap tensors in datasets, then in batch loaders ---
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,   # reshuffle the training samples every epoch
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# --- Build the model and train it, checkpointing the best epoch to disk ---
file_path = "mlp_model.pth"
model = MLP(
    input_size,
    hidden_size,
    output_size,
    dropout_rate=dropout_rate,
    learning_rate=learning_rate,
)
model.train_model(
    train_loader,
    val_loader=val_loader,
    epochs=epochs,
    save_path=file_path,
)

# --- Restore the best checkpoint and score it ---
# NOTE: the "test" accuracy below is measured on the validation loader.
model.load_model(file_path)
test_acc = model.evaluate(val_loader)
print(f"Final Test Accuracy: {test_acc:.4f}")

# --- Predict classes for a handful of fresh random samples ---
X_test = torch.randn(5, input_size)
predictions = model.predict(X_test)
print("Predicted classes:", predictions)


🔄 Using device: cuda (1 GPUs available)
Epoch [1/1000], Loss: 43.8337, Train Acc: 0.5100
Validation Acc: 0.4350
✅ Model saved at mlp_model.pth (Best Validation Acc: 0.4350)
Epoch [2/1000], Loss: 29.7834, Train Acc: 0.5970
Validation Acc: 0.4400
✅ Model saved at mlp_model.pth (Best Validation Acc: 0.4400)
Epoch [3/1000], Loss: 23.8744, Train Acc: 0.6620
Validation Acc: 0.4450
✅ Model saved at mlp_model.pth (Best Validation Acc: 0.4450)
Epoch [4/1000], Loss: 23.5183, Train Acc: 0.6670
Validation Acc: 0.4150
Epoch [5/1000], Loss: 21.3321, Train Acc: 0.7000
Validation Acc: 0.4300
Epoch [6/1000], Loss: 18.8490, Train Acc: 0.7230
Validation Acc: 0.4150
Epoch [7/1000], Loss: 16.2148, Train Acc: 0.7660
Validation Acc: 0.4250
Epoch [8/1000], Loss: 15.6223, Train Acc: 0.7610
Validation Acc: 0.4000
Epoch [9/1000], Loss: 14.0946, Train Acc: 0.8060
Validation Acc: 0.3950
Epoch [10/1000], Loss: 13.6673, Train Acc: 0.7980
Validation Acc: 0.4050
Epoch [11/1000], Loss: 12.6382, Train Acc: 0.8250
Valida