# MLP with Single Hidden Layer using PyTorch

1. Define an MLP with variable number of inputs (num_inputs), outputs (num_outputs), and nodes in hidden layer (num_hidden_layer_nodes).  
2. Use ReLU activation for the hidden-layer nodes (the output layer is left linear)
3. Use MSE loss
4. Use SGD optimizer


<img src="https://www.learnopencv.com/wp-content/uploads/2020/01/mlp.png" alt="mlp" width="500"/>

## Table of Contents
* [1 Define MLP using NN Module](#1-Define-MLP-using-NN-Module)
* [2 Generate Data](#2-Generate-Data)
* [3 Perform Training](#3-Perform-Training)
* [4 MLP with Sequential Module](#4-MLP-with-Sequential-Module)

In [None]:
import numpy as np
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from IPython.display import clear_output

In [None]:
def system_config(SEED_VALUE=42, package_list=None):
    """
    Configures the system environment for PyTorch-based operations.

    Seeds Python's `random`, NumPy and PyTorch, selects a compute device
    (CUDA first, then Apple MPS, otherwise CPU) and, when running on Google
    Colab or Kaggle, installs the requested additional packages.

    Args:
        SEED_VALUE (int): Seed value for random number generation. Default is 42.
        package_list (str | sequence of str | None): Additional packages to
            install on Google Colab or Kaggle, given either as a
            whitespace-separated string or as a sequence of requirement
            strings. Default is None, in which case nothing is installed.

    Returns:
        tuple: A tuple containing the device name as a string and a boolean indicating GPU availability.
    """
    # Imported locally so the function does not depend on additional
    # module-level imports.
    import shlex
    import subprocess
    import sys

    random.seed(SEED_VALUE)
    np.random.seed(SEED_VALUE)
    torch.manual_seed(SEED_VALUE)

    def is_running_in_colab():
        return 'COLAB_GPU' in os.environ

    def is_running_in_kaggle():
        return 'KAGGLE_KERNEL_RUN_TYPE' in os.environ

    on_hosted_notebook = is_running_in_colab() or is_running_in_kaggle()

    def install_packages():
        """Install `package_list` with pip when on Colab/Kaggle; otherwise do nothing."""
        if not on_hosted_notebook or not package_list:
            # Nothing to install (this also avoids running `pip install None`).
            return

        if isinstance(package_list, str):
            requirements = shlex.split(package_list)
        else:
            requirements = [str(package) for package in package_list]
        if not requirements:
            return

        print('Installing required packages...')
        # Run pip through the current interpreter so the packages land in the
        # environment this code runs in; an argv list avoids shell parsing.
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', *requirements],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
        if result.stdout:
            print(result.stdout, end='')
        if result.returncode != 0:
            # Best effort: report the failure without aborting the setup.
            print(f'Warning: pip exited with status {result.returncode}.')

    #--------------------------------
    # Check for the availability of GPUs.
    #--------------------------------
    if torch.cuda.is_available():
        print('Using CUDA GPU')

        # Install the packages required on Colab / Kaggle.
        install_packages()

        # Set the device to the default CUDA device.
        DEVICE = torch.device('cuda')
        print("Device: ", DEVICE)
        GPU_AVAILABLE = True

        torch.cuda.manual_seed(SEED_VALUE)
        torch.cuda.manual_seed_all(SEED_VALUE)

        # Performance and deterministic behavior.
        torch.backends.cudnn.enabled = True       # Provides highly optimized primitives for DL operations.
        torch.backends.cudnn.deterministic = True # Ensures deterministic results even with cuDNN enabled.
        torch.backends.cudnn.benchmark = False    # Setting to True can cause non-deterministic behavior.

    elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
        print('Using Apple Silicon GPU')

        # Set the device to the Apple Silicon GPU Metal Performance Shader (MPS).
        DEVICE = torch.device("mps")
        print("Device: ", DEVICE)
        # Environment variable that allows PyTorch to fall back to CPU execution
        # when encountering operations that are not currently supported by MPS.
        # NOTE(review): torch is already imported at this point -- confirm the
        # variable is still honored when it is set this late.
        os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
        GPU_AVAILABLE = True

        torch.mps.manual_seed(SEED_VALUE)
        torch.use_deterministic_algorithms(True)

    else:
        print('Using CPU')
        DEVICE = torch.device('cpu')
        print("Device: ", DEVICE)
        GPU_AVAILABLE = False

        install_packages()
        if on_hosted_notebook:
            print('Note: Change runtime type to GPU for better performance.')

        torch.use_deterministic_algorithms(True)

    return str(DEVICE), GPU_AVAILABLE

In [None]:
# Extra packages that must be installed when running on Google Colab or Kaggle.
package_list = "torchinfo"

# Seed the RNGs and pick the compute device (the seed is spelled out here; 42 is also the default).
DEVICE, GPU_AVAILABLE = system_config(SEED_VALUE=42, package_list=package_list)

Using CUDA GPU
Installing required packages...
Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
Device:  cuda


In [None]:
from torchinfo import summary

from IPython.display import clear_output

In [None]:
# ANSI escape sequences used to emphasise text in printed output.
bold = "\033[1m"   # Switch bold on.
reset = "\033[0m"  # Restore default text attributes.

## 1 Define MLP using NN Module

In [None]:
# Define the model
class MLP(nn.Module):
    """Multi-layer perceptron with one ReLU-activated hidden layer.

    Args:
        num_inputs: Number of features in each input sample.
        num_hidden_layer_nodes: Number of units in the hidden layer.
        num_outputs: Number of values produced per sample.
    """

    def __init__(self, num_inputs, num_hidden_layer_nodes, num_outputs):
        super().__init__()

        # Hidden layer: num_inputs -> num_hidden_layer_nodes.
        self.linear1 = nn.Linear(
            in_features=num_inputs,
            out_features=num_hidden_layer_nodes,
        )

        # Output layer: num_hidden_layer_nodes -> num_outputs.
        self.linear2 = nn.Linear(
            in_features=num_hidden_layer_nodes,
            out_features=num_outputs,
        )

    def forward(self, x):
        """Return the network output for the input tensor `x`."""
        # Hidden layer followed by the ReLU non-linearity.
        hidden = self.linear1(x)
        activated = F.relu(hidden)

        # The output layer is linear (no activation).
        return self.linear2(activated)

## 2 Generate Data

In [None]:
# Dataset dimensions: number of samples, features per sample, targets per sample.
num_data, num_inputs, num_outputs = 1000, 1000, 10

# Random inputs and targets drawn from a standard normal distribution.
# X is drawn before Y so the random number stream is consumed in the same order.
X = torch.randn(num_data, num_inputs)
Y = torch.randn(num_data, num_outputs)

## 3 Perform Training

In [None]:
def train(model, criterion, optimizer, data, targets, num_epochs):
    """Train `model` for `num_epochs` epochs, one full-batch update per epoch.

    Each epoch runs a forward pass over all of `data`, evaluates `criterion`
    against `targets`, back-propagates, and applies one optimizer step. The
    cell output is cleared at the start of every epoch, so only the most
    recent epoch's report stays visible.

    Args:
        model: Module to train; it is switched to training mode.
        criterion: Callable taking `(predictions, targets)` and returning the
            loss to minimise. It must return a scalar, since `backward()` and
            the printed report rely on that.
        optimizer: Optimizer holding the parameters of `model`.
        data: Input tensor passed to `model` in a single batch.
        targets: Target tensor passed to `criterion`.
        num_epochs (int): Number of epochs to run.

    Returns:
        None.
    """
    model.train()

    for epoch_idx in range(num_epochs):
        # Clear cell outputs at the start of each epoch.
        clear_output()

        # Reset gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: compute predictions for the whole data set.
        y_pred = model(data)

        # Use the criterion passed by the caller instead of relying on a
        # global `loss_function` defined elsewhere.
        loss = criterion(y_pred, targets)

        # Backward pass: compute gradients of the loss w.r.t. the parameters.
        loss.backward()

        # Update model parameters (weights).
        optimizer.step()

        print(f"{f'{bold}[ Epoch: {epoch_idx+1} ]{reset}':=^80}")

        train_loss_stat = f"{bold}Loss: {loss.item():.4f}{reset}"

        print(f"\n{train_loss_stat}")

        print(f"{'='*72}\n")

### 3.1 Define Model Parameters, Loss Function and Optimizer

In [None]:
# Hyperparameters for this run.
num_epochs = 100               # Number of training epochs.
num_hidden_layer_nodes = 100   # Width of the hidden layer.

# Re-seed PyTorch so the weight initialisation below is reproducible.
torch.manual_seed(42)

# Instantiate the MLP class defined above.
model = MLP(num_inputs, num_hidden_layer_nodes, num_outputs)

# Print a layer-by-layer summary for a single input sample.
print(
    summary(
        model,
        input_size=(1, num_inputs),
        device="cpu",
        row_settings=["var_names"],
    )
)

# Squared-error loss; reduction='sum' adds the errors up instead of averaging them.
loss_function = nn.MSELoss(reduction='sum')

# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=1e-4)

Layer (type (var_name))                  Output Shape              Param #
MLP (MLP)                                [1, 10]                   --
├─Linear (linear1)                       [1, 100]                  100,100
├─Linear (linear2)                       [1, 10]                   1,010
Total params: 101,110
Trainable params: 101,110
Non-trainable params: 0
Total mult-adds (M): 0.10
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.40
Estimated Total Size (MB): 0.41


In [None]:
# Train the MLP on the full data set.
train(
    model=model,
    criterion=loss_function,
    optimizer=optimizer,
    data=X,
    targets=Y,
    num_epochs=num_epochs,
)


[1mLoss: 0.9612[0m



## 4 MLP with Sequential Module

Observe that in the section above, we defined the two `Linear` layers individually and applied the ReLU activation ourselves (via `F.relu`) inside `forward`.

The value a `Sequential` module provides over manually calling a sequence of modules is that it allows treating the whole container as a single module, such that performing a transformation on the Sequential applies to each of the modules it stores.




In [None]:
# Define the model
class MLP_Sequential(torch.nn.Module):
    """Single-hidden-layer MLP assembled with an `nn.Sequential` container.

    Args:
        num_inputs: Number of features in each input sample.
        num_hidden_layer_nodes: Number of units in the hidden layer.
        num_outputs: Number of values produced per sample.
    """

    def __init__(self, num_inputs, num_hidden_layer_nodes, num_outputs):
        super().__init__()

        # Layers in the order they are applied: hidden -> ReLU -> output.
        layers = [
            nn.Linear(num_inputs, num_hidden_layer_nodes),
            nn.ReLU(),
            nn.Linear(num_hidden_layer_nodes, num_outputs),
        ]

        # Wrap the layers in one container so they can be called as a unit.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the sequential stack for the input tensor `x`."""
        output = self.model(x)
        return output

We are going to use the same training parameters that we have defined in the previous sections.

In [None]:
# Hyperparameters for this run (same values as for the first model).
num_epochs = 100               # Number of training epochs.
num_hidden_layer_nodes = 100   # Width of the hidden layer.

# Re-seed PyTorch so the weight initialisation below is reproducible.
torch.manual_seed(42)

# Instantiate the Sequential-based MLP defined above.
model_seq = MLP_Sequential(num_inputs, num_hidden_layer_nodes, num_outputs)

# Print a layer-by-layer summary for a single input sample.
print(
    summary(
        model_seq,
        input_size=(1, num_inputs),
        device="cpu",
        row_settings=["var_names"],
    )
)

# Squared-error loss; reduction='sum' adds the errors up instead of averaging them.
loss_function = nn.MSELoss(reduction='sum')

# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model_seq.parameters(), lr=1e-4)

Layer (type (var_name))                  Output Shape              Param #
MLP_Sequential (MLP_Sequential)          [1, 10]                   --
├─Sequential (model)                     [1, 10]                   --
│    └─Linear (0)                        [1, 100]                  100,100
│    └─ReLU (1)                          [1, 100]                  --
│    └─Linear (2)                        [1, 10]                   1,010
Total params: 101,110
Trainable params: 101,110
Non-trainable params: 0
Total mult-adds (M): 0.10
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.40
Estimated Total Size (MB): 0.41


In [None]:
# Train the Sequential-based MLP on the full data set.
train(
    model=model_seq,
    criterion=loss_function,
    optimizer=optimizer,
    data=X,
    targets=Y,
    num_epochs=num_epochs,
)


[1mLoss: 0.9612[0m

