# **MLP Regression on 2D Sinc Function**

This notebook applies a Multi-Layer Perceptron (MLP) for regression on the 2D sinc function, covering data generation, MLP architecture, training, and evaluation.

In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D 

np.random.seed(42)  # Seed NumPy's global RNG so weight initialization and shuffling are reproducible


# **1. Sinc2D Data Functions**

Functions for generating and visualizing the 2D sinc dataset.

In [ ]:
def sinc2D_gen(Npatterns):
    """
    Generate a 2D sinc regression dataset on a regular grid over [-5, 5] x [-5, 5].

    The target is ``10 * sinc(x1) * sinc(x2)`` using the unnormalized
    ``sinc(v) = sin(v) / v`` with ``sinc(0) = 1``.

    Parameters
    ----------
    Npatterns : int
        Requested number of patterns. The grid uses ``int(sqrt(Npatterns))``
        points per axis, so the number of rows actually produced is that
        value squared (equal to ``Npatterns`` only for perfect squares).

    Returns
    -------
    X : ndarray, shape (side**2, 2)
        Grid coordinates, one (x1, x2) pair per row.
    y : ndarray, shape (side**2, 1)
        Target value for every row of ``X``.
    XX1, XX2 : ndarray, shape (side, side)
        Meshgrid coordinate matrices (useful for surface plots).
    YY : ndarray, shape (side, side)
        Target values laid out on the meshgrid.
    """
    side_length = int(np.sqrt(Npatterns))
    x1 = np.linspace(-5, 5, side_length)
    x2 = np.linspace(-5, 5, side_length)
    XX1, XX2 = np.meshgrid(x1, x2)

    def safe_sinc_val(val):
        # np.where evaluates both branches eagerly, so dividing by the raw
        # values would compute 0/0 (RuntimeWarning / NaN) whenever the grid
        # contains an exact zero. Substitute a harmless denominator first.
        at_zero = val == 0
        denominator = np.where(at_zero, 1.0, val)
        return np.where(at_zero, 1.0, np.sin(denominator) / denominator)

    YY = 10 * safe_sinc_val(XX1) * safe_sinc_val(XX2)

    # Flatten the grid into one pattern per row.
    X = np.hstack((XX1.reshape(-1, 1), XX2.reshape(-1, 1)))
    y = YY.reshape(-1, 1)

    return X, y, XX1, XX2, YY

def sinc2D_display(XX1, XX2, YY, title_suffix=""):
    """
    Show the 2D sinc data as a 3D surface.

    XX1 and XX2 are the meshgrid coordinate matrices, YY holds the surface
    heights on that grid, and title_suffix is inserted into the figure title.
    """
    figure = plt.figure(figsize=(10, 8))
    axes = figure.add_subplot(1, 1, 1, projection='3d')
    axes.plot_surface(XX1, XX2, YY, cmap='viridis', edgecolor='none')
    axes.set_title(f'Sinc2D Synthetic Dataset {title_suffix}. Domain: [-5,5]x[-5,5]')

    # Label the three axes in one pass.
    axis_labels = (
        (axes.set_xlabel, 'X1'),
        (axes.set_ylabel, 'X2'),
        (axes.set_zlabel, 'Y'),
    )
    for apply_label, text in axis_labels:
        apply_label(text)

    plt.show()


# **2. MLP Core Functions**

Essential functions for MLP: activation, cost, weight initialization, forward pass, and backpropagation.

## **2.1 Activation and Cost Functions**
* **Sigmoid Activation:** $\sigma(z) = \frac{1}{1 + e^{-z}}$
* **Sigmoid Derivative:** $\sigma'(z) = \sigma(z)(1 - \sigma(z))$
* **Mean Squared Error (MSE) Cost:** $J = \frac{1}{2B} \sum_{i=1}^{B} (y_i - \hat{y}_i)^2$

## **2.2 Forward Pass**
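Input $P_{input}$ (BatchSize $\times$ Features) is mapped to the output $\hat{Y}$ (Output $\times$ BatchSize):
* Bias-Augmented Input: $A_0$ is $P_{input}^T$ with a row of ones stacked on top
* Hidden Layer Pre-activation: $Z_1 = W_1 A_0$
* Hidden Layer Activation: $A_1 = \sigma(Z_1)$; $A_1^{ext}$ is $A_1$ with a row of ones stacked on top (bias term)
* Output Layer Activation (Prediction): $\hat{Y} = W_2 A_1^{ext}$ (linear output, no activation)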

## **2.3 Backpropagation**
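Error signals and gradients for weight updates (the gradients are summed over the mini-batch; the $\frac{1}{B}$ factor of $J$ is not applied and is effectively absorbed into the learning rate):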
* Output Error: $\delta_2 = (\hat{Y} - Y_{true})$
* Gradient for $W_2$: $\nabla_{W_2} J = \delta_2 (A_1^{ext})^T$
* Hidden Error: $\delta_1 = (W_2^T \delta_2)_{\text{excluding bias}} \odot \sigma'(Z_1)$
* Gradient for $W_1$: $\nabla_{W_1} J = \delta_1 A_0^T$

In [ ]:
def MLP_sigmoid(z):
    """
    Sigmoid activation, sigma(z) = 1 / (1 + exp(-z)), evaluated element-wise.

    Uses a numerically stable formulation so that large-magnitude negative
    inputs do not overflow inside ``exp``.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    denominator = 1.0 + decay
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    result = np.where(z >= 0, 1.0 / denominator, decay / denominator)
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]

def MLP_sigmoid_derivative(rZ):
    """Evaluate sigma'(rZ) = sigma(rZ) * (1 - sigma(rZ)) element-wise."""
    activation = MLP_sigmoid(rZ)
    complement = 1.0 - activation
    return activation * complement

def MLP_MSE_cost(y_true, y_pred, model, W1, W2):
    """
    Halved mean squared error: sum((y_true - y_pred)**2) / (2 * B).

    B is the size of the second axis of y_true; a 1-D y_true is first
    viewed as a single row. model, W1 and W2 are accepted but not used.
    """
    targets = y_true.reshape(1, -1) if y_true.ndim == 1 else y_true
    batch_size = targets.shape[1]
    residual = targets - y_pred
    return np.sum(residual ** 2) / (2 * batch_size)

def MLP_initialize_weights(model):
    """
    Draw the initial weight matrices from a uniform distribution on [-1, 1).

    Sizes come from model['n_hidden'], model['n_features'] and
    model['n_output']; each matrix has one extra column for the bias term.
    Returns (W1, W2) with shapes (n_hidden, n_features + 1) and
    (n_output, n_hidden + 1).
    """
    hidden_shape = (model['n_hidden'], model['n_features'] + 1)
    output_shape = (model['n_output'], model['n_hidden'] + 1)
    # W1 is drawn before W2 so the random stream is consumed in a fixed order.
    W1 = 2 * np.random.rand(*hidden_shape) - 1
    W2 = 2 * np.random.rand(*output_shape) - 1
    return W1, W2

def MLP_MSELIN_forward(P_input, W1, W2):
    """
    Run one forward pass: sigmoid hidden layer followed by a linear output.

    P_input holds one sample per row. Returns (rA2, A1, A0, rZ1): the network
    output, the bias-augmented hidden activations, the bias-augmented
    transposed input, and the hidden pre-activations.
    """
    n_samples = P_input.shape[0]
    bias_row = np.ones((1, n_samples))

    # Samples become columns; the leading row of ones multiplies the bias weights.
    A0 = np.vstack((bias_row, P_input.T))
    rZ1 = W1 @ A0

    A1 = np.vstack((bias_row, MLP_sigmoid(rZ1)))
    rA2 = W2 @ A1
    return rA2, A1, A0, rZ1

def MLP_MSELIN_backprop(rA2, A1, A0, rZ1, Y_true, W1, W2):
    """
    Backpropagate the output error and return (dL_dW1, dL_dW2).

    The gradients are summed over the batch (no division by the batch size).
    W1 is accepted but not used.
    """
    output_error = rA2 - Y_true
    grad_W2 = output_error @ A1.T

    # Drop the first row: it belongs to the bias unit, which has no incoming weights.
    hidden_signal = (W2.T @ output_error)[1:, :]
    hidden_error = hidden_signal * MLP_sigmoid_derivative(rZ1)
    grad_W1 = hidden_error @ A0.T

    return grad_W1, grad_W2

def MLP_MSELIN_predict(P_input, W1, W2):
    """Return the network output for P_input, discarding the forward-pass intermediates."""
    prediction, _hidden, _inputs, _pre_activation = MLP_MSELIN_forward(P_input, W1, W2)
    return prediction


# **3. MLP Training Loop**

The main function to train the MLP with mini-batch Gradient Descent.

In [ ]:
def MLP_MSELIN_train(P_train, y_train, model):
    """
    Train the MLP with mini-batch gradient descent at a fixed learning rate.

    Parameters
    ----------
    P_train : ndarray, shape (N, n_features)
        Training inputs, one sample per row.
    y_train : ndarray, shape (N, n_output) or (N,)
        Training targets; a 1-D array is treated as a single output column.
    model : dict
        Hyper-parameters. Reads 'eta' (learning rate), 'epochs' and
        'minibatches' (number of mini-batches per epoch), plus the layer
        sizes consumed by MLP_initialize_weights. The dict is updated in
        place with 'cost_history' (one cost per processed mini-batch),
        'W1' and 'W2'.

    Returns
    -------
    (model, W1, W2) : the updated dict and the trained weight matrices.
    """
    W1, W2 = MLP_initialize_weights(model)
    eta = model['eta']
    epochs = model['epochs']
    minibatches = model['minibatches']
    model['cost_history'] = []
    num_observations = P_train.shape[0]

    # Accept 1-D targets so the 2-D row indexing below works for them too.
    if y_train.ndim == 1:
        y_train = y_train.reshape(-1, 1)

    # The split positions depend only on N and the number of mini-batches,
    # so compute them once instead of once per epoch.
    mini_batch_indices = np.array_split(np.arange(num_observations), minibatches)

    for e in range(1, epochs + 1):
        # Re-shuffle every epoch so each mini-batch sees different samples.
        shuffled_indices = np.random.permutation(num_observations)
        P_shuffled = P_train[shuffled_indices, :]
        y_shuffled = y_train[shuffled_indices, :]

        for m_idx, idx in enumerate(mini_batch_indices):
            # More mini-batches than samples yields empty splits; skipping
            # them avoids a 0/0 (NaN) cost entry and a pointless update.
            if idx.size == 0:
                continue

            P_mini_batch = P_shuffled[idx, :]
            # Targets are transposed to (n_output, batch) to match the network output.
            y_mini_batch = y_shuffled[idx, :].T

            rA2, A1, A0, rZ1 = MLP_MSELIN_forward(P_mini_batch, W1, W2)
            cost = MLP_MSE_cost(y_mini_batch, rA2, model, W1, W2)
            model['cost_history'].append(cost)
            print(f'Epoch {e}/{epochs}, Minibatch {m_idx+1}/{minibatches}, Loss (MSE) {cost:.6f}')

            grad_W1, grad_W2 = MLP_MSELIN_backprop(rA2, A1, A0, rZ1, y_mini_batch, W1, W2)
            # Plain gradient-descent step.
            W1 = W1 - eta * grad_W1
            W2 = W2 - eta * grad_W2

    model['W1'] = W1
    model['W2'] = W2
    return model, W1, W2


# **4. Main Script: Training and Evaluation**

Sets up parameters, generates data, trains the MLP, and evaluates its performance.

In [ ]:
plt.close('all')  # Start from a clean slate: close any figures left open

# Parameters
Ninput = 2          # Number of input features (x1, x2)
Ntrain = 150**2     # Requested number of training patterns
Ntest  = 2500       # Requested number of test patterns

# Data Generation
Xtrain, ytrain, XX1_train, XX2_train, YY_train = sinc2D_gen(Ntrain)
sinc2D_display(XX1_train, XX2_train, YY_train, title_suffix="(Training Data)")
Xtest, ytest, _, _, _ = sinc2D_gen(Ntest)

# Shuffle training data.
# The generator rounds the requested size down to a perfect square, so use the
# actual number of rows rather than Ntrain to build the permutation.
shuffled_ind = np.random.permutation(Xtrain.shape[0])
Xtrain = Xtrain[shuffled_ind, :]
ytrain = ytrain[shuffled_ind]

# Model Parameters
model = {
    'n_output': 1,          # Single regression output
    'n_features': Ninput,   # Input dimensionality
    'n_hidden': 300,        # Hidden units
    'epochs': 500,          # Passes over the training set
    'eta': 1e-6,            # Learning rate (applied to batch-summed gradients)
    'minibatches': 30,      # Mini-batches per epoch
}

# Train MLP
print("\nStarting MLP training...")
model_trained, W1_trained, W2_trained = MLP_MSELIN_train(Xtrain, ytrain, model)
print("MLP training complete.")

# Predictions (transposed back to one row per sample, matching ytrain / ytest)
ytrain_pred = MLP_MSELIN_predict(Xtrain, W1_trained, W2_trained).T
ytest_pred  = MLP_MSELIN_predict(Xtest,  W1_trained, W2_trained).T

# Compute MSE (same halved convention, 1 / (2N), as the training cost)
acc_train = np.sum((ytrain - ytrain_pred)**2) / (2 * len(ytrain))
print(f'\nTraining MSE: {acc_train:.6f}')
acc_test = np.sum((ytest - ytest_pred)**2) / (2 * len(ytest))
print(f'Test MSE: {acc_test:.6f}')

# Plots: true vs. predicted targets (a perfect fit lies on the diagonal)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(ytrain, ytrain_pred, '.b', alpha=0.5)
plt.title('Scatter Plot (Training Set)')
plt.xlabel('True Y')
plt.ylabel('Predicted Y')
plt.grid(True)
plt.axis('equal')

plt.subplot(1, 2, 2)
plt.plot(ytest, ytest_pred, '.r', alpha=0.5)
plt.title('Scatter Plot (Test Set)')
plt.xlabel('True Y')
plt.ylabel('Predicted Y')
plt.grid(True)
plt.axis('equal')
plt.tight_layout()
plt.show()

# Training loss, one point per processed mini-batch
plt.figure(figsize=(10, 6))
plt.plot(model_trained['cost_history'], color='purple', linewidth=2)
plt.title('MLP Training Loss')
plt.xlabel('Mini-batch Iteration')
plt.ylabel('Loss (MSE)')
plt.grid(True)
plt.show()

# Visualize Learned Surface (only meaningful for two input features)
if Ninput == 2:
    x1_vis = np.linspace(-5, 5, 100)
    x2_vis = np.linspace(-5, 5, 100)
    XX1_vis, XX2_vis = np.meshgrid(x1_vis, x2_vis)
    X_vis = np.hstack((XX1_vis.reshape(-1, 1), XX2_vis.reshape(-1, 1)))
    # Predict on the flattened grid, then fold the result back into grid shape.
    YY_pred_surface = MLP_MSELIN_predict(X_vis, W1_trained, W2_trained).T.reshape(XX1_vis.shape)
    
    fig = plt.figure(figsize=(12, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(XX1_vis, XX2_vis, YY_pred_surface, cmap='viridis', edgecolor='none', alpha=0.8)
    ax.set_title('MLP Learned Surface for Sinc2D Function')
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_zlabel('Predicted Y')
    plt.show()
