# A1 - Comparison of MLR, Manual BP Neural Network and PyTorch Neural Network

In this notebook we use the **Bike Sharing (hourly) encoded dataset** and compare
three different regression models:

- Multiple Linear Regression (MLR) using scikit-learn
- A custom neural network implemented from scratch (manual backprop)
- A neural network implemented with PyTorch

We use the same dataset, the same train/test split and the same scaling for all models.

The regression target is **cnt_log**.


# Imports and Dataset Load

In [1]:
# A1 - Neural Networks and Regression (Model Comparison)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

from models.NeuralNet import NeuralNet                # custom BP implementation (from scratch)
from models.mlr_sklearn import MultipleLinearRegressionSK
from models.neuralnet_torch import NeuralNetTorch



# Load dataset (Bike Sharing - hourly, preprocessed/encoded)
hours = pd.read_csv("hours_encoded.csv")

# We use cnt_log as the regression target (drop original cnt)
hours = hours.drop(columns=["cnt"])

# Shuffle and take first 1500 samples, as required
hours = hours.sample(n=1500, random_state=42).reset_index(drop=True)

# Separate features and target
X_df = hours.drop(columns=["cnt_log"])
y = hours["cnt_log"].values

print("Data shape (features):", X_df.shape)
print("Target shape:", y.shape)
print("Feature columns:", X_df.columns.tolist())

Data shape (features): (1500, 61)
Target shape: (1500,)
Feature columns: ['holiday', 'workingday', 'temp', 'atemp', 'hum', 'windspeed', 'casual', 'registered', 'season_1', 'season_2', 'season_3', 'season_4', 'yr_0', 'yr_1', 'mnth_1', 'mnth_2', 'mnth_3', 'mnth_4', 'mnth_5', 'mnth_6', 'mnth_7', 'mnth_8', 'mnth_9', 'mnth_10', 'mnth_11', 'mnth_12', 'hr_0', 'hr_1', 'hr_2', 'hr_3', 'hr_4', 'hr_5', 'hr_6', 'hr_7', 'hr_8', 'hr_9', 'hr_10', 'hr_11', 'hr_12', 'hr_13', 'hr_14', 'hr_15', 'hr_16', 'hr_17', 'hr_18', 'hr_19', 'hr_20', 'hr_21', 'hr_22', 'hr_23', 'weekday_0', 'weekday_1', 'weekday_2', 'weekday_3', 'weekday_4', 'weekday_5', 'weekday_6', 'weathersit_1', 'weathersit_2', 'weathersit_3', 'weathersit_4']


# Train - Test Split & Scaling

In [2]:

# Train / Test split

# 80% -> train+validation (trainval)
# 20% -> test
X_trainval_df, X_test_df, y_trainval, y_test = train_test_split(
    X_df, y, test_size=0.2, random_state=42, shuffle=True
)

print("Train+Val size:", X_trainval_df.shape[0])
print("Test size     :", X_test_df.shape[0])

# Convert to NumPy
X_trainval = X_trainval_df.values
X_test = X_test_df.values

# Feature scaling (StandardScaler for X)
x_scaler = StandardScaler()
X_trainval_np = x_scaler.fit_transform(X_trainval)
X_test_np     = x_scaler.transform(X_test)

# Target scaling (StandardScaler for y)
# We scale cnt_log to help neural networks training.
y_scaler = StandardScaler()
y_trainval_scaled = y_scaler.fit_transform(y_trainval.reshape(-1, 1)).ravel()
y_test_scaled     = y_scaler.transform(y_test.reshape(-1, 1)).ravel()

n_features = X_trainval_np.shape[1]
print("Number of input features:", n_features)


Train+Val size: 1200
Test size     : 300
Number of input features: 61


# Predict_batch & Definition of MAPE

In [3]:
def predict_batch(model, X):
    """
    Run model.predict on each sample in X and stack results as a column vector.

    This is useful for the manual NeuralNet implementation,
    which usually expects a single sample as input to predict().
    """
    return np.array([model.predict(x) for x in X]).reshape(-1, 1)


def mape(y_true, y_pred):
    """
    Safe MAPE implementation (ignores zero targets).

    MAPE = Mean Absolute Percentage Error (in %).
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    mask = y_true != 0
    if not np.any(mask):
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100.0


# Execution of BP from scratch (One manual experiment)

In [4]:

def run_experiment(hidden_layers, epochs, lr, momentum,
                   activation='tanh', val_split=0.2):
    """
    Runs one BP experiment on the Bike Sharing data using the custom NeuralNet.

    Outside the NeuralNet class:
      - We split the dataset into:
            80% -> train+validation (X_trainval_np, y_trainval_scaled)
            20% -> test           (X_test_np, y_test_scaled)

    Inside the NeuralNet class:
      - val_split controls the percentage of validation inside the 80%.
      - The class automatically shuffles and splits the 80% into:
            (1 - val_split) -> internal training
            val_split       -> internal validation

    This function:
      - trains the network on the 80% (with internal train/val),
      - computes predictions on the 80% and on the 20% test,
      - returns metrics in the original target scale (cnt_log).
    """

    # Build full layer structure: [input, hidden..., output]
    layers = [n_features] + hidden_layers + [1]

    # Initialize and train network
    net = NeuralNet(
        n=layers,
        fact=activation,
        eta=lr,
        alpha=momentum,
        epochs=epochs,
        val_split=val_split
    )

    # Train on the 80% (network internally splits into train/val)
    net.fit(X_trainval_np, y_trainval_scaled)

    # Error evolution per epoch (from NeuralNet)
    train_err, val_err = net.loss_epochs()

    # --- Predictions in scaled space ---
    y_trainval_pred_scaled = predict_batch(net, X_trainval_np)
    y_test_pred_scaled     = predict_batch(net, X_test_np)

    # --- Back to original target scale (cnt_log) ---
    y_trainval_pred = y_scaler.inverse_transform(y_trainval_pred_scaled).ravel()
    y_test_pred     = y_scaler.inverse_transform(y_test_pred_scaled).ravel()

    # --- Metrics (in original cnt_log scale) ---
    trainval_mse  = mean_squared_error(y_trainval, y_trainval_pred)
    test_mse      = mean_squared_error(y_test, y_test_pred)

    trainval_mae  = mean_absolute_error(y_trainval, y_trainval_pred)
    test_mae      = mean_absolute_error(y_test, y_test_pred)

    trainval_mape = mape(y_trainval, y_trainval_pred)
    test_mape     = mape(y_test, y_test_pred)

    res = {
        "model": net,
        "train_err": train_err,
        "val_err": val_err,
        "Number of layers": len(layers),
        "Layer Structure": layers,
        "Num epochs": epochs,
        "Learning Rate": lr,
        "Momentum": momentum,
        "Activation function": activation,

        # Metrics on the 80% (train+val combined)
        "TRAINVAL_MSE": trainval_mse,
        "TRAINVAL_MAE": trainval_mae,
        "TRAINVAL_MAPE": trainval_mape,

        # Metrics on the 20% test set
        "TEST_MSE": test_mse,
        "TEST_MAE": test_mae,
        "TEST_MAPE": test_mape,
    }

    return res



# Define list wth 4 sets of hyperparameters

In [5]:
hyperparams_list = [

    # --- 1: Shallow tanh ---
    {
        "hidden_layers": [20],
        "epochs": 400,
        "lr": 0.005,
        "momentum": 0.5,
        "activation": "tanh"
    },

    # --- 2: Deeper tanh ---
    {
        "hidden_layers": [40, 15],
        "epochs": 600,
        "lr": 0.005,
        "momentum": 0.9,
        "activation": "tanh"
    },

    # --- 3: Shallow sigmoid ---
    {
        "hidden_layers": [20],
        "epochs": 400,
        "lr": 0.005,
        "momentum": 0.5,
        "activation": "sigmoid"
    },

    # --- 4: Deeper ReLU ---
    {
        "hidden_layers": [40, 15],
        "epochs": 600,
        "lr": 0.005,
        "momentum": 0.9,
        "activation": "relu"
    },
]


# Run the 4 experiments

In [6]:
all_results = []

for i, cfg in enumerate(hyperparams_list, start=1):
    print("\n====================================")
    print(f"Running Experiment {i}")
    print("Config:", cfg)
    print("====================================")

    res = run_experiment(
        hidden_layers=cfg["hidden_layers"],
        epochs=cfg["epochs"],
        lr=cfg["lr"],
        momentum=cfg["momentum"],
        activation=cfg["activation"],
        val_split=0.2      # keep same val_split for all
    )

    res["Experiment"] = i
    all_results.append(res)

# Build a DataFrame with the most important fields
cols = [
    "Experiment",
    "Number of layers",
    "Layer Structure",
    "Num epochs",
    "Learning Rate",
    "Momentum",
    "Activation function",
    "TRAINVAL_MSE", "TRAINVAL_MAE", "TRAINVAL_MAPE",
    "TEST_MSE", "TEST_MAE", "TEST_MAPE",
]

results_df = pd.DataFrame(all_results)[cols]

print("\n=== Summary of all manual BP experiments ===")
display(results_df)


Running Experiment 1
Config: {'hidden_layers': [20], 'epochs': 400, 'lr': 0.005, 'momentum': 0.5, 'activation': 'tanh'}
Neural network has been initialized
Architecture (neurons per layer): [61, 20, 1]
Activation function used: tanh
 Layer 1: w(20, 61), theta(20, 1)
 Layer 2: w(1, 20), theta(1, 1)
Epoch 0: Train MSE=0.115037 | Val MSE=0.066814
Epoch 100: Train MSE=0.003727 | Val MSE=0.037243
Epoch 200: Train MSE=0.001753 | Val MSE=0.037746
Epoch 300: Train MSE=0.000959 | Val MSE=0.040257

Running Experiment 2
Config: {'hidden_layers': [40, 15], 'epochs': 600, 'lr': 0.005, 'momentum': 0.9, 'activation': 'tanh'}
Neural network has been initialized
Architecture (neurons per layer): [61, 40, 15, 1]
Activation function used: tanh
 Layer 1: w(40, 61), theta(40, 1)
 Layer 2: w(15, 40), theta(15, 1)
 Layer 3: w(1, 15), theta(1, 1)
Epoch 0: Train MSE=0.108875 | Val MSE=0.062370
Epoch 100: Train MSE=0.000234 | Val MSE=0.023093
Epoch 200: Train MSE=0.000105 | Val MSE=0.021677
Epoch 300: Train MS

Unnamed: 0,Experiment,Number of layers,Layer Structure,Num epochs,Learning Rate,Momentum,Activation function,TRAINVAL_MSE,TRAINVAL_MAE,TRAINVAL_MAPE,TEST_MSE,TEST_MAE,TEST_MAPE
0,1,3,"[61, 20, 1]",400,0.005,0.5,tanh,0.037926,0.1058,3.198342,0.185323,0.32117,11.125083
1,2,4,"[61, 40, 15, 1]",600,0.005,0.9,tanh,0.018124,0.053801,1.837297,0.082356,0.151568,6.931774
2,3,3,"[61, 20, 1]",400,0.005,0.5,sigmoid,0.021249,0.089967,3.013636,0.070456,0.153016,6.992538
3,4,4,"[61, 40, 15, 1]",600,0.005,0.9,relu,0.021993,0.066506,2.387277,0.102897,0.192352,8.72133


# Execution of MLR (scikit-learn)

In [7]:
# We train MLR on the same scaled data as the neural networks.
mlr = MultipleLinearRegressionSK(fit_intercept=True)

mlr.fit(X_trainval_np, y_trainval_scaled)

# Predictions in scaled space
y_trainval_pred_mlr_scaled = mlr.predict(X_trainval_np).reshape(-1, 1)
y_test_pred_mlr_scaled     = mlr.predict(X_test_np).reshape(-1, 1)

# Back to original target scale (cnt_log)
y_trainval_pred_mlr = y_scaler.inverse_transform(y_trainval_pred_mlr_scaled).ravel()
y_test_pred_mlr     = y_scaler.inverse_transform(y_test_pred_mlr_scaled).ravel()

# Metrics (in original cnt_log scale)
trainval_mse_mlr  = mean_squared_error(y_trainval, y_trainval_pred_mlr)
test_mse_mlr      = mean_squared_error(y_test, y_test_pred_mlr)

trainval_mae_mlr  = mean_absolute_error(y_trainval, y_trainval_pred_mlr)
test_mae_mlr      = mean_absolute_error(y_test, y_test_pred_mlr)

trainval_mape_mlr = mape(y_trainval, y_trainval_pred_mlr)
test_mape_mlr     = mape(y_test, y_test_pred_mlr)

print("\n=== Multiple Linear Regression (scikit-learn) ===")
print(f"TRAIN+VAL -> MSE: {trainval_mse_mlr:.4f}, MAE: {trainval_mae_mlr:.4f}, MAPE: {trainval_mape_mlr:.2f}%")
print(f"TEST      -> MSE: {test_mse_mlr:.4f}, MAE: {test_mae_mlr:.4f}, MAPE: {test_mape_mlr:.2f}%")



Multiple Linear Regression (scikit-learn) initialized.
fit_intercept: True

=== Multiple Linear Regression (scikit-learn) ===
TRAIN+VAL -> MSE: 0.2025, MAE: 0.3162, MAPE: 10.62%
TEST      -> MSE: 0.2202, MAE: 0.3202, MAPE: 12.33%


# Execution of PyTorch NeuralNet

In [8]:

# Here you can choose manually which experiment configuration
# from hyperparams_list you want to replicate with PyTorch.
#
# Example: choose experiment 2 (index 1) or any other.

chosen_experiment_index = 1  # 0-based index -> 0,1,2,3  (here: experiment 2)
chosen_cfg = hyperparams_list[chosen_experiment_index]

print("\nUsing configuration from manual BP experiment:",
      chosen_experiment_index + 1, "->", chosen_cfg)

hidden_layers_torch = chosen_cfg["hidden_layers"]
layers_torch = [n_features] + hidden_layers_torch + [1]

net_torch = NeuralNetTorch(
    n=layers_torch,
    fact=chosen_cfg["activation"],   # same activation
    eta=chosen_cfg["lr"],            # same learning rate
    alpha=chosen_cfg["momentum"],    # same momentum
    epochs=chosen_cfg["epochs"],     # same number of epochs
    val_split=0.2                    # same validation split as manual net
)

# Train with scaled data
net_torch.fit(X_trainval_np, y_trainval_scaled)

# Loss history for later plots
train_err_torch, val_err_torch = net_torch.loss_epochs()

# Predictions in scaled space
y_trainval_pred_torch_scaled = net_torch.predict(X_trainval_np).reshape(-1, 1)
y_test_pred_torch_scaled     = net_torch.predict(X_test_np).reshape(-1, 1)

# Back to original target scale (cnt_log)
y_trainval_pred_torch = y_scaler.inverse_transform(y_trainval_pred_torch_scaled).ravel()
y_test_pred_torch     = y_scaler.inverse_transform(y_test_pred_torch_scaled).ravel()

# Metrics (in original cnt_log scale)
trainval_mse_torch  = mean_squared_error(y_trainval, y_trainval_pred_torch)
test_mse_torch      = mean_squared_error(y_test, y_test_pred_torch)

trainval_mae_torch  = mean_absolute_error(y_trainval, y_trainval_pred_torch)
test_mae_torch      = mean_absolute_error(y_test, y_test_pred_torch)

trainval_mape_torch = mape(y_trainval, y_trainval_pred_torch)
test_mape_torch     = mape(y_test, y_test_pred_torch)

print("\n=== PyTorch Neural Network (chosen config) ===")
print(f"TRAIN+VAL -> MSE: {trainval_mse_torch:.4f}, MAE: {trainval_mae_torch:.4f}, MAPE: {trainval_mape_torch:.2f}%")
print(f"TEST      -> MSE: {test_mse_torch:.4f}, MAE: {test_mae_torch:.4f}, MAPE: {test_mape_torch:.2f}%")




Using configuration from manual BP experiment: 2 -> {'hidden_layers': [40, 15], 'epochs': 600, 'lr': 0.005, 'momentum': 0.9, 'activation': 'tanh'}
Neural network (PyTorch) initialized.
Layers: [61, 40, 15, 1]
Activation: tanh
Epoch 0: Train MSE=1.001849 | Val MSE=0.961159
Epoch 100: Train MSE=0.095751 | Val MSE=0.080517
Epoch 200: Train MSE=0.079744 | Val MSE=0.071580
Epoch 300: Train MSE=0.068534 | Val MSE=0.066015
Epoch 400: Train MSE=0.058300 | Val MSE=0.060804
Epoch 500: Train MSE=0.048738 | Val MSE=0.055672

=== PyTorch Neural Network (chosen config) ===
TRAIN+VAL -> MSE: 0.0838, MAE: 0.1894, MAPE: 7.07%
TEST      -> MSE: 0.1267, MAE: 0.2167, MAPE: 9.39%
