# A1_5 – Optional Part 2: Cross Validation

We evaluate the back-propagation (BP) neural network implemented from scratch (`NeuralNet`) using scikit-learn's `KFold` cross-validation.

In [1]:
# Import libraries and classes

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
import joblib
import sys , os
# Project root = parent of the current working directory; expose its
# "models" and "utils" folders to the import system (in that order).
base = os.path.dirname(os.getcwd())
sys.path.extend(os.path.join(base, folder) for folder in ("models", "utils"))
from NeuralNet import NeuralNet
from utils import predict_batch, mape, evaluate_regression

In [2]:
# Load preprocessed data from ../data
# DATA_DIR keeps the folder in one place instead of repeating it per file.
DATA_DIR = "../data"

X_trainval_np = np.load(f"{DATA_DIR}/X_trainval_np.npy")
y_trainval = np.load(f"{DATA_DIR}/y_trainval.npy")
y_trainval_scaled = np.load(f"{DATA_DIR}/y_trainval_scaled.npy")
# NOTE: joblib.load unpickles the file, so only load a scaler you trust.
y_scaler = joblib.load(f"{DATA_DIR}/y_scaler.joblib")

# The K-Fold loop indexes the features and both target arrays with the same
# row indices, so a length mismatch would misalign samples and targets.
assert len(y_trainval) == X_trainval_np.shape[0], (
    f"y_trainval has {len(y_trainval)} rows, expected {X_trainval_np.shape[0]}"
)
assert len(y_trainval_scaled) == X_trainval_np.shape[0], (
    f"y_trainval_scaled has {len(y_trainval_scaled)} rows, "
    f"expected {X_trainval_np.shape[0]}"
)

# Number of input features = number of columns of the feature matrix
n_features = X_trainval_np.shape[1]
print("X_trainval_np:", X_trainval_np.shape)

X_trainval_np: (1200, 61)


In [3]:
# Model configurations that will be scored with K-Fold.
#
# Every entry is one set of hyperparameters: a display name, the sizes of the
# hidden layers, the number of epochs, the learning rate, the momentum and the
# activation function.
cv_configs = [
    dict(
        name="One Layer tanh",
        hidden_layers=[20],
        epochs=400,
        lr=0.005,
        momentum=0.5,
        activation="tanh",
    ),
    dict(
        name="Deep tanh",
        hidden_layers=[60, 30],
        epochs=700,
        lr=0.002,
        momentum=0.9,
        activation="tanh",
    ),
]

# One splitter shared by all configurations: 5 shuffled folds with a fixed
# random_state, so the fold assignment is repeatable.
k_folds = 5
kf = KFold(
    shuffle=True,
    random_state=42,
    n_splits=k_folds,
)


### K-fold cross-validation for hyperparameter evaluation

In this section we use **k-fold cross-validation** to evaluate different neural network configurations.

The idea is the following:

- We have a list of model configurations (`cv_configs`), with different
  hyperparameters (number of layers, learning rate, momentum, activation, etc.).
- For each configuration, we use **KFold** to split the training+validation data
  into `k_folds` parts.
- In every fold, we train the network on `k-1` parts and validate it on the
  remaining part.
- We repeat this process for all folds, so every sample is used for validation once.

For each fold we compute three regression metrics:

- **MSE** (Mean Squared Error)  
- **MAE** (Mean Absolute Error)  
- **MAPE** (Mean Absolute Percentage Error)

At the end, we calculate the **mean** and **standard deviation** of these metrics across all folds, for each configuration.  




In [4]:
 # Loop of K-Fold for each configuration

# K-Fold evaluation of every configuration in cv_configs.
#
# For each configuration a fresh NeuralNet is trained on every fold's training
# part (scaled targets) and scored on the held-out part (original target
# scale). The per-fold MSE / MAE / MAPE are then summarised as mean and
# standard deviation, one row per configuration, in cv_df.

# Base seed used to re-seed NumPy before each fold so that a
# Restart & Run All reproduces the same metrics.
# NOTE(review): assumes NeuralNet draws its initial weights (and any
# shuffling) from NumPy's global RNG - confirm in models/NeuralNet.py.
init_seed = 42

# This list will store the average metrics for each model configuration
cv_results = []

# Loop over every configuration defined in cv_configs
for cfg in cv_configs:
    print("\n====================================")
    print("Config:", cfg["name"])
    print("====================================")

    # Lists to save the metrics of each fold
    mse_list = []
    mae_list = []
    mape_list = []

    # K-Fold is applied on the training + validation set.
    # kf.split(...) yields the train and validation indexes of each fold.
    for fold, (train_idx, val_idx) in enumerate(kf.split(X_trainval_np), start=1):
        print(f"Fold {fold}/{k_folds}")

        # Same seed for the same fold number in every configuration, so the
        # comparison between configurations is not affected by random state.
        np.random.seed(init_seed + fold)

        # Select the training and validation features
        X_tr = X_trainval_np[train_idx]
        X_val = X_trainval_np[val_idx]

        # Target for training must stay in scaled form
        y_tr_scaled = y_trainval_scaled[train_idx]

        # Validation target stays in the original scale. Flatten it to 1-D so
        # it has the same shape as the flattened predictions below; an (n, 1)
        # target against an (n,) prediction would broadcast to (n, n) in any
        # element-wise metric.
        y_val = np.asarray(y_trainval[val_idx]).ravel()

        # Build the network structure: input size -> hidden layers -> 1 output neuron
        layers = [n_features] + list(cfg["hidden_layers"]) + [1]

        # Create the neural network with the current hyperparameters
        net = NeuralNet(
            n=layers,                 # network structure
            fact=cfg["activation"],   # activation function
            eta=cfg["lr"],            # learning rate
            alpha=cfg["momentum"],    # momentum value
            epochs=cfg["epochs"],     # number of training epochs
            val_split=0.0             # no internal validation (we already use K-Fold)
        )

        # Train the network with the current fold data
        net.fit(X_tr, y_tr_scaled)

        # Predict on the validation fold (still in scaled form). Force a
        # single-column 2-D array because inverse_transform expects
        # (n_samples, n_features) input.
        y_val_pred_scaled = np.asarray(predict_batch(net, X_val)).reshape(-1, 1)

        # Convert predictions back to the original target scale
        y_val_pred = y_scaler.inverse_transform(y_val_pred_scaled).ravel()

        # Calculate MSE, MAE and MAPE for this fold
        metrics = evaluate_regression(y_val, y_val_pred)

        # Save the metrics for this fold
        mse_list.append(metrics["MSE"])
        mae_list.append(metrics["MAE"])
        mape_list.append(metrics["MAPE"])

    # After finishing all folds, compute the mean and standard deviation
    # (np.std default, i.e. population std) of each metric for this config.
    cv_results.append({
        "Name":      cfg["name"],
        "Folds":     k_folds,
        "MSE_mean":  np.mean(mse_list),
        "MSE_std":   np.std(mse_list),
        "MAE_mean":  np.mean(mae_list),
        "MAE_std":   np.std(mae_list),
        "MAPE_mean": np.mean(mape_list),
        "MAPE_std":  np.std(mape_list),
    })

# Create a DataFrame to show the final results in a clean table
cv_df = pd.DataFrame(cv_results)
display(cv_df)



Config: One Layer tanh
Fold 1/5
Neural network has been initialized
Architecture (neurons per layer): [61, 20, 1]
Activation function used: tanh
 Layer 1: w(20, 61), theta(20, 1)
 Layer 2: w(1, 20), theta(1, 1)
Epoch 0: Train MSE=0.116081
Epoch 100: Train MSE=0.004340
Epoch 200: Train MSE=0.001627
Epoch 300: Train MSE=0.000871
Fold 2/5
Neural network has been initialized
Architecture (neurons per layer): [61, 20, 1]
Activation function used: tanh
 Layer 1: w(20, 61), theta(20, 1)
 Layer 2: w(1, 20), theta(1, 1)
Epoch 0: Train MSE=0.115141
Epoch 100: Train MSE=0.002966
Epoch 200: Train MSE=0.001228
Epoch 300: Train MSE=0.000702
Fold 3/5
Neural network has been initialized
Architecture (neurons per layer): [61, 20, 1]
Activation function used: tanh
 Layer 1: w(20, 61), theta(20, 1)
 Layer 2: w(1, 20), theta(1, 1)
Epoch 0: Train MSE=0.119714
Epoch 100: Train MSE=0.003654
Epoch 200: Train MSE=0.001421
Epoch 300: Train MSE=0.000770
Fold 4/5
Neural network has been initialized
Architecture 

Unnamed: 0,Name,Folds,MSE_mean,MSE_std,MAE_mean,MAE_std,MAPE_mean,MAPE_std
0,One Layer tanh,5,0.175663,0.022197,0.312571,0.021065,9.383743,0.712146
1,Deep tanh,5,0.059324,0.006108,0.130005,0.008871,5.323214,0.33479
