In [1]:
import pandas as pd
import datautils
import modelutils as mu
import numpy as np
import itertools
import time
from tqdm import tqdm

## DATASET IMPORTING

In [2]:
# Input dataset and output locations, all given as relative paths.
# DATASET is the CSV file loaded with pd.read_csv in the following cell.
DATASET = "Dataset/ML-CUP23-TR.csv"
# Prefix prepended to the names of the result CSV files written by the experiments.
RESULTS = "Results-chol/"
# NOTE(review): PLOT and RUNS are not referenced in the cells visible here —
# presumably output folders for figures and full-run artifacts; confirm usage.
PLOT = "Plots/"
RUNS = "FullRuns/"

In [3]:
# Load the training set, skipping the first 6 rows of the file (presumably
# header comment lines — confirm against the CSV), and expose the
# "# Training set: ID" column under the shorter name "ID".
df_cup = pd.read_csv(DATASET, skiprows=6).rename(
    columns={"# Training set: ID": "ID"}
)

In [4]:
# Sanity check of the loaded frame: column names, dtypes and non-null counts.
df_cup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID      1000 non-null   int64  
 1   x1      1000 non-null   float64
 2   x2      1000 non-null   float64
 3   x3      1000 non-null   float64
 4   x4      1000 non-null   float64
 5   x5      1000 non-null   float64
 6   x6      1000 non-null   float64
 7   x7      1000 non-null   float64
 8   x8      1000 non-null   float64
 9   x9      1000 non-null   float64
 10  x10     1000 non-null   float64
 11  y1      1000 non-null   float64
 12  y2      1000 non-null   float64
 13  y3      1000 non-null   float64
dtypes: float64(13), int64(1)
memory usage: 109.5 KB


In [5]:
# Split the loaded frame into the feature matrix and the target matrix.
X_train, y_train = datautils.obtain_features_targets(df_cup)
print(X_train.shape, y_train.shape)

# The second axis of each array fixes the input and output widths that are
# later passed to the ELM constructor.
input_size, output_size = X_train.shape[1], y_train.shape[1]
input_size, output_size

(1000, 10) (1000, 3)


(10, 3)

In [6]:
# Search space of the first experiment.
# `alphas` holds base-10 exponents: the grid-search cell turns each one into
# the actual regularisation strength with 10 ** exponent.
hidden_sizes = list(range(50, 1001, 50))
alphas = list(range(-1, -7, -1))  # -1, -2, ..., -6
seeds = range(3)

# Cartesian product of the two axes, one dict per combination
# (hidden size varies slowest, exponent fastest).
param_grid = [
    dict(zip(("Hidden size", "Alpha"), combo))
    for combo in itertools.product(hidden_sizes, alphas)
]

In [7]:
# Column-oriented accumulator for the experiment: every key is a column of the
# final results table and starts as its own empty list, to which the
# grid-search cell appends one entry per run.
results_dict = {
    column: []
    for column in (
        "Hidden size",
        "Alpha",
        "Seed",
        "Soldist",
        "LSE",
        "LSE_Diff",
        "LSE_Diff_noalpha",  # LSE difference evaluated without alpha
        "Total time",
        "Chol time",
        "Chol Residual",
        "QR Residual",
    )
}

In [8]:
# Wrap param_grid with tqdm for progress bar
for params in tqdm(param_grid, desc="Grid Search"):
    for seed in seeds:
        value = 10 ** params["Alpha"]
        # Initialize models
        model_qr = mu.ELM(input_size, params["Hidden size"], output_size, seed=seed)
        model_chol = mu.ELM(input_size, params["Hidden size"], output_size, seed=seed)

        # Compute QR solution
        model_qr.compute_wout_system_qr(X_train, y_train, alpha=value)

        # Use compute_wout_system, which internally uses cholesky decomposition
        start_total = time.process_time()
        chol_time = model_chol.compute_wout_system(X_train, y_train, alpha=value)
        total_time = time.process_time() - start_total

        # If cholesky failed, compute_wout_system returns -1 as chol_time and output_weights is set to nan
        if chol_time == -1:
            w_chol = np.full_like(model_qr.output_weights, np.nan)
            model_chol.output_weights = w_chol
            chol_residual = np.nan
        else:
            # Compute residual for Cholesky system
            A = model_chol.hidden_activations(X_train)
            BtB = A.T @ A + value * np.eye(params["Hidden size"])
            Aty = A.T @ y_train
            chol_residual = np.linalg.norm(
                BtB @ model_chol.output_weights - Aty, ord="fro"
            )

        # Compute residual for QR system
        A_qr = model_qr.hidden_activations(X_train)
        Q, R = np.linalg.qr(A_qr)
        RtY = R.T @ Q.T @ y_train
        RtR = R.T @ R + value * np.eye(params["Hidden size"])
        qr_residual = np.linalg.norm(RtR @ model_qr.output_weights - RtY, ord="fro")

        # Compute metrics
        soldist = np.linalg.norm(
            model_chol.output_weights - model_qr.output_weights, "fro"
        )
        lse_chol = mu.compute_loss(y_train, model_chol.predict(X_train), alpha=value)
        lse_qr = mu.compute_loss(y_train, model_qr.predict(X_train), alpha=value)
        lse_diff = abs(lse_chol - lse_qr)
        # Compute LSE difference without alpha
        lse_chol_noalpha = mu.compute_loss(
            y_train, model_chol.predict(X_train), alpha=0
        )
        lse_qr_noalpha = mu.compute_loss(y_train, model_qr.predict(X_train), alpha=0)
        lse_diff_noalpha = abs(lse_chol_noalpha - lse_qr_noalpha)
        # Store results
        results_dict["Hidden size"].append(params["Hidden size"])
        results_dict["Alpha"].append(value)
        results_dict["Seed"].append(seed)
        results_dict["Soldist"].append(soldist)
        results_dict["LSE"].append(lse_chol)
        results_dict["LSE_Diff"].append(lse_diff)
        results_dict["LSE_Diff_noalpha"].append(lse_diff_noalpha)
        results_dict["Total time"].append(total_time)
        results_dict["Chol time"].append(chol_time)
        results_dict["Chol Residual"].append(chol_residual)
        results_dict["QR Residual"].append(qr_residual)

results_df = pd.DataFrame(results_dict)
results_df.to_csv(RESULTS + "chol_experiment-diffalpha.csv", index=False)
results_df.head()

Grid Search:  31%|███       | 37/120 [00:07<00:16,  5.11it/s]


KeyboardInterrupt: 

In [None]:
# Search space of the second experiment: small hidden layers, more seeds.
# `alphas` holds base-10 exponents: the grid-search cell turns each one into
# the actual regularisation strength with 10 ** exponent.
hidden_sizes = [3, 6, 10]
alphas = list(range(-1, -7, -1))  # -1, -2, ..., -6
seeds = range(20)

# Cartesian product of the two axes, one dict per combination
# (hidden size varies slowest, exponent fastest).
param_grid = [
    dict(zip(("Hidden size", "Alpha"), combo))
    for combo in itertools.product(hidden_sizes, alphas)
]

In [None]:
# Fresh column-oriented accumulator for the second experiment: every key is a
# column of the final results table and starts as its own empty list, to which
# the grid-search cell appends one entry per run.
results_dict = {
    column: []
    for column in (
        "Hidden size",
        "Alpha",
        "Seed",
        "Soldist",
        "LSE",
        "LSE_Diff",
        "LSE_Diff_noalpha",  # LSE difference evaluated without alpha
        "Total time",
        "Chol time",
        "Chol Residual",
        "QR Residual",
    )
}

In [None]:
# Wrap param_grid with tqdm for progress bar
for params in tqdm(param_grid, desc="Grid Search"):
    for seed in seeds:
        value = 10 ** params["Alpha"]
        # Initialize models
        model_qr = mu.ELM(input_size, params["Hidden size"], output_size, seed=seed)
        model_chol = mu.ELM(input_size, params["Hidden size"], output_size, seed=seed)

        # Compute QR solution
        model_qr.compute_wout_system_qr(X_train, y_train, alpha=value)

        # Use compute_wout_system, which internally uses cholesky decomposition
        start_total = time.process_time()
        chol_time = model_chol.compute_wout_system(X_train, y_train, alpha=value)
        total_time = time.process_time() - start_total

        # If cholesky failed, compute_wout_system returns -1 as chol_time and output_weights is set to nan
        if chol_time == -1:
            w_chol = np.full_like(model_qr.output_weights, np.nan)
            model_chol.output_weights = w_chol
            chol_residual = np.nan
        else:
            # Compute residual for Cholesky system
            A = model_chol.hidden_activations(X_train)
            BtB = A.T @ A + value * np.eye(params["Hidden size"])
            Aty = A.T @ y_train
            chol_residual = np.linalg.norm(
                BtB @ model_chol.output_weights - Aty, ord="fro"
            )

        # Compute residual for QR system
        A_qr = model_qr.hidden_activations(X_train)
        BtB_qr = A_qr.T @ A_qr + value * np.eye(params["Hidden size"])
        Aty_qr = A_qr.T @ y_train
        qr_residual = np.linalg.norm(
            BtB_qr @ model_qr.output_weights - Aty_qr, ord="fro"
        )

        # Compute metrics
        soldist = np.linalg.norm(
            model_chol.output_weights - model_qr.output_weights, "fro"
        )
        lse_chol = mu.compute_loss(y_train, model_chol.predict(X_train), alpha=value)
        lse_qr = mu.compute_loss(y_train, model_qr.predict(X_train), alpha=value)
        lse_diff = abs(lse_chol - lse_qr)
        # Compute LSE difference without alpha
        lse_chol_noalpha = mu.compute_loss(
            y_train, model_chol.predict(X_train), alpha=0
        )
        lse_qr_noalpha = mu.compute_loss(y_train, model_qr.predict(X_train), alpha=0)
        lse_diff_noalpha = abs(lse_chol_noalpha - lse_qr_noalpha)
        # Store results
        results_dict["Hidden size"].append(params["Hidden size"])
        results_dict["Alpha"].append(value)
        results_dict["Seed"].append(seed)
        results_dict["Soldist"].append(soldist)
        results_dict["LSE"].append(lse_chol)
        results_dict["LSE_Diff"].append(lse_diff)
        results_dict["LSE_Diff_noalpha"].append(lse_diff_noalpha)
        results_dict["Total time"].append(total_time)
        results_dict["Chol time"].append(chol_time)
        results_dict["Chol Residual"].append(chol_residual)
        results_dict["QR Residual"].append(qr_residual)

results_df = pd.DataFrame(results_dict)
results_df.to_csv(RESULTS + "chol_experiment-balanced.csv", index=False)
results_df.head()

Grid Search: 100%|██████████| 18/18 [00:00<00:00, 51.72it/s]
Grid Search: 100%|██████████| 18/18 [00:00<00:00, 51.72it/s]


Unnamed: 0,Hidden size,Alpha,Seed,Soldist,LSE,LSE_Diff,LSE_Diff_noalpha,Total time,Chol time,Chol Residual,QR Residual
0,3,0.1,0,2.26725e-14,1656.781778,0.0,0.0,0.002743,0.000193,5.332385e-13,5.737526e-12
1,3,0.1,1,5.35218e-13,1392.887507,0.0,0.0,8.8e-05,2.2e-05,6.212338e-12,1.290684e-11
2,3,0.1,2,3.489422e-14,1227.619484,0.0,0.0,0.001236,2.5e-05,2.369082e-12,7.77426e-12
3,3,0.1,3,5.097556e-14,1080.84109,0.0,0.0,7e-05,1.4e-05,4.336896e-12,5.453711e-12
4,3,0.1,4,2.825817e-14,1590.79132,0.0,0.0,0.000725,1.4e-05,1.652375e-12,7.525691e-12
