In [6]:
import numpy as np
import os
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor


In [7]:
# ---------------------------------------------------------------------------
# Paths to the preprocessed multi-target dataset.
# NOTE(review): BASE_DIR is an absolute, machine-specific path; it has to be
# adjusted before this notebook can run anywhere else.
# ---------------------------------------------------------------------------
BASE_DIR  = r"C:\Users\islem\Desktop\R37-LightGBM"
DATA_DIR  = os.path.join(BASE_DIR, "data", "processed")
NPZ_PATH  = os.path.join(DATA_DIR, "processed_data_multi.npz")

print("üì¶ Loading:", NPZ_PATH)

# NOTE(review): allow_pickle=True permits unpickling of object arrays stored in
# the archive; only load .npz files that come from a trusted source.
data = np.load(NPZ_PATH, allow_pickle=True)

# Feature matrix, target matrix and the column names stored alongside them.
X, y = data["X"], data["y"]
feature_cols, target_cols = data["feature_cols"], data["target_cols"]

print("\n=== SHAPES ===")
print("X:", X.shape, " y:", y.shape)
print("Targets:", target_cols)

# 70/15/15 split: hold out 30 % of the rows first, then cut that hold-out in
# half to obtain the validation and test sets (fixed seeds for repeatability).
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42
)

print("\n=== SPLIT SHAPES ===")
for x_label, x_part, y_label, y_part in (
    ("X_train:", X_train, " y_train:", y_train),
    ("X_val  :", X_val,   " y_val  :", y_val),
    ("X_test :", X_test,  " y_test :", y_test),
):
    print(x_label, x_part.shape, y_label, y_part.shape)

# Stack TRAIN and VAL row-wise; the combined set is used for the final fit.
X_train_full = np.vstack((X_train, X_val))
y_train_full = np.vstack((y_train, y_val))


üì¶ Loading: C:\Users\islem\Desktop\R37-LightGBM\data\processed\processed_data_multi.npz

=== SHAPES ===
X: (2000, 36)  y: (2000, 6)
Targets: ['computation_pout_C_ISEN_EFF_TOT2TOT'
 'computation_pout_C_POLY_EFF_TOT2TOT' 'computation_pout_TOT_PRES_RATIO'
 'computation_pout_MASS_FLOW' 'computation_pout_TORQUE'
 'computation_pout_C_PRES_LOSS']

=== SPLIT SHAPES ===
X_train: (1400, 36)  y_train: (1400, 6)
X_val  : (300, 36)  y_val  : (300, 6)
X_test : (300, 36)  y_test : (300, 6)


In [8]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between `y_true` and `y_pred`.

    The value is the square root of scikit-learn's `mean_squared_error`
    evaluated on the two arguments.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Accumulator for evaluation rows: [model_name, target_name, R2_test, RMSE_test]
results = list()


In [9]:
# Registry of candidate models, keyed by display name. Insertion order is the
# order in which the models are trained and reported. Every estimator is wrapped
# in MultiOutputRegressor so that the wrapper handles the multi-column target.
models = {}

# --- Linear baselines -------------------------------------------------------
models["LinearRegression"] = MultiOutputRegressor(LinearRegression())
models["Ridge"] = MultiOutputRegressor(Ridge(alpha=1.0, random_state=42))

# --- Bagged trees -----------------------------------------------------------
models["RandomForest"] = MultiOutputRegressor(
    RandomForestRegressor(n_estimators=300, max_depth=None, n_jobs=-1, random_state=42)
)

# --- Gradient boosting ------------------------------------------------------
# NOTE(review): both boosted models request n_jobs=-1 on the inner estimator
# AND on the wrapper; this may oversubscribe CPU cores - confirm on the target
# machine before changing, since thread counts can affect timing.
models["LightGBM"] = MultiOutputRegressor(
    LGBMRegressor(
        objective="regression",
        n_estimators=500,
        learning_rate=0.05,
        max_depth=10,
        subsample=0.8,
        colsample_bytree=0.8,
        n_jobs=-1,
        random_state=42,
    ),
    n_jobs=-1,
)

models["XGBoost"] = MultiOutputRegressor(
    XGBRegressor(
        objective="reg:squarederror",
        tree_method="hist",
        n_estimators=500,
        learning_rate=0.05,
        max_depth=8,
        subsample=0.8,
        colsample_bytree=0.8,
        n_jobs=-1,
        random_state=42,
    ),
    n_jobs=-1,
)


In [10]:
# Train every registered model on TRAIN+VAL and score it on the held-out TEST
# set, one (R2, RMSE) pair per target column.
#
# The accumulator is reset here so that this cell is idempotent: re-running it
# without re-running the cell that first created `results` would otherwise
# append a second copy of every (model, target) row, and the later
# `df_results.pivot(...)` would fail on the duplicated index/column pairs.
results = []  # rows: [model_name, target_name, R2_test, RMSE_test]

for model_name, model in models.items():
    print("\n==============================")
    print(f"üöÄ Training model: {model_name}")
    print("==============================")

    # Fit on the combined TRAIN+VAL data.
    model.fit(X_train_full, y_train_full)

    # Predict on the TEST split only.
    y_test_pred = model.predict(X_test)

    print(f"\n=== TEST METRICS for {model_name} ===")
    for i, tgt in enumerate(target_cols):
        # Column i of the target matrix corresponds to target_cols[i].
        r2  = r2_score(y_test[:, i], y_test_pred[:, i])
        err = rmse(y_test[:, i], y_test_pred[:, i])
        print(f"{tgt:40s} | R2_test = {r2:7.4f} | RMSE_test = {err:10.6f}")
        results.append([model_name, tgt, r2, err])



üöÄ Training model: LinearRegression

=== TEST METRICS for LinearRegression ===
computation_pout_C_ISEN_EFF_TOT2TOT      | R2_test =  0.1914 | RMSE_test =   0.016174
computation_pout_C_POLY_EFF_TOT2TOT      | R2_test =  0.1902 | RMSE_test =   0.015182
computation_pout_TOT_PRES_RATIO          | R2_test =  0.6968 | RMSE_test =   0.031616
computation_pout_MASS_FLOW               | R2_test =  0.5209 | RMSE_test =   0.345023
computation_pout_TORQUE                  | R2_test =  0.8035 | RMSE_test =  21.564082
computation_pout_C_PRES_LOSS             | R2_test =  0.8626 | RMSE_test =   0.070542

üöÄ Training model: Ridge

=== TEST METRICS for Ridge ===
computation_pout_C_ISEN_EFF_TOT2TOT      | R2_test =  0.1047 | RMSE_test =   0.017019
computation_pout_C_POLY_EFF_TOT2TOT      | R2_test =  0.1086 | RMSE_test =   0.015929
computation_pout_TOT_PRES_RATIO          | R2_test =  0.6020 | RMSE_test =   0.036222
computation_pout_MASS_FLOW               | R2_test =  0.4567 | RMSE_test =   0.36741

In [11]:
# Long-format table: one row per (model, target) pair collected during training.
df_results = pd.DataFrame(results, columns=["model", "target", "R2_test", "RMSE_test"])

print("\n\n=== SUMMARY TABLE (by model & target) ===")
display(df_results)

# Wide-format views (targets as rows, models as columns), one per metric.
pivot_r2, pivot_rmse = (
    df_results.pivot(index="target", columns="model", values=metric)
    for metric in ("R2_test", "RMSE_test")
)

print("\n\n=== R2 on TEST (pivot) ===")
display(pivot_r2)

print("\n\n=== RMSE on TEST (pivot) ===")
display(pivot_rmse)

# Persist the long-format metrics as CSV inside the project's "figures" folder,
# creating the folder first if it does not exist yet.
FIG_DIR = os.path.join(BASE_DIR, "figures")
OUT_CSV = os.path.join(FIG_DIR, "model_comparison_metrics.csv")
os.makedirs(FIG_DIR, exist_ok=True)

df_results.to_csv(OUT_CSV, index=False)
print(f"\nüíæ Metrics saved to: {OUT_CSV}")




=== SUMMARY TABLE (by model & target) ===


Unnamed: 0,model,target,R2_test,RMSE_test
0,LinearRegression,computation_pout_C_ISEN_EFF_TOT2TOT,0.191385,0.016174
1,LinearRegression,computation_pout_C_POLY_EFF_TOT2TOT,0.190176,0.015182
2,LinearRegression,computation_pout_TOT_PRES_RATIO,0.696802,0.031616
3,LinearRegression,computation_pout_MASS_FLOW,0.520892,0.345023
4,LinearRegression,computation_pout_TORQUE,0.803499,21.564082
5,LinearRegression,computation_pout_C_PRES_LOSS,0.862569,0.070542
6,Ridge,computation_pout_C_ISEN_EFF_TOT2TOT,0.104719,0.017019
7,Ridge,computation_pout_C_POLY_EFF_TOT2TOT,0.10859,0.015929
8,Ridge,computation_pout_TOT_PRES_RATIO,0.602004,0.036222
9,Ridge,computation_pout_MASS_FLOW,0.456682,0.367416




=== R2 on TEST (pivot) ===


model,LightGBM,LinearRegression,RandomForest,Ridge,XGBoost
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
computation_pout_C_ISEN_EFF_TOT2TOT,0.751399,0.191385,0.674398,0.104719,0.682658
computation_pout_C_POLY_EFF_TOT2TOT,0.756511,0.190176,0.6703,0.10859,0.687985
computation_pout_C_PRES_LOSS,0.918255,0.862569,0.826716,0.783797,0.85977
computation_pout_MASS_FLOW,0.854327,0.520892,0.778531,0.456682,0.79839
computation_pout_TORQUE,0.888125,0.803499,0.787662,0.667197,0.82782
computation_pout_TOT_PRES_RATIO,0.865281,0.696802,0.768748,0.602004,0.803759




=== RMSE on TEST (pivot) ===


model,LightGBM,LinearRegression,RandomForest,Ridge,XGBoost
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
computation_pout_C_ISEN_EFF_TOT2TOT,0.008968,0.016174,0.010264,0.017019,0.010133
computation_pout_C_POLY_EFF_TOT2TOT,0.008325,0.015182,0.009687,0.015929,0.009424
computation_pout_C_PRES_LOSS,0.054405,0.070542,0.07921,0.088478,0.071256
computation_pout_MASS_FLOW,0.190248,0.345023,0.234578,0.367416,0.223814
computation_pout_TORQUE,16.271011,21.564082,22.416239,28.063526,20.185518
computation_pout_TOT_PRES_RATIO,0.021074,0.031616,0.027611,0.036222,0.025435



üíæ Metrics saved to: C:\Users\islem\Desktop\R37-LightGBM\figures\model_comparison_metrics.csv
