In [1]:
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
import optuna
import os
import json
from collections import defaultdict

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

from src.utils import get_kfold_data, convert_non_numeric_to_numeric, calculate_r2_score, calculate_metrics
from src.normalisation import Normaliser
from src.constants import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv(DATA_PATH)

In [3]:
data.head()

Unnamed: 0,outcome,carat,cut,color,clarity,depth,table,price,x,y,...,a6,a7,a8,a9,a10,b6,b7,b8,b9,b10
0,-26.701232,1.14,Ideal,G,VS1,62.3,56.0,7948,6.73,6.7,...,0.168836,-0.273758,1.107832,1.247795,0.482344,0.489511,-0.321138,0.573382,0.446871,-1.990581
1,6.548093,0.38,Premium,H,VS2,60.5,59.0,898,4.69,4.66,...,-0.256549,0.315373,-0.030326,-0.114335,-1.059588,-1.76136,-1.343951,-1.00255,-0.22503,-0.446653
2,6.612562,0.5,Very Good,E,SI1,60.7,58.0,1351,5.09,5.13,...,-1.193327,-0.657307,-0.591726,-0.446856,-0.765286,-0.816544,-1.397794,-0.47713,0.810509,1.725131
3,-5.073562,0.7,Premium,D,SI1,61.2,58.0,2512,5.74,5.7,...,-1.740788,-1.77886,-0.82507,0.444932,1.173109,0.453606,-0.26344,0.24621,-0.850503,-0.41295
4,-14.436557,0.83,Ideal,G,SI2,62.4,54.0,2751,6.01,6.08,...,-0.859322,1.409268,0.861992,1.109063,-1.436722,-1.461618,0.081787,0.258087,0.851146,2.204813


Inspecting columns

In [4]:
# Find columns
all_columns = data.columns.tolist()
print(all_columns)

numeric_columns = data.select_dtypes(include=["number"]).columns.tolist()
numeric_columns.remove("outcome") # Remove the target column
print(numeric_columns)

non_numeric_columns = data.select_dtypes(exclude=["number"]).columns.tolist()
print(non_numeric_columns)

['outcome', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['cut', 'color', 'clarity']


In [5]:
for non_numeric_column in non_numeric_columns:
    print(data[non_numeric_column].value_counts())

cut
Ideal        4040
Premium      2439
Very Good    2296
Good          925
Fair          300
Name: count, dtype: int64
color
G    2120
E    1873
F    1746
H    1506
D    1246
I     983
J     526
Name: count, dtype: int64
clarity
SI1     2408
VS2     2256
SI2     1743
VS1     1503
VVS2     951
VVS1     675
IF       318
I1       146
Name: count, dtype: int64


Converting non-numeric features to numerical features

In [6]:
data = convert_non_numeric_to_numeric(data=data)
print(data)

['G', 'E', 'F', 'H', 'D', 'I', 'J']
        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...      

Normalise data using each columns respective mean and std.

In [7]:
print(data)

        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...        b8        b9       b10  colour_G  

Data splitting:
- Split the entire dataset into training and testing sets first.
- Use the training set to generate folds (one validation and the rest training folds) (K-Fold Cross Validation)

In [8]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=REPRODUCIBILITY_SEED)
print(f"Training set size: {len(train_data)} | Test set size: {len(test_data)}")    
print()


Training set size: 8000 | Test set size: 2000



In [9]:
normaliser = Normaliser()
os.makedirs(TRAINING_STATISTICS_DIR, exist_ok=True)
stats_for_each_column = {}
for column in numeric_columns:
    print(data[column])
    train_data_column_mean = normaliser.calculate_mean(train_data[column])
    train_data_column_std = normaliser.calculate_std(train_data[column])

    train_data[column] = normaliser.standardise(train_data[column], mean=train_data_column_mean, std=train_data_column_std)

    stats_for_each_column[column] = {
        "mean": train_data_column_mean,
        "std": train_data_column_std
    }

    # Normalise test data using the mean and std of the training data
    test_data[column] = normaliser.standardise(test_data[column], mean=train_data_column_mean, std=train_data_column_std)
    print("after", train_data[column])

with open(f"{TRAINING_STATISTICS_DIR}/stats.json", "w") as f:
    json.dump(stats_for_each_column, f)

0       1.14
1       0.38
2       0.50
3       0.70
4       0.83
        ... 
9995    0.33
9996    1.01
9997    0.52
9998    0.31
9999    0.37
Name: carat, Length: 10000, dtype: float64
after 9254   -1.023878
1561    1.522047
1670   -1.045094
6087   -0.111588
6669    1.140158
          ...   
5734   -0.154021
5191    0.970430
5390   -1.045094
860    -1.002662
7270   -1.151175
Name: carat, Length: 8000, dtype: float64
0       62.3
1       60.5
2       60.7
3       61.2
4       62.4
        ... 
9995    62.6
9996    69.5
9997    57.9
9998    62.0
9999    59.9
Name: depth, Length: 10000, dtype: float64
after 9254   -0.381342
1561    0.945157
1670    0.386631
6087   -0.311527
6669   -0.311527
          ...   
5734    0.665894
5191    0.107368
5390    0.107368
860     0.875341
7270    0.665894
Name: depth, Length: 8000, dtype: float64
0       56.0
1       59.0
2       58.0
3       58.0
4       54.0
        ... 
9995    57.0
9996    55.0
9997    61.0
9998    54.0
9999    59.0
Name: table, Le

In [10]:
kfold_data = get_kfold_data(data=train_data, k=NUM_FOLDS, reproducibility_seed=REPRODUCIBILITY_SEED)

Fold: 0/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%

Fold: 1/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%

Fold: 2/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%

Fold: 3/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%

Fold: 4/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%



Define models and hyperparameter tuning objectives for each model

In [11]:
models = {
        "linear_regression": LinearRegression,
        "lasso": Lasso,
        "ridge": Ridge,
        "xgb": xgb.XGBRegressor,
        "random_forest": RandomForestRegressor,
        "gradient_boosting": GradientBoostingRegressor,
        "ada_boost": AdaBoostRegressor,
        "lgbm": lgb.LGBMRegressor
        }

def objective(model_type, trial, x_train, y_train, x_val, y_val):
    if model_type == LinearRegression:
        parameters = {
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
        }
    elif model_type == Lasso:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "selection": trial.suggest_categorical("selection", ["cyclic", "random"]),
            "warm_start": trial.suggest_categorical("warm_start", [True, False]),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == Ridge:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "solver": trial.suggest_categorical("solver", ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "positive": False,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == xgb.XGBRegressor:
        parameters = {
            "objective": "reg:squarederror",
            "eval_metric": "rmse",
            "n_estimators": 100,
            "eta": trial.suggest_float("eta", 1e-2, 0.2, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 10, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 6),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "seed": REPRODUCIBILITY_SEED
        }
    elif model_type == RandomForestRegressor:
        parameters = {
            "n_estimators": 100,
            "criterion": trial.suggest_categorical("criterion", ["absolute_error", "squared_error"]),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "bootstrap": True,
            "oob_score": False,
            "n_jobs": -1,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == GradientBoostingRegressor:
        parameters = {
            "n_estimators": 100,
            "loss": trial.suggest_categorical("loss", ["absolute_error", "squared_error", "huber", "quantile"]),
            "criterion": trial.suggest_categorical("criterion", ["friedman_mse", "squared_error"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "subsample": trial.suggest_float("subsample", 0.05, 1.0),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 2**10),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == AdaBoostRegressor:
        parameters = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 100),
            "loss": trial.suggest_categorical("loss", ["linear", "square", "exponential"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == lgb.LGBMRegressor:
        parameters = {
                    "objective": "regression",
                    "metric": "rmse",
                    "n_estimators": 100,
                    "verbosity": -1,
                    "bagging_freq": 1,
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
                    "subsample": trial.suggest_float("subsample", 0.05, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
                    "seed": REPRODUCIBILITY_SEED
        }

    
    model = model_type(**parameters) # Create the model
    model.fit(x_train, y_train)
    predictions = model.predict(x_val)
    metrics = calculate_metrics(targets=y_val, preds=predictions)
    rmse = metrics["rmse"]
    return rmse

In [12]:
# Train + Validate models
metrics = ["mae", "mse", "rmse", "pcc", "spearman_r", "r2_score"]
model_scores = {model_name: defaultdict(list) for model_name in models.keys()}

if os.path.exists("model_best_hyperparameters"):
    raise Exception("Directory for best hyperparameters already exists. Please delete it before running this script.")

os.makedirs("model_best_hyperparameters")

for fold in range(NUM_FOLDS):
    fold_data = kfold_data[fold]
     
    # Extract data
    train_data = fold_data["train"]
    val_data = fold_data["val"]

    train_y = train_data["outcome"]
    val_y = val_data["outcome"]
    
    train_x = train_data.drop(columns=["outcome"])
    val_x = val_data.drop(columns=["outcome"])

    # print(f"Fold {fold+1}/{NUM_FOLDS}")
    # print(f"Train data shape: {train_x.shape} | Train target shape: {train_y.shape}")
    # print(f"Val data shape: {val_x.shape} | Val target shape: {val_y.shape}")
    # print(f"Test data shape: {test_x.shape} | Test target shape: {test_y.shape}")

    # Train model
    for model_name, model in models.items():
        study = optuna.create_study(direction="minimize")
        study.optimize(lambda trial: objective(trial=trial, 
                                               model_type=model, 
                                               x_train=train_x, 
                                               y_train=train_y, 
                                               x_val=val_x, 
                                               y_val=val_y
                                               ), n_trials=N_TRIALS)
        
        # Train model with best hyperparameters
        best_fold_params = study.best_params
        model = model(**best_fold_params)
        model.fit(train_x, train_y)
        preds = model.predict(val_x)
        
        # Save the best hyperparameters for this model at this fold.
        os.makedirs(f"model_best_hyperparameters/{model_name}", exist_ok=True)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}.json", "w") as f:
            json.dump(best_fold_params, f)

        # Calculate metrics
        metrics = calculate_metrics(targets=val_y, preds=preds)
        mae = metrics["mae"]
        mse = metrics["mse"]
        rmse = metrics["rmse"]
        pcc = metrics["pcc"]
        spearman_r = metrics["spearman_r"]
        r2_score = metrics["r2_score"]

        for metric in metrics:
            model_scores[model_name][metric].append(metrics[metric])

        print(f"Fold: {fold+1}/{NUM_FOLDS}")
        print(f"Model name: {model_name}")
        print(f"MAE: {mae}")
        print(f"MSE: {mse}")
        print(f"RMSE: {rmse}")
        print(f"PCC: {pcc}")
        print(f"Spearman R: {spearman_r}")
        print(f"R2 Score: {r2_score}")
        print()


[I 2025-02-18 14:37:01,199] A new study created in memory with name: no-name-f15d18d0-1ef5-40e1-935f-855b8da2efa4
[I 2025-02-18 14:37:01,208] Trial 0 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 14:37:01,214] Trial 1 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 14:37:01,221] Trial 2 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 14:37:01,228] Trial 3 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 14:37:01,234] Trial 4 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 14:37:01,240] Trial 5 finished with value: 10

Fold: 1/5
Model name: linear_regression
MAE: 8.707517175964739
MSE: 116.4178751798801
RMSE: 10.789711542941271
PCC: 0.5299360904553945
Spearman R: 0.5514073667606901
R2 Score: 0.2804847588737671



[I 2025-02-18 14:37:02,037] Trial 2 finished with value: 10.779077269796314 and parameters: {'alpha': 0.008015889108867863, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 2 with value: 10.779077269796314.
[I 2025-02-18 14:37:02,074] Trial 3 finished with value: 10.779223522084727 and parameters: {'alpha': 0.007459654637685029, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 2 with value: 10.779077269796314.
[I 2025-02-18 14:37:02,083] Trial 4 finished with value: 10.763112368886837 and parameters: {'alpha': 0.06471471831904486, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 4 with value: 10.763112368886837.
[I 2025-02-18 14:37:02,090] Trial 5 finished with value: 10.757102822174714 and parameters: {'alpha': 0.09758625413059716, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 5 with value: 10.757102822174714.
[I 2025-02-18 14:37:02,100] Trial 6 finished

Fold: 1/5
Model name: lasso
MAE: 8.668730559460927
MSE: 115.70551925250143
RMSE: 10.756650001394553
PCC: 0.533991362251101
Spearman R: 0.5569320212625084
R2 Score: 0.2848874414176943



[I 2025-02-18 14:37:03,716] Trial 1 finished with value: 10.789289062161945 and parameters: {'alpha': 0.00435896238034593, 'solver': 'sag', 'fit_intercept': True}. Best is trial 1 with value: 10.789289062161945.
[I 2025-02-18 14:37:03,723] Trial 2 finished with value: 10.789657531899033 and parameters: {'alpha': 0.0013138860204368614, 'solver': 'lsqr', 'fit_intercept': False}. Best is trial 1 with value: 10.789289062161945.
[I 2025-02-18 14:37:03,737] Trial 3 finished with value: 10.789705001168505 and parameters: {'alpha': 0.004445034607870543, 'solver': 'svd', 'fit_intercept': False}. Best is trial 1 with value: 10.789289062161945.
[I 2025-02-18 14:37:04,090] Trial 4 finished with value: 10.789201883053218 and parameters: {'alpha': 0.035050960564629106, 'solver': 'sag', 'fit_intercept': False}. Best is trial 4 with value: 10.789201883053218.
[I 2025-02-18 14:37:04,097] Trial 5 finished with value: 10.789552991367222 and parameters: {'alpha': 0.005953829226055649, 'solver': 'sparse_cg

Fold: 1/5
Model name: ridge
MAE: 8.706935124038269
MSE: 116.39511949863805
RMSE: 10.788656983083579
PCC: 0.5300624087803333
Spearman R: 0.5514849185487963
R2 Score: 0.2806253992990503



[I 2025-02-18 14:37:43,637] Trial 0 finished with value: 9.198202796978526 and parameters: {'eta': 0.042746541730240416, 'gamma': 1.3452280818119873e-06, 'max_depth': 5, 'min_child_weight': 6, 'subsample': 0.9448718707034002, 'colsample_bytree': 0.8832196692566187}. Best is trial 0 with value: 9.198202796978526.
[I 2025-02-18 14:37:43,719] Trial 1 finished with value: 9.306983789809454 and parameters: {'eta': 0.1472106506868595, 'gamma': 6.400711670089117e-06, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.8700044435304679, 'colsample_bytree': 0.9455032970161554}. Best is trial 0 with value: 9.198202796978526.
[I 2025-02-18 14:37:43,967] Trial 2 finished with value: 9.39283764425313 and parameters: {'eta': 0.030860806704477203, 'gamma': 0.808202689901994, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.5779531983569863, 'colsample_bytree': 0.8351936785433114}. Best is trial 0 with value: 9.198202796978526.
[I 2025-02-18 14:37:44,048] Trial 3 finished with value: 9.25613023

Fold: 1/5
Model name: xgb
MAE: 7.387196887838481
MSE: 83.87873076869346
RMSE: 9.15853322146584
PCC: 0.694306397533227
Spearman R: 0.6971010906644886
R2 Score: 0.4815914214106085



[I 2025-02-18 14:37:55,107] Trial 1 finished with value: 10.038384802284138 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 8}. Best is trial 1 with value: 10.038384802284138.
[I 2025-02-18 14:37:55,225] Trial 2 finished with value: 10.138797215208898 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 6, 'min_samples_split': 5, 'min_samples_leaf': 5}. Best is trial 1 with value: 10.038384802284138.
[I 2025-02-18 14:37:56,758] Trial 3 finished with value: 10.337452065510085 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 6}. Best is trial 1 with value: 10.038384802284138.
[I 2025-02-18 14:37:56,884] Trial 4 finished with value: 9.716996778533849 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 3, 'min_samples_leaf': 6}. Best is tri

Fold: 1/5
Model name: random_forest
MAE: 7.662830822117281
MSE: 90.55380296063245
RMSE: 9.51597619588408
PCC: 0.6810423975312034
Spearman R: 0.6847996063670337
R2 Score: 0.4403364494374724



[I 2025-02-18 14:38:31,221] Trial 0 finished with value: 10.598222590435265 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.007779119355353228, 'subsample': 0.7431249736087558, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 8, 'max_features': 'log2', 'max_leaf_nodes': 379}. Best is trial 0 with value: 10.598222590435265.
[I 2025-02-18 14:38:32,429] Trial 1 finished with value: 12.274840790843303 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.0015590543314908936, 'subsample': 0.9932132301287065, 'max_depth': 5, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_leaf_nodes': 506}. Best is trial 0 with value: 10.598222590435265.
[I 2025-02-18 14:38:33,307] Trial 2 finished with value: 9.401912137953923 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.043173348091989305, 'subsample': 0.3155292685212613, 'max_depth': 8, 'min_s

Fold: 1/5
Model name: gradient_boosting
MAE: 7.387753800616748
MSE: 84.80791348352228
RMSE: 9.2091212112515
PCC: 0.6911061260405427
Spearman R: 0.6925892060895336
R2 Score: 0.47584865103211305



[I 2025-02-18 14:39:57,453] Trial 0 finished with value: 9.744914134093849 and parameters: {'n_estimators': 85, 'loss': 'linear', 'learning_rate': 0.030096534333756073}. Best is trial 0 with value: 9.744914134093849.
[I 2025-02-18 14:39:59,189] Trial 1 finished with value: 9.7446172026016 and parameters: {'n_estimators': 61, 'loss': 'square', 'learning_rate': 0.020115861726582362}. Best is trial 1 with value: 9.7446172026016.
[I 2025-02-18 14:40:01,037] Trial 2 finished with value: 9.812636418339874 and parameters: {'n_estimators': 66, 'loss': 'linear', 'learning_rate': 0.01508129472163794}. Best is trial 1 with value: 9.7446172026016.
[I 2025-02-18 14:40:03,531] Trial 3 finished with value: 9.797060329300697 and parameters: {'n_estimators': 87, 'loss': 'square', 'learning_rate': 0.0021516694676000066}. Best is trial 1 with value: 9.7446172026016.
[I 2025-02-18 14:40:05,835] Trial 4 finished with value: 9.770936570772278 and parameters: {'n_estimators': 81, 'loss': 'exponential', 'lear

Fold: 1/5
Model name: ada_boost
MAE: 7.613414391975063
MSE: 89.6545072049484
RMSE: 9.468606402472775
PCC: 0.6699854434171596
Spearman R: 0.6741748316512903
R2 Score: 0.44589450486061966



[I 2025-02-18 14:43:16,193] Trial 1 finished with value: 11.28134099734886 and parameters: {'learning_rate': 0.011045993162056586, 'num_leaves': 268, 'subsample': 0.613443000036067, 'colsample_bytree': 0.2573304511534084, 'min_data_in_leaf': 96}. Best is trial 0 with value: 9.422118105655219.
[I 2025-02-18 14:43:16,268] Trial 2 finished with value: 12.211066308404073 and parameters: {'learning_rate': 0.0010565448070695791, 'num_leaves': 742, 'subsample': 0.6049977797933337, 'colsample_bytree': 0.9271128029251919, 'min_data_in_leaf': 96}. Best is trial 0 with value: 9.422118105655219.
[I 2025-02-18 14:43:16,354] Trial 3 finished with value: 9.3973441117989 and parameters: {'learning_rate': 0.06541998528580832, 'num_leaves': 599, 'subsample': 0.3445730832932381, 'colsample_bytree': 0.5305910790597586, 'min_data_in_leaf': 35}. Best is trial 3 with value: 9.3973441117989.
[I 2025-02-18 14:43:16,499] Trial 4 finished with value: 9.511693348576195 and parameters: {'learning_rate': 0.06840372

Fold: 1/5
Model name: lgbm
MAE: 7.439741090025507
MSE: 85.40306223809563
RMSE: 9.241377724024467
PCC: 0.6872518381205639
Spearman R: 0.6910654105724261
R2 Score: 0.4721703619464279



[I 2025-02-18 14:43:26,170] Trial 24 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 14:43:26,183] Trial 25 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 14:43:26,190] Trial 26 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 14:43:26,198] Trial 27 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 14:43:26,204] Trial 28 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 14:43:26,211] Trial 29 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[

Fold: 2/5
Model name: linear_regression
MAE: 8.589235102672504
MSE: 116.93096828103437
RMSE: 10.813462363231972
PCC: 0.5423829744003588
Spearman R: 0.5661288207534456
R2 Score: 0.2926921600863054



[I 2025-02-18 14:43:26,908] Trial 6 finished with value: 10.830131842207686 and parameters: {'alpha': 0.03806401189206707, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}. Best is trial 0 with value: 10.812629151806066.
[I 2025-02-18 14:43:26,933] Trial 7 finished with value: 10.813347187821723 and parameters: {'alpha': 0.001382957014974925, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True}. Best is trial 0 with value: 10.812629151806066.
[I 2025-02-18 14:43:26,961] Trial 8 finished with value: 10.813677204811917 and parameters: {'alpha': 0.00826980177185079, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 0 with value: 10.812629151806066.
[I 2025-02-18 14:43:26,985] Trial 9 finished with value: 10.813344050538872 and parameters: {'alpha': 0.001463570352902228, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True}. Best is trial 0 with value: 10.812629151806066.
[I 2025-02-18 14:43:27,001] Trial 10 finis

Fold: 2/5
Model name: lasso
MAE: 8.591091826630192
MSE: 116.91157848035516
RMSE: 10.812565767677677
PCC: 0.542728892439964
Spearman R: 0.5665135005912112
R2 Score: 0.2928094477324813



[I 2025-02-18 14:43:29,292] Trial 5 finished with value: 10.81354978786924 and parameters: {'alpha': 0.0021709559641367457, 'solver': 'saga', 'fit_intercept': True}. Best is trial 4 with value: 10.813462140926642.
[I 2025-02-18 14:43:29,296] Trial 6 finished with value: 10.8134628741789 and parameters: {'alpha': 0.006282155793954552, 'solver': 'cholesky', 'fit_intercept': True}. Best is trial 4 with value: 10.813462140926642.
[I 2025-02-18 14:43:29,301] Trial 7 finished with value: 10.813461752354065 and parameters: {'alpha': 0.004846148955835552, 'solver': 'auto', 'fit_intercept': False}. Best is trial 7 with value: 10.813461752354065.
[I 2025-02-18 14:43:29,589] Trial 8 finished with value: 10.81346026582224 and parameters: {'alpha': 0.004731654994011694, 'solver': 'saga', 'fit_intercept': False}. Best is trial 8 with value: 10.81346026582224.
[I 2025-02-18 14:43:29,594] Trial 9 finished with value: 10.81346120531903 and parameters: {'alpha': 0.0019530529872782102, 'solver': 'sparse_

Fold: 2/5
Model name: ridge
MAE: 8.589395176974188
MSE: 116.93067861883964
RMSE: 10.813448969632198
PCC: 0.5423971013478573
Spearman R: 0.5661109379339602
R2 Score: 0.2926939122341262



[I 2025-02-18 14:43:48,050] Trial 0 finished with value: 9.914207322479593 and parameters: {'eta': 0.015100799711521947, 'gamma': 1.4132347450010752e-07, 'max_depth': 10, 'min_child_weight': 4, 'subsample': 0.7445152305259053, 'colsample_bytree': 0.8153679101013809}. Best is trial 0 with value: 9.914207322479593.
[I 2025-02-18 14:43:48,116] Trial 1 finished with value: 9.385829212630599 and parameters: {'eta': 0.04808260275598184, 'gamma': 0.00022818753254650286, 'max_depth': 3, 'min_child_weight': 6, 'subsample': 0.5443935919678857, 'colsample_bytree': 0.5734509288969103}. Best is trial 1 with value: 9.385829212630599.
[I 2025-02-18 14:43:48,361] Trial 2 finished with value: 9.88485980264203 and parameters: {'eta': 0.14625402423228762, 'gamma': 9.15894148471199e-06, 'max_depth': 9, 'min_child_weight': 5, 'subsample': 0.9522972859368701, 'colsample_bytree': 0.6953256043385891}. Best is trial 1 with value: 9.385829212630599.
[I 2025-02-18 14:43:48,433] Trial 3 finished with value: 10.25

Fold: 2/5
Model name: xgb
MAE: 7.367300512469649
MSE: 86.47428099829172
RMSE: 9.299154853979566
PCC: 0.6913375467239236
Spearman R: 0.6991444430251731
R2 Score: 0.4769226852377646



[I 2025-02-18 14:44:00,153] Trial 1 finished with value: 10.268581843841854 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 6, 'min_samples_split': 3, 'min_samples_leaf': 1}. Best is trial 0 with value: 9.690123827234713.
[I 2025-02-18 14:44:00,268] Trial 2 finished with value: 10.152338298044876 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 2}. Best is trial 0 with value: 9.690123827234713.
[I 2025-02-18 14:44:00,372] Trial 3 finished with value: 10.759029523100763 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 0 with value: 9.690123827234713.
[I 2025-02-18 14:44:00,553] Trial 4 finished with value: 9.65405086550001 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 1}. Best is trial 4

Fold: 2/5
Model name: random_forest
MAE: 7.61709813904347
MSE: 92.84511272946787
RMSE: 9.635616883701212
PCC: 0.6792977889288616
Spearman R: 0.6847062137524272
R2 Score: 0.4383859374756006



[I 2025-02-18 14:44:37,714] Trial 0 finished with value: 9.617845208344008 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.03546998012057837, 'subsample': 0.8906656594312897, 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_leaf_nodes': 1007}. Best is trial 0 with value: 9.617845208344008.
[I 2025-02-18 14:44:39,227] Trial 1 finished with value: 10.011160371553812 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.08876353971287862, 'subsample': 0.2862191233641416, 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_leaf_nodes': 373}. Best is trial 0 with value: 9.617845208344008.
[I 2025-02-18 14:44:40,074] Trial 2 finished with value: 10.479410654929875 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.010680258533017127, 'subsample': 0.16155635346788916, 'max_depth': 8, 'min_samples_split': 9, 'min_sam

Fold: 2/5
Model name: gradient_boosting
MAE: 7.427991393519388
MSE: 87.71686823619717
RMSE: 9.365728387915015
PCC: 0.6867560190942924
Spearman R: 0.6909694837771422
R2 Score: 0.46940635566256705



[I 2025-02-18 14:46:07,214] Trial 0 finished with value: 9.768367375046557 and parameters: {'n_estimators': 98, 'loss': 'linear', 'learning_rate': 0.023129225080686898}. Best is trial 0 with value: 9.768367375046557.
[I 2025-02-18 14:46:09,708] Trial 1 finished with value: 9.771343457640844 and parameters: {'n_estimators': 87, 'loss': 'square', 'learning_rate': 0.01721244050993702}. Best is trial 0 with value: 9.768367375046557.
[I 2025-02-18 14:46:11,896] Trial 2 finished with value: 9.802415046482935 and parameters: {'n_estimators': 77, 'loss': 'exponential', 'learning_rate': 0.027129731286168343}. Best is trial 0 with value: 9.768367375046557.
[I 2025-02-18 14:46:14,272] Trial 3 finished with value: 9.812185865678709 and parameters: {'n_estimators': 82, 'loss': 'exponential', 'learning_rate': 0.002875960920299697}. Best is trial 0 with value: 9.768367375046557.
[I 2025-02-18 14:46:16,358] Trial 4 finished with value: 9.833390243473884 and parameters: {'n_estimators': 73, 'loss': 'sq

Fold: 2/5
Model name: ada_boost
MAE: 7.651556802147061
MSE: 93.53997353355932
RMSE: 9.671606564245637
PCC: 0.6618442736798891
Spearman R: 0.6685732556088323
R2 Score: 0.43418276955860047



[I 2025-02-18 14:49:36,608] Trial 2 finished with value: 10.73817407007569 and parameters: {'learning_rate': 0.06820191346689607, 'num_leaves': 907, 'subsample': 0.8134110826487356, 'colsample_bytree': 0.09656616015610274, 'min_data_in_leaf': 98}. Best is trial 1 with value: 10.659853520040357.
[I 2025-02-18 14:49:36,701] Trial 3 finished with value: 10.377758913679571 and parameters: {'learning_rate': 0.008344037140699482, 'num_leaves': 478, 'subsample': 0.740574053930085, 'colsample_bytree': 0.8656742901556288, 'min_data_in_leaf': 66}. Best is trial 3 with value: 10.377758913679571.
[I 2025-02-18 14:49:36,847] Trial 4 finished with value: 9.634394028663007 and parameters: {'learning_rate': 0.05331735489667741, 'num_leaves': 733, 'subsample': 0.9582792318669225, 'colsample_bytree': 0.30161903010022956, 'min_data_in_leaf': 46}. Best is trial 4 with value: 9.634394028663007.
[I 2025-02-18 14:49:36,932] Trial 5 finished with value: 11.040341668053513 and parameters: {'learning_rate': 0.0

Fold: 2/5
Model name: lgbm
MAE: 7.439932970100623
MSE: 88.60118085098733
RMSE: 9.412820026484482
PCC: 0.6815529098714987
Spearman R: 0.6875733125286377
R2 Score: 0.46405720603547806



[I 2025-02-18 14:49:46,522] Trial 24 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 14:49:46,528] Trial 25 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 14:49:46,534] Trial 26 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 14:49:46,540] Trial 27 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 14:49:46,546] Trial 28 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 14:49:46,553] Trial 29 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[

Fold: 3/5
Model name: linear_regression
MAE: 8.755107813660903
MSE: 120.93045179515701
RMSE: 10.996838263571808
PCC: 0.5082810320078583
Spearman R: 0.5304086007455472
R2 Score: 0.25632540927888203



[I 2025-02-18 14:49:47,171] Trial 12 finished with value: 10.972345206468516 and parameters: {'alpha': 0.09755624083879193, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 12 with value: 10.972345206468516.
[I 2025-02-18 14:49:47,180] Trial 13 finished with value: 10.972595976299543 and parameters: {'alpha': 0.08946288374296148, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 12 with value: 10.972345206468516.
[I 2025-02-18 14:49:47,190] Trial 14 finished with value: 10.97832223321008 and parameters: {'alpha': 0.046813710561541697, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 12 with value: 10.972345206468516.
[I 2025-02-18 14:49:47,250] Trial 15 finished with value: 10.995586856284653 and parameters: {'alpha': 0.0017715595560380659, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 12 with value: 10.972345206468516.
[I 2025-02-18 14:49:47,259] Trial 

Fold: 3/5
Model name: lasso
MAE: 8.734007149629308
MSE: 120.3925633360427
RMSE: 10.972354502842254
PCC: 0.5101751173125759
Spearman R: 0.5367718844812048
R2 Score: 0.2596332111910342



[I 2025-02-18 14:49:48,433] Trial 9 finished with value: 10.99683628719875 and parameters: {'alpha': 0.009231030571380703, 'solver': 'auto', 'fit_intercept': False}. Best is trial 8 with value: 10.996810394049122.
[I 2025-02-18 14:49:48,441] Trial 10 finished with value: 10.996813346343048 and parameters: {'alpha': 0.0440619694787964, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 8 with value: 10.996810394049122.
[I 2025-02-18 14:49:48,448] Trial 11 finished with value: 10.996811138576117 and parameters: {'alpha': 0.04349171218632651, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 8 with value: 10.996810394049122.
[I 2025-02-18 14:49:48,455] Trial 12 finished with value: 10.996875094798238 and parameters: {'alpha': 0.0381787774782426, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 8 with value: 10.996810394049122.
[I 2025-02-18 14:49:48,714] Trial 13 finished with value: 10.996894943666078 and parameters: {'alpha': 0.07676173229538748, 'sol

Fold: 3/5
Model name: ridge
MAE: 8.755100844614203
MSE: 120.92955310223552
RMSE: 10.99679740207282
PCC: 0.5082835348928054
Spearman R: 0.5304014054302365
R2 Score: 0.2563309358859579



[I 2025-02-18 14:49:51,613] Trial 1 finished with value: 10.095470935623498 and parameters: {'eta': 0.18229538204464862, 'gamma': 9.187437881219525e-08, 'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.5401505642865279, 'colsample_bytree': 0.7906460733967433}. Best is trial 0 with value: 9.34263255964139.
[I 2025-02-18 14:49:51,728] Trial 2 finished with value: 9.41885281458584 and parameters: {'eta': 0.040387913625316824, 'gamma': 2.074000701665219e-06, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.7400106394137622, 'colsample_bytree': 0.6494687989461878}. Best is trial 0 with value: 9.34263255964139.
[I 2025-02-18 14:49:51,849] Trial 3 finished with value: 10.008527026578292 and parameters: {'eta': 0.012358440500433105, 'gamma': 5.227217621758387e-08, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.9112901487571129, 'colsample_bytree': 0.7919610424343075}. Best is trial 0 with value: 9.34263255964139.
[I 2025-02-18 14:49:51,938] Trial 4 finished with value: 9.3884

Fold: 3/5
Model name: xgb
MAE: 7.343753682427242
MSE: 86.9791017787962
RMSE: 9.326258723560922
PCC: 0.6824884404646622
Spearman R: 0.6867194664919792
R2 Score: 0.4651128234747306



[I 2025-02-18 14:50:03,125] Trial 1 finished with value: 9.854852672999462 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 3, 'min_samples_leaf': 2}. Best is trial 1 with value: 9.854852672999462.
[I 2025-02-18 14:50:03,254] Trial 2 finished with value: 9.860580074382131 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 5, 'min_samples_leaf': 8}. Best is trial 1 with value: 9.854852672999462.
[I 2025-02-18 14:50:03,364] Trial 3 finished with value: 10.800571937936375 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 1 with value: 9.854852672999462.
[I 2025-02-18 14:50:03,493] Trial 4 finished with value: 10.476925174615157 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 1

Fold: 3/5
Model name: random_forest
MAE: 7.672133028686679
MSE: 94.0151528349051
RMSE: 9.696141131135885
PCC: 0.6659102967567978
Spearman R: 0.6699176142256307
R2 Score: 0.42184388408212825



[I 2025-02-18 14:50:39,719] Trial 0 finished with value: 10.019835703068363 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.026131045819798325, 'subsample': 0.05168663515247938, 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'max_leaf_nodes': 256}. Best is trial 0 with value: 10.019835703068363.
[I 2025-02-18 14:50:40,257] Trial 1 finished with value: 9.719166215253715 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.06558817498565081, 'subsample': 0.15771401985297173, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'log2', 'max_leaf_nodes': 77}. Best is trial 1 with value: 9.719166215253715.
[I 2025-02-18 14:50:40,632] Trial 2 finished with value: 11.655734380300059 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.004753237253123138, 'subsample': 0.3519432917861715, 'max_depth': 4, 'min_samples_sp

Fold: 3/5
Model name: gradient_boosting
MAE: 7.449522024408639
MSE: 88.70100902485927
RMSE: 9.418121310795444
PCC: 0.6749649997780283
Spearman R: 0.6800123603563908
R2 Score: 0.4545237729298375



[I 2025-02-18 14:52:04,883] Trial 0 finished with value: 9.79172373658976 and parameters: {'n_estimators': 68, 'loss': 'square', 'learning_rate': 0.03120971762906154}. Best is trial 0 with value: 9.79172373658976.
[I 2025-02-18 14:52:06,653] Trial 1 finished with value: 9.826054765527394 and parameters: {'n_estimators': 61, 'loss': 'square', 'learning_rate': 0.0012321623882256497}. Best is trial 0 with value: 9.79172373658976.
[I 2025-02-18 14:52:08,446] Trial 2 finished with value: 9.834344479601729 and parameters: {'n_estimators': 62, 'loss': 'square', 'learning_rate': 0.001200391212697905}. Best is trial 0 with value: 9.79172373658976.
[I 2025-02-18 14:52:09,983] Trial 3 finished with value: 9.807659605237754 and parameters: {'n_estimators': 54, 'loss': 'square', 'learning_rate': 0.03558693764071843}. Best is trial 0 with value: 9.79172373658976.
[I 2025-02-18 14:52:12,287] Trial 4 finished with value: 9.810260385607398 and parameters: {'n_estimators': 80, 'loss': 'exponential', 'le

Fold: 3/5
Model name: ada_boost
MAE: 7.652117418003956
MSE: 94.64122094890718
RMSE: 9.728371957779327
PCC: 0.6473898951346391
Spearman R: 0.6650912949814658
R2 Score: 0.41799381206525776



[I 2025-02-18 14:56:13,045] Trial 0 finished with value: 10.760161921036438 and parameters: {'learning_rate': 0.007443619073712195, 'num_leaves': 295, 'subsample': 0.9549058344363385, 'colsample_bytree': 0.6246061184922395, 'min_data_in_leaf': 2}. Best is trial 0 with value: 10.760161921036438.
[I 2025-02-18 14:56:13,160] Trial 1 finished with value: 9.673155054244436 and parameters: {'learning_rate': 0.013630953659602069, 'num_leaves': 177, 'subsample': 0.7122475668650912, 'colsample_bytree': 0.991919372353954, 'min_data_in_leaf': 43}. Best is trial 1 with value: 9.673155054244436.
[I 2025-02-18 14:56:13,205] Trial 2 finished with value: 12.115314041967807 and parameters: {'learning_rate': 0.0027364069583226064, 'num_leaves': 266, 'subsample': 0.22115102838528994, 'colsample_bytree': 0.4159101036159323, 'min_data_in_leaf': 57}. Best is trial 1 with value: 9.673155054244436.
[I 2025-02-18 14:56:13,241] Trial 3 finished with value: 11.152337104819749 and parameters: {'learning_rate': 0.

Fold: 3/5
Model name: lgbm
MAE: 7.42770900006365
MSE: 88.17395326441465
RMSE: 9.390098682357637
PCC: 0.6779266556268314
Spearman R: 0.6817349440370875
R2 Score: 0.45776495801694683



[I 2025-02-18 14:56:26,480] Trial 22 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 14:56:26,486] Trial 23 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 14:56:26,493] Trial 24 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 14:56:26,500] Trial 25 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 14:56:26,506] Trial 26 finished with value: 10.730366718545177 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 14:56:26,513] Trial 27 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.

Fold: 4/5
Model name: linear_regression
MAE: 8.602220439887196
MSE: 115.140769914462
RMSE: 10.730366718545177
PCC: 0.5397874341390443
Spearman R: 0.5686795893279646
R2 Score: 0.29042379751749636



[I 2025-02-18 14:56:27,200] Trial 7 finished with value: 10.789217479585604 and parameters: {'alpha': 0.03472408453081121, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}. Best is trial 0 with value: 10.728361567826365.
[I 2025-02-18 14:56:27,209] Trial 8 finished with value: 10.707661630166827 and parameters: {'alpha': 0.03470640949649367, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 8 with value: 10.707661630166827.
[I 2025-02-18 14:56:27,223] Trial 9 finished with value: 10.715727170511485 and parameters: {'alpha': 0.016260839436312843, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 8 with value: 10.707661630166827.
[I 2025-02-18 14:56:27,232] Trial 10 finished with value: 10.694245767217428 and parameters: {'alpha': 0.08354148654857668, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 10 with value: 10.694245767217428.
[I 2025-02-18 14:56:27,240] Trial 11 finish

Fold: 4/5
Model name: lasso
MAE: 8.589693562442204
MSE: 114.35319849260337
RMSE: 10.693605495463322
PCC: 0.5456515947259959
Spearman R: 0.5759409661878775
R2 Score: 0.2952773514682069



[I 2025-02-18 14:56:28,718] Trial 3 finished with value: 10.730871581571249 and parameters: {'alpha': 0.0650800167686515, 'solver': 'saga', 'fit_intercept': False}. Best is trial 1 with value: 10.73036633752857.
[I 2025-02-18 14:56:28,723] Trial 4 finished with value: 10.730370498437326 and parameters: {'alpha': 0.004393130767296017, 'solver': 'auto', 'fit_intercept': False}. Best is trial 1 with value: 10.73036633752857.
[I 2025-02-18 14:56:28,727] Trial 5 finished with value: 10.730369818091798 and parameters: {'alpha': 0.0036024075274940504, 'solver': 'auto', 'fit_intercept': False}. Best is trial 1 with value: 10.73036633752857.
[I 2025-02-18 14:56:28,871] Trial 6 finished with value: 10.730369638019095 and parameters: {'alpha': 0.0012322228566212618, 'solver': 'sag', 'fit_intercept': True}. Best is trial 1 with value: 10.73036633752857.
[I 2025-02-18 14:56:28,876] Trial 7 finished with value: 10.73039782180429 and parameters: {'alpha': 0.0361495482113378, 'solver': 'auto', 'fit_in

Fold: 4/5
Model name: ridge
MAE: 8.602151393968473
MSE: 115.12800558170159
RMSE: 10.729771925893932
PCC: 0.5398716905275414
Spearman R: 0.5687046205877425
R2 Score: 0.29050246006920666



[I 2025-02-18 14:56:32,197] Trial 1 finished with value: 9.65435831376706 and parameters: {'eta': 0.1507097070798206, 'gamma': 0.058059796242687096, 'max_depth': 9, 'min_child_weight': 1, 'subsample': 0.6695932189274891, 'colsample_bytree': 0.6893023226690513}. Best is trial 0 with value: 9.188530733319194.
[I 2025-02-18 14:56:32,423] Trial 2 finished with value: 9.619342049498552 and parameters: {'eta': 0.1291516730837432, 'gamma': 5.160807078258699e-06, 'max_depth': 8, 'min_child_weight': 2, 'subsample': 0.9221774736543423, 'colsample_bytree': 0.59824409617794}. Best is trial 0 with value: 9.188530733319194.
[I 2025-02-18 14:56:32,728] Trial 3 finished with value: 9.714392479647833 and parameters: {'eta': 0.019877896401564486, 'gamma': 7.991841798363447e-07, 'max_depth': 9, 'min_child_weight': 5, 'subsample': 0.9612706478258257, 'colsample_bytree': 0.6279392451646046}. Best is trial 0 with value: 9.188530733319194.
[I 2025-02-18 14:56:33,138] Trial 4 finished with value: 9.3415375519

Fold: 4/5
Model name: xgb
MAE: 7.265604012889869
MSE: 82.69541729574536
RMSE: 9.093702067680981
PCC: 0.7015165792306901
Spearman R: 0.7080854074552373
R2 Score: 0.4903742591697684



[I 2025-02-18 14:56:44,900] Trial 1 finished with value: 9.61279378566331 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 8, 'min_samples_leaf': 5}. Best is trial 1 with value: 9.61279378566331.
[I 2025-02-18 14:56:46,296] Trial 2 finished with value: 10.764176040800479 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 1 with value: 9.61279378566331.
[I 2025-02-18 14:56:47,783] Trial 3 finished with value: 10.649749766911054 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 6}. Best is trial 1 with value: 9.61279378566331.
[I 2025-02-18 14:56:47,892] Trial 4 finished with value: 11.002157603055444 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 1 

Fold: 4/5
Model name: random_forest
MAE: 7.566522777112112
MSE: 88.93003921215309
RMSE: 9.430272488754134
PCC: 0.6893338956903815
Spearman R: 0.6948917773014754
R2 Score: 0.45195225324914323



[I 2025-02-18 14:57:20,196] Trial 0 finished with value: 10.750097752205702 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.009458367033345879, 'subsample': 0.23958243032081866, 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_leaf_nodes': 416}. Best is trial 0 with value: 10.750097752205702.
[I 2025-02-18 14:57:20,345] Trial 1 finished with value: 9.596336924136896 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.04596100473441368, 'subsample': 0.07110714969136915, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'max_leaf_nodes': 784}. Best is trial 1 with value: 9.596336924136896.
[I 2025-02-18 14:57:20,953] Trial 2 finished with value: 10.84117920223331 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.008541394944193268, 'subsample': 0.6001385575276206, 'max_depth': 6, 'min_samp

Fold: 4/5
Model name: gradient_boosting
MAE: 7.387119533017873
MSE: 85.54897881353553
RMSE: 9.249269096179196
PCC: 0.6912843332635823
Spearman R: 0.6965453199395782
R2 Score: 0.472788660716258



[I 2025-02-18 14:58:42,298] Trial 0 finished with value: 9.457501888149565 and parameters: {'n_estimators': 82, 'loss': 'square', 'learning_rate': 0.0461343018338616}. Best is trial 0 with value: 9.457501888149565.
[I 2025-02-18 14:58:43,793] Trial 1 finished with value: 9.442979605482973 and parameters: {'n_estimators': 61, 'loss': 'square', 'learning_rate': 0.08207609266162738}. Best is trial 1 with value: 9.442979605482973.
[I 2025-02-18 14:58:45,497] Trial 2 finished with value: 9.752856103177248 and parameters: {'n_estimators': 63, 'loss': 'square', 'learning_rate': 0.0021751106686027167}. Best is trial 1 with value: 9.442979605482973.
[I 2025-02-18 14:58:48,089] Trial 3 finished with value: 9.69989155665563 and parameters: {'n_estimators': 97, 'loss': 'exponential', 'learning_rate': 0.005204917702894781}. Best is trial 1 with value: 9.442979605482973.
[I 2025-02-18 14:58:49,864] Trial 4 finished with value: 9.55201273317001 and parameters: {'n_estimators': 67, 'loss': 'exponentia

Fold: 4/5
Model name: ada_boost
MAE: 7.508755501642875
MSE: 88.55007792238627
RMSE: 9.41010509624554
PCC: 0.6751908753201281
Spearman R: 0.6822874941477514
R2 Score: 0.45429383468275186



[I 2025-02-18 15:02:14,165] Trial 1 finished with value: 9.521862113532743 and parameters: {'learning_rate': 0.062497790706739165, 'num_leaves': 499, 'subsample': 0.34967422733572323, 'colsample_bytree': 0.291983337919407, 'min_data_in_leaf': 22}. Best is trial 1 with value: 9.521862113532743.
[I 2025-02-18 15:02:15,215] Trial 2 finished with value: 9.571084691542357 and parameters: {'learning_rate': 0.0630827217932546, 'num_leaves': 841, 'subsample': 0.6028428000329363, 'colsample_bytree': 0.5795938914485399, 'min_data_in_leaf': 4}. Best is trial 1 with value: 9.521862113532743.
[I 2025-02-18 15:02:15,255] Trial 3 finished with value: 11.021844633526683 and parameters: {'learning_rate': 0.00689306045355429, 'num_leaves': 452, 'subsample': 0.1231869995620632, 'colsample_bytree': 0.5280799982237657, 'min_data_in_leaf': 44}. Best is trial 1 with value: 9.521862113532743.
[I 2025-02-18 15:02:15,335] Trial 4 finished with value: 9.19493097901351 and parameters: {'learning_rate': 0.04321439

Fold: 4/5
Model name: lgbm
MAE: 7.327661633951752
MSE: 83.87021250973201
RMSE: 9.158068164724044
PCC: 0.6957306427886197
Spearman R: 0.7027244024314073
R2 Score: 0.48313436727696135



[I 2025-02-18 15:02:26,216] Trial 24 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 15:02:26,222] Trial 25 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 15:02:26,230] Trial 26 finished with value: 10.921387446629879 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 15:02:26,237] Trial 27 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 15:02:26,243] Trial 28 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 15:02:26,249] Trial 29 finished with value: 10.921387446629879 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.921387446629819.
[I

Fold: 5/5
Model name: linear_regression
MAE: 8.280015810684853
MSE: 119.27670375940339
RMSE: 10.921387446629819
PCC: 0.4972213158473644
Spearman R: 0.5898355302091915
R2 Score: 0.23207756770389032



[I 2025-02-18 15:02:26,935] Trial 10 finished with value: 10.31027186843952 and parameters: {'alpha': 0.017742627065113488, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 5 with value: 10.310260188679198.
[I 2025-02-18 15:02:26,957] Trial 11 finished with value: 10.310257728708901 and parameters: {'alpha': 0.018787260541250943, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 11 with value: 10.310257728708901.
[I 2025-02-18 15:02:26,978] Trial 12 finished with value: 10.310448281679266 and parameters: {'alpha': 0.015674161900636484, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 11 with value: 10.310257728708901.
[I 2025-02-18 15:02:26,999] Trial 13 finished with value: 10.310552151065254 and parameters: {'alpha': 0.02214431471239597, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 11 with value: 10.310257728708901.
[I 2025-02-18 15:02:27,027] Trial 1

Fold: 5/5
Model name: lasso
MAE: 8.200257845662499
MSE: 106.3002909129739
RMSE: 10.310203243048797
PCC: 0.5626424506814478
Spearman R: 0.5917569372097411
R2 Score: 0.3156217821349754



[I 2025-02-18 15:02:29,079] Trial 10 finished with value: 10.911743230649988 and parameters: {'alpha': 0.001166736202043798, 'solver': 'saga', 'fit_intercept': False}. Best is trial 0 with value: 10.453022564522271.
[I 2025-02-18 15:02:29,087] Trial 11 finished with value: 10.907491920941153 and parameters: {'alpha': 0.09405086686703819, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 0 with value: 10.453022564522271.
[I 2025-02-18 15:02:29,098] Trial 12 finished with value: 10.91101374518236 and parameters: {'alpha': 0.06987692876940572, 'solver': 'svd', 'fit_intercept': False}. Best is trial 0 with value: 10.453022564522271.
[I 2025-02-18 15:02:29,106] Trial 13 finished with value: 10.919995307435855 and parameters: {'alpha': 0.009297286748709322, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 0 with value: 10.453022564522271.
[I 2025-02-18 15:02:29,114] Trial 14 finished with value: 10.915045298402134 and parameters: {'alpha': 0.042447271832985224, 'solv

Fold: 5/5
Model name: ridge
MAE: 8.215421213491457
MSE: 107.15873335642891
RMSE: 10.35175025570212
PCC: 0.5572179774147017
Spearman R: 0.5894805076095733
R2 Score: 0.3100949928426262



[I 2025-02-18 15:02:31,704] Trial 2 finished with value: 9.092061805729728 and parameters: {'eta': 0.0641784537913545, 'gamma': 1.0049295292619907e-06, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.6778632948937862, 'colsample_bytree': 0.9493996053106034}. Best is trial 1 with value: 9.034191940710656.
[I 2025-02-18 15:02:31,802] Trial 3 finished with value: 9.152022439179648 and parameters: {'eta': 0.023822370779077756, 'gamma': 3.175389546147346e-08, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.899251200346192, 'colsample_bytree': 0.8043766304567197}. Best is trial 1 with value: 9.034191940710656.
[I 2025-02-18 15:02:31,865] Trial 4 finished with value: 9.258343445707002 and parameters: {'eta': 0.1825934392845071, 'gamma': 2.6788844368234296e-05, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.5981316265382458, 'colsample_bytree': 0.9396512918519684}. Best is trial 1 with value: 9.034191940710656.
[I 2025-02-18 15:02:31,973] Trial 5 finished with value: 9.54144

Fold: 5/5
Model name: xgb
MAE: 7.191994655874915
MSE: 81.88690345113046
RMSE: 9.049138271190824
PCC: 0.6879723153179262
Spearman R: 0.693123218016882
R2 Score: 0.4727990622692628



[I 2025-02-18 15:02:42,380] Trial 1 finished with value: 10.650443271794012 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 3, 'min_samples_split': 10, 'min_samples_leaf': 1}. Best is trial 0 with value: 10.636264457781419.
[I 2025-02-18 15:02:44,029] Trial 2 finished with value: 9.554001003268189 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 2 with value: 9.554001003268189.
[I 2025-02-18 15:02:46,024] Trial 3 finished with value: 9.364638624227299 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 2, 'min_samples_leaf': 9}. Best is trial 3 with value: 9.364638624227299.
[I 2025-02-18 15:02:47,700] Trial 4 finished with value: 9.656657564057959 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 7, 'min_samples_split': 6, 'min_samples_leaf': 8}. Best is trial

Fold: 5/5
Model name: random_forest
MAE: 7.3645376268268095
MSE: 85.20854853858191
RMSE: 9.230847660891275
PCC: 0.6810713630695048
Spearman R: 0.6856717922546064
R2 Score: 0.45141378170412116



[I 2025-02-18 15:03:37,632] Trial 0 finished with value: 14.97029030462493 and parameters: {'loss': 'quantile', 'criterion': 'friedman_mse', 'learning_rate': 0.034387195889017834, 'subsample': 0.20796301898786396, 'max_depth': 7, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_leaf_nodes': 720}. Best is trial 0 with value: 14.97029030462493.
[I 2025-02-18 15:03:38,048] Trial 1 finished with value: 9.17750211216157 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.033880353052103934, 'subsample': 0.35690951584508396, 'max_depth': 6, 'min_samples_split': 5, 'min_samples_leaf': 9, 'max_features': 'log2', 'max_leaf_nodes': 357}. Best is trial 1 with value: 9.17750211216157.
[I 2025-02-18 15:03:38,358] Trial 2 finished with value: 12.040306842919179 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.0018241780257312365, 'subsample': 0.30450364735733765, 'max_depth': 3, 'min_samples_split': 5, 

Fold: 5/5
Model name: gradient_boosting
MAE: 7.231030551783192
MSE: 82.77153521466037
RMSE: 9.097886304777631
PCC: 0.6841896592849155
Spearman R: 0.689977417959929
R2 Score: 0.467103661959521



[I 2025-02-18 15:04:54,178] Trial 0 finished with value: 9.453590494677599 and parameters: {'n_estimators': 98, 'loss': 'square', 'learning_rate': 0.003061551915611813}. Best is trial 0 with value: 9.453590494677599.
[I 2025-02-18 15:04:56,285] Trial 1 finished with value: 9.349418018078975 and parameters: {'n_estimators': 75, 'loss': 'linear', 'learning_rate': 0.04536858184415286}. Best is trial 1 with value: 9.349418018078975.
[I 2025-02-18 15:04:58,726] Trial 2 finished with value: 9.416503577972009 and parameters: {'n_estimators': 86, 'loss': 'linear', 'learning_rate': 0.018349988081184557}. Best is trial 1 with value: 9.349418018078975.
[I 2025-02-18 15:05:00,954] Trial 3 finished with value: 9.309825467711589 and parameters: {'n_estimators': 89, 'loss': 'square', 'learning_rate': 0.0857273120240021}. Best is trial 3 with value: 9.309825467711589.
[I 2025-02-18 15:05:03,063] Trial 4 finished with value: 9.329446306316797 and parameters: {'n_estimators': 77, 'loss': 'linear', 'lear

Fold: 5/5
Model name: ada_boost
MAE: 7.407983018065365
MSE: 86.59204004405652
RMSE: 9.30548440673867
PCC: 0.665604675316007
Spearman R: 0.6749933191416005
R2 Score: 0.44250664285420704



[I 2025-02-18 15:08:54,164] Trial 0 finished with value: 11.424182897977461 and parameters: {'learning_rate': 0.002696130433238335, 'num_leaves': 398, 'subsample': 0.7059176527672502, 'colsample_bytree': 0.7086378032750336, 'min_data_in_leaf': 7}. Best is trial 0 with value: 11.424182897977461.
[I 2025-02-18 15:08:54,834] Trial 1 finished with value: 9.286472896686618 and parameters: {'learning_rate': 0.03175164737899468, 'num_leaves': 917, 'subsample': 0.9065141328359984, 'colsample_bytree': 0.7702281463757545, 'min_data_in_leaf': 10}. Best is trial 1 with value: 9.286472896686618.
[I 2025-02-18 15:08:54,951] Trial 2 finished with value: 9.219332882996735 and parameters: {'learning_rate': 0.01908988234512981, 'num_leaves': 359, 'subsample': 0.9628768400114969, 'colsample_bytree': 0.7900788052759101, 'min_data_in_leaf': 67}. Best is trial 2 with value: 9.219332882996735.
[I 2025-02-18 15:08:55,008] Trial 3 finished with value: 9.149626472111883 and parameters: {'learning_rate': 0.08491

Fold: 5/5
Model name: lgbm
MAE: 7.238008444470687
MSE: 82.69432965595506
RMSE: 9.093642265668638
PCC: 0.6839353685279288
Spearman R: 0.6894680652219006
R2 Score: 0.467600723653541



Compute average scores and rank models by R2 score

In [13]:
for model_name, model_metrics in model_scores.items():
    for metric, scores in model_metrics.items():
        model_scores[model_name][metric] = sum(scores) / len(scores)
    model_scores[model_name] = dict(model_scores[model_name])

model_scores = dict(sorted(model_scores.items(), key=lambda x: x[1]["r2_score"], reverse=True))

In [14]:
# Print results
for i, (model_name, model_metrics) in enumerate(model_scores.items()):
    print(f"No.{i+1} Model: {model_name}")
    for metric, score in model_metrics.items():
        print(f"{metric}: {score}")
    print()

No.1 Model: xgb
mae: 7.311169950300032
mse: 84.38288685853144
rmse: 9.185357427575628
pcc: 0.6915242558540858
spearman_r: 0.6968347251307521
r2_score: 0.47736005031242695

No.2 Model: lgbm
mae: 7.374610627722444
mse: 85.74854770383693
rmse: 9.259201372651853
pcc: 0.6852794829870886
spearman_r: 0.6905132269582918
r2_score: 0.4689455233858711

No.3 Model: gradient_boosting
mae: 7.376683460669168
mse: 85.90926095455492
rmse: 9.268025262183757
pcc: 0.6856602274922723
spearman_r: 0.6900187576245147
r2_score: 0.4679342204600593

No.4 Model: random_forest
mae: 7.57662447875727
mse: 90.31053125514808
rmse: 9.501770872073317
pcc: 0.6793311483953499
spearman_r: 0.6839974007802347
r2_score: 0.4407864611896931

No.5 Model: ada_boost
mae: 7.566765426366864
mse: 90.59556393077153
rmse: 9.51683488549639
pcc: 0.6640030325735646
spearman_r: 0.673024039106188
r2_score: 0.43897431280428734

No.6 Model: lasso
mae: 8.556756188765025
mse: 114.73263009489531
rmse: 10.70907580208532
pcc: 0.5390378834822169
sp