In [1]:
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
import optuna
import os
import json
from collections import defaultdict

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

from src.utils import get_kfold_data, convert_non_numeric_to_numeric, calculate_r2_score, calculate_metrics
from src.normalisation import Normaliser
from src.constants import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv(DATA_PATH)

In [3]:
data.head()

Unnamed: 0,outcome,carat,cut,color,clarity,depth,table,price,x,y,...,a6,a7,a8,a9,a10,b6,b7,b8,b9,b10
0,-26.701232,1.14,Ideal,G,VS1,62.3,56.0,7948,6.73,6.7,...,0.168836,-0.273758,1.107832,1.247795,0.482344,0.489511,-0.321138,0.573382,0.446871,-1.990581
1,6.548093,0.38,Premium,H,VS2,60.5,59.0,898,4.69,4.66,...,-0.256549,0.315373,-0.030326,-0.114335,-1.059588,-1.76136,-1.343951,-1.00255,-0.22503,-0.446653
2,6.612562,0.5,Very Good,E,SI1,60.7,58.0,1351,5.09,5.13,...,-1.193327,-0.657307,-0.591726,-0.446856,-0.765286,-0.816544,-1.397794,-0.47713,0.810509,1.725131
3,-5.073562,0.7,Premium,D,SI1,61.2,58.0,2512,5.74,5.7,...,-1.740788,-1.77886,-0.82507,0.444932,1.173109,0.453606,-0.26344,0.24621,-0.850503,-0.41295
4,-14.436557,0.83,Ideal,G,SI2,62.4,54.0,2751,6.01,6.08,...,-0.859322,1.409268,0.861992,1.109063,-1.436722,-1.461618,0.081787,0.258087,0.851146,2.204813


Inspecting columns

In [4]:
# Find columns
all_columns = data.columns.tolist()
print(all_columns)

numeric_columns = data.select_dtypes(include=["number"]).columns.tolist()
numeric_columns.remove("outcome") # Remove the target column
print(numeric_columns)

non_numeric_columns = data.select_dtypes(exclude=["number"]).columns.tolist()
print(non_numeric_columns)

['outcome', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['cut', 'color', 'clarity']


In [5]:
for non_numeric_column in non_numeric_columns:
    print(data[non_numeric_column].value_counts())

cut
Ideal        4040
Premium      2439
Very Good    2296
Good          925
Fair          300
Name: count, dtype: int64
color
G    2120
E    1873
F    1746
H    1506
D    1246
I     983
J     526
Name: count, dtype: int64
clarity
SI1     2408
VS2     2256
SI2     1743
VS1     1503
VVS2     951
VVS1     675
IF       318
I1       146
Name: count, dtype: int64


Converting non-numeric features to numerical features

In [6]:
data = convert_non_numeric_to_numeric(data=data)
print(data)

['G', 'E', 'F', 'H', 'D', 'I', 'J']
        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...      

Normalise data using each columns respective mean and std.

In [7]:
print(data)

        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...        b8        b9       b10  colour_G  

Data splitting:
- Split the entire dataset into training and testing sets first.
- Use the training set to generate folds (one validation and the rest training folds) (K-Fold Cross Validation)

In [8]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=REPRODUCIBILITY_SEED)
print(f"Training set size: {len(train_data)} | Test set size: {len(test_data)}")    
print()


Training set size: 8000 | Test set size: 2000



In [9]:
normaliser = Normaliser()
os.makedirs(TRAINING_STATISTICS_DIR, exist_ok=True)
stats_for_each_column = {}
for column in numeric_columns:
    print(data[column])
    train_data_column_mean = normaliser.calculate_mean(train_data[column])
    train_data_column_std = normaliser.calculate_std(train_data[column])

    train_data[column] = normaliser.standardise(train_data[column], mean=train_data_column_mean, std=train_data_column_std)

    stats_for_each_column[column] = {
        "mean": train_data_column_mean,
        "std": train_data_column_std
    }

    # Normalise test data using the mean and std of the training data
    test_data[column] = normaliser.standardise(test_data[column], mean=train_data_column_mean, std=train_data_column_std)
    print("after", train_data[column])

with open(f"{TRAINING_STATISTICS_DIR}/stats.json", "w") as f:
    json.dump(stats_for_each_column, f)

0       1.14
1       0.38
2       0.50
3       0.70
4       0.83
        ... 
9995    0.33
9996    1.01
9997    0.52
9998    0.31
9999    0.37
Name: carat, Length: 10000, dtype: float64
after 9254   -1.023878
1561    1.522047
1670   -1.045094
6087   -0.111588
6669    1.140158
          ...   
5734   -0.154021
5191    0.970430
5390   -1.045094
860    -1.002662
7270   -1.151175
Name: carat, Length: 8000, dtype: float64
0       62.3
1       60.5
2       60.7
3       61.2
4       62.4
        ... 
9995    62.6
9996    69.5
9997    57.9
9998    62.0
9999    59.9
Name: depth, Length: 10000, dtype: float64
after 9254   -0.381342
1561    0.945157
1670    0.386631
6087   -0.311527
6669   -0.311527
          ...   
5734    0.665894
5191    0.107368
5390    0.107368
860     0.875341
7270    0.665894
Name: depth, Length: 8000, dtype: float64
0       56.0
1       59.0
2       58.0
3       58.0
4       54.0
        ... 
9995    57.0
9996    55.0
9997    61.0
9998    54.0
9999    59.0
Name: table, Le

In [10]:
kfold_data = get_kfold_data(data=train_data, k=NUM_FOLDS, reproducibility_seed=REPRODUCIBILITY_SEED)

Fold: 0/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')

Fold: 1/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', '

Define models and hyperparameter tuning objectives for each model

In [11]:
models = {
        "linear_regression": LinearRegression,
        "lasso": Lasso,
        "ridge": Ridge,
        "xgb": xgb.XGBRegressor,
        "random_forest": RandomForestRegressor,
        "gradient_boosting": GradientBoostingRegressor,
        "ada_boost": AdaBoostRegressor,
        "lgbm": lgb.LGBMRegressor
        }

# Must contain the one-hot encoded columns (otherwise this does not make sense)
minimum_features = [feature for feature in data.columns if feature.startswith("colour")] 

def objective(model_type, trial, x_train, y_train, x_val, y_val):
    if model_type == LinearRegression:
        parameters = {
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
        }
    elif model_type == Lasso:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "selection": trial.suggest_categorical("selection", ["cyclic", "random"]),
            "warm_start": trial.suggest_categorical("warm_start", [True, False]),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == Ridge:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "solver": trial.suggest_categorical("solver", ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "positive": False,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == xgb.XGBRegressor:
        parameters = {
            "objective": "reg:squarederror",
            "eval_metric": "rmse",
            "n_estimators": 100,
            "eta": trial.suggest_float("eta", 1e-2, 0.2, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 10, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 6),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "seed": REPRODUCIBILITY_SEED
        }
    elif model_type == RandomForestRegressor:
        parameters = {
            "n_estimators": 100,
            "criterion": trial.suggest_categorical("criterion", ["absolute_error", "squared_error"]),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "bootstrap": True,
            "oob_score": False,
            "n_jobs": -1,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == GradientBoostingRegressor:
        parameters = {
            "n_estimators": 100,
            "loss": trial.suggest_categorical("loss", ["absolute_error", "squared_error", "huber", "quantile"]),
            "criterion": trial.suggest_categorical("criterion", ["friedman_mse", "squared_error"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "subsample": trial.suggest_float("subsample", 0.05, 1.0),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 2**10),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == AdaBoostRegressor:
        parameters = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 100),
            "loss": trial.suggest_categorical("loss", ["linear", "square", "exponential"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == lgb.LGBMRegressor:
        parameters = {
                    "objective": "regression",
                    "metric": "rmse",
                    "n_estimators": 100,
                    "verbosity": -1,
                    "bagging_freq": 1,
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
                    "subsample": trial.suggest_float("subsample", 0.05, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
                    "seed": REPRODUCIBILITY_SEED
        }
    
    # Wrapper method for feature selection
    selected_features = minimum_features.copy()
    train_columns = x_train.columns.tolist() 
    for feature in train_columns:
        if feature.startswith("colour"):
            continue
        if trial.suggest_categorical(feature, [0, 1]) == 1:
            selected_features.append(feature)
    if len(selected_features) == 0:
        return float("inf")
    
    x_train = x_train[selected_features]
    x_val = x_val[selected_features]

    model = model_type(**parameters) # Create the model
    model.fit(x_train, y_train)
    predictions = model.predict(x_val)
    metrics = calculate_metrics(targets=y_val, preds=predictions)
    rmse = metrics["rmse"]
    return rmse

In [12]:
# Train + Validate models
metrics = ["mae", "mse", "rmse", "pcc", "spearman_r", "r2_score"]
model_scores = {model_name: defaultdict(list) for model_name in models.keys()}

if os.path.exists("model_best_hyperparameters"):
    raise Exception("Directory for best hyperparameters already exists. Please delete it before running this script.")

os.makedirs("model_best_hyperparameters")

for fold in range(NUM_FOLDS):
    fold_data = kfold_data[fold]
     
    # Extract data
    train_data = fold_data["train"]
    val_data = fold_data["val"]

    train_y = train_data["outcome"]
    val_y = val_data["outcome"]
    
    train_x = train_data.drop(columns=["outcome"])
    val_x = val_data.drop(columns=["outcome"])

    # print(f"Fold {fold+1}/{NUM_FOLDS}")
    # print(f"Train data shape: {train_x.shape} | Train target shape: {train_y.shape}")
    # print(f"Val data shape: {val_x.shape} | Val target shape: {val_y.shape}")
    # print(f"Test data shape: {test_x.shape} | Test target shape: {test_y.shape}")
    training_features = set(train_x.columns.tolist())


    # Train model
    for model_name, model in models.items():
        study = optuna.create_study(direction="minimize")
        print("G", train_x.columns)
        print("G", val_x.columns)
        study.optimize(lambda trial: objective(trial=trial, 
                                               model_type=model, 
                                               x_train=train_x, 
                                               y_train=train_y, 
                                               x_val=val_x, 
                                               y_val=val_y
                                               ), n_trials=N_TRIALS)
        
        # Train model with best hyperparameters
        best_trial = study.best_trial 

        best_fold_params = {} # Hyperparameters
        best_selected_features = minimum_features.copy() # Features selected by the model
        for param in best_trial.params:
            if param in training_features: # i.e., if the parameter is a feature
                if best_trial.params[param] == 1:
                    best_selected_features.append(param)
            else: # A hyperparameter
                best_fold_params[param] = best_trial.params[param]
        print("P", best_fold_params)
        print("F", best_selected_features)
        
        # Select the best features
        print(train_x.columns)
        print(val_x.columns)
        train_x = train_x[best_selected_features]
        val_x = val_x[best_selected_features]

        model = model(**best_fold_params)
        model.fit(train_x, train_y)
        preds = model.predict(val_x)
        
        # Save the best hyperparameters for this model at this fold.
        os.makedirs(f"model_best_hyperparameters/{model_name}", exist_ok=True)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}.json", "w") as f:
            json.dump(best_fold_params, f)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}_selected_features.json", "w") as f:
            json.dump(best_selected_features, f)

        # Calculate metrics
        metrics = calculate_metrics(targets=val_y, preds=preds)
        mae = metrics["mae"]
        mse = metrics["mse"]
        rmse = metrics["rmse"]
        pcc = metrics["pcc"]
        spearman_r = metrics["spearman_r"]
        r2_score = metrics["r2_score"]

        for metric in metrics:
            model_scores[model_name][metric].append(metrics[metric])

        print(f"Fold: {fold+1}/{NUM_FOLDS}")
        print(f"Model name: {model_name}")
        print(f"MAE: {mae}")
        print(f"MSE: {mse}")
        print(f"RMSE: {rmse}")
        print(f"PCC: {pcc}")
        print(f"Spearman R: {spearman_r}")
        print(f"R2 Score: {r2_score}")
        print()


[I 2025-02-18 16:22:22,899] A new study created in memory with name: no-name-57bca8b4-71e4-4bb1-a47a-93ee1f6efeab
[I 2025-02-18 16:22:22,907] Trial 0 finished with value: 12.079737445821111 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 1, 'clarity': 1, 'depth': 0, 'table': 1, 'price': 0, 'x': 0, 'y': 0, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 1, 'b2': 0, 'b3': 0, 'b4': 1, 'b5': 1, 'a6': 0, 'a7': 1, 'a8': 1, 'a9': 1, 'a10': 1, 'b6': 1, 'b7': 1, 'b8': 1, 'b9': 0, 'b10': 0}. Best is trial 0 with value: 12.079737445821111.
[I 2025-02-18 16:22:22,913] Trial 1 finished with value: 12.065051961136902 and parameters: {'fit_intercept': True, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 0, 'table': 1, 'price': 1, 'x': 0, 'y': 1, 'z': 0, 'a1': 1, 'a2': 0, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 1, 'b4': 1, 'b5': 0, 'a6': 0, 'a7': 0, 'a8': 1, 'a9': 1, 'a10': 0, 'b6': 1, 'b7': 0, 'b8': 1, 'b9': 1, 'b10': 1}. Best is trial 1 with value: 12.065051961136902.

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 16:22:22,980] Trial 10 finished with value: 11.274102604911025 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 0, 'clarity': 1, 'depth': 1, 'table': 0, 'price': 1, 'x': 0, 'y': 0, 'z': 0, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 0, 'a5': 1, 'b1': 0, 'b2': 1, 'b3': 1, 'b4': 0, 'b5': 0, 'a6': 1, 'a7': 1, 'a8': 0, 'a9': 1, 'a10': 1, 'b6': 0, 'b7': 0, 'b8': 0, 'b9': 1, 'b10': 0}. Best is trial 6 with value: 10.972781715240504.
[I 2025-02-18 16:22:22,991] Trial 11 finished with value: 11.161148621984024 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 0, 'x': 1, 'y': 0, 'z': 0, 'a1': 0, 'a2': 0, 'a3': 1, 'a4': 1, 'a5': 1, 'b1': 0, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 1, 'a6': 1, 'a7': 0, 'a8': 0, 'a9': 0, 'a10': 0, 'b6': 0, 'b7': 0, 'b8': 1, 'b9': 0, 'b10': 1}. Best is trial 6 with value: 10.972781715240504.
[I 2025-02-18 16:22:23,003] Trial 12 finished with value: 11.155397495245769 and parameters: {'fit_intercept':

P {'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'z', 'a1', 'a4', 'b1', 'b3', 'b5', 'a6', 'a8', 'a9', 'b6', 'b8']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 1/5
Model name: linear_regression
MAE: 8.678654058165534
MSE: 115.8232988977597
RMSE: 10.762123345221411
PCC: 0.5

[I 2025-02-18 16:22:24,397] Trial 17 finished with value: 10.949030495371465 and parameters: {'alpha': 0.007117805241283343, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False, 'carat': 1, 'cut': 1, 'clarity': 0, 'depth': 1, 'table': 0, 'x': 0, 'z': 1, 'a1': 1, 'a4': 1, 'b1': 0, 'b3': 1, 'b5': 0, 'a6': 0, 'a8': 1, 'a9': 0, 'b6': 1, 'b8': 1}. Best is trial 15 with value: 10.944411388759402.
[I 2025-02-18 16:22:24,407] Trial 18 finished with value: 11.325886818753085 and parameters: {'alpha': 0.09683948040165082, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False, 'carat': 0, 'cut': 1, 'clarity': 0, 'depth': 1, 'table': 0, 'x': 0, 'z': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'b5': 0, 'a6': 0, 'a8': 0, 'a9': 0, 'b6': 1, 'b8': 0}. Best is trial 15 with value: 10.944411388759402.
[I 2025-02-18 16:22:24,418] Trial 19 finished with value: 10.949660090059766 and parameters: {'alpha': 0.0027021772939990387, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start':

P {'alpha': 0.049510767448672346, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'z', 'a1', 'a4', 'b1', 'b3', 'b5', 'b6']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'z', 'a1',
       'a4', 'b1', 'b3', 'b5', 'a6', 'a8', 'a9', 'b6', 'b8'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'z', 'a1',
       'a4', 'b1', 'b3', 'b5', 'a6', 'a8', 'a9', 'b6', 'b8'],
      dtype='object')
Fold: 1/5
Model name: lasso
MAE: 8.659280315685002
MSE: 115.57218620807838
RMSE: 10.750450511865928
PCC: 0.5347287312097687
Spearman R: 0.5569036355092327
R2 Score: 0.28571150007243196

G Index(['colour_G', 'colour_E', 'colour_F', 'colo

[I 2025-02-18 16:22:25,798] Trial 22 finished with value: 10.954567577174998 and parameters: {'alpha': 0.009931780408976175, 'solver': 'auto', 'fit_intercept': True, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'x': 1, 'z': 0, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'b5': 1, 'b6': 1}. Best is trial 22 with value: 10.954567577174998.
[I 2025-02-18 16:22:25,806] Trial 23 finished with value: 10.95456756731698 and parameters: {'alpha': 0.010209563010259524, 'solver': 'auto', 'fit_intercept': True, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'x': 1, 'z': 0, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'b5': 1, 'b6': 1}. Best is trial 23 with value: 10.95456756731698.
[I 2025-02-18 16:22:25,815] Trial 24 finished with value: 10.954567454871766 and parameters: {'alpha': 0.013378113928724865, 'solver': 'auto', 'fit_intercept': True, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'x': 1, 'z': 0, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'b5': 1, 'b6': 1}. Best is trial

P {'alpha': 0.08609910915994648, 'solver': 'sag', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'x', 'z', 'a1', 'a4', 'b3', 'b5', 'b6']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'z', 'a1',
       'a4', 'b1', 'b3', 'b5', 'b6'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'z', 'a1',
       'a4', 'b1', 'b3', 'b5', 'b6'],
      dtype='object')
Fold: 1/5
Model name: ridge
MAE: 8.772595505369653
MSE: 119.83668724673622
RMSE: 10.946994438965255
PCC: 0.5096240215391835
Spearman R: 0.5408987464448229
R2 Score: 0.2593549505445185

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'x', 'z', 'a1', 'a4', 'b3', 'b5', 'b6'],

[I 2025-02-18 16:22:27,628] Trial 1 finished with value: 12.951110597815132 and parameters: {'eta': 0.11883161816059946, 'gamma': 0.008852315089601251, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.7614609277071681, 'colsample_bytree': 0.6404126720141262, 'carat': 0, 'depth': 0, 'x': 1, 'z': 0, 'a1': 1, 'a4': 1, 'b3': 0, 'b5': 1, 'b6': 0}. Best is trial 0 with value: 10.326748650997894.
[I 2025-02-18 16:22:27,670] Trial 2 finished with value: 12.201437893600378 and parameters: {'eta': 0.01862822768923473, 'gamma': 5.011898100823444e-06, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.7438879480720381, 'colsample_bytree': 0.9927743141990832, 'carat': 0, 'depth': 0, 'x': 1, 'z': 1, 'a1': 0, 'a4': 0, 'b3': 0, 'b5': 1, 'b6': 0}. Best is trial 0 with value: 10.326748650997894.
[I 2025-02-18 16:22:27,746] Trial 3 finished with value: 11.814245549513519 and parameters: {'eta': 0.11087253411362244, 'gamma': 0.5566501207967748, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0

P {'eta': 0.05781641389309902, 'gamma': 0.0997835208168698, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.5145172275608891, 'colsample_bytree': 0.7547372451186747}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b3', 'b5', 'b6']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'x', 'z', 'a1', 'a4', 'b3', 'b5', 'b6'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'x', 'z', 'a1', 'a4', 'b3', 'b5', 'b6'],
      dtype='object')
Fold: 1/5
Model name: xgb
MAE: 7.779983744336709
MSE: 93.65781880476608
RMSE: 9.67769697835007
PCC: 0.6489890840420758
Spearman R: 0.6501396699764336
R2 Score: 0.4211522244625655

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3', 'b5', 'b6'],
      dtype='object')
G 

[I 2025-02-18 16:22:35,689] Trial 1 finished with value: 9.882038352235297 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 4, 'min_samples_leaf': 3, 'depth': 1, 'a1': 1, 'a4': 0, 'b3': 1, 'b5': 0, 'b6': 0}. Best is trial 1 with value: 9.882038352235297.
[I 2025-02-18 16:22:36,464] Trial 2 finished with value: 10.241303550966995 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 2, 'depth': 1, 'a1': 1, 'a4': 0, 'b3': 0, 'b5': 1, 'b6': 1}. Best is trial 1 with value: 9.882038352235297.
[I 2025-02-18 16:22:37,214] Trial 3 finished with value: 10.000009035167515 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 6, 'min_samples_leaf': 5, 'depth': 1, 'a1': 0, 'a4': 1, 'b3': 1, 'b5': 0, 'b6': 1}. Best is trial 1 with value: 9.882038352235297.
[I 2025-02-18 16:22:37,895] Trial 4 finished with val

P {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 7}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3', 'b5', 'b6'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3', 'b5', 'b6'],
      dtype='object')


[I 2025-02-18 16:23:53,236] A new study created in memory with name: no-name-e5acac26-a632-45a8-9e0c-09bc071873a2


Fold: 1/5
Model name: random_forest
MAE: 7.833354930182675
MSE: 95.0393032484029
RMSE: 9.748810350417271
PCC: 0.6433272763141981
Spearman R: 0.6434531917785906
R2 Score: 0.4126140243705295

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')


[I 2025-02-18 16:23:53,499] Trial 0 finished with value: 18.9940914626709 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.004675839729982812, 'subsample': 0.7117835236229859, 'max_depth': 4, 'min_samples_split': 2, 'min_samples_leaf': 9, 'max_features': 'log2', 'max_leaf_nodes': 506, 'depth': 1, 'a1': 0, 'a4': 0, 'b3': 1}. Best is trial 0 with value: 18.9940914626709.
[I 2025-02-18 16:23:54,523] Trial 1 finished with value: 12.531806367446652 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.0033477825391118985, 'subsample': 0.9174823294774543, 'max_depth': 10, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_leaf_nodes': 121, 'depth': 0, 'a1': 1, 'a4': 0, 'b3': 1}. Best is trial 1 with value: 12.531806367446652.
[I 2025-02-18 16:23:54,802] Trial 2 finished with value: 19.944293364481947 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.00217982

P {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.09928627865615468, 'subsample': 0.5689605676271026, 'max_depth': 3, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_leaf_nodes': 580}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
Fold: 1/5
Model name: gradient_boosting
MAE: 7.826731138581022
MSE: 95.74021191928837
RMSE: 9.784692735047349
PCC: 0.6390495618704781
Spearman R: 0.639538935139771
R2 Score: 0.40828209106080027

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
G Index(['colour_G

[I 2025-02-18 16:24:21,532] Trial 0 finished with value: 12.18337553979092 and parameters: {'n_estimators': 77, 'loss': 'square', 'learning_rate': 0.0023765182808967826, 'depth': 0, 'a1': 1, 'b3': 1}. Best is trial 0 with value: 12.18337553979092.
[I 2025-02-18 16:24:21,850] Trial 1 finished with value: 10.071016473056597 and parameters: {'n_estimators': 93, 'loss': 'square', 'learning_rate': 0.03469642504712658, 'depth': 1, 'a1': 0, 'b3': 1}. Best is trial 1 with value: 10.071016473056597.
[I 2025-02-18 16:24:22,088] Trial 2 finished with value: 12.495405117194347 and parameters: {'n_estimators': 79, 'loss': 'exponential', 'learning_rate': 0.005992041916865846, 'depth': 0, 'a1': 1, 'b3': 0}. Best is trial 1 with value: 10.071016473056597.
[I 2025-02-18 16:24:22,321] Trial 3 finished with value: 10.043981521202344 and parameters: {'n_estimators': 66, 'loss': 'square', 'learning_rate': 0.0161679141605621, 'depth': 1, 'a1': 0, 'b3': 1}. Best is trial 3 with value: 10.043981521202344.
[I 

P {'n_estimators': 99, 'loss': 'linear', 'learning_rate': 0.08681594646186346}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')


[I 2025-02-18 16:24:58,846] A new study created in memory with name: no-name-3bdb8e07-e9de-4a1d-92fd-de141db28018
[I 2025-02-18 16:24:58,918] Trial 0 finished with value: 10.118260847100656 and parameters: {'learning_rate': 0.07994684653992408, 'num_leaves': 464, 'subsample': 0.16258720182100211, 'colsample_bytree': 0.7180789026584177, 'min_data_in_leaf': 61, 'depth': 1, 'a1': 0, 'b3': 1}. Best is trial 0 with value: 10.118260847100656.
[I 2025-02-18 16:24:58,940] Trial 1 finished with value: 12.702771647287774 and parameters: {'learning_rate': 0.003945931271111599, 'num_leaves': 164, 'subsample': 0.6590305993448359, 'colsample_bytree': 0.19621491607959535, 'min_data_in_leaf': 91, 'depth': 0, 'a1': 1, 'b3': 0}. Best is trial 0 with value: 10.118260847100656.
[I 2025-02-18 16:24:58,966] Trial 2 finished with value: 10.558514281891254 and parameters: {'learning_rate': 0.0736270608611295, 'num_leaves': 572, 'subsample': 0.2004850425124518, 'colsample_bytree': 0.10173963973205591, 'min_dat

Fold: 1/5
Model name: ada_boost
MAE: 7.888892447630116
MSE: 96.7291044093941
RMSE: 9.83509554653101
PCC: 0.6341986561561036
Spearman R: 0.6367716808775976
R2 Score: 0.4021702872045029

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')


[I 2025-02-18 16:24:59,431] Trial 4 finished with value: 12.544125840087906 and parameters: {'learning_rate': 0.01949428101057438, 'num_leaves': 704, 'subsample': 0.7147602043761414, 'colsample_bytree': 0.3504388620561718, 'min_data_in_leaf': 1, 'depth': 0, 'a1': 1, 'b3': 1}. Best is trial 0 with value: 10.118260847100656.
[I 2025-02-18 16:24:59,472] Trial 5 finished with value: 12.420258124214003 and parameters: {'learning_rate': 0.06029209552189842, 'num_leaves': 713, 'subsample': 0.19012227518053104, 'colsample_bytree': 0.3856544204737131, 'min_data_in_leaf': 43, 'depth': 0, 'a1': 0, 'b3': 1}. Best is trial 0 with value: 10.118260847100656.
[I 2025-02-18 16:24:59,488] Trial 6 finished with value: 12.722374909023426 and parameters: {'learning_rate': 0.003962378295009242, 'num_leaves': 322, 'subsample': 0.8549932407271459, 'colsample_bytree': 0.1564506657679664, 'min_data_in_leaf': 61, 'depth': 0, 'a1': 0, 'b3': 0}. Best is trial 0 with value: 10.118260847100656.
[I 2025-02-18 16:24:5

P {'learning_rate': 0.06934811393006872, 'num_leaves': 380, 'subsample': 0.1709831096801862, 'colsample_bytree': 0.23566373425703607, 'min_data_in_leaf': 97}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Fold: 1/5
Model name: lgbm
MAE: 8.128548910169313
MSE: 102.74288426387122
RMSE: 10.136216466900814
PCC: 0.6059934949634637
Spearman R: 0.5982430700630083
R2 Score: 0.3650024016423551

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'co

[I 2025-02-18 16:25:04,085] Trial 13 finished with value: 11.053137302821375 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 1, 'clarity': 1, 'depth': 1, 'table': 1, 'price': 0, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 0, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 0, 'a8': 1, 'a9': 0, 'a10': 0, 'b6': 0, 'b7': 1, 'b8': 0, 'b9': 0, 'b10': 1}. Best is trial 8 with value: 11.049751913627844.
[I 2025-02-18 16:25:04,096] Trial 14 finished with value: 11.052852876146817 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 1, 'clarity': 1, 'depth': 1, 'table': 1, 'price': 0, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 0, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 1, 'a8': 1, 'a9': 1, 'a10': 0, 'b6': 0, 'b7': 1, 'b8': 0, 'b9': 0, 'b10': 1}. Best is trial 8 with value: 11.049751913627844.
[I 2025-02-18 16:25:04,109] Trial 15 finished with value: 11.052852876146817 and parameters: {'fit_intercept':

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'clarity', 'depth', 'table', 'price', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b6', 'b8', 'b9']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 2/5
Model name: linear_regression
MAE: 8.589072318548048
MSE: 116.6725274206948
RMSE: 10.801505794133279
PCC: 0.5438

[I 2025-02-18 16:25:05,475] Trial 19 finished with value: 10.891570754460899 and parameters: {'alpha': 0.002452530610331684, 'fit_intercept': True, 'selection': 'random', 'warm_start': False, 'carat': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'price': 0, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 0, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b6': 1, 'b8': 1, 'b9': 0}. Best is trial 18 with value: 10.891569851723709.
[I 2025-02-18 16:25:05,486] Trial 20 finished with value: 10.891567760716185 and parameters: {'alpha': 0.002053946521417841, 'fit_intercept': True, 'selection': 'random', 'warm_start': False, 'carat': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'price': 0, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 0, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b6': 1, 'b8': 1, 'b9': 0}. Best is trial 20 with value: 10.891567760716185.
[I 2025-02-18 16:25:05,496] Trial 21 finished with value: 10.891567695020425 and parameters: {'alpha': 0.0020475022265333946, 'fit_intercept': True, 'selection': 'random', 'warm_start': False, '

P {'alpha': 0.0011558217337647107, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'table', 'a1', 'a2', 'a4', 'a5', 'b1', 'b3', 'b6', 'b8']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'clarity', 'depth', 'table', 'price', 'a1', 'a2',
       'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b6', 'b8', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'clarity', 'depth', 'table', 'price', 'a1', 'a2',
       'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b6', 'b8', 'b9'],
      dtype='object')
Fold: 2/5
Model name: lasso
MAE: 8.588809896767772
MSE: 116.54873475479928
RMSE: 10.795773930330297
PCC: 0.5445077891829525
Spearman R: 0.5694324431376732
R2 Score: 0.2950042658847808

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', '

[I 2025-02-18 16:25:06,663] Trial 15 finished with value: 10.813174739949334 and parameters: {'alpha': 0.04177204219034148, 'solver': 'sag', 'fit_intercept': True, 'carat': 1, 'depth': 1, 'table': 0, 'a1': 1, 'a2': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b3': 1, 'b6': 1, 'b8': 1}. Best is trial 11 with value: 10.813174536914325.
[I 2025-02-18 16:25:06,685] Trial 16 finished with value: 10.813174844798214 and parameters: {'alpha': 0.013129664708201821, 'solver': 'sag', 'fit_intercept': True, 'carat': 1, 'depth': 1, 'table': 0, 'a1': 1, 'a2': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b3': 1, 'b6': 1, 'b8': 1}. Best is trial 11 with value: 10.813174536914325.
[I 2025-02-18 16:25:06,694] Trial 17 finished with value: 10.813169676173313 and parameters: {'alpha': 0.00683340591815161, 'solver': 'auto', 'fit_intercept': True, 'carat': 1, 'depth': 1, 'table': 0, 'a1': 1, 'a2': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b3': 1, 'b6': 1, 'b8': 1}. Best is trial 17 with value: 10.813169676173313.
[I 2025-02-18 16:25:06,702] Tria

P {'alpha': 0.030622969636631702, 'solver': 'saga', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a2', 'a4', 'a5', 'b1',
       'b3', 'b6', 'b8'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a2', 'a4', 'a5', 'b1',
       'b3', 'b6', 'b8'],
      dtype='object')
Fold: 2/5
Model name: ridge
MAE: 8.587714154549166
MSE: 116.5242702847226
RMSE: 10.794640813140687
PCC: 0.5446461535218501
Spearman R: 0.5695729070597294
R2 Score: 0.2951522498768656

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6'],
      dtype='object')
G Index(['

[I 2025-02-18 16:25:07,843] Trial 2 finished with value: 10.745376571686768 and parameters: {'eta': 0.011386335118220603, 'gamma': 0.00013482862339858664, 'max_depth': 6, 'min_child_weight': 6, 'subsample': 0.812654526798035, 'colsample_bytree': 0.9919662293982449, 'carat': 0, 'depth': 1, 'table': 1, 'a1': 0, 'a4': 1, 'b1': 0, 'b3': 0, 'b6': 0}. Best is trial 1 with value: 10.506169472377675.
[I 2025-02-18 16:25:07,984] Trial 3 finished with value: 10.730612256164218 and parameters: {'eta': 0.010041352024367952, 'gamma': 1.3908487661712023e-07, 'max_depth': 9, 'min_child_weight': 1, 'subsample': 0.770467726693973, 'colsample_bytree': 0.6639431879019724, 'carat': 1, 'depth': 1, 'table': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'b6': 0}. Best is trial 1 with value: 10.506169472377675.
[I 2025-02-18 16:25:08,031] Trial 4 finished with value: 12.74040800963056 and parameters: {'eta': 0.132008232966018, 'gamma': 0.007155232067156216, 'max_depth': 5, 'min_child_weight': 3, 'subsample': 0.72084

P {'eta': 0.06292612366636241, 'gamma': 0.4484481110822983, 'max_depth': 3, 'min_child_weight': 6, 'subsample': 0.6191711927345831, 'colsample_bytree': 0.8748898609545599}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6'],
      dtype='object')
Fold: 2/5
Model name: xgb
MAE: 7.420117402024857
MSE: 88.27997898127454
RMSE: 9.395742598713236
PCC: 0.6837299603237734
Spearman R: 0.6906104592228357
R2 Score: 0.4660001353037687

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b

[I 2025-02-18 16:25:14,800] Trial 1 finished with value: 10.229705254473037 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 6, 'min_samples_split': 10, 'min_samples_leaf': 8, 'carat': 1, 'depth': 1, 'table': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'b6': 1}. Best is trial 1 with value: 10.229705254473037.
[I 2025-02-18 16:25:15,458] Trial 2 finished with value: 12.674707962900433 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 3, 'min_samples_split': 5, 'min_samples_leaf': 9, 'carat': 1, 'depth': 0, 'table': 0, 'a1': 1, 'a4': 1, 'b1': 0, 'b3': 0, 'b6': 1}. Best is trial 1 with value: 10.229705254473037.
[I 2025-02-18 16:25:15,547] Trial 3 finished with value: 12.310516844943347 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 3, 'min_samples_split': 10, 'min_samples_leaf': 6, 'carat': 1, 'depth': 0, 'table': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b6': 0}. Best is trial 1 with val

P {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 1}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a4', 'b1', 'b3', 'b6'],
      dtype='object')


[I 2025-02-18 16:25:45,883] A new study created in memory with name: no-name-e8d70a41-f50f-4d7c-8f50-241870bc55b7


Fold: 2/5
Model name: random_forest
MAE: 7.516771143715798
MSE: 90.80216896640552
RMSE: 9.529017208841923
PCC: 0.6754650375302633
Spearman R: 0.68097633924857
R2 Score: 0.450743571739296

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:25:46,213] Trial 0 finished with value: 12.278145855868612 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.0014794835378999938, 'subsample': 0.8690507136872443, 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 7, 'max_features': 'log2', 'max_leaf_nodes': 586, 'depth': 1, 'a1': 0, 'a4': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 12.278145855868612.
[I 2025-02-18 16:25:46,407] Trial 1 finished with value: 10.178053688170257 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.021496407321896595, 'subsample': 0.3326893443744954, 'max_depth': 3, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_leaf_nodes': 587, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 10.178053688170257.
[I 2025-02-18 16:25:46,587] Trial 2 finished with value: 12.601178933976607 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learni

P {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.0935507068557073, 'subsample': 0.9527016297296121, 'max_depth': 4, 'min_samples_split': 9, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_leaf_nodes': 64}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:26:17,895] A new study created in memory with name: no-name-738a691f-77d8-45cd-93f2-e1705823bb60


Fold: 2/5
Model name: gradient_boosting
MAE: 7.44123182957836
MSE: 89.05135862527233
RMSE: 9.436702741173546
PCC: 0.6794123797261027
Spearman R: 0.6874418779069836
R2 Score: 0.46133410988919965

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:26:18,118] Trial 0 finished with value: 12.510389356334311 and parameters: {'n_estimators': 52, 'loss': 'exponential', 'learning_rate': 0.039681457806299496, 'depth': 0, 'a1': 1, 'a4': 0, 'b1': 0, 'b3': 1}. Best is trial 0 with value: 12.510389356334311.
[I 2025-02-18 16:26:18,495] Trial 1 finished with value: 10.031479346550682 and parameters: {'n_estimators': 66, 'loss': 'square', 'learning_rate': 0.0014326758415476814, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 0}. Best is trial 1 with value: 10.031479346550682.
[I 2025-02-18 16:26:18,802] Trial 2 finished with value: 12.38754066482507 and parameters: {'n_estimators': 58, 'loss': 'linear', 'learning_rate': 0.007014216434258164, 'depth': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 0}. Best is trial 1 with value: 10.031479346550682.
[I 2025-02-18 16:26:19,267] Trial 3 finished with value: 12.260003331877412 and parameters: {'n_estimators': 72, 'loss': 'square', 'learning_rate': 0.0018513156778368635, 'depth': 0, 'a1': 1, 'a

P {'n_estimators': 74, 'loss': 'exponential', 'learning_rate': 0.08381340696964068}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:27:02,540] A new study created in memory with name: no-name-abd42da2-3370-40c9-aa68-175a85f48edd
[I 2025-02-18 16:27:02,634] Trial 0 finished with value: 12.382360342830966 and parameters: {'learning_rate': 0.002346218808401675, 'num_leaves': 788, 'subsample': 0.6747643392246502, 'colsample_bytree': 0.4541294209362242, 'min_data_in_leaf': 42, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 0, 'b3': 1}. Best is trial 0 with value: 12.382360342830966.


Fold: 2/5
Model name: ada_boost
MAE: 7.662157355584002
MSE: 93.56968518422232
RMSE: 9.673142466862686
PCC: 0.6602527096569968
Spearman R: 0.666912652211205
R2 Score: 0.4340030457116191

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:27:02,730] Trial 1 finished with value: 12.502787580334937 and parameters: {'learning_rate': 0.0013881045798954115, 'num_leaves': 752, 'subsample': 0.9866342877874141, 'colsample_bytree': 0.6242666526825974, 'min_data_in_leaf': 62, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 12.382360342830966.
[I 2025-02-18 16:27:02,767] Trial 2 finished with value: 11.804217162679025 and parameters: {'learning_rate': 0.01001959421158992, 'num_leaves': 67, 'subsample': 0.3060976728880926, 'colsample_bytree': 0.4472669852315062, 'min_data_in_leaf': 47, 'depth': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 0}. Best is trial 2 with value: 11.804217162679025.
[I 2025-02-18 16:27:02,803] Trial 3 finished with value: 12.612986691263195 and parameters: {'learning_rate': 0.017996236230846875, 'num_leaves': 656, 'subsample': 0.4922018626809762, 'colsample_bytree': 0.10648075683790542, 'min_data_in_leaf': 87, 'depth': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial

P {'learning_rate': 0.04892658252362244, 'num_leaves': 768, 'subsample': 0.1821687610089457, 'colsample_bytree': 0.3812023146863825, 'min_data_in_leaf': 92}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Fold: 2/5
Model name: lgbm
MAE: 7.52244198992016
MSE: 90.65268467062516
RMSE: 9.521170341435194
PCC: 0.67718284410874
Spearman R: 0.6855631535402943
R2 Score: 0.451647792544988

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'col

[I 2025-02-18 16:27:08,672] Trial 10 finished with value: 12.36734284985136 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 0, 'table': 1, 'price': 1, 'x': 0, 'y': 1, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 0, 'b4': 0, 'b5': 1, 'a6': 1, 'a7': 0, 'a8': 1, 'a9': 1, 'a10': 1, 'b6': 1, 'b7': 1, 'b8': 1, 'b9': 1, 'b10': 1}. Best is trial 6 with value: 10.976396653260919.
[I 2025-02-18 16:27:08,683] Trial 11 finished with value: 10.98200598939229 and parameters: {'fit_intercept': False, 'carat': 0, 'cut': 1, 'clarity': 0, 'depth': 1, 'table': 1, 'price': 1, 'x': 0, 'y': 1, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 0, 'a8': 1, 'a9': 1, 'a10': 1, 'b6': 1, 'b7': 1, 'b8': 0, 'b9': 1, 'b10': 1}. Best is trial 6 with value: 10.976396653260919.
[I 2025-02-18 16:27:08,695] Trial 12 finished with value: 10.976651552277048 and parameters: {'fit_intercept': F

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'clarity', 'depth', 'x', 'a1', 'a4', 'a5', 'b1', 'b2', 'b3', 'a7', 'b9']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 3/5
Model name: linear_regression
MAE: 8.726770331775631
MSE: 120.17975594723353
RMSE: 10.962652778740807
PCC: 0.5122397474921366
Spearman R: 0.534742957907

[I 2025-02-18 16:27:10,046] Trial 25 finished with value: 11.166853536893608 and parameters: {'alpha': 0.09571848257011696, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False, 'clarity': 0, 'depth': 1, 'x': 0, 'a1': 1, 'a4': 1, 'a5': 1, 'b1': 0, 'b2': 1, 'b3': 1, 'a7': 1, 'b9': 0}. Best is trial 16 with value: 11.155432492339433.
[I 2025-02-18 16:27:10,057] Trial 26 finished with value: 12.553209221444376 and parameters: {'alpha': 0.011827471408929759, 'fit_intercept': False, 'selection': 'random', 'warm_start': True, 'clarity': 1, 'depth': 0, 'x': 1, 'a1': 1, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 0, 'a7': 1, 'b9': 0}. Best is trial 16 with value: 11.155432492339433.
[I 2025-02-18 16:27:10,066] Trial 27 finished with value: 11.15579952845524 and parameters: {'alpha': 0.023645186857807005, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False, 'clarity': 0, 'depth': 1, 'x': 0, 'a1': 1, 'a4': 1, 'a5': 1, 'b1': 0, 'b2': 1, 'b3': 1, 'a7': 1, 'b9': 0}. Best 

P {'alpha': 0.007213570548979384, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'clarity', 'depth', 'x', 'a1', 'b1', 'b3', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'x', 'a1', 'a4', 'a5', 'b1', 'b2', 'b3',
       'a7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'x', 'a1', 'a4', 'a5', 'b1', 'b2', 'b3',
       'a7', 'b9'],
      dtype='object')
Fold: 3/5
Model name: lasso
MAE: 8.833794564168867
MSE: 123.17922283148567
RMSE: 11.098613554470923
PCC: 0.49407427872521925
Spearman R: 0.5222476834951889
R2 Score: 0.24249635418777882

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'x', 'a1', 'b1', 'b3', 'b9'],
      dtype='object')
G I

[I 2025-02-18 16:27:10,983] Trial 23 finished with value: 11.101550218834094 and parameters: {'alpha': 0.0030684995888612394, 'solver': 'lsqr', 'fit_intercept': False, 'clarity': 1, 'depth': 1, 'x': 0, 'a1': 1, 'b1': 1, 'b3': 1, 'b9': 1}. Best is trial 21 with value: 11.101550202795142.
[I 2025-02-18 16:27:11,004] Trial 24 finished with value: 11.101722948096318 and parameters: {'alpha': 0.0027197597195228623, 'solver': 'sag', 'fit_intercept': False, 'clarity': 1, 'depth': 1, 'x': 0, 'a1': 1, 'b1': 1, 'b3': 1, 'b9': 1}. Best is trial 21 with value: 11.101550202795142.
[I 2025-02-18 16:27:11,011] Trial 25 finished with value: 11.101716911285006 and parameters: {'alpha': 0.0034221502919950394, 'solver': 'auto', 'fit_intercept': False, 'clarity': 1, 'depth': 1, 'x': 0, 'a1': 1, 'b1': 1, 'b3': 1, 'b9': 1}. Best is trial 21 with value: 11.101550202795142.
[I 2025-02-18 16:27:11,019] Trial 26 finished with value: 12.210086228461961 and parameters: {'alpha': 0.0019002729081237038, 'solver': '

P {'alpha': 0.07787683163359059, 'solver': 'sag', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'x', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'x', 'a1', 'b1', 'b3', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'x', 'a1', 'b1', 'b3', 'b9'],
      dtype='object')
Fold: 3/5
Model name: ridge
MAE: 8.831287061954551
MSE: 123.13974741617321
RMSE: 11.096835017975765
PCC: 0.49439456821659
Spearman R: 0.5225037705092854
R2 Score: 0.24273911242518098

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b

[I 2025-02-18 16:27:12,060] Trial 2 finished with value: 9.865375623554273 and parameters: {'eta': 0.030174415826582004, 'gamma': 0.016445009357904408, 'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.6025979337959784, 'colsample_bytree': 0.9646333185965235, 'depth': 1, 'x': 1, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 2 with value: 9.865375623554273.
[I 2025-02-18 16:27:12,110] Trial 3 finished with value: 12.947106522672522 and parameters: {'eta': 0.144089667949204, 'gamma': 0.009028825768628646, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8380121576936541, 'colsample_bytree': 0.6307827346090132, 'depth': 0, 'x': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 2 with value: 9.865375623554273.
[I 2025-02-18 16:27:12,140] Trial 4 finished with value: 12.690991813571667 and parameters: {'eta': 0.17066489771673302, 'gamma': 2.013720556922377e-06, 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.6065362893433588, 'colsample_bytree': 0.6821301777374643, 'depth': 0, 'x': 1,

P {'eta': 0.06460028240236114, 'gamma': 9.65597446141746e-07, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.5336485900267126, 'colsample_bytree': 0.7937856724175059}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'x', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b3'],
      dtype='object')
Fold: 3/5
Model name: xgb
MAE: 7.508595561171062
MSE: 90.60291180170567
RMSE: 9.518556182620642
PCC: 0.6659569499211988
Spearman R: 0.6728612452582988
R2 Score: 0.4428278208502203

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-18 16:27:18,757] Trial 1 finished with value: 9.925573701974452 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 8, 'depth': 1, 'x': 1, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 9.925573701974452.
[I 2025-02-18 16:27:18,851] Trial 2 finished with value: 12.418494425921285 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 5, 'min_samples_leaf': 9, 'depth': 0, 'x': 1, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 1 with value: 9.925573701974452.
[I 2025-02-18 16:27:19,287] Trial 3 finished with value: 12.767925596681527 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 2, 'depth': 0, 'x': 0, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 1 with value: 9.925573701974452.
[I 2025-02-18 16:27:20,011] Trial 4 finished with value: 9.76200683204666 and paramet

P {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 10}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'x', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:28:36,722] A new study created in memory with name: no-name-1f94e7b4-2e8c-4569-b6b2-e9fb01925f31


Fold: 3/5
Model name: random_forest
MAE: 7.577227162874462
MSE: 92.5819059207556
RMSE: 9.621949174712762
PCC: 0.6579357180572667
Spearman R: 0.6680297023553524
R2 Score: 0.4306577874163182

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:28:36,930] Trial 0 finished with value: 11.500866453257615 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.005166498838577819, 'subsample': 0.2652890912734677, 'max_depth': 3, 'min_samples_split': 7, 'min_samples_leaf': 8, 'max_features': 'log2', 'max_leaf_nodes': 724, 'depth': 1, 'a1': 1, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 11.500866453257615.
[I 2025-02-18 16:28:37,800] Trial 1 finished with value: 10.608451825292624 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.00862025087456619, 'subsample': 0.7272586325185059, 'max_depth': 8, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 980, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 10.608451825292624.
[I 2025-02-18 16:28:38,007] Trial 2 finished with value: 9.968066441535298 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.0148131

P {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.05304944275418881, 'subsample': 0.5171264718496452, 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_leaf_nodes': 417}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:29:20,590] A new study created in memory with name: no-name-7a5d3ce1-8566-4046-b66d-0df9a0afd5a6
[I 2025-02-18 16:29:20,784] Trial 0 finished with value: 10.543582895955995 and parameters: {'n_estimators': 53, 'loss': 'exponential', 'learning_rate': 0.012959415806870149, 'depth': 1, 'a1': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.543582895955995.


Fold: 3/5
Model name: gradient_boosting
MAE: 7.541117453954473
MSE: 91.57735868827484
RMSE: 9.569605983961662
PCC: 0.661485166764686
Spearman R: 0.6703082843027783
R2 Score: 0.4368353567620492

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:29:20,960] Trial 1 finished with value: 12.666339416972363 and parameters: {'n_estimators': 58, 'loss': 'square', 'learning_rate': 0.011216799769134814, 'depth': 0, 'a1': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.543582895955995.
[I 2025-02-18 16:29:21,179] Trial 2 finished with value: 10.310900132901494 and parameters: {'n_estimators': 61, 'loss': 'square', 'learning_rate': 0.0038410649839873295, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 2 with value: 10.310900132901494.
[I 2025-02-18 16:29:21,550] Trial 3 finished with value: 12.353368436750648 and parameters: {'n_estimators': 86, 'loss': 'square', 'learning_rate': 0.0040082579810119855, 'depth': 0, 'a1': 1, 'b1': 1, 'b3': 0}. Best is trial 2 with value: 10.310900132901494.
[I 2025-02-18 16:29:21,936] Trial 4 finished with value: 9.831980239658291 and parameters: {'n_estimators': 81, 'loss': 'exponential', 'learning_rate': 0.046789058940154504, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 1}. Best is tr

P {'n_estimators': 65, 'loss': 'linear', 'learning_rate': 0.061038466146497834}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:29:54,567] A new study created in memory with name: no-name-3539708d-5299-4f60-87c7-850b0299c3dd
[I 2025-02-18 16:29:54,675] Trial 0 finished with value: 12.49643403884435 and parameters: {'learning_rate': 0.057280114037163585, 'num_leaves': 95, 'subsample': 0.33909388359585924, 'colsample_bytree': 0.8713668259641065, 'min_data_in_leaf': 19, 'depth': 0, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 0 with value: 12.49643403884435.
[I 2025-02-18 16:29:54,696] Trial 1 finished with value: 12.743534310246451 and parameters: {'learning_rate': 0.02668875794038264, 'num_leaves': 13, 'subsample': 0.5133628245853602, 'colsample_bytree': 0.21694889673756618, 'min_data_in_leaf': 91, 'depth': 0, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 12.49643403884435.
[I 2025-02-18 16:29:54,749] Trial 2 finished with value: 12.535149565631864 and parameters: {'learning_rate': 0.005367622117435289, 'num_leaves': 409, 'subsample': 0.8492870581058959, 'colsample_bytree': 0.431632955970

Fold: 3/5
Model name: ada_boost
MAE: 7.693786406656802
MSE: 95.74579839833991
RMSE: 9.784978201219454
PCC: 0.642241269667713
Spearman R: 0.6563965823184447
R2 Score: 0.41120109633127444

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:29:54,789] Trial 3 finished with value: 12.31429764424482 and parameters: {'learning_rate': 0.0012198746198999527, 'num_leaves': 124, 'subsample': 0.14868117321506727, 'colsample_bytree': 0.8320620564532193, 'min_data_in_leaf': 27, 'depth': 1, 'a1': 1, 'b1': 1, 'b3': 0}. Best is trial 3 with value: 12.31429764424482.
[I 2025-02-18 16:29:55,000] Trial 4 finished with value: 10.797321149514005 and parameters: {'learning_rate': 0.030674488070274116, 'num_leaves': 938, 'subsample': 0.5253941148243322, 'colsample_bytree': 0.6216271099652289, 'min_data_in_leaf': 5, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 4 with value: 10.797321149514005.
[I 2025-02-18 16:29:55,104] Trial 5 finished with value: 12.522182325240992 and parameters: {'learning_rate': 0.0022422304637329227, 'num_leaves': 227, 'subsample': 0.8280441770326846, 'colsample_bytree': 0.8613208060720126, 'min_data_in_leaf': 42, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 4 with value: 10.797321149

P {'learning_rate': 0.06919134774713838, 'num_leaves': 480, 'subsample': 0.5994390565231381, 'colsample_bytree': 0.49863331531436317, 'min_data_in_leaf': 92}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Fold: 3/5
Model name: lgbm
MAE: 7.515996933985207
MSE: 91.73242466779942
RMSE: 9.577704561522005
PCC: 0.6606010658573935
Spearman R: 0.669285651088145
R2 Score: 0.4358817621368256

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colou

[I 2025-02-18 16:30:03,210] Trial 13 finished with value: 10.701729172904562 and parameters: {'fit_intercept': True, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 1, 'x': 0, 'y': 0, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 0, 'a6': 1, 'a7': 0, 'a8': 1, 'a9': 0, 'a10': 1, 'b6': 1, 'b7': 1, 'b8': 0, 'b9': 1, 'b10': 0}. Best is trial 11 with value: 10.701729172904562.
[I 2025-02-18 16:30:03,221] Trial 14 finished with value: 10.937158487256383 and parameters: {'fit_intercept': True, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 1, 'x': 0, 'y': 0, 'z': 0, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 0, 'a6': 1, 'a7': 0, 'a8': 0, 'a9': 0, 'a10': 1, 'b6': 1, 'b7': 1, 'b8': 0, 'b9': 1, 'b10': 0}. Best is trial 11 with value: 10.701729172904562.
[I 2025-02-18 16:30:03,234] Trial 15 finished with value: 10.701729172904562 and parameters: {'fit_intercept':

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'price', 'y', 'a1', 'a2', 'a4', 'a5', 'b1', 'b3', 'a8', 'a10', 'b6', 'b7', 'b9']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 4/5
Model name: linear_regression
MAE: 8.578814658656363
MSE: 114.25929986521085
RMSE: 10.689214183709243
PCC: 0.5451299049131537


[I 2025-02-18 16:30:04,592] Trial 22 finished with value: 10.688081632885602 and parameters: {'alpha': 0.010791886519554535, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True, 'carat': 1, 'depth': 1, 'price': 0, 'y': 0, 'a1': 1, 'a2': 0, 'a4': 1, 'a5': 0, 'b1': 1, 'b3': 1, 'a8': 1, 'a10': 1, 'b6': 0, 'b7': 0, 'b9': 1}. Best is trial 22 with value: 10.688081632885602.
[I 2025-02-18 16:30:04,601] Trial 23 finished with value: 10.68779561318451 and parameters: {'alpha': 0.013109138315171064, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True, 'carat': 1, 'depth': 1, 'price': 0, 'y': 0, 'a1': 1, 'a2': 0, 'a4': 1, 'a5': 0, 'b1': 1, 'b3': 1, 'a8': 1, 'a10': 1, 'b6': 0, 'b7': 0, 'b9': 1}. Best is trial 23 with value: 10.68779561318451.
[I 2025-02-18 16:30:04,611] Trial 24 finished with value: 10.687824482482723 and parameters: {'alpha': 0.012860162361351823, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True, 'carat': 1, 'depth': 1, 'price': 0, 'y

P {'alpha': 0.02815323403515394, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3', 'a10', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'price', 'y', 'a1', 'a2', 'a4', 'a5',
       'b1', 'b3', 'a8', 'a10', 'b6', 'b7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'price', 'y', 'a1', 'a2', 'a4', 'a5',
       'b1', 'b3', 'a8', 'a10', 'b6', 'b7', 'b9'],
      dtype='object')
Fold: 4/5
Model name: lasso
MAE: 8.576309598561249
MSE: 114.09436788356497
RMSE: 10.681496518913676
PCC: 0.5460308658562397
Spearman R: 0.574366888424566
R2 Score: 0.2968724427706546

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a2', 'a

[I 2025-02-18 16:30:05,673] Trial 22 finished with value: 10.684072299033431 and parameters: {'alpha': 0.0038427495629930614, 'solver': 'sparse_cg', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'a10': 0, 'b9': 0}. Best is trial 17 with value: 10.6840721010757.
[I 2025-02-18 16:30:05,681] Trial 23 finished with value: 10.684072255596774 and parameters: {'alpha': 0.004151920506921027, 'solver': 'sparse_cg', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'a10': 0, 'b9': 0}. Best is trial 17 with value: 10.6840721010757.
[I 2025-02-18 16:30:05,699] Trial 24 finished with value: 10.684072188156819 and parameters: {'alpha': 0.00463194685765031, 'solver': 'sparse_cg', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 1, 'b3': 1, 'a10': 0, 'b9': 0}. Best is trial 17 with value: 10.6840721010757.
[I 2025-02-18 16:30:05,708] Trial 25 finished with value: 10.684071961053926 and parameters: {'alpha': 0.0062484

P {'alpha': 0.09934478381205337, 'solver': 'lsqr', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3', 'a10', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3', 'a10', 'b9'],
      dtype='object')
Fold: 4/5
Model name: ridge
MAE: 8.571952615446413
MSE: 114.14802864904614
RMSE: 10.684008079791317
PCC: 0.5459788799267256
Spearman R: 0.5738082876204249
R2 Score: 0.29654174841955105

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'dept

[I 2025-02-18 16:30:06,739] Trial 4 finished with value: 12.312749817257291 and parameters: {'eta': 0.08717197245387556, 'gamma': 0.00012613594102799618, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.903390643568718, 'colsample_bytree': 0.8391394536152434, 'depth': 0, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 1, 'b3': 0}. Best is trial 1 with value: 9.664022692388839.
[I 2025-02-18 16:30:06,811] Trial 5 finished with value: 12.21926334571757 and parameters: {'eta': 0.032885177300249674, 'gamma': 2.5589860554287357e-05, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.6858438115968171, 'colsample_bytree': 0.7010795359058128, 'depth': 0, 'a1': 1, 'a2': 1, 'a4': 1, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 9.664022692388839.
[I 2025-02-18 16:30:06,931] Trial 6 finished with value: 12.588665962329978 and parameters: {'eta': 0.023808523692377136, 'gamma': 0.37687902660983763, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.5470249610731597, 'colsample_bytree': 0.774532743088

P {'eta': 0.035031645404655164, 'gamma': 0.0009023778235575723, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.9823697641906878, 'colsample_bytree': 0.935345875319645}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a2', 'a4', 'b1', 'b3'],
      dtype='object')
Fold: 4/5
Model name: xgb
MAE: 7.316211473022264
MSE: 84.70833677158085
RMSE: 9.203713205635042
PCC: 0.6919382547798356
Spearman R: 0.6987163755220946
R2 Score: 0.4779692721384369

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'co

[I 2025-02-18 16:30:14,882] Trial 0 finished with value: 9.6982723022139 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 8, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 9.6982723022139.
[I 2025-02-18 16:30:14,988] Trial 1 finished with value: 12.305482268377228 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 6, 'depth': 0, 'a1': 1, 'a4': 0, 'b1': 0, 'b3': 1}. Best is trial 0 with value: 9.6982723022139.
[I 2025-02-18 16:30:15,093] Trial 2 finished with value: 12.220996116872946 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 9, 'depth': 0, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 9.6982723022139.
[I 2025-02-18 16:30:15,781] Trial 3 finished with value: 12.253489211553347 and parameters

P {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 3, 'min_samples_leaf': 9}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:30:38,363] A new study created in memory with name: no-name-0eba5efc-5cc8-408e-ba13-4890a25e409c


Fold: 4/5
Model name: random_forest
MAE: 7.571626990746243
MSE: 90.2266492064836
RMSE: 9.498770931361783
PCC: 0.6684291077383331
Spearman R: 0.6738608257268851
R2 Score: 0.44396165533529053

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')


[I 2025-02-18 16:30:38,557] Trial 0 finished with value: 10.174305282398368 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.012658518943922718, 'subsample': 0.6300210021739397, 'max_depth': 4, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_leaf_nodes': 152, 'depth': 1, 'a1': 1, 'a4': 1, 'b3': 1}. Best is trial 0 with value: 10.174305282398368.
[I 2025-02-18 16:30:38,797] Trial 1 finished with value: 15.803006357734517 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.029227566657074062, 'subsample': 0.33662455965004384, 'max_depth': 4, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_leaf_nodes': 681, 'depth': 1, 'a1': 1, 'a4': 1, 'b3': 1}. Best is trial 0 with value: 10.174305282398368.
[I 2025-02-18 16:30:38,997] Trial 2 finished with value: 17.365083245824557 and parameters: {'loss': 'quantile', 'criterion': 'friedman_mse', 'learning_rate': 0.0143429

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.06847138327373646, 'subsample': 0.8020953971785622, 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 232}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')


[I 2025-02-18 16:31:07,986] A new study created in memory with name: no-name-8933b8be-7054-4087-be1b-433cddebc814


Fold: 4/5
Model name: gradient_boosting
MAE: 7.5351953332731965
MSE: 89.4983106960821
RMSE: 9.460354681304613
PCC: 0.6702953449382276
Spearman R: 0.6744655330514695
R2 Score: 0.4484501755589819

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')


[I 2025-02-18 16:31:08,239] Trial 0 finished with value: 9.995518532885642 and parameters: {'n_estimators': 70, 'loss': 'exponential', 'learning_rate': 0.06921364550265093, 'depth': 1, 'a1': 1, 'a4': 0, 'b3': 0}. Best is trial 0 with value: 9.995518532885642.
[I 2025-02-18 16:31:08,735] Trial 1 finished with value: 9.668181194431169 and parameters: {'n_estimators': 83, 'loss': 'exponential', 'learning_rate': 0.001685731821356102, 'depth': 1, 'a1': 1, 'a4': 1, 'b3': 1}. Best is trial 1 with value: 9.668181194431169.
[I 2025-02-18 16:31:08,943] Trial 2 finished with value: 12.427340822260453 and parameters: {'n_estimators': 66, 'loss': 'square', 'learning_rate': 0.010650867305183147, 'depth': 0, 'a1': 0, 'a4': 0, 'b3': 1}. Best is trial 1 with value: 9.668181194431169.
[I 2025-02-18 16:31:09,124] Trial 3 finished with value: 10.190012108771066 and parameters: {'n_estimators': 76, 'loss': 'exponential', 'learning_rate': 0.014534446295948841, 'depth': 1, 'a1': 0, 'a4': 0, 'b3': 0}. Best is

P {'n_estimators': 65, 'loss': 'exponential', 'learning_rate': 0.07974437806812792}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')


[I 2025-02-18 16:31:43,708] A new study created in memory with name: no-name-720c8003-4f83-4383-b6e9-2a189d35ad69
[I 2025-02-18 16:31:43,820] Trial 0 finished with value: 12.227801764933233 and parameters: {'learning_rate': 0.01934722421633312, 'num_leaves': 335, 'subsample': 0.936424623694993, 'colsample_bytree': 0.9007491333300488, 'min_data_in_leaf': 51, 'depth': 0, 'a1': 1, 'a4': 1, 'b3': 1}. Best is trial 0 with value: 12.227801764933233.
[I 2025-02-18 16:31:43,845] Trial 1 finished with value: 11.406889381487236 and parameters: {'learning_rate': 0.03538115057276387, 'num_leaves': 766, 'subsample': 0.2795028157148749, 'colsample_bytree': 0.05280998544378005, 'min_data_in_leaf': 89, 'depth': 1, 'a1': 1, 'a4': 1, 'b3': 0}. Best is trial 1 with value: 11.406889381487236.


Fold: 4/5
Model name: ada_boost
MAE: 7.6726163384594175
MSE: 92.22123396293256
RMSE: 9.60318873931636
PCC: 0.6579924540656712
Spearman R: 0.6608895334034461
R2 Score: 0.4316696593892674

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')


[I 2025-02-18 16:31:44,075] Trial 2 finished with value: 12.769416918972476 and parameters: {'learning_rate': 0.029078008898570176, 'num_leaves': 283, 'subsample': 0.981007572546356, 'colsample_bytree': 0.6501535114384233, 'min_data_in_leaf': 9, 'depth': 0, 'a1': 0, 'a4': 0, 'b3': 1}. Best is trial 1 with value: 11.406889381487236.
[I 2025-02-18 16:31:44,091] Trial 3 finished with value: 12.737457955493522 and parameters: {'learning_rate': 0.025931566681078006, 'num_leaves': 761, 'subsample': 0.5404871405637997, 'colsample_bytree': 0.1393901699269705, 'min_data_in_leaf': 78, 'depth': 0, 'a1': 0, 'a4': 0, 'b3': 0}. Best is trial 1 with value: 11.406889381487236.
[I 2025-02-18 16:31:44,335] Trial 4 finished with value: 12.543620145437638 and parameters: {'learning_rate': 0.0025086838672323734, 'num_leaves': 282, 'subsample': 0.7379935141611935, 'colsample_bytree': 0.8536678402710115, 'min_data_in_leaf': 13, 'depth': 0, 'a1': 1, 'a4': 0, 'b3': 1}. Best is trial 1 with value: 11.4068893814

P {'learning_rate': 0.03830073645264637, 'num_leaves': 691, 'subsample': 0.20150038371177337, 'colsample_bytree': 0.9207067208685865, 'min_data_in_leaf': 63}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b3'],
      dtype='object')
Fold: 4/5
Model name: lgbm
MAE: 7.7653326772719575
MSE: 94.22547604919255
RMSE: 9.706980789575745
PCC: 0.6475675181739362
Spearman R: 0.6491679015176138
R2 Score: 0.41931814836949144

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F

[I 2025-02-18 16:31:49,669] Trial 9 finished with value: 10.967951486325063 and parameters: {'fit_intercept': True, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'price': 0, 'x': 1, 'y': 1, 'z': 0, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 0, 'a5': 0, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 0, 'a7': 1, 'a8': 0, 'a9': 0, 'a10': 0, 'b6': 0, 'b7': 1, 'b8': 0, 'b9': 1, 'b10': 0}. Best is trial 5 with value: 10.452531746484228.
[I 2025-02-18 16:31:49,683] Trial 10 finished with value: 10.965899900334843 and parameters: {'fit_intercept': True, 'carat': 0, 'cut': 1, 'clarity': 1, 'depth': 1, 'table': 0, 'price': 0, 'x': 1, 'y': 0, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 1, 'b1': 0, 'b2': 0, 'b3': 0, 'b4': 1, 'b5': 0, 'a6': 1, 'a7': 0, 'a8': 1, 'a9': 0, 'a10': 1, 'b6': 1, 'b7': 0, 'b8': 0, 'b9': 1, 'b10': 1}. Best is trial 5 with value: 10.452531746484228.
[I 2025-02-18 16:31:49,695] Trial 11 finished with value: 10.337948610119653 and parameters: {'fit_intercept': Tr

P {'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'clarity', 'depth', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b5', 'a7', 'b7', 'b9', 'b10']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 5/5
Model name: linear_regression
MAE: 8.181410102969199
MSE: 105.97285557522574
RMSE: 10.294311806780758
PCC:

[I 2025-02-18 16:31:51,163] Trial 19 finished with value: 10.673916604689413 and parameters: {'alpha': 0.0037245758677240965, 'fit_intercept': False, 'selection': 'random', 'warm_start': True, 'carat': 1, 'clarity': 1, 'depth': 1, 'y': 1, 'z': 1, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 1, 'b5': 1, 'a7': 1, 'b7': 1, 'b9': 1, 'b10': 1}. Best is trial 19 with value: 10.673916604689413.
[I 2025-02-18 16:31:51,193] Trial 20 finished with value: 10.67279344299646 and parameters: {'alpha': 0.0030343257049647867, 'fit_intercept': False, 'selection': 'random', 'warm_start': True, 'carat': 1, 'clarity': 1, 'depth': 1, 'y': 1, 'z': 1, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 1, 'b5': 1, 'a7': 1, 'b7': 1, 'b9': 1, 'b10': 1}. Best is trial 20 with value: 10.67279344299646.
[I 2025-02-18 16:31:51,221] Trial 21 finished with value: 10.67277825416038 and parameters: {'alpha': 0.0030238296748580818, 'fit_intercept': False, 'selection': 'random', 

P {'alpha': 0.008003343633555664, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'a5', 'b1', 'b3', 'a7', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'clarity', 'depth', 'y', 'z', 'a1', 'a2', 'a3',
       'a4', 'a5', 'b1', 'b2', 'b3', 'b5', 'a7', 'b7', 'b9', 'b10'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'clarity', 'depth', 'y', 'z', 'a1', 'a2', 'a3',
       'a4', 'a5', 'b1', 'b2', 'b3', 'b5', 'a7', 'b7', 'b9', 'b10'],
      dtype='object')
Fold: 5/5
Model name: lasso
MAE: 8.19282476226158
MSE: 106.31080958221887
RMSE: 10.31071334012438
PCC: 0.5621138750620092
Spearman R: 0.5907336789975308
R2 Score: 0.3155540612656311

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
     

[I 2025-02-18 16:31:53,305] Trial 25 finished with value: 10.429031654502571 and parameters: {'alpha': 0.03787000820390836, 'solver': 'sag', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 0, 'a5': 0, 'b1': 1, 'b3': 1, 'a7': 1, 'b9': 1}. Best is trial 25 with value: 10.429031654502571.
[I 2025-02-18 16:31:53,327] Trial 26 finished with value: 12.37592904397547 and parameters: {'alpha': 0.037421700912015285, 'solver': 'sag', 'fit_intercept': True, 'depth': 0, 'a1': 0, 'a4': 1, 'a5': 0, 'b1': 0, 'b3': 0, 'a7': 0, 'b9': 0}. Best is trial 25 with value: 10.429031654502571.
[I 2025-02-18 16:31:53,345] Trial 27 finished with value: 10.42903037788621 and parameters: {'alpha': 0.04742703223900336, 'solver': 'sag', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 0, 'a5': 0, 'b1': 1, 'b3': 1, 'a7': 1, 'b9': 1}. Best is trial 27 with value: 10.42903037788621.
[I 2025-02-18 16:31:53,364] Trial 28 finished with value: 10.429029422574782 and parameters: {'alpha': 0.05458116088209931, 'solver':

P {'alpha': 0.0814130179532292, 'solver': 'lsqr', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'a5', 'b1', 'b3', 'a7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'a5', 'b1', 'b3', 'a7', 'b9'],
      dtype='object')
Fold: 5/5
Model name: ridge
MAE: 8.29058918114248
MSE: 108.75868859658553
RMSE: 10.42874338530705
PCC: 0.5476696629603204
Spearman R: 0.5809324466532995
R2 Score: 0.2997942259630827

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7', 'b9'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a

[I 2025-02-18 16:31:54,368] Trial 2 finished with value: 10.525212174451829 and parameters: {'eta': 0.010567011834702085, 'gamma': 0.43740684370455013, 'max_depth': 8, 'min_child_weight': 3, 'subsample': 0.9081424167732479, 'colsample_bytree': 0.9802140548041296, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0, 'a7': 1, 'b9': 0}. Best is trial 2 with value: 10.525212174451829.
[I 2025-02-18 16:31:54,424] Trial 3 finished with value: 12.292683168576556 and parameters: {'eta': 0.011800075008807188, 'gamma': 2.1742685078415504e-06, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.6719619236392125, 'colsample_bytree': 0.8238915143908546, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 0, 'a7': 1, 'b9': 1}. Best is trial 2 with value: 10.525212174451829.
[I 2025-02-18 16:31:54,467] Trial 4 finished with value: 12.657131693380828 and parameters: {'eta': 0.1143641154179874, 'gamma': 0.0020598167302239706, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.5658963639913495, 'colsample_bytree': 0.80407268118

P {'eta': 0.0698243135287244, 'gamma': 0.43134287639048496, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.8829148680975475, 'colsample_bytree': 0.9506783496348653}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7', 'b9'],
      dtype='object')
Fold: 5/5
Model name: xgb
MAE: 7.30764363367425
MSE: 84.06662350764644
RMSE: 9.168785279831043
PCC: 0.6776416375255456
Spearman R: 0.6819868562448659
R2 Score: 0.45876567708366334

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colou

[I 2025-02-18 16:32:01,385] Trial 0 finished with value: 10.180221403838564 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 6, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0, 'a7': 1}. Best is trial 0 with value: 10.180221403838564.
[I 2025-02-18 16:32:01,480] Trial 1 finished with value: 12.054030880624754 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 4, 'min_samples_split': 6, 'min_samples_leaf': 9, 'depth': 0, 'a1': 1, 'b1': 1, 'b3': 0, 'a7': 0}. Best is trial 0 with value: 10.180221403838564.
[I 2025-02-18 16:32:01,575] Trial 2 finished with value: 11.854119419505126 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 10, 'min_samples_leaf': 2, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 1, 'a7': 1}. Best is trial 0 with value: 10.180221403838564.
[I 2025-02-18 16:32:02,459] Trial 3 finished with value: 9.50700354735157 and

P {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 1}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3', 'a7'],
      dtype='object')


[I 2025-02-18 16:32:33,588] A new study created in memory with name: no-name-2c270ff3-1b1a-4300-bd77-dbb7366a536e
[I 2025-02-18 16:32:33,705] Trial 0 finished with value: 10.020356885096925 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.04224281494546738, 'subsample': 0.318398874334952, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_leaf_nodes': 25, 'depth': 1, 'a1': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.020356885096925.


Fold: 5/5
Model name: random_forest
MAE: 7.3740815809904685
MSE: 85.11517178378136
RMSE: 9.225788409874864
PCC: 0.6739045697511903
Spearman R: 0.676192125465674
R2 Score: 0.4520149561363974

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:32:34,405] Trial 1 finished with value: 18.726452383811125 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.005802285420132302, 'subsample': 0.8753480009253999, 'max_depth': 7, 'min_samples_split': 5, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'max_leaf_nodes': 565, 'depth': 1, 'a1': 1, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 10.020356885096925.
[I 2025-02-18 16:32:34,733] Trial 2 finished with value: 9.632357038242395 and parameters: {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.03153224454404916, 'subsample': 0.7359395018400977, 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 8, 'max_features': 'log2', 'max_leaf_nodes': 355, 'depth': 1, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 2 with value: 9.632357038242395.
[I 2025-02-18 16:32:35,137] Trial 3 finished with value: 11.702834841909304 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.0542442829875

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.06212193344059796, 'subsample': 0.6636358946007167, 'max_depth': 4, 'min_samples_split': 3, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'max_leaf_nodes': 224}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:33:18,572] A new study created in memory with name: no-name-e9cafda5-9e7f-425a-ab28-ed3a5b02d3e0
[I 2025-02-18 16:33:18,736] Trial 0 finished with value: 10.14431005981151 and parameters: {'n_estimators': 66, 'loss': 'square', 'learning_rate': 0.0016542861550532767, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.14431005981151.


Fold: 5/5
Model name: gradient_boosting
MAE: 7.331689573388035
MSE: 84.58689416583402
RMSE: 9.197113360497088
PCC: 0.6749056749215454
Spearman R: 0.6784659945179667
R2 Score: 0.4554160916516782

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:33:18,919] Trial 1 finished with value: 10.149109445726138 and parameters: {'n_estimators': 75, 'loss': 'exponential', 'learning_rate': 0.011277356052050307, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.14431005981151.
[I 2025-02-18 16:33:19,136] Trial 2 finished with value: 12.348261378030742 and parameters: {'n_estimators': 71, 'loss': 'square', 'learning_rate': 0.0026712940256227702, 'depth': 0, 'a1': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.14431005981151.
[I 2025-02-18 16:33:19,371] Trial 3 finished with value: 9.913346871325862 and parameters: {'n_estimators': 66, 'loss': 'exponential', 'learning_rate': 0.09900069049632142, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 3 with value: 9.913346871325862.
[I 2025-02-18 16:33:19,591] Trial 4 finished with value: 10.139200915099188 and parameters: {'n_estimators': 96, 'loss': 'exponential', 'learning_rate': 0.0011143708877051474, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best

P {'n_estimators': 91, 'loss': 'linear', 'learning_rate': 0.08315202481660834}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:34:03,888] A new study created in memory with name: no-name-a0a30309-7554-4d58-b3a6-102aaa1091e0
[I 2025-02-18 16:34:03,980] Trial 0 finished with value: 12.365761235616683 and parameters: {'learning_rate': 0.0013056346865094244, 'num_leaves': 911, 'subsample': 0.5696596270686342, 'colsample_bytree': 0.6411080986155429, 'min_data_in_leaf': 54, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 12.365761235616683.
[I 2025-02-18 16:34:04,024] Trial 1 finished with value: 11.92547960142028 and parameters: {'learning_rate': 0.09205142532450777, 'num_leaves': 496, 'subsample': 0.5942540362423929, 'colsample_bytree': 0.4923198811487761, 'min_data_in_leaf': 89, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 11.92547960142028.


Fold: 5/5
Model name: ada_boost
MAE: 7.440195009243428
MSE: 87.46335204701604
RMSE: 9.3521843462913
PCC: 0.6615139955044345
Spearman R: 0.6690527578664317
R2 Score: 0.43689699728627507

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 16:34:04,068] Trial 2 finished with value: 11.963470657145555 and parameters: {'learning_rate': 0.0014343689306455273, 'num_leaves': 362, 'subsample': 0.651834609464172, 'colsample_bytree': 0.9293237462147566, 'min_data_in_leaf': 89, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 1 with value: 11.92547960142028.
[I 2025-02-18 16:34:04,100] Trial 3 finished with value: 12.047460355123652 and parameters: {'learning_rate': 0.0012103755238531172, 'num_leaves': 780, 'subsample': 0.21772630591150227, 'colsample_bytree': 0.7022299665512746, 'min_data_in_leaf': 88, 'depth': 1, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 11.92547960142028.
[I 2025-02-18 16:34:04,278] Trial 4 finished with value: 11.768806141763777 and parameters: {'learning_rate': 0.0022348691171688974, 'num_leaves': 463, 'subsample': 0.36549231555392475, 'colsample_bytree': 0.7939728461902109, 'min_data_in_leaf': 6, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 4 with value: 11.76880614

P {'learning_rate': 0.04152185578142371, 'num_leaves': 1017, 'subsample': 0.5128426639228317, 'colsample_bytree': 0.9039894149692504, 'min_data_in_leaf': 83}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Fold: 5/5
Model name: lgbm
MAE: 7.367489131844955
MSE: 84.87467280576088
RMSE: 9.212745128666096
PCC: 0.6738416221302715
Spearman R: 0.6778058126194582
R2 Score: 0.45356332689401624



Compute average scores and rank models by R2 score

In [13]:
for model_name, model_metrics in model_scores.items():
    for metric, scores in model_metrics.items():
        model_scores[model_name][metric] = sum(scores) / len(scores)
    model_scores[model_name] = dict(model_scores[model_name])

model_scores = dict(sorted(model_scores.items(), key=lambda x: x[1]["r2_score"], reverse=True))

In [14]:
# Print results
for i, (model_name, model_metrics) in enumerate(model_scores.items()):
    print(f"No.{i+1} Model: {model_name}")
    for metric, score in model_metrics.items():
        print(f"{metric}: {score}")
    print()

No.1 Model: xgb
mae: 7.4665103628458285
mse: 88.26313397339472
rmse: 9.392898849030006
pcc: 0.6736511773184859
spearman_r: 0.6788629212449057
r2_score: 0.453343025967731

No.2 Model: gradient_boosting
mae: 7.535193065755017
mse: 90.09082681895033
rmse: 9.489693900396853
pcc: 0.6650296256442079
spearman_r: 0.6700441249837938
r2_score: 0.4420635649845418

No.3 Model: random_forest
mae: 7.574612361701929
mse: 90.7530398251658
rmse: 9.52486721504172
pcc: 0.6638123418782502
spearman_r: 0.6685024369150143
r2_score: 0.43799839899956633

No.4 Model: lgbm
mae: 7.659961928638319
mse: 92.84562849144984
rmse: 9.63096345761997
pcc: 0.653037309046761
spearman_r: 0.6560131177657038
r2_score: 0.42508268631753526

No.5 Model: ada_boost
mae: 7.671529511514753
mse: 93.14583480038098
rmse: 9.649717860044161
pcc: 0.6512398170101837
spearman_r: 0.658004641335425
r2_score: 0.4231882171845879

No.6 Model: linear_regression
mae: 8.550944294022955
mse: 114.58154754122492
rmse: 10.701961581717097
pcc: 0.53969696