In [1]:
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
import optuna
import os
import json
from collections import defaultdict

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

from src.utils import get_kfold_data, convert_non_numeric_to_numeric, calculate_r2_score, calculate_metrics
from src.normalisation import Normaliser
from src.constants import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv(DATA_PATH)

In [3]:
data.head()

Unnamed: 0,outcome,carat,cut,color,clarity,depth,table,price,x,y,...,a6,a7,a8,a9,a10,b6,b7,b8,b9,b10
0,-26.701232,1.14,Ideal,G,VS1,62.3,56.0,7948,6.73,6.7,...,0.168836,-0.273758,1.107832,1.247795,0.482344,0.489511,-0.321138,0.573382,0.446871,-1.990581
1,6.548093,0.38,Premium,H,VS2,60.5,59.0,898,4.69,4.66,...,-0.256549,0.315373,-0.030326,-0.114335,-1.059588,-1.76136,-1.343951,-1.00255,-0.22503,-0.446653
2,6.612562,0.5,Very Good,E,SI1,60.7,58.0,1351,5.09,5.13,...,-1.193327,-0.657307,-0.591726,-0.446856,-0.765286,-0.816544,-1.397794,-0.47713,0.810509,1.725131
3,-5.073562,0.7,Premium,D,SI1,61.2,58.0,2512,5.74,5.7,...,-1.740788,-1.77886,-0.82507,0.444932,1.173109,0.453606,-0.26344,0.24621,-0.850503,-0.41295
4,-14.436557,0.83,Ideal,G,SI2,62.4,54.0,2751,6.01,6.08,...,-0.859322,1.409268,0.861992,1.109063,-1.436722,-1.461618,0.081787,0.258087,0.851146,2.204813


Inspecting columns

In [4]:
# Find columns
all_columns = data.columns.tolist()
print(all_columns)

numeric_columns = data.select_dtypes(include=["number"]).columns.tolist()
numeric_columns.remove("outcome") # Remove the target column
print(numeric_columns)

non_numeric_columns = data.select_dtypes(exclude=["number"]).columns.tolist()
print(non_numeric_columns)

['outcome', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['cut', 'color', 'clarity']


In [5]:
for non_numeric_column in non_numeric_columns:
    print(data[non_numeric_column].value_counts())

cut
Ideal        4040
Premium      2439
Very Good    2296
Good          925
Fair          300
Name: count, dtype: int64
color
G    2120
E    1873
F    1746
H    1506
D    1246
I     983
J     526
Name: count, dtype: int64
clarity
SI1     2408
VS2     2256
SI2     1743
VS1     1503
VVS2     951
VVS1     675
IF       318
I1       146
Name: count, dtype: int64


Converting non-numeric features to numerical features

In [6]:
data = convert_non_numeric_to_numeric(data=data)
print(data)

['G', 'E', 'F', 'H', 'D', 'I', 'J']
        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...      

Normalise data using each columns respective mean and std.

In [7]:
print(data)

        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...        b8        b9       b10  colour_G  

Data splitting:
- Split the entire dataset into training and testing sets first.
- Use the training set to generate folds (one validation and the rest training folds) (K-Fold Cross Validation)

In [8]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=REPRODUCIBILITY_SEED)
print(f"Training set size: {len(train_data)} | Test set size: {len(test_data)}")    
print()


Training set size: 8000 | Test set size: 2000



In [9]:
normaliser = Normaliser()
os.makedirs(TRAINING_STATISTICS_DIR, exist_ok=True)
stats_for_each_column = {}
for column in numeric_columns:
    print(data[column])
    train_data_column_mean = normaliser.calculate_mean(train_data[column])
    train_data_column_std = normaliser.calculate_std(train_data[column])

    train_data[column] = normaliser.standardise(train_data[column], mean=train_data_column_mean, std=train_data_column_std)

    stats_for_each_column[column] = {
        "mean": train_data_column_mean,
        "std": train_data_column_std
    }

    # Normalise test data using the mean and std of the training data
    test_data[column] = normaliser.standardise(test_data[column], mean=train_data_column_mean, std=train_data_column_std)
    print("after", train_data[column])

with open(f"{TRAINING_STATISTICS_DIR}/stats.json", "w") as f:
    json.dump(stats_for_each_column, f)

0       1.14
1       0.38
2       0.50
3       0.70
4       0.83
        ... 
9995    0.33
9996    1.01
9997    0.52
9998    0.31
9999    0.37
Name: carat, Length: 10000, dtype: float64
after 9254   -1.023878
1561    1.522047
1670   -1.045094
6087   -0.111588
6669    1.140158
          ...   
5734   -0.154021
5191    0.970430
5390   -1.045094
860    -1.002662
7270   -1.151175
Name: carat, Length: 8000, dtype: float64
0       62.3
1       60.5
2       60.7
3       61.2
4       62.4
        ... 
9995    62.6
9996    69.5
9997    57.9
9998    62.0
9999    59.9
Name: depth, Length: 10000, dtype: float64
after 9254   -0.381342
1561    0.945157
1670    0.386631
6087   -0.311527
6669   -0.311527
          ...   
5734    0.665894
5191    0.107368
5390    0.107368
860     0.875341
7270    0.665894
Name: depth, Length: 8000, dtype: float64
0       56.0
1       59.0
2       58.0
3       58.0
4       54.0
        ... 
9995    57.0
9996    55.0
9997    61.0
9998    54.0
9999    59.0
Name: table, Le

In [10]:
kfold_data = get_kfold_data(data=train_data, k=NUM_FOLDS, reproducibility_seed=REPRODUCIBILITY_SEED)

Fold: 0/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')

Fold: 1/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', '

Define models and hyperparameter tuning objectives for each model

In [11]:
models = {
        "linear_regression": LinearRegression,
        "lasso": Lasso,
        "ridge": Ridge,
        "xgb": xgb.XGBRegressor,
        "random_forest": RandomForestRegressor,
        "gradient_boosting": GradientBoostingRegressor,
        "ada_boost": AdaBoostRegressor,
        "lgbm": lgb.LGBMRegressor
        }

# Must contain the one-hot encoded columns (otherwise this does not make sense)
minimum_features = [feature for feature in data.columns if feature.startswith("colour")] 

def objective(model_type, trial, x_train, y_train, x_val, y_val):
    if model_type == LinearRegression:
        parameters = {
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
        }
    elif model_type == Lasso:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "selection": trial.suggest_categorical("selection", ["cyclic", "random"]),
            "warm_start": trial.suggest_categorical("warm_start", [True, False]),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == Ridge:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "solver": trial.suggest_categorical("solver", ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "positive": False,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == xgb.XGBRegressor:
        parameters = {
            "objective": "reg:squarederror",
            "eval_metric": "rmse",
            "n_estimators": 100,
            "eta": trial.suggest_float("eta", 1e-2, 0.2, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 10, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 6),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "seed": REPRODUCIBILITY_SEED
        }
    elif model_type == RandomForestRegressor:
        parameters = {
            "n_estimators": 100,
            "criterion": trial.suggest_categorical("criterion", ["absolute_error", "squared_error"]),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "bootstrap": True,
            "oob_score": False,
            "n_jobs": -1,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == GradientBoostingRegressor:
        parameters = {
            "n_estimators": 100,
            "loss": trial.suggest_categorical("loss", ["absolute_error", "squared_error", "huber", "quantile"]),
            "criterion": trial.suggest_categorical("criterion", ["friedman_mse", "squared_error"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "subsample": trial.suggest_float("subsample", 0.05, 1.0),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 2**10),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == AdaBoostRegressor:
        parameters = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 100),
            "loss": trial.suggest_categorical("loss", ["linear", "square", "exponential"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == lgb.LGBMRegressor:
        parameters = {
                    "objective": "regression",
                    "metric": "rmse",
                    "n_estimators": 100,
                    "verbosity": -1,
                    "bagging_freq": 1,
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
                    "subsample": trial.suggest_float("subsample", 0.05, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
                    "seed": REPRODUCIBILITY_SEED
        }
    
    # Wrapper method for feature selection
    if USE_FEATURE_SELECTION:
        selected_features = minimum_features.copy()
        train_columns = x_train.columns.tolist() 
        for feature in train_columns:
            if feature.startswith("colour"):
                continue
            if trial.suggest_categorical(feature, [0, 1]) == 1:
                selected_features.append(feature)
        if len(selected_features) == 0:
            return float("inf")
        
        x_train = x_train[selected_features]
        x_val = x_val[selected_features]

    model = model_type(**parameters) # Create the model
    model.fit(x_train, y_train)
    predictions = model.predict(x_val)
    metrics = calculate_metrics(targets=y_val, preds=predictions)
    rmse = metrics["rmse"]
    return rmse

In [12]:
# Train + Validate models
metrics = ["mae", "mse", "rmse", "pcc", "spearman_r", "r2_score"]
model_scores = {model_name: defaultdict(list) for model_name in models.keys()}

if os.path.exists("model_best_hyperparameters"):
    raise Exception("Directory for best hyperparameters already exists. Please delete it before running this script.")

os.makedirs("model_best_hyperparameters")

for fold in range(NUM_FOLDS):
    fold_data = kfold_data[fold]
     
    # Extract data
    train_data = fold_data["train"]
    val_data = fold_data["val"]

    train_y = train_data["outcome"]
    val_y = val_data["outcome"]
    
    train_x = train_data.drop(columns=["outcome"])
    val_x = val_data.drop(columns=["outcome"])

    # print(f"Fold {fold+1}/{NUM_FOLDS}")
    # print(f"Train data shape: {train_x.shape} | Train target shape: {train_y.shape}")
    # print(f"Val data shape: {val_x.shape} | Val target shape: {val_y.shape}")
    # print(f"Test data shape: {test_x.shape} | Test target shape: {test_y.shape}")
    training_features = set(train_x.columns.tolist())


    # Train model
    for model_name, model in models.items():
        study = optuna.create_study(direction="minimize")
        print("G", train_x.columns)
        print("G", val_x.columns)
        study.optimize(lambda trial: objective(trial=trial, 
                                               model_type=model, 
                                               x_train=train_x, 
                                               y_train=train_y, 
                                               x_val=val_x, 
                                               y_val=val_y
                                               ), n_trials=N_TRIALS)
        
        # Train model with best hyperparameters
        best_trial = study.best_trial 

        if USE_FEATURE_SELECTION:
            best_fold_params = {} # Hyperparameters
            best_selected_features = minimum_features.copy() # Features selected by the model
            for param in best_trial.params:
                if param in training_features: # i.e., if the parameter is a feature
                    if best_trial.params[param] == 1:
                        best_selected_features.append(param)
                else: # A hyperparameter
                    best_fold_params[param] = best_trial.params[param]
        else:
            best_fold_params = best_trial.params
            best_selected_features = train_x.columns.tolist() # All feature columns
        print("P", best_fold_params)
        print("F", best_selected_features)
        
        # Select the best features
        print(train_x.columns)
        print(val_x.columns)
        train_x = train_x[best_selected_features]
        val_x = val_x[best_selected_features]

        model = model(**best_fold_params)
        model.fit(train_x, train_y)
        preds = model.predict(val_x)
        
        # Save the best hyperparameters for this model at this fold.
        os.makedirs(f"model_best_hyperparameters/{model_name}", exist_ok=True)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}.json", "w") as f:
            json.dump(best_fold_params, f)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}_selected_features.json", "w") as f:
            json.dump(best_selected_features, f)

        # Calculate metrics
        metrics = calculate_metrics(targets=val_y, preds=preds)
        mae = metrics["mae"]
        mse = metrics["mse"]
        rmse = metrics["rmse"]
        pcc = metrics["pcc"]
        spearman_r = metrics["spearman_r"]
        r2_score = metrics["r2_score"]

        for metric in metrics:
            model_scores[model_name][metric].append(metrics[metric])

        print(f"Fold: {fold+1}/{NUM_FOLDS}")
        print(f"Model name: {model_name}")
        print(f"MAE: {mae}")
        print(f"MSE: {mse}")
        print(f"RMSE: {rmse}")
        print(f"PCC: {pcc}")
        print(f"Spearman R: {spearman_r}")
        print(f"R2 Score: {r2_score}")
        print()


[I 2025-02-18 16:55:52,730] A new study created in memory with name: no-name-48622613-c1a3-4aeb-a8cb-e650203fadb8
[I 2025-02-18 16:55:52,740] Trial 0 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,747] Trial 1 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,754] Trial 2 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,761] Trial 3 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,767] Trial 4 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,773] Trial 5 finished with value: 

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 16:55:52,810] Trial 11 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,816] Trial 12 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,822] Trial 13 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,828] Trial 14 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,834] Trial 15 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-18 16:55:52,841] Trial 16 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.

P {'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 1/5
Model name: linear_regression
MAE

[I 2025-02-18 16:55:53,676] Trial 4 finished with value: 10.783480275323198 and parameters: {'alpha': 0.0023233393454688608, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 2 with value: 10.772856038718924.
[I 2025-02-18 16:55:53,683] Trial 5 finished with value: 10.776051373516335 and parameters: {'alpha': 0.018603359897449927, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 2 with value: 10.772856038718924.
[I 2025-02-18 16:55:53,704] Trial 6 finished with value: 10.782267239492294 and parameters: {'alpha': 0.028350959665583868, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 2 with value: 10.772856038718924.
[I 2025-02-18 16:55:53,729] Trial 7 finished with value: 10.776967864200197 and parameters: {'alpha': 0.013284346160660591, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 2 with value: 10.772856038718924.
[I 2025-02-18 16:55:53,738] Trial 8 fin

P {'alpha': 0.09994422685666782, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colou

[I 2025-02-18 16:55:55,420] Trial 0 finished with value: 10.788867325682096 and parameters: {'alpha': 0.0020681419870088587, 'solver': 'saga', 'fit_intercept': True}. Best is trial 0 with value: 10.788867325682096.
[I 2025-02-18 16:55:55,425] Trial 1 finished with value: 10.789551276167296 and parameters: {'alpha': 0.003093118152808172, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 0 with value: 10.788867325682096.
[I 2025-02-18 16:55:55,430] Trial 2 finished with value: 10.789132543322369 and parameters: {'alpha': 0.006761482681258451, 'solver': 'lsqr', 'fit_intercept': True}. Best is trial 0 with value: 10.788867325682096.
[I 2025-02-18 16:55:55,447] Trial 3 finished with value: 10.789703967566028 and parameters: {'alpha': 0.005147655113216535, 'solver': 'svd', 'fit_intercept': False}. Best is trial 0 with value: 10.788867325682096.
[I 2025-02-18 16:55:55,786] Trial 4 finished with value: 10.789242823322063 and parameters: {'alpha': 0.004059967371370939, 'solver': 'sa

P {'alpha': 0.09988794269658571, 'solver': 'saga', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='obje

[I 2025-02-18 16:56:36,382] A new study created in memory with name: no-name-a13e9f86-5dd6-4e60-91d9-52ae07031b9a
[I 2025-02-18 16:56:36,563] Trial 0 finished with value: 9.163414114795877 and parameters: {'eta': 0.06501453187511012, 'gamma': 4.109556034009496e-06, 'max_depth': 4, 'min_child_weight': 5, 'subsample': 0.8421376608241643, 'colsample_bytree': 0.8072236760986962}. Best is trial 0 with value: 9.163414114795877.


Fold: 1/5
Model name: ridge
MAE: 8.706932721943984
MSE: 116.39502537158275
RMSE: 10.788652620767005
PCC: 0.5300629362910938
Spearman R: 0.5514823697196757
R2 Score: 0.28062598104692016

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 16:56:36,635] Trial 1 finished with value: 9.2503771349314 and parameters: {'eta': 0.03296513865402026, 'gamma': 1.8603347813101925, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.5999285878188865, 'colsample_bytree': 0.9836829826944209}. Best is trial 0 with value: 9.163414114795877.
[I 2025-02-18 16:56:36,709] Trial 2 finished with value: 9.182115169691533 and parameters: {'eta': 0.04650846824436618, 'gamma': 3.866965125961179, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.667702012623788, 'colsample_bytree': 0.8860395863375699}. Best is trial 0 with value: 9.163414114795877.
[I 2025-02-18 16:56:36,777] Trial 3 finished with value: 9.206416082562248 and parameters: {'eta': 0.14092729420815614, 'gamma': 0.003842491739928747, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.9138921632765051, 'colsample_bytree': 0.871366379752373}. Best is trial 0 with value: 9.163414114795877.
[I 2025-02-18 16:56:36,965] Trial 4 finished with value: 9.670048046440485 a

P {'eta': 0.12030751346504398, 'gamma': 0.11138454612964395, 'max_depth': 3, 'min_child_weight': 6, 'subsample': 0.8049065621074901, 'colsample_bytree': 0.5321779343376319}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',

[I 2025-02-18 16:56:46,026] Trial 1 finished with value: 10.381748556770281 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 4, 'min_samples_leaf': 9}. Best is trial 0 with value: 9.601326500339452.
[I 2025-02-18 16:56:46,140] Trial 2 finished with value: 10.375739063376976 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 9, 'min_samples_leaf': 10}. Best is trial 0 with value: 9.601326500339452.
[I 2025-02-18 16:56:47,939] Trial 3 finished with value: 9.678749230825481 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 9}. Best is trial 0 with value: 9.601326500339452.
[I 2025-02-18 16:56:48,076] Trial 4 finished with value: 10.059255796459945 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 10, 'min_samples_leaf': 5}. Best is tri

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 3, 'min_samples_leaf': 6}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-18 16:57:22,307] A new study created in memory with name: no-name-2fea6181-cf4b-437d-a90d-b009be4f7137


Fold: 1/5
Model name: random_forest
MAE: 7.669107188453292
MSE: 90.81775180599743
RMSE: 9.529834825745798
PCC: 0.6810284056265278
Spearman R: 0.6840656431897044
R2 Score: 0.4387051259244438

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 16:57:24,339] Trial 0 finished with value: 20.259246324512926 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.0011148550076585631, 'subsample': 0.8010472867370798, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_leaf_nodes': 469}. Best is trial 0 with value: 20.259246324512926.
[I 2025-02-18 16:57:24,722] Trial 1 finished with value: 9.64581503178182 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.07440469226917244, 'subsample': 0.24052346550191867, 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_leaf_nodes': 151}. Best is trial 1 with value: 9.64581503178182.
[I 2025-02-18 16:57:26,276] Trial 2 finished with value: 10.94085928842754 and parameters: {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.005434769192594998, 'subsample': 0.8738402216947179, 'max_depth': 10, 'min_samples_

P {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.06562263496454646, 'subsample': 0.9573940382134581, 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_features': 'log2', 'max_leaf_nodes': 978}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8

[I 2025-02-18 16:58:35,527] A new study created in memory with name: no-name-31619af8-3b41-4e5e-9be6-92d38455f9c7


Fold: 1/5
Model name: gradient_boosting
MAE: 7.377253854211128
MSE: 84.33738795793508
RMSE: 9.183538966974282
PCC: 0.6937405052959722
Spearman R: 0.69519939167945
R2 Score: 0.47875671207064197

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 16:58:38,158] Trial 0 finished with value: 9.784946332221839 and parameters: {'n_estimators': 92, 'loss': 'square', 'learning_rate': 0.0029279009821884874}. Best is trial 0 with value: 9.784946332221839.
[I 2025-02-18 16:58:40,219] Trial 1 finished with value: 9.66423518702128 and parameters: {'n_estimators': 74, 'loss': 'exponential', 'learning_rate': 0.06150645108963242}. Best is trial 1 with value: 9.66423518702128.
[I 2025-02-18 16:58:41,645] Trial 2 finished with value: 9.806245100182483 and parameters: {'n_estimators': 50, 'loss': 'square', 'learning_rate': 0.0014939088452821257}. Best is trial 1 with value: 9.66423518702128.
[I 2025-02-18 16:58:43,813] Trial 3 finished with value: 9.791374899449412 and parameters: {'n_estimators': 76, 'loss': 'exponential', 'learning_rate': 0.004305822280689688}. Best is trial 1 with value: 9.66423518702128.
[I 2025-02-18 16:58:45,381] Trial 4 finished with value: 9.73007299156834 and parameters: {'n_estimators': 55, 'loss': 'squar

P {'n_estimators': 92, 'loss': 'square', 'learning_rate': 0.09796851024007176}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='

[I 2025-02-18 17:02:30,404] A new study created in memory with name: no-name-14affb80-df2f-4de0-aa46-3ffd5204253f
[I 2025-02-18 17:02:30,488] Trial 0 finished with value: 9.760196232683146 and parameters: {'learning_rate': 0.07424157678584205, 'num_leaves': 154, 'subsample': 0.18969994133385382, 'colsample_bytree': 0.27492234111887354, 'min_data_in_leaf': 75}. Best is trial 0 with value: 9.760196232683146.


Fold: 1/5
Model name: ada_boost
MAE: 7.596668994476087
MSE: 89.24272415632609
RMSE: 9.446836727514988
PCC: 0.6719128880461211
Spearman R: 0.6761590347997866
R2 Score: 0.4484395107633935

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:02:30,586] Trial 1 finished with value: 11.181441336529536 and parameters: {'learning_rate': 0.0036370672575192515, 'num_leaves': 508, 'subsample': 0.7974871368583383, 'colsample_bytree': 0.9854518719527708, 'min_data_in_leaf': 90}. Best is trial 0 with value: 9.760196232683146.
[I 2025-02-18 17:02:30,665] Trial 2 finished with value: 10.56373333412002 and parameters: {'learning_rate': 0.008467765606418234, 'num_leaves': 40, 'subsample': 0.5700224632164383, 'colsample_bytree': 0.5829461124251971, 'min_data_in_leaf': 19}. Best is trial 0 with value: 9.760196232683146.
[I 2025-02-18 17:02:30,763] Trial 3 finished with value: 9.317779323585631 and parameters: {'learning_rate': 0.06743444417113412, 'num_leaves': 494, 'subsample': 0.8724190090293923, 'colsample_bytree': 0.5356836125190818, 'min_data_in_leaf': 85}. Best is trial 3 with value: 9.317779323585631.
[I 2025-02-18 17:02:31,176] Trial 4 finished with value: 9.736248937712034 and parameters: {'learning_rate': 0.0183

P {'learning_rate': 0.04054421152327191, 'num_leaves': 65, 'subsample': 0.37468453360078025, 'colsample_bytree': 0.9948446854290484, 'min_data_in_leaf': 55}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_

[I 2025-02-18 17:02:40,408] Trial 8 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 17:02:40,416] Trial 9 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 17:02:40,421] Trial 10 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 17:02:40,427] Trial 11 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 17:02:40,434] Trial 12 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-18 17:02:40,439] Trial 13 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[

P {'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 2/5
Model name: linear_regression
MAE

[I 2025-02-18 17:02:41,192] Trial 7 finished with value: 10.8136246332756 and parameters: {'alpha': 0.0028887349616432112, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 3 with value: 10.812807187836157.
[I 2025-02-18 17:02:41,246] Trial 8 finished with value: 10.8133566072076 and parameters: {'alpha': 0.0015866826580532727, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}. Best is trial 3 with value: 10.812807187836157.
[I 2025-02-18 17:02:41,277] Trial 9 finished with value: 10.813360825288397 and parameters: {'alpha': 0.001109623914767453, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True}. Best is trial 3 with value: 10.812807187836157.
[I 2025-02-18 17:02:41,299] Trial 10 finished with value: 10.812574927814744 and parameters: {'alpha': 0.0063037179797138755, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}. Best is trial 10 with value: 10.812574927814744.
[I 2025-02-18 17:02:41,320] Trial 11 f

P {'alpha': 0.006897390883335658, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'co

[I 2025-02-18 17:02:43,388] Trial 7 finished with value: 10.81346452201003 and parameters: {'alpha': 0.026546467029358284, 'solver': 'cholesky', 'fit_intercept': True}. Best is trial 2 with value: 10.813452075515224.
[I 2025-02-18 17:02:43,392] Trial 8 finished with value: 10.813464936169394 and parameters: {'alpha': 0.03164058478239369, 'solver': 'cholesky', 'fit_intercept': True}. Best is trial 2 with value: 10.813452075515224.
[I 2025-02-18 17:02:43,533] Trial 9 finished with value: 10.813504789111159 and parameters: {'alpha': 0.00799395647089542, 'solver': 'sag', 'fit_intercept': True}. Best is trial 2 with value: 10.813452075515224.
[I 2025-02-18 17:02:43,823] Trial 10 finished with value: 10.81346064685589 and parameters: {'alpha': 0.00136605546432117, 'solver': 'saga', 'fit_intercept': False}. Best is trial 2 with value: 10.813452075515224.
[I 2025-02-18 17:02:43,831] Trial 11 finished with value: 10.813717611233233 and parameters: {'alpha': 0.003301377392268394, 'solver': 'spar

P {'alpha': 0.0999193209214128, 'solver': 'svd', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object

[I 2025-02-18 17:02:48,252] Trial 1 finished with value: 9.675196044070486 and parameters: {'eta': 0.01755949611463527, 'gamma': 0.0009141549119836105, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.6655698327543655, 'colsample_bytree': 0.9337577838875672}. Best is trial 0 with value: 9.49430786197747.
[I 2025-02-18 17:02:48,320] Trial 2 finished with value: 10.361450044892873 and parameters: {'eta': 0.011738459096622291, 'gamma': 1.9887978502374754e-06, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.9035928488557637, 'colsample_bytree': 0.7386182494086149}. Best is trial 0 with value: 9.49430786197747.
[I 2025-02-18 17:02:48,585] Trial 3 finished with value: 10.18876885189171 and parameters: {'eta': 0.01015223394225162, 'gamma': 3.538070764689299e-06, 'max_depth': 9, 'min_child_weight': 6, 'subsample': 0.5475923658622726, 'colsample_bytree': 0.9111806160886257}. Best is trial 0 with value: 9.49430786197747.
[I 2025-02-18 17:02:48,823] Trial 4 finished with value: 9.57091

P {'eta': 0.11418098245839162, 'gamma': 3.252531180535043e-05, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.9394442947629079, 'colsample_bytree': 0.6654746791950296}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G

[I 2025-02-18 17:03:00,073] Trial 0 finished with value: 9.991521956050072 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 6, 'min_samples_split': 6, 'min_samples_leaf': 9}. Best is trial 0 with value: 9.991521956050072.
[I 2025-02-18 17:03:01,877] Trial 1 finished with value: 9.80718490998149 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 1 with value: 9.80718490998149.
[I 2025-02-18 17:03:03,909] Trial 2 finished with value: 9.70521526142446 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 6}. Best is trial 2 with value: 9.70521526142446.
[I 2025-02-18 17:03:04,037] Trial 3 finished with value: 10.20419709478481 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 10}. Best is trial 2 w

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 4, 'min_samples_leaf': 4}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-18 17:04:12,926] A new study created in memory with name: no-name-b4053697-42d0-4397-a194-0ff15c90cb8d


Fold: 2/5
Model name: random_forest
MAE: 7.626505621632561
MSE: 92.73524121271588
RMSE: 9.629913873587649
PCC: 0.680588471819418
Spearman R: 0.687265059478539
R2 Score: 0.4390505431512768

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:04:13,692] Trial 0 finished with value: 11.878261520352801 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.00418206909068369, 'subsample': 0.751081169917316, 'max_depth': 4, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_leaf_nodes': 247}. Best is trial 0 with value: 11.878261520352801.
[I 2025-02-18 17:04:16,149] Trial 1 finished with value: 12.120056789660868 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.002016602337055736, 'subsample': 0.6017394421840532, 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_leaf_nodes': 465}. Best is trial 0 with value: 11.878261520352801.
[I 2025-02-18 17:04:16,529] Trial 2 finished with value: 9.485178259858221 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.046865896720826444, 'subsample': 0.4891551603100532, 'max_depth': 4, 'min_sa

P {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.08103126775984701, 'subsample': 0.9657031845905865, 'max_depth': 4, 'min_samples_split': 10, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'max_leaf_nodes': 195}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
 

[I 2025-02-18 17:05:47,992] A new study created in memory with name: no-name-f4812b3e-fbfb-4456-8492-96177499cfdb


Fold: 2/5
Model name: gradient_boosting
MAE: 7.419372127143292
MSE: 88.62848608061027
RMSE: 9.414270342443448
PCC: 0.6819771814172597
Spearman R: 0.6883358138030522
R2 Score: 0.46389203847322424

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:05:50,537] Trial 0 finished with value: 9.812293407860572 and parameters: {'n_estimators': 89, 'loss': 'square', 'learning_rate': 0.004888953549209003}. Best is trial 0 with value: 9.812293407860572.
[I 2025-02-18 17:05:52,259] Trial 1 finished with value: 9.81166881971444 and parameters: {'n_estimators': 60, 'loss': 'linear', 'learning_rate': 0.010939363237183298}. Best is trial 1 with value: 9.81166881971444.
[I 2025-02-18 17:05:54,807] Trial 2 finished with value: 9.787169904878013 and parameters: {'n_estimators': 89, 'loss': 'square', 'learning_rate': 0.010047772119651736}. Best is trial 2 with value: 9.787169904878013.
[I 2025-02-18 17:05:57,262] Trial 3 finished with value: 9.806843428635299 and parameters: {'n_estimators': 86, 'loss': 'square', 'learning_rate': 0.006967961171731324}. Best is trial 2 with value: 9.787169904878013.
[I 2025-02-18 17:06:00,081] Trial 4 finished with value: 9.826145404811099 and parameters: {'n_estimators': 98, 'loss': 'linear', 'lea

P {'n_estimators': 100, 'loss': 'exponential', 'learning_rate': 0.09776150720548106}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      d

[I 2025-02-18 17:09:51,560] A new study created in memory with name: no-name-d7dcc91f-b185-4ae5-9de4-8aa371c3b10b


Fold: 2/5
Model name: ada_boost
MAE: 7.622610753785889
MSE: 92.79516955372414
RMSE: 9.633024943065607
PCC: 0.6640973690394005
Spearman R: 0.6715840958831552
R2 Score: 0.4386880405050464

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:09:52,254] Trial 0 finished with value: 12.646138260094858 and parameters: {'learning_rate': 0.002367539240009934, 'num_leaves': 698, 'subsample': 0.7632847624183794, 'colsample_bytree': 0.17840431054849976, 'min_data_in_leaf': 7}. Best is trial 0 with value: 12.646138260094858.
[I 2025-02-18 17:09:52,350] Trial 1 finished with value: 12.348835431801906 and parameters: {'learning_rate': 0.0037617885925372687, 'num_leaves': 762, 'subsample': 0.5943238413121605, 'colsample_bytree': 0.1850793399044265, 'min_data_in_leaf': 57}. Best is trial 1 with value: 12.348835431801906.
[I 2025-02-18 17:09:52,414] Trial 2 finished with value: 10.800999130529766 and parameters: {'learning_rate': 0.012445807388438947, 'num_leaves': 1011, 'subsample': 0.4489321704768236, 'colsample_bytree': 0.3970416644586056, 'min_data_in_leaf': 99}. Best is trial 2 with value: 10.800999130529766.
[I 2025-02-18 17:09:52,464] Trial 3 finished with value: 11.882976668094106 and parameters: {'learning_rate

P {'learning_rate': 0.04006798504669871, 'num_leaves': 546, 'subsample': 0.45190900106910264, 'colsample_bytree': 0.9096148208781045, 'min_data_in_leaf': 85}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour

[I 2025-02-18 17:10:02,328] Trial 10 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 17:10:02,336] Trial 11 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 17:10:02,342] Trial 12 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 17:10:02,349] Trial 13 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 17:10:02,356] Trial 14 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-18 17:10:02,362] Trial 15 finished with value: 10.996838263571808 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.996838263571808.
[I

P {'fit_intercept': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 3/5
Model name: linear_regression
MAE:

[I 2025-02-18 17:10:03,153] Trial 9 finished with value: 10.9958977154923 and parameters: {'alpha': 0.0014157009419329344, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}. Best is trial 0 with value: 10.972858445900258.
[I 2025-02-18 17:10:03,160] Trial 10 finished with value: 10.972461334850458 and parameters: {'alpha': 0.09328904256925953, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 10 with value: 10.972461334850458.
[I 2025-02-18 17:10:03,167] Trial 11 finished with value: 10.972748802453491 and parameters: {'alpha': 0.08760801645462288, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 10 with value: 10.972461334850458.
[I 2025-02-18 17:10:03,176] Trial 12 finished with value: 10.98067317657661 and parameters: {'alpha': 0.03478906749018878, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 10 with value: 10.972461334850458.
[I 2025-02-18 17:10:03,184] Trial 13 f

P {'alpha': 0.09995327025776458, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colou

[I 2025-02-18 17:10:04,528] Trial 3 finished with value: 10.996900163541246 and parameters: {'alpha': 0.05149457444775411, 'solver': 'saga', 'fit_intercept': False}. Best is trial 1 with value: 10.996835349114825.
[I 2025-02-18 17:10:04,534] Trial 4 finished with value: 10.996819056245693 and parameters: {'alpha': 0.011472919952714386, 'solver': 'sparse_cg', 'fit_intercept': False}. Best is trial 4 with value: 10.996819056245693.
[I 2025-02-18 17:10:04,540] Trial 5 finished with value: 10.996821786450903 and parameters: {'alpha': 0.05209215001727441, 'solver': 'sparse_cg', 'fit_intercept': True}. Best is trial 4 with value: 10.996819056245693.
[I 2025-02-18 17:10:04,544] Trial 6 finished with value: 10.996832579536724 and parameters: {'alpha': 0.026580365804817013, 'solver': 'auto', 'fit_intercept': False}. Best is trial 4 with value: 10.996819056245693.
[I 2025-02-18 17:10:04,548] Trial 7 finished with value: 10.996830311361734 and parameters: {'alpha': 0.0037968556994688444, 'solver'

P {'alpha': 0.07164252206280695, 'solver': 'sparse_cg', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype=

[I 2025-02-18 17:10:07,726] Trial 1 finished with value: 9.667127658236721 and parameters: {'eta': 0.12853801129291878, 'gamma': 0.005823827984078693, 'max_depth': 8, 'min_child_weight': 4, 'subsample': 0.9106180621001443, 'colsample_bytree': 0.6596519428744209}. Best is trial 0 with value: 9.377997961219902.
[I 2025-02-18 17:10:07,817] Trial 2 finished with value: 9.405616165770633 and parameters: {'eta': 0.05371017010858711, 'gamma': 0.0922214519592035, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.7112664748735953, 'colsample_bytree': 0.9091718276425915}. Best is trial 0 with value: 9.377997961219902.
[I 2025-02-18 17:10:08,392] Trial 3 finished with value: 9.662646611087268 and parameters: {'eta': 0.030755171363888, 'gamma': 0.0046997629763582726, 'max_depth': 10, 'min_child_weight': 1, 'subsample': 0.6618705296487778, 'colsample_bytree': 0.7550278363010094}. Best is trial 0 with value: 9.377997961219902.
[I 2025-02-18 17:10:08,488] Trial 4 finished with value: 9.6699960272

P {'eta': 0.09169698642312783, 'gamma': 0.09978654615613383, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.7450596535924662, 'colsample_bytree': 0.5858610217907625}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',

[I 2025-02-18 17:10:18,501] Trial 1 finished with value: 11.119476244020634 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 7}. Best is trial 0 with value: 9.787986686279964.
[I 2025-02-18 17:10:18,614] Trial 2 finished with value: 10.4658725835398 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 10}. Best is trial 0 with value: 9.787986686279964.
[I 2025-02-18 17:10:20,118] Trial 3 finished with value: 10.500151266844458 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 9, 'min_samples_leaf': 2}. Best is trial 0 with value: 9.787986686279964.
[I 2025-02-18 17:10:20,233] Trial 4 finished with value: 10.822832188125428 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 4, 'min_samples_split': 9, 'min_samples_leaf': 1}. Best is trial 0

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 4}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour

[I 2025-02-18 17:10:55,384] A new study created in memory with name: no-name-919658e5-65c0-4fff-8b98-0874d6f6d5ac


Fold: 3/5
Model name: random_forest
MAE: 7.64637765427628
MSE: 93.64236288184163
RMSE: 9.676898412293147
PCC: 0.6644125237785561
Spearman R: 0.6708295442693533
R2 Score: 0.4241363952871555

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:10:55,785] Trial 0 finished with value: 11.659090647648242 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.006235122723035989, 'subsample': 0.4568263562690096, 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_leaf_nodes': 494}. Best is trial 0 with value: 11.659090647648242.
[I 2025-02-18 17:10:56,051] Trial 1 finished with value: 12.019184722192481 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.002126208303545843, 'subsample': 0.14276675465370933, 'max_depth': 9, 'min_samples_split': 4, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'max_leaf_nodes': 202}. Best is trial 0 with value: 11.659090647648242.
[I 2025-02-18 17:10:58,181] Trial 2 finished with value: 12.326845103861373 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.0011987956325358422, 'subsample': 0.5893524245171163, 'max_depth': 10, 'm

P {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.08450652090522333, 'subsample': 0.8534520065187583, 'max_depth': 6, 'min_samples_split': 6, 'min_samples_leaf': 7, 'max_features': 'log2', 'max_leaf_nodes': 454}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
  

[I 2025-02-18 17:12:36,345] A new study created in memory with name: no-name-125a9204-d7e9-490a-aa5a-437df7094b42


Fold: 3/5
Model name: gradient_boosting
MAE: 7.432125924549505
MSE: 88.93201129857579
RMSE: 9.430377049650549
PCC: 0.6738195906381008
Spearman R: 0.679080402961095
R2 Score: 0.4531032000401176

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:12:37,840] Trial 0 finished with value: 9.86071866046854 and parameters: {'n_estimators': 52, 'loss': 'exponential', 'learning_rate': 0.011588251535521519}. Best is trial 0 with value: 9.86071866046854.
[I 2025-02-18 17:12:40,085] Trial 1 finished with value: 9.810359061657923 and parameters: {'n_estimators': 78, 'loss': 'linear', 'learning_rate': 0.005924452044380505}. Best is trial 1 with value: 9.810359061657923.
[I 2025-02-18 17:12:42,779] Trial 2 finished with value: 9.772864418495036 and parameters: {'n_estimators': 93, 'loss': 'exponential', 'learning_rate': 0.027774240138072392}. Best is trial 2 with value: 9.772864418495036.
[I 2025-02-18 17:12:45,310] Trial 3 finished with value: 9.799068617108247 and parameters: {'n_estimators': 90, 'loss': 'exponential', 'learning_rate': 0.04355037437499817}. Best is trial 2 with value: 9.772864418495036.
[I 2025-02-18 17:12:46,742] Trial 4 finished with value: 9.805155873795018 and parameters: {'n_estimators': 50, 'loss': 

P {'n_estimators': 100, 'loss': 'square', 'learning_rate': 0.09907404113758256}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype=

[I 2025-02-18 17:16:41,274] A new study created in memory with name: no-name-374655bb-15c7-4032-ae64-914168f1a29b
[I 2025-02-18 17:16:41,417] Trial 0 finished with value: 10.98358036514606 and parameters: {'learning_rate': 0.02093727456013524, 'num_leaves': 811, 'subsample': 0.6263603747651739, 'colsample_bytree': 0.21764253122084454, 'min_data_in_leaf': 36}. Best is trial 0 with value: 10.98358036514606.


Fold: 3/5
Model name: ada_boost
MAE: 7.601952487044501
MSE: 93.56616005883528
RMSE: 9.672960253140467
PCC: 0.6528581678326271
Spearman R: 0.664603217141798
R2 Score: 0.4246050125987574

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:16:41,574] Trial 1 finished with value: 11.878106208880812 and parameters: {'learning_rate': 0.011367813610339518, 'num_leaves': 779, 'subsample': 0.7290956132396174, 'colsample_bytree': 0.17644751205822545, 'min_data_in_leaf': 34}. Best is trial 0 with value: 10.98358036514606.
[I 2025-02-18 17:16:41,644] Trial 2 finished with value: 11.258591361539077 and parameters: {'learning_rate': 0.005242948636777822, 'num_leaves': 120, 'subsample': 0.8072800397688122, 'colsample_bytree': 0.5516321327098463, 'min_data_in_leaf': 85}. Best is trial 0 with value: 10.98358036514606.
[I 2025-02-18 17:16:42,181] Trial 3 finished with value: 9.627363222456522 and parameters: {'learning_rate': 0.026485573599870273, 'num_leaves': 301, 'subsample': 0.5927644848463399, 'colsample_bytree': 0.6743994855540214, 'min_data_in_leaf': 1}. Best is trial 3 with value: 9.627363222456522.
[I 2025-02-18 17:16:42,338] Trial 4 finished with value: 11.075226154848504 and parameters: {'learning_rate': 0.0

P {'learning_rate': 0.04839286653865815, 'num_leaves': 829, 'subsample': 0.39202739304162443, 'colsample_bytree': 0.8369629158664422, 'min_data_in_leaf': 59}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour

[I 2025-02-18 17:16:51,280] Trial 3 finished with value: 10.730366718545177 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 17:16:51,290] Trial 4 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 17:16:51,298] Trial 5 finished with value: 10.730366718545177 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 17:16:51,308] Trial 6 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 17:16:51,314] Trial 7 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-18 17:16:51,323] Trial 8 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 202

P {'fit_intercept': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 4/5
Model name: linear_regression
MAE:

[I 2025-02-18 17:16:52,144] Trial 8 finished with value: 10.725387762835728 and parameters: {'alpha': 0.0048072468548396915, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 4 with value: 10.699508911333325.
[I 2025-02-18 17:16:52,162] Trial 9 finished with value: 10.729079806481018 and parameters: {'alpha': 0.00138196567588145, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 4 with value: 10.699508911333325.
[I 2025-02-18 17:16:52,179] Trial 10 finished with value: 10.695240261905525 and parameters: {'alpha': 0.07125467685813804, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 10 with value: 10.695240261905525.
[I 2025-02-18 17:16:52,188] Trial 11 finished with value: 10.693630913996005 and parameters: {'alpha': 0.09657909977513333, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 11 with value: 10.693630913996005.
[I 2025-02-18 17:16:52,196] Trial 12 fin

P {'alpha': 0.09863512960654754, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colo

[I 2025-02-18 17:16:53,716] Trial 3 finished with value: 10.730354325268387 and parameters: {'alpha': 0.08514720885776357, 'solver': 'sag', 'fit_intercept': True}. Best is trial 1 with value: 10.730334596643727.
[I 2025-02-18 17:16:53,996] Trial 4 finished with value: 10.730831123961169 and parameters: {'alpha': 0.01635958908360646, 'solver': 'saga', 'fit_intercept': False}. Best is trial 1 with value: 10.730334596643727.
[I 2025-02-18 17:16:54,004] Trial 5 finished with value: 10.730381671779456 and parameters: {'alpha': 0.01737923935049734, 'solver': 'svd', 'fit_intercept': False}. Best is trial 1 with value: 10.730334596643727.
[I 2025-02-18 17:16:54,009] Trial 6 finished with value: 10.730446562298036 and parameters: {'alpha': 0.09279878428633127, 'solver': 'cholesky', 'fit_intercept': False}. Best is trial 1 with value: 10.730334596643727.
[I 2025-02-18 17:16:54,019] Trial 7 finished with value: 10.730368925692884 and parameters: {'alpha': 0.002565228139168023, 'solver': 'auto', '

P {'alpha': 0.0027471084111693545, 'solver': 'lsqr', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='ob

[I 2025-02-18 17:16:56,649] Trial 1 finished with value: 9.631948160426417 and parameters: {'eta': 0.12841664507208386, 'gamma': 1.3160100686253928, 'max_depth': 7, 'min_child_weight': 6, 'subsample': 0.6831935088693765, 'colsample_bytree': 0.5940376506846208}. Best is trial 0 with value: 9.215575941209151.
[I 2025-02-18 17:16:56,929] Trial 2 finished with value: 9.550404069735592 and parameters: {'eta': 0.10158471203923129, 'gamma': 2.424614118751167e-08, 'max_depth': 9, 'min_child_weight': 2, 'subsample': 0.6261283250739802, 'colsample_bytree': 0.8997579940097908}. Best is trial 0 with value: 9.215575941209151.
[I 2025-02-18 17:16:57,035] Trial 3 finished with value: 9.409190904287875 and parameters: {'eta': 0.023893578836742948, 'gamma': 0.054488691841420335, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.5017349141262633, 'colsample_bytree': 0.6984039838661673}. Best is trial 0 with value: 9.215575941209151.
[I 2025-02-18 17:16:57,164] Trial 4 finished with value: 9.19823611

P {'eta': 0.0851556880023156, 'gamma': 2.1456396666519457, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.8871720755231942, 'colsample_bytree': 0.647612800727328}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
  

[I 2025-02-18 17:17:08,725] Trial 1 finished with value: 9.618278561199931 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 4}. Best is trial 1 with value: 9.618278561199931.
[I 2025-02-18 17:17:10,206] Trial 2 finished with value: 10.348212120938262 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 4}. Best is trial 1 with value: 9.618278561199931.
[I 2025-02-18 17:17:12,002] Trial 3 finished with value: 9.66514629296138 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 1 with value: 9.618278561199931.
[I 2025-02-18 17:17:13,490] Trial 4 finished with value: 10.648768217726758 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 3, 'min_samples_split': 7, 'min_samples_leaf': 10}. Best is tria

P {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 3}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour

[I 2025-02-18 17:20:35,605] A new study created in memory with name: no-name-d9705808-b3af-45a8-9f11-121791ab0492


Fold: 4/5
Model name: random_forest
MAE: 7.546356738259185
MSE: 89.23421366927519
RMSE: 9.446386275675751
PCC: 0.6920351372366216
Spearman R: 0.6984616790865934
R2 Score: 0.4500777221309544

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:20:37,500] Trial 0 finished with value: 9.814795163145444 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.01569236931826274, 'subsample': 0.7623238931229157, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 7, 'max_features': 'log2', 'max_leaf_nodes': 170}. Best is trial 0 with value: 9.814795163145444.
[I 2025-02-18 17:20:39,355] Trial 1 finished with value: 15.360309855163862 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.022948735890727788, 'subsample': 0.7705063534724687, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_features': 'log2', 'max_leaf_nodes': 449}. Best is trial 0 with value: 9.814795163145444.
[I 2025-02-18 17:20:40,508] Trial 2 finished with value: 9.238334900149047 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.039298658082792585, 'subsample': 0.5016677889429718, 'max_depth': 8, 'min_samples_split': 3,

P {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.09965809896816162, 'subsample': 0.7541102392428976, 'max_depth': 4, 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'max_leaf_nodes': 337}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
 

[I 2025-02-18 17:21:57,423] A new study created in memory with name: no-name-24c452a3-723b-4ae2-b654-22fa9f34e7e1


Fold: 4/5
Model name: gradient_boosting
MAE: 7.368343233035048
MSE: 84.75428794675452
RMSE: 9.206209206114888
PCC: 0.6917058773897914
Spearman R: 0.6983584339290757
R2 Score: 0.4776860895576398

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:21:59,390] Trial 0 finished with value: 9.422959398989443 and parameters: {'n_estimators': 76, 'loss': 'square', 'learning_rate': 0.07699900238541245}. Best is trial 0 with value: 9.422959398989443.
[I 2025-02-18 17:22:02,033] Trial 1 finished with value: 9.67517167270799 and parameters: {'n_estimators': 93, 'loss': 'square', 'learning_rate': 0.009912853123265395}. Best is trial 0 with value: 9.422959398989443.
[I 2025-02-18 17:22:04,684] Trial 2 finished with value: 9.429829234316598 and parameters: {'n_estimators': 99, 'loss': 'exponential', 'learning_rate': 0.07881384542040461}. Best is trial 0 with value: 9.422959398989443.
[I 2025-02-18 17:22:06,317] Trial 3 finished with value: 9.683937773552161 and parameters: {'n_estimators': 58, 'loss': 'square', 'learning_rate': 0.010308519302685623}. Best is trial 0 with value: 9.422959398989443.
[I 2025-02-18 17:22:08,077] Trial 4 finished with value: 9.6200727534471 and parameters: {'n_estimators': 62, 'loss': 'exponential

P {'n_estimators': 97, 'loss': 'square', 'learning_rate': 0.0898321351954432}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='o

[I 2025-02-18 17:25:51,322] A new study created in memory with name: no-name-2b9cabd2-1a03-452a-bff2-d96f3330bed2


Fold: 4/5
Model name: ada_boost
MAE: 7.507463634605354
MSE: 88.38233374795892
RMSE: 9.401187890259344
PCC: 0.6761379397206305
Spearman R: 0.6837477692558925
R2 Score: 0.45532758905461457

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:25:51,741] Trial 0 finished with value: 9.726240646414611 and parameters: {'learning_rate': 0.014635137434849956, 'num_leaves': 768, 'subsample': 0.8410582592996165, 'colsample_bytree': 0.7009397702723044, 'min_data_in_leaf': 16}. Best is trial 0 with value: 9.726240646414611.
[I 2025-02-18 17:25:51,927] Trial 1 finished with value: 9.287170190494901 and parameters: {'learning_rate': 0.02056701414006178, 'num_leaves': 872, 'subsample': 0.78820286471495, 'colsample_bytree': 0.9517456079590336, 'min_data_in_leaf': 33}. Best is trial 1 with value: 9.287170190494901.
[I 2025-02-18 17:25:52,018] Trial 2 finished with value: 9.202164257527441 and parameters: {'learning_rate': 0.022964623309078048, 'num_leaves': 656, 'subsample': 0.8157412219396399, 'colsample_bytree': 0.9937592934742846, 'min_data_in_leaf': 90}. Best is trial 2 with value: 9.202164257527441.
[I 2025-02-18 17:25:52,221] Trial 3 finished with value: 9.989376050446523 and parameters: {'learning_rate': 0.0122123

P {'learning_rate': 0.04663487337381044, 'num_leaves': 25, 'subsample': 0.708663941500789, 'colsample_bytree': 0.8963127822804291, 'min_data_in_leaf': 96}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E'

[I 2025-02-18 17:26:03,139] Trial 17 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 17:26:03,145] Trial 18 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 17:26:03,153] Trial 19 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 17:26:03,159] Trial 20 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 17:26:03,165] Trial 21 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2025-02-18 17:26:03,172] Trial 22 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.921387446629819.
[I 2

P {'fit_intercept': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 5/5
Model name: linear_regression
MAE:

  model = cd_fast.enet_coordinate_descent(
[I 2025-02-18 17:26:03,881] Trial 3 finished with value: 10.493075453660884 and parameters: {'alpha': 0.001892481254585177, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 0 with value: 10.310805789775452.
  model = cd_fast.enet_coordinate_descent(
[I 2025-02-18 17:26:03,968] Trial 4 finished with value: 10.37016614819726 and parameters: {'alpha': 0.0025429141568819734, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}. Best is trial 0 with value: 10.310805789775452.
[I 2025-02-18 17:26:03,996] Trial 5 finished with value: 10.31552288502517 and parameters: {'alpha': 0.0076870268291150436, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 0 with value: 10.310805789775452.
[I 2025-02-18 17:26:04,028] Trial 6 finished with value: 10.320226551234464 and parameters: {'alpha': 0.003998416627347032, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True}. Be

P {'alpha': 0.01857695147432442, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colo

[I 2025-02-18 17:26:06,890] Trial 7 finished with value: 10.911733856507723 and parameters: {'alpha': 0.001232514054109969, 'solver': 'saga', 'fit_intercept': False}. Best is trial 4 with value: 10.351782342130074.
[I 2025-02-18 17:26:06,896] Trial 8 finished with value: 10.351806164308691 and parameters: {'alpha': 0.05809076066443265, 'solver': 'lsqr', 'fit_intercept': False}. Best is trial 4 with value: 10.351782342130074.
[I 2025-02-18 17:26:06,902] Trial 9 finished with value: 10.916571988103101 and parameters: {'alpha': 0.019930625900811967, 'solver': 'lsqr', 'fit_intercept': True}. Best is trial 4 with value: 10.351782342130074.
[I 2025-02-18 17:26:06,909] Trial 10 finished with value: 10.920534646429688 and parameters: {'alpha': 0.0054897769847821664, 'solver': 'sparse_cg', 'fit_intercept': True}. Best is trial 4 with value: 10.351782342130074.
[I 2025-02-18 17:26:06,916] Trial 11 finished with value: 10.906828482313585 and parameters: {'alpha': 0.0985898698539333, 'solver': 'au

P {'alpha': 0.09996998366435905, 'solver': 'lsqr', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='obje

[I 2025-02-18 17:26:09,366] Trial 0 finished with value: 9.320146753275475 and parameters: {'eta': 0.08216719312989607, 'gamma': 1.4008287483345988e-08, 'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.8766888938608417, 'colsample_bytree': 0.8410213254663498}. Best is trial 0 with value: 9.320146753275475.
[I 2025-02-18 17:26:09,429] Trial 1 finished with value: 9.164854290445255 and parameters: {'eta': 0.13975985053101156, 'gamma': 0.695895036330899, 'max_depth': 4, 'min_child_weight': 4, 'subsample': 0.6160174451254629, 'colsample_bytree': 0.6749826551377127}. Best is trial 1 with value: 9.164854290445255.
[I 2025-02-18 17:26:09,537] Trial 2 finished with value: 9.092668493432027 and parameters: {'eta': 0.029518325470244803, 'gamma': 0.17902992165699716, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.9698163193301489, 'colsample_bytree': 0.8492128160587502}. Best is trial 2 with value: 9.092668493432027.
[I 2025-02-18 17:26:09,635] Trial 3 finished with value: 9.152389463

P {'eta': 0.08743726444074995, 'gamma': 0.36016871409858636, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.9784995538736205, 'colsample_bytree': 0.9624912187413414}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',

[I 2025-02-18 17:26:20,677] Trial 1 finished with value: 9.595686044981962 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 6, 'min_samples_split': 5, 'min_samples_leaf': 10}. Best is trial 1 with value: 9.595686044981962.
[I 2025-02-18 17:26:22,637] Trial 2 finished with value: 9.38747861735577 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 2, 'min_samples_leaf': 7}. Best is trial 2 with value: 9.38747861735577.
[I 2025-02-18 17:26:22,787] Trial 3 finished with value: 9.530840289062121 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 1}. Best is trial 2 with value: 9.38747861735577.
[I 2025-02-18 17:26:24,554] Trial 4 finished with value: 9.447339481874938 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 6}. Best is trial 2 wi

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 1}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-18 17:27:14,750] A new study created in memory with name: no-name-aa20ed5e-c8fe-438d-be68-2d04304443a4


Fold: 5/5
Model name: random_forest
MAE: 7.3935319803265385
MSE: 85.37928658965859
RMSE: 9.240091265223445
PCC: 0.6835826852291847
Spearman R: 0.6875989092183239
R2 Score: 0.45031454291451833

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:27:16,127] Trial 0 finished with value: 9.9526134571281 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learning_rate': 0.011976340661821627, 'subsample': 0.6817993325722929, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'max_leaf_nodes': 744}. Best is trial 0 with value: 9.9526134571281.
[I 2025-02-18 17:27:16,421] Trial 1 finished with value: 12.09270699612052 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.0016348698121634636, 'subsample': 0.2751907242737629, 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 6, 'max_features': 'log2', 'max_leaf_nodes': 229}. Best is trial 0 with value: 9.9526134571281.
[I 2025-02-18 17:27:17,212] Trial 2 finished with value: 9.111309841215387 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.05656991857496503, 'subsample': 0.5297400920561894, 'max_depth': 5, 'min_samples_split': 10, 'min_samp

P {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.07144018929433808, 'subsample': 0.8914701707917898, 'max_depth': 5, 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'max_leaf_nodes': 544}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
  

[I 2025-02-18 17:28:45,600] A new study created in memory with name: no-name-36507754-426f-4530-b5a1-2a71ad01f621


Fold: 5/5
Model name: gradient_boosting
MAE: 7.238645810425146
MSE: 83.1229641658139
RMSE: 9.117179616844998
PCC: 0.6820055352320998
Spearman R: 0.6862451616192038
R2 Score: 0.46484110635190257

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:28:48,327] Trial 0 finished with value: 9.3468252457059 and parameters: {'n_estimators': 100, 'loss': 'square', 'learning_rate': 0.032971513056662065}. Best is trial 0 with value: 9.3468252457059.
[I 2025-02-18 17:28:50,613] Trial 1 finished with value: 9.35644725115913 and parameters: {'n_estimators': 82, 'loss': 'square', 'learning_rate': 0.029331338016372614}. Best is trial 0 with value: 9.3468252457059.
[I 2025-02-18 17:28:52,803] Trial 2 finished with value: 9.43993543392058 and parameters: {'n_estimators': 77, 'loss': 'linear', 'learning_rate': 0.0018397748483947268}. Best is trial 0 with value: 9.3468252457059.
[I 2025-02-18 17:28:55,161] Trial 3 finished with value: 9.454752292464521 and parameters: {'n_estimators': 83, 'loss': 'exponential', 'learning_rate': 0.0037508298044203704}. Best is trial 0 with value: 9.3468252457059.
[I 2025-02-18 17:28:56,759] Trial 4 finished with value: 9.457281542921818 and parameters: {'n_estimators': 56, 'loss': 'exponential', '

P {'n_estimators': 96, 'loss': 'exponential', 'learning_rate': 0.09990012131159196}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dt

[I 2025-02-18 17:32:41,460] A new study created in memory with name: no-name-50bf5b28-80d4-461c-aa77-7ef927dc5d4b


Fold: 5/5
Model name: ada_boost
MAE: 7.375712724382809
MSE: 86.1174302703805
RMSE: 9.279947751489795
PCC: 0.6681048160399731
Spearman R: 0.6774405044123594
R2 Score: 0.44556225623306067

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 17:32:41,655] Trial 0 finished with value: 12.21396994884663 and parameters: {'learning_rate': 0.0011103095964466723, 'num_leaves': 523, 'subsample': 0.8981939305922914, 'colsample_bytree': 0.29578117811209775, 'min_data_in_leaf': 37}. Best is trial 0 with value: 12.21396994884663.
[I 2025-02-18 17:32:42,215] Trial 1 finished with value: 11.287405721983493 and parameters: {'learning_rate': 0.016776856853352687, 'num_leaves': 846, 'subsample': 0.7420210211884135, 'colsample_bytree': 0.1370162065995983, 'min_data_in_leaf': 8}. Best is trial 1 with value: 11.287405721983493.
[I 2025-02-18 17:32:42,296] Trial 2 finished with value: 11.03307766141852 and parameters: {'learning_rate': 0.012130863756097997, 'num_leaves': 473, 'subsample': 0.6611540007489645, 'colsample_bytree': 0.20571816525554532, 'min_data_in_leaf': 65}. Best is trial 2 with value: 11.03307766141852.
[I 2025-02-18 17:32:42,340] Trial 3 finished with value: 11.180465221635423 and parameters: {'learning_rate': 0

P {'learning_rate': 0.060683245171880666, 'num_leaves': 552, 'subsample': 0.10965888131743018, 'colsample_bytree': 0.9980298785626268, 'min_data_in_leaf': 85}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colou

Compute average scores and rank models by R2 score

In [13]:
for model_name, model_metrics in model_scores.items():
    for metric, scores in model_metrics.items():
        model_scores[model_name][metric] = sum(scores) / len(scores)
    model_scores[model_name] = dict(model_scores[model_name])

model_scores = dict(sorted(model_scores.items(), key=lambda x: x[1]["r2_score"], reverse=True))

In [14]:
# Print results
for i, (model_name, model_metrics) in enumerate(model_scores.items()):
    print(f"No.{i+1} Model: {model_name}")
    for metric, score in model_metrics.items():
        print(f"{metric}: {score}")
    print()

No.1 Model: xgb
mae: 7.313216803269725
mse: 84.33908921500482
rmse: 9.182740632457717
pcc: 0.691672294596677
spearman_r: 0.6967393608356878
r2_score: 0.47767855351509086

No.2 Model: gradient_boosting
mae: 7.367148189872824
mse: 85.9550274899379
rmse: 9.270315036405632
pcc: 0.6846497379946448
spearman_r: 0.6894438407983754
r2_score: 0.46765582929870525

No.3 Model: lgbm
mae: 7.385082483373142
mse: 86.19213050527632
rmse: 9.283195054237007
pcc: 0.6832998341905263
spearman_r: 0.688306389768121
r2_score: 0.4660928199705593

No.4 Model: ada_boost
mae: 7.5408817188589286
mse: 90.020763557445
rmse: 9.48679151309404
pcc: 0.6666222361357504
spearman_r: 0.6747069242985984
r2_score: 0.44252448183097454

No.5 Model: random_forest
mae: 7.5763758365895715
mse: 90.36177123189775
rmse: 9.504624930505159
pcc: 0.6803294447380617
spearman_r: 0.6856441670485027
r2_score: 0.44045686588166977

No.6 Model: lasso
mae: 8.556747906586182
mse: 114.73260148499085
rmse: 10.709074671217989
pcc: 0.5390420052086586
