In [1]:
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
import optuna
import os
import json
from collections import defaultdict

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

from src.utils import get_kfold_data, convert_non_numeric_to_numeric, calculate_r2_score, calculate_metrics, is_outlier
from src.normalisation import Normaliser
from src.constants import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv(DATA_PATH)

In [3]:
data.head()

Unnamed: 0,outcome,carat,cut,color,clarity,depth,table,price,x,y,...,a6,a7,a8,a9,a10,b6,b7,b8,b9,b10
0,-26.701232,1.14,Ideal,G,VS1,62.3,56.0,7948,6.73,6.7,...,0.168836,-0.273758,1.107832,1.247795,0.482344,0.489511,-0.321138,0.573382,0.446871,-1.990581
1,6.548093,0.38,Premium,H,VS2,60.5,59.0,898,4.69,4.66,...,-0.256549,0.315373,-0.030326,-0.114335,-1.059588,-1.76136,-1.343951,-1.00255,-0.22503,-0.446653
2,6.612562,0.5,Very Good,E,SI1,60.7,58.0,1351,5.09,5.13,...,-1.193327,-0.657307,-0.591726,-0.446856,-0.765286,-0.816544,-1.397794,-0.47713,0.810509,1.725131
3,-5.073562,0.7,Premium,D,SI1,61.2,58.0,2512,5.74,5.7,...,-1.740788,-1.77886,-0.82507,0.444932,1.173109,0.453606,-0.26344,0.24621,-0.850503,-0.41295
4,-14.436557,0.83,Ideal,G,SI2,62.4,54.0,2751,6.01,6.08,...,-0.859322,1.409268,0.861992,1.109063,-1.436722,-1.461618,0.081787,0.258087,0.851146,2.204813


Inspecting columns

In [4]:
# Find columns
all_columns = data.columns.tolist()
print(all_columns)

numeric_columns = data.select_dtypes(include=["number"]).columns.tolist()
numeric_columns.remove("outcome") # Remove the target column
print(numeric_columns)

non_numeric_columns = data.select_dtypes(exclude=["number"]).columns.tolist()
print(non_numeric_columns)

['outcome', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['cut', 'color', 'clarity']


In [5]:
for non_numeric_column in non_numeric_columns:
    print(data[non_numeric_column].value_counts())

cut
Ideal        4040
Premium      2439
Very Good    2296
Good          925
Fair          300
Name: count, dtype: int64
color
G    2120
E    1873
F    1746
H    1506
D    1246
I     983
J     526
Name: count, dtype: int64
clarity
SI1     2408
VS2     2256
SI2     1743
VS1     1503
VVS2     951
VVS1     675
IF       318
I1       146
Name: count, dtype: int64


Removing outliers

In [6]:
print(numeric_columns)
mean_per_column = {col: data[col].mean() for col in numeric_columns}
std_per_column = {col: data[col].std() for col in numeric_columns}


['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']


In [7]:
print(f"Before removing outliers: {data.shape}")
if REMOVE_OUTLIERS:
    data = data[~data.apply(is_outlier, axis=1, numeric_columns=numeric_columns, mean_per_column=mean_per_column, std_per_column=std_per_column)]
print(f"After removing outliers: {data.shape}")

Before removing outliers: (10000, 31)
After removing outliers: (10000, 31)


Converting non-numeric features to numerical features

In [8]:
data = convert_non_numeric_to_numeric(data=data)
print(data)

['G', 'E', 'F', 'H', 'D', 'I', 'J']
        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...      

Normalise data using each columns respective mean and std.

In [9]:
print(data)

        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9996 -12.246698   1.01    4        5   69.5   55.0   4853  6.00  5.94  4.15   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...        b8        b9       b10  colour_G  

Data splitting:
- Split the entire dataset into training and testing sets first.
- Use the training set to generate folds (one validation and the rest training folds) (K-Fold Cross Validation)

In [10]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=REPRODUCIBILITY_SEED)
print(f"Training set size: {len(train_data)} | Test set size: {len(test_data)}")    
print()


Training set size: 8000 | Test set size: 2000



In [11]:
normaliser = Normaliser()
os.makedirs(TRAINING_STATISTICS_DIR, exist_ok=True)
stats_for_each_column = {}
for column in numeric_columns:
    print(data[column])
    train_data_column_mean = normaliser.calculate_mean(train_data[column])
    train_data_column_std = normaliser.calculate_std(train_data[column])

    train_data[column] = normaliser.standardise(train_data[column], mean=train_data_column_mean, std=train_data_column_std)

    stats_for_each_column[column] = {
        "mean": train_data_column_mean,
        "std": train_data_column_std
    }

    # Normalise test data using the mean and std of the training data
    test_data[column] = normaliser.standardise(test_data[column], mean=train_data_column_mean, std=train_data_column_std)
    print("after", train_data[column])

with open(f"{TRAINING_STATISTICS_DIR}/stats.json", "w") as f:
    json.dump(stats_for_each_column, f)

0       1.14
1       0.38
2       0.50
3       0.70
4       0.83
        ... 
9995    0.33
9996    1.01
9997    0.52
9998    0.31
9999    0.37
Name: carat, Length: 10000, dtype: float64
after 9254   -1.023878
1561    1.522047
1670   -1.045094
6087   -0.111588
6669    1.140158
          ...   
5734   -0.154021
5191    0.970430
5390   -1.045094
860    -1.002662
7270   -1.151175
Name: carat, Length: 8000, dtype: float64
0       62.3
1       60.5
2       60.7
3       61.2
4       62.4
        ... 
9995    62.6
9996    69.5
9997    57.9
9998    62.0
9999    59.9
Name: depth, Length: 10000, dtype: float64
after 9254   -0.381342
1561    0.945157
1670    0.386631
6087   -0.311527
6669   -0.311527
          ...   
5734    0.665894
5191    0.107368
5390    0.107368
860     0.875341
7270    0.665894
Name: depth, Length: 8000, dtype: float64
0       56.0
1       59.0
2       58.0
3       58.0
4       54.0
        ... 
9995    57.0
9996    55.0
9997    61.0
9998    54.0
9999    59.0
Name: table, Le

In [12]:
kfold_data = get_kfold_data(data=train_data, k=NUM_FOLDS, reproducibility_seed=REPRODUCIBILITY_SEED)

Fold: 0/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')

Fold: 1/5
Train shape: (6400, 37) | 80.00%
Validation shape: (1600, 37) | 20.00%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', '

Define models and hyperparameter tuning objectives for each model

In [13]:
models = {
        "linear_regression": LinearRegression,
        "lasso": Lasso,
        "ridge": Ridge,
        "xgb": xgb.XGBRegressor,
        "random_forest": RandomForestRegressor,
        "gradient_boosting": GradientBoostingRegressor,
        "ada_boost": AdaBoostRegressor,
        "lgbm": lgb.LGBMRegressor
        }

# Must contain the one-hot encoded columns (otherwise this does not make sense)
minimum_features = [feature for feature in data.columns if feature.startswith("colour")] 

def objective(model_type, trial, x_train, y_train, x_val, y_val):
    if model_type == LinearRegression:
        parameters = {
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
        }
    elif model_type == Lasso:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "selection": trial.suggest_categorical("selection", ["cyclic", "random"]),
            "warm_start": trial.suggest_categorical("warm_start", [True, False]),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == Ridge:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "solver": trial.suggest_categorical("solver", ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "positive": False,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == xgb.XGBRegressor:
        parameters = {
            "objective": "reg:squarederror",
            "eval_metric": "rmse",
            "n_estimators": 100,
            "eta": trial.suggest_float("eta", 1e-2, 0.2, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 10, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 6),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "seed": REPRODUCIBILITY_SEED
        }
    elif model_type == RandomForestRegressor:
        parameters = {
            "n_estimators": 100,
            "criterion": trial.suggest_categorical("criterion", ["absolute_error", "squared_error"]),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "bootstrap": True,
            "oob_score": False,
            "n_jobs": -1,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == GradientBoostingRegressor:
        parameters = {
            "n_estimators": 100,
            "loss": trial.suggest_categorical("loss", ["absolute_error", "squared_error", "huber", "quantile"]),
            "criterion": trial.suggest_categorical("criterion", ["friedman_mse", "squared_error"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "subsample": trial.suggest_float("subsample", 0.05, 1.0),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 2**10),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == AdaBoostRegressor:
        parameters = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 100),
            "loss": trial.suggest_categorical("loss", ["linear", "square", "exponential"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == lgb.LGBMRegressor:
        parameters = {
                    "objective": "regression",
                    "metric": "rmse",
                    "n_estimators": 100,
                    "verbosity": -1,
                    "bagging_freq": 1,
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
                    "subsample": trial.suggest_float("subsample", 0.05, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
                    "seed": REPRODUCIBILITY_SEED
        }
    
    # Wrapper method for feature selection
    if USE_FEATURE_SELECTION:
        selected_features = minimum_features.copy()
        train_columns = x_train.columns.tolist() 
        for feature in train_columns:
            if feature.startswith("colour"):
                continue
            if trial.suggest_categorical(feature, [0, 1]) == 1:
                selected_features.append(feature)
        if len(selected_features) == 0:
            return float("inf")
        
        x_train = x_train[selected_features]
        x_val = x_val[selected_features]

    model = model_type(**parameters) # Create the model
    model.fit(x_train, y_train)
    predictions = model.predict(x_val)
    metrics = calculate_metrics(targets=y_val, preds=predictions)
    rmse = metrics["rmse"]
    return rmse

In [14]:
# Train + Validate models
metrics = ["mae", "mse", "rmse", "pcc", "spearman_r", "r2_score"]
model_scores = {model_name: defaultdict(list) for model_name in models.keys()}

if os.path.exists("model_best_hyperparameters"):
    raise Exception("Directory for best hyperparameters already exists. Please delete it before running this script.")

os.makedirs("model_best_hyperparameters")

for fold in range(NUM_FOLDS):
    fold_data = kfold_data[fold]
     
    # Extract data
    train_data = fold_data["train"]
    val_data = fold_data["val"]

    train_y = train_data["outcome"]
    val_y = val_data["outcome"]
    
    train_x = train_data.drop(columns=["outcome"])
    val_x = val_data.drop(columns=["outcome"])

    # print(f"Fold {fold+1}/{NUM_FOLDS}")
    # print(f"Train data shape: {train_x.shape} | Train target shape: {train_y.shape}")
    # print(f"Val data shape: {val_x.shape} | Val target shape: {val_y.shape}")
    # print(f"Test data shape: {test_x.shape} | Test target shape: {test_y.shape}")
    training_features = set(train_x.columns.tolist())


    # Train model
    for model_name, model in models.items():
        study = optuna.create_study(direction="minimize")
        print("G", train_x.columns)
        print("G", val_x.columns)
        study.optimize(lambda trial: objective(trial=trial, 
                                               model_type=model, 
                                               x_train=train_x, 
                                               y_train=train_y, 
                                               x_val=val_x, 
                                               y_val=val_y
                                               ), n_trials=N_TRIALS)
        
        # Train model with best hyperparameters
        best_trial = study.best_trial 

        if USE_FEATURE_SELECTION:
            best_fold_params = {} # Hyperparameters
            best_selected_features = minimum_features.copy() # Features selected by the model
            for param in best_trial.params:
                if param in training_features: # i.e., if the parameter is a feature
                    if best_trial.params[param] == 1:
                        best_selected_features.append(param)
                else: # A hyperparameter
                    best_fold_params[param] = best_trial.params[param]
        else:
            best_fold_params = best_trial.params
            best_selected_features = train_x.columns.tolist() # All feature columns
        print("P", best_fold_params)
        print("F", best_selected_features)
        
        # Select the best features
        print(train_x.columns)
        print(val_x.columns)
        train_x = train_x[best_selected_features]
        val_x = val_x[best_selected_features]

        model = model(**best_fold_params)
        model.fit(train_x, train_y)
        preds = model.predict(val_x)
        
        # Save the best hyperparameters for this model at this fold.
        os.makedirs(f"model_best_hyperparameters/{model_name}", exist_ok=True)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}.json", "w") as f:
            json.dump(best_fold_params, f)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}_selected_features.json", "w") as f:
            json.dump(best_selected_features, f)

        # Calculate metrics
        metrics = calculate_metrics(targets=val_y, preds=preds)
        mae = metrics["mae"]
        mse = metrics["mse"]
        rmse = metrics["rmse"]
        pcc = metrics["pcc"]
        spearman_r = metrics["spearman_r"]
        r2_score = metrics["r2_score"]

        for metric in metrics:
            model_scores[model_name][metric].append(metrics[metric])

        print(f"Fold: {fold+1}/{NUM_FOLDS}")
        print(f"Model name: {model_name}")
        print(f"MAE: {mae}")
        print(f"MSE: {mse}")
        print(f"RMSE: {rmse}")
        print(f"PCC: {pcc}")
        print(f"Spearman R: {spearman_r}")
        print(f"R2 Score: {r2_score}")
        print()


[I 2025-02-19 01:17:42,572] A new study created in memory with name: no-name-1865597f-52d4-4569-b02a-2c22d701e354
[I 2025-02-19 01:17:42,580] Trial 0 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,587] Trial 1 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,593] Trial 2 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,600] Trial 3 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,606] Trial 4 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,613] Trial 5 finished with value: 1

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:17:42,646] Trial 10 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,652] Trial 11 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,658] Trial 12 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,664] Trial 13 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,673] Trial 14 finished with value: 10.789711542941271 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.789711542941271.
[I 2025-02-19 01:17:42,682] Trial 15 finished with value: 10.789711542941271 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.789711542941271.
[

P {'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 1/5
Model name: linear_regression
MAE

[I 2025-02-19 01:17:43,501] Trial 8 finished with value: 10.782920787469438 and parameters: {'alpha': 0.0292313169846942, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 6 with value: 10.771415453221172.
[I 2025-02-19 01:17:43,561] Trial 9 finished with value: 10.783958670240185 and parameters: {'alpha': 0.0018504537196147273, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}. Best is trial 6 with value: 10.771415453221172.
[I 2025-02-19 01:17:43,569] Trial 10 finished with value: 10.75916756656415 and parameters: {'alpha': 0.08788871927703522, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 10 with value: 10.75916756656415.
[I 2025-02-19 01:17:43,576] Trial 11 finished with value: 10.758558088809318 and parameters: {'alpha': 0.0906037276347603, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}. Best is trial 11 with value: 10.758558088809318.
[I 2025-02-19 01:17:43,585] Trial 12 finish

P {'alpha': 0.09980305555429725, 'fit_intercept': True, 'selection': 'random', 'warm_start': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colou

[I 2025-02-19 01:17:45,488] Trial 0 finished with value: 10.788832701289595 and parameters: {'alpha': 0.04256532624356071, 'solver': 'saga', 'fit_intercept': True}. Best is trial 0 with value: 10.788832701289595.
[I 2025-02-19 01:17:45,497] Trial 1 finished with value: 10.78970402770834 and parameters: {'alpha': 0.005106769406973619, 'solver': 'svd', 'fit_intercept': False}. Best is trial 0 with value: 10.788832701289595.
[I 2025-02-19 01:17:46,122] Trial 2 finished with value: 10.788859806621513 and parameters: {'alpha': 0.010843611852456143, 'solver': 'saga', 'fit_intercept': True}. Best is trial 0 with value: 10.788832701289595.
[I 2025-02-19 01:17:46,126] Trial 3 finished with value: 10.78970907260392 and parameters: {'alpha': 0.0016781668543090298, 'solver': 'auto', 'fit_intercept': False}. Best is trial 0 with value: 10.788832701289595.
[I 2025-02-19 01:17:46,133] Trial 4 finished with value: 10.789648128257909 and parameters: {'alpha': 0.007809740666858977, 'solver': 'lsqr', 'fi

P {'alpha': 0.06330190458903077, 'solver': 'saga', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='obje

[I 2025-02-19 01:18:26,994] A new study created in memory with name: no-name-32a03b06-8492-49e0-8e77-6c2b7a1d794c


Fold: 1/5
Model name: ridge
MAE: 8.706966099040997
MSE: 116.39611542421349
RMSE: 10.788703139127218
PCC: 0.5300572619043755
Spearman R: 0.5514776529209582
R2 Score: 0.28061924402753846

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:18:27,191] Trial 0 finished with value: 10.137924172924096 and parameters: {'eta': 0.01223930207526183, 'gamma': 5.654602713523533e-07, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.9682805154145087, 'colsample_bytree': 0.617204628606095}. Best is trial 0 with value: 10.137924172924096.
[I 2025-02-19 01:18:27,282] Trial 1 finished with value: 9.800674967499274 and parameters: {'eta': 0.011504115735615134, 'gamma': 0.0011767166118235852, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.5820109632699091, 'colsample_bytree': 0.9487612880596996}. Best is trial 1 with value: 9.800674967499274.
[I 2025-02-19 01:18:27,382] Trial 2 finished with value: 9.364419517206146 and parameters: {'eta': 0.1318542583568815, 'gamma': 9.839663310886898e-05, 'max_depth': 6, 'min_child_weight': 2, 'subsample': 0.8436803674076896, 'colsample_bytree': 0.7345346787685447}. Best is trial 2 with value: 9.364419517206146.
[I 2025-02-19 01:18:27,439] Trial 3 finished with value: 9.5114

P {'eta': 0.15618266124954547, 'gamma': 8.017840210076792e-08, 'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.8503173859945983, 'colsample_bytree': 0.5945609330330898}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G

[I 2025-02-19 01:18:38,642] Trial 0 finished with value: 9.60458579342426 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 1}. Best is trial 0 with value: 9.60458579342426.
[I 2025-02-19 01:18:40,346] Trial 1 finished with value: 10.094240653683078 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 5, 'min_samples_leaf': 5}. Best is trial 0 with value: 9.60458579342426.
[I 2025-02-19 01:18:40,487] Trial 2 finished with value: 9.740373834450088 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 0 with value: 9.60458579342426.
[I 2025-02-19 01:18:40,611] Trial 3 finished with value: 10.10834781104755 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 10}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour

[I 2025-02-19 01:19:47,397] A new study created in memory with name: no-name-cc99efe9-27ac-4fba-8e77-918949e42a64


Fold: 1/5
Model name: random_forest
MAE: 7.719398981766281
MSE: 91.94610689980564
RMSE: 9.588853263023982
PCC: 0.6818916529905507
Spearman R: 0.6862501918946062
R2 Score: 0.4317313799585173

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:19:48,023] Trial 0 finished with value: 12.1690061304808 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.0017758973358070165, 'subsample': 0.2802068323140812, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'log2', 'max_leaf_nodes': 80}. Best is trial 0 with value: 12.1690061304808.
[I 2025-02-19 01:19:49,004] Trial 1 finished with value: 14.685938228709087 and parameters: {'loss': 'quantile', 'criterion': 'friedman_mse', 'learning_rate': 0.06138451470801005, 'subsample': 0.9830514718799446, 'max_depth': 5, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_leaf_nodes': 959}. Best is trial 0 with value: 12.1690061304808.
[I 2025-02-19 01:19:49,741] Trial 2 finished with value: 9.672580530502676 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.08904160900520396, 'subsample': 0.2409533291407851, 'max_depth': 8, 'min_samples_split': 8, 'min_s

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.07814852440022126, 'subsample': 0.7281221829511084, 'max_depth': 4, 'min_samples_split': 7, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_leaf_nodes': 825}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8

[I 2025-02-19 01:21:16,273] A new study created in memory with name: no-name-f3a55dcc-768c-42dc-a996-0431a2563781


Fold: 1/5
Model name: gradient_boosting
MAE: 7.408114472796907
MSE: 84.86772453608074
RMSE: 9.212368020008794
PCC: 0.6912066936418602
Spearman R: 0.692777274522373
R2 Score: 0.4754789916148018

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:21:18,584] Trial 0 finished with value: 9.78297329208849 and parameters: {'n_estimators': 82, 'loss': 'exponential', 'learning_rate': 0.020515709861058513}. Best is trial 0 with value: 9.78297329208849.
[I 2025-02-19 01:21:21,207] Trial 1 finished with value: 9.652676031025873 and parameters: {'n_estimators': 95, 'loss': 'linear', 'learning_rate': 0.040945587361871434}. Best is trial 1 with value: 9.652676031025873.
[I 2025-02-19 01:21:22,660] Trial 2 finished with value: 9.76046951386172 and parameters: {'n_estimators': 52, 'loss': 'exponential', 'learning_rate': 0.04090764898139928}. Best is trial 1 with value: 9.652676031025873.
[I 2025-02-19 01:21:24,407] Trial 3 finished with value: 9.786176199563863 and parameters: {'n_estimators': 62, 'loss': 'square', 'learning_rate': 0.0059313787318834065}. Best is trial 1 with value: 9.652676031025873.
[I 2025-02-19 01:21:26,066] Trial 4 finished with value: 9.787334908340647 and parameters: {'n_estimators': 59, 'loss': 'squa

P {'n_estimators': 100, 'loss': 'square', 'learning_rate': 0.08739480964417243}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype=

[I 2025-02-19 01:24:51,111] A new study created in memory with name: no-name-abacfbef-88c4-4497-a87f-22c3d36ba272
[I 2025-02-19 01:24:51,217] Trial 0 finished with value: 9.543911175361904 and parameters: {'learning_rate': 0.01775420711921511, 'num_leaves': 49, 'subsample': 0.5940174581977419, 'colsample_bytree': 0.7413401270760157, 'min_data_in_leaf': 70}. Best is trial 0 with value: 9.543911175361904.


Fold: 1/5
Model name: ada_boost
MAE: 7.622548438151848
MSE: 89.94769445351102
RMSE: 9.48407583549979
PCC: 0.668318600059804
Spearman R: 0.6749994335143537
R2 Score: 0.44408247476198826

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:24:51,300] Trial 1 finished with value: 9.48178667543128 and parameters: {'learning_rate': 0.015293873893736773, 'num_leaves': 711, 'subsample': 0.2338688983872519, 'colsample_bytree': 0.9998341723347658, 'min_data_in_leaf': 30}. Best is trial 1 with value: 9.48178667543128.
[I 2025-02-19 01:24:51,454] Trial 2 finished with value: 10.929026862398818 and parameters: {'learning_rate': 0.07681805262579026, 'num_leaves': 607, 'subsample': 0.47177307756551823, 'colsample_bytree': 0.08930642786714249, 'min_data_in_leaf': 19}. Best is trial 1 with value: 9.48178667543128.
[I 2025-02-19 01:24:51,538] Trial 3 finished with value: 11.431646423401364 and parameters: {'learning_rate': 0.004150045620064813, 'num_leaves': 881, 'subsample': 0.5464831403255174, 'colsample_bytree': 0.5953145962403543, 'min_data_in_leaf': 77}. Best is trial 1 with value: 9.48178667543128.
[I 2025-02-19 01:24:51,783] Trial 4 finished with value: 10.404729036783216 and parameters: {'learning_rate': 0.0252

P {'learning_rate': 0.06438401509450763, 'num_leaves': 705, 'subsample': 0.19148767399203923, 'colsample_bytree': 0.9822521491431577, 'min_data_in_leaf': 64}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour

[I 2025-02-19 01:24:59,536] Trial 6 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-19 01:24:59,542] Trial 7 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-19 01:24:59,550] Trial 8 finished with value: 10.813462363231972 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-19 01:24:59,560] Trial 9 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-19 01:24:59,570] Trial 10 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 2025-02-19 01:24:59,579] Trial 11 finished with value: 10.813462363231972 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.813462363231972.
[I 20

P {'fit_intercept': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 2/5
Model name: linear_regression
MAE:

[I 2025-02-19 01:25:00,372] Trial 5 finished with value: 10.813688235473748 and parameters: {'alpha': 0.0015537701481446212, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 2 with value: 10.812609153795075.
[I 2025-02-19 01:25:00,401] Trial 6 finished with value: 10.813617020391144 and parameters: {'alpha': 0.012844378574895836, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 2 with value: 10.812609153795075.
[I 2025-02-19 01:25:00,418] Trial 7 finished with value: 10.813590658110225 and parameters: {'alpha': 0.00689063858201734, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 2 with value: 10.812609153795075.
[I 2025-02-19 01:25:00,471] Trial 8 finished with value: 10.813355449325542 and parameters: {'alpha': 0.0016061954893211903, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}. Best is trial 2 with value: 10.812609153795075.
[I 2025-02-19 01:25:00,495] Trial 9 fi

P {'alpha': 0.006880251680509011, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'co

[I 2025-02-19 01:25:02,779] Trial 0 finished with value: 10.813549741994597 and parameters: {'alpha': 0.0015538576806317628, 'solver': 'saga', 'fit_intercept': True}. Best is trial 0 with value: 10.813549741994597.
[I 2025-02-19 01:25:02,790] Trial 1 finished with value: 10.813464129380602 and parameters: {'alpha': 0.021717523528375398, 'solver': 'svd', 'fit_intercept': True}. Best is trial 1 with value: 10.813464129380602.
[I 2025-02-19 01:25:02,795] Trial 2 finished with value: 10.813443998997501 and parameters: {'alpha': 0.019994161121790727, 'solver': 'sparse_cg', 'fit_intercept': True}. Best is trial 2 with value: 10.813443998997501.
[I 2025-02-19 01:25:02,801] Trial 3 finished with value: 10.813700054050788 and parameters: {'alpha': 0.002276437872757385, 'solver': 'lsqr', 'fit_intercept': True}. Best is trial 2 with value: 10.813443998997501.
[I 2025-02-19 01:25:02,806] Trial 4 finished with value: 10.813461105479147 and parameters: {'alpha': 0.009985102599306059, 'solver': 'chol

P {'alpha': 0.008591885753549845, 'solver': 'sparse_cg', 'fit_intercept': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype=

[I 2025-02-19 01:25:06,096] Trial 2 finished with value: 9.473345483782166 and parameters: {'eta': 0.025378609429715218, 'gamma': 2.290224701391959e-07, 'max_depth': 5, 'min_child_weight': 2, 'subsample': 0.8216620383719051, 'colsample_bytree': 0.7927928476300852}. Best is trial 1 with value: 9.303991573932612.
[I 2025-02-19 01:25:06,159] Trial 3 finished with value: 9.759498578955663 and parameters: {'eta': 0.01736465585603562, 'gamma': 0.0014434241658799064, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.7027044258036935, 'colsample_bytree': 0.9820951528126074}. Best is trial 1 with value: 9.303991573932612.
[I 2025-02-19 01:25:06,274] Trial 4 finished with value: 9.553173737409407 and parameters: {'eta': 0.1285561944991037, 'gamma': 3.196226887002632e-07, 'max_depth': 5, 'min_child_weight': 5, 'subsample': 0.6770136307513844, 'colsample_bytree': 0.9425874305253849}. Best is trial 1 with value: 9.303991573932612.
[I 2025-02-19 01:25:06,415] Trial 5 finished with value: 10.0153

P {'eta': 0.08307092711045708, 'gamma': 0.00022332528495335613, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.9197552630544233, 'colsample_bytree': 0.7709091443099487}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_

[I 2025-02-19 01:25:17,799] Trial 1 finished with value: 9.964177577884419 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 10, 'min_samples_leaf': 8}. Best is trial 1 with value: 9.964177577884419.
[I 2025-02-19 01:25:17,929] Trial 2 finished with value: 9.782868462001145 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 7}. Best is trial 2 with value: 9.782868462001145.
[I 2025-02-19 01:25:18,051] Trial 3 finished with value: 10.370582478076946 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 4, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 2 with value: 9.782868462001145.
[I 2025-02-19 01:25:18,174] Trial 4 finished with value: 10.544697701992986 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 3}. Best is trial 

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 4}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour

[I 2025-02-19 01:26:34,689] A new study created in memory with name: no-name-589aa78b-866b-4b9d-a73a-ae80a31f2654


Fold: 2/5
Model name: random_forest
MAE: 7.613397582121371
MSE: 92.85289996813317
RMSE: 9.63602096137888
PCC: 0.678081872930782
Spearman R: 0.6840539478726358
R2 Score: 0.4383388329741994

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:26:35,011] Trial 0 finished with value: 11.846193059101418 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.0030635354785441865, 'subsample': 0.23196325333017387, 'max_depth': 8, 'min_samples_split': 2, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_leaf_nodes': 368}. Best is trial 0 with value: 11.846193059101418.
[I 2025-02-19 01:26:36,268] Trial 1 finished with value: 9.56430781360584 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.032078116908747324, 'subsample': 0.7588486548505687, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_leaf_nodes': 762}. Best is trial 1 with value: 9.56430781360584.
[I 2025-02-19 01:26:36,962] Trial 2 finished with value: 17.58583781135821 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.013395667076739243, 'subsample': 0.41096376763152775, 'max_depth': 6, 'min_samples_split': 3

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.06501408845991155, 'subsample': 0.7209902379932357, 'max_depth': 6, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_leaf_nodes': 199}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8

[I 2025-02-19 01:27:50,997] A new study created in memory with name: no-name-2c679a22-32e9-4c85-86ad-453df6ec49f4


Fold: 2/5
Model name: gradient_boosting
MAE: 7.451197458533732
MSE: 89.28344244876683
RMSE: 9.448991610154325
PCC: 0.6793412824347518
Spearman R: 0.6856055900803087
R2 Score: 0.4599302498999428

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:27:52,897] Trial 0 finished with value: 9.79689708884949 and parameters: {'n_estimators': 67, 'loss': 'linear', 'learning_rate': 0.005078535813897968}. Best is trial 0 with value: 9.79689708884949.
[I 2025-02-19 01:27:55,132] Trial 1 finished with value: 9.688723064129338 and parameters: {'n_estimators': 90, 'loss': 'square', 'learning_rate': 0.0748009017036427}. Best is trial 1 with value: 9.688723064129338.
[I 2025-02-19 01:27:57,764] Trial 2 finished with value: 9.755032538636614 and parameters: {'n_estimators': 94, 'loss': 'square', 'learning_rate': 0.02282900574649646}. Best is trial 1 with value: 9.688723064129338.
[I 2025-02-19 01:28:00,019] Trial 3 finished with value: 9.806392921378807 and parameters: {'n_estimators': 79, 'loss': 'square', 'learning_rate': 0.005175870298211518}. Best is trial 1 with value: 9.688723064129338.
[I 2025-02-19 01:28:01,977] Trial 4 finished with value: 9.810068035529898 and parameters: {'n_estimators': 69, 'loss': 'linear', 'learni

P {'n_estimators': 96, 'loss': 'exponential', 'learning_rate': 0.09938134350187898}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dt

[I 2025-02-19 01:31:44,271] A new study created in memory with name: no-name-0bc25ef8-14b9-4bae-b1b8-828240a46c1d
[I 2025-02-19 01:31:44,385] Trial 0 finished with value: 11.904951004389197 and parameters: {'learning_rate': 0.011072920733413696, 'num_leaves': 383, 'subsample': 0.9251557766387459, 'colsample_bytree': 0.12582366869615286, 'min_data_in_leaf': 52}. Best is trial 0 with value: 11.904951004389197.


Fold: 2/5
Model name: ada_boost
MAE: 7.628156027133237
MSE: 92.8277608441817
RMSE: 9.634716438182377
PCC: 0.6640350916868205
Spearman R: 0.6702888525436838
R2 Score: 0.4384908979037969

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:31:45,463] Trial 1 finished with value: 9.846346320740123 and parameters: {'learning_rate': 0.06819012820007021, 'num_leaves': 764, 'subsample': 0.4248223061645036, 'colsample_bytree': 0.8472084052768822, 'min_data_in_leaf': 3}. Best is trial 1 with value: 9.846346320740123.
[I 2025-02-19 01:31:45,553] Trial 2 finished with value: 10.768202806885704 and parameters: {'learning_rate': 0.018726365563750718, 'num_leaves': 236, 'subsample': 0.995356725217356, 'colsample_bytree': 0.24145059499673277, 'min_data_in_leaf': 68}. Best is trial 1 with value: 9.846346320740123.
[I 2025-02-19 01:31:45,599] Trial 3 finished with value: 12.293494953246418 and parameters: {'learning_rate': 0.001661048244708344, 'num_leaves': 825, 'subsample': 0.1043215006421988, 'colsample_bytree': 0.6227712012288656, 'min_data_in_leaf': 30}. Best is trial 1 with value: 9.846346320740123.
[I 2025-02-19 01:31:45,752] Trial 4 finished with value: 9.507406303885345 and parameters: {'learning_rate': 0.0821

P {'learning_rate': 0.05165489737583618, 'num_leaves': 113, 'subsample': 0.7928948095410717, 'colsample_bytree': 0.8980904556214522, 'min_data_in_leaf': 93}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_

[I 2025-02-19 01:31:55,326] Trial 14 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-19 01:31:55,332] Trial 15 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-19 01:31:55,340] Trial 16 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-19 01:31:55,348] Trial 17 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-19 01:31:55,357] Trial 18 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808.
[I 2025-02-19 01:31:55,364] Trial 19 finished with value: 10.996838263571808 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.996838263571808

P {'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 3/5
Model name: linear_regression
MAE

[I 2025-02-19 01:31:56,084] Trial 8 finished with value: 10.993740107273442 and parameters: {'alpha': 0.0044620364710451325, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}. Best is trial 6 with value: 10.980525078538271.
[I 2025-02-19 01:31:56,142] Trial 9 finished with value: 10.99596740766653 and parameters: {'alpha': 0.0013055466651347193, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 6 with value: 10.980525078538271.
[I 2025-02-19 01:31:56,150] Trial 10 finished with value: 10.972468837067996 and parameters: {'alpha': 0.09300597346429923, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 10 with value: 10.972468837067996.
[I 2025-02-19 01:31:56,157] Trial 11 finished with value: 10.972618094351525 and parameters: {'alpha': 0.08974509942312142, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 10 with value: 10.972468837067996.
[I 2025-02-19 01:31:56,164] Trial 12

P {'alpha': 0.09998985310916897, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colo

[I 2025-02-19 01:31:57,266] Trial 0 finished with value: 10.996894805727718 and parameters: {'alpha': 0.07743059824979422, 'solver': 'saga', 'fit_intercept': False}. Best is trial 0 with value: 10.996894805727718.
[I 2025-02-19 01:31:57,270] Trial 1 finished with value: 10.996831135119935 and parameters: {'alpha': 0.03335059127969077, 'solver': 'auto', 'fit_intercept': False}. Best is trial 1 with value: 10.996831135119935.
[I 2025-02-19 01:31:57,274] Trial 2 finished with value: 10.996838040355454 and parameters: {'alpha': 0.001177804408173603, 'solver': 'auto', 'fit_intercept': True}. Best is trial 1 with value: 10.996831135119935.
[I 2025-02-19 01:31:57,279] Trial 3 finished with value: 10.996837381817425 and parameters: {'alpha': 0.004652750256466232, 'solver': 'cholesky', 'fit_intercept': True}. Best is trial 1 with value: 10.996831135119935.
[I 2025-02-19 01:31:57,546] Trial 4 finished with value: 10.996832440951147 and parameters: {'alpha': 0.0523796418727503, 'solver': 'saga', 

P {'alpha': 0.09897175743300969, 'solver': 'sparse_cg', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype=

[I 2025-02-19 01:32:00,656] Trial 1 finished with value: 9.933554803842917 and parameters: {'eta': 0.014955808968329274, 'gamma': 1.0383156301242945e-06, 'max_depth': 7, 'min_child_weight': 6, 'subsample': 0.6904856207738974, 'colsample_bytree': 0.6751741052765068}. Best is trial 0 with value: 9.3307179541999.
[I 2025-02-19 01:32:00,840] Trial 2 finished with value: 10.263278121855274 and parameters: {'eta': 0.012559555786301817, 'gamma': 9.743194596591681e-05, 'max_depth': 8, 'min_child_weight': 6, 'subsample': 0.613909301683512, 'colsample_bytree': 0.5937956487808151}. Best is trial 0 with value: 9.3307179541999.
[I 2025-02-19 01:32:01,035] Trial 3 finished with value: 9.64856018946655 and parameters: {'eta': 0.024553209222650606, 'gamma': 1.2343596088284497e-06, 'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.5972647275034859, 'colsample_bytree': 0.6427812122915204}. Best is trial 0 with value: 9.3307179541999.
[I 2025-02-19 01:32:01,516] Trial 4 finished with value: 9.7233670

P {'eta': 0.11301691640815295, 'gamma': 9.322398269156298e-06, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.7964046078143356, 'colsample_bytree': 0.7664175525211888}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G

[I 2025-02-19 01:32:10,541] Trial 1 finished with value: 9.890232616120558 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 3, 'min_samples_leaf': 3}. Best is trial 1 with value: 9.890232616120558.
[I 2025-02-19 01:32:12,342] Trial 2 finished with value: 9.955687598803094 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 2, 'min_samples_leaf': 5}. Best is trial 1 with value: 9.890232616120558.
[I 2025-02-19 01:32:12,487] Trial 3 finished with value: 10.79831811343012 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 4, 'min_samples_split': 7, 'min_samples_leaf': 3}. Best is trial 1 with value: 9.890232616120558.
[I 2025-02-19 01:32:12,632] Trial 4 finished with value: 9.87054133749803 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 5}. Best is trial 4 wit

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 8}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-19 01:33:15,512] A new study created in memory with name: no-name-5fe9acfd-5cd5-467a-b474-4144cad6bd29


Fold: 3/5
Model name: random_forest
MAE: 7.63461996535045
MSE: 93.28633609603476
RMSE: 9.65848518640655
PCC: 0.6674780862757264
Spearman R: 0.6748544061931274
R2 Score: 0.42632581962395677

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:33:16,326] Trial 0 finished with value: 19.82840460723027 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.002116043783322026, 'subsample': 0.3072425164498077, 'max_depth': 8, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'max_leaf_nodes': 351}. Best is trial 0 with value: 19.82840460723027.
[I 2025-02-19 01:33:17,462] Trial 1 finished with value: 12.010223796431822 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.002082461228026285, 'subsample': 0.8422280053184795, 'max_depth': 8, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_leaf_nodes': 523}. Best is trial 1 with value: 12.010223796431822.
[I 2025-02-19 01:33:18,036] Trial 2 finished with value: 12.085044963629326 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.003597668786450302, 'subsample': 0.7405446021090825, 'max_depth': 3, 'min_samples_

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.0997912759416124, 'subsample': 0.5487663899005757, 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 6, 'max_features': 'log2', 'max_leaf_nodes': 606}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8',

[I 2025-02-19 01:34:36,576] A new study created in memory with name: no-name-01eadffd-5466-4bfa-9c8c-e168c067f1a5


Fold: 3/5
Model name: gradient_boosting
MAE: 7.407635795028064
MSE: 88.72452103749973
RMSE: 9.419369460717618
PCC: 0.6753883503851266
Spearman R: 0.6824539804507735
R2 Score: 0.4543791833238461

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:34:39,329] Trial 0 finished with value: 9.79659524756036 and parameters: {'n_estimators': 96, 'loss': 'linear', 'learning_rate': 0.022063091684713793}. Best is trial 0 with value: 9.79659524756036.
[I 2025-02-19 01:34:41,983] Trial 1 finished with value: 9.791396414879436 and parameters: {'n_estimators': 94, 'loss': 'exponential', 'learning_rate': 0.018331200337311098}. Best is trial 1 with value: 9.791396414879436.
[I 2025-02-19 01:34:43,998] Trial 2 finished with value: 9.764893158450192 and parameters: {'n_estimators': 71, 'loss': 'exponential', 'learning_rate': 0.05475712458413542}. Best is trial 2 with value: 9.764893158450192.
[I 2025-02-19 01:34:45,357] Trial 3 finished with value: 9.717360457114246 and parameters: {'n_estimators': 51, 'loss': 'square', 'learning_rate': 0.08189402812321236}. Best is trial 3 with value: 9.717360457114246.
[I 2025-02-19 01:34:47,564] Trial 4 finished with value: 9.785601866196146 and parameters: {'n_estimators': 79, 'loss': 'expon

P {'n_estimators': 52, 'loss': 'square', 'learning_rate': 0.07130359454588839}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='

[I 2025-02-19 01:37:24,650] A new study created in memory with name: no-name-1664f1be-7f46-4af7-bdec-9f7a0dbcbef2
[I 2025-02-19 01:37:24,701] Trial 0 finished with value: 9.89385906266289 and parameters: {'learning_rate': 0.02580521467522431, 'num_leaves': 125, 'subsample': 0.24054031015068728, 'colsample_bytree': 0.3946938460343073, 'min_data_in_leaf': 50}. Best is trial 0 with value: 9.89385906266289.
[I 2025-02-19 01:37:24,823] Trial 1 finished with value: 12.243800380326157 and parameters: {'learning_rate': 0.0012659198091721946, 'num_leaves': 860, 'subsample': 0.6245602328997221, 'colsample_bytree': 0.7263050192144926, 'min_data_in_leaf': 46}. Best is trial 0 with value: 9.89385906266289.


Fold: 3/5
Model name: ada_boost
MAE: 7.649942376399656
MSE: 94.93668840101604
RMSE: 9.743545987011917
PCC: 0.6458706151202551
Spearman R: 0.6624130568055938
R2 Score: 0.41617680374967925

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:37:24,898] Trial 2 finished with value: 10.491230856277411 and parameters: {'learning_rate': 0.0403875294112062, 'num_leaves': 374, 'subsample': 0.6690506095363827, 'colsample_bytree': 0.14613593752782317, 'min_data_in_leaf': 83}. Best is trial 0 with value: 9.89385906266289.
[I 2025-02-19 01:37:24,995] Trial 3 finished with value: 12.453239980659609 and parameters: {'learning_rate': 0.0012657912952785783, 'num_leaves': 1000, 'subsample': 0.31298234161500027, 'colsample_bytree': 0.394715718318106, 'min_data_in_leaf': 30}. Best is trial 0 with value: 9.89385906266289.
[I 2025-02-19 01:37:25,077] Trial 4 finished with value: 12.121921027693203 and parameters: {'learning_rate': 0.0013294125237343017, 'num_leaves': 714, 'subsample': 0.796076799392626, 'colsample_bytree': 0.9131556204631343, 'min_data_in_leaf': 90}. Best is trial 0 with value: 9.89385906266289.
[I 2025-02-19 01:37:25,192] Trial 5 finished with value: 11.943719563240196 and parameters: {'learning_rate': 0.00

P {'learning_rate': 0.04512286010180142, 'num_leaves': 899, 'subsample': 0.6700333367841518, 'colsample_bytree': 0.7376122904689716, 'min_data_in_leaf': 85}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_

[I 2025-02-19 01:37:35,802] Trial 7 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-19 01:37:35,812] Trial 8 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-19 01:37:35,819] Trial 9 finished with value: 10.730366718545177 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-19 01:37:35,830] Trial 10 finished with value: 10.730366718545177 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-19 01:37:35,837] Trial 11 finished with value: 10.730366718545177 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.730366718545177.
[I 2025-02-19 01:37:35,844] Trial 12 finished with value: 10.730366718545177 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 10.730366718545177.
[I 2

P {'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 4/5
Model name: linear_regression
MAE

[I 2025-02-19 01:37:36,632] Trial 10 finished with value: 10.718706169589069 and parameters: {'alpha': 0.012082319627120494, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}. Best is trial 0 with value: 10.697643890039513.
[I 2025-02-19 01:37:36,639] Trial 11 finished with value: 10.693992440807829 and parameters: {'alpha': 0.0881686361960254, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 11 with value: 10.693992440807829.
[I 2025-02-19 01:37:36,646] Trial 12 finished with value: 10.694435714860756 and parameters: {'alpha': 0.08112934955461346, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 11 with value: 10.693992440807829.
[I 2025-02-19 01:37:36,655] Trial 13 finished with value: 10.693610310990024 and parameters: {'alpha': 0.09811925334293568, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}. Best is trial 13 with value: 10.693610310990024.
[I 2025-02-19 01:37:36,667] Trial 14 fin

P {'alpha': 0.09811925334293568, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colou

[I 2025-02-19 01:37:37,835] Trial 4 finished with value: 10.73053398081157 and parameters: {'alpha': 0.006970385925771691, 'solver': 'sag', 'fit_intercept': False}. Best is trial 1 with value: 10.730353424438638.
[I 2025-02-19 01:37:37,840] Trial 5 finished with value: 10.730410030866075 and parameters: {'alpha': 0.05033956798397597, 'solver': 'auto', 'fit_intercept': False}. Best is trial 1 with value: 10.730353424438638.
[I 2025-02-19 01:37:37,845] Trial 6 finished with value: 10.730360894521965 and parameters: {'alpha': 0.01752734469497968, 'solver': 'sparse_cg', 'fit_intercept': True}. Best is trial 1 with value: 10.730353424438638.
[I 2025-02-19 01:37:37,850] Trial 7 finished with value: 10.730358250665981 and parameters: {'alpha': 0.02359672544920593, 'solver': 'sparse_cg', 'fit_intercept': True}. Best is trial 1 with value: 10.730353424438638.
[I 2025-02-19 01:37:37,855] Trial 8 finished with value: 10.730373528116647 and parameters: {'alpha': 0.007914340805430164, 'solver': 'au

P {'alpha': 0.0010000898797646398, 'solver': 'lsqr', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='ob

[I 2025-02-19 01:37:41,011] Trial 2 finished with value: 9.400782851283239 and parameters: {'eta': 0.07239344688806212, 'gamma': 0.35745716787873355, 'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.8183788974743766, 'colsample_bytree': 0.9952477838409255}. Best is trial 1 with value: 9.318526212073442.
[I 2025-02-19 01:37:41,135] Trial 3 finished with value: 10.228392992750397 and parameters: {'eta': 0.010217763633064387, 'gamma': 1.4037023137527096e-05, 'max_depth': 6, 'min_child_weight': 6, 'subsample': 0.5524374821456842, 'colsample_bytree': 0.7439574411923776}. Best is trial 1 with value: 9.318526212073442.
[I 2025-02-19 01:37:41,452] Trial 4 finished with value: 9.922100547582318 and parameters: {'eta': 0.016270830002103658, 'gamma': 8.335489446193725, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.9310946153744069, 'colsample_bytree': 0.5956051135497538}. Best is trial 1 with value: 9.318526212073442.
[I 2025-02-19 01:37:41,562] Trial 5 finished with value: 9.4466135

P {'eta': 0.09700919281617525, 'gamma': 1.2351196210769092e-07, 'max_depth': 3, 'min_child_weight': 2, 'subsample': 0.9423458726184386, 'colsample_bytree': 0.979291894964549}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G

[I 2025-02-19 01:37:51,242] Trial 1 finished with value: 9.883141387029553 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 10}. Best is trial 0 with value: 9.853279277070836.
[I 2025-02-19 01:37:52,499] Trial 2 finished with value: 11.023271590003521 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 4}. Best is trial 0 with value: 9.853279277070836.
[I 2025-02-19 01:37:54,185] Trial 3 finished with value: 9.764375130581652 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 7}. Best is trial 3 with value: 9.764375130581652.
[I 2025-02-19 01:37:56,194] Trial 4 finished with value: 9.475886731843847 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 6}. Best is trial

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 2}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-19 01:38:55,137] A new study created in memory with name: no-name-6c3942e3-d67f-46b8-bf85-621af847c5c0


Fold: 4/5
Model name: random_forest
MAE: 7.554541749107021
MSE: 89.36627757448878
RMSE: 9.453373872564693
PCC: 0.6878110074954038
Spearman R: 0.6932611126019972
R2 Score: 0.44926385399010316

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:38:55,862] Trial 0 finished with value: 12.345187559123854 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.001087491797166059, 'subsample': 0.3097338131394277, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_leaf_nodes': 43}. Best is trial 0 with value: 12.345187559123854.
[I 2025-02-19 01:38:56,832] Trial 1 finished with value: 15.303964886405488 and parameters: {'loss': 'quantile', 'criterion': 'friedman_mse', 'learning_rate': 0.026540234567789353, 'subsample': 0.47411600984324737, 'max_depth': 8, 'min_samples_split': 8, 'min_samples_leaf': 7, 'max_features': 'log2', 'max_leaf_nodes': 93}. Best is trial 0 with value: 12.345187559123854.
[I 2025-02-19 01:38:57,717] Trial 2 finished with value: 11.54681361316372 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.004194213659374362, 'subsample': 0.6176714033728293, 'max_depth': 6, 'min_samples_split': 10

P {'loss': 'squared_error', 'criterion': 'squared_error', 'learning_rate': 0.08435119913958108, 'subsample': 0.9821663776653925, 'max_depth': 4, 'min_samples_split': 3, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_leaf_nodes': 236}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
  

[I 2025-02-19 01:40:21,286] A new study created in memory with name: no-name-ad97ce4e-2ffb-4990-a508-5f0e6505b2ce


Fold: 4/5
Model name: gradient_boosting
MAE: 7.365902860999012
MSE: 85.09758050936972
RMSE: 9.22483498548184
PCC: 0.6903679959540361
Spearman R: 0.696194457497835
R2 Score: 0.47557048590914974

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:40:23,840] Trial 0 finished with value: 9.549228298269094 and parameters: {'n_estimators': 88, 'loss': 'square', 'learning_rate': 0.023074911662991377}. Best is trial 0 with value: 9.549228298269094.
[I 2025-02-19 01:40:25,650] Trial 1 finished with value: 9.741189889193407 and parameters: {'n_estimators': 63, 'loss': 'exponential', 'learning_rate': 0.0021203076272068126}. Best is trial 0 with value: 9.549228298269094.
[I 2025-02-19 01:40:27,208] Trial 2 finished with value: 9.657552046407565 and parameters: {'n_estimators': 55, 'loss': 'linear', 'learning_rate': 0.03806337941936092}. Best is trial 0 with value: 9.549228298269094.
[I 2025-02-19 01:40:28,961] Trial 3 finished with value: 9.507441927545717 and parameters: {'n_estimators': 62, 'loss': 'exponential', 'learning_rate': 0.07387823042945543}. Best is trial 3 with value: 9.507441927545717.
[I 2025-02-19 01:40:31,664] Trial 4 finished with value: 9.735037877836612 and parameters: {'n_estimators': 95, 'loss': 'sq

P {'n_estimators': 92, 'loss': 'square', 'learning_rate': 0.09890506674327067}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='

[I 2025-02-19 01:44:09,573] A new study created in memory with name: no-name-717bcaf7-41fa-4c36-a4b7-65047544b341
[I 2025-02-19 01:44:09,686] Trial 0 finished with value: 9.180755489204351 and parameters: {'learning_rate': 0.02793909563617924, 'num_leaves': 967, 'subsample': 0.9199101611583287, 'colsample_bytree': 0.7689153216446577, 'min_data_in_leaf': 97}. Best is trial 0 with value: 9.180755489204351.
[I 2025-02-19 01:44:09,739] Trial 1 finished with value: 10.661603084114585 and parameters: {'learning_rate': 0.0066693589041582095, 'num_leaves': 675, 'subsample': 0.27895116440626977, 'colsample_bytree': 0.8160210413578799, 'min_data_in_leaf': 85}. Best is trial 0 with value: 9.180755489204351.


Fold: 4/5
Model name: ada_boost
MAE: 7.518095875049745
MSE: 88.72250260383063
RMSE: 9.419262317391455
PCC: 0.6743937550714079
Spearman R: 0.6815199125360202
R2 Score: 0.4532312358242897

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:44:09,794] Trial 2 finished with value: 9.393524826926832 and parameters: {'learning_rate': 0.022371095479999454, 'num_leaves': 962, 'subsample': 0.17795861342529212, 'colsample_bytree': 0.8072009416938989, 'min_data_in_leaf': 80}. Best is trial 0 with value: 9.180755489204351.
[I 2025-02-19 01:44:09,857] Trial 3 finished with value: 12.64300088769936 and parameters: {'learning_rate': 0.0017413334927995853, 'num_leaves': 350, 'subsample': 0.3839882443037256, 'colsample_bytree': 0.11084802547923718, 'min_data_in_leaf': 51}. Best is trial 0 with value: 9.180755489204351.
[I 2025-02-19 01:44:09,945] Trial 4 finished with value: 9.245363950367148 and parameters: {'learning_rate': 0.03275049571157809, 'num_leaves': 809, 'subsample': 0.2542841945280836, 'colsample_bytree': 0.8811891101401929, 'min_data_in_leaf': 28}. Best is trial 0 with value: 9.180755489204351.
[I 2025-02-19 01:44:10,270] Trial 5 finished with value: 12.325252088426929 and parameters: {'learning_rate': 0.0

P {'learning_rate': 0.0725709416688062, 'num_leaves': 7, 'subsample': 0.849010214527834, 'colsample_bytree': 0.8928105579705727, 'min_data_in_leaf': 90}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 

[I 2025-02-19 01:44:19,868] Trial 17 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 2 with value: 10.921387446629819.
[I 2025-02-19 01:44:19,874] Trial 18 finished with value: 10.921387446629879 and parameters: {'fit_intercept': False}. Best is trial 2 with value: 10.921387446629819.
[I 2025-02-19 01:44:19,881] Trial 19 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 2 with value: 10.921387446629819.
[I 2025-02-19 01:44:19,888] Trial 20 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 2 with value: 10.921387446629819.
[I 2025-02-19 01:44:19,895] Trial 21 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 2 with value: 10.921387446629819.
[I 2025-02-19 01:44:19,901] Trial 22 finished with value: 10.921387446629819 and parameters: {'fit_intercept': True}. Best is trial 2 with value: 10.921387446629819.
[I 

P {'fit_intercept': True}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 5/5
Model name: linear_regression
MAE:

  model = cd_fast.enet_coordinate_descent(
[I 2025-02-19 01:44:20,662] Trial 4 finished with value: 10.411881644047991 and parameters: {'alpha': 0.0018910855760567548, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}. Best is trial 3 with value: 10.310288175280267.
[I 2025-02-19 01:44:20,675] Trial 5 finished with value: 10.360115312136779 and parameters: {'alpha': 0.062490008519981106, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}. Best is trial 3 with value: 10.310288175280267.
  model = cd_fast.enet_coordinate_descent(
[I 2025-02-19 01:44:20,752] Trial 6 finished with value: 10.34262511484075 and parameters: {'alpha': 0.0030291827376535928, 'fit_intercept': False, 'selection': 'random', 'warm_start': True}. Best is trial 3 with value: 10.310288175280267.
  model = cd_fast.enet_coordinate_descent(
[I 2025-02-19 01:44:20,825] Trial 7 finished with value: 10.469678125716237 and parameters: {'alpha': 0.0012913834080206465, 'fit_intercept': True, '

P {'alpha': 0.01837473246380037, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'col

[I 2025-02-19 01:44:23,130] Trial 8 finished with value: 10.913811281241566 and parameters: {'alpha': 0.021342582974263138, 'solver': 'sag', 'fit_intercept': False}. Best is trial 2 with value: 10.45297653084107.
[I 2025-02-19 01:44:23,138] Trial 9 finished with value: 10.909621647804837 and parameters: {'alpha': 0.0655742113182669, 'solver': 'lsqr', 'fit_intercept': True}. Best is trial 2 with value: 10.45297653084107.
[I 2025-02-19 01:44:23,146] Trial 10 finished with value: 10.453055931557808 and parameters: {'alpha': 0.00537310191723011, 'solver': 'lsqr', 'fit_intercept': False}. Best is trial 2 with value: 10.45297653084107.
[I 2025-02-19 01:44:23,154] Trial 11 finished with value: 10.453051647941884 and parameters: {'alpha': 0.005691983673454497, 'solver': 'lsqr', 'fit_intercept': False}. Best is trial 2 with value: 10.45297653084107.
[I 2025-02-19 01:44:23,403] Trial 12 finished with value: 10.910641014141065 and parameters: {'alpha': 0.007411091856672183, 'solver': 'saga', 'fit

P {'alpha': 0.09992483053683908, 'solver': 'lsqr', 'fit_intercept': False}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='obje

[I 2025-02-19 01:44:26,133] Trial 0 finished with value: 9.34135969327366 and parameters: {'eta': 0.07914187795520122, 'gamma': 0.011027612889263998, 'max_depth': 10, 'min_child_weight': 1, 'subsample': 0.906710920382424, 'colsample_bytree': 0.7993988918400448}. Best is trial 0 with value: 9.34135969327366.
[I 2025-02-19 01:44:26,225] Trial 1 finished with value: 9.398606891670239 and parameters: {'eta': 0.17869102653637997, 'gamma': 3.596113399495026e-05, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.6875918430682448, 'colsample_bytree': 0.7715855912446198}. Best is trial 0 with value: 9.34135969327366.
[I 2025-02-19 01:44:26,427] Trial 2 finished with value: 9.359828688542143 and parameters: {'eta': 0.1413083430226152, 'gamma': 0.00020349681670614472, 'max_depth': 8, 'min_child_weight': 2, 'subsample': 0.9189095546787999, 'colsample_bytree': 0.7967473593294471}. Best is trial 0 with value: 9.34135969327366.
[I 2025-02-19 01:44:26,658] Trial 3 finished with value: 9.4545491107

P {'eta': 0.08618562343003146, 'gamma': 0.002120982028955736, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.9605788448013486, 'colsample_bytree': 0.7616987969181312}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G'

[I 2025-02-19 01:44:38,771] Trial 1 finished with value: 9.539569886146557 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 6, 'min_samples_split': 3, 'min_samples_leaf': 3}. Best is trial 1 with value: 9.539569886146557.
[I 2025-02-19 01:44:40,600] Trial 2 finished with value: 9.615150910999377 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 6, 'min_samples_split': 3, 'min_samples_leaf': 4}. Best is trial 1 with value: 9.539569886146557.
[I 2025-02-19 01:44:42,563] Trial 3 finished with value: 9.364638624227299 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 9}. Best is trial 3 with value: 9.364638624227299.
[I 2025-02-19 01:44:42,682] Trial 4 finished with value: 9.77225645235035 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 10, 'min_samples_leaf': 9}. Best is trial 3 

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 4, 'min_samples_leaf': 6}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_

[I 2025-02-19 01:45:18,657] A new study created in memory with name: no-name-c96cf1c8-90d2-4037-927e-117218c2d06f


Fold: 5/5
Model name: random_forest
MAE: 7.359460518397484
MSE: 84.8952656229647
RMSE: 9.213862687438136
PCC: 0.683679275237935
Spearman R: 0.6889284751673731
R2 Score: 0.45343074705422715

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:45:18,914] Trial 0 finished with value: 11.490301330817305 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.002892827059194366, 'subsample': 0.1128898540937431, 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_leaf_nodes': 928}. Best is trial 0 with value: 11.490301330817305.
[I 2025-02-19 01:45:19,797] Trial 1 finished with value: 18.897686720431597 and parameters: {'loss': 'quantile', 'criterion': 'friedman_mse', 'learning_rate': 0.007083807606428477, 'subsample': 0.8321703694897674, 'max_depth': 5, 'min_samples_split': 4, 'min_samples_leaf': 6, 'max_features': 'log2', 'max_leaf_nodes': 25}. Best is trial 0 with value: 11.490301330817305.
[I 2025-02-19 01:45:20,220] Trial 2 finished with value: 17.30917000754812 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.013038787794589899, 'subsample': 0.22832795552235652, 'max_depth': 6, 'min_samples_split'

P {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.06884682275875297, 'subsample': 0.7205188998242785, 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 9, 'max_features': 'sqrt', 'max_leaf_nodes': 474}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
   

[I 2025-02-19 01:46:34,625] A new study created in memory with name: no-name-af5735b8-25a3-4980-9e17-3f1837d8e005


Fold: 5/5
Model name: gradient_boosting
MAE: 7.208293276540481
MSE: 82.0828934688996
RMSE: 9.059961008133511
PCC: 0.6871829220219541
Spearman R: 0.6916982584758822
R2 Score: 0.47153724729276403

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:46:36,530] Trial 0 finished with value: 9.456419479074421 and parameters: {'n_estimators': 66, 'loss': 'square', 'learning_rate': 0.0026054405163681337}. Best is trial 0 with value: 9.456419479074421.
[I 2025-02-19 01:46:38,033] Trial 1 finished with value: 9.434168918837216 and parameters: {'n_estimators': 52, 'loss': 'square', 'learning_rate': 0.0016752312911628429}. Best is trial 1 with value: 9.434168918837216.
[I 2025-02-19 01:46:40,157] Trial 2 finished with value: 9.382257317372655 and parameters: {'n_estimators': 75, 'loss': 'square', 'learning_rate': 0.017010297450272487}. Best is trial 2 with value: 9.382257317372655.
[I 2025-02-19 01:46:42,571] Trial 3 finished with value: 9.446511112221623 and parameters: {'n_estimators': 85, 'loss': 'linear', 'learning_rate': 0.0010193963270989494}. Best is trial 2 with value: 9.382257317372655.
[I 2025-02-19 01:46:45,410] Trial 4 finished with value: 9.410801582224764 and parameters: {'n_estimators': 100, 'loss': 'exponen

P {'n_estimators': 95, 'loss': 'exponential', 'learning_rate': 0.07548508931312467}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dt

[I 2025-02-19 01:50:40,788] A new study created in memory with name: no-name-30d15191-fc07-4f2f-84ed-63facad23b1f
[I 2025-02-19 01:50:40,913] Trial 0 finished with value: 11.581136172518343 and parameters: {'learning_rate': 0.003444635840553244, 'num_leaves': 504, 'subsample': 0.22708541687349598, 'colsample_bytree': 0.4473334717626529, 'min_data_in_leaf': 14}. Best is trial 0 with value: 11.581136172518343.
[I 2025-02-19 01:50:40,946] Trial 1 finished with value: 9.709276452986401 and parameters: {'learning_rate': 0.015962986732779707, 'num_leaves': 184, 'subsample': 0.2103463300250239, 'colsample_bytree': 0.5265781347832432, 'min_data_in_leaf': 99}. Best is trial 1 with value: 9.709276452986401.


Fold: 5/5
Model name: ada_boost
MAE: 7.398124946702292
MSE: 86.51651002892038
RMSE: 9.301425161173979
PCC: 0.6659809412734874
Spearman R: 0.6749758283631325
R2 Score: 0.442992917131636

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-19 01:50:41,033] Trial 2 finished with value: 11.332084144909112 and parameters: {'learning_rate': 0.0034046842410066546, 'num_leaves': 1002, 'subsample': 0.29714477159544683, 'colsample_bytree': 0.6118493831946349, 'min_data_in_leaf': 40}. Best is trial 1 with value: 9.709276452986401.
[I 2025-02-19 01:50:41,539] Trial 3 finished with value: 11.216740819432147 and parameters: {'learning_rate': 0.08064901664148853, 'num_leaves': 656, 'subsample': 0.8590962535749335, 'colsample_bytree': 0.05436946174673503, 'min_data_in_leaf': 8}. Best is trial 1 with value: 9.709276452986401.
[I 2025-02-19 01:50:41,811] Trial 4 finished with value: 9.33600420376519 and parameters: {'learning_rate': 0.03070395847080292, 'num_leaves': 353, 'subsample': 0.5065436482234044, 'colsample_bytree': 0.509174425879903, 'min_data_in_leaf': 11}. Best is trial 4 with value: 9.33600420376519.
[I 2025-02-19 01:50:41,912] Trial 5 finished with value: 11.970498697313962 and parameters: {'learning_rate': 0.001

P {'learning_rate': 0.08003873909615403, 'num_leaves': 5, 'subsample': 0.8814678546992564, 'colsample_bytree': 0.9696469364859929, 'min_data_in_leaf': 77}
F ['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E'

Compute average scores and rank models by R2 score

In [15]:
for model_name, model_metrics in model_scores.items():
    for metric, scores in model_metrics.items():
        model_scores[model_name][metric] = sum(scores) / len(scores)
    model_scores[model_name] = dict(model_scores[model_name])

model_scores = dict(sorted(model_scores.items(), key=lambda x: x[1]["r2_score"], reverse=True))

In [16]:
# Print results
for i, (model_name, model_metrics) in enumerate(model_scores.items()):
    print(f"No.{i+1} Model: {model_name}")
    for metric, score in model_metrics.items():
        print(f"{metric}: {score}")
    print()

No.1 Model: xgb
mae: 7.323102736934819
mse: 84.52200068685292
rmse: 9.192693995866223
pcc: 0.690838338913945
spearman_r: 0.6959746259666508
r2_score: 0.47655656788060047

No.2 Model: lgbm
mae: 7.364657864911964
mse: 85.7068538006461
rmse: 9.256216963166535
pcc: 0.6854965899029641
spearman_r: 0.6909994697263554
r2_score: 0.46928862415410444

No.3 Model: gradient_boosting
mae: 7.36822877277964
mse: 86.01123240012332
rmse: 9.273105016899219
pcc: 0.6846974488875457
spearman_r: 0.6897459122054345
r2_score: 0.4673792316081009

No.4 Model: random_forest
mae: 7.576283759348522
mse: 90.46937723228541
rmse: 9.510119194162447
pcc: 0.6797883789860796
spearman_r: 0.685469626745948
r2_score: 0.4398181267202007

No.5 Model: ada_boost
mae: 7.563373532687355
mse: 90.59023126629195
rmse: 9.516605147851903
pcc: 0.663719800642355
spearman_r: 0.6728394167525569
r2_score: 0.438994865874278

No.6 Model: lasso
mae: 8.556754527481356
mse: 114.73265627001578
rmse: 10.709077028967846
pcc: 0.5390384842945855
spea