In [1]:
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
import optuna
import os
import json
from collections import defaultdict

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

from src.utils import get_kfold_data, convert_non_numeric_to_numeric, calculate_r2_score, calculate_metrics, is_outlier
from src.normalisation import Normaliser
from src.constants import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv(DATA_PATH)

In [3]:
data.head()

Unnamed: 0,outcome,carat,cut,color,clarity,depth,table,price,x,y,...,a6,a7,a8,a9,a10,b6,b7,b8,b9,b10
0,-26.701232,1.14,Ideal,G,VS1,62.3,56.0,7948,6.73,6.7,...,0.168836,-0.273758,1.107832,1.247795,0.482344,0.489511,-0.321138,0.573382,0.446871,-1.990581
1,6.548093,0.38,Premium,H,VS2,60.5,59.0,898,4.69,4.66,...,-0.256549,0.315373,-0.030326,-0.114335,-1.059588,-1.76136,-1.343951,-1.00255,-0.22503,-0.446653
2,6.612562,0.5,Very Good,E,SI1,60.7,58.0,1351,5.09,5.13,...,-1.193327,-0.657307,-0.591726,-0.446856,-0.765286,-0.816544,-1.397794,-0.47713,0.810509,1.725131
3,-5.073562,0.7,Premium,D,SI1,61.2,58.0,2512,5.74,5.7,...,-1.740788,-1.77886,-0.82507,0.444932,1.173109,0.453606,-0.26344,0.24621,-0.850503,-0.41295
4,-14.436557,0.83,Ideal,G,SI2,62.4,54.0,2751,6.01,6.08,...,-0.859322,1.409268,0.861992,1.109063,-1.436722,-1.461618,0.081787,0.258087,0.851146,2.204813


Inspecting columns

In [4]:
# Find columns
all_columns = data.columns.tolist()
print(all_columns)

numeric_columns = data.select_dtypes(include=["number"]).columns.tolist()
numeric_columns.remove("outcome") # Remove the target column
print(numeric_columns)

non_numeric_columns = data.select_dtypes(exclude=["number"]).columns.tolist()
print(non_numeric_columns)

['outcome', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']
['cut', 'color', 'clarity']


In [5]:
for non_numeric_column in non_numeric_columns:
    print(data[non_numeric_column].value_counts())

cut
Ideal        4040
Premium      2439
Very Good    2296
Good          925
Fair          300
Name: count, dtype: int64
color
G    2120
E    1873
F    1746
H    1506
D    1246
I     983
J     526
Name: count, dtype: int64
clarity
SI1     2408
VS2     2256
SI2     1743
VS1     1503
VVS2     951
VVS1     675
IF       318
I1       146
Name: count, dtype: int64


Removing outliers

In [6]:
print(numeric_columns)
mean_per_column = {col: data[col].mean() for col in numeric_columns}
std_per_column = {col: data[col].std() for col in numeric_columns}


['carat', 'depth', 'table', 'price', 'x', 'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10']


In [7]:
print(f"Before removing outliers: {data.shape}")
if REMOVE_OUTLIERS:
    data = data[~data.apply(is_outlier, axis=1, numeric_columns=numeric_columns, mean_per_column=mean_per_column, std_per_column=std_per_column)]
print(f"After removing outliers: {data.shape}")

Before removing outliers: (10000, 31)
After removing outliers: (9291, 31)


Converting non-numeric features to numerical features

In [8]:
data = convert_non_numeric_to_numeric(data=data)
print(data)

['G', 'E', 'F', 'H', 'D', 'I', 'J']
        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9994  14.026668   0.38    0        5   61.4   57.0    653  4.64  4.67  2.86   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...      

Normalise data using each columns respective mean and std.

In [9]:
print(data)

        outcome  carat  cut  clarity  depth  table  price     x     y     z  \
0    -26.701232   1.14    0        3   62.3   56.0   7948  6.73  6.70  4.18   
1      6.548093   0.38    1        4   60.5   59.0    898  4.69  4.66  2.83   
2      6.612562   0.50    2        5   60.7   58.0   1351  5.09  5.13  3.10   
3     -5.073562   0.70    1        5   61.2   58.0   2512  5.74  5.70  3.50   
4    -14.436557   0.83    0        6   62.4   54.0   2751  6.01  6.08  3.77   
...         ...    ...  ...      ...    ...    ...    ...   ...   ...   ...   
9994  14.026668   0.38    0        5   61.4   57.0    653  4.64  4.67  2.86   
9995  10.718277   0.33    0        3   62.6   57.0   1002  4.42  4.40  2.76   
9997  11.122516   0.52    2        6   57.9   61.0   1273  5.28  5.33  3.07   
9998 -24.730782   0.31    0        0   62.0   54.0    801  4.35  4.39  2.71   
9999   8.735755   0.37    2        5   59.9   59.0    649  4.68  4.70  2.81   

      ...        b8        b9       b10  colour_G  

Data splitting:
- Split the entire dataset into training and testing sets first.
- Use the training set to generate folds (one validation and the rest training folds) (K-Fold Cross Validation)

In [10]:
train_data, test_data = train_test_split(data, test_size=0.2, random_state=REPRODUCIBILITY_SEED)
print(f"Training set size: {len(train_data)} | Test set size: {len(test_data)}")    
print()


Training set size: 7432 | Test set size: 1859



In [11]:
normaliser = Normaliser()
os.makedirs(TRAINING_STATISTICS_DIR, exist_ok=True)
stats_for_each_column = {}
for column in numeric_columns:
    print(data[column])
    train_data_column_mean = normaliser.calculate_mean(train_data[column])
    train_data_column_std = normaliser.calculate_std(train_data[column])

    train_data[column] = normaliser.standardise(train_data[column], mean=train_data_column_mean, std=train_data_column_std)

    stats_for_each_column[column] = {
        "mean": train_data_column_mean,
        "std": train_data_column_std
    }

    # Normalise test data using the mean and std of the training data
    test_data[column] = normaliser.standardise(test_data[column], mean=train_data_column_mean, std=train_data_column_std)
    print("after", train_data[column])

with open(f"{TRAINING_STATISTICS_DIR}/stats.json", "w") as f:
    json.dump(stats_for_each_column, f)

0       1.14
1       0.38
2       0.50
3       0.70
4       0.83
        ... 
9994    0.38
9995    0.33
9997    0.52
9998    0.31
9999    0.37
Name: carat, Length: 9291, dtype: float64
after 1446   -1.091530
4333    0.630628
802    -1.091530
1344   -0.756666
5113   -0.613152
          ...   
6170    1.778733
5580   -0.063019
5802    0.606709
926     0.798060
7813   -1.043692
Name: carat, Length: 7432, dtype: float64
0       62.3
1       60.5
2       60.7
3       61.2
4       62.4
        ... 
9994    61.4
9995    62.6
9997    57.9
9998    62.0
9999    59.9
Name: depth, Length: 9291, dtype: float64
after 1446   -0.036532
4333    0.587798
802     0.587798
1344    0.275633
5113    0.275633
          ...   
6170    0.587798
5580   -1.207151
5802    1.290170
926    -0.036532
7813    0.197592
Name: depth, Length: 7432, dtype: float64
0       56.0
1       59.0
2       58.0
3       58.0
4       54.0
        ... 
9994    57.0
9995    57.0
9997    61.0
9998    54.0
9999    59.0
Name: table, Leng

In [12]:
kfold_data = get_kfold_data(data=train_data, k=NUM_FOLDS, reproducibility_seed=REPRODUCIBILITY_SEED)

Fold: 0/5
Train shape: (5945, 37) | 79.99%
Validation shape: (1487, 37) | 20.01%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5',
       'a6', 'a7', 'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10',
       'colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J'],
      dtype='object')

Fold: 1/5
Train shape: (5945, 37) | 79.99%
Validation shape: (1487, 37) | 20.01%
Index(['outcome', 'carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x',
       'y', 'z', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', '

Define models and hyperparameter tuning objectives for each model

In [13]:
models = {
        "linear_regression": LinearRegression,
        "lasso": Lasso,
        "ridge": Ridge,
        "xgb": xgb.XGBRegressor,
        "random_forest": RandomForestRegressor,
        "gradient_boosting": GradientBoostingRegressor,
        "ada_boost": AdaBoostRegressor,
        "lgbm": lgb.LGBMRegressor
        }

# Must contain the one-hot encoded columns (otherwise this does not make sense)
minimum_features = [feature for feature in data.columns if feature.startswith("colour")] 

def objective(model_type, trial, x_train, y_train, x_val, y_val):
    if model_type == LinearRegression:
        parameters = {
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
        }
    elif model_type == Lasso:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "selection": trial.suggest_categorical("selection", ["cyclic", "random"]),
            "warm_start": trial.suggest_categorical("warm_start", [True, False]),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == Ridge:
        parameters = {
            "alpha": trial.suggest_float("alpha", 1e-3, 0.1, log=True),
            "solver": trial.suggest_categorical("solver", ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]),
            "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
            "positive": False,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == xgb.XGBRegressor:
        parameters = {
            "objective": "reg:squarederror",
            "eval_metric": "rmse",
            "n_estimators": 100,
            "eta": trial.suggest_float("eta", 1e-2, 0.2, log=True),
            "gamma": trial.suggest_float("gamma", 1e-8, 10, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1, 6),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "seed": REPRODUCIBILITY_SEED
        }
    elif model_type == RandomForestRegressor:
        parameters = {
            "n_estimators": 100,
            "criterion": trial.suggest_categorical("criterion", ["absolute_error", "squared_error"]),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "bootstrap": True,
            "oob_score": False,
            "n_jobs": -1,
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == GradientBoostingRegressor:
        parameters = {
            "n_estimators": 100,
            "loss": trial.suggest_categorical("loss", ["absolute_error", "squared_error", "huber", "quantile"]),
            "criterion": trial.suggest_categorical("criterion", ["friedman_mse", "squared_error"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "subsample": trial.suggest_float("subsample", 0.05, 1.0),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 2**10),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == AdaBoostRegressor:
        parameters = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 100),
            "loss": trial.suggest_categorical("loss", ["linear", "square", "exponential"]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
            "random_state": REPRODUCIBILITY_SEED
        }
    elif model_type == lgb.LGBMRegressor:
        parameters = {
                    "objective": "regression",
                    "metric": "rmse",
                    "n_estimators": 100,
                    "verbosity": -1,
                    "bagging_freq": 1,
                    "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
                    "num_leaves": trial.suggest_int("num_leaves", 2, 2**10),
                    "subsample": trial.suggest_float("subsample", 0.05, 1.0),
                    "colsample_bytree": trial.suggest_float("colsample_bytree", 0.05, 1.0),
                    "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
                    "seed": REPRODUCIBILITY_SEED
        }
    
    # Wrapper method for feature selection
    if USE_FEATURE_SELECTION:
        selected_features = minimum_features.copy()
        train_columns = x_train.columns.tolist() 
        for feature in train_columns:
            if feature.startswith("colour"):
                continue
            if trial.suggest_categorical(feature, [0, 1]) == 1:
                selected_features.append(feature)
        if len(selected_features) == 0:
            return float("inf")
        
        x_train = x_train[selected_features]
        x_val = x_val[selected_features]

    model = model_type(**parameters) # Create the model
    model.fit(x_train, y_train)
    predictions = model.predict(x_val)
    metrics = calculate_metrics(targets=y_val, preds=predictions)
    rmse = metrics["rmse"]
    return rmse

In [14]:
# Train + Validate models
metrics = ["mae", "mse", "rmse", "pcc", "spearman_r", "r2_score"]
model_scores = {model_name: defaultdict(list) for model_name in models.keys()}

if os.path.exists("model_best_hyperparameters"):
    raise Exception("Directory for best hyperparameters already exists. Please delete it before running this script.")

os.makedirs("model_best_hyperparameters")

for fold in range(NUM_FOLDS):
    fold_data = kfold_data[fold]
     
    # Extract data
    train_data = fold_data["train"]
    val_data = fold_data["val"]

    train_y = train_data["outcome"]
    val_y = val_data["outcome"]
    
    train_x = train_data.drop(columns=["outcome"])
    val_x = val_data.drop(columns=["outcome"])

    # print(f"Fold {fold+1}/{NUM_FOLDS}")
    # print(f"Train data shape: {train_x.shape} | Train target shape: {train_y.shape}")
    # print(f"Val data shape: {val_x.shape} | Val target shape: {val_y.shape}")
    # print(f"Test data shape: {test_x.shape} | Test target shape: {test_y.shape}")
    training_features = set(train_x.columns.tolist())


    # Train model
    for model_name, model in models.items():
        study = optuna.create_study(direction="minimize")
        print("G", train_x.columns)
        print("G", val_x.columns)
        study.optimize(lambda trial: objective(trial=trial, 
                                               model_type=model, 
                                               x_train=train_x, 
                                               y_train=train_y, 
                                               x_val=val_x, 
                                               y_val=val_y
                                               ), n_trials=N_TRIALS)
        
        # Train model with best hyperparameters
        best_trial = study.best_trial 

        if USE_FEATURE_SELECTION:
            best_fold_params = {} # Hyperparameters
            best_selected_features = minimum_features.copy() # Features selected by the model
            for param in best_trial.params:
                if param in training_features: # i.e., if the parameter is a feature
                    if best_trial.params[param] == 1:
                        best_selected_features.append(param)
                else: # A hyperparameter
                    best_fold_params[param] = best_trial.params[param]
        else:
            best_fold_params = best_trial.params
            best_selected_features = train_x.columns.tolist() # All feature columns
        print("P", best_fold_params)
        print("F", best_selected_features)
        
        # Select the best features
        print(train_x.columns)
        print(val_x.columns)
        train_x = train_x[best_selected_features]
        val_x = val_x[best_selected_features]

        model = model(**best_fold_params)
        model.fit(train_x, train_y)
        preds = model.predict(val_x)
        
        # Save the best hyperparameters for this model at this fold.
        os.makedirs(f"model_best_hyperparameters/{model_name}", exist_ok=True)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}.json", "w") as f:
            json.dump(best_fold_params, f)
        with open(f"model_best_hyperparameters/{model_name}/fold_{fold+1}_selected_features.json", "w") as f:
            json.dump(best_selected_features, f)

        # Calculate metrics
        metrics = calculate_metrics(targets=val_y, preds=preds)
        mae = metrics["mae"]
        mse = metrics["mse"]
        rmse = metrics["rmse"]
        pcc = metrics["pcc"]
        spearman_r = metrics["spearman_r"]
        r2_score = metrics["r2_score"]

        for metric in metrics:
            model_scores[model_name][metric].append(metrics[metric])

        print(f"Fold: {fold+1}/{NUM_FOLDS}")
        print(f"Model name: {model_name}")
        print(f"MAE: {mae}")
        print(f"MSE: {mse}")
        print(f"RMSE: {rmse}")
        print(f"PCC: {pcc}")
        print(f"Spearman R: {spearman_r}")
        print(f"R2 Score: {r2_score}")
        print()


[I 2025-02-18 23:31:54,247] A new study created in memory with name: no-name-daf46459-f1c1-4665-8554-59cecb29c913
[I 2025-02-18 23:31:54,257] Trial 0 finished with value: 10.724077215002952 and parameters: {'fit_intercept': False, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 0, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 0, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 1, 'a8': 1, 'a9': 0, 'a10': 1, 'b6': 0, 'b7': 0, 'b8': 1, 'b9': 1, 'b10': 0}. Best is trial 0 with value: 10.724077215002952.
[I 2025-02-18 23:31:54,267] Trial 1 finished with value: 12.146226373218816 and parameters: {'fit_intercept': True, 'carat': 0, 'cut': 1, 'clarity': 1, 'depth': 0, 'table': 1, 'price': 0, 'x': 0, 'y': 0, 'z': 0, 'a1': 0, 'a2': 1, 'a3': 0, 'a4': 0, 'a5': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 0, 'a6': 1, 'a7': 1, 'a8': 1, 'a9': 0, 'a10': 1, 'b6': 0, 'b7': 1, 'b8': 1, 'b9': 1, 'b10': 1}. Best is trial 0 with value: 10.724077215002952.

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')


[I 2025-02-18 23:31:54,341] Trial 11 finished with value: 10.792532716501293 and parameters: {'fit_intercept': False, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 0, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 1, 'a8': 1, 'a9': 0, 'a10': 1, 'b6': 0, 'b7': 1, 'b8': 1, 'b9': 1, 'b10': 0}. Best is trial 0 with value: 10.724077215002952.
[I 2025-02-18 23:31:54,353] Trial 12 finished with value: 10.792532716501293 and parameters: {'fit_intercept': False, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 0, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 1, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 1, 'a8': 1, 'a9': 0, 'a10': 1, 'b6': 0, 'b7': 1, 'b8': 1, 'b9': 1, 'b10': 0}. Best is trial 0 with value: 10.724077215002952.
[I 2025-02-18 23:31:54,366] Trial 13 finished with value: 10.792532716501293 and parameters: {'fit_intercept':

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'cut', 'depth', 'table', 'price', 'a1', 'a3', 'a4', 'b1', 'b3', 'a6', 'a8', 'a9', 'b6', 'b7', 'b10']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 1/5
Model name: linear_regression
MAE: 8.445879572969561
MSE: 111.9218226393591
RMSE: 10.579311066386087
PCC: 0.5589595

[I 2025-02-18 23:31:55,734] Trial 20 finished with value: 11.097292532693897 and parameters: {'alpha': 0.09926204309020928, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False, 'carat': 1, 'cut': 1, 'depth': 1, 'table': 1, 'price': 0, 'a1': 1, 'a3': 0, 'a4': 0, 'b1': 1, 'b3': 0, 'a6': 1, 'a8': 1, 'a9': 1, 'b6': 0, 'b7': 0, 'b10': 1}. Best is trial 18 with value: 11.024663517842555.
[I 2025-02-18 23:31:55,743] Trial 21 finished with value: 11.090664797544816 and parameters: {'alpha': 0.09390809003723272, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False, 'carat': 1, 'cut': 1, 'depth': 1, 'table': 1, 'price': 0, 'a1': 1, 'a3': 0, 'a4': 0, 'b1': 1, 'b3': 0, 'a6': 1, 'a8': 1, 'a9': 1, 'b6': 0, 'b7': 0, 'b10': 1}. Best is trial 18 with value: 11.024663517842555.
[I 2025-02-18 23:31:55,753] Trial 22 finished with value: 11.054490520609177 and parameters: {'alpha': 0.06070009329063373, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': False, 'carat':

P {'alpha': 0.0023468075473453494, 'fit_intercept': False, 'selection': 'random', 'warm_start': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'table', 'a1', 'a3', 'a4', 'b1', 'b3', 'b6', 'b7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'depth', 'table', 'price', 'a1', 'a3', 'a4',
       'b1', 'b3', 'a6', 'a8', 'a9', 'b6', 'b7', 'b10'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'depth', 'table', 'price', 'a1', 'a3', 'a4',
       'b1', 'b3', 'a6', 'a8', 'a9', 'b6', 'b7', 'b10'],
      dtype='object')
Fold: 1/5
Model name: lasso
MAE: 8.448858017234208
MSE: 111.94129610061485
RMSE: 10.58023138218701
PCC: 0.5587581930372544
Spearman R: 0.5793491493784056
R2 Score: 0.31159610752055156

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',

[I 2025-02-18 23:31:56,894] Trial 20 finished with value: 10.819210186849471 and parameters: {'alpha': 0.013463975393838586, 'solver': 'sag', 'fit_intercept': True, 'carat': 1, 'depth': 1, 'table': 0, 'a1': 0, 'a3': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b6': 0, 'b7': 1}. Best is trial 19 with value: 10.81921018363883.
[I 2025-02-18 23:31:56,913] Trial 21 finished with value: 10.819210193449628 and parameters: {'alpha': 0.005723002069592281, 'solver': 'sag', 'fit_intercept': True, 'carat': 1, 'depth': 1, 'table': 0, 'a1': 0, 'a3': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b6': 0, 'b7': 1}. Best is trial 19 with value: 10.81921018363883.
[I 2025-02-18 23:31:56,933] Trial 22 finished with value: 10.819210185591634 and parameters: {'alpha': 0.01493226023869965, 'solver': 'sag', 'fit_intercept': True, 'carat': 1, 'depth': 1, 'table': 0, 'a1': 0, 'a3': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b6': 0, 'b7': 1}. Best is trial 19 with value: 10.81921018363883.
[I 2025-02-18 23:31:56,953] Trial 23 finished with value: 10.8

P {'alpha': 0.08631188001830808, 'solver': 'auto', 'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b6', 'b7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a3', 'a4', 'b1', 'b3',
       'b6', 'b7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a1', 'a3', 'a4', 'b1', 'b3',
       'b6', 'b7'],
      dtype='object')
Fold: 1/5
Model name: ridge
MAE: 8.641567856016813
MSE: 116.84748174460155
RMSE: 10.809601368441001
PCC: 0.5316626099431154
Spearman R: 0.5558712856390866
R2 Score: 0.28142460323931184

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b6', 'b7'],
      dtype='object')
G Index(['colour_G', '

[I 2025-02-18 23:31:57,975] Trial 1 finished with value: 12.625178464127801 and parameters: {'eta': 0.04548801969894445, 'gamma': 0.29257808736835833, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.7677582420483131, 'colsample_bytree': 0.8991516092849734, 'carat': 0, 'depth': 0, 'table': 0, 'a4': 0, 'b1': 1, 'b3': 0, 'b6': 1, 'b7': 1}. Best is trial 1 with value: 12.625178464127801.
[I 2025-02-18 23:31:58,029] Trial 2 finished with value: 9.929952457307405 and parameters: {'eta': 0.029345864319542937, 'gamma': 0.00698668954452883, 'max_depth': 5, 'min_child_weight': 6, 'subsample': 0.8494555384352709, 'colsample_bytree': 0.9648182610620324, 'carat': 0, 'depth': 1, 'table': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'b6': 0, 'b7': 1}. Best is trial 2 with value: 9.929952457307405.
[I 2025-02-18 23:31:58,146] Trial 3 finished with value: 12.572509873641335 and parameters: {'eta': 0.012393047687946846, 'gamma': 0.0006732857142768674, 'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.643078

P {'eta': 0.07306639051356126, 'gamma': 0.022024156794985938, 'max_depth': 3, 'min_child_weight': 6, 'subsample': 0.9933792205202334, 'colsample_bytree': 0.921580244130331}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b6', 'b7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b6', 'b7'],
      dtype='object')
Fold: 1/5
Model name: xgb
MAE: 7.535025417899413
MSE: 90.57234360458293
RMSE: 9.51695033109782
PCC: 0.6669672921921033
Spearman R: 0.6685960616955692
R2 Score: 0.4430084690788345

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b7'],
 

[I 2025-02-18 23:32:05,329] Trial 1 finished with value: 12.252271543519456 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 3, 'min_samples_leaf': 3, 'carat': 0, 'depth': 0, 'table': 0, 'a4': 0, 'b1': 1, 'b3': 1, 'b7': 1}. Best is trial 1 with value: 12.252271543519456.
[I 2025-02-18 23:32:05,423] Trial 2 finished with value: 12.458691353570496 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 6, 'min_samples_split': 2, 'min_samples_leaf': 1, 'carat': 0, 'depth': 0, 'table': 1, 'a4': 0, 'b1': 0, 'b3': 1, 'b7': 1}. Best is trial 1 with value: 12.252271543519456.
[I 2025-02-18 23:32:05,516] Trial 3 finished with value: 9.69089577833958 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 10, 'carat': 0, 'depth': 1, 'table': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b7': 1}. Best is trial 3 with value: 9.69089577833958.
[I 2025-02

P {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 2}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'table', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'depth', 'table', 'a4', 'b1', 'b3', 'b7'],
      dtype='object')


[I 2025-02-18 23:32:22,631] A new study created in memory with name: no-name-bba96bdc-72b7-4aa1-b48c-ab074b7e7c0d


Fold: 1/5
Model name: random_forest
MAE: 7.632098641725515
MSE: 92.3989150168598
RMSE: 9.612435436290836
PCC: 0.6602187392474357
Spearman R: 0.660615167045931
R2 Score: 0.43177562727778107

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:32:23,627] Trial 0 finished with value: 12.578916495914864 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.011471629496460163, 'subsample': 0.8972444299346176, 'max_depth': 8, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_leaf_nodes': 542, 'depth': 0, 'table': 0, 'a4': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 12.578916495914864.
[I 2025-02-18 23:32:24,215] Trial 1 finished with value: 12.412259612569159 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.09721936118642882, 'subsample': 0.4942498058902069, 'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'max_leaf_nodes': 330, 'depth': 0, 'table': 0, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 12.412259612569159.
[I 2025-02-18 23:32:24,473] Trial 2 finished with value: 12.41962448314181 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.

P {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.05167254341344138, 'subsample': 0.87907904807685, 'max_depth': 4, 'min_samples_split': 8, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'max_leaf_nodes': 21}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'table', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:33:02,318] A new study created in memory with name: no-name-4b0772e8-6ef9-47af-9dc1-45b8425ea741


Fold: 1/5
Model name: gradient_boosting
MAE: 7.730118630361399
MSE: 93.84812538100405
RMSE: 9.687524213182853
PCC: 0.6525554845829036
Spearman R: 0.6529477394722437
R2 Score: 0.4228634376708128

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:33:02,632] Trial 0 finished with value: 9.980737499916833 and parameters: {'n_estimators': 96, 'loss': 'exponential', 'learning_rate': 0.003380439099528019, 'depth': 1, 'table': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 9.980737499916833.
[I 2025-02-18 23:33:02,981] Trial 1 finished with value: 9.746232762543096 and parameters: {'n_estimators': 81, 'loss': 'linear', 'learning_rate': 0.015196622691679177, 'depth': 1, 'table': 0, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 9.746232762543096.
[I 2025-02-18 23:33:03,265] Trial 2 finished with value: 9.764327765309645 and parameters: {'n_estimators': 66, 'loss': 'exponential', 'learning_rate': 0.018353853343641185, 'depth': 1, 'table': 0, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 9.746232762543096.
[I 2025-02-18 23:33:03,569] Trial 3 finished with value: 12.236237441240732 and parameters: {'n_estimators': 78, 'loss': 'square', 'learning_rate': 0.06579580685222655, 'depth': 0, 'table': 1, 'b1': 1, 'b3': 1}. B

P {'n_estimators': 76, 'loss': 'square', 'learning_rate': 0.08496926640377352}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'table', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:33:31,825] A new study created in memory with name: no-name-414fa2e9-21e6-4729-85c7-712bfda26fcc
[I 2025-02-18 23:33:31,928] Trial 0 finished with value: 12.06089751533401 and parameters: {'learning_rate': 0.0026556478274381745, 'num_leaves': 760, 'subsample': 0.9207902711114846, 'colsample_bytree': 0.5763008202925638, 'min_data_in_leaf': 52, 'depth': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 12.06089751533401.
[I 2025-02-18 23:33:31,952] Trial 1 finished with value: 12.75920016821243 and parameters: {'learning_rate': 0.001688731620697829, 'num_leaves': 996, 'subsample': 0.29448398045613766, 'colsample_bytree': 0.7497432753565627, 'min_data_in_leaf': 99, 'depth': 0, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 12.06089751533401.
[I 2025-02-18 23:33:31,977] Trial 2 finished with value: 12.733472592131646 and parameters: {'learning_rate': 0.04601862625901497, 'num_leaves': 752, 'subsample': 0.7077031906691259, 'colsample_bytree': 0.508771206208972, 'min_data_in

Fold: 1/5
Model name: ada_boost
MAE: 7.7441280017971
MSE: 94.45002399167613
RMSE: 9.718540219172636
PCC: 0.6482757746716892
Spearman R: 0.6513542514288378
R2 Score: 0.4191619498294338

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:33:32,243] Trial 3 finished with value: 12.62899062183159 and parameters: {'learning_rate': 0.014414992787911519, 'num_leaves': 1007, 'subsample': 0.2644724935279589, 'colsample_bytree': 0.43470936987254744, 'min_data_in_leaf': 2, 'depth': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 12.06089751533401.
[I 2025-02-18 23:33:32,274] Trial 4 finished with value: 10.64832226117612 and parameters: {'learning_rate': 0.06818758268756742, 'num_leaves': 663, 'subsample': 0.8265205281208929, 'colsample_bytree': 0.3124488029592975, 'min_data_in_leaf': 40, 'depth': 1, 'b1': 0, 'b3': 0}. Best is trial 4 with value: 10.64832226117612.
[I 2025-02-18 23:33:32,298] Trial 5 finished with value: 12.73845894981905 and parameters: {'learning_rate': 0.0780910971858118, 'num_leaves': 278, 'subsample': 0.48704399807703597, 'colsample_bytree': 0.5540633087883351, 'min_data_in_leaf': 68, 'depth': 0, 'b1': 0, 'b3': 0}. Best is trial 4 with value: 10.64832226117612.
[I 2025-02-18 23:33:32,327

P {'learning_rate': 0.0866903747332521, 'num_leaves': 3, 'subsample': 0.058641574215494735, 'colsample_bytree': 0.4707679337278526, 'min_data_in_leaf': 50}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'b1', 'b3'],
      dtype='object')
Fold: 1/5
Model name: lgbm
MAE: 7.710957386965174
MSE: 93.36381425104328
RMSE: 9.66249523938011
PCC: 0.6541362406304497
Spearman R: 0.6528366273138534
R2 Score: 0.42584179935367605

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', '

[I 2025-02-18 23:33:37,212] Trial 18 finished with value: 11.876694941982436 and parameters: {'fit_intercept': True, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 0, 'table': 0, 'price': 0, 'x': 1, 'y': 0, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 0, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 0, 'a6': 0, 'a7': 0, 'a8': 1, 'a9': 0, 'a10': 0, 'b6': 1, 'b7': 1, 'b8': 0, 'b9': 0, 'b10': 0}. Best is trial 14 with value: 10.351950246161582.
[I 2025-02-18 23:33:37,223] Trial 19 finished with value: 10.351950246161582 and parameters: {'fit_intercept': False, 'carat': 1, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 1, 'x': 1, 'y': 0, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 1, 'b5': 0, 'a6': 0, 'a7': 0, 'a8': 1, 'a9': 0, 'a10': 0, 'b6': 1, 'b7': 0, 'b8': 0, 'b9': 1, 'b10': 0}. Best is trial 14 with value: 10.351950246161582.
[I 2025-02-18 23:33:37,232] Trial 20 finished with value: 10.351950246161582 and parameters: {'fit_intercept'

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'price', 'y', 'a1', 'a2', 'a4', 'b1', 'b2', 'b3', 'b4', 'a8', 'b6', 'b7', 'b9']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 2/5
Model name: linear_regression
MAE: 8.266717907221476
MSE: 107.09349324391829
RMSE: 10.34859861256191
PCC: 0.5525937876763782
Spearman R:

[I 2025-02-18 23:33:38,433] Trial 25 finished with value: 10.507021690601464 and parameters: {'alpha': 0.0616740338749459, 'fit_intercept': True, 'selection': 'random', 'warm_start': False, 'depth': 1, 'price': 0, 'y': 0, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 0, 'b2': 0, 'b3': 1, 'b4': 0, 'a8': 0, 'b6': 1, 'b7': 0, 'b9': 0}. Best is trial 17 with value: 10.504541296504843.
[I 2025-02-18 23:33:38,443] Trial 26 finished with value: 12.235270315106918 and parameters: {'alpha': 0.0161807977240607, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False, 'depth': 0, 'price': 1, 'y': 1, 'a1': 0, 'a2': 0, 'a4': 1, 'b1': 1, 'b2': 0, 'b3': 0, 'b4': 1, 'a8': 0, 'b6': 1, 'b7': 1, 'b9': 0}. Best is trial 17 with value: 10.504541296504843.
[I 2025-02-18 23:33:38,452] Trial 27 finished with value: 10.504858014990498 and parameters: {'alpha': 0.029223193041911198, 'fit_intercept': True, 'selection': 'random', 'warm_start': False, 'depth': 1, 'price': 0, 'y': 0, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 0

P {'alpha': 0.045005380017589545, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'price', 'a1', 'a4', 'b1', 'b2', 'b3', 'b4', 'a8', 'b7', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'price', 'y', 'a1', 'a2', 'a4', 'b1', 'b2', 'b3',
       'b4', 'a8', 'b6', 'b7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'price', 'y', 'a1', 'a2', 'a4', 'b1', 'b2', 'b3',
       'b4', 'a8', 'b6', 'b7', 'b9'],
      dtype='object')
Fold: 2/5
Model name: lasso
MAE: 8.26352331191648
MSE: 107.07613320010155
RMSE: 10.347759815539861
PCC: 0.5528024583231206
Spearman R: 0.5716676981487133
R2 Score: 0.3045013336842465

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'price', 'a1'

[I 2025-02-18 23:33:39,405] Trial 19 finished with value: 10.346359519883888 and parameters: {'alpha': 0.001990683475650225, 'solver': 'sag', 'fit_intercept': False, 'depth': 1, 'price': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 0, 'a8': 0, 'b7': 1, 'b9': 0}. Best is trial 16 with value: 10.346357925803936.
[I 2025-02-18 23:33:39,427] Trial 20 finished with value: 10.34635937004032 and parameters: {'alpha': 0.0010893176693629765, 'solver': 'sag', 'fit_intercept': False, 'depth': 1, 'price': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 0, 'a8': 0, 'b7': 1, 'b9': 0}. Best is trial 16 with value: 10.346357925803936.
[I 2025-02-18 23:33:39,449] Trial 21 finished with value: 10.34635935830914 and parameters: {'alpha': 0.001018748853404261, 'solver': 'sag', 'fit_intercept': False, 'depth': 1, 'price': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 0, 'a8': 0, 'b7': 1, 'b9': 0}. Best is trial 16 with value: 10.346357925803936.
[I 2025-02-18 23:33:39,472] Trial 22 f

P {'alpha': 0.027883736771883216, 'solver': 'sparse_cg', 'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a8', 'b7', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'price', 'a1', 'a4', 'b1', 'b2', 'b3', 'b4', 'a8',
       'b7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'price', 'a1', 'a4', 'b1', 'b2', 'b3', 'b4', 'a8',
       'b7', 'b9'],
      dtype='object')
Fold: 2/5
Model name: ridge
MAE: 8.26154784849549
MSE: 106.92964096657528
RMSE: 10.340678941277273
PCC: 0.55352973841596
Spearman R: 0.5707013862832576
R2 Score: 0.3054528543452768

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a8', 'b7', 'b9'],
      dtype='object')
G Index(['colour_G', 'colo

[I 2025-02-18 23:33:40,576] Trial 3 finished with value: 12.387052624691123 and parameters: {'eta': 0.014119787596892886, 'gamma': 0.2146724052164495, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.9233620107469221, 'colsample_bytree': 0.9259983893737727, 'depth': 0, 'a1': 0, 'a4': 1, 'b1': 0, 'b3': 0, 'a8': 1, 'b7': 0, 'b9': 1}. Best is trial 0 with value: 10.028881023068983.
[I 2025-02-18 23:33:40,666] Trial 4 finished with value: 10.263580207803136 and parameters: {'eta': 0.013242173134751232, 'gamma': 0.20239973775937412, 'max_depth': 8, 'min_child_weight': 6, 'subsample': 0.9609585303207082, 'colsample_bytree': 0.7195193231904471, 'depth': 1, 'a1': 1, 'a4': 0, 'b1': 1, 'b3': 0, 'a8': 1, 'b7': 1, 'b9': 0}. Best is trial 0 with value: 10.028881023068983.
[I 2025-02-18 23:33:40,699] Trial 5 finished with value: 12.016308368662932 and parameters: {'eta': 0.028064002132447224, 'gamma': 1.773096973272531e-06, 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.8549900080331898,

P {'eta': 0.0545777653358924, 'gamma': 0.0002264862946819672, 'max_depth': 3, 'min_child_weight': 5, 'subsample': 0.7704220700029191, 'colsample_bytree': 0.7662433995874792}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a8', 'b7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a8', 'b7', 'b9'],
      dtype='object')
Fold: 2/5
Model name: xgb
MAE: 7.229747128000615
MSE: 83.12124383868688
RMSE: 9.117085271000095
PCC: 0.6788405715874712
Spearman R: 0.6832546304853913
R2 Score: 0.4600971056334481

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'col

[I 2025-02-18 23:33:47,387] Trial 0 finished with value: 12.122804477210572 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 8, 'min_samples_split': 5, 'min_samples_leaf': 9, 'depth': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 12.122804477210572.
[I 2025-02-18 23:33:48,130] Trial 1 finished with value: 12.026298719549537 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 3, 'min_samples_leaf': 4, 'depth': 0, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 12.026298719549537.
[I 2025-02-18 23:33:48,805] Trial 2 finished with value: 9.270065558672082 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 2, 'min_samples_leaf': 5, 'depth': 1, 'a1': 1, 'a4': 0, 'b1': 1, 'b3': 1}. Best is trial 2 with value: 9.270065558672082.
[I 2025-02-18 23:33:49,464] Trial 3 finished with value: 12.25802754232069 an

P {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 6}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:34:42,301] A new study created in memory with name: no-name-78c1f724-0dea-4982-add5-641637cb8fae


Fold: 2/5
Model name: random_forest
MAE: 7.329659984171049
MSE: 85.5286215930993
RMSE: 9.248168553454208
PCC: 0.6682078613999656
Spearman R: 0.6715293998952586
R2 Score: 0.44446030621351473

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:34:42,484] Trial 0 finished with value: 21.19316994284512 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.0012847421774045212, 'subsample': 0.4424980877286122, 'max_depth': 3, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_leaf_nodes': 286, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 21.19316994284512.
[I 2025-02-18 23:34:43,637] Trial 1 finished with value: 12.412351882572086 and parameters: {'loss': 'absolute_error', 'criterion': 'friedman_mse', 'learning_rate': 0.0013453374759887034, 'subsample': 0.5318930818290569, 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_leaf_nodes': 641, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 1 with value: 12.412351882572086.
[I 2025-02-18 23:34:43,849] Trial 2 finished with value: 11.73877877611147 and parameters: {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.0021156085

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.0899450939122773, 'subsample': 0.8349865256313405, 'max_depth': 3, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_leaf_nodes': 177}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:35:24,207] A new study created in memory with name: no-name-090ed837-7ffb-4d86-bd4a-d6025c09a124


Fold: 2/5
Model name: gradient_boosting
MAE: 7.43786987807581
MSE: 88.83710807657003
RMSE: 9.425343923516532
PCC: 0.6509820427772482
Spearman R: 0.6534004287458544
R2 Score: 0.42297047586563097

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')


[I 2025-02-18 23:35:24,493] Trial 0 finished with value: 9.517817848492601 and parameters: {'n_estimators': 65, 'loss': 'square', 'learning_rate': 0.0010186464635019844, 'depth': 1, 'a1': 1, 'b3': 1}. Best is trial 0 with value: 9.517817848492601.
[I 2025-02-18 23:35:24,767] Trial 1 finished with value: 11.954481505665743 and parameters: {'n_estimators': 70, 'loss': 'linear', 'learning_rate': 0.027793663661911168, 'depth': 0, 'a1': 1, 'b3': 1}. Best is trial 0 with value: 9.517817848492601.
[I 2025-02-18 23:35:24,879] Trial 2 finished with value: 12.472743149381227 and parameters: {'n_estimators': 65, 'loss': 'exponential', 'learning_rate': 0.0031484793751025094, 'depth': 0, 'a1': 0, 'b3': 0}. Best is trial 0 with value: 9.517817848492601.
[I 2025-02-18 23:35:25,004] Trial 3 finished with value: 12.47722770208452 and parameters: {'n_estimators': 73, 'loss': 'exponential', 'learning_rate': 0.05683182129284397, 'depth': 0, 'a1': 0, 'b3': 0}. Best is trial 0 with value: 9.517817848492601.

P {'n_estimators': 51, 'loss': 'linear', 'learning_rate': 0.05802273253934243}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')


[I 2025-02-18 23:35:50,263] A new study created in memory with name: no-name-7399d5a0-d3ed-44a9-8be0-136dafdc5167
[I 2025-02-18 23:35:50,293] Trial 0 finished with value: 12.478655239169917 and parameters: {'learning_rate': 0.03874477126232192, 'num_leaves': 713, 'subsample': 0.48332212270225855, 'colsample_bytree': 0.6263491733351568, 'min_data_in_leaf': 93, 'depth': 0, 'a1': 0, 'b3': 0}. Best is trial 0 with value: 12.478655239169917.
[I 2025-02-18 23:35:50,432] Trial 1 finished with value: 10.221326231395942 and parameters: {'learning_rate': 0.02324298034059269, 'num_leaves': 876, 'subsample': 0.4774442028073538, 'colsample_bytree': 0.5671901493173651, 'min_data_in_leaf': 15, 'depth': 1, 'a1': 1, 'b3': 0}. Best is trial 1 with value: 10.221326231395942.
[I 2025-02-18 23:35:50,450] Trial 2 finished with value: 12.465109019827482 and parameters: {'learning_rate': 0.0036011197075093357, 'num_leaves': 18, 'subsample': 0.3477807175534577, 'colsample_bytree': 0.30496538270686907, 'min_dat

Fold: 2/5
Model name: ada_boost
MAE: 7.486980224723856
MSE: 89.56316899161706
RMSE: 9.4637819602745
PCC: 0.6475401836660395
Spearman R: 0.6536930914580724
R2 Score: 0.41825444454298766

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')


[I 2025-02-18 23:35:50,468] Trial 3 finished with value: 12.115049328991967 and parameters: {'learning_rate': 0.041597844207395415, 'num_leaves': 1021, 'subsample': 0.06271091137456086, 'colsample_bytree': 0.7281112056190565, 'min_data_in_leaf': 83, 'depth': 0, 'a1': 0, 'b3': 1}. Best is trial 1 with value: 10.221326231395942.
[I 2025-02-18 23:35:50,586] Trial 4 finished with value: 10.029279435708201 and parameters: {'learning_rate': 0.05093190266117598, 'num_leaves': 146, 'subsample': 0.3039907953696576, 'colsample_bytree': 0.5824975744915625, 'min_data_in_leaf': 6, 'depth': 1, 'a1': 0, 'b3': 1}. Best is trial 4 with value: 10.029279435708201.
[I 2025-02-18 23:35:50,643] Trial 5 finished with value: 12.207141152205564 and parameters: {'learning_rate': 0.013550257930718694, 'num_leaves': 496, 'subsample': 0.7246049034994299, 'colsample_bytree': 0.8554513953573639, 'min_data_in_leaf': 63, 'depth': 0, 'a1': 0, 'b3': 1}. Best is trial 4 with value: 10.029279435708201.
[I 2025-02-18 23:35

P {'learning_rate': 0.042820723494515205, 'num_leaves': 368, 'subsample': 0.17855727374290403, 'colsample_bytree': 0.753377483739025, 'min_data_in_leaf': 37}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Fold: 2/5
Model name: lgbm
MAE: 7.606344191369775
MSE: 93.08908848974097
RMSE: 9.64826867835577
PCC: 0.6301239038022592
Spearman R: 0.6319417112156164
R2 Score: 0.3953523072020815

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 

[I 2025-02-18 23:35:58,641] Trial 7 finished with value: 12.796849252133601 and parameters: {'fit_intercept': False, 'carat': 0, 'cut': 1, 'clarity': 1, 'depth': 0, 'table': 0, 'price': 0, 'x': 1, 'y': 1, 'z': 1, 'a1': 0, 'a2': 0, 'a3': 0, 'a4': 0, 'a5': 1, 'b1': 0, 'b2': 0, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 0, 'a7': 1, 'a8': 0, 'a9': 1, 'a10': 0, 'b6': 0, 'b7': 0, 'b8': 0, 'b9': 0, 'b10': 0}. Best is trial 4 with value: 11.61328785672708.
[I 2025-02-18 23:35:58,647] Trial 8 finished with value: 11.306000254638787 and parameters: {'fit_intercept': True, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 1, 'price': 0, 'x': 1, 'y': 0, 'z': 1, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 0, 'a5': 0, 'b1': 1, 'b2': 0, 'b3': 0, 'b4': 1, 'b5': 1, 'a6': 0, 'a7': 0, 'a8': 0, 'a9': 1, 'a10': 0, 'b6': 0, 'b7': 1, 'b8': 0, 'b9': 1, 'b10': 1}. Best is trial 8 with value: 11.306000254638787.
[I 2025-02-18 23:35:58,658] Trial 9 finished with value: 10.835005311743851 and parameters: {'fit_intercept': True

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'clarity', 'depth', 'table', 'z', 'a1', 'a2', 'a4', 'b1', 'b3', 'b4', 'a7', 'b6']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 3/5
Model name: linear_regression
MAE: 8.486590406878847
MSE: 113.96199455681528
RMSE: 10.675298335728856
PCC: 0.5495544801319368
Spearman R: 0.571

[I 2025-02-18 23:35:59,982] Trial 25 finished with value: 10.941177393072014 and parameters: {'alpha': 0.04338844235242202, 'fit_intercept': True, 'selection': 'random', 'warm_start': True, 'clarity': 0, 'depth': 1, 'table': 0, 'z': 1, 'a1': 1, 'a2': 0, 'a4': 0, 'b1': 0, 'b3': 1, 'b4': 0, 'a7': 1, 'b6': 1}. Best is trial 25 with value: 10.941177393072014.
[I 2025-02-18 23:35:59,990] Trial 26 finished with value: 11.174451393591577 and parameters: {'alpha': 0.046338432000306136, 'fit_intercept': False, 'selection': 'random', 'warm_start': False, 'clarity': 0, 'depth': 1, 'table': 0, 'z': 0, 'a1': 1, 'a2': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'b4': 0, 'a7': 1, 'b6': 1}. Best is trial 25 with value: 10.941177393072014.
[I 2025-02-18 23:35:59,999] Trial 27 finished with value: 10.941492419568766 and parameters: {'alpha': 0.06714597513182409, 'fit_intercept': True, 'selection': 'random', 'warm_start': True, 'clarity': 0, 'depth': 1, 'table': 0, 'z': 1, 'a1': 1, 'a2': 0, 'a4': 0, 'b1': 0, 'b3': 1, 

P {'alpha': 0.05274900785767071, 'fit_intercept': True, 'selection': 'random', 'warm_start': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'z', 'a1', 'a2', 'a4', 'b1',
       'b3', 'b4', 'a7', 'b6'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'z', 'a1', 'a2', 'a4', 'b1',
       'b3', 'b4', 'a7', 'b6'],
      dtype='object')
Fold: 3/5
Model name: lasso
MAE: 8.503056806362983
MSE: 113.98793331418197
RMSE: 10.676513162741006
PCC: 0.5496073704949838
Spearman R: 0.5728658831012924
R2 Score: 0.30128261132993894

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G

[I 2025-02-18 23:36:01,079] Trial 27 finished with value: 10.689866607474398 and parameters: {'alpha': 0.03356702420775735, 'solver': 'saga', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial 10 with value: 10.689858816237034.
[I 2025-02-18 23:36:01,099] Trial 28 finished with value: 10.689883704783695 and parameters: {'alpha': 0.09492632931662508, 'solver': 'sag', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial 10 with value: 10.689858816237034.
[I 2025-02-18 23:36:01,105] Trial 29 finished with value: 11.182347629735656 and parameters: {'alpha': 0.048303060171354655, 'solver': 'lsqr', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 0}. Best is trial 10 with value: 10.689858816237034.
[I 2025-02-18 23:36:01,111] Trial 30 finished with value: 10.68987294269978 and parameters: {'alpha': 0.014779407765687555, 'solver': 'auto', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b3

P {'alpha': 0.09995974322027469, 'solver': 'cholesky', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Fold: 3/5
Model name: ridge
MAE: 8.510002986290072
MSE: 114.27305417015147
RMSE: 10.689857537411408
PCC: 0.5478612040001869
Spearman R: 0.5697456388875335
R2 Score: 0.2995348921271591

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='objec

[I 2025-02-18 23:36:01,868] Trial 2 finished with value: 10.578977707581078 and parameters: {'eta': 0.1508963311378338, 'gamma': 0.0026540733850967924, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.9043552191701254, 'colsample_bytree': 0.50349736497612, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 0, 'b3': 0}. Best is trial 2 with value: 10.578977707581078.
[I 2025-02-18 23:36:01,948] Trial 3 finished with value: 12.478512835980831 and parameters: {'eta': 0.043835765059490495, 'gamma': 2.0635043426888884, 'max_depth': 8, 'min_child_weight': 2, 'subsample': 0.9265809861233105, 'colsample_bytree': 0.6153205945874866, 'depth': 0, 'a1': 1, 'a4': 0, 'b1': 0, 'b3': 1}. Best is trial 2 with value: 10.578977707581078.
[I 2025-02-18 23:36:02,026] Trial 4 finished with value: 10.486501519075922 and parameters: {'eta': 0.02987038943568705, 'gamma': 1.7507550515276452e-08, 'max_depth': 9, 'min_child_weight': 3, 'subsample': 0.6666341170348943, 'colsample_bytree': 0.5286282175480514, 'depth': 1, 'a1

P {'eta': 0.11866773470188091, 'gamma': 0.00011338109477941614, 'max_depth': 3, 'min_child_weight': 1, 'subsample': 0.669745677412996, 'colsample_bytree': 0.679790079366941}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Fold: 3/5
Model name: xgb
MAE: 7.422476839955651
MSE: 87.70367317055472
RMSE: 9.365023927922167
PCC: 0.6805891885551159
Spearman R: 0.6862690027914705
R2 Score: 0.4623985213803483

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'col

[I 2025-02-18 23:36:08,129] Trial 0 finished with value: 12.79680814188111 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 2, 'min_samples_leaf': 2, 'depth': 0, 'a1': 0, 'a4': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 12.79680814188111.
[I 2025-02-18 23:36:08,219] Trial 1 finished with value: 12.455194243233706 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 5, 'min_samples_split': 6, 'min_samples_leaf': 3, 'depth': 0, 'a1': 0, 'a4': 0, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 12.455194243233706.
[I 2025-02-18 23:36:08,309] Trial 2 finished with value: 9.889865703297 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 2, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1}. Best is trial 2 with value: 9.889865703297.
[I 2025-02-18 23:36:08,662] Trial 3 finished with value: 12.779199136086415 and paramet

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 4}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:36:25,130] A new study created in memory with name: no-name-4aa35829-ceb2-4b81-b0aa-8303bcd1d35e


Fold: 3/5
Model name: random_forest
MAE: 7.482926873275211
MSE: 89.30399861336296
RMSE: 9.450079291379675
PCC: 0.6771678902004711
Spearman R: 0.6811067882622974
R2 Score: 0.4525889285409094

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:36:25,540] Trial 0 finished with value: 16.20526595587761 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.04264553451935599, 'subsample': 0.34333487955715725, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 10, 'max_features': 'log2', 'max_leaf_nodes': 75, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 16.20526595587761.
[I 2025-02-18 23:36:25,619] Trial 1 finished with value: 9.955135727253207 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.015595625232799382, 'subsample': 0.06275484783477697, 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 7, 'max_features': 'log2', 'max_leaf_nodes': 149, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 9.955135727253207.
[I 2025-02-18 23:36:26,048] Trial 2 finished with value: 12.71721994689587 and parameters: {'loss': 'absolute_error', 'criterion': 'squared_error', 'learni

P {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.03920364675977538, 'subsample': 0.7354357037350406, 'max_depth': 5, 'min_samples_split': 8, 'min_samples_leaf': 9, 'max_features': 'sqrt', 'max_leaf_nodes': 18}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:37:03,890] A new study created in memory with name: no-name-7eef4949-8c93-43c4-b89a-c7f1e9f7ded4


Fold: 3/5
Model name: gradient_boosting
MAE: 7.541532395905688
MSE: 89.97152634951546
RMSE: 9.485332168644145
PCC: 0.6723106519128103
Spearman R: 0.6744923831258469
R2 Score: 0.4484971512526613

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:37:04,076] Trial 0 finished with value: 10.28374395996823 and parameters: {'n_estimators': 56, 'loss': 'exponential', 'learning_rate': 0.00631823577323218, 'depth': 1, 'a1': 1, 'b1': 0, 'b3': 0}. Best is trial 0 with value: 10.28374395996823.
[I 2025-02-18 23:37:04,299] Trial 1 finished with value: 12.618343425375325 and parameters: {'n_estimators': 78, 'loss': 'exponential', 'learning_rate': 0.003850743368289743, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 10.28374395996823.
[I 2025-02-18 23:37:04,578] Trial 2 finished with value: 12.620097352245846 and parameters: {'n_estimators': 98, 'loss': 'linear', 'learning_rate': 0.0017270375143871978, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 10.28374395996823.
[I 2025-02-18 23:37:04,886] Trial 3 finished with value: 9.947028476656563 and parameters: {'n_estimators': 95, 'loss': 'linear', 'learning_rate': 0.0011868115568262406, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 1}. Best is tr

P {'n_estimators': 75, 'loss': 'exponential', 'learning_rate': 0.09998603259358968}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:37:38,414] A new study created in memory with name: no-name-715ef8c9-060f-4fc2-9d59-978f0551fd6b
[I 2025-02-18 23:37:38,447] Trial 0 finished with value: 12.755337480731814 and parameters: {'learning_rate': 0.0010228605900394513, 'num_leaves': 477, 'subsample': 0.3270381476591637, 'colsample_bytree': 0.49958043304174055, 'min_data_in_leaf': 99, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 12.755337480731814.
[I 2025-02-18 23:37:38,488] Trial 1 finished with value: 11.160232221658045 and parameters: {'learning_rate': 0.007727887616173296, 'num_leaves': 568, 'subsample': 0.15355777447148722, 'colsample_bytree': 0.56601435861492, 'min_data_in_leaf': 34, 'depth': 1, 'a1': 1, 'b1': 1, 'b3': 0}. Best is trial 1 with value: 11.160232221658045.
[I 2025-02-18 23:37:38,597] Trial 2 finished with value: 12.759206726305894 and parameters: {'learning_rate': 0.07269452078976271, 'num_leaves': 1018, 'subsample': 0.9447129622300946, 'colsample_bytree': 0.6544596

Fold: 3/5
Model name: ada_boost
MAE: 7.656358019894142
MSE: 92.61292752864468
RMSE: 9.623561062758665
PCC: 0.6583312750403891
Spearman R: 0.6647060594618305
R2 Score: 0.4323060257479623

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:37:38,643] Trial 3 finished with value: 12.404964449736077 and parameters: {'learning_rate': 0.007216431764418206, 'num_leaves': 897, 'subsample': 0.6297648595150821, 'colsample_bytree': 0.05840419129646494, 'min_data_in_leaf': 38, 'depth': 1, 'a1': 1, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 11.160232221658045.
[I 2025-02-18 23:37:38,659] Trial 4 finished with value: 12.308753456118273 and parameters: {'learning_rate': 0.07413231151121638, 'num_leaves': 248, 'subsample': 0.19510093442500587, 'colsample_bytree': 0.07827305238082294, 'min_data_in_leaf': 58, 'depth': 0, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 1 with value: 11.160232221658045.
[I 2025-02-18 23:37:38,693] Trial 5 finished with value: 12.025690935354875 and parameters: {'learning_rate': 0.0022830064331779702, 'num_leaves': 368, 'subsample': 0.3484642212339051, 'colsample_bytree': 0.8877090358067456, 'min_data_in_leaf': 67, 'depth': 1, 'a1': 0, 'b1': 0, 'b3': 0}. Best is trial 1 with value: 11.160232

P {'learning_rate': 0.05626113063672299, 'num_leaves': 508, 'subsample': 0.24959446999270818, 'colsample_bytree': 0.9309864141658539, 'min_data_in_leaf': 84}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Fold: 3/5
Model name: lgbm
MAE: 7.608345362183975
MSE: 92.73611612276001
RMSE: 9.629959300161138
PCC: 0.6579778273095561
Spearman R: 0.6596917044391386
R2 Score: 0.43155091062049555

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'col

[I 2025-02-18 23:37:43,897] Trial 13 finished with value: 10.776454802317177 and parameters: {'fit_intercept': True, 'carat': 0, 'cut': 1, 'clarity': 1, 'depth': 0, 'table': 0, 'price': 1, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 0, 'a3': 0, 'a4': 1, 'a5': 0, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 1, 'a6': 0, 'a7': 1, 'a8': 1, 'a9': 1, 'a10': 0, 'b6': 1, 'b7': 1, 'b8': 0, 'b9': 0, 'b10': 1}. Best is trial 6 with value: 10.66358061043357.
[I 2025-02-18 23:37:43,907] Trial 14 finished with value: 10.669871251090422 and parameters: {'fit_intercept': True, 'carat': 0, 'cut': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 1, 'x': 1, 'y': 1, 'z': 1, 'a1': 1, 'a2': 0, 'a3': 0, 'a4': 0, 'a5': 0, 'b1': 1, 'b2': 0, 'b3': 1, 'b4': 0, 'b5': 1, 'a6': 0, 'a7': 1, 'a8': 1, 'a9': 1, 'a10': 0, 'b6': 0, 'b7': 0, 'b8': 0, 'b9': 0, 'b10': 1}. Best is trial 6 with value: 10.66358061043357.
[I 2025-02-18 23:37:43,916] Trial 15 finished with value: 10.669871251090422 and parameters: {'fit_intercept': Tru

P {'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'a7', 'b6', 'b7', 'b8', 'b9']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 4/5
Model name: linear_regression
MAE: 8.424740312131597
MSE: 111.7630540845796
RMSE: 10.571804

[I 2025-02-18 23:37:45,228] Trial 16 finished with value: 10.750058640284356 and parameters: {'alpha': 0.006186638869021168, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True, 'carat': 1, 'cut': 0, 'clarity': 1, 'depth': 1, 'table': 1, 'x': 1, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 0, 'a7': 0, 'b6': 0, 'b7': 1, 'b8': 1, 'b9': 0}. Best is trial 6 with value: 10.582357426024938.
[I 2025-02-18 23:37:45,246] Trial 17 finished with value: 10.747376367334608 and parameters: {'alpha': 0.003024209042442234, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True, 'carat': 1, 'cut': 0, 'clarity': 1, 'depth': 1, 'table': 1, 'x': 1, 'a1': 0, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 1, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 1, 'a7': 0, 'b6': 0, 'b7': 1, 'b8': 1, 'b9': 1}. Best is trial 6 with value: 10.582357426024938.
[I 2025-02-18 23:37:45,266] Trial 18 finished with value: 10.729850342952764 and parameters: {'alpha': 0.0028688348904042545, 'fit

P {'alpha': 0.04184954361818671, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'carat', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1', 'b3', 'b4', 'a7', 'b9']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'a1',
       'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'a7', 'b6', 'b7', 'b8',
       'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'cut', 'clarity', 'depth', 'table', 'x', 'a1',
       'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'a7', 'b6', 'b7', 'b8',
       'b9'],
      dtype='object')
Fold: 4/5
Model name: lasso
MAE: 8.407823832686102
MSE: 111.45606778662678
RMSE: 10.557275585425758
PCC: 0.5496836603385397
Spearman R: 0.5668714193794272
R2 Score: 0.30081748572444056

G In

[I 2025-02-18 23:37:46,482] Trial 21 finished with value: 10.577389890984865 and parameters: {'alpha': 0.0946534258912089, 'solver': 'sag', 'fit_intercept': True, 'carat': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'x': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b4': 1, 'a7': 0, 'b9': 0}. Best is trial 20 with value: 10.577389707240345.
[I 2025-02-18 23:37:46,506] Trial 22 finished with value: 10.577391870666554 and parameters: {'alpha': 0.06094350800400011, 'solver': 'sag', 'fit_intercept': True, 'carat': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'x': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b4': 1, 'a7': 0, 'b9': 0}. Best is trial 20 with value: 10.577389707240345.
[I 2025-02-18 23:37:46,528] Trial 23 finished with value: 10.577391444337037 and parameters: {'alpha': 0.06820658133121144, 'solver': 'sag', 'fit_intercept': True, 'carat': 0, 'clarity': 0, 'depth': 1, 'table': 0, 'x': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 1, 'b4': 1, 'a7': 0, 'b9': 0}. Best is trial 20 with value: 10.5773897072403

P {'alpha': 0.06564783931083261, 'solver': 'svd', 'fit_intercept': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1', 'b3', 'b4', 'a7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1',
       'b3', 'b4', 'a7', 'b9'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'carat', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1',
       'b3', 'b4', 'a7', 'b9'],
      dtype='object')
Fold: 4/5
Model name: ridge
MAE: 8.418056542663354
MSE: 111.60348749717987
RMSE: 10.564255179480467
PCC: 0.5492317032475118
Spearman R: 0.5652821491504161
R2 Score: 0.29989269727707113

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1', 'b3',


[I 2025-02-18 23:37:48,154] Trial 2 finished with value: 9.908840266535988 and parameters: {'eta': 0.01658731893279766, 'gamma': 3.5313775765230895e-05, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.511133700376783, 'colsample_bytree': 0.9751513780753547, 'clarity': 0, 'depth': 1, 'table': 0, 'x': 1, 'a1': 1, 'a4': 0, 'b1': 1, 'b3': 0, 'b4': 0, 'a7': 1}. Best is trial 0 with value: 9.66895334381157.
[I 2025-02-18 23:37:48,193] Trial 3 finished with value: 9.821402560601413 and parameters: {'eta': 0.10743160771301828, 'gamma': 0.13312115086261467, 'max_depth': 3, 'min_child_weight': 3, 'subsample': 0.8804565176841971, 'colsample_bytree': 0.5525616865265, 'clarity': 1, 'depth': 1, 'table': 0, 'x': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'b4': 1, 'a7': 0}. Best is trial 0 with value: 9.66895334381157.
[I 2025-02-18 23:37:48,226] Trial 4 finished with value: 11.022111529429388 and parameters: {'eta': 0.011080628161055903, 'gamma': 0.01048531975193463, 'max_depth': 3, 'min_child_wei

P {'eta': 0.10071935285523413, 'gamma': 1.3531863523459543e-08, 'max_depth': 3, 'min_child_weight': 4, 'subsample': 0.5446287544147144, 'colsample_bytree': 0.9470793336883647}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1', 'b3',
       'b4', 'a7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'x', 'a1', 'a4', 'b1', 'b3',
       'b4', 'a7'],
      dtype='object')
Fold: 4/5
Model name: xgb
MAE: 7.560367411773406
MSE: 89.49087004817437
RMSE: 9.459961418958027
PCC: 0.6643201326119819
Spearman R: 0.6742036038112124
R2 Score: 0.4386088369385527

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
     

[I 2025-02-18 23:37:54,871] Trial 0 finished with value: 9.760328071545022 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 5, 'min_samples_split': 8, 'min_samples_leaf': 3, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 9.760328071545022.
[I 2025-02-18 23:37:55,463] Trial 1 finished with value: 12.20050122332161 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 4, 'min_samples_split': 10, 'min_samples_leaf': 8, 'depth': 0, 'a1': 1, 'b1': 1, 'b3': 0}. Best is trial 0 with value: 9.760328071545022.
[I 2025-02-18 23:37:55,556] Trial 2 finished with value: 12.168815857871614 and parameters: {'criterion': 'squared_error', 'max_features': 'log2', 'max_depth': 4, 'min_samples_split': 3, 'min_samples_leaf': 7, 'depth': 0, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 9.760328071545022.
[I 2025-02-18 23:37:56,131] Trial 3 finished with value: 9.616998887551565 and parameters: {'criterion': 'ab

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 9}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:38:14,786] A new study created in memory with name: no-name-01a6f52d-2bd2-437f-91de-8eff43651b81


Fold: 4/5
Model name: random_forest
MAE: 7.541202807492841
MSE: 89.20604946849329
RMSE: 9.444895418610695
PCC: 0.6656893084119679
Spearman R: 0.6737858207651597
R2 Score: 0.44039556396897406

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:38:15,068] Trial 0 finished with value: 12.35130965661596 and parameters: {'loss': 'huber', 'criterion': 'friedman_mse', 'learning_rate': 0.01286162919438755, 'subsample': 0.9591486605815768, 'max_depth': 3, 'min_samples_split': 3, 'min_samples_leaf': 6, 'max_features': 'log2', 'max_leaf_nodes': 820, 'depth': 0, 'a1': 1, 'b1': 0, 'b3': 1}. Best is trial 0 with value: 12.35130965661596.
[I 2025-02-18 23:38:15,164] Trial 1 finished with value: 9.776719067224146 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.0709932045845248, 'subsample': 0.23392265157412112, 'max_depth': 4, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_leaf_nodes': 816, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 1 with value: 9.776719067224146.
[I 2025-02-18 23:38:15,331] Trial 2 finished with value: 20.40057254858441 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.0475624770380371, '

P {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.053341693433937354, 'subsample': 0.6881703896197164, 'max_depth': 4, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_leaf_nodes': 183}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Fold: 4/5
Model name: gradient_boosting
MAE: 7.556831717182081
MSE: 89.2256466571059
RMSE: 9.445932810321377
PCC: 0.6648209372242575
Spearman R: 0.6728484657223724
R2 Score: 0.4402726275342066

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
G Index

[I 2025-02-18 23:38:42,331] Trial 0 finished with value: 12.024402669137752 and parameters: {'n_estimators': 99, 'loss': 'square', 'learning_rate': 0.004529502746660639, 'depth': 0, 'a1': 1, 'b1': 1, 'b3': 1}. Best is trial 0 with value: 12.024402669137752.
[I 2025-02-18 23:38:42,492] Trial 1 finished with value: 12.48336659526997 and parameters: {'n_estimators': 55, 'loss': 'exponential', 'learning_rate': 0.008251055328567975, 'depth': 0, 'a1': 0, 'b1': 0, 'b3': 1}. Best is trial 0 with value: 12.024402669137752.
[I 2025-02-18 23:38:42,720] Trial 2 finished with value: 9.739501661281023 and parameters: {'n_estimators': 52, 'loss': 'square', 'learning_rate': 0.0016772765137143164, 'depth': 1, 'a1': 0, 'b1': 1, 'b3': 1}. Best is trial 2 with value: 9.739501661281023.
[I 2025-02-18 23:38:43,012] Trial 3 finished with value: 12.028625779547745 and parameters: {'n_estimators': 59, 'loss': 'square', 'learning_rate': 0.02160348662090041, 'depth': 0, 'a1': 1, 'b1': 1, 'b3': 1}. Best is trial 

P {'n_estimators': 72, 'loss': 'square', 'learning_rate': 0.0012629493641027408}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1', 'b3'],
      dtype='object')


[I 2025-02-18 23:39:13,955] A new study created in memory with name: no-name-199fb080-59ab-41a2-a9df-df322ce3400c
[I 2025-02-18 23:39:13,990] Trial 0 finished with value: 12.61482162419176 and parameters: {'learning_rate': 0.008369373295798937, 'num_leaves': 528, 'subsample': 0.5482188432519814, 'colsample_bytree': 0.8075481575131539, 'min_data_in_leaf': 42, 'depth': 0, 'a1': 0, 'b1': 0}. Best is trial 0 with value: 12.61482162419176.
[I 2025-02-18 23:39:14,034] Trial 1 finished with value: 10.157959315482307 and parameters: {'learning_rate': 0.020064044347010705, 'num_leaves': 849, 'subsample': 0.43460004420047965, 'colsample_bytree': 0.7530572338286412, 'min_data_in_leaf': 81, 'depth': 1, 'a1': 1, 'b1': 0}. Best is trial 1 with value: 10.157959315482307.
[I 2025-02-18 23:39:14,082] Trial 2 finished with value: 11.287453934883057 and parameters: {'learning_rate': 0.007520470221853807, 'num_leaves': 876, 'subsample': 0.8600651836409174, 'colsample_bytree': 0.5314465739953396, 'min_data

Fold: 4/5
Model name: ada_boost
MAE: 7.714293986397465
MSE: 94.13717310005373
RMSE: 9.70243129839391
PCC: 0.6413477483928272
Spearman R: 0.6516850618479836
R2 Score: 0.40946180246647346

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1'],
      dtype='object')


[I 2025-02-18 23:39:14,161] Trial 5 finished with value: 12.614205562040983 and parameters: {'learning_rate': 0.0016072800433048018, 'num_leaves': 700, 'subsample': 0.8821220601340911, 'colsample_bytree': 0.12445002022749524, 'min_data_in_leaf': 63, 'depth': 0, 'a1': 0, 'b1': 1}. Best is trial 1 with value: 10.157959315482307.
[I 2025-02-18 23:39:14,243] Trial 6 finished with value: 12.513748912700416 and parameters: {'learning_rate': 0.0172666628402737, 'num_leaves': 495, 'subsample': 0.7671753287796601, 'colsample_bytree': 0.7262638458163134, 'min_data_in_leaf': 42, 'depth': 0, 'a1': 1, 'b1': 0}. Best is trial 1 with value: 10.157959315482307.
[I 2025-02-18 23:39:14,322] Trial 7 finished with value: 10.214628473318848 and parameters: {'learning_rate': 0.01822631790788987, 'num_leaves': 84, 'subsample': 0.2196759559107262, 'colsample_bytree': 0.7130015529539719, 'min_data_in_leaf': 10, 'depth': 1, 'a1': 0, 'b1': 1}. Best is trial 1 with value: 10.157959315482307.
[I 2025-02-18 23:39:1

P {'learning_rate': 0.023755690631947685, 'num_leaves': 910, 'subsample': 0.6299524237514249, 'colsample_bytree': 0.623979241591244, 'min_data_in_leaf': 82}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b1']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b1'],
      dtype='object')
Fold: 4/5
Model name: lgbm
MAE: 7.858855901247396
MSE: 97.93614374981676
RMSE: 9.896269183374953
PCC: 0.641853682975169
Spearman R: 0.6498770600099577
R2 Score: 0.38563022556529125

G Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 

[I 2025-02-18 23:39:20,426] Trial 10 finished with value: 10.863844998801067 and parameters: {'fit_intercept': True, 'carat': 1, 'cut': 1, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 1, 'x': 1, 'y': 0, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 1, 'a8': 0, 'a9': 0, 'a10': 1, 'b6': 0, 'b7': 0, 'b8': 0, 'b9': 0, 'b10': 1}. Best is trial 10 with value: 10.863844998801067.
[I 2025-02-18 23:39:20,437] Trial 11 finished with value: 10.863844998801067 and parameters: {'fit_intercept': True, 'carat': 1, 'cut': 1, 'clarity': 0, 'depth': 1, 'table': 0, 'price': 1, 'x': 1, 'y': 0, 'z': 0, 'a1': 1, 'a2': 1, 'a3': 1, 'a4': 1, 'a5': 0, 'b1': 1, 'b2': 1, 'b3': 1, 'b4': 1, 'b5': 1, 'a6': 1, 'a7': 1, 'a8': 0, 'a9': 0, 'a10': 1, 'b6': 0, 'b7': 0, 'b8': 0, 'b9': 0, 'b10': 1}. Best is trial 10 with value: 10.863844998801067.
[I 2025-02-18 23:39:20,448] Trial 12 finished with value: 10.863844998801067 and parameters: {'fit_intercept':

P {'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'clarity', 'depth', 'table', 'price', 'y', 'z', 'a1', 'a4', 'a5', 'b1', 'b2', 'b3', 'a7', 'a8', 'a10']
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Index(['carat', 'cut', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z',
       'a1', 'a2', 'a3', 'a4', 'a5', 'b1', 'b2', 'b3', 'b4', 'b5', 'a6', 'a7',
       'a8', 'a9', 'a10', 'b6', 'b7', 'b8', 'b9', 'b10', 'colour_G',
       'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J'],
      dtype='object')
Fold: 5/5
Model name: linear_regression
MAE: 8.63141555041228
MSE: 117.4273683580186
RMSE: 10.836390928626495
PCC: 0.54205495199385

[I 2025-02-18 23:39:21,743] Trial 20 finished with value: 10.983018429740874 and parameters: {'alpha': 0.006456592722420799, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False, 'clarity': 1, 'depth': 1, 'table': 1, 'price': 1, 'y': 0, 'z': 0, 'a1': 1, 'a4': 1, 'a5': 1, 'b1': 0, 'b2': 0, 'b3': 1, 'a7': 1, 'a8': 1, 'a10': 1}. Best is trial 20 with value: 10.983018429740874.
[I 2025-02-18 23:39:21,752] Trial 21 finished with value: 10.983007194032252 and parameters: {'alpha': 0.008139126293957247, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False, 'clarity': 1, 'depth': 1, 'table': 1, 'price': 1, 'y': 0, 'z': 0, 'a1': 1, 'a4': 1, 'a5': 1, 'b1': 0, 'b2': 0, 'b3': 1, 'a7': 1, 'a8': 1, 'a10': 1}. Best is trial 21 with value: 10.983007194032252.
[I 2025-02-18 23:39:21,762] Trial 22 finished with value: 10.983089963385973 and parameters: {'alpha': 0.00572719796826341, 'fit_intercept': True, 'selection': 'cyclic', 'warm_start': False, 'clarity': 1, 'depth': 1, 'ta

P {'alpha': 0.024727982492300515, 'fit_intercept': False, 'selection': 'cyclic', 'warm_start': True}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b2', 'b3', 'a7', 'a8', 'a10']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'price', 'y', 'z', 'a1', 'a4',
       'a5', 'b1', 'b2', 'b3', 'a7', 'a8', 'a10'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'clarity', 'depth', 'table', 'price', 'y', 'z', 'a1', 'a4',
       'a5', 'b1', 'b2', 'b3', 'a7', 'a8', 'a10'],
      dtype='object')
Fold: 5/5
Model name: lasso
MAE: 8.650140510018245
MSE: 117.71794255117673
RMSE: 10.849789977284203
PCC: 0.5402774559234241
Spearman R: 0.5546005298865665
R2 Score: 0.29148846324152566

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth

[I 2025-02-18 23:39:22,766] Trial 26 finished with value: 11.411166394342736 and parameters: {'alpha': 0.08194466837485298, 'solver': 'lsqr', 'fit_intercept': False, 'depth': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b2': 1, 'b3': 0, 'a7': 1, 'a8': 0, 'a10': 0}. Best is trial 25 with value: 10.854479332942416.
[I 2025-02-18 23:39:22,773] Trial 27 finished with value: 10.854478858610062 and parameters: {'alpha': 0.09673678194211334, 'solver': 'lsqr', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'a7': 0, 'a8': 1, 'a10': 0}. Best is trial 27 with value: 10.854478858610062.
[I 2025-02-18 23:39:22,781] Trial 28 finished with value: 10.854480905692913 and parameters: {'alpha': 0.08912444977195044, 'solver': 'lsqr', 'fit_intercept': False, 'depth': 1, 'a1': 1, 'a4': 1, 'b1': 1, 'b2': 0, 'b3': 1, 'a7': 0, 'a8': 1, 'a10': 0}. Best is trial 27 with value: 10.854478858610062.
[I 2025-02-18 23:39:22,795] Trial 29 finished with value: 12.360381043299675 and parameters: {'al

P {'alpha': 0.09035624458648071, 'solver': 'saga', 'fit_intercept': False}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7', 'a8']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b2', 'b3', 'a7', 'a8', 'a10'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b2', 'b3', 'a7', 'a8', 'a10'],
      dtype='object')
Fold: 5/5
Model name: ridge
MAE: 8.646192155261014
MSE: 117.79960681970616
RMSE: 10.853552728010591
PCC: 0.5399463873091838
Spearman R: 0.5539927217828381
R2 Score: 0.2909969487354096

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7', 'a8'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
 

[I 2025-02-18 23:39:23,940] Trial 2 finished with value: 12.900805195360373 and parameters: {'eta': 0.01708934317537124, 'gamma': 5.038637046534322e-06, 'max_depth': 8, 'min_child_weight': 1, 'subsample': 0.9941699606044896, 'colsample_bytree': 0.9077172215474194, 'depth': 0, 'a1': 0, 'a4': 1, 'b1': 0, 'b3': 0, 'a7': 0, 'a8': 1}. Best is trial 1 with value: 10.169842662786834.
[I 2025-02-18 23:39:23,990] Trial 3 finished with value: 10.280734332382526 and parameters: {'eta': 0.0371159896296391, 'gamma': 0.006715918443864401, 'max_depth': 5, 'min_child_weight': 1, 'subsample': 0.5456388736069225, 'colsample_bytree': 0.5109927358154328, 'depth': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'a7': 1, 'a8': 1}. Best is trial 1 with value: 10.169842662786834.
[I 2025-02-18 23:39:24,032] Trial 4 finished with value: 10.489865288172144 and parameters: {'eta': 0.064201577710337, 'gamma': 1.1277500431979818e-07, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.7902607968367448, 'colsample_bytree'

P {'eta': 0.05128655990529329, 'gamma': 1.8234090056525107e-07, 'max_depth': 4, 'min_child_weight': 2, 'subsample': 0.8741626352918169, 'colsample_bytree': 0.7351431547070326}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7', 'a8'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7', 'a8'],
      dtype='object')
Fold: 5/5
Model name: xgb
MAE: 7.505802707408782
MSE: 88.54810878699588
RMSE: 9.410000466896689
PCC: 0.6852302225917168
Spearman R: 0.6885608559480183
R2 Score: 0.4670535750617918

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')
G Index(['colour_G', 'c

[I 2025-02-18 23:39:30,973] Trial 0 finished with value: 12.435763612578961 and parameters: {'criterion': 'absolute_error', 'max_features': 'log2', 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 6, 'depth': 0, 'a1': 1, 'a4': 1, 'b1': 1, 'b3': 0, 'a7': 1}. Best is trial 0 with value: 12.435763612578961.
[I 2025-02-18 23:39:31,068] Trial 1 finished with value: 9.883337851562478 and parameters: {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 4, 'min_samples_leaf': 2, 'depth': 1, 'a1': 1, 'a4': 0, 'b1': 0, 'b3': 1, 'a7': 0}. Best is trial 1 with value: 9.883337851562478.
[I 2025-02-18 23:39:31,747] Trial 2 finished with value: 10.187080048411739 and parameters: {'criterion': 'absolute_error', 'max_features': 'sqrt', 'max_depth': 8, 'min_samples_split': 2, 'min_samples_leaf': 9, 'depth': 1, 'a1': 0, 'a4': 1, 'b1': 1, 'b3': 0, 'a7': 1}. Best is trial 1 with value: 9.883337851562478.
[I 2025-02-18 23:39:32,451] Trial 3 finished with val

P {'criterion': 'squared_error', 'max_features': 'sqrt', 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 3}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')


[I 2025-02-18 23:39:50,127] A new study created in memory with name: no-name-e6c3bc32-935f-4a41-aa8b-a66cc2491260


Fold: 5/5
Model name: random_forest
MAE: 7.601646782611173
MSE: 90.53020459461965
RMSE: 9.514736181030962
PCC: 0.6801275224348295
Spearman R: 0.6821640270940215
R2 Score: 0.45512389198861436

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')


[I 2025-02-18 23:39:50,354] Trial 0 finished with value: 18.845819581080562 and parameters: {'loss': 'quantile', 'criterion': 'squared_error', 'learning_rate': 0.00522041876829393, 'subsample': 0.4174187749797681, 'max_depth': 4, 'min_samples_split': 8, 'min_samples_leaf': 9, 'max_features': 'log2', 'max_leaf_nodes': 707, 'depth': 1, 'a1': 0, 'a4': 0, 'b1': 0, 'b3': 1, 'a7': 1}. Best is trial 0 with value: 18.845819581080562.
[I 2025-02-18 23:39:50,514] Trial 1 finished with value: 12.580868978727965 and parameters: {'loss': 'squared_error', 'criterion': 'friedman_mse', 'learning_rate': 0.007855950744037431, 'subsample': 0.3380803959264815, 'max_depth': 6, 'min_samples_split': 3, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'max_leaf_nodes': 870, 'depth': 0, 'a1': 1, 'a4': 0, 'b1': 0, 'b3': 1, 'a7': 1}. Best is trial 1 with value: 12.580868978727965.
[I 2025-02-18 23:39:50,855] Trial 2 finished with value: 10.622706701188655 and parameters: {'loss': 'huber', 'criterion': 'squared_err

P {'loss': 'huber', 'criterion': 'squared_error', 'learning_rate': 0.03446657881297697, 'subsample': 0.8201273586605756, 'max_depth': 6, 'min_samples_split': 6, 'min_samples_leaf': 8, 'max_features': 'log2', 'max_leaf_nodes': 983}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3', 'a7']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'a4', 'b1', 'b3', 'a7'],
      dtype='object')


[I 2025-02-18 23:40:48,628] A new study created in memory with name: no-name-8b0d3fd0-e482-4f50-8020-234197bae399


Fold: 5/5
Model name: gradient_boosting
MAE: 7.884526082493534
MSE: 97.10756943475647
RMSE: 9.854317299273271
PCC: 0.6475121940575355
Spearman R: 0.6514423305793284
R2 Score: 0.4155365634156525

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3', 'a7'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3', 'a7'],
      dtype='object')


[I 2025-02-18 23:40:49,010] Trial 0 finished with value: 9.91781958519372 and parameters: {'n_estimators': 90, 'loss': 'exponential', 'learning_rate': 0.03987150673627861, 'depth': 1, 'a1': 1, 'b3': 1, 'a7': 0}. Best is trial 0 with value: 9.91781958519372.
[I 2025-02-18 23:40:49,286] Trial 1 finished with value: 12.480867047403034 and parameters: {'n_estimators': 56, 'loss': 'linear', 'learning_rate': 0.038824525155148594, 'depth': 0, 'a1': 1, 'b3': 1, 'a7': 1}. Best is trial 0 with value: 9.91781958519372.
[I 2025-02-18 23:40:49,547] Trial 2 finished with value: 10.245168296919339 and parameters: {'n_estimators': 82, 'loss': 'exponential', 'learning_rate': 0.03728542720419472, 'depth': 1, 'a1': 1, 'b3': 0, 'a7': 0}. Best is trial 0 with value: 9.91781958519372.
[I 2025-02-18 23:40:49,717] Trial 3 finished with value: 10.48913558102713 and parameters: {'n_estimators': 82, 'loss': 'linear', 'learning_rate': 0.0029380989222627034, 'depth': 1, 'a1': 0, 'b3': 0, 'a7': 0}. Best is trial 0 

P {'n_estimators': 81, 'loss': 'exponential', 'learning_rate': 0.059114460818983805}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3', 'a7'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3', 'a7'],
      dtype='object')


[I 2025-02-18 23:41:19,905] A new study created in memory with name: no-name-8974f66c-6854-4560-90bf-a055388bded8


Fold: 5/5
Model name: ada_boost
MAE: 7.917338153236487
MSE: 97.87314530000232
RMSE: 9.89308573196464
PCC: 0.6418833059647899
Spearman R: 0.6458355858375695
R2 Score: 0.4109287753330947

G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
G Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')


[I 2025-02-18 23:41:20,376] Trial 0 finished with value: 10.70386303441481 and parameters: {'learning_rate': 0.019762149818082712, 'num_leaves': 750, 'subsample': 0.7017567840600334, 'colsample_bytree': 0.7600219417258738, 'min_data_in_leaf': 6, 'depth': 1, 'a1': 1, 'b3': 0}. Best is trial 0 with value: 10.70386303441481.
[I 2025-02-18 23:41:20,421] Trial 1 finished with value: 12.794183653537388 and parameters: {'learning_rate': 0.09293087206104114, 'num_leaves': 252, 'subsample': 0.7909359212724346, 'colsample_bytree': 0.7352428223894203, 'min_data_in_leaf': 86, 'depth': 0, 'a1': 1, 'b3': 0}. Best is trial 0 with value: 10.70386303441481.
[I 2025-02-18 23:41:20,541] Trial 2 finished with value: 10.416802797968653 and parameters: {'learning_rate': 0.06653903026289387, 'num_leaves': 869, 'subsample': 0.6690256396122187, 'colsample_bytree': 0.5931102397388968, 'min_data_in_leaf': 26, 'depth': 1, 'a1': 0, 'b3': 1}. Best is trial 2 with value: 10.416802797968653.
[I 2025-02-18 23:41:20,61

P {'learning_rate': 0.04880270694341699, 'num_leaves': 5, 'subsample': 0.4313420532575576, 'colsample_bytree': 0.7958031635051172, 'min_data_in_leaf': 31}
F ['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I', 'colour_J', 'depth', 'a1', 'b3']
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Index(['colour_G', 'colour_E', 'colour_F', 'colour_H', 'colour_D', 'colour_I',
       'colour_J', 'depth', 'a1', 'b3'],
      dtype='object')
Fold: 5/5
Model name: lgbm
MAE: 7.861164966457033
MSE: 96.68296334655786
RMSE: 9.83274953136496
PCC: 0.6485259748717046
Spearman R: 0.651131264068086
R2 Score: 0.41809214929786254



Compute average scores and rank models by R2 score

In [15]:
for model_name, model_metrics in model_scores.items():
    for metric, scores in model_metrics.items():
        model_scores[model_name][metric] = sum(scores) / len(scores)
    model_scores[model_name] = dict(model_scores[model_name])

model_scores = dict(sorted(model_scores.items(), key=lambda x: x[1]["r2_score"], reverse=True))

In [16]:
# Print results
for i, (model_name, model_metrics) in enumerate(model_scores.items()):
    print(f"No.{i+1} Model: {model_name}")
    for metric, score in model_metrics.items():
        print(f"{metric}: {score}")
    print()

No.1 Model: xgb
mae: 7.450683901007574
mse: 87.88724788979896
rmse: 9.373804283174959
pcc: 0.6751894815076779
spearman_r: 0.6801768309463323
r2_score: 0.45423330161859504

No.2 Model: random_forest
mae: 7.5175070178551575
mse: 89.39355785728699
rmse: 9.454062976153274
pcc: 0.6702822643389339
spearman_r: 0.6738402406125337
r2_score: 0.4448688635979587

No.3 Model: gradient_boosting
mae: 7.630175740803702
mse: 91.79799517979038
rmse: 9.579690082987636
pcc: 0.657636262110951
spearman_r: 0.6610262695291291
r2_score: 0.43002805114779286

No.4 Model: ada_boost
mae: 7.70381967720981
mse: 93.72728778239878
rmse: 9.68028005451287
pcc: 0.647475657547147
spearman_r: 0.6534548100068588
r2_score: 0.4180225995839904

No.5 Model: lgbm
mae: 7.72913356164467
mse: 94.76162519198378
rmse: 9.733948386527386
pcc: 0.6465235259178278
spearman_r: 0.6490956734093304
r2_score: 0.41129347840788133

No.6 Model: lasso
mae: 8.454680495643604
mse: 112.43587459054038
rmse: 10.602313984635568
pcc: 0.5502258276234644
s