In [1]:
# Packages

import gc
import polars as pl
import numpy as np 
import pandas as pd
import json
import glob
import matplotlib.pyplot as plt
import xgboost as xgb
import mlflow
import mlflow.xgboost
import warnings
from itertools import product
from tqdm.auto import tqdm
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import KFold
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Loading and concatenating the whole dataset

class LoadData:
    
    def __init__(self, file_paths):
        self.file_paths = file_paths
        
    def load_and_concat(self):

        partitioned_data = [pl.read_parquet(file_path) for file_path in self.file_paths]
        df = pl.concat(partitioned_data, rechunk=False)
        
        return df
    
# Specify file paths
file_paths = sorted(glob.glob('Data/train.parquet/*/*.parquet'))

# Initialize the loader and load data as a lazy frame
loader = LoadData(file_paths)
df_full = loader.load_and_concat()

In [3]:
# Import necessary libraries
warnings.filterwarnings('ignore')

# Set up MLflow
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Jane Street Forecasting Custom R2")

# Ensure df_full is sorted by 'date_id' and 'time_id'
df_full = df_full.sort(['date_id', 'time_id'])

# Define hyperparameter distributions for random search
from scipy.stats import uniform, randint

param_distributions = {
    'n_estimators': randint(50, 500),
    'max_depth': randint(3, 10),
    'learning_rate': uniform(0.01, 0.19),
    'subsample': uniform(0.6, 0.4),
    'colsample_bytree': uniform(0.6, 0.4),
    'gamma': uniform(0, 0.5),
    'min_child_weight': randint(1, 6),
    'reg_alpha': uniform(0.0, 1.0),
    'reg_lambda': uniform(0.5, 4.5)
}

# Number of hyperparameter combinations to try
n_iter = 20

# Initialize random number generator
rng = np.random.default_rng(seed=42)

# Generate random hyperparameter combinations
hyperparameter_list = []
for _ in range(n_iter):
    params = {
        'n_estimators': rng.integers(50, 500),
        'max_depth': rng.integers(3, 10),
        'learning_rate': rng.uniform(0.01, 0.2),
        'subsample': rng.uniform(0.6, 1.0),
        'colsample_bytree': rng.uniform(0.6, 1.0),
        'gamma': rng.uniform(0, 0.5),
        'min_child_weight': rng.integers(1, 6),
        'reg_alpha': rng.uniform(0.0, 1.0),
        'reg_lambda': rng.uniform(0.5, 5.0)
    }
    hyperparameter_list.append(params)


# Exclude non-informative features and the target
excluded_features = [col for col in df_full.columns if col.startswith('responder_')] + ['weight']

# Define feature columns (excluding the target and unwanted features)
feature_cols = [col for col in df_full.columns if col not in excluded_features]

# Define custom cross-validation folds
n_splits = 8
data_length = len(df_full)

# Split the data into n_splits folds
fold_sizes = np.full(n_splits, data_length // n_splits)
fold_sizes[:data_length % n_splits] += 1  

indices = np.arange(data_length)
current = 0
fold_indices_list = []
for fold_size in fold_sizes:
    start, stop = current, current + fold_size
    fold_indices_list.append(indices[start:stop])
    current = stop

# Create custom fold indices
validation_fraction = 0.2

custom_fold_indices = []

for i in range(1, n_splits):
    # Get indices for folds i - 1 and i
    fold_i_minus1_indices = fold_indices_list[i - 1]
    fold_i_indices = fold_indices_list[i]

    # Split fold_i_indices into training and validation sets
    val_size = int(len(fold_i_indices) * validation_fraction)
    if val_size == 0:
        val_size = 1 

    train_fold_i_indices = fold_i_indices[:-val_size]
    val_fold_i_indices = fold_i_indices[-val_size:]

    # Combine training indices
    train_indices = np.concatenate([fold_i_minus1_indices, train_fold_i_indices])

    # Validation indices
    val_indices = val_fold_i_indices

    custom_fold_indices.append((train_indices, val_indices))

# Define the weighted R² metric for evaluation
def weighted_r2_score(y_true, y_pred, sample_weight):
    numerator = np.sum(sample_weight * (y_true - y_pred) ** 2)
    denominator = np.sum(sample_weight * y_true ** 2)
    return 1 - (numerator / denominator)

# Define the custom evaluation metric for XGBoost
def weighted_r2_eval(preds, dtrain):
    y_true = dtrain.get_label()
    w = dtrain.get_weight()
    numerator = np.sum(w * (y_true - preds) ** 2)
    denominator = np.sum(w * y_true ** 2)
    r2 = 1 - (numerator / denominator)
    return 'weighted_r2', r2

def train_evaluate_model(df_full, feature_cols, params, custom_fold_indices):                  

    fold_weighted_r2_list = []

    for fold, (train_indices, val_indices) in enumerate(custom_fold_indices):
        print(f"\nProcessing Fold {fold + 1}")
        try:
            # Select training and validation data from Polars DataFrame
            df_train = df_full[train_indices]
            df_val = df_full[val_indices]

            # Convert Polars DataFrames to Pandas DataFrames for XGBoost
            df_train_pd = df_train.to_pandas()
            df_val_pd = df_val.to_pandas()

            # Extract features, target, and weights
            X_train = df_train_pd[feature_cols].astype(np.float32)
            y_train = df_train_pd['responder_6'].astype(np.float32)
            w_train = df_train_pd['weight'].astype(np.float32)

            X_val = df_val_pd[feature_cols].astype(np.float32)
            y_val = df_val_pd['responder_6'].astype(np.float32)
            w_val = df_val_pd['weight'].astype(np.float32)

            # Convert to XGBoost DMatrix
            dtrain = xgb.DMatrix(X_train, label=y_train, weight=w_train)
            dval = xgb.DMatrix(X_val, label=y_val, weight=w_val)

            # Set up model parameters
            model_params = params.copy()
            model_params.update({
                'objective': 'reg:squarederror',
                'tree_method': 'hist',     
                'eval_metric': 'rmse',
                'n_jobs': -1,              
                'verbosity': 0,
                'seed': 42                 
            })

            # Specify evaluation set
            evals = [(dval, 'validation')]

            # Train the model with early stopping and custom metric
            model = xgb.train(
                model_params,
                dtrain,
                num_boost_round=1000,
                evals=evals,
                early_stopping_rounds=10,
                feval=weighted_r2_eval,
                maximize=True,  # Because higher R² is better
                verbose_eval=False
            )

            # Predict and evaluate
            y_pred = model.predict(dval)
            weighted_r2 = weighted_r2_score(y_val, y_pred, w_val)

            print(f"Fold {fold + 1} - Weighted R^2: {weighted_r2:.4f}")

            # Store metrics
            fold_weighted_r2_list.append(weighted_r2)

            # Log metrics
            mlflow.log_metric(f"weighted_r2_fold_{fold + 1}", weighted_r2)

            # Extract feature importance
            feature_importance = model.get_score(importance_type='gain')
            sorted_importance = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)
            importance_df = pd.DataFrame(sorted_importance, columns=['Feature', 'Importance'])

            # Limit to top 15 features
            top_n = 15
            importance_df = importance_df.head(top_n)

            # Save feature importance to a CSV file
            importance_csv = f'feature_importance_fold_{fold + 1}.csv'
            importance_df.to_csv(importance_csv, index=False)

            # Log the CSV file as an artifact
            mlflow.log_artifact(importance_csv, artifact_path=f'feature_importance/fold_{fold + 1}')

            # Plot feature importance
            plt.figure(figsize=(10, 6))
            importance_df.plot(kind='bar', x='Feature', y='Importance', legend=False)
            plt.title(f'Feature Importance - Fold {fold + 1}')
            plt.xticks(rotation=45, ha='right')  
            plt.tight_layout()

            # Save the plot as an image
            importance_plot = f'feature_importance_fold_{fold + 1}.png'
            plt.savefig(importance_plot)

            # Log the image as an artifact
            mlflow.log_artifact(importance_plot, artifact_path=f'feature_importance/fold_{fold + 1}')

            # Close the plot to free memory
            plt.close()

            # Clean up
            del df_train, df_val, df_train_pd, df_val_pd, X_train, X_val, y_train, y_val, w_train, w_val, dtrain, dval, model
            gc.collect()

        except Exception as e:
            print(f"Error on fold {fold + 1}: {e}")
            mlflow.log_param(f"error_fold_{fold + 1}", str(e))
            continue

    # Calculate average metrics
    avg_weighted_r2 = np.mean(fold_weighted_r2_list)
    std_weighted_r2 = np.std(fold_weighted_r2_list)

    return {
        'avg_weighted_r2': avg_weighted_r2,
        'std_weighted_r2': std_weighted_r2
    }

# Initialize results list
grid_search_results = []

# Begin MLflow run
with mlflow.start_run(run_name="Random Hyperparameter Search with Custom Cross-Validation") as parent_run:
    for idx, params in enumerate(hyperparameter_list):
        print(f"\nEvaluating hyperparameters set {idx + 1}/{n_iter}: {params}")

        # Start a nested MLflow run for this hyperparameter combination
        with mlflow.start_run(run_name=f"Params set {idx + 1}", nested=True):
            # Log hyperparameters
            mlflow.log_params(params)

            # Train and evaluate the model
            metrics = train_evaluate_model(df_full, feature_cols, params, custom_fold_indices)

            # Log average metrics
            mlflow.log_metric("avg_weighted_r2", metrics['avg_weighted_r2'])
            mlflow.log_metric("std_weighted_r2", metrics['std_weighted_r2'])

            # Save results
            result = {
                'params': params,
                'avg_weighted_r2': metrics['avg_weighted_r2'],
                'std_weighted_r2': metrics['std_weighted_r2']
            }
            grid_search_results.append(result)

    # Save grid search results as an artifact

    class NumpyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, (np.integer, np.int64, np.int32)):
                return int(obj)
            elif isinstance(obj, (np.floating, np.float64, np.float32)):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, (np.bool_)):
                return bool(obj)
            else:
                return super(NumpyEncoder, self).default(obj)

    # At the end of your script
    with open('grid_search_results.json', 'w') as f:
        json.dump(grid_search_results, f, indent=4, cls=NumpyEncoder)
    mlflow.log_artifact('grid_search_results.json')


2024/11/28 16:57:55 INFO mlflow.tracking.fluent: Experiment with name 'Jane Street Forecasting Custom R2' does not exist. Creating a new experiment.



Evaluating hyperparameters set 1/20: {'n_estimators': 90, 'max_depth': 8, 'learning_rate': 0.09338690355288994, 'subsample': 0.943439167964553, 'colsample_bytree': 0.8789472116237456, 'gamma': 0.047088673943824766, 'min_child_weight': 3, 'reg_alpha': 0.761139701990353, 'reg_lambda': 4.037289373746292}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0090

Processing Fold 2
Fold 2 - Weighted R^2: 0.0210

Processing Fold 3
Fold 3 - Weighted R^2: 0.0049

Processing Fold 4
Fold 4 - Weighted R^2: 0.0057

Processing Fold 5
Fold 5 - Weighted R^2: 0.0116

Processing Fold 6
Fold 6 - Weighted R^2: 0.0084

Processing Fold 7
Fold 7 - Weighted R^2: 0.0060


2024/11/28 17:08:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 1 at: http://localhost:5000/#/experiments/12/runs/6730b587107d4ee0a151613ca53e9dc5.
2024/11/28 17:08:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 2/20: {'n_estimators': 489, 'max_depth': 6, 'learning_rate': 0.09557332820015775, 'subsample': 0.7483192096930324, 'colsample_bytree': 0.9707059955394407, 'gamma': 0.32193256004033227, 'min_child_weight': 1, 'reg_alpha': 0.82276161327083, 'reg_lambda': 2.49536389472299}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0095

Processing Fold 2
Fold 2 - Weighted R^2: 0.0213

Processing Fold 3
Fold 3 - Weighted R^2: 0.0057

Processing Fold 4
Fold 4 - Weighted R^2: 0.0093

Processing Fold 5
Fold 5 - Weighted R^2: 0.0132

Processing Fold 6
Fold 6 - Weighted R^2: 0.0086

Processing Fold 7
Fold 7 - Weighted R^2: 0.0052


2024/11/28 17:17:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 2 at: http://localhost:5000/#/experiments/12/runs/d4902e1be5444067a4a1a572bf134fb5.
2024/11/28 17:17:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 3/20: {'n_estimators': 252, 'max_depth': 4, 'learning_rate': 0.11537110953300861, 'subsample': 0.6255269024416701, 'colsample_bytree': 0.9310524687970329, 'gamma': 0.3158321995610324, 'min_child_weight': 1, 'reg_alpha': 0.35452596812986836, 'reg_lambda': 4.868141109777064}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0100

Processing Fold 2
Fold 2 - Weighted R^2: 0.0196

Processing Fold 3
Fold 3 - Weighted R^2: 0.0057

Processing Fold 4
Fold 4 - Weighted R^2: 0.0093

Processing Fold 5
Fold 5 - Weighted R^2: 0.0155

Processing Fold 6
Fold 6 - Weighted R^2: 0.0077

Processing Fold 7
Fold 7 - Weighted R^2: 0.0054


2024/11/28 17:28:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 3 at: http://localhost:5000/#/experiments/12/runs/fe02f14e65fa455fb299a5f48305b7bb.
2024/11/28 17:28:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 4/20: {'n_estimators': 391, 'max_depth': 6, 'learning_rate': 0.15789286444401476, 'subsample': 0.677855483140787, 'colsample_bytree': 0.7866884014908136, 'gamma': 0.02190188289361439, 'min_child_weight': 5, 'reg_alpha': 0.15428949206754783, 'reg_lambda': 3.573720289591046}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0074

Processing Fold 2
Fold 2 - Weighted R^2: 0.0159

Processing Fold 3
Fold 3 - Weighted R^2: 0.0031

Processing Fold 4
Fold 4 - Weighted R^2: 0.0072

Processing Fold 5
Fold 5 - Weighted R^2: 0.0130

Processing Fold 6
Fold 6 - Weighted R^2: 0.0093

Processing Fold 7
Fold 7 - Weighted R^2: 0.0058


2024/11/28 17:37:05 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 4 at: http://localhost:5000/#/experiments/12/runs/95f85a6f86ae4667aa87c05553eec84a.
2024/11/28 17:37:05 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 5/20: {'n_estimators': 465, 'max_depth': 8, 'learning_rate': 0.19382684916249993, 'subsample': 0.7303301432552608, 'colsample_bytree': 0.7481838824139475, 'gamma': 0.23477790563790396, 'min_child_weight': 4, 'reg_alpha': 0.12992150533547164, 'reg_lambda': 2.640672168016702}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0068

Processing Fold 2
Fold 2 - Weighted R^2: 0.0180

Processing Fold 3
Fold 3 - Weighted R^2: 0.0036

Processing Fold 4
Fold 4 - Weighted R^2: 0.0040

Processing Fold 5
Fold 5 - Weighted R^2: 0.0071

Processing Fold 6
Fold 6 - Weighted R^2: 0.0077

Processing Fold 7
Fold 7 - Weighted R^2: 0.0035


2024/11/28 17:44:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 5 at: http://localhost:5000/#/experiments/12/runs/e5718212e92a4e07832c11e5c266e2c7.
2024/11/28 17:44:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 6/20: {'n_estimators': 135, 'max_depth': 5, 'learning_rate': 0.13726465898967696, 'subsample': 0.7748607675489323, 'colsample_bytree': 0.933071278423135, 'gamma': 0.35013255100112456, 'min_child_weight': 2, 'reg_alpha': 0.31236664138204107, 'reg_lambda': 4.245169106278405}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0095

Processing Fold 2
Fold 2 - Weighted R^2: 0.0194

Processing Fold 3
Fold 3 - Weighted R^2: 0.0046

Processing Fold 4
Fold 4 - Weighted R^2: 0.0091

Processing Fold 5
Fold 5 - Weighted R^2: 0.0146

Processing Fold 6
Fold 6 - Weighted R^2: 0.0056

Processing Fold 7
Fold 7 - Weighted R^2: 0.0059


2024/11/28 17:52:59 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 6 at: http://localhost:5000/#/experiments/12/runs/bb0f0849ab7341f18cc3886f2f433980.
2024/11/28 17:52:59 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 7/20: {'n_estimators': 245, 'max_depth': 8, 'learning_rate': 0.08362089201573314, 'subsample': 0.7153312415720976, 'colsample_bytree': 0.8729982015899902, 'gamma': 0.0698762418046549, 'min_child_weight': 5, 'reg_alpha': 0.007362269751005512, 'reg_lambda': 4.041159698759623}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0084

Processing Fold 2
Fold 2 - Weighted R^2: 0.0210

Processing Fold 3
Fold 3 - Weighted R^2: 0.0066

Processing Fold 4
Fold 4 - Weighted R^2: 0.0082

Processing Fold 5
Fold 5 - Weighted R^2: 0.0122

Processing Fold 6
Fold 6 - Weighted R^2: 0.0078

Processing Fold 7
Fold 7 - Weighted R^2: 0.0055


2024/11/28 18:02:07 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 7 at: http://localhost:5000/#/experiments/12/runs/c60aae1ce5074e0db83e1a61dbab6451.
2024/11/28 18:02:07 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 8/20: {'n_estimators': 139, 'max_depth': 8, 'learning_rate': 0.14398142193900368, 'subsample': 0.9122916124087872, 'colsample_bytree': 0.783566310215336, 'gamma': 0.28437059797644687, 'min_child_weight': 4, 'reg_alpha': 0.13979699812765745, 'reg_lambda': 1.0153853309118805}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0081

Processing Fold 2
Fold 2 - Weighted R^2: 0.0181

Processing Fold 3
Fold 3 - Weighted R^2: -0.0009

Processing Fold 4
Fold 4 - Weighted R^2: 0.0070

Processing Fold 5
Fold 5 - Weighted R^2: 0.0109

Processing Fold 6
Fold 6 - Weighted R^2: 0.0080

Processing Fold 7
Fold 7 - Weighted R^2: 0.0043


2024/11/28 18:10:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 8 at: http://localhost:5000/#/experiments/12/runs/3da66797bff841808d012f845a12cedd.
2024/11/28 18:10:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 9/20: {'n_estimators': 247, 'max_depth': 7, 'learning_rate': 0.09950827916719518, 'subsample': 0.8260944425924756, 'colsample_bytree': 0.9059995429664103, 'gamma': 0.3173591600002954, 'min_child_weight': 3, 'reg_alpha': 0.5592071607454135, 'reg_lambda': 1.8677754412817549}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0089

Processing Fold 2
Fold 2 - Weighted R^2: 0.0208

Processing Fold 3
Fold 3 - Weighted R^2: 0.0056

Processing Fold 4
Fold 4 - Weighted R^2: 0.0087

Processing Fold 5
Fold 5 - Weighted R^2: 0.0147

Processing Fold 6
Fold 6 - Weighted R^2: 0.0070

Processing Fold 7
Fold 7 - Weighted R^2: 0.0052


2024/11/28 18:19:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 9 at: http://localhost:5000/#/experiments/12/runs/fbcd047245374b639c6ca73599775467.
2024/11/28 18:19:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 10/20: {'n_estimators': 299, 'max_depth': 7, 'learning_rate': 0.09297630395414884, 'subsample': 0.6858338691278116, 'colsample_bytree': 0.7634114574898545, 'gamma': 0.42670153663408306, 'min_child_weight': 1, 'reg_alpha': 0.23393948586534075, 'reg_lambda': 0.7623623376007971}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0088

Processing Fold 2
Fold 2 - Weighted R^2: 0.0214

Processing Fold 3
Fold 3 - Weighted R^2: 0.0059

Processing Fold 4
Fold 4 - Weighted R^2: 0.0087

Processing Fold 5
Fold 5 - Weighted R^2: 0.0142

Processing Fold 6
Fold 6 - Weighted R^2: 0.0076

Processing Fold 7
Fold 7 - Weighted R^2: 0.0052


2024/11/28 18:29:58 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 10 at: http://localhost:5000/#/experiments/12/runs/662439af8bd3416facde861e06dfd16f.
2024/11/28 18:29:58 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 11/20: {'n_estimators': 435, 'max_depth': 4, 'learning_rate': 0.06578281397566989, 'subsample': 0.8647666058907579, 'colsample_bytree': 0.8228128609365113, 'gamma': 0.39194910455320675, 'min_child_weight': 5, 'reg_alpha': 0.4063868614400705, 'reg_lambda': 4.163091730997156}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0100

Processing Fold 2
Fold 2 - Weighted R^2: 0.0198

Processing Fold 3
Fold 3 - Weighted R^2: 0.0048

Processing Fold 4
Fold 4 - Weighted R^2: 0.0093

Processing Fold 5
Fold 5 - Weighted R^2: 0.0147

Processing Fold 6
Fold 6 - Weighted R^2: 0.0088

Processing Fold 7
Fold 7 - Weighted R^2: 0.0052


2024/11/28 18:44:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 11 at: http://localhost:5000/#/experiments/12/runs/31e8d09418144c7e933da0a3ae48c8a3.
2024/11/28 18:44:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 12/20: {'n_estimators': 348, 'max_depth': 5, 'learning_rate': 0.014315293895433491, 'subsample': 0.6360191443102566, 'colsample_bytree': 0.88894374023858, 'gamma': 0.2309386151256937, 'min_child_weight': 1, 'reg_alpha': 0.1612717790336018, 'reg_lambda': 2.7547014879651357}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0099

Processing Fold 2
Fold 2 - Weighted R^2: 0.0207

Processing Fold 3
Fold 3 - Weighted R^2: 0.0052

Processing Fold 4
Fold 4 - Weighted R^2: 0.0102

Processing Fold 5
Fold 5 - Weighted R^2: 0.0147

Processing Fold 6
Fold 6 - Weighted R^2: 0.0085

Processing Fold 7
Fold 7 - Weighted R^2: 0.0058


2024/11/28 19:17:32 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 12 at: http://localhost:5000/#/experiments/12/runs/77f0d3d3f5fc449d8372a1af9a06ba8e.
2024/11/28 19:17:32 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 13/20: {'n_estimators': 472, 'max_depth': 4, 'learning_rate': 0.14230087126476987, 'subsample': 0.7784625102296123, 'colsample_bytree': 0.752408490438593, 'gamma': 0.15075604457393826, 'min_child_weight': 4, 'reg_alpha': 0.3618126105533904, 'reg_lambda': 0.8944246369224544}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0098

Processing Fold 2
Fold 2 - Weighted R^2: 0.0195

Processing Fold 3
Fold 3 - Weighted R^2: 0.0051

Processing Fold 4
Fold 4 - Weighted R^2: 0.0096

Processing Fold 5
Fold 5 - Weighted R^2: 0.0084

Processing Fold 6
Fold 6 - Weighted R^2: 0.0076

Processing Fold 7
Fold 7 - Weighted R^2: 0.0047


2024/11/28 19:26:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 13 at: http://localhost:5000/#/experiments/12/runs/86fb7e27a8304733a8bf0b4e94fc5756.
2024/11/28 19:26:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 14/20: {'n_estimators': 333, 'max_depth': 5, 'learning_rate': 0.19276055626440777, 'subsample': 0.9634322762830428, 'colsample_bytree': 0.8798828535242997, 'gamma': 0.1329349807297598, 'min_child_weight': 1, 'reg_alpha': 0.9691763773477239, 'reg_lambda': 4.004379067846076}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0082

Processing Fold 2
Fold 2 - Weighted R^2: 0.0200

Processing Fold 3
Fold 3 - Weighted R^2: 0.0042

Processing Fold 4
Fold 4 - Weighted R^2: 0.0080

Processing Fold 5
Fold 5 - Weighted R^2: 0.0141

Processing Fold 6
Fold 6 - Weighted R^2: 0.0079

Processing Fold 7
Fold 7 - Weighted R^2: 0.0049


2024/11/28 19:33:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 14 at: http://localhost:5000/#/experiments/12/runs/737c4c06eb4f4d95af64da9d60e2ab63.
2024/11/28 19:33:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 15/20: {'n_estimators': 167, 'max_depth': 8, 'learning_rate': 0.09537868540731985, 'subsample': 0.7088966247380636, 'colsample_bytree': 0.6385563848613997, 'gamma': 0.45130119827192083, 'min_child_weight': 1, 'reg_alpha': 0.20236336479523032, 'reg_lambda': 1.8768048086779363}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0089

Processing Fold 2
Fold 2 - Weighted R^2: 0.0203

Processing Fold 3
Fold 3 - Weighted R^2: 0.0058

Processing Fold 4
Fold 4 - Weighted R^2: 0.0059

Processing Fold 5
Fold 5 - Weighted R^2: 0.0131

Processing Fold 6
Fold 6 - Weighted R^2: 0.0087

Processing Fold 7
Fold 7 - Weighted R^2: 0.0044


2024/11/28 19:43:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 15 at: http://localhost:5000/#/experiments/12/runs/08e0278674f749d7bce2ebb2941d1633.
2024/11/28 19:43:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 16/20: {'n_estimators': 255, 'max_depth': 8, 'learning_rate': 0.04358682875845403, 'subsample': 0.9426457136369502, 'colsample_bytree': 0.903407811934084, 'gamma': 0.3597314779754684, 'min_child_weight': 3, 'reg_alpha': 0.4320930397751037, 'reg_lambda': 3.3228897831609943}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0097

Processing Fold 2
Fold 2 - Weighted R^2: 0.0223

Processing Fold 3
Fold 3 - Weighted R^2: 0.0050

Processing Fold 4
Fold 4 - Weighted R^2: 0.0097

Processing Fold 5
Fold 5 - Weighted R^2: 0.0141

Processing Fold 6
Fold 6 - Weighted R^2: 0.0086

Processing Fold 7
Fold 7 - Weighted R^2: 0.0064


2024/11/28 19:57:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 16 at: http://localhost:5000/#/experiments/12/runs/74f9692aa6cf4d77be5233943d5d7b00.
2024/11/28 19:57:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 17/20: {'n_estimators': 113, 'max_depth': 7, 'learning_rate': 0.1334708542954158, 'subsample': 0.6337777284559556, 'colsample_bytree': 0.7663229608682438, 'gamma': 0.02080708693094624, 'min_child_weight': 1, 'reg_alpha': 0.3298612123327853, 'reg_lambda': 1.150358849897211}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0088

Processing Fold 2
Fold 2 - Weighted R^2: 0.0172

Processing Fold 3
Fold 3 - Weighted R^2: 0.0032

Processing Fold 4
Fold 4 - Weighted R^2: 0.0068

Processing Fold 5
Fold 5 - Weighted R^2: 0.0117

Processing Fold 6
Fold 6 - Weighted R^2: 0.0084

Processing Fold 7
Fold 7 - Weighted R^2: 0.0050


2024/11/28 20:05:26 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 17 at: http://localhost:5000/#/experiments/12/runs/c1f9358247ff403389837d355d9051c6.
2024/11/28 20:05:26 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 18/20: {'n_estimators': 272, 'max_depth': 7, 'learning_rate': 0.12165246871376528, 'subsample': 0.6682371874147545, 'colsample_bytree': 0.9700480473507189, 'gamma': 0.2905305698501975, 'min_child_weight': 1, 'reg_alpha': 0.34686980453483707, 'reg_lambda': 3.159119711666375}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0090

Processing Fold 2
Fold 2 - Weighted R^2: 0.0208

Processing Fold 3
Fold 3 - Weighted R^2: 0.0047

Processing Fold 4
Fold 4 - Weighted R^2: 0.0056

Processing Fold 5
Fold 5 - Weighted R^2: 0.0139

Processing Fold 6
Fold 6 - Weighted R^2: 0.0069

Processing Fold 7
Fold 7 - Weighted R^2: 0.0051


2024/11/28 20:13:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 18 at: http://localhost:5000/#/experiments/12/runs/4638ca71b8c141dd9df7c5f703cfca31.
2024/11/28 20:13:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 19/20: {'n_estimators': 182, 'max_depth': 3, 'learning_rate': 0.19212625051587462, 'subsample': 0.7929213747771601, 'colsample_bytree': 0.9130940909001145, 'gamma': 0.041364999961219284, 'min_child_weight': 2, 'reg_alpha': 0.4907069943545209, 'reg_lambda': 4.720219047387423}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0098

Processing Fold 2
Fold 2 - Weighted R^2: 0.0179

Processing Fold 3
Fold 3 - Weighted R^2: 0.0025

Processing Fold 4
Fold 4 - Weighted R^2: 0.0088

Processing Fold 5
Fold 5 - Weighted R^2: 0.0147

Processing Fold 6
Fold 6 - Weighted R^2: 0.0067

Processing Fold 7
Fold 7 - Weighted R^2: 0.0044


2024/11/28 20:23:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 19 at: http://localhost:5000/#/experiments/12/runs/cd6a693e106e4acc94c8e09fc31db4de.
2024/11/28 20:23:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.



Evaluating hyperparameters set 20/20: {'n_estimators': 268, 'max_depth': 4, 'learning_rate': 0.09996298620082122, 'subsample': 0.7067902652367575, 'colsample_bytree': 0.7326275989370209, 'gamma': 0.2603362012357689, 'min_child_weight': 3, 'reg_alpha': 0.4389114603050467, 'reg_lambda': 0.597254359461487}

Processing Fold 1
Fold 1 - Weighted R^2: 0.0099

Processing Fold 2
Fold 2 - Weighted R^2: 0.0201

Processing Fold 3
Fold 3 - Weighted R^2: 0.0049

Processing Fold 4
Fold 4 - Weighted R^2: 0.0102

Processing Fold 5
Fold 5 - Weighted R^2: 0.0152

Processing Fold 6
Fold 6 - Weighted R^2: 0.0077

Processing Fold 7
Fold 7 - Weighted R^2: 0.0050


2024/11/28 20:34:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Params set 20 at: http://localhost:5000/#/experiments/12/runs/b1779ae1925d45bcadb5b18ce952446f.
2024/11/28 20:34:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.
2024/11/28 20:34:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Hyperparameter Search with Custom Cross-Validation at: http://localhost:5000/#/experiments/12/runs/6ea1f4624e2e47948146a9240ae98027.
2024/11/28 20:34:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/12.


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>