# Tune XGB Regressor Model

This notebook is used to optimize an XGB Regressor Model

In [57]:
import os
import time
import joblib
import polars as pl
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import (
    StandardScaler,
    PolynomialFeatures,
)
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import r2_score, root_mean_squared_error as rmse
from xgboost import XGBRegressor

In [2]:
def get_data(poly_features: int = 1):
    # Get Data
    data = pl.read_parquet("data.parquet")
    data = data.drop(["Step", "Light_ID", "Lane", "Intersection_u", "Sim_ID"])
    data = data.with_columns(pl.col("Is_Entrypoint").cast(pl.Int8))
    print(f"Data: {data.shape}")
    print(f"{data.collect_schema()}")

    # Split Data
    X = data.drop("Num_Cars").to_numpy()
    y = data.select(pl.col("Num_Cars")).to_numpy()
    y = y.ravel()
    print("")
    print(f"X: {X.shape}")
    print(f"y: {y.shape}")

    # Scale
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Polynomial Features
    if poly_features > 1:
        poly = PolynomialFeatures(degree=poly_features)
        X = poly.fit_transform(X)

    # Train Test Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.6, test_size=0.4, random_state=42
    )
    print("")
    print(f"X_train: {X_train.shape}")
    print(f"X_test: {X_test.shape}")
    print(f"y_train: {y_train.shape}")
    print(f"y_test {y_test.shape}")

    # Train Test Validation Split
    X_test, X_val, y_test, y_val = train_test_split(
        X_test, y_test, train_size=0.5, test_size=0.5, random_state=42
    )
    print("")
    print(f"X_test: {X_test.shape}")
    print(f"X_val: {X_val.shape}")
    print(f"y_test: {y_test.shape}")
    print(f"y_val: {y_val.shape}")

    return X_train, X_test, X_val, y_train, y_test, y_val

In [4]:
X_train, X_test, X_val, y_train, y_test, y_val = get_data()

Data: (6610000, 5)
Schema({'Time': Int16, 'Num_Cars': Int16, 'Centrality': Float32, 'Is_Entrypoint': Int8, 'Distance': Int16})

X: (6610000, 4)
y: (6610000,)

X_train: (3966000, 4)
X_test: (2644000, 4)
y_train: (3966000,)
y_test (2644000,)

X_test: (1322000, 4)
X_val: (1322000, 4)
y_test: (1322000,)
y_val: (1322000,)


## Base

In [5]:
model = XGBRegressor(n_jobs=(os.cpu_count() - 1), random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=None, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=None,
             n_jobs=7, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.21278494596481323
RMSE (Training Data): 1.3919494152069092
R2 Score (Test Data): 0.20956403017044067
RMSE (Test Data): 1.3935534954071045


## Estimators

In [14]:
model = XGBRegressor(n_estimators=200, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")


XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=None, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=200,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2222939133644104
RMSE (Training Data): 1.3835170269012451
R2 Score (Test Data): 0.21727150678634644
RMSE (Test Data): 1.3867425918579102


In [15]:
model = XGBRegressor(n_estimators=300, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=None, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.22840863466262817
RMSE (Training Data): 1.3780673742294312
R2 Score (Test Data): 0.22170007228851318
RMSE (Test Data): 1.3828141689300537


In [16]:
model = XGBRegressor(n_estimators=500, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=None, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=500,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2362128496170044
RMSE (Training Data): 1.3710803985595703
R2 Score (Test Data): 0.2262347936630249
RMSE (Test Data): 1.3787797689437866


300 seems to be a good base

## Learning Rate

In [17]:
model = XGBRegressor(n_estimators=300, learning_rate=0.01, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.01, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.17912393808364868
RMSE (Training Data): 1.4213975667953491
R2 Score (Test Data): 0.17810559272766113
RMSE (Test Data): 1.4210138320922852


In [18]:
model = XGBRegressor(n_estimators=300, learning_rate=0.001, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.001, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.07236886024475098
RMSE (Training Data): 1.5109999179840088
R2 Score (Test Data): 0.07216638326644897
RMSE (Test Data): 1.5098206996917725


In [19]:
model = XGBRegressor(n_estimators=300, learning_rate=0.1, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.1, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.21185463666915894
RMSE (Training Data): 1.392771601676941
R2 Score (Test Data): 0.20865070819854736
RMSE (Test Data): 1.3943583965301514


In [20]:
model = XGBRegressor(n_estimators=300, learning_rate=0.15, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.15, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.21798813343048096
RMSE (Training Data): 1.3873417377471924
R2 Score (Test Data): 0.21378618478775024
RMSE (Test Data): 1.3898266553878784


In [22]:
model = XGBRegressor(n_estimators=300, learning_rate=0.2, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.2, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.22191280126571655
RMSE (Training Data): 1.383855938911438
R2 Score (Test Data): 0.2166287899017334
RMSE (Test Data): 1.3873118162155151


In [23]:
model = XGBRegressor(n_estimators=300, learning_rate=0.25, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.25, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.22642111778259277
RMSE (Training Data): 1.3798410892486572
R2 Score (Test Data): 0.22013872861862183
RMSE (Test Data): 1.3842004537582397


In [24]:
model = XGBRegressor(n_estimators=300, learning_rate=0.3, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.3, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.22840863466262817
RMSE (Training Data): 1.3780673742294312
R2 Score (Test Data): 0.22170007228851318
RMSE (Test Data): 1.3828141689300537


In [25]:
model = XGBRegressor(n_estimators=300, learning_rate=0.35, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2309439778327942
RMSE (Training Data): 1.37580144405365
R2 Score (Test Data): 0.22321468591690063
RMSE (Test Data): 1.3814679384231567


## Max Depth

In [30]:
model = XGBRegressor(
    n_estimators=300, learning_rate=0.35, max_depth=25, n_jobs=-1, random_state=42
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=25,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.320204496383667
RMSE (Training Data): 1.2934983968734741
R2 Score (Test Data): 0.10901790857315063
RMSE (Test Data): 1.4795334339141846


In [31]:
model = XGBRegressor(
    n_estimators=300, learning_rate=0.35, max_depth=10, n_jobs=-1, random_state=42
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=10,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2809116840362549
RMSE (Training Data): 1.3303560018539429
R2 Score (Test Data): 0.22061705589294434
RMSE (Test Data): 1.3837758302688599


In [32]:
model = XGBRegressor(
    n_estimators=300, learning_rate=0.35, max_depth=7, n_jobs=-1, random_state=42
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24233931303024292
RMSE (Training Data): 1.3655705451965332
R2 Score (Test Data): 0.2281225323677063
RMSE (Test Data): 1.3770968914031982


In [33]:
model = XGBRegressor(
    n_estimators=300, learning_rate=0.35, max_depth=5, n_jobs=-1, random_state=42
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=5,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2194962501525879
RMSE (Training Data): 1.3860032558441162
R2 Score (Test Data): 0.21521472930908203
RMSE (Test Data): 1.3885635137557983


## Min Child Weight

Default is 1 / None

In [34]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=2,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=2, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2426851987838745
RMSE (Training Data): 1.365258812904358
R2 Score (Test Data): 0.22796261310577393
RMSE (Test Data): 1.3772395849227905


In [35]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=3,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=3, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2421284317970276
RMSE (Training Data): 1.3657605648040771
R2 Score (Test Data): 0.2276458740234375
RMSE (Test Data): 1.3775219917297363


## Subsample

Default is 1

In [38]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=0.8,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=1, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2401258945465088
RMSE (Training Data): 1.3675637245178223
R2 Score (Test Data): 0.22413218021392822
RMSE (Test Data): 1.380651831626892


In [41]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=0.9,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")


XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=1, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24153614044189453
RMSE (Training Data): 1.3662941455841064
R2 Score (Test Data): 0.22546231746673584
RMSE (Test Data): 1.3794678449630737


In [42]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")


XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=1, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24233931303024292
RMSE (Training Data): 1.3655705451965332
R2 Score (Test Data): 0.2281225323677063
RMSE (Test Data): 1.3770968914031982


## Col Sample by Tree

Default is 1

In [44]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1.0,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=1.0, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=1, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24233931303024292
RMSE (Training Data): 1.3655705451965332
R2 Score (Test Data): 0.2281225323677063
RMSE (Test Data): 1.3770968914031982


In [43]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=0.9,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")


XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.9, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.35, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
             max_leaves=None, min_child_weight=1, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=-1, num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2324831485748291
RMSE (Training Data): 1.3744239807128906
R2 Score (Test Data): 0.22345006465911865
RMSE (Test Data): 1.3812586069107056


## Gamma

Default is 0

In [46]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24233931303024292
RMSE (Training Data): 1.3655705451965332
R2 Score (Test Data): 0.2281225323677063
RMSE (Test Data): 1.3770968914031982


In [47]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0.1,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0.1, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2405276894569397
RMSE (Training Data): 1.3672021627426147
R2 Score (Test Data): 0.22713768482208252
RMSE (Test Data): 1.3779751062393188


## Reg Alpha

Default is 0

In [56]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0,
    reg_alpha=0,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24233931303024292
RMSE (Training Data): 1.3655705451965332
R2 Score (Test Data): 0.2281225323677063
RMSE (Test Data): 1.3770968914031982


In [49]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0,
    reg_alpha=0.1,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24199968576431274
RMSE (Training Data): 1.3658766746520996
R2 Score (Test Data): 0.2279198169708252
RMSE (Test Data): 1.3772777318954468


## Reg Lambda

In [50]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0,
    reg_alpha=0,
    reg_lambda=0,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.2421703338623047
RMSE (Training Data): 1.3657227754592896
R2 Score (Test Data): 0.22750824689865112
RMSE (Test Data): 1.3776447772979736


In [51]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0,
    reg_alpha=0,
    reg_lambda=0.1,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24227726459503174
RMSE (Training Data): 1.3656264543533325
R2 Score (Test Data): 0.22787904739379883
RMSE (Test Data): 1.3773140907287598


In [52]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.35,
    max_depth=7,
    min_child_weight=1,
    subsample=1.0,
    colsample_bytree=1,
    gamma=0,
    reg_alpha=0,
    reg_lambda=0.2,
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)
print(model)

print("")

y_pred = model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
y_pred = model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None, colsample_bytree=1,
             device=None, early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, feature_types=None, feature_weights=None,
             gamma=0, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.35, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=7, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=300, n_jobs=-1,
             num_parallel_tree=None, ...)

R2 Score (Training Data): 0.24207836389541626
RMSE (Training Data): 1.3658056259155273
R2 Score (Test Data): 0.2278539538383484
RMSE (Test Data): 1.3773363828659058


In [59]:
param_dist = {
    "n_estimators": np.arange(250, 450, 50),
    "learning_rate": np.arange(0.3, 0.5, 0.05),
    "max_depth": np.arange(6, 10, 1),
    "min_child_weight": np.arange(1, 10, 1),
    "subsample": [1.0],
    "colsample_bytree": [1.0],
    "gamma": [0, 0.1],
    "reg_alpha": [0, 0.1],
    "reg_lambda": [0, 0.1, 0.2],
}

model = XGBRegressor(random_state=42)

random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=150,
    cv=3,
    verbose=3,
    random_state=42,
    n_jobs=-1,
    scoring="neg_mean_squared_error",
)

random_search.fit(X_train, y_train)

best_model: XGBRegressor = random_search.best_estimator_

# Evaluate model
y_pred = best_model.predict(X_train)
print(f"R2 Score (Training Data): {r2_score(y_true=y_train, y_pred=y_pred)}")
print(f"RMSE (Training Data): {rmse(y_true=y_train, y_pred=y_pred)}")
print("")
y_pred = best_model.predict(X_test)
print(f"R2 Score (Test Data): {r2_score(y_true=y_test, y_pred=y_pred)}")
print(f"RMSE (Test Data): {rmse(y_true=y_test, y_pred=y_pred)}")

Fitting 3 folds for each of 150 candidates, totalling 450 fits
[CV 2/3] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.44999999999999996, max_depth=7, min_child_weight=6, n_estimators=400, reg_alpha=0.1, reg_lambda=0.1, subsample=1.0;, score=-1.905 total time=  48.8s
[CV 1/3] END colsample_bytree=1.0, gamma=0.1, learning_rate=0.44999999999999996, max_depth=7, min_child_weight=6, n_estimators=400, reg_alpha=0.1, reg_lambda=0.1, subsample=1.0;, score=-1.909 total time=  52.5s
[CV 3/3] END colsample_bytree=1.0, gamma=0, learning_rate=0.35, max_depth=6, min_child_weight=6, n_estimators=300, reg_alpha=0.1, reg_lambda=0.1, subsample=1.0;, score=-1.915 total time=  55.4s
[CV 1/3] END colsample_bytree=1.0, gamma=0, learning_rate=0.35, max_depth=6, min_child_weight=6, n_estimators=300, reg_alpha=0.1, reg_lambda=0.1, subsample=1.0;, score=-1.913 total time=  55.8s
[CV 2/3] END colsample_bytree=1.0, gamma=0, learning_rate=0.35, max_depth=6, min_child_weight=6, n_estimators=300, reg_alpha=0.

In [60]:
best_model

In [61]:
joblib.dump(best_model, "model.pkl")

['model.pkl']