In [3]:
# Create and evaluate a stacking ensemble of the best three models with their optimized hyperparameters

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score

# Load the two source tables. They are joined positionally below, so row i of
# one file must describe the same sample as row i of the other.
main_data = pd.read_csv("./data/train.csv")  # Superconductivity dataset
unique_m = pd.read_csv("./data/unique_m.csv")

# Remove 'critical_temp' from unique_m to avoid a duplicated target column
# (errors='ignore' makes this a no-op if the column is absent).
unique_m = unique_m.drop(columns=["critical_temp"], errors='ignore')

# Fail fast if the row counts disagree: with axis=1, pd.concat would otherwise
# pad the shorter table with NaN rows instead of raising, and the problem would
# only surface later as missing values during training.
if len(main_data) != len(unique_m):
    raise ValueError(
        f"Row count mismatch: train.csv has {len(main_data)} rows but "
        f"unique_m.csv has {len(unique_m)} rows; cannot merge by position."
    )

# Merge datasets assuming rows align (index-based merge)
merged_data = pd.concat([main_data, unique_m], axis=1)

# Feature engineering: two ratio features and one log-transformed feature.
# The 1e-9 added to each denominator guards against division by zero.
merged_data["mass_density_ratio"] = merged_data["wtd_mean_atomic_mass"] / (merged_data["wtd_mean_Density"] + 1e-9)
merged_data["affinity_valence_ratio"] = merged_data["wtd_mean_ElectronAffinity"] / (merged_data["wtd_mean_Valence"] + 1e-9)
# log1p computes log(1 + x), so a value of 0 maps to 0 rather than -inf.
merged_data["log_thermal_conductivity"] = np.log1p(merged_data["range_ThermalConductivity"])

# Column the models are trained to predict.
target = "critical_temp"

# Input columns, listed in the exact order they are passed to the estimators.
# Grouped by the property each statistic is computed over; do not reorder.
features = [
    # atomic mass
    'mean_atomic_mass', 'wtd_mean_atomic_mass', 'gmean_atomic_mass',
    'entropy_atomic_mass', 'wtd_entropy_atomic_mass', 'range_atomic_mass',
    'wtd_range_atomic_mass', 'wtd_std_atomic_mass',
    # fie
    'mean_fie', 'wtd_mean_fie', 'wtd_entropy_fie', 'range_fie',
    'wtd_range_fie', 'wtd_std_fie',
    # atomic radius
    'mean_atomic_radius', 'wtd_mean_atomic_radius', 'gmean_atomic_radius',
    'range_atomic_radius', 'wtd_range_atomic_radius',
    # density
    'mean_Density', 'wtd_mean_Density', 'gmean_Density', 'entropy_Density',
    'wtd_entropy_Density', 'range_Density', 'wtd_range_Density',
    'wtd_std_Density',
    # electron affinity
    'mean_ElectronAffinity', 'wtd_mean_ElectronAffinity',
    'gmean_ElectronAffinity', 'wtd_gmean_ElectronAffinity',
    'entropy_ElectronAffinity', 'wtd_entropy_ElectronAffinity',
    'range_ElectronAffinity', 'wtd_range_ElectronAffinity',
    'wtd_std_ElectronAffinity',
    # fusion heat
    'mean_FusionHeat', 'wtd_mean_FusionHeat', 'gmean_FusionHeat',
    'entropy_FusionHeat', 'wtd_entropy_FusionHeat', 'range_FusionHeat',
    'wtd_range_FusionHeat', 'wtd_std_FusionHeat',
    # thermal conductivity
    'mean_ThermalConductivity', 'wtd_mean_ThermalConductivity',
    'gmean_ThermalConductivity', 'wtd_gmean_ThermalConductivity',
    'entropy_ThermalConductivity', 'wtd_entropy_ThermalConductivity',
    'range_ThermalConductivity', 'wtd_range_ThermalConductivity',
    # valence
    'mean_Valence', 'wtd_mean_Valence', 'range_Valence',
    'wtd_range_Valence', 'wtd_std_Valence',
    # element-symbol columns (presumably supplied by unique_m.csv -- confirm)
    'H', 'B', 'C', 'O', 'F', 'Na', 'Mg', 'Al', 'Cl', 'K', 'Ca', 'V', 'Cr',
    'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'As', 'Se', 'Sr', 'Y', 'Nb', 'Sn', 'I',
    'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Sm', 'Eu', 'Gd', 'Tb', 'Yb', 'Hg', 'Tl',
    'Pb', 'Bi',
    # engineered columns added to merged_data during feature engineering
    'mass_density_ratio', 'affinity_valence_ratio',
    'log_thermal_conductivity',
]

# Feature matrix and target vector.
X = merged_data[features]
y = merged_data[target]

# Hold out 10% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)


# Base learners with their previously tuned hyperparameters, as a list of
# (name, estimator) pairs in the order xgb, lgb, cat.
base_estimators = list({
    'xgb': XGBRegressor(
        objective='reg:squarederror',
        n_estimators=374,
        max_depth=16,
        learning_rate=0.02,
        min_child_weight=1,
        colsample_bytree=0.5,
        random_state=42,
    ),
    # NOTE(review): subsample_freq is not set here. LightGBM's parameter docs
    # say bagging only runs when the bagging frequency is non-zero, so
    # subsample=0.6619 may currently have no effect -- confirm.
    'lgb': LGBMRegressor(
        n_estimators=496,
        max_depth=15,
        num_leaves=148,
        learning_rate=0.0579,
        subsample=0.6619,
        colsample_bytree=0.7512,
        random_state=42,
        verbose=-1,
    ),
    'cat': CatBoostRegressor(
        loss_function='RMSE',
        iterations=998,
        depth=9,
        learning_rate=0.0962,
        l2_leaf_reg=4.1926,
        random_seed=42,
        verbose=0,
    ),
}.items())

# Meta-model: plain linear regression over the base learners' predictions.
meta_model = LinearRegression()

# Stacking ensemble; cv=5 means the meta-model is trained on 5-fold
# out-of-fold predictions from the base learners.
stacking_model = StackingRegressor(
    estimators=base_estimators,
    final_estimator=meta_model,
    cv=5,
)

# Fit on the training split only (X_train / y_train come from the split above).
stacking_model.fit(X_train, y_train)

# Score the fitted ensemble on the held-out test split.
y_pred = stacking_model.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Stacking Ensemble - Test RMSE: {rmse:.4f}, Test R²: {r2:.4f}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008783 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17629
[LightGBM] [Info] Number of data points in the train set: 19136, number of used features: 99
[LightGBM] [Info] Start training from score 34.464032
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005899 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17340
[LightGBM] [Info] Number of data points in the train set: 15308, number of used features: 99
[LightGBM] [Info] Start training from score 34.516733
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005788 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17358
[LightGBM] [Info] Number of data points in the train set: 15309, number of used features: 99
[LightGBM] [Info] Star

Results:

Stacking Ensemble - Test RMSE: 8.2961, Test R²: 0.9400

Best yet.

In [4]:
import joblib

# Save the stacking model to disk. The output directory is created first so a
# fresh checkout does not fail with FileNotFoundError after the long training run.
first_model_path = Path('./output/pkl/first_stacking_model.pkl')
first_model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(stacking_model, first_model_path)

# Later, load the model from disk without needing to retrain.
# Only load pickle files you trust: unpickling can execute arbitrary code.
# loaded_model = joblib.load('./output/pkl/first_stacking_model.pkl')

# To see the parameters of the stacking model:
print(stacking_model.get_params())

{'cv': 5, 'estimators': [('xgb', XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)), ('lgb', LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619)), ('cat', <catboost.core.CatBoostRegressor obje

{'cv': 5, 'estimators': [('xgb', XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)), ('lgb', LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619)), ('cat', <catboost.core.CatBoostRegressor object at 0x7ffaf43017f0>)], 'final_estimator__copy_X': True, 'final_estimator__fit_intercept': True, 'final_estimator__n_jobs': None, 'final_estimator__positive': False, 'final_estimator': LinearRegression(), 'n_jobs': None, 'passthrough': False, 'verbose': 0, 'xgb': XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...), 'lgb': LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619), 'cat': <catboost.core.CatBoostRegressor object at 0x7ffaf43017f0>, 'xgb__objective': 'reg:squarederror', 'xgb__base_score': None, 'xgb__booster': None, 'xgb__callbacks': None, 'xgb__colsample_bylevel': None, 'xgb__colsample_bynode': None, 'xgb__colsample_bytree': 0.5, 'xgb__device': None, 'xgb__early_stopping_rounds': None, 'xgb__enable_categorical': False, 'xgb__eval_metric': None, 'xgb__feature_types': None, 'xgb__gamma': None, 'xgb__grow_policy': None, 'xgb__importance_type': None, 'xgb__interaction_constraints': None, 'xgb__learning_rate': 0.02, 'xgb__max_bin': None, 'xgb__max_cat_threshold': None, 'xgb__max_cat_to_onehot': None, 'xgb__max_delta_step': None, 'xgb__max_depth': 16, 'xgb__max_leaves': None, 'xgb__min_child_weight': 1, 'xgb__missing': nan, 'xgb__monotone_constraints': None, 'xgb__multi_strategy': None, 'xgb__n_estimators': 374, 'xgb__n_jobs': None, 'xgb__num_parallel_tree': None, 'xgb__random_state': 42, 'xgb__reg_alpha': None, 'xgb__reg_lambda': None, 'xgb__sampling_method': None, 'xgb__scale_pos_weight': None, 'xgb__subsample': None, 'xgb__tree_method': None, 'xgb__validate_parameters': None, 'xgb__verbosity': None, 'lgb__boosting_type': 'gbdt', 'lgb__class_weight': None, 'lgb__colsample_bytree': 0.7512, 'lgb__importance_type': 'split', 'lgb__learning_rate': 0.0579, 'lgb__max_depth': 15, 'lgb__min_child_samples': 20, 'lgb__min_child_weight': 0.001, 'lgb__min_split_gain': 0.0, 'lgb__n_estimators': 496, 'lgb__n_jobs': None, 'lgb__num_leaves': 148, 'lgb__objective': None, 'lgb__random_state': 42, 'lgb__reg_alpha': 0.0, 'lgb__reg_lambda': 0.0, 'lgb__subsample': 0.6619, 'lgb__subsample_for_bin': 200000, 'lgb__subsample_freq': 0, 'cat__iterations': 998, 'cat__learning_rate': 0.0962, 'cat__depth': 9, 'cat__l2_leaf_reg': 4.1926, 'cat__loss_function': 'RMSE', 'cat__random_seed': 42, 'cat__verbose': 0}

In [5]:
# now try with XGBoost as the meta_model

# Meta-model for this variant: a shallow XGBoost regressor (depth 3, 100 trees)
# in place of the linear regression used in the first ensemble.
meta_model_2 = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42,
)

# Same base learners as before; cv=5 means the meta-model is trained on
# 5-fold out-of-fold predictions.
stacking_model_2 = StackingRegressor(
    estimators=base_estimators,
    final_estimator=meta_model_2,
    cv=5,
)

# Fit on the training split (X_train / y_train are defined in an earlier cell).
stacking_model_2.fit(X_train, y_train)

# Score on the held-out test split. This overwrites y_pred, rmse and r2 from
# the previous ensemble.
y_pred = stacking_model_2.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Second Stacking Ensemble - Test RMSE: {rmse:.4f}, Test R²: {r2:.4f}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 17629
[LightGBM] [Info] Number of data points in the train set: 19136, number of used features: 99
[LightGBM] [Info] Start training from score 34.464032
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004291 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 17340
[LightGBM] [Info] Number of data points in the train set: 15308, number of used features: 99
[LightGBM] [Info] Start training from score 34.516733
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003759 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Tota

Results of second stacking:

Second Stacking Ensemble - Test RMSE: 8.3381, Test R²: 0.9394



In [9]:
# Save the stacking model to disk. The output directory is created first so a
# fresh checkout does not fail with FileNotFoundError after the long training run.
second_model_path = Path('./output/pkl/second_stacking_model.pkl')
second_model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(stacking_model_2, second_model_path)

# To see the parameters of the stacking model:
print(stacking_model_2.get_params())

{'cv': 5, 'estimators': [('xgb', XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)), ('lgb', LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619)), ('cat', <catboost.core.CatBoostRegressor obje

{'cv': 5, 'estimators': [('xgb', XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)), ('lgb', LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619)), ('cat', <catboost.core.CatBoostRegressor object at 0x7ffaf43017f0>)], 'final_estimator__objective': 'reg:squarederror', 'final_estimator__base_score': None, 'final_estimator__booster': None, 'final_estimator__callbacks': None, 'final_estimator__colsample_bylevel': None, 'final_estimator__colsample_bynode': None, 'final_estimator__colsample_bytree': None, 'final_estimator__device': None, 'final_estimator__early_stopping_rounds': None, 'final_estimator__enable_categorical': False, 'final_estimator__eval_metric': None, 'final_estimator__feature_types': None, 'final_estimator__gamma': None, 'final_estimator__grow_policy': None, 'final_estimator__importance_type': None, 'final_estimator__interaction_constraints': None, 'final_estimator__learning_rate': 0.1, 'final_estimator__max_bin': None, 'final_estimator__max_cat_threshold': None, 'final_estimator__max_cat_to_onehot': None, 'final_estimator__max_delta_step': None, 'final_estimator__max_depth': 3, 'final_estimator__max_leaves': None, 'final_estimator__min_child_weight': None, 'final_estimator__missing': nan, 'final_estimator__monotone_constraints': None, 'final_estimator__multi_strategy': None, 'final_estimator__n_estimators': 100, 'final_estimator__n_jobs': None, 'final_estimator__num_parallel_tree': None, 'final_estimator__random_state': 42, 'final_estimator__reg_alpha': None, 'final_estimator__reg_lambda': None, 'final_estimator__sampling_method': None, 'final_estimator__scale_pos_weight': None, 'final_estimator__subsample': None, 'final_estimator__tree_method': None, 'final_estimator__validate_parameters': None, 'final_estimator__verbosity': None, 'final_estimator': XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=100, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...), 'n_jobs': None, 'passthrough': False, 'verbose': 0, 'xgb': XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...), 'lgb': LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619), 'cat': <catboost.core.CatBoostRegressor object at 0x7ffaf43017f0>, 'xgb__objective': 'reg:squarederror', 'xgb__base_score': None, 'xgb__booster': None, 'xgb__callbacks': None, 'xgb__colsample_bylevel': None, 'xgb__colsample_bynode': None, 'xgb__colsample_bytree': 0.5, 'xgb__device': None, 'xgb__early_stopping_rounds': None, 'xgb__enable_categorical': False, 'xgb__eval_metric': None, 'xgb__feature_types': None, 'xgb__gamma': None, 'xgb__grow_policy': None, 'xgb__importance_type': None, 'xgb__interaction_constraints': None, 'xgb__learning_rate': 0.02, 'xgb__max_bin': None, 'xgb__max_cat_threshold': None, 'xgb__max_cat_to_onehot': None, 'xgb__max_delta_step': None, 'xgb__max_depth': 16, 'xgb__max_leaves': None, 'xgb__min_child_weight': 1, 'xgb__missing': nan, 'xgb__monotone_constraints': None, 'xgb__multi_strategy': None, 'xgb__n_estimators': 374, 'xgb__n_jobs': None, 'xgb__num_parallel_tree': None, 'xgb__random_state': 42, 'xgb__reg_alpha': None, 'xgb__reg_lambda': None, 'xgb__sampling_method': None, 'xgb__scale_pos_weight': None, 'xgb__subsample': None, 'xgb__tree_method': None, 'xgb__validate_parameters': None, 'xgb__verbosity': None, 'lgb__boosting_type': 'gbdt', 'lgb__class_weight': None, 'lgb__colsample_bytree': 0.7512, 'lgb__importance_type': 'split', 'lgb__learning_rate': 0.0579, 'lgb__max_depth': 15, 'lgb__min_child_samples': 20, 'lgb__min_child_weight': 0.001, 'lgb__min_split_gain': 0.0, 'lgb__n_estimators': 496, 'lgb__n_jobs': None, 'lgb__num_leaves': 148, 'lgb__objective': None, 'lgb__random_state': 42, 'lgb__reg_alpha': 0.0, 'lgb__reg_lambda': 0.0, 'lgb__subsample': 0.6619, 'lgb__subsample_for_bin': 200000, 'lgb__subsample_freq': 0, 'cat__iterations': 998, 'cat__learning_rate': 0.0962, 'cat__depth': 9, 'cat__l2_leaf_reg': 4.1926, 'cat__loss_function': 'RMSE', 'cat__random_seed': 42, 'cat__verbose': 0}

In [7]:
# Stack only the XGBoost and LightGBM models, combined with a linear meta-model

# Base learners for the third variant: XGBoost and LightGBM with the same
# tuned hyperparameters as before. CatBoost is deliberately left out.
base_estimators_3 = list({
    'xgb': XGBRegressor(
        objective='reg:squarederror',
        n_estimators=374,
        max_depth=16,
        learning_rate=0.02,
        min_child_weight=1,
        colsample_bytree=0.5,
        random_state=42,
    ),
    # NOTE(review): subsample_freq is not set here. LightGBM's parameter docs
    # say bagging only runs when the bagging frequency is non-zero, so
    # subsample=0.6619 may currently have no effect -- confirm.
    'lgb': LGBMRegressor(
        n_estimators=496,
        max_depth=15,
        num_leaves=148,
        learning_rate=0.0579,
        subsample=0.6619,
        colsample_bytree=0.7512,
        random_state=42,
        verbose=-1,
    ),
}.items())

# Meta-model: plain linear regression over the two base learners' predictions.
meta_model_3 = LinearRegression()

# Stacking ensemble; cv=5 means the meta-model is trained on 5-fold
# out-of-fold predictions.
stacking_model_3 = StackingRegressor(
    estimators=base_estimators_3,
    final_estimator=meta_model_3,
    cv=5,
)

# Fit on the training split (X_train / y_train are defined in an earlier cell).
stacking_model_3.fit(X_train, y_train)

# Score on the held-out test split. This overwrites y_pred, rmse and r2 from
# the previous ensemble.
y_pred = stacking_model_3.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Third Stacking Ensemble - Test RMSE: {rmse:.4f}, Test R²: {r2:.4f}")

Third Stacking Ensemble - Test RMSE: 8.2968, Test R²: 0.9400


Results of third stacking:

Third Stacking Ensemble - Test RMSE: 8.2968, Test R²: 0.9400



In [8]:
# Save the stacking model to disk. The output directory is created first so a
# fresh checkout does not fail with FileNotFoundError after the long training run.
third_model_path = Path('./output/pkl/third_stacking_model.pkl')
third_model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(stacking_model_3, third_model_path)

# To see the parameters of the stacking model:
print(stacking_model_3.get_params())

{'cv': 5, 'estimators': [('xgb', XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)), ('lgb', LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619, verbose=-1))], 'final_estimator__copy_X': True,

{'cv': 5, 'estimators': [('xgb', XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)), ('lgb', LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619, verbose=-1))], 'final_estimator__copy_X': True, 'final_estimator__fit_intercept': True, 'final_estimator__n_jobs': None, 'final_estimator__positive': False, 'final_estimator': LinearRegression(), 'n_jobs': None, 'passthrough': False, 'verbose': 0, 'xgb': XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.5, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.02, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=16, max_leaves=None,
             min_child_weight=1, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=374, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...), 'lgb': LGBMRegressor(colsample_bytree=0.7512, learning_rate=0.0579, max_depth=15,
              n_estimators=496, num_leaves=148, random_state=42,
              subsample=0.6619, verbose=-1), 'xgb__objective': 'reg:squarederror', 'xgb__base_score': None, 'xgb__booster': None, 'xgb__callbacks': None, 'xgb__colsample_bylevel': None, 'xgb__colsample_bynode': None, 'xgb__colsample_bytree': 0.5, 'xgb__device': None, 'xgb__early_stopping_rounds': None, 'xgb__enable_categorical': False, 'xgb__eval_metric': None, 'xgb__feature_types': None, 'xgb__gamma': None, 'xgb__grow_policy': None, 'xgb__importance_type': None, 'xgb__interaction_constraints': None, 'xgb__learning_rate': 0.02, 'xgb__max_bin': None, 'xgb__max_cat_threshold': None, 'xgb__max_cat_to_onehot': None, 'xgb__max_delta_step': None, 'xgb__max_depth': 16, 'xgb__max_leaves': None, 'xgb__min_child_weight': 1, 'xgb__missing': nan, 'xgb__monotone_constraints': None, 'xgb__multi_strategy': None, 'xgb__n_estimators': 374, 'xgb__n_jobs': None, 'xgb__num_parallel_tree': None, 'xgb__random_state': 42, 'xgb__reg_alpha': None, 'xgb__reg_lambda': None, 'xgb__sampling_method': None, 'xgb__scale_pos_weight': None, 'xgb__subsample': None, 'xgb__tree_method': None, 'xgb__validate_parameters': None, 'xgb__verbosity': None, 'lgb__boosting_type': 'gbdt', 'lgb__class_weight': None, 'lgb__colsample_bytree': 0.7512, 'lgb__importance_type': 'split', 'lgb__learning_rate': 0.0579, 'lgb__max_depth': 15, 'lgb__min_child_samples': 20, 'lgb__min_child_weight': 0.001, 'lgb__min_split_gain': 0.0, 'lgb__n_estimators': 496, 'lgb__n_jobs': None, 'lgb__num_leaves': 148, 'lgb__objective': None, 'lgb__random_state': 42, 'lgb__reg_alpha': 0.0, 'lgb__reg_lambda': 0.0, 'lgb__subsample': 0.6619, 'lgb__subsample_for_bin': 200000, 'lgb__subsample_freq': 0, 'lgb__verbose': -1}