In [66]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer, OneHotEncoder
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.metrics import r2_score

In [48]:
df = pd.read_csv(r"D:/Coding/ML/Databases/electricity_cost_dataset.csv")
df

Unnamed: 0,site area,structure type,water consumption,recycling rate,utilisation rate,air qality index,issue reolution time,resident count,electricity cost
0,1360,Mixed-use,2519.0,69,52,188,1,72,1420.0
1,4272,Mixed-use,2324.0,50,76,165,65,261,3298.0
2,3592,Mixed-use,2701.0,20,94,198,39,117,3115.0
3,966,Residential,1000.0,13,60,74,3,35,1575.0
4,4926,Residential,5990.0,23,65,32,57,185,4301.0
...,...,...,...,...,...,...,...,...,...
9995,708,Commercial,1365.0,76,77,22,23,0,885.0
9996,2960,Residential,3332.0,22,72,100,47,54,3081.0
9997,3942,Commercial,2427.0,61,43,48,49,0,3325.0
9998,3211,Commercial,4639.0,72,73,105,55,0,2892.0


In [50]:

num_cols = ['site area','water consumption','recycling rate','utilisation rate',
            'air qality index','issue reolution time','resident count']
cat_cols = ['structure type']

# Creating sub-pipelines
num_cols_without_skewed = [col for col in num_cols if col != 'water consumption']

numeric_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler())
])

skewed_pipeline = Pipeline(steps=[
    ('yeojohnson', PowerTransformer(method='yeo-johnson')),
    ('scaler', StandardScaler())
])

categorical_pipeline = Pipeline(steps=[
    ('ohe', OneHotEncoder(sparse_output=False))
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_pipeline, num_cols_without_skewed),
    ('skewed', skewed_pipeline, ['water consumption']),
    ('cat', categorical_pipeline, cat_cols)
])

preprocessor


0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [71]:

x=df.iloc[: , 0:8]
y = df.iloc[: , 8]
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2, random_state=42)
print(x_train.shape , y_train.shape)

(8000, 8) (8000,)


In [None]:
base_models1 = [ 
    ('svr' , SVR()),
    ('xgb' , XGBRegressor())
]

meta_model1 = SVR()

stack_model1 = StackingRegressor(estimators = base_models1 , final_estimator = meta_model1)
pipeline1 = make_pipeline(preprocessor, stack_model1)
pipeline1.fit(x_train, y_train)

#stack_model.fit(x_train,y_train)

In [None]:
y_pred1 = pipeline1.predict(x_test)
r2 = r2_score(y_pred1,y_test)
r2

0.6560947248300824

In [None]:
base_models2 = [ 
    ('lr' , LinearRegression()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor())
]

meta_model2 = XGBRegressor()
stack_model2 = StackingRegressor(estimators = base_models2 , final_estimator=meta_model2)

pipeline2 = make_pipeline(preprocessor, stack_model2)
pipeline2.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,"XGBRegressor(...ree=None, ...)"
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [70]:
y_pred2 = pipeline2.predict(x_test)
r2 = r2_score(y_pred2,y_test)
r2

0.9560715192732573

In [76]:
base_models3 = [ 
    ('xgb'  , XGBRegressor()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor())
]

meta_model2 = XGBRegressor()
stack_model3 = StackingRegressor(estimators = base_models3 , final_estimator=meta_model2)

pipeline3 = make_pipeline(preprocessor, stack_model3)
pipeline3.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('xgb', ...), ('svr', ...), ...]"
,final_estimator,"XGBRegressor(...ree=None, ...)"
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [79]:
y_pred2 = pipeline3.predict(x_test)
r2 = r2_score(y_pred2,y_test)
r2

0.9551514908077512

In [80]:
base_models4 = [ 
    ('xgb'  , XGBRegressor()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor())
]

meta_model4 = XGBRegressor()
stack_model4 = StackingRegressor(estimators = base_models4 , final_estimator=meta_model4)

pipeline4 = make_pipeline(preprocessor, stack_model4)
pipeline4.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('xgb', ...), ('svr', ...), ...]"
,final_estimator,"XGBRegressor(...ree=None, ...)"
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [81]:
y_pred4 = pipeline4.predict(x_test)
r2 = r2_score(y_pred4,y_test)
r2

0.9535750912883484

In [82]:
base_models5 = [ 
    ('lr' , LinearRegression()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor()),
    ('xgb' , XGBRegressor())
]

meta_model5 = XGBRegressor()
stack_model5 = StackingRegressor(estimators = base_models5 , final_estimator=meta_model5)

pipeline5 = make_pipeline(preprocessor, stack_model5)
pipeline5.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,"XGBRegressor(...ree=None, ...)"
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [86]:
y_pred5 = pipeline5.predict(x_test)
r2 = r2_score(y_pred5,y_test)
r2

0.9565598951821811

In [84]:
base_models6 = [ 
    ('lr' , LinearRegression()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor()),
    ('xgb' , XGBRegressor())
]

meta_model6 = SVR()
stack_model6 = StackingRegressor(estimators = base_models6 , final_estimator=meta_model6)

pipeline6 = make_pipeline(preprocessor, stack_model6)
pipeline6.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,SVR()
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [87]:
y_pred6 = pipeline6.predict(x_test)
r2 = r2_score(y_pred6,y_test)
r2

0.6631929549918736

In [88]:
base_models7 = [ 
    ('lr' , LinearRegression()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor())
]

meta_model7 = SVR()
stack_model7 = StackingRegressor(estimators = base_models7 , final_estimator=meta_model7)

pipeline7 = make_pipeline(preprocessor, stack_model7)
pipeline7.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,SVR()
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [89]:
y_pred7 = pipeline7.predict(x_test)
r2 = r2_score(y_pred7,y_test)
r2

0.6549161038127609

In [91]:
base_models8 = [ 
    ('lr' , LinearRegression()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor()),
    ('xgb' , XGBRegressor())
]

meta_model8 = RandomForestRegressor()
stack_model8 = StackingRegressor(estimators = base_models8 , final_estimator=meta_model8)

pipeline8 = make_pipeline(preprocessor, stack_model8)
pipeline8.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,RandomForestRegressor()
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [92]:
y_pred8 = pipeline8.predict(x_test)
r2 = r2_score(y_pred8,y_test)
r2

0.9596731380850773

In [102]:
print(pipeline8.named_steps)


{'columntransformer': ColumnTransformer(transformers=[('num',
                                 Pipeline(steps=[('scaler', StandardScaler())]),
                                 ['site area', 'recycling rate',
                                  'utilisation rate', 'air qality index',
                                  'issue reolution time', 'resident count']),
                                ('skewed',
                                 Pipeline(steps=[('yeojohnson',
                                                  PowerTransformer()),
                                                 ('scaler', StandardScaler())]),
                                 ['water consumption']),
                                ('cat',
                                 Pipeline(steps=[('ohe',
                                                  OneHotEncoder(sparse_output=False))]),
                                 ['structure type'])]), 'stackingregressor': StackingRegressor(estimators=[('lr', LinearRegression()), ('s

In [100]:
from sklearn.model_selection import RandomizedSearchCV


param_grid = {
    # Final estimator (Random Forest)
    'stackingregressor__final_estimator__n_estimators': [100, 200],
    'stackingregressor__final_estimator__max_depth': [10, 20]
}

random_search = RandomizedSearchCV(
    estimator=pipeline8,
    param_distributions=param_grid,
    n_iter=20,
    cv=5,
    scoring='r2',
    verbose=2,
    random_state=42,
    n_jobs=-1
)

random_search.fit(x_train, y_train)





Fitting 5 folds for each of 4 candidates, totalling 20 fits


10 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "d:\Coding\ML\venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Coding\ML\venv\Lib\site-packages\sklearn\base.py", line 1363, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\Coding\ML\venv\Lib\site-packages\sklearn\pipeline.py", line 661, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Coding\ML\venv\Lib

0,1,2
,estimator,Pipeline(step...egressor()))])
,param_distributions,"{'stackingregressor__final_estimator__max_depth': [10, 20], 'stackingregressor__final_estimator__n_estimators': [100, 200]}"
,n_iter,20
,scoring,'r2'
,n_jobs,-1
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,RandomForestR...stimators=200)
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,n_estimators,200
,criterion,'squared_error'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [101]:
# ✅ Print best R² score and hyperparameters
print("\nBest R² score on CV set: ", random_search.best_score_)
print("Best hyperparameters:\n", random_search.best_params_)


Best R² score on CV set:  0.9597495957002131
Best hyperparameters:
 {'stackingregressor__final_estimator__n_estimators': 200, 'stackingregressor__final_estimator__max_depth': 10}


### Okay then, now we just ass these hyperparameters in the random forest and then move on to creation of model.pkl!

In [105]:
base_models9 = [ 
    ('lr' , LinearRegression()),
    ('svr' , SVR()),
    ('rf' , RandomForestRegressor()),
    ('knn' , KNeighborsRegressor()),
    ('xgb' , XGBRegressor())
]

meta_model9 = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
stack_model9 = StackingRegressor(estimators = base_models9 , final_estimator=meta_model9)

pipeline9 = make_pipeline(preprocessor, stack_model9)
pipeline9.fit(x_train, y_train)

0,1,2
,steps,"[('columntransformer', ...), ('stackingregressor', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('skewed', ...), ...]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,method,'yeo-johnson'
,standardize,True
,copy,True

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,estimators,"[('lr', ...), ('svr', ...), ...]"
,final_estimator,RandomForestR...ndom_state=42)
,cv,
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False

0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False

0,1,2
,n_estimators,200
,criterion,'squared_error'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [106]:
y_pred9 = pipeline9.predict(x_test)
r2 = r2_score(y_pred9,y_test)
r2


0.9610909746399812

In [107]:
import pickle

In [109]:
pickel_model_path = "model.pkl"
with open(pickel_model_path, "wb") as f:
    pickle.dump(pipeline9,f)