In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import StackingClassifier,StackingRegressor
from sklearn import metrics
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn import datasets , ensemble , svm , linear_model , tree

In [2]:
data = datasets.load_diabetes()
df = pd.DataFrame(data.data , columns= data.feature_names)
df['target'] = data.target

In [17]:
#splitting the data 
x_train , x_test ,y_train , y_test = train_test_split(data.data , data.target , test_size=0.2)

In [26]:
estimators = [('rfr',linear_model.Ridge()) , ('svr' , linear_model.Lasso()), ('gbr' ,linear_model.LinearRegression())]

In [27]:
sr = StackingRegressor(estimators=estimators , final_estimator=ensemble.RandomForestRegressor() , cv=10)
sr.fit(x_train,y_train)

In [28]:
from sklearn.metrics import r2_score

In [29]:
r2_score(y_true=y_test , y_pred=sr.predict(x_test))

0.25689286166910974

In [31]:
# Define the base estimators for the StackingRegressor
# Example base estimators:
estimators = [('ridge', linear_model.Ridge()), ('lasso', linear_model.Lasso()), ('svr', svm.SVR())]

# Define the StackingRegressor
sr1 = StackingRegressor(estimators=estimators, final_estimator=ensemble.RandomForestRegressor())

param_grid = {
    'final_estimator__n_estimators': [50, 100, 200],
    'final_estimator__max_depth': [None, 10, 20],
    # Additional parameters to tune
    'final_estimator__min_samples_split': [2, 5, 10],
    'final_estimator__min_samples_leaf': [1, 2, 4],
    'final_estimator__bootstrap': [True, False],
}

# Create the GridSearchCV object
gsearch = GridSearchCV(estimator=sr1, param_grid=param_grid, scoring='r2', cv=10, n_jobs=-1)

# Fit the GridSearchCV object to the training data
gsearch.fit(x_train, y_train)

# Get the best parameters and best score
best_params = gsearch.best_params_
best_score = gsearch.best_score_


print("Best Parameters:", gsearch.best_params_)
print("Best Score:", gsearch.best_score_)


Best Parameters: {'final_estimator__bootstrap': True, 'final_estimator__max_depth': None, 'final_estimator__min_samples_leaf': 4, 'final_estimator__min_samples_split': 10, 'final_estimator__n_estimators': 100}
Best Score: 0.4286767831545741


In [34]:
import time
base_estimators = [
    ('ridge', linear_model.Ridge()),
    ('lasso', linear_model.Lasso()),
    ('svr', svm.SVR())
]

# Define the StackingRegressor
sr2 = StackingRegressor(estimators=base_estimators, final_estimator=ensemble.RandomForestRegressor())

# Define the parameter grid for base estimators
param_grid_base = {
    'ridge__alpha': [0.1, 1.0, 10.0],  # Ridge hyperparameters
    'lasso__alpha': [0.1, 1.0, 10.0],  # Lasso hyperparameters
    'svr__C': [0.1, 1.0, 10.0],        # SVR hyperparameters
}

# Define the parameter grid for the final estimator
param_grid_final = {
    'final_estimator__n_estimators': [50, 100, 200],
    'final_estimator__max_depth': [None, 10, 20],
    'final_estimator__min_samples_split': [2, 5, 10],
    'final_estimator__min_samples_leaf': [1, 2, 4],
    'final_estimator__bootstrap': [True, False],
}

# Create the GridSearchCV objects for base and final estimators
start_time = time.time()
gsearch_base = GridSearchCV(estimator=sr2, param_grid=param_grid_base, scoring='r2', cv=5, n_jobs=-1)
gsearch_base.fit(x_train, y_train)
base_time = time.time() - start_time

start_time = time.time()
gsearch_final = GridSearchCV(estimator=sr2, param_grid=param_grid_final, scoring='r2', cv=5, n_jobs=-1)
gsearch_final.fit(x_train, y_train)
final_time = time.time() - start_time

# Get the best parameters and best score for base and final estimators
best_params_base = gsearch_base.best_params_
best_score_base = gsearch_base.best_score_
best_params_final = gsearch_final.best_params_
best_score_final = gsearch_final.best_score_

# Print the best parameters and best scores
print("Best Parameters (Base Estimators):", best_params_base)
print("Best Score (Base Estimators):", best_score_base)
print("Base Estimators Grid Search Time:", base_time)
print("Best Parameters (Final Estimator):", best_params_final)
print("Best Score (Final Estimator):", best_score_final)
print("Final Estimator Grid Search Time:", final_time)

Best Parameters (Base Estimators): {'lasso__alpha': 0.1, 'ridge__alpha': 10.0, 'svr__C': 10.0}
Best Score (Base Estimators): 0.4617980539064231
Base Estimators Grid Search Time: 24.413153171539307
Best Parameters (Final Estimator): {'final_estimator__bootstrap': True, 'final_estimator__max_depth': None, 'final_estimator__min_samples_leaf': 4, 'final_estimator__min_samples_split': 10, 'final_estimator__n_estimators': 100}
Best Score (Final Estimator): 0.4300584548277258
Final Estimator Grid Search Time: 144.9639904499054


In [40]:
import time
base_estimators = [
    ('ridge', linear_model.Ridge(alpha=10.0)),
    ('lasso', linear_model.Lasso(alpha=0.1)),
    ('svr', svm.SVR(C=10.0))
]

# Define the StackingRegressor
sr3 = StackingRegressor(estimators=base_estimators, final_estimator=ensemble.RandomForestRegressor(n_estimators=100,
                                                                                                   bootstrap=True,
                                                                                                  max_depth=None,
                                                                                                  min_samples_leaf=4,
                                                                                                  min_samples_split=10))
sr3.fit(x_train,y_train)
r2_score(y_true=y_test , y_pred=sr3.predict(x_test))

0.36259083681111437