<a href="https://colab.research.google.com/github/O7Mejri/Advanced_ML_Concepts/blob/main/Stacking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Stacking

In [None]:
from mlxtend.regressor import StackingCVRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

import matplotlib.pyplot as plt
import numpy as np


from sklearn.metrics import mean_squared_error

# Define a function to calculate RMSE
def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, passed to ``mean_squared_error`` together
            with ``y_true``.

    Returns:
        The square root of ``mean_squared_error(y_true, y_pred)``.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)


# Synthetic regression problem: 1000 samples, 10 features, fixed seed
X, y = make_regression(n_samples=1000, n_features=10, random_state=42)
print("shape: ", X.shape, y.shape)

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Level-0 learners whose predictions feed the meta-model
base_models = [
    RandomForestRegressor(n_estimators=50, random_state=42),
    LinearRegression(),
]

# Level-1 learner that combines the base-model predictions
meta_model = LinearRegression()

# Assemble the stacked ensemble
stacking_regressor = StackingCVRegressor(
    regressors=base_models,
    meta_regressor=meta_model,
    random_state=42,
)

# Baseline: fit each base model on its own and record its test RMSE
rmse_bases = []
for base_model in base_models:
    base_model.fit(X_train, y_train)
    base_predictions = base_model.predict(X_test)
    rmse_base = rmse(y_test, base_predictions)
    rmse_bases.append(rmse_base)
    print(f'RMSE for {type(base_model).__name__}: {rmse_base}')

# Fit the stacked ensemble on the same split and score it the same way
stacking_regressor.fit(X_train, y_train)
stacking_predictions = stacking_regressor.predict(X_test)
rmse_stacking = rmse(y_test, stacking_predictions)
print(f'RMSE for Stacking Regressor: {rmse_stacking}')

# Comparison: relative drop of the stacked RMSE versus the mean base-model RMSE
avg_rmse_base = np.mean(rmse_bases)
# `comp` is a fraction (1.0 means the error was eliminated entirely); it is
# kept as a fraction so any later use of the variable is unaffected.
comp = (avg_rmse_base - rmse_stacking) / avg_rmse_base
print("COMPARISON")
print(f'Average RMSE of Base Models: {avg_rmse_base}')
print(f'RMSE for Stacking Regressor: {rmse_stacking}')
# The "%" presentation type multiplies by 100 before appending the sign.
# Previously the raw fraction was printed followed by a literal "%", which
# understated the reduction by a factor of 100.
print(f'Percentage Reduction in RMSE: {comp:.2%}')

shape:  (1000, 10) (1000,)
RMSE for RandomForestRegressor: 58.38148624586215
RMSE for LinearRegression: 1.3826607542204874e-13
RMSE for Stacking Regressor: 1.5199844770546964e-13
COMPARISON
Average RMSE of Base Models: 29.190743122931142
RMSE for Stacking Regressor: 1.5199844770546964e-13
Percentage Reduction in RMSE: 0.9999999999999948%
