# **Stacking Classification and Regression:**

# **Stacking Classifier:**

In [1]:
from sklearn.ensemble import StackingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [3]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
# Synthetic binary-classification dataset: 1000 samples, 20 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)


In [6]:
# Inspect the generated feature matrix; as the cell's last expression it is echoed as output.
X

array([[-2.04582165, -0.13791624, -0.08071423, ...,  2.48194524,
         0.74236675,  0.23154789],
       [-0.98726024,  1.30120189,  2.37734888, ...,  0.55445754,
        -0.21892143, -0.37608578],
       [ 0.57335921,  0.09375582,  0.4662521 , ..., -0.6088508 ,
         0.79903499, -0.17121177],
       ...,
       [-0.70737159,  1.07650943,  0.58510456, ..., -1.51337602,
         0.90239871, -0.69230951],
       [-0.20706849,  1.17319848, -1.94478665, ..., -0.32820676,
         1.5711921 ,  1.14877729],
       [-2.16769231, -2.54871672,  2.89359255, ...,  0.71535366,
         0.34329241,  1.07350284]])

In [7]:
# Inspect the generated class labels; as the cell's last expression they are echoed as output.
y

array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [8]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [9]:
# Base estimators, given as (name, estimator) pairs; all share the same seed.
base_models = [
    (
        'xgb',
        XGBClassifier(
            n_estimators=100,
            learning_rate=0.1,
            random_state=1,
        ),
    ),
    (
        'catboost',
        CatBoostClassifier(
            iterations=100,
            learning_rate=0.1,
            depth=5,
            verbose=0,  # silence CatBoost's per-iteration training log
            random_state=1,
        ),
    ),
    (
        'decision_tree',
        DecisionTreeClassifier(
            max_depth=5,
            random_state=1,
        ),
    ),
]

# Final estimator (meta model) that is stacked on top of the base estimators.
meta_model = LogisticRegression()

# **Stacking Classifier:**

In [10]:
# Assemble the stack: base estimators + logistic-regression meta model, cv=5.
stacking_classifier = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
)


In [11]:
# Display the configured estimator (fit() has not been called on it yet at this point).
stacking_classifier

In [12]:
# Train the model
stacking_classifier.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = stacking_classifier.predict(X_test)

# Report accuracy, the per-class metrics and the confusion matrix.
# sep="\n" puts every argument on its own line, matching one print() per item.
print(
    "Stacking Classifier Performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    sep="\n",
)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

Stacking Classifier Performance:
Accuracy: 0.86
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.86      0.85       139
           1       0.88      0.86      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

Confusion Matrix:
[[120  19]
 [ 23 138]]


# **Stacking Regressor:**

In [13]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingRegressor

In [14]:
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [15]:
# Synthetic regression dataset: 1000 samples, 10 features, noise=10, fixed seed.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=10,
    random_state=1,
)


In [16]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)


In [17]:
# Base estimators, given as (name, estimator) pairs; all share the same seed.
base_models = [
    (
        'xgb',
        XGBRegressor(
            n_estimators=100,
            learning_rate=0.1,
            random_state=1,
        ),
    ),
    (
        'catboost',
        CatBoostRegressor(
            iterations=100,
            learning_rate=0.1,
            depth=5,
            verbose=0,  # silence CatBoost's per-iteration training log
            random_state=1,
        ),
    ),
    (
        'decision_tree',
        DecisionTreeRegressor(
            max_depth=5,
            random_state=1,
        ),
    ),
]

# Final estimator (meta model) that is stacked on top of the base estimators.
meta_model = Ridge()

# **Stacking Regressor:**

In [18]:
# Assemble the stack: base estimators + ridge meta model, cv=5.
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
)


In [19]:
# Display the configured estimator (fit() has not been called on it yet at this point).
stacking_regressor

In [20]:
# Fit the stacked ensemble on the training split
stacking_regressor.fit(X_train, y_train)

# Predict targets for the held-out test split
y_pred = stacking_regressor.predict(X_test)

# Report R2, MAE and MSE on the test split.
# sep="\n" puts every argument on its own line, matching one print() per item.
print(
    "Stacking Regressor Performance:",
    f"R2 Score: {r2_score(y_test, y_pred)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}",
    sep="\n",
)

Stacking Regressor Performance:
R2 Score: 0.9668241918897891
Mean Absolute Error: 24.874969155918674
Mean Squared Error: 1097.638983376918


In [21]:
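# Hyperparameters of a stacking model can be tuned with GridSearchCV; nested parameters are addressed
# by name, e.g. `xgb__n_estimators` for a base model or `final_estimator__alpha` for the meta model.
# However, tuning a stacked model is computationally expensive, because every candidate setting
# involves fitting multiple base models with cross-validation.
# Homework: try it yourself.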