# Stacking

In [6]:
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load data
df_mushrooms = pd.read_csv('data/mushrooms.csv')

# Select the features by excluding the target column by name instead of by
# position, so the feature matrix never depends on where 'class' sits in the file.
X = df_mushrooms.drop(columns=['class'])
y = df_mushrooms['class']

# `le` is reserved for the target: it stays fitted on the class labels so that
# le.inverse_transform(...) can map predictions back to the original labels.
le = LabelEncoder()
y = le.fit_transform(y)

# Stratified hold-out split (default train/test proportions) with a fixed seed
# so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Integer-encode every object-dtype feature column. Each column gets its own
# encoder, fitted on the training split only and then applied to the test split;
# the fitted encoders are kept in `feature_encoders`, keyed by column name.
feature_encoders = {}
for col in X_train.select_dtypes(include=['object']).columns:
    col_encoder = LabelEncoder()
    X_train[col] = col_encoder.fit_transform(X_train[col])
    # NOTE: transform() raises ValueError if the test split contains a category
    # that never appeared in the training split for this column.
    X_test[col] = col_encoder.transform(X_test[col])
    feature_encoders[col] = col_encoder

# Define layer one estimators for Stacking
# Each entry is a (name, estimator) pair; these are the base models of the
# outer stacking classifier.
layer_one_estimators = [
    ('rf_1', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn_1', KNeighborsClassifier(n_neighbors=5))
]

# Define layer two estimators for Stacking
layer_two_estimators = [
    # Seeded like the other randomized estimators in this script; an unseeded
    # tree can break ties between equally good splits differently on each run.
    ('dt_2', DecisionTreeClassifier(random_state=42)),
    ('rf_2', RandomForestClassifier(n_estimators=50, random_state=42)),
]
# The second layer is itself a stacking ensemble: its two tree-based models
# are combined by a logistic-regression final estimator.
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=LogisticRegression())

# Build the stacked ensemble (layer-one base models feeding the layer-two
# stack as final estimator) and the two single-model baselines.
clf = StackingClassifier(final_estimator=layer_two, estimators=layer_one_estimators)
adaboost = AdaBoostClassifier(random_state=42, n_estimators=50)
random_forest = RandomForestClassifier(random_state=42, n_estimators=50)

# Train all three on the same training split: baselines first, stack last.
for estimator in (random_forest, adaboost, clf):
    estimator.fit(X_train, y_train)

# Score every fitted model on the held-out test split, in the order
# random forest, AdaBoost, stacking.
rf_accuracy, ada_accuracy, stack_accuracy = (
    accuracy_score(y_test, model.predict(X_test))
    for model in (random_forest, adaboost, clf)
)

# Report the individual accuracies, one per line.
print(
    f"Random Forest Accuracy: {rf_accuracy:.4f}",
    f"AdaBoost Accuracy: {ada_accuracy:.4f}",
    f"Stacking Classifier Accuracy: {stack_accuracy:.4f}",
    sep="\n",
)

# Baseline accuracy minus stacking accuracy: a positive value means the
# single-model baseline scored higher than the stacked ensemble.
accuracy_differences = {}
accuracy_differences['Random Forest vs Stacking'] = rf_accuracy - stack_accuracy
accuracy_differences['AdaBoost vs Stacking'] = ada_accuracy - stack_accuracy

print("\nAccuracy Differences:")
for comparison, delta in accuracy_differences.items():
    print(f"{comparison}: {delta:.4f}")




Random Forest Accuracy: 1.0000
AdaBoost Accuracy: 1.0000
Stacking Classifier Accuracy: 1.0000

Accuracy Differences:
Random Forest vs Stacking: 0.0000
AdaBoost vs Stacking: 0.0000
