In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the mushroom dataset from disk
data = pd.read_csv('mushroom.csv')

# The 'class' column is the prediction target; every other column is a feature
y = data['class']
X = data.drop(columns='class')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def train_and_evaluate(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``; it is
            fitted in place by this call.
        X_train: Features used for fitting.
        X_test: Features used for prediction.
        y_train: Labels used for fitting.
        y_test: Labels the predictions are compared against.

    Returns:
        The value of ``accuracy_score(y_test, predictions)``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return accuracy_score(y_test, predictions)

# Task A.1: Decision Stump (a decision tree capped at depth 1)
stump = DecisionTreeClassifier(random_state=42, max_depth=1)
stump_accuracy = train_and_evaluate(
    model=stump,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

# Task A.2-5: AdaBoost with different numbers of weak learners
adaboost_results = []
for n_estimators in [1, 2, 3, 10]:  # 10 is our choice for 'n'
    # Each ensemble boosts depth-1 trees, the same shape as the stump baseline
    weak_learner = DecisionTreeClassifier(max_depth=1)
    adaboost = AdaBoostClassifier(
        estimator=weak_learner,
        n_estimators=n_estimators,
        random_state=42,
    )
    accuracy = train_and_evaluate(adaboost, X_train, X_test, y_train, y_test)
    # Record (ensemble size, test accuracy) for the summary table
    adaboost_results += [(n_estimators, accuracy)]

# Task A.6: Summarize results
# First row is the single-stump baseline, followed by one row per AdaBoost run
summary_rows = [
    {'Model': 'Decision Stump', 'Weak Learners': 1, 'Accuracy': stump_accuracy},
]
summary_rows.extend(
    {'Model': 'AdaBoost', 'Weak Learners': n, 'Accuracy': acc}
    for n, acc in adaboost_results
)
results = pd.DataFrame(summary_rows)

# Print the table without the DataFrame's integer index column
print(results.to_string(index=False))



         Model  Weak Learners  Accuracy
Decision Stump              1  0.609050
      AdaBoost              1  0.609050
      AdaBoost              2  0.622837
      AdaBoost              3  0.622837
      AdaBoost             10  0.673637
