In [2]:
# Step 1: Imports
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Step 2: Load dataset and split
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)

# Step 4: Evaluate
print("Decision Tree Classifier:")
print("Train Accuracy:", accuracy_score(y_train, clf.predict(X_train)))
print("Test Accuracy:", accuracy_score(y_test, clf.predict(X_test)))

# Step 5: Cost Complexity Pruning
path = clf.cost_complexity_pruning_path(X_train, y_train)
ccp_alpha = path.ccp_alphas[-2]  # pick a reasonable alpha
pruned_clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
pruned_clf.fit(X_train, y_train)

print("\nAfter Pruning:")
print("Train Accuracy:", accuracy_score(y_train, pruned_clf.predict(X_train)))
print("Test Accuracy:", accuracy_score(y_test, pruned_clf.predict(X_test)))

# Step 6: Random Forest
rf = RandomForestClassifier(random_state=0)
rf.fit(X_train, y_train)
print("\nRandom Forest Accuracy:", accuracy_score(y_test, rf.predict(X_test)))

# Step 7: AdaBoost with Decision Stumps
ada = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=0)
ada.fit(X_train, y_train)
print("AdaBoost Accuracy:", accuracy_score(y_test, ada.predict(X_test)))

Decision Tree Classifier:
Train Accuracy: 1.0
Test Accuracy: 1.0

After Pruning:
Train Accuracy: 0.675
Test Accuracy: 0.6333333333333333

Random Forest Accuracy: 1.0
AdaBoost Accuracy: 1.0


