In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
# Marker printed after the import statements above; seeing it means none of them raised.
print("Setup complete")

In [None]:
# Load the iris dataset once; `iris` is kept around because later cells
# read `iris.feature_names` for plot labels.
iris = load_iris()
X = iris.data      # feature matrix
y = iris.target    # class labels
print("Data shape: {}".format(X.shape))

In [None]:
# Random Forest
# 100 trees with a fixed seed; scored with 5-fold cross-validation.
rf = RandomForestClassifier(random_state=42, n_estimators=100)
rf_scores = cross_val_score(estimator=rf, X=X, y=y, cv=5)
print("Random Forest CV scores: {}".format(rf_scores))
print("Mean: {:.3f}".format(rf_scores.mean()))

In [None]:
# Gradient Boosting
# 100 boosting stages with a fixed seed; scored with 5-fold cross-validation.
gb = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
)
gb_scores = cross_val_score(gb, X, y, cv=5)
# print() joins its arguments with a single space, matching the original text.
print("Gradient Boosting CV scores:", gb_scores)
print("Mean:", format(gb_scores.mean(), ".3f"))

In [None]:
# AdaBoost
# 50 boosting rounds with a fixed seed; scored with 5-fold cross-validation.
ada = AdaBoostClassifier(
    n_estimators=50,
    random_state=42,
)
ada_scores = cross_val_score(ada, X, y, cv=5)
print("AdaBoost CV scores: " + str(ada_scores))
print("Mean: " + "{:.3f}".format(ada_scores.mean()))

In [None]:
# Feature importance
# Fit `rf` on the full dataset here: cross_val_score scores clones of the
# estimator, so `rf` itself has not been fitted by the earlier CV cell.
rf.fit(X, y)
importances = rf.feature_importances_

# Horizontal bar per feature, labelled with the dataset's feature names.
fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(iris.feature_names, importances)
ax.set_xlabel('Importance')
ax.set_title('Random Forest Feature Importance')
plt.show()

In [None]:
# Voting ensemble
# Soft voting combines the members' predicted class probabilities.
# Each member is configured exactly like its standalone counterpart in the
# earlier cells (same n_estimators, random_state=42). Without a seed the
# members are re-randomised on every run, so the CV scores -- and the
# "> 0.9" check in the test cell -- would not be reproducible under
# Restart & Run All.
voting = VotingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
        ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42)),
        ('ada', AdaBoostClassifier(n_estimators=50, random_state=42)),
    ],
    voting='soft',
)
voting_scores = cross_val_score(voting, X, y, cv=5)
print(f"Voting Ensemble scores: {voting_scores}")
print(f"Mean: {voting_scores.mean():.3f}")

In [None]:
# Comparison
# Per-fold CV scores keyed by display name; dict insertion order is the
# left-to-right order of the boxes.
models = {
    'Random Forest': rf_scores,
    'Gradient Boosting': gb_scores,
    'AdaBoost': ada_scores,
    'Voting': voting_scores,
}
model_names = list(models.keys())

plt.figure(figsize=(10, 5))
# Pass a concrete list rather than a dict view, and do not use the `labels=`
# keyword: it is deprecated since Matplotlib 3.9 (renamed `tick_labels`).
# Labelling through xticks below works on both old and new versions.
plt.boxplot(list(models.values()))
plt.ylabel('CV Score')
plt.title('Ensemble Methods Comparison')
# boxplot places the boxes at x = 1..N by default.
plt.xticks(list(range(1, len(model_names) + 1)), model_names, rotation=45)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Self-checks on the results computed in the cells above.
# Each entry is (description, outcome); outcomes are truthy when the check holds.
test_results = [
    ("Test 1: RF high accuracy", rf_scores.mean() > 0.9),
    ("Test 2: GB high accuracy", gb_scores.mean() > 0.9),
    ("Test 3: AdaBoost decent accuracy", ada_scores.mean() > 0.7),
    ("Test 4: Voting ensemble good", voting_scores.mean() > 0.9),
    ("Test 5: Feature importances correct", len(importances) == 4),
]

# Count the checks that passed, then print a summary followed by one line per check.
passed = sum(bool(outcome) for _, outcome in test_results)
print(f"\nPASSED: {passed}/{len(test_results)}")
for label, outcome in test_results:
    mark = '✅' if outcome else '❌'
    print(f"{mark} {label}")

In [None]:
# End-of-notebook marker for this ensemble-methods practical.
print("PRACTICAL 10 COMPLETE")