In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load scikit-learn's bundled breast cancer dataset and pull out the
# feature matrix and the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

def _fit_and_report(model, label, tag, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and print its accuracy and confusion matrix.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.
    label : str
        Name used in the ``"<label> Accuracy: ..."`` line.
    tag : str
        Short name used in the ``"Confusion Matrix (<tag>):"`` heading.
    X_fit, y_fit
        Data the model is fitted on.
    X_eval, y_eval
        Data the predictions are made on and scored against.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` computed on ``X_eval``.
    """
    model.fit(X_fit, y_fit)
    pred = model.predict(X_eval)
    acc = accuracy_score(y_eval, pred)
    print(f"{label} Accuracy: {acc:.4f}")
    # sep="" stops print() from inserting a space after the newline, which
    # would push the first matrix row one column out of line with the second.
    print(f"Confusion Matrix ({tag}):\n", confusion_matrix(y_eval, pred), sep="")
    return pred, acc


# Random Forest
rf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)
rf_pred, rf_acc = _fit_and_report(
    rf, "Random Forest", "RF", X_train, y_train, X_test, y_test
)

# AdaBoost
print()  # blank line separating the two reports
ada = AdaBoostClassifier(n_estimators=50, learning_rate=1.0, random_state=42)
ada_pred, ada_acc = _fit_and_report(
    ada, "AdaBoost", "AdaBoost", X_train, y_train, X_test, y_test
)

# Feature Importance (Random Forest)
# Pair each importance score from the fitted forest with its feature name so
# the ranking below is readable.
importance = pd.Series(rf.feature_importances_, index=data.feature_names)
# sep="" keeps print() from adding a space after the newline, which would
# shift the first row of the Series one column to the right of the others.
print("\nTop 5 Important Features (RF):\n", importance.nlargest(5), sep="")

Random Forest Accuracy: 0.9649
Confusion Matrix (RF):
 [[40  3]
 [ 1 70]]

AdaBoost Accuracy: 0.9649
Confusion Matrix (AdaBoost):
 [[40  3]
 [ 1 70]]

Top 5 Important Features (RF):
 worst area              0.164545
worst concave points    0.158464
mean concave points     0.111305
worst radius            0.078391
worst perimeter         0.073372
dtype: float64
