In [1]:
# Day 66 – Ensemble Learning

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

In [2]:
# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [7]:
# ---------------------------
# Bagging
# ---------------------------
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42
)
bagging.fit(X_train, y_train)
y_pred_bag = bagging.predict(X_test)
print("Bagging Accuracy:", accuracy_score(y_test, y_pred_bag))

Bagging Accuracy: 0.9385964912280702


In [8]:
# ---------------------------
# Boosting (AdaBoost & Gradient Boosting)
# ---------------------------
adaboost = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    random_state=42
)
adaboost.fit(X_train, y_train)
y_pred_ada = adaboost.predict(X_test)
print("AdaBoost Accuracy:", accuracy_score(y_test, y_pred_ada))

gradboost = GradientBoostingClassifier(n_estimators=100, random_state=42)
gradboost.fit(X_train, y_train)
y_pred_gb = gradboost.predict(X_test)
print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))

AdaBoost Accuracy: 0.956140350877193
Gradient Boosting Accuracy: 0.956140350877193


In [11]:
# ---------------------------
# Voting Classifier (Hard & Soft Voting)
# ---------------------------
log_clf = LogisticRegression(max_iter=5000)
svm_clf = SVC(probability=True)
dt_clf = DecisionTreeClassifier()

voting = VotingClassifier(
    estimators=[("lr", log_clf), ("svm", svm_clf), ("dt", dt_clf)],
    voting="soft"
)

voting.fit(X_train, y_train)
y_pred_vote = voting.predict(X_test)
print("Voting Classifier Accuracy:", accuracy_score(y_test, y_pred_vote))

Voting Classifier Accuracy: 0.9473684210526315


In [10]:

# ---------------------------
# Comparison
# ---------------------------
print("\n Ensemble Learning Comparison:")
print("Bagging:", accuracy_score(y_test, y_pred_bag))
print("AdaBoost:", accuracy_score(y_test, y_pred_ada))
print("Gradient Boosting:", accuracy_score(y_test, y_pred_gb))
print("Voting Classifier:", accuracy_score(y_test, y_pred_vote))


 Ensemble Learning Comparison:
Bagging: 0.9385964912280702
AdaBoost: 0.956140350877193
Gradient Boosting: 0.956140350877193
Voting Classifier: 0.9385964912280702
