### Dataset:
Use any of the following datasets (your choice):

load_breast_cancer() from sklearn.datasets

Iris dataset

Titanic dataset (if using Pandas)


In [5]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the breast-cancer dataset and pull out the feature matrix and the labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics reach the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


### Tasks:

#### **1. Voting Classifier**
Load the dataset and preprocess it (e.g., train/test split, scaling).

Build 3 different classifiers: Logistic Regression, SVM, KNN.

Combine them into a VotingClassifier (use soft voting).

Evaluate and print the accuracy.


#### **2. Bagging**

Use a BaggingClassifier with Decision Trees or directly use RandomForestClassifier.

Train the model and evaluate on test data.

Try changing the number of trees (n_estimators) and observe the effect.


#### **3. Boosting**

Use GradientBoostingClassifier or AdaBoostClassifier.

Train on the same dataset.

Report accuracy and compare to bagging.


#### **4. Stacking**
Choose 2–3 base models (e.g., Logistic Regression, Random Forest, SVM).

Use StackingClassifier with a final estimator (e.g., Logistic Regression).

Train and evaluate the model.


In [6]:
# Voting Classifier

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score

# The three base learners requested by the task.
log_clf = LogisticRegression(random_state=42, max_iter=1000)
# probability=True so the SVM can supply class probabilities for soft voting.
svc_clf = SVC(random_state=42, probability=True)
knn_clf = KNeighborsClassifier()

# Combine the base learners with soft voting and train the ensemble
# on the scaled training split.
voting_clf = VotingClassifier(
    estimators=[
        ('log_clf', log_clf),
        ('svc_clf', svc_clf),
        ('knn_clf', knn_clf),
    ],
    voting='soft',
).fit(X_train, y_train)

# Score the ensemble on the held-out test split.
y_pred_voting = voting_clf.predict(X_test)
acc_voting = accuracy_score(y_test, y_pred_voting)
print(f"Voting Classifier Accuracy: {acc_voting:.4f}")

Voting Classifier Accuracy: 0.9649


In [9]:
# Bagging (RandomForest and BaggingClassifier with decision trees)
from sklearn.ensemble import BaggingClassifier 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# Reference forest: 100 trees, seeded for repeatable results.
rf_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Evaluate on the held-out test split.
y_pred_rf = rf_clf.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy (100 trees): {acc_rf:.4f}")

# Same forest with only 10 trees, to observe the effect of n_estimators
# when everything else (data, seed) is held fixed.
rf_clf_small = RandomForestClassifier(
    n_estimators=10,
    random_state=42,
).fit(X_train, y_train)

y_pred_rf_small = rf_clf_small.predict(X_test)
acc_rf_small = accuracy_score(y_test, y_pred_rf_small)
print(f"Random Forest Accuracy (10 trees): {acc_rf_small:.4f}")


# Explicit bagging: 50 depth-limited decision trees, each trained on a
# bootstrap sample of the training split.
model = BaggingClassifier(
    estimator=DecisionTreeClassifier(max_depth=5, random_state=42),
    n_estimators=50,  # 50 trees
    bootstrap=True,
    random_state=42,
).fit(X_train, y_train)

# Evaluate the bagged trees on the held-out test split.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

Random Forest Accuracy (100 trees): 0.9649
Random Forest Accuracy (10 trees): 0.9561
Accuracy: 0.956140350877193


In [10]:
# Boosting

from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier

# Gradient Boosting with library defaults; only the seed is pinned.
gb_clf = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Score on the same held-out split used for the other ensembles.
y_pred_gb = gb_clf.predict(X_test)
acc_gb = accuracy_score(y_test, y_pred_gb)
print(f"Gradient Boosting Accuracy: {acc_gb:.4f}")

# AdaBoost with library defaults; only the seed is pinned.
ada_clf = AdaBoostClassifier(random_state=42).fit(X_train, y_train)

# Score on the same held-out split used for the other ensembles.
y_pred_ada = ada_clf.predict(X_test)
acc_ada = accuracy_score(y_test, y_pred_ada)
print(f"AdaBoost Accuracy: {acc_ada:.4f}")

Gradient Boosting Accuracy: 0.9561
AdaBoost Accuracy: 0.9649


In [11]:
# Stacking

from sklearn.ensemble import StackingClassifier


# Base models for the stack. The order is kept as-is because it determines
# the order of the inputs handed to the final estimator.
estimators = [
    ('lr', LogisticRegression(random_state=42, max_iter=1000)),
    ('rf', RandomForestClassifier(random_state=42, n_estimators=50)),
    ('svm', SVC(random_state=42, probability=True)),
]

# A logistic regression combines the base models' outputs; passthrough=False
# means the original features are not forwarded to the final estimator.
stack_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    passthrough=False,
).fit(X_train, y_train)

# Evaluate the stacked model on the held-out test split.
y_pred_stack = stack_clf.predict(X_test)
acc_stack = accuracy_score(y_test, y_pred_stack)
print(f"Stacking Classifier Accuracy: {acc_stack:.4f}")

Stacking Classifier Accuracy: 0.9737
