# Step 1: Setup & Dataset

In [18]:
# Import libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris data bundle and pull out the feature matrix and label vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)




```
load_iris() loads the Iris dataset (150 samples, 4 features, 3 classes).

We split the data into a training set (70%) and a test set (30%).
```



# Step 2: Bagging Classifier

In [20]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Ensemble of 50 decision trees; each member is built from the base tree
# and trained on its own bootstrapped sample of the training data
base_tree = DecisionTreeClassifier(random_state=42)
bagging = BaggingClassifier(
    estimator=base_tree,
    n_estimators=50,
    random_state=42,
)

# Fit on the training split (fit returns the fitted estimator itself),
# then label the held-out samples
y_pred_bagging = bagging.fit(X_train, y_train).predict(X_test)

# Fraction of test labels predicted correctly
print("Bagging Accuracy:", accuracy_score(y_test, y_pred_bagging))

Bagging Accuracy: 1.0



```
Bagging = “Bootstrap Aggregating”.

Each tree is trained on a random sample with replacement.

Averaging the trees' predictions reduces variance, which helps curb overfitting.

```

# Step 3: Random Forest

In [21]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest: 100 bagged trees combined with random feature selection
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# Train on the training split, then predict labels for the test split
y_pred_rf = rf.fit(X_train, y_train).predict(X_test)

# Accuracy on the held-out data
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))


Random Forest Accuracy: 1.0




```
Random Forest = Bagging + random feature subsets.

Often stronger than plain bagging because considering only a random subset of features at each split decorrelates the trees.
```



# Step 4: AdaBoost

In [22]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost (Adaptive Boosting) with up to 50 sequentially trained estimators
ada = AdaBoostClassifier(
    n_estimators=50,
    random_state=42,
)

# Train on the training split, then predict labels for the test split
y_pred_ada = ada.fit(X_train, y_train).predict(X_test)

# Accuracy on the held-out data
print("AdaBoost Accuracy:", accuracy_score(y_test, y_pred_ada))


AdaBoost Accuracy: 1.0




```
Boosting = sequential training.

Misclassified samples get higher weights in the next round.

AdaBoost combines weak learners into a strong classifier through a weighted vote.
```



# Step 5: Gradient Boosting

In [23]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient Boosting with 100 boosting stages
gb = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
)

# Train on the training split, then predict labels for the test split
y_pred_gb = gb.fit(X_train, y_train).predict(X_test)

# Accuracy on the held-out data
print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))


Gradient Boosting Accuracy: 1.0




```
Instead of reweighting samples (as AdaBoost does), Gradient Boosting fits each new learner to the residual errors of the current ensemble (more precisely, to the negative gradient of the loss).

This makes it more flexible: it can optimize any differentiable loss function.
```



# Step 6: Voting Classifier

In [24]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Define base models: three different model families to combine
clf1 = LogisticRegression(max_iter=200)
clf2 = RandomForestClassifier(n_estimators=50, random_state=42)
# probability=True enables the class-probability estimates that soft voting
# needs. Those estimates are produced with an internally shuffled
# cross-validation, so the SVC is seeded like the other models to keep the
# result reproducible on Restart & Run All.
clf3 = SVC(probability=True, random_state=42)

# Soft voting averages the predicted class probabilities of the base models
# (voting='hard' would take a majority vote over predicted labels instead)
voting = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('svc', clf3)],
    voting='soft',
)
voting.fit(X_train, y_train)

# Predict on the held-out split and report accuracy
y_pred_voting = voting.predict(X_test)
print("Voting Accuracy:", accuracy_score(y_test, y_pred_voting))


Voting Accuracy: 1.0




```
Hard Voting = majority rule (most votes win).

Soft Voting = averages class probabilities.

Combines different kinds of models; this works best when the models make different (uncorrelated) errors.
```

