Bagging Classifier

In [16]:
# Import necessary libraries
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Create a synthetic binary classification dataset
# (1000 samples, 10 features: 5 informative, 0 redundant; seeded for reproducibility).
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5,
                           n_redundant=0, random_state=42)

# Step 2: Split the dataset into training (70%) and test (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=1)

# Step 3: Create the base estimator.
# An unconstrained decision tree is a low-bias / high-variance model rather
# than a "weak" learner; bagging averages many such trees to reduce variance.
base_model = DecisionTreeClassifier()

# Step 4: Create the BaggingClassifier.
# random_state seeds the bootstrap resampling and is also used to derive a
# seed for every tree in the ensemble, so the reported metrics are
# reproducible under Restart & Run All.
bagging_model = BaggingClassifier(estimator=base_model,
                                  n_estimators=50,
                                  random_state=1)

# Step 5: Train the Bagging model
bagging_model.fit(X_train, y_train)

# Step 6: Predict on test data
y_pred = bagging_model.predict(X_test)

# Step 7: Evaluate the model on the held-out test set
print("Bagging Classifier Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Bagging Classifier Accuracy: 0.93

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.89      0.93       151
           1       0.90      0.97      0.93       149

    accuracy                           0.93       300
   macro avg       0.93      0.93      0.93       300
weighted avg       0.93      0.93      0.93       300



AdaBoost Classifier

In [17]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Generate a synthetic binary classification dataset
# (1000 samples, 10 features: 5 informative, 0 redundant; seeded for reproducibility).
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5,
                           n_redundant=0, random_state=42)

# Step 2: Split the dataset into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=1)

# Step 3: Create a base model (weak learner) — Decision Stump (tree with max depth = 1)
base_model = DecisionTreeClassifier(max_depth=1)

# Step 4: Create the AdaBoost model using the base model.
# random_state fixes the seed handed to the stump at every boosting
# iteration, so repeated runs of this cell give the same ensemble.
boost_model = AdaBoostClassifier(estimator=base_model,
                                 n_estimators=50,
                                 random_state=1)

# Step 5: Train the boosting model
boost_model.fit(X_train, y_train)

# Step 6: Predict on test data
y_pred = boost_model.predict(X_test)

# Step 7: Evaluate the model on the held-out test set
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8366666666666667

Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.79      0.83       151
           1       0.81      0.88      0.84       149

    accuracy                           0.84       300
   macro avg       0.84      0.84      0.84       300
weighted avg       0.84      0.84      0.84       300



Gradient Boosting with GradientBoostingClassifier

In [18]:
from sklearn.ensemble import GradientBoostingClassifier

# NOTE: X_train / X_test / y_train / y_test, accuracy_score and
# classification_report are not defined in this cell; they must already
# exist in the kernel from an earlier cell.

# Step 1: Configure the Gradient Boosting model
# (100 boosting stages, learning rate 0.1, depth-3 trees, fixed seed).
gb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 1,
}
gb_model = GradientBoostingClassifier(**gb_params)

# Step 2: Fit on the training split
gb_model.fit(X_train, y_train)

# Step 3: Generate predictions for the held-out test split
y_pred_gb = gb_model.predict(X_test)

# Step 4: Compute the metrics, then report them
gb_accuracy = accuracy_score(y_test, y_pred_gb)
gb_report = classification_report(y_test, y_pred_gb)
print("Gradient Boosting Accuracy:", gb_accuracy)
print("\nClassification Report:\n", gb_report)

Gradient Boosting Accuracy: 0.9033333333333333

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.87      0.90       151
           1       0.88      0.94      0.91       149

    accuracy                           0.90       300
   macro avg       0.91      0.90      0.90       300
weighted avg       0.91      0.90      0.90       300



XGBoost with XGBClassifier

In [19]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [20]:
from xgboost import XGBClassifier

# NOTE: X_train / X_test / y_train / y_test, accuracy_score and
# classification_report are not defined in this cell; they must already
# exist in the kernel from an earlier cell.

# Step 1: Instantiate the XGBoost classifier with 100 estimators
xgb_params = {"n_estimators": 100}
xgb_model = XGBClassifier(**xgb_params)

# Step 2: Fit on the training split
xgb_model.fit(X_train, y_train)

# Step 3: Generate predictions for the held-out test split
y_pred_xgb = xgb_model.predict(X_test)

# Step 4: Compute the metrics, then report them
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
xgb_report = classification_report(y_test, y_pred_xgb)
print("XGBoost Accuracy:", xgb_accuracy)
print("\nClassification Report:\n", xgb_report)

XGBoost Accuracy: 0.9233333333333333

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.89      0.92       151
           1       0.90      0.95      0.93       149

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



Stacking Classifier on Iris Dataset

Base learners: KNeighborsClassifier, DecisionTreeClassifier

Meta-model: LogisticRegression

In [24]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the Iris dataset.
# NOTE: this rebinds X, y (and, below, the train/test split variables) that
# the earlier cells used for the synthetic dataset.
iris = load_iris()
X, y = iris.data, iris.target

# Step 2: Train-test split (30% held out, seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Define base models as (name, estimator) pairs.
# The decision tree is seeded so that repeated runs build the same tree;
# without a seed its tie-breaking between equally good splits is random.
base_learners = [
    ('knn', KNeighborsClassifier()),
    ('dt', DecisionTreeClassifier(random_state=42))
]

# Step 4: Define meta-learner that combines the base models' predictions
meta_learner = LogisticRegression()

# Step 5: Create the Stacking Classifier.
# Keyword arguments make it explicit which object is the list of base
# estimators and which is the final (meta) estimator.
stack_model = StackingClassifier(estimators=base_learners,
                                 final_estimator=meta_learner)

# Step 6: Train the model
stack_model.fit(X_train, y_train)

# Step 7: Predict & evaluate on the held-out test set
y_pred = stack_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Stacking Classifier Accuracy: {:.2f}%".format(accuracy * 100))

Stacking Classifier Accuracy: 100.00%
