Bagging classifier

In [5]:
# Import necessary libraries
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Create synthetic classification dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5,
                           n_redundant=0, random_state=42)

# Step 2: Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=1)

# Step 3: Create a base model (weak learner)
base_model = DecisionTreeClassifier()

# Step 4: Create the BaggingClassifier
bagging_model = BaggingClassifier(base_estimator=base_model,
                                  n_estimators=50,
                                  max_samples=0.8,
                                  max_features=1.0,
                                  bootstrap=True,
                                  random_state=1,
                                  n_jobs=-1)

# Step 5: Train the Bagging model
bagging_model.fit(X_train, y_train)

# Step 6: Predict on test data
y_pred = bagging_model.predict(X_test)

# Step 7: Evaluate the model
print("Bagging Classifier Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))



Bagging Classifier Accuracy: 0.9

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.85      0.90       151
           1       0.86      0.95      0.90       149

    accuracy                           0.90       300
   macro avg       0.90      0.90      0.90       300
weighted avg       0.90      0.90      0.90       300



AdaBoost Classifier

In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Generate a synthetic classification dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5,
                           n_redundant=0, random_state=42)

# Step 2: Split the dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.3,
                                                    random_state=1)

# Step 3: Create a base model (weak learner) — Decision Stump (tree with max depth = 1)
base_model = DecisionTreeClassifier(max_depth=1)

# Step 4: Create the AdaBoost model using the base model
boost_model = AdaBoostClassifier(base_estimator=base_model,
                                 n_estimators=50,
                                 learning_rate=1.0,
                                 random_state=1)

# Step 5: Train the boosting model
boost_model.fit(X_train, y_train)

# Step 6: Predict on test data
y_pred = boost_model.predict(X_test)

# Step 7: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))



Accuracy: 0.8266666666666667

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.79      0.82       151
           1       0.81      0.86      0.83       149

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



Gradient Boosting with GradientBoostingClassifier

In [2]:
from sklearn.ensemble import GradientBoostingClassifier

# Step 1: Create the Gradient Boosting model
gb_model = GradientBoostingClassifier(n_estimators=100,
                                      learning_rate=0.1,
                                      max_depth=3,
                                      random_state=1)

# Step 2: Train the model
gb_model.fit(X_train, y_train)

# Step 3: Predict on test data
y_pred_gb = gb_model.predict(X_test)

# Step 4: Evaluate the model
print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))
print("\nClassification Report:\n", classification_report(y_test, y_pred_gb))

Gradient Boosting Accuracy: 0.9033333333333333

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.87      0.90       151
           1       0.88      0.94      0.91       149

    accuracy                           0.90       300
   macro avg       0.91      0.90      0.90       300
weighted avg       0.91      0.90      0.90       300



XGBoost with XGBClassifier

In [3]:
pip install xgboost

Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/5e/03/15cd49e855c62226ecf1831bbe4c8e73a4324856077a23c495538a36e557/xgboost-3.0.0-py3-none-win_amd64.whl.metadata
  Downloading xgboost-3.0.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.0-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 0.1/150.0 MB 4.3 MB/s eta 0:00:35
   ---------------------------------------- 0.2/150.0 MB 3.6 MB/s eta 0:00:42
   ---------------------------------------- 0.3/150.0 MB 2.6 MB/s eta 0:00:58
   ---------------------------------------- 0.4/150.0 MB 2.4 MB/s eta 0:01:02
   ---------------------------------------- 0.4/150.0 MB 1.9 MB/s eta 0:01:20
   ---------------------------------------- 0.4/150.0 MB 1.7 MB/s eta 0:01:27
   ---------------------------------------- 0.5/150.0 MB 1.5 MB/s eta 0:01:41
   ---------------------------

In [4]:
from xgboost import XGBClassifier

# Step 1: Create the XGBoost model
xgb_model = XGBClassifier(n_estimators=100,
                          learning_rate=0.1,
                          max_depth=3,
                          use_label_encoder=False,
                          eval_metric='logloss',
                          random_state=1)

# Step 2: Train the model
xgb_model.fit(X_train, y_train)

# Step 3: Predict on test data
y_pred_xgb = xgb_model.predict(X_test)

# Step 4: Evaluate the model
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("\nClassification Report:\n", classification_report(y_test, y_pred_xgb))

XGBoost Accuracy: 0.9066666666666666

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.87      0.90       151
           1       0.88      0.95      0.91       149

    accuracy                           0.91       300
   macro avg       0.91      0.91      0.91       300
weighted avg       0.91      0.91      0.91       300



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Stacking Classifier on Iris Dataset

Base learners: KNeighborsClassifier, DecisionTreeClassifier

Meta-model: LogisticRegression

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
iris = load_iris()
X, y = iris.data, iris.target

# Step 2: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Define base models
base_learners = [
    ('knn', KNeighborsClassifier(n_neighbors=3)),
    ('dt', DecisionTreeClassifier(max_depth=3, random_state=42))
]

# Step 4: Define meta-learner
meta_learner = LogisticRegression()

# Step 5: Create the Stacking Classifier
stack_model = StackingClassifier(estimators=base_learners, final_estimator=meta_learner, cv=5)

# Step 6: Train the model
stack_model.fit(X_train, y_train)

# Step 7: Predict & evaluate
y_pred = stack_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Stacking Classifier Accuracy: {:.2f}%".format(accuracy * 100))

Stacking Classifier Accuracy: 100.00%
