In [6]:
# ==============================
# 1. Import Libraries
# ==============================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# ==============================
# 2. Load Dataset
# ==============================

df = pd.read_excel("glass.xlsx", sheet_name="glass")

print("Dataset Shape:", df.shape)
print(df.head())

# ==============================
# 3. Check Class Imbalance
# ==============================

print("\nClass Distribution:")
print(df["Type"].value_counts())

# ==============================
# 4. Feature & Target Split
# ==============================

X = df.drop("Type", axis=1)
y = df["Type"]

# ==============================
# 5. Feature Scaling
# ==============================

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ==============================
# 6. Train-Test Split
# ==============================

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# ==============================
# 7. BAGGING CLASSIFIER
# ==============================

bagging_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=200,
    random_state=42
)

bagging_model.fit(X_train, y_train)
y_pred_bag = bagging_model.predict(X_test)

print("\n===== Bagging Results =====")
print("Accuracy :", accuracy_score(y_test, y_pred_bag))
print("Precision:", precision_score(y_test, y_pred_bag, average="weighted"))
print("Recall   :", recall_score(y_test, y_pred_bag, average="weighted"))
print("F1 Score :", f1_score(y_test, y_pred_bag, average="weighted"))

# ==============================
# 8. BOOSTING (AdaBoost)
# ==============================

boosting_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    random_state=42
)

boosting_model.fit(X_train, y_train)
y_pred_boost = boosting_model.predict(X_test)

print("\n===== Boosting Results =====")
print("Accuracy :", accuracy_score(y_test, y_pred_boost))
print("Precision:", precision_score(y_test, y_pred_boost, average="weighted"))
print("Recall   :", recall_score(y_test, y_pred_boost, average="weighted"))
print("F1 Score :", f1_score(y_test, y_pred_boost, average="weighted"))

# ==============================
# 9. Handling Imbalanced Data (Class Weight)
# ==============================

balanced_tree = DecisionTreeClassifier(class_weight="balanced")

balanced_boost = AdaBoostClassifier(
    estimator=balanced_tree,
    n_estimators=200,
    random_state=42
)

balanced_boost.fit(X_train, y_train)
y_pred_balanced = balanced_boost.predict(X_test)

print("\n===== Boosting with Imbalance Handling =====")
print("Accuracy :", accuracy_score(y_test, y_pred_balanced))
print("F1 Score :", f1_score(y_test, y_pred_balanced, average="weighted"))

# ==============================
# 10. Final Comparison
# ==============================

results = pd.DataFrame({
    "Model": ["Bagging", "Boosting", "Boosting (Balanced)"],
    "F1 Score": [
        f1_score(y_test, y_pred_bag, average="weighted"),
        f1_score(y_test, y_pred_boost, average="weighted"),
        f1_score(y_test, y_pred_balanced, average="weighted")
    ]
})

print("\n===== Model Comparison =====")
print(results)


Dataset Shape: (214, 10)
        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
0  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
1  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
2  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
3  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
4  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1

Class Distribution:
Type
2    76
1    70
7    29
3    17
5    13
6     9
Name: count, dtype: int64

===== Bagging Results =====
Accuracy : 0.7441860465116279
Precision: 0.7445267910384189
Recall   : 0.7441860465116279
F1 Score : 0.740761564017378

===== Boosting Results =====
Accuracy : 0.46511627906976744
Precision: 0.4333887043189369
Recall   : 0.46511627906976744
F1 Score : 0.4181985089573829

===== Boosting with Imbalance Handling =====
Accuracy : 0.627906976744186
F1 Score : 0.6225455378622982

===== Model Comparison =====
                 Model  F1 Score
0              Ba

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Interview Question:

1. Bagging Vs Boosting:
     Bagging reduces variance by training models independently, while Boosting reduces bias by focusing on misclassified samples. Imbalanced data can be handled using class weighting, resampling techniques like SMOTE, and appropriate evaluation metrics.

2. Handling Imbalanced Data:
    Imbalanced data means some classes have fewer samples than others.

    Common Techniques:

    1.Class Weighting:
      Assigns higher penalty to minority class errors  
      Example: class_weight="balanced"

    2.Oversampling:
       Increase minority class samples
       Example: SMOTE

    3.Undersampling:
       Reduce majority class samples
       Use Proper Metrics
       Prefer Precision, Recall, F1-score instead of accuracy
