<a href="https://colab.research.google.com/github/2022civarshara-dot/ML_Lab/blob/main/Session_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Bagging and Boosting
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# -----------------------------------------------------------
# 1. Generate a random supervised classification dataset
# -----------------------------------------------------------

# Synthetic binary classification problem: 1000 rows, 10 features,
# 6 of them informative and 2 redundant; seeded for repeatability.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=6,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)

# Wrap the raw arrays in a labelled DataFrame so the preview is readable.
feature_names = [f"Feature_{idx}" for idx in range(X.shape[1])]
df = pd.DataFrame(X, columns=feature_names).assign(Target=y)

print("Random dataset sample:")
print(df.head())

# -----------------------------------------------------------
# 2. Train-test split
# -----------------------------------------------------------

# Separate predictors from the label, then hold out 30% of the rows for
# testing; the fixed seed keeps the split repeatable.
features = df.drop(columns=["Target"])
labels = df["Target"]
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.30, random_state=42
)

# -----------------------------------------------------------
# 3. Scaling
# -----------------------------------------------------------

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# -----------------------------------------------------------
# 4. Base Decision Tree
# -----------------------------------------------------------

# Single decision tree with default hyperparameters, used as the baseline
# that the ensembles below are compared against.
# The seed is fixed because scikit-learn permutes the features at every
# split; without it, tied candidate splits can be resolved differently on
# each run and the baseline metrics would not be reproducible like the
# seeded models in the rest of this script.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# -----------------------------------------------------------
# 5. Bagging (uses `estimator`; `base_estimator` was removed in sklearn 1.4)
# -----------------------------------------------------------

# Bagging ensemble of 50 decision trees. The base learner is passed through
# the `estimator` keyword; fit() returns the fitted ensemble itself.
bag_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)
bag_pred = bag_model.predict(X_test)

# -----------------------------------------------------------
# 6. Boosting (AdaBoost; uses `estimator`, as `base_estimator` was removed in sklearn 1.4)
# -----------------------------------------------------------

# AdaBoost over depth-1 trees (decision stumps): at most 50 boosting rounds
# with a learning rate of 0.8. The base learner is passed through the
# `estimator` keyword; fit() returns the fitted ensemble itself.
boost_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    learning_rate=0.8,
    random_state=42,
).fit(X_train, y_train)
boost_pred = boost_model.predict(X_test)

# -----------------------------------------------------------
# 7. Evaluation Function
# -----------------------------------------------------------

def evaluate(model_name, y_true, y_pred):
    """Print a banner followed by accuracy, confusion matrix and report.

    Args:
        model_name: Label shown in the banner for this set of predictions.
        y_true: Ground-truth labels.
        y_pred: Predicted labels to compare against ``y_true``.

    Returns:
        None. All results are written to standard output.
    """
    separator = "==================================="
    print("\n" + separator)
    print(f" Model: {model_name}")
    print(separator)
    print("Accuracy:", accuracy_score(y_true, y_pred))

    # Each remaining section is a heading followed by the metric's own
    # printable output, computed just before it is printed.
    sections = (
        ("\nConfusion Matrix:", confusion_matrix),
        ("\nClassification Report:", classification_report),
    )
    for heading, metric in sections:
        print(heading)
        print(metric(y_true, y_pred))

# -----------------------------------------------------------
# 8. Print Model Performances
# -----------------------------------------------------------

# Report every model against the same held-out labels, in training order.
for label, predictions in (
    ("Decision Tree", dt_pred),
    ("Bagging", bag_pred),
    ("Boosting (AdaBoost)", boost_pred),
):
    evaluate(label, y_test, predictions)

Random dataset sample:
   Feature_0  Feature_1  Feature_2  Feature_3  Feature_4  Feature_5  \
0  -1.030931   1.391626   0.547274   0.928932  -1.738880   1.250002   
1  -2.766254   1.247870  -0.303691   1.083145   0.710836   1.968202   
2  -0.558987   0.299849   1.527071   0.360442  -1.360209   1.100793   
3  -1.350289  -2.046078  -0.614264   0.126459  -0.783923   5.895026   
4  -0.275754  -0.728495   0.027727  -0.660834  -1.928161   3.544945   

   Feature_6  Feature_7  Feature_8  Feature_9  Target  
0   1.332551   1.578256   2.124722  -0.318434       0  
1  -1.794192   2.346422   1.700778  -0.001190       1  
2  -0.755951   1.331933   2.041105  -0.824404       0  
3  -0.915477  -3.184768  -0.399260  -3.920960       0  
4   1.446944  -1.111662   0.313766  -2.376528       0  

 Model: Decision Tree
Accuracy: 0.85

Confusion Matrix:
[[134  28]
 [ 17 121]]

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.83      0.86       162
 