💻 CODE EXAMPLE 1: BAGGING (Random Forest)

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt

# Toy purchase dataset: one (features, label) record per sample.
# NOTE(review): the three feature columns are unlabeled here — they look like
# age, income, and a binary flag; confirm against the data source.
purchase_records = [
    ([25, 30, 0], 'No'),
    ([35, 60, 1], 'Yes'),
    ([45, 80, 0], 'Yes'),
    ([20, 20, 1], 'No'),
    ([35, 70, 0], 'Yes'),
    ([52, 90, 1], 'Yes'),
    ([23, 25, 0], 'No'),
    ([40, 65, 1], 'Yes'),
    ([30, 45, 1], 'No'),
    ([50, 85, 0], 'Yes'),
    ([28, 35, 1], 'No'),
    ([42, 75, 0], 'Yes'),
]
X = np.array([features for features, _ in purchase_records])
y = np.array([label for _, label in purchase_records])

# Hold out 25% of the 12 samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# ==========================================
# SINGLE DECISION TREE (Baseline)
# ==========================================
# One unconstrained tree: the reference point the ensembles are compared to.
single_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

print("=" * 60)
print("SINGLE DECISION TREE (Baseline)")

# Score the fitted tree on the data it saw and on the held-out split.
tree_train_pred = single_tree.predict(X_train)
print(f"Train accuracy: {accuracy_score(y_train, tree_train_pred):.3f}")
tree_test_pred = single_tree.predict(X_test)
print(f"Test accuracy: {accuracy_score(y_test, tree_test_pred):.3f}")

# ==========================================
# BAGGING: Random Forest
# ==========================================
forest_params = {
    "n_estimators": 10,   # Number of trees
    "max_depth": 3,       # Depth of each tree
    "random_state": 42,   # Fixed seed for reproducible results
}
random_forest = RandomForestClassifier(**forest_params)
random_forest.fit(X_train, y_train)

print("=" * 60)
print("BAGGING: RANDOM FOREST (10 trees)")

# Score the forest on the training data and on the held-out split.
forest_train_pred = random_forest.predict(X_train)
print(f"Train accuracy: {accuracy_score(y_train, forest_train_pred):.3f}")
forest_test_pred = random_forest.predict(X_test)
print(f"Test accuracy: {accuracy_score(y_test, forest_test_pred):.3f}")

# ==========================================
# BAGGING: Manual (with any classifier)
# ==========================================
# The base model is passed in explicitly; here it is a depth-3 decision tree.
bagging_base_tree = DecisionTreeClassifier(max_depth=3)
bagging = BaggingClassifier(
    estimator=bagging_base_tree,
    n_estimators=10,      # Number of models
    random_state=42,
).fit(X_train, y_train)

print("=" * 60)
print("BAGGING: Manual BaggingClassifier (10 trees)")

# Score the bagged ensemble on the training data and on the held-out split.
bagging_train_pred = bagging.predict(X_train)
print(f"Train accuracy: {accuracy_score(y_train, bagging_train_pred):.3f}")
bagging_test_pred = bagging.predict(X_test)
print(f"Test accuracy: {accuracy_score(y_test, bagging_test_pred):.3f}")

SINGLE DECISION TREE (Baseline)
Train accuracy: 1.000
Test accuracy: 1.000
BAGGING: RANDOM FOREST (10 trees)
Train accuracy: 1.000
Test accuracy: 1.000
BAGGING: Manual BaggingClassifier (10 trees)
Train accuracy: 1.000
Test accuracy: 1.000


💻 CODE EXAMPLE 2: BOOSTING (AdaBoost)

In [2]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier

# ==========================================
# BOOSTING: AdaBoost
# ==========================================
# Weak learner: a depth-1 decision tree.
adaboost_weak_learner = DecisionTreeClassifier(max_depth=1)
adaboost = AdaBoostClassifier(
    estimator=adaboost_weak_learner,
    n_estimators=10,      # Number of boosting rounds
    random_state=42,
).fit(X_train, y_train)

print("=" * 60)
print("BOOSTING: AdaBoost (10 rounds)")

# Score the boosted ensemble on the training data and on the held-out split.
adaboost_train_pred = adaboost.predict(X_train)
print(f"Train accuracy: {accuracy_score(y_train, adaboost_train_pred):.3f}")
adaboost_test_pred = adaboost.predict(X_test)
print(f"Test accuracy: {accuracy_score(y_test, adaboost_test_pred):.3f}")

# ==========================================
# BOOSTING: Gradient Boosting
# ==========================================
gradient_boost_params = {
    "n_estimators": 10,
    "max_depth": 3,
    "learning_rate": 0.1,   # How much each model contributes
    "random_state": 42,
}
gradient_boost = GradientBoostingClassifier(**gradient_boost_params)
gradient_boost.fit(X_train, y_train)

print("=" * 60)
print("BOOSTING: Gradient Boosting (10 rounds)")

# Score the boosted ensemble on the training data and on the held-out split.
gb_train_pred = gradient_boost.predict(X_train)
print(f"Train accuracy: {accuracy_score(y_train, gb_train_pred):.3f}")
gb_test_pred = gradient_boost.predict(X_test)
print(f"Test accuracy: {accuracy_score(y_test, gb_test_pred):.3f}")

# ==========================================
# COMPARISON
# ==========================================
# Table header: model name followed by train/test accuracy columns.
print("\n" + "=" * 60)
print("COMPARISON SUMMARY:")
print(f"{'Model':<25} {'Train Acc':<12} {'Test Acc':<12}")
print("-" * 60)

# Display label -> already-fitted estimator, in the order the rows are printed.
# NOTE(review): the `bagging` BaggingClassifier fitted in Example 1 is not
# listed here — confirm whether that omission is intentional.
models = {
    'Single Tree': single_tree,
    'Random Forest (Bagging)': random_forest,
    'AdaBoost': adaboost,
    'Gradient Boosting': gradient_boost,
}

# One row per model, scored on the same train/test split as above.
for name, model in models.items():
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)
    train_acc = accuracy_score(y_train, train_pred)
    test_acc = accuracy_score(y_test, test_pred)
    print(f"{name:<25} {train_acc:<12.3f} {test_acc:<12.3f}")

BOOSTING: AdaBoost (10 rounds)
Train accuracy: 1.000
Test accuracy: 1.000
BOOSTING: Gradient Boosting (10 rounds)
Train accuracy: 1.000
Test accuracy: 1.000

COMPARISON SUMMARY:
Model                     Train Acc    Test Acc    
------------------------------------------------------------
Single Tree               1.000        1.000       
Random Forest (Bagging)   1.000        1.000       
AdaBoost                  1.000        1.000       
Gradient Boosting         1.000        1.000       
