## Importing Libraries

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.dummy import DummyClassifier

## 1️⃣ Create Binary Classification Dataset


In [2]:
# Synthetic two-class dataset; the fixed seed makes the data identical on every run.
X, y = make_classification(
    n_samples=1000,          # number of rows
    n_features=10,           # total number of columns
    n_informative=2,         # columns that actually carry class signal
    n_redundant=0,           # no columns derived from the informative ones
    n_clusters_per_class=1,  # a single cluster per class
    flip_y=0.1,              # fraction of samples whose label is reassigned at random
    random_state=42,
)

In [3]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# ratio the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=42,
)

## 2️⃣ Random Guessing (Baseline)

In [4]:
# Chance-level reference: with strategy="uniform" the classifier ignores the
# features and draws each predicted class uniformly at random.
random_clf = DummyClassifier(random_state=42, strategy="uniform").fit(X_train, y_train)
y_pred_random = random_clf.predict(X_test)

# Fraction of test labels the random guesses happen to match.
accuracy_random_clf = accuracy_score(y_true=y_test, y_pred=y_pred_random)
accuracy_random_clf

0.48

## 3️⃣ Weak Learner (Decision Stump)


In [5]:
# Decision stump: a tree restricted to a single split (max_depth=1).
weak_clf = DecisionTreeClassifier(max_depth=1, random_state=42)
weak_clf.fit(X_train, y_train)
y_pred_weak_clf = weak_clf.predict(X_test)

# accuracy_score takes (y_true, y_pred). Accuracy itself is symmetric, so the
# value is the same either way, but the ground truth goes first to match the
# metric's signature and the other cells in this notebook.
accuracy_weak_clf = accuracy_score(y_test, y_pred_weak_clf)
accuracy_weak_clf

0.895

## 4️⃣ Boosted Weak Learners (AdaBoost)

In [6]:
# AdaBoost built on the same kind of stump used as the weak learner.
boosted_clf = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),  # base learner: one-split tree
    n_estimators=50,     # at most 50 boosting rounds
    learning_rate=0.1,   # weight applied to each round's learner
    random_state=42,
).fit(X_train, y_train)
y_pred_boosted = boosted_clf.predict(X_test)

# Test-set accuracy of the boosted ensemble.
accuracy_boosted_clf = accuracy_score(y_true=y_test, y_pred=y_pred_boosted)
accuracy_boosted_clf

0.895

## 5️⃣ Strong Learner (Random Forest)

In [None]:
# Random forest with no depth limit on its trees.
strong_learner_clf = RandomForestClassifier(
    n_estimators=100,  # number of trees in the forest
    max_depth=None,    # None = do not cap tree depth
    random_state=42,
).fit(X_train, y_train)
y_pred_strong_clf = strong_learner_clf.predict(X_test)

# Test-set accuracy of the forest.
accuracy_strong_clf = accuracy_score(y_true=y_test, y_pred=y_pred_strong_clf)
accuracy_strong_clf

0.93

## 🔚 Final Results

In [9]:
# Side-by-side report of the four test accuracies, rounded to two decimals.
# A single print with sep="\n" emits the same lines as separate print calls.
print(
    "📊 Final Accuracy Comparison:\n",
    f"🎲 Random Guessing (DummyClassifier): {accuracy_random_clf:.2f}",
    f"🪓 Weak Learner (Decision Stump, max_depth=1): {accuracy_weak_clf:.2f}",
    f"🔥 AdaBoost (50 Weak Learners): {accuracy_boosted_clf:.2f}",
    f"🌲 Strong Learner (Random Forest): {accuracy_strong_clf:.2f}",
    sep="\n",
)

📊 Final Accuracy Comparison:

🎲 Random Guessing (DummyClassifier): 0.48
🪓 Weak Learner (Decision Stump, max_depth=1): 0.90
🔥 AdaBoost (50 Weak Learners): 0.90
🌲 Strong Learner (Random Forest): 0.93


### 🧠 Interpretation
* **Random Guessing** ≈ 50% (baseline)
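* **Weak Learner** (a single decision stump) beats that baseline by a wide margin here (0.90 vs. 0.48) → it is "weak" only in the sense of being a very simple model; on this dataset one split already captures most of the signal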
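* **AdaBoost** is meant to show the power of combining weak learners, but in this run (50 stumps, `learning_rate=0.1`) it only matches the single stump at 0.90, so no boosting gain is visible on this dataset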
* **Strong Learner (RF)** reaches the highest accuracy (0.93), showing how an ensemble of fully grown trees outperforms a single stump