In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy training set: three features plus a binary target, one tuple per sample.
# Target encoding: No = -1, Yes = +1. The original note says "row 3" carries
# label noise (unclear whether that means index 3 or the third row; confirm).
samples = [
    # (Age, Income, Smoking, y)
    (38, 420000, 0, -1),
    (52, 360000, 5, 1),
    (45, 780000, 0, 1),
    (29, 300000, 12, 1),
    (61, 500000, 8, 1),
]
data = pd.DataFrame(samples, columns=["Age", "Income", "Smoking", "y"])

# Boosting starts from a uniform distribution over the samples.
uniform_weight = 1 / data.shape[0]
data["w"] = uniform_weight

print(data)

   Age  Income  Smoking  y    w
0   38  420000        0 -1  0.2
1   52  360000        5  1  0.2
2   45  780000        0  1  0.2
3   29  300000       12  1  0.2
4   61  500000        8  1  0.2


In [3]:
def stump_smoking(x):
    """Decision stump on the Smoking value: vote +1 when ``x >= 1``, otherwise -1."""
    if x >= 1:
        return 1
    return -1

def stump_age(x):
    """Decision stump on the Age value: vote +1 when ``x >= 45``, otherwise -1."""
    if x >= 45:
        return 1
    return -1

def stump_income_600k(x):
    """Decision stump on the Income value: vote +1 when ``x < 600000``, otherwise -1."""
    if x < 600000:
        return 1
    return -1

def stump_income_750k(x):
    """Decision stump on the Income value: vote +1 when ``x >= 750000``, otherwise -1."""
    if x >= 750000:
        return 1
    return -1


In [4]:
def weighted_error(y_true, y_pred, weights):
    """Return the total weight of the misclassified samples.

    Parameters
    ----------
    y_true : array-like
        True labels, one per sample.
    y_pred : array-like
        Predicted labels, same length as ``y_true``.
    weights : array-like
        Sample weights, same length as ``y_true``.

    Returns
    -------
    scalar
        Sum of ``weights`` over the positions where ``y_true != y_pred``
        (0 when every prediction is correct).
    """
    # Coerce to ndarrays so the comparison is element-wise and the boolean
    # mask selects positionally. Plain lists would compare as a whole
    # (yielding a single bool) and then index just one element of `weights`.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    weights = np.asarray(weights)
    return np.sum(weights[y_true != y_pred])

def compute_alpha(error, eps=1e-10):
    """Return the learner weight ``0.5 * ln((1 - error) / error)``.

    Parameters
    ----------
    error : float
        Weighted error of the weak learner, expected in ``[0, 1]``.
    eps : float, optional
        ``error`` is clipped into ``[eps, 1 - eps]`` before the formula is
        applied, so a perfect learner (``error == 0``) or a perfectly wrong
        one (``error == 1``) yields a large finite weight instead of a
        division by zero / infinite value.

    Returns
    -------
    float
        Positive when ``error < 0.5``, zero at ``0.5``, negative above it.
    """
    # An infinite alpha would turn the subsequent exponential re-weighting
    # into 0/0 (NaN) weights, so keep the ratio strictly finite.
    error = np.clip(error, eps, 1 - eps)
    return 0.5 * np.log((1 - error) / error)

def update_weights(weights, y, y_pred, alpha):
    """Re-weight the samples after a boosting round and renormalize.

    Each weight is multiplied by ``exp(-alpha * y * y_pred)`` and the result
    is divided by its sum. With ±1 labels and positive ``alpha`` this shrinks
    the weight of correctly classified samples and grows the weight of
    misclassified ones.

    Parameters
    ----------
    weights : array-like
        Current sample weights.
    y : array-like
        True labels, same length as ``weights``.
    y_pred : array-like
        Labels predicted by the learner of this round.
    alpha : float
        Weight assigned to that learner.

    Returns
    -------
    numpy.ndarray
        Updated weights, summing to 1.
    """
    # Coerce up front so plain lists and a plain Python float `alpha` work;
    # `float * list` would otherwise raise a TypeError.
    weights = np.asarray(weights, dtype=float)
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)

    new_weights = weights * np.exp(-alpha * y * y_pred)
    return new_weights / np.sum(new_weights)


In [5]:
# Candidate weak learners, tried in this fixed order (one per boosting round).
# Each entry pairs a display name with a callable that maps the frame to a
# Series of stump votes for every row.
stumps = [
    ("Smoking", lambda df: df["Smoking"].apply(stump_smoking)),
    ("Age ≥ 45", lambda df: df["Age"].apply(stump_age)),
    ("Income < 600k", lambda df: df["Income"].apply(stump_income_600k)),
    ("Income ≥ 750k", lambda df: df["Income"].apply(stump_income_750k))
]

# Restart from uniform sample weights so that re-running this cell reproduces
# the same rounds instead of continuing from the weights a previous run left
# behind in data["w"].
data["w"] = 1 / len(data)

# Accepted learners, stored as (name, alpha, training-set predictions).
models = []

for i, (name, stump) in enumerate(stumps, start=1):
    y_pred = stump(data)
    err = weighted_error(data["y"].values, y_pred.values, data["w"].values)

    # Skip weak learners that are no better than random guessing; they do
    # not update the weights and are not added to the ensemble.
    if err >= 0.5:
        print(f"Round {i}: {name} skipped (error={err:.3f})")
        continue

    alpha = compute_alpha(err)
    # Shift weight towards the samples this stump got wrong.
    data["w"] = update_weights(data["w"].values, data["y"].values, y_pred.values, alpha)

    models.append((name, alpha, y_pred))

    print(f"\nRound {i}")
    print(f"Stump: {name}")
    print(f"Weighted Error ε = {err:.4f}")
    print(f"Alpha α = {alpha:.4f}")
    print("Updated weights:")
    print(data["w"])



Round 1
Stump: Smoking
Weighted Error ε = 0.2000
Alpha α = 0.6931
Updated weights:
0    0.125
1    0.125
2    0.500
3    0.125
4    0.125
Name: w, dtype: float64

Round 2
Stump: Age ≥ 45
Weighted Error ε = 0.1250
Alpha α = 0.9730
Updated weights:
0    0.071429
1    0.071429
2    0.285714
3    0.500000
4    0.071429
Name: w, dtype: float64

Round 3
Stump: Income < 600k
Weighted Error ε = 0.3571
Alpha α = 0.2939
Updated weights:
0    0.100000
1    0.055556
2    0.400000
3    0.388889
4    0.055556
Name: w, dtype: float64
Round 4: Income ≥ 750k skipped (error=0.500)


In [6]:
def strong_classifier(df, models):
    """Combine the boosted stumps into the final weighted-vote classifier.

    Parameters
    ----------
    df : sized
        Only ``len(df)`` is used, to size the score vector.
    models : iterable of (name, alpha, preds)
        One entry per accepted learner; ``preds`` holds that learner's
        stored vote for every row, in row order.

    Returns
    -------
    (labels, score) : tuple of numpy.ndarray
        ``score`` is the alpha-weighted sum of the stored votes. ``labels``
        is its sign as floats, with an exactly tied score (0) resolved to
        +1.0 so that every row receives a valid class label.
    """
    score = np.zeros(len(df))
    for _name, alpha, preds in models:
        # Add positionally: stored predictions may be a pandas Series.
        score += alpha * np.asarray(preds)

    labels = np.sign(score)
    # np.sign yields 0 for a tie, which is not one of the ±1 classes.
    labels[labels == 0] = 1.0
    return labels, score

# Score every training row with the ensemble of accepted stumps.
final_pred, final_score = strong_classifier(data, models)

# Attach the ensemble outputs to the frame so they print next to the inputs.
data["Final Score"] = final_score
data["Final Prediction"] = final_pred

# Columns shown in the summary, in display order.
report_columns = ["Age", "Income", "Smoking", "y", "Final Prediction", "Final Score"]

print("\nFinal Results")
print(data[report_columns])



Final Results
   Age  Income  Smoking  y  Final Prediction  Final Score
0   38  420000        0 -1              -1.0    -1.372209
1   52  360000        5  1               1.0     1.959996
2   45  780000        0  1              -1.0    -0.014085
3   29  300000       12  1               1.0     0.014085
4   61  500000        8  1               1.0     1.959996
