In [2]:
import numpy as np
import pandas as pd
import math

# Dataset (with noise)
data = dict(
    Age=[38, 52, 45, 29, 61],
    Income=[420000, 360000, 780000, 300000, 500000],
    Smoking=[0, 5, 0, 12, 8],
    y=[-1, 1, 1, 1, 1],  # Yes=+1, No=-1
)
df = pd.DataFrame(data)

# Initial weights: uniform, 1/n for each of the n rows
n = df.shape[0]
weights = np.full(n, 1 / n)

# Show the starting table and the starting weight vector
print("Initial Data", df, sep="\n")
print()
print("Initial Weights:", weights)


Initial Data
   Age  Income  Smoking  y
0   38  420000        0 -1
1   52  360000        5  1
2   45  780000        0  1
3   29  300000       12  1
4   61  500000        8  1

Initial Weights: [0.2 0.2 0.2 0.2 0.2]


In [3]:
def stump_smoking(x, threshold=1):
    """Decision stump on the "Smoking" feature.

    Args:
        x: Smoking value of a single sample.
        threshold: Smallest value that is classified as +1. Defaults to 1,
            the cut-off used in this notebook.

    Returns:
        1 if ``x >= threshold``, otherwise -1.
    """
    return 1 if x >= threshold else -1

# ROUND 1: evaluate the smoking stump under the current sample weights.
# NOTE: this cell reads `weights` and then rebinds it, so re-running it
# without re-running the earlier cells applies the update a second time.
y_true = df["y"].to_numpy()

# Predictions
pred1 = np.array([stump_smoking(x) for x in df["Smoking"]])

# Weighted error: total weight of the misclassified samples
error1 = np.sum(weights[pred1 != y_true])

# Alpha: the error used in the log-odds is clamped strictly inside (0, 1).
# Unclamped, an error of 0 makes alpha infinite (the normalised weights then
# become NaN) and an error of 1 makes math.log raise ValueError.
ERROR_EPS = 1e-10
error1_clamped = min(max(error1, ERROR_EPS), 1 - ERROR_EPS)
alpha1 = 0.5 * math.log((1 - error1_clamped) / error1_clamped)

# Update weights: y_true * pred1 is +1 for a correct prediction and -1 for a
# wrong one, so correct samples are scaled down and wrong ones scaled up;
# the result is then renormalised to sum to 1.
weights = weights * np.exp(-alpha1 * y_true * pred1)
weights = weights / np.sum(weights)

print("\nROUND 1")
print("Predictions:", pred1)
print("Weighted Error ε1:", error1)
print("Alpha α1:", round(alpha1, 4))
print("Updated Weights:", np.round(weights, 4))



ROUND 1
Predictions: [-1  1 -1  1  1]
Weighted Error ε1: 0.2
Alpha α1: 0.6931
Updated Weights: [0.125 0.125 0.5   0.125 0.125]


In [4]:
def stump_age(x, threshold=45):
    """Decision stump on the "Age" feature.

    Args:
        x: Age value of a single sample.
        threshold: Smallest value that is classified as +1. Defaults to 45,
            the cut-off used in this notebook.

    Returns:
        1 if ``x >= threshold``, otherwise -1.
    """
    return 1 if x >= threshold else -1

# ROUND 2: evaluate the age stump under the current sample weights.
# NOTE: this cell reads `weights` and then rebinds it, so re-running it
# without re-running the earlier cells applies the update a second time.
y_true = df["y"].to_numpy()

# Predictions
pred2 = np.array([stump_age(x) for x in df["Age"]])

# Weighted error: total weight of the misclassified samples
error2 = np.sum(weights[pred2 != y_true])

# Alpha: the error used in the log-odds is clamped strictly inside (0, 1).
# Unclamped, an error of 0 makes alpha infinite (the normalised weights then
# become NaN) and an error of 1 makes math.log raise ValueError.
ERROR_EPS = 1e-10
error2_clamped = min(max(error2, ERROR_EPS), 1 - ERROR_EPS)
alpha2 = 0.5 * math.log((1 - error2_clamped) / error2_clamped)

# Update weights: y_true * pred2 is +1 for a correct prediction and -1 for a
# wrong one, so correct samples are scaled down and wrong ones scaled up;
# the result is then renormalised to sum to 1.
weights = weights * np.exp(-alpha2 * y_true * pred2)
weights = weights / np.sum(weights)

print("\nROUND 2")
print("Predictions:", pred2)
print("Weighted Error ε2:", error2)
print("Alpha α2:", round(alpha2, 4))
print("Updated Weights:", np.round(weights, 4))



ROUND 2
Predictions: [-1  1  1 -1  1]
Weighted Error ε2: 0.125
Alpha α2: 0.973
Updated Weights: [0.0714 0.0714 0.2857 0.5    0.0714]


In [5]:
# ROUND 3: evaluate the smoking stump again under the current sample weights.
# NOTE: this cell reads `weights` and then rebinds it, so re-running it
# without re-running the earlier cells applies the update a second time.
y_true = df["y"].to_numpy()

# Predictions (same smoking stump)
pred3 = np.array([stump_smoking(x) for x in df["Smoking"]])

# Weighted error: total weight of the misclassified samples
error3 = np.sum(weights[pred3 != y_true])

# Alpha: the error used in the log-odds is clamped strictly inside (0, 1).
# Unclamped, an error of 0 makes alpha infinite (the normalised weights then
# become NaN) and an error of 1 makes math.log raise ValueError.
ERROR_EPS = 1e-10
error3_clamped = min(max(error3, ERROR_EPS), 1 - ERROR_EPS)
alpha3 = 0.5 * math.log((1 - error3_clamped) / error3_clamped)

# Update weights: y_true * pred3 is +1 for a correct prediction and -1 for a
# wrong one, so correct samples are scaled down and wrong ones scaled up;
# the result is then renormalised to sum to 1.
weights = weights * np.exp(-alpha3 * y_true * pred3)
weights = weights / np.sum(weights)

print("\nROUND 3")
print("Predictions:", pred3)
print("Weighted Error ε3:", error3)
print("Alpha α3:", round(alpha3, 4))
print("Updated Weights:", np.round(weights, 4))



ROUND 3
Predictions: [-1  1 -1  1  1]
Weighted Error ε3: 0.28571428571428575
Alpha α3: 0.4581
Updated Weights: [0.05 0.05 0.5  0.35 0.05]


In [6]:
def final_classifier(row):
    """Combine the three boosting rounds into a single +1/-1 prediction.

    Each round contributes its stump output scaled by that round's alpha
    (`alpha1`, `alpha2`, `alpha3` are read from the notebook namespace).
    The smoking stump is used for rounds 1 and 3, the age stump for round 2.

    Args:
        row: Record supporting ``row["Smoking"]`` and ``row["Age"]`` lookups.

    Returns:
        1 when the weighted vote is strictly positive, otherwise -1.
    """
    round_votes = [
        alpha1 * stump_smoking(row["Smoking"]),
        alpha2 * stump_age(row["Age"]),
        alpha3 * stump_smoking(row["Smoking"]),
    ]
    # Added left to right in round order.
    score = round_votes[0] + round_votes[1] + round_votes[2]
    if score > 0:
        return 1
    return -1

# Score every row with the combined classifier and store the result on `df`.
final_predictions = df.apply(final_classifier, axis=1)
df["Final Prediction"] = final_predictions

# Show the two features used by the stumps next to the true and predicted labels.
report_columns = ["Smoking", "Age", "y", "Final Prediction"]
print("\nFINAL OUTPUT")
print(df[report_columns])



FINAL OUTPUT
   Smoking  Age  y  Final Prediction
0        0   38 -1                -1
1        5   52  1                 1
2        0   45  1                -1
3       12   29  1                 1
4        8   61  1                 1
