In [1]:
import numpy as np
import pandas as pd

# =========================================================
# Q1 & Q2: Dataset creation and target encoding (+1 / -1)
# =========================================================
# Toy training set, one tuple per patient: (Age, Smoking, y).
# y encodes the illness label: No = -1, Yes = +1.
data = pd.DataFrame(
    [
        (38, 0, -1),
        (52, 5, +1),
        (45, 0, +1),
        (29, 12, +1),
        (61, 8, +1),
    ],
    columns=["Age", "Smoking", "y"],
)

# Every sample starts with the same weight, 1/n.
n = data.shape[0]
data = data.assign(w=1 / n)

print("Initial Dataset with Weights")
print(data)
print("-" * 50)


# =========================================================
# Weak learners (decision stumps)
# =========================================================
def smoking_stump(row, threshold=1):
    """Decision stump on the ``Smoking`` feature.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by ``"Smoking"`` (e.g. a DataFrame row or a dict).
    threshold : int or float, default 1
        Smallest ``Smoking`` value classified as positive. The default
        reproduces the original hard-coded cut-off.

    Returns
    -------
    int
        ``1`` if ``row["Smoking"] >= threshold``, otherwise ``-1``.
    """
    return 1 if row["Smoking"] >= threshold else -1

def age_stump(row, threshold=45):
    """Decision stump on the ``Age`` feature.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by ``"Age"`` (e.g. a DataFrame row or a dict).
    threshold : int or float, default 45
        Smallest ``Age`` value classified as positive. The default
        reproduces the original hard-coded cut-off.

    Returns
    -------
    int
        ``1`` if ``row["Age"] >= threshold``, otherwise ``-1``.
    """
    return 1 if row["Age"] >= threshold else -1


# =========================================================
# Helper functions (AdaBoost formulas)
# =========================================================
def weighted_error(df, pred_col):
    """Return the total weight of the misclassified rows.

    A row counts as misclassified when its ``"y"`` value differs from the
    value in ``pred_col``; the result is the sum of the ``"w"`` column over
    exactly those rows.
    """
    # Boolean mask: True where the prediction disagrees with the label.
    is_wrong = df["y"].ne(df[pred_col])
    # Multiplying by the mask zeroes out the weights of correct rows.
    return df["w"].mul(is_wrong).sum()

def compute_alpha(error, eps=1e-10):
    """Return the AdaBoost learner weight ``0.5 * ln((1 - error) / error)``.

    Parameters
    ----------
    error : float
        Weighted error of the weak learner, expected in ``[0, 1]``.
    eps : float, default 1e-10
        ``error`` is clipped to ``[eps, 1 - eps]`` before the formula is
        applied.

    Returns
    -------
    float
        Positive when ``error < 0.5``, zero at ``0.5``, negative above it.
        Always finite thanks to the clipping.
    """
    # Without clipping, error == 0 divides by zero and error == 1 takes log(0);
    # the resulting +/-inf alpha would turn the next weight update into NaN.
    error = np.clip(error, eps, 1 - eps)
    return 0.5 * np.log((1 - error) / error)

def update_weights(df, alpha, pred_col):
    """Return a copy of ``df`` with the ``"w"`` column re-weighted and normalised.

    Each weight is multiplied by ``exp(-alpha * y * prediction)``, so rows
    where ``"y"`` and ``pred_col`` agree shrink and rows where they disagree
    grow (for positive ``alpha``). The weights are then divided by their sum
    so they add up to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"w"``, ``"y"`` and ``pred_col``.
    alpha : float
        Weight of the weak learner whose predictions are in ``pred_col``.
    pred_col : str
        Name of the column holding the weak learner's predictions.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input ``df`` is left unmodified, so re-running a
        call with the same input gives the same result.
    """
    rescaled = df["w"] * np.exp(-alpha * df["y"] * df[pred_col])
    # assign() builds a new frame instead of writing into the caller's object.
    return df.assign(w=rescaled / rescaled.sum())


# =========================================================
# Q3–Q6: ROUND 1 (Smoking stump)
# =========================================================
# Round 1 weak learner: the smoking stump votes on every row.
data = data.assign(h1=data.apply(smoking_stump, axis=1))

# Error is measured under the current weights, then converted to the
# learner weight alpha1.
err1 = weighted_error(data, "h1")
alpha1 = compute_alpha(err1)

# Re-weight the samples so the next round sees the updated distribution.
data = update_weights(data, alpha1, "h1")

print("ROUND 1")
print("Weighted Error ε1 =", err1)
print("Alpha α1 =", round(alpha1, 4))
print(data[["Smoking", "y", "h1", "w"]])
print("-" * 50)


# =========================================================
# Q7–Q9: ROUND 2 (Age stump)
# =========================================================
# Round 2 weak learner: the age stump votes on every row.
data = data.assign(h2=data.apply(age_stump, axis=1))

# The error is evaluated with the weights produced by the previous round.
err2 = weighted_error(data, "h2")
alpha2 = compute_alpha(err2)

# Re-weight again so round 3 sees the updated distribution.
data = update_weights(data, alpha2, "h2")

print("ROUND 2")
print("Weighted Error ε2 =", err2)
print("Alpha α2 =", round(alpha2, 4))
print(data[["Age", "y", "h2", "w"]])
print("-" * 50)


# =========================================================
# Q10–Q11: ROUND 3 (Smoking stump again)
# =========================================================
# Round 3 reuses the smoking stump; its votes match h1, but the error is
# now measured under the weights left by round 2.
data = data.assign(h3=data.apply(smoking_stump, axis=1))
err3 = weighted_error(data, "h3")
alpha3 = compute_alpha(err3)
# No weight update in this round: the code below only uses the alphas
# and the h1..h3 votes.

print("ROUND 3")
print("Weighted Error ε3 =", round(err3, 4))
print("Alpha α3 =", round(alpha3, 4))
print("-" * 50)


# =========================================================
# Q12–Q13: Final Strong Classifier
# =========================================================
# Strong classifier: alpha-weighted vote of the three rounds' +1/-1 outputs.
data["Final_Score"] = (
    alpha1 * data["h1"] +
    alpha2 * data["h2"] +
    alpha3 * data["h3"]
)

# np.sign returns 0 for a score of exactly 0, which is not a valid -1/+1
# label and would always count as a miss. Ties are resolved to +1 here;
# non-zero scores (and NaN, if any) keep the value np.sign gives them.
final_sign = np.sign(data["Final_Score"])
data["Final_Prediction"] = final_sign.mask(final_sign == 0, 1.0)

print("FINAL STRONG CLASSIFIER RESULTS")
print(data[["y", "Final_Score", "Final_Prediction"]])

# Share of rows whose predicted label equals the true label, in percent.
accuracy = np.mean(data["y"] == data["Final_Prediction"]) * 100
print("\nFinal Accuracy =", accuracy, "%")

Initial Dataset with Weights
   Age  Smoking  y    w
0   38        0 -1  0.2
1   52        5  1  0.2
2   45        0  1  0.2
3   29       12  1  0.2
4   61        8  1  0.2
--------------------------------------------------
ROUND 1
Weighted Error ε1 = 0.2
Alpha α1 = 0.6931
   Smoking  y  h1      w
0        0 -1  -1  0.125
1        5  1   1  0.125
2        0  1  -1  0.500
3       12  1   1  0.125
4        8  1   1  0.125
--------------------------------------------------
ROUND 2
Weighted Error ε2 = 0.125
Alpha α2 = 0.973
   Age  y  h2         w
0   38 -1  -1  0.071429
1   52  1   1  0.071429
2   45  1   1  0.285714
3   29  1  -1  0.500000
4   61  1   1  0.071429
--------------------------------------------------
ROUND 3
Weighted Error ε3 = 0.2857
Alpha α3 = 0.4581
--------------------------------------------------
FINAL STRONG CLASSIFIER RESULTS
   y  Final_Score  Final_Prediction
0 -1    -2.124248              -1.0
1  1     2.124248               1.0
2  1    -0.178337              -1.0