<a href="https://colab.research.google.com/github/AdithGH762/Fundamentals-of-ML/blob/main/FML_Lab2_adith.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

# -----------------------------
# 1. Create the dataset
# -----------------------------
# Toy patient table; "y" is the illness label (No = -1, Yes = +1, with noise).
data = dict(
    Gender=["M", "F", "M", "F", "M"],
    Age=[38, 52, 45, 29, 61],
    Income=[420000, 360000, 780000, 300000, 500000],
    Smoking=[0, 5, 0, 12, 8],
    y=[-1, +1, +1, +1, +1],
)

df = pd.DataFrame(data)

# Start AdaBoost from a uniform distribution over the rows (0.2 each here).
n = df.shape[0]
weights = np.ones(n) / n

# -----------------------------
# 2. Define decision stumps
# -----------------------------
def stump_smoking(x):
    """Decision stump on the smoking value: +1 when ``x >= 1``, otherwise -1."""
    if x >= 1:
        return 1
    return -1

def stump_age(x):
    """Decision stump on age: +1 when ``x >= 45``, otherwise -1."""
    if x >= 45:
        return 1
    return -1

# -----------------------------
# 3. AdaBoost training
# -----------------------------
# Per-round stump weights and the matching (feature, stump) pairs.
alphas, stumps = [], []

def train_round(stump_func, feature, weights, eps=1e-10):
    """Run one AdaBoost round with a fixed decision stump.

    Parameters
    ----------
    stump_func : callable
        Maps a single feature value to a vote of +1 or -1.
    feature : str
        Name of the column of the module-level ``df`` the stump is applied to.
    weights : numpy.ndarray
        Current sample weights, one entry per row of ``df``.
    eps : float, optional
        The weighted error is clipped to ``[eps, 1 - eps]`` before alpha is
        computed, so a perfect or always-wrong stump yields a large but
        finite alpha instead of +/-inf.

    Returns
    -------
    tuple
        ``(alpha, predictions, weights, error)`` where ``alpha`` is the stump
        weight, ``predictions`` the stump votes for every row, ``weights`` the
        re-normalised sample weights and ``error`` the (unclipped) weighted
        misclassification error.
    """
    labels = df["y"].values
    predictions = df[feature].apply(stump_func).values
    incorrect = predictions != labels
    error = np.sum(weights * incorrect)

    # error == 0 or error == 1 would make the log-odds infinite, which in turn
    # turns every re-normalised weight into NaN (0 / 0). Clip first.
    clipped = np.clip(error, eps, 1 - eps)
    alpha = 0.5 * np.log((1 - clipped) / clipped)

    # Up-weight misclassified rows, down-weight correct ones, then normalise.
    weights = weights * np.exp(-alpha * labels * predictions)
    weights = weights / np.sum(weights)

    return alpha, predictions, weights, error

# -------- ROUNDS 1-3: Smoking, Age, Smoking again --------
# Each round reuses the weights produced by the previous one.
_schedule = (
    ("Smoking", stump_smoking),
    ("Age", stump_age),
    ("Smoking", stump_smoking),
)
_history = []
for _feature, _stump in _schedule:
    _alpha, _pred, weights, _err = train_round(_stump, _feature, weights)
    alphas.append(_alpha)
    stumps.append((_feature, _stump))
    _history.append((_alpha, _pred, _err))

# Keep the per-round names available for inspection.
(alpha1, pred1, err1), (alpha2, pred2, err2), (alpha3, pred3, err3) = _history

# -----------------------------
# 4. Final strong classifier
# -----------------------------
def strong_classifier(row):
    """Classify one row with the alpha-weighted vote of all trained stumps.

    Parameters
    ----------
    row : mapping
        Anything indexable by feature name (e.g. a DataFrame row) that holds
        the features referenced in the module-level ``stumps`` list.

    Returns
    -------
    int
        +1 when the weighted score is non-negative, otherwise -1. A tied
        score (exactly 0) is resolved to +1 so the result is always a valid
        label; ``np.sign`` would return 0 in that case.
    """
    score = 0
    for alpha, (feature, stump) in zip(alphas, stumps):
        score += alpha * stump(row[feature])
    return 1 if score >= 0 else -1

# Score every row with the combined classifier.
df["Final_Prediction"] = df.apply(strong_classifier, axis="columns")

# -----------------------------
# 5. Results
# -----------------------------
print("Alphas:", alphas)
print("\nFinal Predictions:")
print(df.loc[:, ["Age", "Smoking", "y", "Final_Prediction"]])

Alphas: [np.float64(0.6931471805599453), np.float64(0.9729550745276566), np.float64(0.45814536593707733)]

Final Predictions:
   Age  Smoking  y  Final_Prediction
0   38        0 -1              -1.0
1   52        5  1               1.0
2   45        0  1              -1.0
3   29       12  1               1.0
4   61        8  1               1.0


In [None]:
import numpy as np
import pandas as pd

# Toy patient table; "y" is the +1 / -1 target label.
data = pd.DataFrame({
    "Gender": ["M", "F", "M", "F", "M"],
    "Age": [38, 52, 45, 29, 61],
    "Income": [420000, 360000, 780000, 300000, 500000],
    "Smoking": [0, 5, 0, 12, 8],
    "y": [-1, +1, +1, +1, +1]
})  # the closing "})" was missing, which made the whole cell a SyntaxError

# Start from uniform sample weights.
n = len(data)
w = np.ones(n) / n


def stump_smoking(x):
    """Return +1 if the smoking value ``x`` is at least 1, else -1."""
    vote = -1
    if x >= 1:
        vote = 1
    return vote

def stump_age(x):
    """Return +1 if the age ``x`` is at least 45, else -1."""
    vote = -1
    if x >= 45:
        vote = 1
    return vote

def stump_income(x, threshold):
    """Return +1 if the income ``x`` reaches ``threshold``, else -1."""
    if x >= threshold:
        return 1
    return -1

def weighted_error(y_true, y_pred, w):
    """Sum the weights ``w`` of the samples where ``y_pred`` differs from ``y_true``."""
    mismatched = y_true != y_pred
    return w[mismatched].sum()

def compute_alpha(err, eps=1e-10):
    """Return the AdaBoost stump weight ``0.5 * ln((1 - err) / err)``.

    Parameters
    ----------
    err : float
        Weighted misclassification error of the stump.
    eps : float, optional
        ``err`` is clipped to ``[eps, 1 - eps]`` first. Without this,
        ``err == 0`` divides by zero and ``err == 1`` takes ``log(0)``,
        producing an exception or an infinite alpha.

    Returns
    -------
    float
        Positive for ``err < 0.5``, zero at ``err == 0.5``, negative above.
    """
    err = np.clip(err, eps, 1 - eps)
    return 0.5 * np.log((1 - err) / err)

def update_weights(w, y, y_pred, alpha):
    """Re-weight the samples after one boosting round and normalise to sum 1.

    Each weight is multiplied by ``exp(-alpha * y * y_pred)``, so samples
    where ``y`` and ``y_pred`` disagree gain weight when ``alpha`` is positive.
    """
    exponent = -alpha * y * y_pred
    rescaled = w * np.exp(exponent)
    total = np.sum(rescaled)
    return rescaled / total

# -----------------------------
# ROUNDS 1-4: Smoking, Age, Smoking again, Income (≥ 700000)
# -----------------------------
y = data["y"].values

# (printed title, feature column, stump) for each round, in training order.
_rounds = (
    ("ROUND 1 (Smoking)", "Smoking", stump_smoking),
    ("ROUND 2 (Age ≥ 45)", "Age", stump_age),
    ("ROUND 3 (Smoking again)", "Smoking", stump_smoking),
    ("ROUND 4 (Income ≥ 700000)", "Income", lambda x: stump_income(x, 700000)),
)

_log = []
for _title, _column, _stump in _rounds:
    _pred = data[_column].apply(_stump).values
    _err = weighted_error(y, _pred, w)
    _alpha = compute_alpha(_err)
    # The updated weights feed the next round.
    w = update_weights(w, y, _pred, _alpha)

    print(_title)
    print("Error:", _err)
    print("Alpha:", _alpha)
    print("Weights:", w, "\n")

    _log.append((_pred, _err, _alpha))

# Per-round names; alpha1..alpha4 are read by the final classifier.
(
    (pred1, err1, alpha1),
    (pred2, err2, alpha2),
    (pred3, err3, alpha3),
    (pred4, err4, alpha4),
) = _log

# -----------------------------
# Final Strong Classifier
# -----------------------------
def final_predict(row):
    """Combine the four trained stumps into one +1 / -1 prediction for ``row``.

    The score is the sum of each round's alpha times its stump vote (the
    smoking stump counts twice, once per round it was used in). A
    non-negative score maps to +1, anything else to -1.
    """
    smoking_vote = stump_smoking(row["Smoking"])
    age_vote = stump_age(row["Age"])
    income_vote = stump_income(row["Income"], 700000)

    # Accumulate in round order so the floating-point sum is unchanged.
    score = alpha1 * smoking_vote
    score = score + alpha2 * age_vote
    score = score + alpha3 * smoking_vote
    score = score + alpha4 * income_vote

    if score >= 0:
        return 1
    return -1

# Score every row, then flag which predictions match the label.
data["Final_Pred"] = data.apply(final_predict, axis="columns")
data["Correct"] = data["y"] == data["Final_Pred"]

print("FINAL RESULTS")
print(data)


ROUND 1 (Smoking)
Error: 0.2
Alpha: 0.6931471805599453
Weights: [0.125 0.125 0.5   0.125 0.125] 

ROUND 2 (Age ≥ 45)
Error: 0.125
Alpha: 0.9729550745276566
Weights: [0.07142857 0.07142857 0.28571429 0.5        0.07142857] 

ROUND 3 (Smoking again)
Error: 0.28571428571428575
Alpha: 0.45814536593707733
Weights: [0.05 0.05 0.5  0.35 0.05] 

ROUND 4 (Income ≥ 700000)
Error: 0.45000000000000007
Alpha: 0.10033534773107544
Weights: [0.04545455 0.05555556 0.45454545 0.38888889 0.05555556] 

FINAL RESULTS
  Gender  Age  Income  Smoking  y  Final_Pred  Correct
0      M   38  420000        0 -1          -1     True
1      F   52  360000        5  1           1     True
2      M   45  780000        0  1          -1    False
3      F   29  300000       12  1           1     True
4      M   61  500000        8  1           1     True
