In [2]:
# ── Stage 2: recommendation engine & save ─────────────────────────────────────
import pandas as pd

# 1) read the predictions from Stage 1
preds = pd.read_csv("predictions.csv")

# 2) read your original train_data to compute mechanism success rates
train = pd.read_csv("train_data.csv")

# 3) explode the coping list into one row per (student, mechanism)
train["Mechanism"] = train["Stress Coping Mechanisms"].str.split(",")
mechs = train.explode("Mechanism")
# Strip surrounding whitespace so "Yoga" and " Yoga" (from "A, Yoga"-style
# lists) are counted as one mechanism, then drop missing/blank entries so
# they cannot show up as a mechanism of their own.
mechs["Mechanism"] = mechs["Mechanism"].str.strip()
mechs = mechs[mechs["Mechanism"].notna() & (mechs["Mechanism"] != "")].copy()

# 4) mark who ended up Low
mechs["is_low"] = (mechs["Stress Level Category"] == "Low").astype(int)

# 5) compute success rate per mechanism
#    total_uses  = number of rows that list the mechanism
#    low_count   = how many of those rows are in the "Low" category
#    success_rate = low_count / total_uses
success = (
    mechs.groupby("Mechanism")["is_low"]
         .agg(total_uses="count", low_count="sum")
         .assign(success_rate=lambda df: df["low_count"] / df["total_uses"])
         # stable sort: mechanisms with equal success_rate keep the groupby
         # (sorted-by-name) order, so the ranking is reproducible across runs
         .sort_values("success_rate", ascending=False, kind="stable")
)
# a simple ranked list (best success rate first):
ranked_mechs = success.index.tolist()

# 6) recommendation function: top-N not already used
def recommend(row, N=3, ranked=None):
    """Return up to N coping mechanisms for one row, best-ranked first.

    Mechanisms the row does not already list are preferred. If fewer than N
    such mechanisms exist, the result is padded with the best-ranked
    mechanisms the row already uses, so no name is ever repeated.

    Args:
        row: Mapping/Series with a "Stress Coping Mechanisms" entry holding a
            comma-separated string of mechanism names. A non-string value
            (e.g. NaN from an empty CSV cell) is treated as "none used".
        N: Maximum number of recommendations to return.
        ranked: Mechanism names ordered best-first. Defaults to the
            module-level ``ranked_mechs`` list.

    Returns:
        A list of at most N distinct mechanism names.

    Raises:
        KeyError: If ``row`` has no "Stress Coping Mechanisms" entry.
    """
    if ranked is None:
        ranked = ranked_mechs

    # Compare on stripped names and collapse duplicates (order-preserving) so
    # " Yoga" and "Yoga" are treated as the same mechanism on both sides.
    ranked = list(dict.fromkeys(
        m.strip() for m in ranked if isinstance(m, str) and m.strip()
    ))

    raw = row["Stress Coping Mechanisms"]
    already = (
        {m.strip() for m in raw.split(",")} if isinstance(raw, str) else set()
    )

    fresh = [m for m in ranked if m not in already]
    if len(fresh) >= N:
        return fresh[:N]

    # Not enough new mechanisms: still hand them something, but pad only with
    # mechanisms not already in `fresh` to avoid duplicate recommendations.
    used = [m for m in ranked if m in already]
    return fresh + used[:N - len(fresh)]

# 7) probability of dropping category
def p_drop(row):
    """Return the probability that this row moves to a lower stress category.

    Reads "Stress Level Category" from ``row``:
      * "High"   -> row["P_med"] + row["P_low"]
      * "Medium" -> row["P_low"]
      * anything else -> 0.0 (no probability columns are read)
    """
    level = row["Stress Level Category"]
    if level == "High":
        chance = row["P_med"] + row["P_low"]
    elif level == "Medium":
        chance = row["P_low"]
    else:
        chance = 0.0
    return chance

# 8) apply to the predictions table
# Fail fast with a readable message when the predictions table lacks a column
# that recommend()/p_drop() read for every row; otherwise the failure shows up
# as a bare KeyError raised from inside DataFrame.apply.
required_cols = ["Stress Coping Mechanisms", "Stress Level Category"]
missing_cols = [col for col in required_cols if col not in preds.columns]
if missing_cols:
    raise KeyError(
        f"predictions table is missing required column(s) {missing_cols}; "
        f"available columns: {list(preds.columns)}"
    )

preds["recommendations"]   = preds.apply(recommend, axis=1)
preds["P_category_drop"]   = preds.apply(p_drop, axis=1)

# 9) save final recommendations
# NOTE: each "recommendations" cell is a Python list, so to_csv writes its
# string form (e.g. "['A', 'B']") into the file.
OUT2 = "recommendations.csv"
preds.to_csv(OUT2, index=False)
print(f"✅ Wrote recommendations → {OUT2}")


KeyError: 'Stress Coping Mechanisms'