In [3]:
import os
import numpy as np
import pandas as pd
from sqlalchemy import create_engine, text
import os
import uuid
import pandas as pd
from sqlalchemy import create_engine, text
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
import os
import pandas as pd

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Fail fast with an explicit configuration error instead of handing None to
# create_engine when the connection string has not been provided.
database_url = os.getenv("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file."
    )
engine = create_engine(database_url)

# Seeded generator used for every random draw in the outcome simulation below.
rng = np.random.default_rng(42)

# Pull interventions that don't have outcomes yet (outcome_ts still NULL),
# together with the columns the outcome rules read.
sql = """
SELECT
  intervention_id,
  run_id,
  customer_id,
  segment_label,
  recency_days,
  churn_probability,
  recommended_action,
  action_ts
FROM hpce.intervention_logs
WHERE outcome_ts IS NULL
"""
df = pd.read_sql(sql, engine)

# Simulate a synthetic outcome (responded / converted / revenue) for every
# pending intervention in `df` and write the results back to
# hpce.intervention_logs in a single transaction.
if df.empty:
    print("No new interventions found without outcomes. Nothing to update.")
else:
    # ---- Simple outcome rules (minimal, supervisor-friendly) ----
    # Per action: (base response probability, base conversion probability).
    # Tweakable, but keep simple.
    base = {
        "Win-back coupon":        (0.35, 0.12),
        "Soft win-back":          (0.25, 0.08),
        "Reminder message":       (0.22, 0.06),
        "Upsell recommendation":  (0.28, 0.15),
        "Browse-to-buy nudge":    (0.18, 0.05),
        "Onboarding offer":       (0.15, 0.03),
        "Education content":      (0.12, 0.02),
        "No action":              (0.00, 0.00),
    }
    # Probabilities used for any action that is not listed in `base`.
    default_base = (0.15, 0.04)
    # Segments that receive the larger probability bonus and revenue uplift.
    high_value_segments = ("Champion", "Loyal")
    # Recency value standing in for "no recency recorded"; such rows are
    # treated as never-purchased customers by the rules below.
    never_purchased_recency = 999

    def clamp(x, lo=0.0, hi=0.95):
        """Return ``x`` limited to the interval [lo, hi], as a float."""
        return float(max(lo, min(hi, x)))

    responded = []
    converted = []
    revenue = []

    for _, r in df.iterrows():
        action = r["recommended_action"]
        seg = r["segment_label"] if pd.notna(r["segment_label"]) else "Unknown"
        # Explicit missing-value check: `value or 0.0` lets NaN through
        # (NaN is truthy) and raises on pd.NA.
        p = float(r["churn_probability"]) if pd.notna(r["churn_probability"]) else 0.0
        rec = int(r["recency_days"]) if pd.notna(r["recency_days"]) else never_purchased_recency

        resp_base, conv_base = base.get(action, default_base)

        # Segment bonus (very small & explainable); the same value is applied
        # to both the response and the conversion probability.
        if seg in high_value_segments:
            seg_bonus = 0.06
        elif seg == "Recent":
            seg_bonus = 0.02
        else:
            seg_bonus = 0.0

        # Churn-risk effect: only the part of the churn probability above 0.5
        # is penalised, mildly for response and more strongly for conversion.
        churn_excess = max(0, p - 0.5)
        churn_penalty_resp = 0.05 * churn_excess
        churn_penalty_conv = 0.10 * churn_excess

        # Recency effect: very recent buyers convert better, stale ones worse.
        # The sentinel value gets no recency adjustment but an extra
        # conversion penalty instead (never purchased: tough to convert).
        if rec == never_purchased_recency:
            rec_bonus_conv = 0.0
            churn_penalty_conv += 0.05
        elif rec <= 7:
            rec_bonus_conv = 0.05
        elif rec <= 30:
            rec_bonus_conv = 0.02
        elif rec > 90:
            rec_bonus_conv = -0.03
        else:
            rec_bonus_conv = 0.0

        resp_prob = clamp(resp_base + seg_bonus - churn_penalty_resp)
        conv_prob = clamp(conv_base + seg_bonus + rec_bonus_conv - churn_penalty_conv)

        did_respond = rng.random() < resp_prob
        # Conversion is conditional on response (simple funnel); the second
        # random draw only happens when the customer responded.
        did_convert = did_respond and (rng.random() < conv_prob)

        # Synthetic revenue: drawn from an action-specific normal
        # distribution, scaled by segment, 0 if there was no conversion.
        if did_convert:
            if action == "Upsell recommendation":
                rev = float(rng.normal(180, 60))
            elif action in ("Win-back coupon", "Soft win-back"):
                rev = float(rng.normal(120, 50))
            else:
                rev = float(rng.normal(80, 40))

            # Segment scaling
            if seg in high_value_segments:
                rev *= 1.25
            elif seg == "Low Value":
                rev *= 0.75
            elif seg == "Never Purchased":
                rev *= 0.60

            rev = max(10.0, rev)  # keep positive realistic minimum
        else:
            rev = 0.0

        # Store plain Python values so they can be bound as DB parameters.
        responded.append(bool(did_respond))
        converted.append(bool(did_convert))
        revenue.append(float(rev))

    # One timestamp for the whole batch instead of one clock read per row.
    # NOTE(review): this is naive local time, as before - confirm that this
    # is what the outcome_ts column expects.
    batch_ts = pd.Timestamp.now()
    outcome_ts = [batch_ts] * len(df)
    outcome_source = ["synthetic_rule_v1"] * len(df)

    df["responded"] = responded
    df["converted_14d"] = converted
    df["revenue_14d"] = revenue
    df["outcome_ts"] = outcome_ts
    df["outcome_source"] = outcome_source

    update_stmt = text("""
                    UPDATE hpce.intervention_logs
                    SET responded = :responded,
                        converted_14d = :converted_14d,
                        revenue_14d = :revenue_14d,
                        outcome_ts = :outcome_ts,
                        outcome_source = :outcome_source
                    WHERE intervention_id = :intervention_id
                """)
    batch_ts_param = batch_ts.to_pydatetime()
    update_params = [
        {
            "responded": row_responded,
            "converted_14d": row_converted,
            "revenue_14d": row_revenue,
            "outcome_ts": batch_ts_param,
            "outcome_source": "synthetic_rule_v1",
            "intervention_id": int(row_id),
        }
        for row_id, row_responded, row_converted, row_revenue in zip(
            df["intervention_id"], responded, converted, revenue
        )
    ]

    # Update back to DB keyed on intervention_id. Passing the list of
    # parameter dicts issues one executemany call instead of one execute per
    # row; engine.begin() commits on success and rolls back on error.
    with engine.begin() as conn:
        conn.execute(update_stmt, update_params)

    print("Updated outcomes for rows:", len(df))


Updated outcomes for rows: 800
