# SPRINT 4 — Campaign Analytics
# Goal: Evaluate response rates, ROI, and define contact policy per cluster.

In [7]:
import numpy as np
import pandas as pd
from pathlib import Path
from IPython.display import display

# PATHS
processed_dir = Path("../Data/Processed")
df = pd.read_csv(processed_dir / "customers_master.csv")

# 1) Attach cluster labels (from Step 3)
labels_path = processed_dir / "cluster_labels.csv"
if "Cluster" not in df.columns:
    assert labels_path.exists(), "Missing clusters — run Step 3 first."
    labels = pd.read_csv(labels_path).drop_duplicates(subset=["ID"], keep="last")
    df = df.merge(labels, on="ID", how="left")

df = df.dropna(subset=["Cluster"]).copy()
df["Cluster"] = df["Cluster"].astype(int)

# Normalize column names (defensive)
df.columns = df.columns.str.strip()

# 2) Resolve campaign columns (AcceptedCmp1..5 or AcceptedCmpOverall; fallback to Response)
cmp_cols = [c for c in df.columns if c.startswith("AcceptedCmp") and c not in ("AcceptedCmpOverall",)]
if not cmp_cols:
    overall = [c for c in df.columns if c.lower() == "acceptedcmpoverall"]
    if overall:
        cmp_cols = overall

use_response_as_campaign = False
if not cmp_cols:
# Fallback: treat 'Response' as the last campaign indicator
    assert "Response" in df.columns, f"No campaign columns nor 'Response'. Available: {df.columns.tolist()}"
    use_response_as_campaign = True
    cmp_cols = ["Response"]

# Ensure 0/1 binary values for campaign flags and Response
for c in set(cmp_cols + (["Response"] if "Response" in df.columns else [])):
    df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0).astype(int).clip(0, 1)

# 3) Produce long format: ID | Cluster | Campaign | Accepted
if use_response_as_campaign:
    accepted_long = (
        df[["ID","Cluster","Response"]]
          .rename(columns={"Response":"Accepted"})
          .assign(Campaign="LastCampaign")[["ID","Cluster","Campaign","Accepted"]]
    )
else:
    accepted_long = (
        df[["ID","Cluster"] + cmp_cols]
          .melt(id_vars=["ID","Cluster"], value_vars=cmp_cols,
                var_name="Campaign", value_name="Accepted")
    )
    accepted_long["Accepted"] = pd.to_numeric(accepted_long["Accepted"], errors="coerce").fillna(0).astype(int).clip(0, 1)

# 4) Global baselines (used for lift/ROAS/ROI proxies)
baseline_response = float(df["Response"].mean()) if "Response" in df.columns else 0.0
mean_aov_global   = float(pd.to_numeric(df["AOV"], errors="coerce").fillna(0).mean())
cost_per_contact  = (
    float(df["Z_CostContact"].median())
    if "Z_CostContact" in df.columns and df["Z_CostContact"].notna().any()
    else 3.0
)
if not np.isfinite(cost_per_contact) or cost_per_contact <= 0:
    cost_per_contact = 3.0  # safe default

def roas_proxy(response_rate: float, aov: float, cost: float) -> float:
    """ROAS proxy = (resp_rate * AOV) / cost_per_contact."""
    if not np.isfinite(response_rate) or not np.isfinite(aov) or cost <= 0:
        return np.nan
    return (response_rate * aov) / cost

def roi_proxy(response_rate: float, aov: float, cost: float) -> float:
    """ROI proxy = ((resp_rate * AOV) - cost_per_contact) / cost_per_contact."""
    if not np.isfinite(response_rate) or not np.isfinite(aov) or cost <= 0:
        return np.nan
    return ((response_rate * aov) - cost) / cost

# 5) Segment-level metrics (business view per cluster)
seg = (
    df.groupby("Cluster", dropna=False)
      .agg(
          n=("ID","count"),
          response_rate=("Response","mean"),
          Recency_median=("Recency","median"),
          AOV_mean=("AOV","mean"),
          Income_median=("Income","median"),
          TotalSpend_median=("TotalSpend","median")
      )
      .reset_index()
)

seg["lift_vs_baseline"] = np.where(baseline_response > 0, seg["response_rate"] / baseline_response, np.nan)
seg["ROAS_proxy"] = seg.apply(lambda r: roas_proxy(r["response_rate"], r["AOV_mean"], cost_per_contact), axis=1)
seg["ROI_proxy"]  = seg.apply(lambda r: roi_proxy (r["response_rate"], r["AOV_mean"], cost_per_contact), axis=1)

# 6) Campaign × cluster acceptance rates
camp_cluster = (
    accepted_long
      .groupby(["Campaign","Cluster"], dropna=False)
      .agg(n=("ID","count"), accept_rate=("Accepted","mean"))
      .reset_index()
)

# 7) Contact policy rule (actionable recommendation)
# CONTACT if both ROI>0 and LIFT>1; otherwise DO_NOT_CONTACT
policy = (
    seg.assign(
        recommend=lambda d: np.where(
            (d["ROI_proxy"] > 0) & (d["lift_vs_baseline"] > 1), "CONTACT", "DO_NOT_CONTACT"
        )
    )
    .sort_values(["recommend","ROI_proxy","AOV_mean"], ascending=[False, False, False])
)

# 8) QA checks
assert seg["n"].sum() == len(df), "Segment sizes do not sum to total rows."
for name, tdf in [("segment_uplift_roi", seg), ("campaign_by_cluster", camp_cluster), ("contact_policy", policy)]:
    assert not tdf.empty, f"{name} is empty."

# 9) Export CSVs for BI/dashboarding
(seg.round(6)).to_csv(processed_dir / "segment_uplift_roi.csv", index=False)
(camp_cluster.round(6)).to_csv(processed_dir / "campaign_by_cluster.csv", index=False)
(policy.round(6)).to_csv(processed_dir / "contact_policy.csv", index=False)

print("STEP 4  Saved:")
print(" -", processed_dir / "campaign_by_cluster.csv")
print(" -", processed_dir / "segment_uplift_roi.csv")
print(" -", processed_dir / "contact_policy.csv")

# Quick preview
display(seg.sort_values("ROI_proxy", ascending=False).head(10))
display(policy[["Cluster","n","response_rate","AOV_mean","lift_vs_baseline","ROAS_proxy","ROI_proxy","recommend"]].head(10))
display(camp_cluster.sort_values(["Campaign","accept_rate"], ascending=[True, False]).head(12))

# ============================
# VALIDATION + KPI EXPORTS (CV-friendly)
# ============================

# 10) Recommendation consistency: rule vs produced flag
check = policy.copy()
check["expected_recommend"] = np.where(
    (check["ROI_proxy"] > 0) & (check["lift_vs_baseline"] > 1),
    "CONTACT", "DO_NOT_CONTACT"
)
assert (check["recommend"] == check["expected_recommend"]).all(), "Inconsistent recommendations!"
print(" Recommendations consistent with ROI>0 & lift>1")

# 11) Compact KPI summary (for README/BI)
kpi = pd.DataFrame({
    "customers_total": [len(df)],
    "baseline_response": [baseline_response],
    "mean_AOV_global": [mean_aov_global],
    "cost_per_contact": [cost_per_contact],
    "segments_contact": [(policy["recommend"] == "CONTACT").sum()],
    "segments_do_not_contact": [(policy["recommend"] == "DO_NOT_CONTACT").sum()],
}).round(6)
kpi_path = processed_dir / "kpi_summary.csv"
kpi.to_csv(kpi_path, index=False)

# 12) (Optional) Budget allocation example by ROAS
monthly_budget = 10000  # <- adjust for your scenario
contacts_capacity = int(monthly_budget // cost_per_contact)

alloc = (
    policy.sort_values(["recommend","ROAS_proxy"], ascending=[False, False])
          .assign(weight=lambda d: np.where(d["recommend"] == "CONTACT", d["ROAS_proxy"], 0))
)
if alloc["weight"].sum() > 0:
    alloc["contacts_quota"] = np.floor(alloc["weight"] / alloc["weight"].sum() * contacts_capacity).astype(int)
else:
    alloc["contacts_quota"] = 0

alloc_path = processed_dir / "contact_allocation_plan.csv"
alloc[["Cluster","n","response_rate","AOV_mean","ROAS_proxy","ROI_proxy","recommend","contacts_quota"]] \
    .to_csv(alloc_path, index=False)

print("\nKPI & allocation saved:")
print(" -", kpi_path)
print(" -", alloc_path)
display(kpi)
display(alloc.head())


STEP 4  Saved:
 - ..\Data\Processed\campaign_by_cluster.csv
 - ..\Data\Processed\segment_uplift_roi.csv
 - ..\Data\Processed\contact_policy.csv


Unnamed: 0,Cluster,n,response_rate,Recency_median,AOV_mean,Income_median,TotalSpend_median,lift_vs_baseline,ROAS_proxy,ROI_proxy
0,0,695,0.241727,54.0,66.043114,74165.0,1289.0,1.626893,5.32146,4.32146
2,2,640,0.153125,46.0,27.692657,53128.5,462.0,1.030578,1.413479,0.413479
1,1,886,0.072235,49.0,9.458636,33297.5,55.0,0.486162,0.227747,-0.772253


Unnamed: 0,Cluster,n,response_rate,AOV_mean,lift_vs_baseline,ROAS_proxy,ROI_proxy,recommend
1,1,886,0.072235,9.458636,0.486162,0.227747,-0.772253,DO_NOT_CONTACT
0,0,695,0.241727,66.043114,1.626893,5.32146,4.32146,CONTACT
2,2,640,0.153125,27.692657,1.030578,1.413479,0.413479,CONTACT


Unnamed: 0,Campaign,Cluster,n,accept_rate
0,LastCampaign,0,695,0.241727
2,LastCampaign,2,640,0.153125
1,LastCampaign,1,886,0.072235


 Recommendations consistent with ROI>0 & lift>1

KPI & allocation saved:
 - ..\Data\Processed\kpi_summary.csv
 - ..\Data\Processed\contact_allocation_plan.csv


Unnamed: 0,customers_total,baseline_response,mean_AOV_global,cost_per_contact,segments_contact,segments_do_not_contact
0,2221,0.148582,32.419458,3.0,2,1


Unnamed: 0,Cluster,n,response_rate,Recency_median,AOV_mean,Income_median,TotalSpend_median,lift_vs_baseline,ROAS_proxy,ROI_proxy,recommend,weight,contacts_quota
1,1,886,0.072235,49.0,9.458636,33297.5,55.0,0.486162,0.227747,-0.772253,DO_NOT_CONTACT,0.0,0
0,0,695,0.241727,54.0,66.043114,74165.0,1289.0,1.626893,5.32146,4.32146,CONTACT,5.32146,2633
2,2,640,0.153125,46.0,27.692657,53128.5,462.0,1.030578,1.413479,0.413479,CONTACT,1.413479,699
