# 4) Campaign Simulation — Call Order, ROI, and Targets


This notebook turns predictions into **action**: we rank holdout customers by predicted probability, simulate cumulative **contracts vs. calls**, and answer: *How many calls does it take to reach 300 contracts?*


In [1]:

# Common setup
import os, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

# Single seed shared by the notebook; seeding NumPy's legacy global RNG here
# keeps any later np.random.* call repeatable after Restart & Run All.
RANDOM_STATE = 42
np.random.seed(seed=RANDOM_STATE)


In [2]:

import joblib, json
import plotly.graph_objects as go
import plotly.express as px

# --- Artifact locations -------------------------------------------------
# Inputs read by this cell.
DATA_PATH = "./data/processed.csv"
PREPROC_PATH = "./models/preprocessor.joblib"
SPLIT_PATH = "./outputs/split_indices.json"
BEST_CALIBRATED_PATH = "./models/best_model_calibrated.joblib"
# Outputs written at the end of this cell.
SIM_PATH = "./outputs/simulation_results.csv"
CALL_LIST_PATH = "./outputs/call_list_scored.csv"

# --- Load data and the saved split ---------------------------------------
df = pd.read_csv(DATA_PATH)
with open(SPLIT_PATH) as f:
    idx = json.load(f)

# --- Load fitted artifacts -----------------------------------------------
# NOTE: joblib.load unpickles its input, so only point these paths at trusted files.
preprocess = joblib.load(PREPROC_PATH)
best_model = joblib.load(BEST_CALIBRATED_PATH)

# --- Features / target ---------------------------------------------------
# The target is 1 where column "y" reads "yes" (case-insensitive), else 0.
X_all = df.drop(columns=["y"])
y_all = df["y"].astype(str).str.lower().eq("yes").astype(int)

# Select the validation rows by index *label* (.loc).
# NOTE(review): assumes idx["valid_idx"] holds labels of df's index, not
# positions — confirm against the notebook that wrote the split file.
X_valid = X_all.loc[idx["valid_idx"]]
y_valid = y_all.loc[idx["valid_idx"]]

# --- Score the validation set --------------------------------------------
Xv = preprocess.transform(X_valid)
# Column 1 of predict_proba is used as the "yes" probability
# (presumably the model's classes are ordered [0, 1] — verify).
proba = best_model.predict_proba(Xv)[:, 1]

# Build the simulation table: validation features plus the true label and the
# predicted probability, ordered from most to least likely "yes".
#
# The sort uses kind="mergesort" (a stable algorithm). The default quicksort is
# not stable, so customers with identical p_yes could come out in an arbitrary
# order, which changes cum_contracts_true and the "calls needed" figures
# derived from it. A stable sort keeps tied rows in a well-defined order.
sim_df = (
    X_valid
    .assign(y_true=y_valid.values, p_yes=proba)  # assign() returns a new frame; X_valid is untouched
    .sort_values("p_yes", ascending=False, kind="mergesort")
    .reset_index(drop=True)
)

# Row i (0-based) corresponds to having placed i + 1 calls in ranked order.
sim_df["cum_calls"] = np.arange(1, len(sim_df) + 1)
# Contracts actually observed on the holdout after that many calls.
sim_df["cum_contracts_true"] = sim_df["y_true"].cumsum()
# Contracts the model expects after that many calls (running sum of probabilities).
sim_df["cum_contracts_expected"] = sim_df["p_yes"].cumsum()

# Contracts vs Calls: observed holdout outcomes next to the model's expected count
fig = go.Figure(
    data=[
        go.Scatter(
            x=sim_df["cum_calls"],
            y=sim_df["cum_contracts_true"],
            mode="lines",
            name="Observed (holdout)",
        ),
        go.Scatter(
            x=sim_df["cum_calls"],
            y=sim_df["cum_contracts_expected"],
            mode="lines",
            name="Expected (sum of p)",
        ),
    ]
)
fig.update_layout(
    title="Contracts vs Calls (Top-ranked)",
    xaxis_title="Number of Calls",
    yaxis_title="Contracts",
    template="plotly_white",
)
fig.show()

# Estimate calls for 300 contracts (observed and expected)
TARGET_CONTRACTS = 300


def _calls_to_reach(frame, cum_col, target):
    """Return the smallest `cum_calls` whose `cum_col` value is >= `target`.

    The result is NaN when no row reaches the target (min of an empty selection).
    """
    reached = frame[cum_col] >= target
    return frame.loc[reached, "cum_calls"].min()


calls_obs = _calls_to_reach(sim_df, "cum_contracts_true", TARGET_CONTRACTS)
calls_exp = _calls_to_reach(sim_df, "cum_contracts_expected", TARGET_CONTRACTS)

# Show a readable placeholder instead of NaN when the target is never met.
_obs_shown = "Not reached" if np.isnan(calls_obs) else calls_obs
_exp_shown = "Not reached" if np.isnan(calls_exp) else calls_exp

print(f"Estimated calls for {TARGET_CONTRACTS} contracts — Observed (holdout): {_obs_shown}")
print(f"Estimated calls for {TARGET_CONTRACTS} contracts — Expected: {_exp_shown}")

# Persist two CSVs: the simulation table as-is, and the same table with an
# extra "rank" column equal to the row index + 1.
_call_list = sim_df.assign(rank=sim_df.index + 1)
for _frame, _path in ((sim_df, SIM_PATH), (_call_list, CALL_LIST_PATH)):
    _frame.to_csv(_path, index=False)

print("Saved simulation ->", SIM_PATH)
print("Saved scored call list ->", CALL_LIST_PATH)



Estimated calls for 300 contracts — Observed (holdout): 381
Estimated calls for 300 contracts — Expected: 414
Saved simulation -> ./outputs/simulation_results.csv
Saved scored call list -> ./outputs/call_list_scored.csv
