In [11]:
import pandas as pd
import sqlite3
import numpy as np
import pickle

In [12]:
conn = sqlite3.connect("../churn.db")
original_df = pd.read_sql("SELECT * FROM customers", conn)

In [3]:
with open("../models/log_model.pkl", "rb") as f:
    model = pickle.load(f)

In [4]:
with open("../data/processed/X_test.pkl", "rb") as f:
    X_test = pickle.load(f)


In [6]:
with open("../data/processed/y_test.pkl", "rb") as f:
    y_test = pickle.load(f)


In [5]:
y_probs = model.predict_proba(X_test)[:, 1]


In [7]:
simulation_df = X_test.copy()
simulation_df["ActualChurn"] = y_test.values
simulation_df["ChurnProbability"] = y_probs


### Sort by risk

In [8]:
simulation_df = simulation_df.sort_values(by="ChurnProbability", ascending=False)


In [9]:
top_30 = int(len(simulation_df) * 0.30)
targeted = simulation_df.head(top_30)


In [10]:
captured = targeted["ActualChurn"].sum()
total_churn = y_test.sum()

print("Recall @ Top 30%:", captured / total_churn)


Recall @ Top 30%: 0.6577540106951871


In [13]:
simulation_df["MonthlyCharges"] = original_df.loc[X_test.index, "MonthlyCharges"].values

In [14]:
TARGET_PERCENTAGE = 0.30   # target top 30%
RETENTION_SUCCESS_RATE = 0.40  # 40% of contacted churners stay
MARKETING_COST_PER_CUSTOMER = 20  # €20 retention offer


In [15]:
top_n = int(len(simulation_df) * TARGET_PERCENTAGE)
targeted = simulation_df.head(top_n)


In [16]:
churners_targeted = targeted["ActualChurn"].sum()
total_churners = y_test.sum()

recall_at_target = churners_targeted / total_churners


In [17]:
revenue_at_risk = targeted[targeted["ActualChurn"] == 1]["MonthlyCharges"].sum()

In [18]:
revenue_saved = revenue_at_risk * RETENTION_SUCCESS_RATE

In [19]:
marketing_cost = len(targeted) * MARKETING_COST_PER_CUSTOMER

In [20]:
net_gain = revenue_saved - marketing_cost

In [21]:
print("Targeted customers:", len(targeted))
print("Recall at 30%:", round(recall_at_target, 3))
print("Revenue at risk:", round(revenue_at_risk, 2))
print("Expected revenue saved:", round(revenue_saved, 2))
print("Marketing cost:", round(marketing_cost, 2))
print("Net monthly impact:", round(net_gain, 2))


Targeted customers: 422
Recall at 30%: 0.658
Revenue at risk: 16319.75
Expected revenue saved: 6527.9
Marketing cost: 8440
Net monthly impact: -1912.1


Under baseline assumptions (40% retention success, €20 marketing cost), targeting top 30% high-risk customers results in negative ROI.

This indicates that retention campaigns must either:
- Improve success rate,
- Reduce incentive cost,
- Or target a smaller high-risk segment.

Further optimization is required to maximize profitability.
