In [21]:
import joblib

# Load the persisted model artifact (a logistic-regression classifier, judging
# by the file name). joblib.load unpickles the file, so only point this at
# artifacts from a trusted source.
model_path = "../../artifacts/logreg_model.pkl"
logreg_model = joblib.load(model_path)


In [16]:
from sklearn.model_selection import train_test_split
import pandas as pd

# Read the processed dataset from disk.
df = pd.read_csv("../../data/processed/telco_clean.csv")

# "Churn" is the target; every remaining column is kept as a feature.
y = df["Churn"]
X = df.drop(columns="Churn")

# Stratified 80/20 split with a fixed seed so the partition is reproducible.
# NOTE(review): presumably these are the same split settings used when the
# loaded model was trained -- confirm, otherwise X_test may contain rows the
# model has already seen.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [22]:
# predict_proba yields one column per class; keep column index 1 as the
# positive-class probability (presumably "churn" -- confirm against
# logreg_model.classes_).
class_probabilities = logreg_model.predict_proba(X_test)
y_prob = class_probabilities[:, 1]


In [23]:
# Sanity check: display the first ten predicted probabilities.
y_prob[:10]


array([0.04972056, 0.78695435, 0.01423996, 0.4018803 , 0.22787954,
       0.72174566, 0.07370182, 0.34959822, 0.84829954, 0.04671267])

In [24]:
import numpy as np

# Candidate decision thresholds: 0.20 to 0.80 inclusive, in steps of 0.05.
# np.linspace takes the point count explicitly, so the endpoint is included
# without a fudged stop value (np.arange with a float step can gain or lose an
# element through rounding). Rounding to two decimals strips float drift such
# as 0.30000000000000004 so the cutoffs compare and display cleanly.
thresholds = np.round(np.linspace(0.2, 0.8, 13), 2)
thresholds


array([0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 , 0.65, 0.7 ,
       0.75, 0.8 ])

In [25]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Sweep every candidate cutoff and record how precision, recall and F1 respond.
# NOTE(review): the cutoffs are scored on the test split, so a threshold picked
# from this table is tuned on test data -- consider a separate validation split.
results = []

for cutoff in thresholds:
    # Predict the positive class (1) when the probability reaches the cutoff.
    predicted = (y_prob >= cutoff).astype(int)

    # One row per cutoff: [threshold, precision, recall, f1].
    results.append([
        cutoff,
        precision_score(y_test, predicted),
        recall_score(y_test, predicted),
        f1_score(y_test, predicted),
    ])


In [26]:
import pandas as pd

# Tabulate the sweep: one row per cutoff, one column per recorded value.
metric_columns = ["Threshold", "Precision", "Recall", "F1"]
threshold_df = pd.DataFrame(results, columns=metric_columns)

# Last expression of the cell, so the table is rendered as the cell output.
threshold_df


Unnamed: 0,Threshold,Precision,Recall,F1
0,0.2,0.386853,0.959893,0.551459
1,0.25,0.397727,0.935829,0.558214
2,0.3,0.413588,0.927807,0.572135
3,0.35,0.427665,0.90107,0.580034
4,0.4,0.451524,0.871658,0.594891
5,0.45,0.469789,0.831551,0.600386
6,0.5,0.490939,0.796791,0.607543
7,0.55,0.515426,0.759358,0.614054
8,0.6,0.541414,0.716578,0.616801
9,0.65,0.574032,0.673797,0.619926


In [None]:
After testing multiple probability thresholds, a cutoff of 0.40 was chosen for the final churn predictions.
The main goal of this model is to identify as many customers who are likely to churn as possible, even if it means occasionally flagging customers who would not actually churn.
At lower thresholds (such as 0.20–0.30), the model achieves very high recall, but precision becomes too low, meaning too many non-churn customers are incorrectly flagged.
At higher thresholds (above 0.50), precision improves, but recall drops significantly, causing the model to miss a larger number of customers who actually churn.
A threshold of 0.40 provides a good balance between these two extremes. At this level, the model maintains high recall (about 0.87), ensuring most churned customers are correctly identified, while keeping the number of false positives at an acceptable level (precision of about 0.45).
This trade-off aligns well with real-world churn scenarios, where missing a churned customer is more costly than offering retention incentives to a customer who would have stayed.
Therefore, 0.40 was selected as the final decision threshold for churn prediction.
