In [None]:
import joblib
import pandas as pd

from src.data_loader import load_data
from src.preprocessing import clean_data
from src.features import build_features

# Run the project pipeline in one pass: load -> clean -> build features.
df = build_features(clean_data(load_data()))

# Model artifact is read from a path relative to the notebook's working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
model = joblib.load("../models/churn_model.pkl")

TARGET = "Churn"

# Columns removed before building the model input: the label, the customer
# identifier and four further columns.
# NOTE(review): presumably the model was trained without these — confirm.
NON_FEATURE_COLUMNS = [
    TARGET,
    "customerID",
    "MultipleLines",
    "InternetService",
    "Contract",
    "PaymentMethod",
]

X = df.drop(columns=NON_FEATURE_COLUMNS)
y = df[TARGET]


In [None]:
import shap

# Build a SHAP explainer for the loaded model.
explainer = shap.TreeExplainer(model)

# Explain a fixed-seed random subsample to keep the computation fast. The size is
# capped at len(X) because DataFrame.sample raises ValueError when asked for more
# rows than exist (sampling without replacement).
SHAP_SAMPLE_SIZE = 1000
sample = X.sample(min(SHAP_SAMPLE_SIZE, len(X)), random_state=42)

shap_values = explainer.shap_values(sample)

# Depending on the model type and SHAP version, classifiers may yield one block of
# SHAP values per class: either a list of 2-D arrays or a single 3-D array shaped
# (samples, features, classes). The cells below index `shap_values` as a 2-D
# (samples, features) array, so keep only the last class.
# NOTE(review): assumes a binary classifier whose last class is "churn" — confirm.
if isinstance(shap_values, list):
    shap_values = shap_values[-1]
elif getattr(shap_values, "ndim", 2) == 3:
    shap_values = shap_values[:, :, -1]


In [None]:
# Summary plot (SHAP's default plot type) for the sampled rows.
shap.summary_plot(shap_values, features=sample)


In [None]:
# Same SHAP values and rows as the default summary plot, drawn with plot_type="bar".
shap.summary_plot(shap_values, features=sample, plot_type="bar")


In [None]:
# Load SHAP's JavaScript bundle into the notebook; without it the interactive
# force plot is replaced by a "Javascript library not loaded" placeholder.
shap.initjs()

i = 5  # positional index, within `sample`, of the row to explain

# expected_value can hold one entry per class for classifiers, while force_plot
# needs a single base value that matches the SHAP values passed in. Take the last
# entry; a plain scalar is used unchanged.
# NOTE(review): assumes the last class is the "churn" class — confirm.
base_value = explainer.expected_value
if isinstance(base_value, (list, tuple)) or getattr(base_value, "ndim", 0) > 0:
    base_value = base_value[-1]

# shap_values may likewise be per-class: a list of 2-D arrays, or a 3-D array
# shaped (samples, features, classes). Reduce it to the 1-D row for sample `i`.
if isinstance(shap_values, list):
    row_shap_values = shap_values[-1][i]
else:
    row_shap_values = shap_values[i]
    if getattr(row_shap_values, "ndim", 1) == 2:
        row_shap_values = row_shap_values[:, -1]

# Kept as the last expression so the notebook displays the returned plot.
shap.force_plot(
    base_value,
    row_shap_values,
    sample.iloc[i],
)


In [None]:
# predict_proba returns one column per class; column index 1 is stored as the
# churn probability for every row of X.
# NOTE(review): assumes index 1 corresponds to the "churn" class — check model.classes_.
class_probabilities = model.predict_proba(X)
df["churn_proba"] = class_probabilities[:, 1]


In [None]:
def risk_segment(p, low_threshold=0.3, high_threshold=0.6):
    """Map a churn probability to a risk segment label.

    Args:
        p: Churn probability to classify.
        low_threshold: Values strictly below this are labelled "low".
        high_threshold: Values at or above ``low_threshold`` and strictly below
            this are labelled "medium".

    Returns:
        "low", "medium" or "high". Any value that is not below
        ``high_threshold`` is labelled "high"; this includes NaN, because
        comparisons with NaN are always False.
    """
    if p < low_threshold:
        return "low"
    if p < high_threshold:
        return "medium"
    return "high"

# Label every row with the risk segment of its predicted churn probability.
segment_labels = df["churn_proba"].map(risk_segment)
df["risk_segment"] = segment_labels

# Number of rows per segment; kept as the last expression so the notebook shows it.
df["risk_segment"].value_counts()


In [None]:
# Per-segment summary: row count, mean of the Churn column and mean TotalCharges.
# NOTE(review): the "mean" aggregations presumably rely on Churn and TotalCharges
# being numeric after clean_data/build_features — confirm.
by_segment = df.groupby("risk_segment")

by_segment.agg(
    clients=pd.NamedAgg(column="Churn", aggfunc="count"),
    churn_rate=pd.NamedAgg(column="Churn", aggfunc="mean"),
    avg_revenue=pd.NamedAgg(column="TotalCharges", aggfunc="mean"),
)


In [None]:
# Economic assumptions for the targeting calculation.
# NOTE(review): presumably per-customer amounts in the same currency unit — confirm.
RETENTION_COST = 50
PROFIT = 300

# expected_profit = churn probability * PROFIT - RETENTION_COST, computed per row.
churn_probability = df["churn_proba"]
df["expected_profit"] = churn_probability.mul(PROFIT).sub(RETENTION_COST)


In [None]:
# Keep only the rows whose expected profit is strictly positive.
is_profitable = df["expected_profit"].gt(0)
target_clients = df.loc[is_profitable]

# Number of selected rows; kept as the last expression so the notebook shows it.
len(target_clients)


## Основные драйверы churn
- Month-to-month контракт
- Высокий MonthlyCharges
- Малый tenure

## Рекомендации маркетингу
- скидки новым клиентам
- перевод на годовой контракт
- автоплатеж
- таргетировать только клиентов с положительным expected_profit (> 0)
