## Inference prediction

In [19]:
import pandas as pd
import numpy as np
import joblib

In [20]:
# Read the churn CSV into a DataFrame; the path is relative to the notebook's
# working directory, so the kernel must be started from the notebook's folder.
df = pd.read_csv('../data/churn_data.csv')
df.shape

(7043, 21)

In [21]:
# Drop the customerID column. errors='ignore' keeps this cell safe to re-run:
# without it a second execution raises KeyError because the column is already gone.
df = df.drop(columns=['customerID'], errors='ignore')

In [29]:
# Feature engineering applied to customer rows before they are passed to the model.

def _min_max_scale(values: pd.Series, bounds=None, eps: float = 1e-6) -> pd.Series:
    """Min-max scale ``values`` with ``bounds`` or, if omitted, the observed min/max."""
    if bounds is None:
        low, high = values.min(), values.max()
    else:
        low, high = bounds
    # eps keeps the division finite when high == low (e.g. a one-row frame).
    return (values - low) / (high - low + eps)


def feature_engineering(
    df: pd.DataFrame,
    tenure_range: "tuple[float, float] | None" = None,
    monthly_charges_range: "tuple[float, float] | None" = None,
) -> pd.DataFrame:
    """Return a copy of ``df`` with derived churn features added.

    Columns added: ``TenureGroup``, ``avg_monthly_spend``, ``charge_ratio``,
    ``monthly_segment``, ``total_services``, ``has_premium_services``,
    ``customer_value_score`` and ``senior_with_partner``. ``tenure``,
    ``MonthlyCharges`` and ``TotalCharges`` are coerced to numeric, and missing
    ``TotalCharges`` values are filled with ``MonthlyCharges * tenure``.

    Parameters
    ----------
    df
        Customer rows. Must contain ``tenure``, ``MonthlyCharges``,
        ``TotalCharges``, ``SeniorCitizen``, ``Partner`` and the nine service
        columns listed in ``service_cols`` below. The input is not mutated.
    tenure_range, monthly_charges_range
        Optional ``(min, max)`` reference bounds used to normalise ``tenure`` and
        ``MonthlyCharges`` for ``customer_value_score``. When omitted, the
        min/max of ``df`` itself are used (the original behaviour). In that
        case the score depends on which rows are in the frame, and for a
        one-row frame it is always 0. Pass fixed bounds to get a score that is
        stable across calls. Values outside the bounds are not clipped.

    Returns
    -------
    pd.DataFrame
        A new frame with the original columns plus the derived ones.
    """
    df = df.copy()

    # Non-numeric entries (e.g. blank strings) become NaN instead of raising.
    for col in ["tenure", "MonthlyCharges", "TotalCharges"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    df["TotalCharges"] = df["TotalCharges"].fillna(
        df["MonthlyCharges"] * df["tenure"]
    )

    # pd.cut intervals are right-closed and the lowest edge is excluded by
    # default, so tenure == 0 falls in no bin and TenureGroup is NaN for it.
    # NOTE(review): left unchanged to stay aligned with how the saved model was
    # trained; confirm against the training code before changing the bins.
    df["TenureGroup"] = pd.cut(
        df["tenure"],
        bins=[0, 12, 24, 36, 48, 60, np.inf],
        labels=[
            "0-1 Year", "1-2 Years", "2-3 Years",
            "3-4 Years", "4-5 Years", "5+ Years"
        ]
    )

    # +1 avoids division by zero for tenure == 0.
    df["avg_monthly_spend"] = df["TotalCharges"] / (df["tenure"] + 1)

    # A zero TotalCharges would divide by zero; treat the ratio as 0 there.
    df["charge_ratio"] = (
        df["MonthlyCharges"] /
        df["TotalCharges"].replace(0, np.nan)
    ).fillna(0)

    df["monthly_segment"] = pd.cut(
        df["MonthlyCharges"],
        bins=[0, 30, 60, 90, np.inf],
        labels=["Low", "Medium", "High", "Premium"]
    )

    service_cols = [
        "PhoneService", "MultipleLines", "InternetService",
        "OnlineSecurity", "OnlineBackup", "DeviceProtection",
        "TechSupport", "StreamingTV", "StreamingMovies"
    ]

    # Counts only cells that are exactly "Yes".
    # NOTE(review): if InternetService holds provider types rather than
    # "Yes"/"No" it never contributes to this count; verify against the data.
    df["total_services"] = (df[service_cols] == "Yes").sum(axis=1)

    premium_services = [
        "OnlineSecurity", "OnlineBackup",
        "DeviceProtection", "TechSupport"
    ]

    df["has_premium_services"] = (
        df[premium_services] == "Yes"
    ).any(axis=1).astype(int)

    # Equal-weight blend of normalised tenure and normalised monthly charges.
    tenure_norm = _min_max_scale(df["tenure"], tenure_range)
    charges_norm = _min_max_scale(df["MonthlyCharges"], monthly_charges_range)
    df["customer_value_score"] = 0.5 * tenure_norm + 0.5 * charges_norm

    df["senior_with_partner"] = (
        (df["SeniorCitizen"] == 1) &
        (df["Partner"] == "Yes")
    ).astype(int)

    return df


In [30]:
def assign_risk(prob, high=0.7, medium=0.4):
    """Map a churn probability to a risk label.

    Returns "HIGH" when ``prob`` is strictly above ``high``, otherwise
    "MEDIUM" when it is strictly above ``medium``, otherwise "LOW".
    """
    # Tiers are checked in order, so the "HIGH" cutoff always wins first.
    tiers = (("HIGH", high), ("MEDIUM", medium))
    for label, cutoff in tiers:
        if prob > cutoff:
            return label
    return "LOW"

In [31]:
def predict_customer(customer_data):
    """Score a single customer and summarise the result as a dict.

    Parameters
    ----------
    customer_data
        One customer as a ``dict`` of column -> value, a ``pd.Series`` row
        (e.g. ``df.iloc[i]``), or a ``pd.DataFrame``. If a DataFrame has more
        than one row, only the first row's result is returned.

    Returns
    -------
    dict
        ``prediction`` ("CHURN" / "NO CHURN"), ``churn_probability`` (second
        column of ``predict_proba``), ``confidence`` (largest class probability)
        and ``risk_level`` (from ``assign_risk``). Probabilities are plain
        Python floats.

    Notes
    -----
    Relies on the notebook-level ``pipeline`` object, which must be loaded
    before this function is called.
    """
    if isinstance(customer_data, (dict, pd.Series)):
        # Both a mapping and a Series row describe one record; wrap it so the
        # feature code and the model receive a one-row DataFrame.
        df = pd.DataFrame([customer_data])
    else:
        df = customer_data.copy()

    # NOTE(review): feature_engineering normalises tenure and MonthlyCharges
    # over the frame it is given, so on a one-row frame customer_value_score
    # is 0. Confirm this matches how the model was trained.
    df_fe = feature_engineering(df)
    pred = pipeline.predict(df_fe)[0]
    proba = pipeline.predict_proba(df_fe)[0]

    return {
        "prediction": "CHURN" if pred == 1 else "NO CHURN",
        # float() turns NumPy scalars into built-in floats (clean repr, JSON-safe).
        "churn_probability": float(proba[1]),
        "confidence": float(max(proba)),
        "risk_level": assign_risk(proba[1])
    }

In [32]:
def batch_predict_customers(df_customers):
    """Score every row of ``df_customers`` and return one result row per customer.

    The returned DataFrame shares the input's index and has the columns
    ``Prediction``, ``Churn_Probability``, ``Confidence`` and ``Risk``.
    Uses the notebook-level ``pipeline`` together with ``feature_engineering``
    and ``assign_risk``.
    """
    engineered = feature_engineering(df_customers)
    labels = pipeline.predict(engineered)
    churn_probs = pipeline.predict_proba(engineered)[:, 1]

    prediction_text = []
    for label in labels:
        prediction_text.append("CHURN" if label == 1 else "NO CHURN")

    # Confidence is the probability of whichever class is more likely.
    confidence = [max(prob, 1 - prob) for prob in churn_probs]
    risk = list(map(assign_risk, churn_probs))

    columns = {
        "Prediction": prediction_text,
        "Churn_Probability": churn_probs,
        "Confidence": confidence,
        "Risk": risk,
    }
    return pd.DataFrame(columns, index=df_customers.index)

In [33]:
# Load the saved artifacts and take the object stored under "model"; the
# prediction helpers call .predict / .predict_proba on it via the name `pipeline`.
# joblib.load unpickles the file, which can run arbitrary code, so only load
# artifact files from a trusted source.
artifacts = joblib.load("telco_churn_model_v1.pkl")
pipeline = artifacts["model"]

In [34]:
# Pick the row at position 1000 (returned as a Series) and wrap it back into
# a one-row DataFrame.
test1 = df.iloc[1000]
df_user = pd.DataFrame([test1])
# Remove the Churn column if it is present; errors="ignore" makes this a no-op otherwise.
X_user = df_user.drop(columns=["Churn"], errors="ignore")

In [35]:
# Single customer: score the one-row frame and print the returned summary dict.
single_result = predict_customer(X_user)
print(single_result)

{'prediction': 'CHURN', 'churn_probability': np.float64(0.7716510683486332), 'confidence': np.float64(0.7716510683486332), 'risk_level': 'HIGH'}


In [36]:
# Batch customers: score a 10-row sample (random_state=42 makes the draw
# repeatable), with the Churn column removed if it is present.
batch_df = df.sample(10, random_state=42).drop(columns=["Churn"], errors="ignore")
batch_results = batch_predict_customers(batch_df)
display(batch_results)


Unnamed: 0,Prediction,Churn_Probability,Confidence,Risk
185,CHURN,0.701114,0.701114,HIGH
2715,NO CHURN,0.093256,0.906744,LOW
3825,NO CHURN,0.022288,0.977712,LOW
1807,CHURN,0.733674,0.733674,HIGH
132,NO CHURN,0.035074,0.964926,LOW
1263,NO CHURN,0.275826,0.724174,LOW
3732,NO CHURN,0.042131,0.957869,LOW
1672,NO CHURN,0.019722,0.980278,LOW
811,NO CHURN,0.109968,0.890032,LOW
2526,NO CHURN,0.299976,0.700024,LOW
