In [12]:
# --- Step 1: Imports + feature_engineering_fn definition ---
import pandas as pd
import joblib
from sklearn.base import BaseEstimator, TransformerMixin

# Recreate your feature engineering function (must match your training notebook)
def feature_engineering_fn(df):
    """Return a copy of ``df`` with a numeric ``TotalCharges`` and engineered features.

    The input frame is not modified. The function reads the columns
    ``TotalCharges``, ``tenure``, ``MonthlyCharges``, ``StreamingTV``,
    ``StreamingMovies``, ``PaymentMethod``, ``OnlineSecurity`` and
    ``TechSupport``.

    Columns written on the returned copy:

    * ``TotalCharges`` -- coerced to numeric; unparseable entries are replaced
      with the median of the parseable ones.
    * ``tenure_group`` -- categorical bucket of ``tenure`` over the edges
      0/12/24/48/60/72 (values outside 0..72 become NaN).
    * ``avg_monthly_from_total`` -- ``TotalCharges`` divided by ``tenure``,
      with a tenure of 0 treated as 1.
    * ``tenure_per_monthly`` -- ``tenure * MonthlyCharges``.
    * ``has_streaming`` -- 1 if ``StreamingTV`` or ``StreamingMovies`` is 'Yes'.
    * ``is_auto_payment`` -- 1 if ``PaymentMethod`` contains 'automatic'
      (case-insensitive; missing values count as 0).
    * ``has_support_services`` -- 1 if ``OnlineSecurity`` or ``TechSupport``
      is 'Yes'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns listed above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns plus the engineered ones.
    """
    df = df.copy()

    # Non-numeric entries (e.g. blank strings) become NaN and are then imputed
    # with the column median.
    total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
    # Assign the filled Series back rather than calling
    # `df['TotalCharges'].fillna(..., inplace=True)`: with pandas Copy-on-Write
    # the in-place call acts on a temporary object and the NaNs would remain.
    df['TotalCharges'] = total_charges.fillna(total_charges.median())

    # tenure group bucketing; include_lowest=True puts tenure == 0 in '0-12'
    df['tenure_group'] = pd.cut(
        df['tenure'],
        bins=[0, 12, 24, 48, 60, 72],
        labels=['0-12', '12-24', '24-48', '48-60', '60-72'],
        include_lowest=True
    )

    # --- ENGINEERED FEATURES (must exist for your pipeline) ---
    # Replace a tenure of 0 with 1 so the division never divides by zero.
    df['avg_monthly_from_total'] = df['TotalCharges'] / (df['tenure'].replace(0, 1))
    df['tenure_per_monthly'] = df['tenure'] * df['MonthlyCharges']
    df['has_streaming'] = ((df['StreamingTV'] == 'Yes') | (df['StreamingMovies'] == 'Yes')).astype(int)
    df['is_auto_payment'] = df['PaymentMethod'].str.contains('automatic', case=False, na=False).astype(int)
    df['has_support_services'] = ((df['OnlineSecurity'] == 'Yes') | (df['TechSupport'] == 'Yes')).astype(int)

    return df


In [13]:
# --- Step 2: Load trained pipeline safely ---
# NOTE(review): joblib.load unpickles arbitrary Python objects, so only load
# pipeline files that come from a trusted source.
pipeline_path = "final_churn_xgb_pipeline.pkl"

try:
    model_pipeline = joblib.load(pipeline_path)
except Exception as e:
    print(f"⚠️ Failed to load pipeline: {e}")
    # Re-raise so execution stops here with the real cause; otherwise
    # `model_pipeline` stays undefined and later cells fail with a NameError.
    raise
else:
    print("✅ Pipeline loaded successfully.")


✅ Pipeline loaded successfully.


In [14]:
# --- Step 3: Make a sample prediction ---
# A single hypothetical customer record; each key becomes one DataFrame column.
sample_customer = {
    'gender': 'Female',
    'SeniorCitizen': 0,
    'Partner': 'Yes',
    'Dependents': 'No',
    'tenure': 24,
    'PhoneService': 'Yes',
    'MultipleLines': 'No',
    'InternetService': 'Fiber optic',
    'OnlineSecurity': 'No',
    'OnlineBackup': 'Yes',
    'DeviceProtection': 'Yes',
    'TechSupport': 'No',
    'StreamingTV': 'Yes',
    'StreamingMovies': 'No',
    'Contract': 'Month-to-month',
    'PaperlessBilling': 'Yes',
    'PaymentMethod': 'Electronic check',
    'MonthlyCharges': 80.85,
    'TotalCharges': '1800.5',  # passed as a string, exactly as in the sample
}
# Wrap the record in a list to get a one-row frame.
new_data = pd.DataFrame([sample_customer])

# Class label for the only row, then row 0 / column 1 of predict_proba
# (reported below as the churn probability).
pred = model_pipeline.predict(new_data)[0]
prob = model_pipeline.predict_proba(new_data)[0, 1]

print("\n===== Prediction Result =====")
print(f"Predicted Churn: {'Yes' if pred == 1 else 'No'}")
print(f"Churn Probability: {prob:.4f}")



===== Prediction Result =====
Predicted Churn: Yes
Churn Probability: 0.5997
