# In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# DAY 2 NOTEBOOK - SHAP Interpretation + Streamlit App
# ──────────────────────────────────────────────────────────────────────────────

# 1. Imports
import pandas as pd
import shap
import joblib
import streamlit as st  # For the app code (save as app.py later)

# 2. Load saved model and data
# Rebuilds the feature matrix and the train/test split from the raw CSV so
# that X_test is available for the SHAP section further down.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

best_model = joblib.load('../app/best_model.pkl')

df = pd.read_csv('../data/WA_Fn-UseC_-Telco-Customer-Churn.csv')
# Coerce TotalCharges to numeric (unparseable entries become NaN), drop the
# rows where that failed, renumber the index, and discard the ID column.
df = (
    df.assign(TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce'))
    .dropna(subset=['TotalCharges'])
    .reset_index(drop=True)
    .drop(columns='customerID')
)

# Encode the target labels as integers.
le = LabelEncoder()
df['Churn'] = le.fit_transform(df['Churn'])

# One-hot encode the predictors; drop_first removes the first level of every
# categorical column.
y = df['Churn']
X = pd.get_dummies(df.drop(columns='Churn'), drop_first=True)

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print("Data reloaded for SHAP.")

# 3. SHAP Explanations
# matplotlib is imported here because this section calls plt.* and the
# notebook imports at the top of the cell do not bring it into scope.
import matplotlib.pyplot as plt

explainer = shap.TreeExplainer(best_model)
shap_values = explainer.shap_values(X_test)
# NOTE(review): the indexing further down assumes shap_values is a 2-D
# (samples, features) array and expected_value is a single base value. Some
# model / SHAP-version combinations return per-class outputs instead —
# confirm for best_model.

# show=False stops summary_plot from rendering immediately, so the title is
# applied to the SHAP figure instead of to a fresh, empty one.
shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
plt.title('SHAP Feature Importance')
plt.show()

# Explain the first test row. matplotlib=True draws a static figure; the
# default JavaScript visualisation is only a return value, which is discarded
# when the call is not the last expression of the cell.
shap.force_plot(
    explainer.expected_value,
    shap_values[0, :],
    X_test.iloc[0, :],
    matplotlib=True,
)

print("SHAP done — see top factors like tenure, Contract.")

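# 4. Streamlit App Code (save as app/app.py)
# Everything below belongs in that separate file, not in this notebook.
# To test: cd app && streamlit run app.py
# (the model is loaded with a path relative to the working directory)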

# Code for app.py:
import streamlit as st
import pandas as pd
import joblib
import shap
import matplotlib.pyplot as plt

# Load the trained model and the fitted target encoder. Both paths are
# relative to the working directory the app is started from.
best_model = joblib.load('best_model.pkl')
# NOTE(review): `le` is not used anywhere in the visible app code — confirm
# whether the encoder is still needed.
le = joblib.load('target_encoder.pkl')

st.title('Telco Churn Prediction App')

# Inputs
tenure = st.slider('Tenure (months)', 0, 72, 1)
monthly_charges = st.slider('Monthly Charges', 18.0, 118.0, 50.0)
contract = st.selectbox('Contract', ['Month-to-month', 'One year', 'Two year'])
# Add more as needed

# Build a single-row frame using the same column names the training matrix
# has. The training features come from pd.get_dummies(..., drop_first=True),
# which drops the first Contract level ('Month-to-month'); that level is
# therefore represented by both remaining indicators being 0.
input_data = pd.DataFrame({
    'tenure': [tenure],
    'MonthlyCharges': [monthly_charges],
    # Approximation: no separate input is collected for lifetime charges, so
    # estimate them as tenure * monthly charge instead of leaving them at 0.
    'TotalCharges': [tenure * monthly_charges],
    'Contract_One year': [1 if contract == 'One year' else 0],
    'Contract_Two year': [1 if contract == 'Two year' else 0],
})

# Align with the model's feature set in one step: columns are put in the
# order the model was fitted with, features without an input are filled with
# 0, and any column the model does not know is dropped.
input_data = input_data.reindex(
    columns=best_model.feature_names_in_,
    fill_value=0,
)

# Run the model and explain the prediction only when the user asks for it.
if st.button('Predict'):
    pred = best_model.predict(input_data)[0]
    # Probability of the positive class (column 1), shown as a percentage.
    prob = best_model.predict_proba(input_data)[0][1] * 100

    st.write(f'Prediction: {"Churn" if pred == 1 else "No Churn"}')
    st.write(f'Probability: {prob:.2f}%')

    explainer = shap.TreeExplainer(best_model)
    shap_values = explainer.shap_values(input_data)
    # NOTE(review): assumes shap_values is a 2-D (samples, features) array
    # and expected_value a single base value — confirm for best_model.

    # st.pyplot needs a Matplotlib figure. By default force_plot returns a
    # JavaScript visualiser, so render with the Matplotlib backend without
    # showing it and hand the resulting current figure to Streamlit.
    shap.force_plot(
        explainer.expected_value,
        shap_values[0, :],
        input_data.iloc[0, :],
        matplotlib=True,
        show=False,
    )
    fig = plt.gcf()
    st.pyplot(fig)
    # Release the figure so repeated predictions do not accumulate figures.
    plt.close(fig)

print("App code ready — test locally.")