# Phase 4 — Deployment & Live Demo (Flask API + Streamlit UI)

Goal:
- Export batch predictions for Tableau.
- Provide a REST API for live prediction.
- Provide a Streamlit demo UI (single + batch prediction).
- Provide deployment instructions (Render for Flask, Streamlit Cloud for UI).

Assumptions:
- You have a trained model file inside `models/` (we will try to detect typical names).
- You have the cleaned dataset at `data/cleaned/Telco-Customer-Churn.csv`.
- `models/feature_columns.json` or similar may exist; if not the notebook will derive it.


In [2]:
# Imports & Path setup
from pathlib import Path
import json, joblib, os, sys
import pandas as pd
import numpy as np

# All project locations are resolved from the current working directory.
REPO_ROOT = Path(".").resolve()
MODEL_DIR = REPO_ROOT.joinpath("models")
DATA_CLEANED = REPO_ROOT.joinpath("data", "cleaned", "Telco-Customer-Churn.csv")
ANALYSIS_DIR = REPO_ROOT.joinpath("data", "analysis")
API_DIR = REPO_ROOT.joinpath("api")

# Create the folders this notebook writes into (no-op when they already exist).
for required_dir in (ANALYSIS_DIR, API_DIR, MODEL_DIR):
    required_dir.mkdir(parents=True, exist_ok=True)

# Quick status report of what is available on disk.
for label, value in (
    ("Repo root:", REPO_ROOT),
    ("Cleaned data exists:", DATA_CLEANED.exists()),
    ("Models dir:", MODEL_DIR.exists()),
    ("API dir:", API_DIR.exists()),
):
    print(label, value)


Repo root: /Users/hrithik/customer-churn-Analysis
Cleaned data exists: True
Models dir: True
API dir: True


## 1) Locate & load the trained model
We try common filenames (`churn_model.pkl`, `churn_model_rf.pkl`, `churn_model_xgb.pkl`, etc.).  
If missing, stop and run Phase 3 to re-save the model.
We also prepare `feature_columns.json` (list of final dummy column names). If missing, derive it from the cleaned data using the same one-hot approach.


In [4]:
# Cell 2 — Find model file (robust)
# Preferred file names, checked in order; the first one that exists is used.
possible_models = [
    MODEL_DIR / "churn_model.pkl",
    MODEL_DIR / "churn_model_rf.pkl",
    MODEL_DIR / "churn_model_xgb.pkl",
    MODEL_DIR / "churn_model_rf_v1.pkl",
    MODEL_DIR / "churn_model_rf_v2.pkl",
    MODEL_DIR / "model.pkl",
]
model_path = next((p for p in possible_models if p.exists()), None)

# If none of the preferred names exist, fall back to any .pkl under models/.
# glob() order depends on the filesystem, so sort for a reproducible choice,
# and skip feature-list pickles (e.g. feature_cols.pkl), which are not models.
if model_path is None:
    pkls = [
        p for p in sorted(MODEL_DIR.glob("*.pkl"))
        if not p.name.lower().startswith("feature")
    ]
    if pkls:
        model_path = pkls[0]

if model_path is None:
    raise FileNotFoundError("No model .pkl found in models/. Please run Phase 3 and save the model to models/")

print("Using model:", model_path)

# load model
model = joblib.load(model_path)
print("Model loaded. Type:", type(model))

# feature columns file: load it when present, otherwise derive it from the
# cleaned data with the same one-hot encoding and persist it for later cells.
FEATURE_PATH = MODEL_DIR / "feature_columns.json"
if FEATURE_PATH.exists():
    FEATURE_COLS = json.loads(FEATURE_PATH.read_text(encoding="utf-8"))
    print(f"Loaded {len(FEATURE_COLS)} feature columns from {FEATURE_PATH}")
else:
    if not DATA_CLEANED.exists():
        # Report the path that was actually checked.
        raise FileNotFoundError(f"Cleaned dataset not found at {DATA_CLEANED}. Run Phase 2 first.")
    dfc = pd.read_csv(DATA_CLEANED)
    # Match the label column case-insensitively ("Churn" vs "churn") so the
    # target can never end up among the derived feature columns.
    target_cols = [c for c in dfc.columns if str(c).strip().lower() == "churn"]
    X_sample = dfc.drop(columns=target_cols)
    X_dummies = pd.get_dummies(X_sample, drop_first=True)
    FEATURE_COLS = X_dummies.columns.tolist()
    FEATURE_PATH.write_text(json.dumps(FEATURE_COLS), encoding="utf-8")
    print(f"Derived and saved {len(FEATURE_COLS)} feature columns to {FEATURE_PATH}")


Using model: /Users/hrithik/customer-churn-Analysis/models/churn_model.pkl
Model loaded. Type: <class 'xgboost.sklearn.XGBClassifier'>
Loaded 13601 feature columns from /Users/hrithik/customer-churn-Analysis/models/feature_columns.json


## Helper: prepare_input(df)
Make a robust function that:
- strips strings,
- one-hot encodes (drop_first=True) like training,
- reindexes to the saved FEATURE_COLS (fills missing columns with 0).
This removes performance warnings and ensures exact feature alignment.


In [6]:
# prepare_input implementation (optimized)
def prepare_input(df_input: pd.DataFrame, feature_cols=None) -> pd.DataFrame:
    """Turn raw records into the one-hot matrix described by ``feature_cols``.

    Args:
        df_input: Raw input rows. The frame is copied, not modified.
        feature_cols: Ordered list of output column names. When omitted, the
            module-level ``FEATURE_COLS`` is looked up at call time, so a
            later reload of that list is picked up.

    Returns:
        A DataFrame with exactly ``feature_cols`` as columns; columns missing
        from the encoded input are filled with 0 and extra ones are dropped.
    """
    if feature_cols is None:
        feature_cols = FEATURE_COLS
    df = df_input.copy()
    # basic cleaning: trim whitespace around string values
    for c in df.select_dtypes(include="object").columns:
        df[c] = df[c].astype(str).str.strip()
    # Encode every category present. drop_first=True must NOT be used here: it
    # drops the first category found in *this* batch, so a single record would
    # lose its only dummy column and be scored as the baseline category.
    # The reindex below already removes the baseline columns, because they are
    # absent from feature_cols.
    X = pd.get_dummies(df)
    # reindex to target columns (this fills missing cols with 0 and drops extras)
    X = X.reindex(columns=feature_cols, fill_value=0)
    return X

# quick sanity check on a small sample
if DATA_CLEANED.exists():
    sample_df = pd.read_csv(DATA_CLEANED)
    # The label column may be spelled "Churn" or "churn"; match it
    # case-insensitively so it is never passed on as a feature.
    label_cols = [c for c in sample_df.columns if str(c).strip().lower() == "churn"]
    test_row = sample_df.drop(columns=label_cols).head(2)
    Xt = prepare_input(test_row)
    print("Prepared shape:", Xt.shape)
else:
    print("No cleaned data to test prepare_input with.")


Prepared shape: (2, 13601)


## Deployment & Batch Prediction

### 1- Setup & Imports

In [9]:
import os
import json
import joblib
import pandas as pd
import traceback


### 2- Paths

In [11]:
# Define project paths.
# Everything is anchored to REPO_ROOT (defined in the first cell) so all four
# locations share one base directory; previously three of them pointed at the
# parent of the working directory ("../") while the feature file did not.
# Keeping these as Path objects also keeps DATA_CLEANED.exists() working.
# NOTE(review): confirm that models/xgb_model.pkl is the intended model file.
DATA_CLEANED = REPO_ROOT / "data" / "cleaned" / "Telco-Customer-Churn.csv"   # cleaned dataset
MODEL_PATH = REPO_ROOT / "models" / "xgb_model.pkl"                          # trained model
FEATURES_PATH = REPO_ROOT / "models" / "feature_columns.json"                # saved feature columns
OUTPUT_PATH = REPO_ROOT / "data" / "analysis" / "batch_predictions.csv"

# Ensure folders exist
os.makedirs(OUTPUT_PATH.parent, exist_ok=True)


### 3- Load Model & Features

In [13]:
# Restore the persisted model from MODEL_PATH.
model = joblib.load(MODEL_PATH)

# Read the saved feature column order from its JSON file.
with open(FEATURES_PATH, "r") as features_file:
    FEATURE_COLS = json.loads(features_file.read())

n_features = len(FEATURE_COLS)
print("✅ Model & feature columns loaded")
print(f"Model expects: {n_features} features")


✅ Model & feature columns loaded
Model expects: 13601 features


### 4- Load Cleaned Data

In [15]:
# Read the cleaned dataset into a DataFrame.
df_full = pd.read_csv(DATA_CLEANED)

# Report its dimensions, then preview the first rows as the cell output.
print(f"Dataset shape: {df_full.shape}")
df_full.head()


Dataset shape: (7043, 20)


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### 5- Prepare Input Function

In [17]:
# Function to align features exactly like training
def prepare_input(df, feature_cols):
    """Turn raw records into the one-hot matrix described by ``feature_cols``.

    Args:
        df: Raw input rows. The frame is copied, not modified.
        feature_cols: Ordered list of output column names.

    Returns:
        A DataFrame with exactly ``feature_cols`` as columns; columns missing
        from the encoded input are filled with 0 and extra ones are dropped.
    """
    df = df.copy()
    # Trim whitespace around string values, as the other helpers in this
    # notebook do, so category names compare equal.
    for c in df.select_dtypes(include="object").columns:
        df[c] = df[c].astype(str).str.strip()
    # Encode every category present. drop_first=True is deliberately not used:
    # it drops the first category found in the batch being encoded, which
    # gives a wrong encoding for small batches. Baseline columns are removed
    # by the reindex instead, since they are not in feature_cols.
    X = pd.get_dummies(df)
    return X.reindex(columns=feature_cols, fill_value=0)  # align with training


### 6- Run Batch Predictions

In [19]:
import pandas as pd
import json
import pickle

# Load cleaned data
df_full = pd.read_csv(DATA_CLEANED)

# Detect ID column; when the data has none, add a 1-based running id so the
# exported predictions can still be joined back to rows.
id_candidates = [c for c in df_full.columns if c.lower() in ("customerid","customer_id","id","custid","cust_id")]
id_col = id_candidates[0] if id_candidates else "customerid"
if not id_candidates:
    df_full.insert(0, id_col, range(1, len(df_full)+1))

# ---------------- Load feature columns ---------------- #
# Prefer models/feature_columns.json (the file this notebook writes), then the
# older feature_cols.json name, and finally the pickle fallback.
feature_cols = None
for candidate in ("feature_columns.json", "feature_cols.json"):
    candidate_path = MODEL_DIR / candidate
    if candidate_path.exists():
        feature_cols = json.loads(candidate_path.read_text(encoding="utf-8"))
        break
if feature_cols is None:
    # NOTE: pickle must only be used on files produced by this project.
    with open(MODEL_DIR / "feature_cols.pkl", "rb") as f:
        feature_cols = pickle.load(f)

# ---------------- Prepare input for prediction ---------------- #
# Match the label column case-insensitively ("Churn" vs "churn") so the target
# is never fed to the model as a feature.
target_cols = [c for c in df_full.columns if str(c).strip().lower() == "churn"]
# prepare_input already reindexes to feature_cols, so no second reindex is needed.
X_all = prepare_input(df_full.drop(columns=target_cols), feature_cols=feature_cols)

# ---------------- Predict churn ---------------- #
probs = model.predict_proba(X_all)[:,1]
preds = (probs >= 0.5).astype(int)  # 0.5 decision threshold

# ---------------- Save predictions ---------------- #
out = pd.DataFrame({
    id_col: df_full[id_col],
    "predicted_churn": preds,
    "churn_probability": probs
})

out_path = ANALYSIS_DIR / "predictions_for_tableau.csv"
out.to_csv(out_path, index=False)
print("Saved predictions for Tableau at:", out_path)

# Display first rows
out.head()


Saved predictions for Tableau at: /Users/hrithik/customer-churn-Analysis/data/analysis/predictions_for_tableau.csv


Unnamed: 0,customerid,predicted_churn,churn_probability
0,1,0,0.25
1,2,0,0.32
2,3,0,0.32
3,4,0,0.42
4,5,0,0.43


## 7) Write Flask API (api/app.py)
This creates a small REST service with:
- GET /ping (health)
- POST /predict — accepts JSON object or list, returns predictions & probabilities
We will write the file programmatically so you can deploy immediately.


In [45]:

# Source of api/app.py, kept as a raw string and written to disk below.
# Comments inside the generated file use '#' only, because a triple
# double-quote would end this string literal.
flask_code = r"""
from flask import Flask, request, jsonify
import joblib, json, os, pandas as pd

BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
MODEL_PATHS = [
    os.path.join(BASE_DIR, "models", "churn_model.pkl"),
    os.path.join(BASE_DIR, "models", "churn_model_rf.pkl"),
    os.path.join(BASE_DIR, "models", "churn_model_xgb.pkl")
]
# pick first existing
model_path = next((p for p in MODEL_PATHS if os.path.exists(p)), None)
if model_path is None:
    raise FileNotFoundError("Model not found in models/")

model = joblib.load(model_path)
# The feature column order is stored as JSON in models/feature_columns.json.
with open(os.path.join(BASE_DIR, "models", "feature_columns.json"), "r", encoding="utf-8") as f:
    FEATURE_COLS = json.load(f)

def prepare_input(df_input):
    # Clean and one-hot encode the request rows, then align to FEATURE_COLS.
    df = df_input.copy()
    for c in df.select_dtypes(include="object").columns:
        df[c] = df[c].astype(str).str.strip()
    # No drop_first here: it would drop the first category present in the
    # request, so a single record would lose its only dummy column.
    # Baseline columns are removed by the reindex because they are not in
    # FEATURE_COLS.
    X = pd.get_dummies(df)
    # reindex to feature cols (missing columns become 0, extras are dropped)
    X = X.reindex(columns=FEATURE_COLS, fill_value=0)
    return X

app = Flask(__name__)

@app.route("/ping", methods=["GET"])
def ping():
    return jsonify({"status":"ok"})

@app.route("/predict", methods=["POST"])
def predict():
    # silent=True returns None for a missing or malformed JSON body, so bad
    # requests get a 400 below instead of surfacing as a 500.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"error":"No JSON body provided"}), 400
    if isinstance(data, dict):
        records = [data]
    elif isinstance(data, list):
        records = data
    else:
        return jsonify({"error":"JSON must be object or list"}), 400
    if not records:
        return jsonify({"error":"JSON list must contain at least one record"}), 400
    try:
        X = prepare_input(pd.DataFrame(records))
        preds = model.predict(X).tolist()
        probs = model.predict_proba(X)[:,1].tolist()
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    return jsonify({"predictions": preds, "probabilities": probs})

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 5000))
    # Debug mode exposes the interactive debugger; since the server binds to
    # all interfaces it stays off unless FLASK_DEBUG=1 is set explicitly.
    debug = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=port, debug=debug)
"""
(API_DIR / "app.py").write_text(flask_code, encoding="utf-8")
print("Wrote", API_DIR / "app.py")


Wrote /Users/hrithik/customer-churn-Analysis/api/app.py


## 8) Write Streamlit app (`streamlit_app.py`)
The app:
- will use the local model by default for offline demos
- if `API_URL` env var is set, will call the remote Flask API instead
- supports CSV upload (batch) and single-record sidebar inputs
Write the file programmatically.


In [43]:

# Source of streamlit_app.py, kept as a raw string and written to disk below.
# Comments inside the generated file use '#' only, because a triple
# single-quote would end this string literal.
streamlit_code = r'''
import os, joblib, json, pandas as pd, requests, streamlit as st

st.set_page_config(page_title="Churn Predictor Demo", layout="wide")
BASE_DIR = os.path.abspath(".")
API_URL = os.environ.get("API_URL")  # e.g., https://your-flask.onrender.com

MODEL_PATHS = [
    os.path.join(BASE_DIR, "models", "churn_model.pkl"),
    os.path.join(BASE_DIR, "models", "churn_model_rf.pkl"),
    os.path.join(BASE_DIR, "models", "churn_model_xgb.pkl")
]
def find_model():
    for p in MODEL_PATHS:
        if os.path.exists(p):
            return p
    return None

model = None
FEATURE_COLS = None
# "not API_URL" (rather than "is None") so an empty API_URL also selects the
# local model, matching the "if API_URL:" checks further down.
if not API_URL:
    model_file = find_model()
    if model_file is None:
        st.error("No local model found in models/. Set API_URL to call remote API.")
        # Nothing below can work without a model or an API, so halt the script.
        st.stop()
    model = joblib.load(model_file)
    # The feature column order is stored as JSON in models/feature_columns.json.
    with open(os.path.join(BASE_DIR, "models", "feature_columns.json"), "r", encoding="utf-8") as f:
        FEATURE_COLS = json.load(f)

def prepare_input_local(df):
    df2 = df.copy()
    for c in df2.select_dtypes(include="object").columns:
        df2[c] = df2[c].astype(str).str.strip()
    # No drop_first here: it would drop the first category present in the
    # input, so a single record would lose its only dummy column. Baseline
    # columns are removed by the reindex because they are not in FEATURE_COLS.
    X = pd.get_dummies(df2)
    X = X.reindex(columns=FEATURE_COLS, fill_value=0)
    return X

st.title("Churn Predictor — Demo")
st.markdown("Upload a CSV (same columns as training) or use the sidebar single-record form.")

uploaded = st.file_uploader("Upload CSV (for batch prediction)", type=["csv"])
if uploaded is not None:
    df = pd.read_csv(uploaded)
    if API_URL:
        payload = df.to_dict(orient="records")
        resp = requests.post(API_URL.rstrip("/") + "/predict", json=payload, timeout=30)
        if resp.status_code == 200:
            res = resp.json()
            df["predicted_churn"] = res["predictions"]
            df["churn_probability"] = res["probabilities"]
            st.success("Predictions fetched from API")
            st.dataframe(df.head(100))
            st.download_button("Download predictions CSV", df.to_csv(index=False).encode(), "predictions.csv")
        else:
            st.error(f"API error: {resp.status_code}: {resp.text}")
    else:
        # Match the label column case-insensitively ("Churn" vs "churn") so it
        # is never used as a feature.
        label_cols = [c for c in df.columns if str(c).strip().lower() == "churn"]
        X = prepare_input_local(df.drop(columns=label_cols))
        df["predicted_churn"] = model.predict(X)
        df["churn_probability"] = model.predict_proba(X)[:,1]
        st.success("Local predictions ready")
        st.dataframe(df.head(100))
        st.download_button("Download predictions CSV", df.to_csv(index=False).encode(), "predictions.csv")

st.sidebar.header("Single-record")
# Adjust fields based on your dataset: this is an example. Replace/extend with your columns.
tenure = st.sidebar.number_input("tenure", min_value=0, max_value=200, value=12)
monthlycharges = st.sidebar.number_input("monthlycharges", min_value=0.0, value=70.0)
contract = st.sidebar.selectbox("contract", ["Month-to-month", "One year", "Two year"])
paymentmethod = st.sidebar.selectbox("paymentmethod", ["Electronic check", "Mailed check", "Bank transfer (automatic)", "Credit card (automatic)"])

if st.sidebar.button("Predict single"):
    # Keys must match the dataset column names exactly (case-sensitive);
    # otherwise the one-hot columns never line up with FEATURE_COLS and the
    # inputs are silently ignored.
    payload = {"tenure": tenure, "MonthlyCharges": monthlycharges, "Contract": contract, "PaymentMethod": paymentmethod}
    if API_URL:
        resp = requests.post(API_URL.rstrip("/") + "/predict", json=payload, timeout=15)
        if resp.status_code == 200:
            r = resp.json()
            prob = r["probabilities"][0]
            st.metric("Churn probability", f"{prob:.2%}")
        else:
            st.error("API error: " + resp.text)
    else:
        df_single = pd.DataFrame([payload])
        Xs = prepare_input_local(df_single)
        prob = model.predict_proba(Xs)[:,1][0]
        st.metric("Churn probability", f"{prob:.2%}")
'''
Path("streamlit_app.py").write_text(streamlit_code, encoding="utf-8")
print("Wrote streamlit_app.py")


Wrote streamlit_app.py


## 9) Write `requirements.txt` and `Procfile`
`requirements.txt` will be used by Render / Streamlit Cloud to install dependencies.
`Procfile` is useful for Heroku or some other hosts.


In [49]:
# Cell 7 — requirements + procfile
# Packages to install, one per line in requirements.txt.
reqs = [
    "pandas", "numpy", "scikit-learn", "joblib",
    "flask", "streamlit", "gunicorn", "requests",
    "xgboost",  # optional if your saved model needs it
]
# Process declaration: serve the Flask app object from api/app.py with gunicorn.
proc_text = "web: gunicorn api.app:app\n"

# Write both deployment files into the current working directory.
for filename, content in (
    ("requirements.txt", "\n".join(reqs)),
    ("Procfile", proc_text),
):
    Path(filename).write_text(content)
    print("Wrote", filename)


Wrote requirements.txt
Wrote Procfile
