In [None]:
# 0. Check GPU: report which accelerator (if any) this runtime exposes.
import torch

has_cuda = torch.cuda.is_available()
# Build the message parts first so a single print call covers both outcomes.
gpu_status = (
    (" GPU Available:", torch.cuda.get_device_name(0))
    if has_cuda
    else (" GPU not found — using CPU.",)
)
print(*gpu_status)

# 1️ Install Dependencies
!pip install streamlit pyngrok pandas numpy scikit-learn xgboost matplotlib seaborn joblib -q

# 2️ Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib, json, os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

# 3. Load dataset
file_path = '/content/WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(file_path)
print(f"Loaded dataset: {df.shape}")

# 4. Preprocess data
# TotalCharges is parsed as numeric; values that fail to parse become NaN and are set to 0.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges.fillna(0)

# Encode the target as 1 (churned) / 0 (stayed).
churn_codes = {'Yes': 1, 'No': 0}
df['Churn'] = df['Churn'].map(churn_codes)

# Drop the identifier column when present; errors='ignore' makes this a no-op otherwise.
df.drop(columns=['customerID'], errors='ignore', inplace=True)

# 5. Save EDA plots
# All figures are written to ./eda_images so the Streamlit app can display them later.
os.makedirs('eda_images', exist_ok=True)
sns.set_style("whitegrid")

# Countplots: one panel per categorical feature, bars split by the Churn label.
categorical_features = ['Contract', 'PaymentMethod', 'InternetService', 'TechSupport']
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
axes = axes.flatten()
for ax, col in zip(axes, categorical_features):
    sns.countplot(x=col, hue='Churn', data=df, ax=ax, palette='viridis')
    ax.set_title(f'Churn by {col}')
    ax.tick_params(axis='x', rotation=30)
fig.tight_layout()
fig.savefig('eda_images/countplots.png', bbox_inches='tight')
plt.close(fig)

# KDE plots: density of each numeric feature, one curve per Churn value.
kde_panels = [
    ('tenure', 'Tenure Distribution by Churn'),
    ('MonthlyCharges', 'Monthly Charges Distribution by Churn'),
]
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
for ax, (feature, panel_title) in zip(axes, kde_panels):
    sns.kdeplot(data=df, x=feature, hue='Churn', fill=True, ax=ax, palette='viridis')
    ax.set_title(panel_title)
fig.tight_layout()
fig.savefig('eda_images/kdeplots.png', bbox_inches='tight')
plt.close(fig)

# Correlation heatmap
# One-hot encode the frame (first level of each category dropped) so .corr() sees only
# numeric/boolean columns. df_dummies is reused below as the modelling matrix.
df_dummies = pd.get_dummies(df, drop_first=True)
corr_fig, corr_ax = plt.subplots(figsize=(16, 12))
sns.heatmap(df_dummies.corr(), cmap='viridis', ax=corr_ax)
corr_ax.set_title('Correlation Heatmap')
corr_fig.savefig('eda_images/correlation_heatmap.png', bbox_inches='tight')
plt.close(corr_fig)

# Feature importance
# Split once here; X_train/X_test/y_train/y_test are reused by the XGBoost section below.
X = df_dummies.drop('Churn', axis=1)
y = df_dummies['Churn']
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# A RandomForest is fitted only to rank features for the EDA plot.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
importances = pd.Series(rf.feature_importances_, index=X.columns).sort_values(ascending=False)

plt.figure(figsize=(12, 10))
# seaborn deprecates `palette` without `hue`; assigning the y variable to `hue` with
# legend=False gives the same per-bar colouring without the FutureWarning.
sns.barplot(
    x=importances,
    y=importances.index,
    hue=importances.index,
    palette='viridis',
    legend=False,
)
plt.title('Feature Importance (RandomForest)')
plt.tight_layout()
plt.savefig('eda_images/feature_importance.png', bbox_inches='tight')
plt.close()

# 6. Train XGBoost (on the GPU when one is available)
print("Training XGBoost Model...")

# XGBoost >= 2.0 selects the accelerator through `device`; the older
# tree_method='gpu_hist' is deprecated and `predictor` is no longer used
# (the library reports "Parameters: { "predictor" } are not used.").
tree_method = 'hist'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = xgb.XGBClassifier(
    n_estimators=100, learning_rate=0.1, max_depth=5, subsample=0.8,
    colsample_bytree=0.8, random_state=42, objective='binary:logistic',
    tree_method=tree_method, device=device, eval_metric='logloss'
)
model.fit(X_train, y_train)
print(" Model training complete.")

# 7. Evaluate model on the held-out split
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['No Churn','Churn'])
cm = confusion_matrix(y_test, y_pred)

# Draw the confusion matrix on its own figure and persist it with the other EDA images.
cm_fig, cm_ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix')
cm_fig.savefig('eda_images/confusion_matrix.png', bbox_inches='tight')
plt.close(cm_fig)

print(f" Model Accuracy: {acc:.2%}")

# 8. Save artifacts consumed by the Streamlit app (model, feature order, metrics)
joblib.dump(model, 'model_xgb.pkl')

# Use context managers so both JSON files are flushed and closed before the
# Streamlit subprocess tries to read them.
with open('x_columns.json', 'w') as columns_file:
    json.dump(list(X.columns), columns_file)
with open('eval.json', 'w') as eval_file:
    # float() keeps the accuracy JSON-serialisable even if it is a NumPy scalar.
    json.dump({'accuracy': float(acc), 'report': report}, eval_file)

# 9. Write the Streamlit app (with background image) to app.py
# The app source is kept in a raw string and written verbatim; it loads the artifacts
# saved above (model_xgb.pkl, x_columns.json, eval.json) and the images in eda_images/.
app_code = r"""
import streamlit as st, pandas as pd, joblib, json, os
import matplotlib.pyplot as plt

# Page config goes first: Streamlit documents set_page_config as the first st.* command
# of a page, and older releases raise StreamlitAPIException when it is called later.
st.set_page_config(page_title="Telco Churn Prediction", layout="wide")

# -------------------- CUSTOM BACKGROUND --------------------
# NOTE(review): the URL below is a Freepik landing page (.htm), not an image file, so the
# browser cannot render it as a CSS background. Replace it with a direct image URL.
page_bg = '''
<style>
[data-testid="stAppViewContainer"] {
    background-image: url("https://www.freepik.com/free-vector/customer-experience-isometric-colorful-flowchart-with-purchase-data-monetization-product-evaluation-feedback-review-quality-symbol-vector-illustration_26761702.htm#fromView=keyword&page=1&position=12&uuid=39894e21-4b18-4811-9ac3-f2d729e1d21c&query=Customer+churn");
    background-size: cover;
    background-position: center;
    background-attachment: fixed;
}
[data-testid="stHeader"], [data-testid="stToolbar"] {visibility: hidden;}
</style>
'''
st.markdown(page_bg, unsafe_allow_html=True)
# ------------------------------------------------------------

st.title(" Telco Customer Churn Prediction (GPU Model)")

# Load assets (context managers close the JSON files after reading)
model = joblib.load('model_xgb.pkl')
with open('x_columns.json') as columns_file:
    columns = json.load(columns_file)
with open('eval.json') as eval_file:
    eval_info = json.load(eval_file)
EDA_DIR = 'eda_images'

st.header("1️ Model Evaluation Summary")
st.write(f"**Accuracy:** {eval_info['accuracy']:.2%}")
st.text(eval_info['report'])

st.header("2️ Dataset Overview")
df = pd.read_csv('/content/WA_Fn-UseC_-Telco-Customer-Churn.csv')
st.dataframe(df.head())

st.header("3️ Exploratory Data Analysis (EDA)")
images = ['countplots.png','kdeplots.png','correlation_heatmap.png','feature_importance.png','confusion_matrix.png']
cols = st.columns(3)
for i, img in enumerate(images):
    path = os.path.join(EDA_DIR, img)
    if os.path.exists(path):
        with cols[i % 3]:
            st.image(path, caption=img.replace('.png','').replace('_',' ').title(), use_container_width=True)

st.header("4️ Predict Single Customer Churn")
with st.form("predict_form"):
    tenure = st.number_input("Tenure (months)", 0, 200, 12)
    MonthlyCharges = st.number_input("MonthlyCharges", 0.0, 10000.0, 70.0)
    TotalCharges = st.number_input("TotalCharges", 0.0, 100000.0, 3000.0)
    Contract = st.selectbox("Contract", ["Month-to-month","One year","Two year"])
    InternetService = st.selectbox("InternetService", ["DSL","Fiber optic","No"])
    PaymentMethod = st.selectbox("PaymentMethod", ["Electronic check","Mailed check","Bank transfer (automatic)","Credit card (automatic)"])
    submitted = st.form_submit_button("Predict")

if submitted:
    input_df = pd.DataFrame([{
        'tenure': tenure,
        'MonthlyCharges': MonthlyCharges,
        'TotalCharges': TotalCharges,
        'Contract': Contract,
        'InternetService': InternetService,
        'PaymentMethod': PaymentMethod
    }])
    # Encode every category present in this single row. Dropping the first level here would
    # remove ALL dummy columns (each column has exactly one level), so the selected
    # Contract / InternetService / PaymentMethod would never reach the model.
    input_proc = pd.get_dummies(input_df)
    # Align to the training layout: baseline levels that were dropped at training time are
    # discarded, and training features not collected by this form default to 0.
    input_proc = input_proc.reindex(columns=columns, fill_value=0)
    pred = model.predict(input_proc)[0]
    prob = model.predict_proba(input_proc)[0][1]
    if pred == 1:
        st.error(f"⚠ Customer likely to CHURN — Probability: {prob:.2%}")
    else:
        st.success(f" Customer likely to STAY — Churn Probability: {prob:.2%}")
"""

# The app source contains non-ASCII characters, so write it explicitly as UTF-8.
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(app_code)
print(" Streamlit app with background image written to app.py")

# Launch Streamlit + ngrok tunnel
from pyngrok import ngrok
import getpass, os, subprocess, time

# SECURITY: never hard-code the ngrok authtoken in the notebook. Read it from the
# NGROK_AUTHTOKEN environment variable, or prompt for it without echoing.
# Any token that was previously committed here should be treated as leaked and rotated.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

# Start the app in the background, then expose its port through the tunnel.
subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "8501"])
time.sleep(10)  # give the Streamlit server time to start before connecting the tunnel
public_url = ngrok.connect(8501)
print(" OPEN THIS LINK TO VIEW YOUR APP:\n", public_url)


 GPU Available: Tesla T4
Loaded dataset: (7043, 21)



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=importances, y=importances.index, palette='viridis')


Training XGBoost Model...



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.



 Model training complete.



    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




 Model Accuracy: 80.34%
 Streamlit app with background image written to app.py
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
 OPEN THIS LINK TO VIEW YOUR APP:
 NgrokTunnel: "https://unresenting-tamisha-hangingly.ngrok-free.dev" -> "http://localhost:8501"


In [None]:
!pip uninstall -y xgboost


Found existing installation: xgboost 3.1.1
Uninstalling xgboost-3.1.1:
  Successfully uninstalled xgboost-3.1.1


In [None]:
!pip install xgboost==2.1.1 --no-cache-dir --upgrade


Collecting xgboost==2.1.1
  Downloading xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl (153.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.9/153.9 MB[0m [31m167.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-2.1.1


In [None]:
import xgboost as xgb

# Sanity check after the reinstall: show the version the kernel actually imported
# and the build flags reported by the library.
version_label = "XGBoost version:"
print(version_label, xgb.__version__)
build_details = xgb.build_info()
print(build_details)


XGBoost version: 2.1.1
{'BUILTIN_PREFETCH_PRESENT': True, 'CUDA_VERSION': [11, 8], 'DEBUG': False, 'GCC_VERSION': [10, 3, 1], 'MM_PREFETCH_PRESENT': True, 'NCCL_VERSION': [2, 16, 5], 'THRUST_VERSION': [1, 15, 1], 'USE_CUDA': True, 'USE_DLOPEN_NCCL': True, 'USE_FEDERATED': True, 'USE_NCCL': True, 'USE_OPENMP': True, 'USE_RMM': False, 'libxgboost': '/usr/local/lib/python3.12/dist-packages/xgboost/lib/libxgboost.so'}


In [None]:
!pip install streamlit pyngrok pandas numpy scikit-learn xgboost matplotlib seaborn -q