In [2]:
# Install all necessary libraries
!pip install streamlit -q
!pip install pyngrok -q
!pip install scikit-learn==1.3.2 # Ensure version consistency
!pip install plotly -q



In [6]:
# Attach Google Drive to this runtime so the files stored there can be read
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib
import plotly.express as px
import os

# --- Page Configuration ---
# Browser-tab title and icon, plus the initial layout of the page and sidebar.
_PAGE_SETTINGS = dict(
    page_title="ChurnGuard Telecom Dashboard",
    page_icon="📡",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_SETTINGS)

# --- Helper Functions ---
@st.cache_data
def _read_churn_csv(path):
    """Reads the churn CSV at `path` and returns it with a numeric 'TotalCharges' column.

    Only this successful read is cached. Any exception propagates to the
    caller, so a failed attempt is never stored in the cache.
    """
    df = pd.read_csv(path)
    # Entries that cannot be parsed as numbers become NaN instead of raising.
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    return df


def load_data(path):
    """Loads the telecom churn data for visualization.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame, or None when the file does not exist or cannot
        be read. In both failure cases an error is shown in the app.

    The failure paths are deliberately kept outside the cached helper: a
    missing or unreadable file is re-checked on every rerun, so the app
    recovers as soon as the file becomes available instead of replaying a
    cached failure.
    """
    if not os.path.exists(path):
        st.error(f"Error: The data file was not found at {path}.")
        st.info("Please ensure the file path is correct and the file is in your Google Drive.")
        return None
    try:
        return _read_churn_csv(path)
    except Exception as e:
        st.error(f"An error occurred while loading the data: {e}")
        return None

@st.cache_resource
def _read_model_artifacts(model_path, columns_path):
    """Deserializes the model and its feature-column list from disk.

    Only a successful load is cached. Any exception propagates to the caller,
    so a failed attempt is never stored in the cache.
    """
    # NOTE(security): joblib.load unpickles its input and can execute arbitrary
    # code. Only point these paths at files you created or fully trust.
    model = joblib.load(model_path)
    model_columns = joblib.load(columns_path)
    return model, model_columns


def load_model(model_path, columns_path):
    """Loads the trained model and the feature columns.

    Args:
        model_path: Location of the serialized model file.
        columns_path: Location of the serialized feature-column list.

    Returns:
        A `(model, model_columns)` tuple, or `(None, None)` when either file
        is missing or cannot be loaded. In both failure cases an error is
        shown in the app.

    The failure paths are deliberately kept outside the cached helper: missing
    or unreadable files are re-checked on every rerun, so the app recovers as
    soon as the files become available instead of replaying a cached failure.
    """
    if not os.path.exists(model_path) or not os.path.exists(columns_path):
        st.error("Model files not found. Please ensure file paths are correct and files are in your Google Drive.")
        return None, None
    try:
        return _read_model_artifacts(model_path, columns_path)
    except Exception as e:
        st.error(f"An error occurred while loading the model: {e}")
        return None, None

# --- Define Paths within the Google Drive ---
# IMPORTANT: Update this path if your Google Drive folder is different
DRIVE_BASE_PATH = '/content/drive/MyDrive/ChurnGuard/'

# Each artifact lives at a fixed location relative to the base folder.
DATA_PATH, MODEL_PATH, COLUMNS_PATH = (
    os.path.join(DRIVE_BASE_PATH, relative_path)
    for relative_path in (
        'data/telecom_churn_data.csv',
        'src/assets/churn_model.joblib',
        'src/assets/model_columns.joblib',
    )
)

# --- Load Data and Model ---
# Either loader may hand back None values; the pages below check for that.
df_viz = load_data(DATA_PATH)
model, model_columns = load_model(MODEL_PATH, COLUMNS_PATH)

# --- Sidebar ---
# Everything created inside this context is placed in the sidebar.
with st.sidebar:
    st.title("📡 ChurnGuard Analytics")
    st.markdown("### Navigation")
    # The selected label decides which page the main area renders.
    page = st.radio("Go to", ["Dashboard Overview", "Churn Risk Predictor"])

    st.markdown("---")
    st.info(
        "This dashboard provides tools for analyzing and predicting customer churn in the telecom industry."
    )
    st.markdown("**Developed by:** Aman Agrawal")

# --- Main Application ---
def _render_dashboard_overview(df):
    """Renders the overview page: headline churn metrics followed by three charts.

    Args:
        df: The churn DataFrame, or None when loading failed. The code reads
            the 'Churn', 'Contract', 'tenure', 'MonthlyCharges' and
            'TotalCharges' columns.
    """
    st.title("📊 Dashboard Overview")
    st.markdown("An analytical summary of customer demographics and churn drivers.")

    if df is None:
        st.warning("Could not load data for visualizations. Check file paths.")
        return

    total_customers = df.shape[0]
    if total_customers == 0:
        # Guard against dividing by zero when the file has a header but no rows.
        st.warning("The data file contains no customer rows, so there is nothing to visualize.")
        return

    total_churned = df[df['Churn'] == 'Yes'].shape[0]
    churn_rate = (total_churned / total_customers) * 100

    col1, col2, col3 = st.columns(3)
    col1.metric("Total Customers", f"{total_customers:,}")
    col2.metric("Total Churned", f"{total_churned:,}")
    col3.metric("Churn Rate", f"{churn_rate:.2f}%")

    st.markdown("---")

    c1, c2 = st.columns((1, 1))
    with c1:
        st.subheader("Churn Rate by Contract Type")
        # Share of each churn label within every contract type.
        churn_by_contract = df.groupby('Contract')['Churn'].value_counts(normalize=True).unstack().fillna(0)
        if 'Yes' not in churn_by_contract.columns:
            # No churned customers at all: plot an explicit 0% rate instead of
            # failing on the missing 'Yes' column.
            churn_by_contract['Yes'] = 0.0
        fig = px.bar(churn_by_contract, y='Yes', title="Churn Rate per Contract Type",
                     labels={'Yes': 'Churn Rate', 'Contract': 'Contract Type'},
                     template='plotly_white', color_discrete_sequence=['#E55451'])
        fig.update_yaxes(tickformat=".0%")
        st.plotly_chart(fig, use_container_width=True)

    with c2:
        st.subheader("Tenure Distribution")
        fig = px.histogram(df, x='tenure', color='Churn', nbins=50,
                           title="Customer Tenure Distribution by Churn Status",
                           template='plotly_white',
                           color_discrete_map={'No': '#4A90E2', 'Yes': '#E55451'})
        st.plotly_chart(fig, use_container_width=True)

    st.subheader("Monthly Charges vs. Total Charges")
    fig = px.scatter(df, x='MonthlyCharges', y='TotalCharges', color='Churn',
                     title="Monthly vs. Total Charges by Churn Status",
                     template='plotly_white',
                     color_discrete_map={'No': 'rgba(74, 144, 226, 0.6)', 'Yes': 'rgba(229, 84, 81, 0.8)'})
    st.plotly_chart(fig, use_container_width=True)


def _render_churn_predictor(model, model_columns):
    """Renders the prediction page: an input form and, once submitted, the churn risk.

    Args:
        model: Object exposing `predict_proba`, or None when loading failed.
        model_columns: Column labels the encoded input is reindexed to, or
            None when loading failed.
    """
    st.title("🔮 Churn Risk Predictor")
    st.markdown("Enter customer details to predict their churn risk.")

    # Compare against None explicitly: truth-testing an estimator falls back to
    # its __len__ (when it defines one), which says nothing about whether the
    # model loaded.
    if model is None or model_columns is None:
        st.warning("Prediction model is not available. Check file paths.")
        return

    with st.form("prediction_form"):
        st.header("Customer Details")

        contract_options = ['Month-to-month', 'One year', 'Two year']
        internet_options = ['DSL', 'Fiber optic', 'No']
        payment_options = ['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)']

        col1, col2, col3 = st.columns(3)
        with col1:
            tenure = st.slider("Tenure (Months)", 0, 72, 12)
            contract = st.selectbox("Contract", options=contract_options)
            internet_service = st.selectbox("Internet Service", options=internet_options)
            online_security = st.selectbox("Online Security", options=['Yes', 'No', 'No internet service'])

        with col2:
            monthly_charges = st.slider("Monthly Charges ($)", 18.0, 120.0, 70.0, 0.1)
            tech_support = st.selectbox("Tech Support", options=['Yes', 'No', 'No internet service'])
            payment_method = st.selectbox("Payment Method", options=payment_options)
            dependents = st.selectbox("Has Dependents?", options=['No', 'Yes'])

        with col3:
            total_charges = st.number_input("Total Charges ($)", min_value=0.0, value=1000.0, step=50.0)
            paperless_billing = st.selectbox("Paperless Billing?", options=['Yes', 'No'])
            senior_citizen = st.selectbox("Is a Senior Citizen?", options=[0, 1], format_func=lambda x: 'Yes' if x == 1 else 'No')
            partner = st.selectbox("Has a Partner?", options=['No', 'Yes'])

        submit_button = st.form_submit_button(label="Predict Churn Risk")

    if not submit_button:
        return

    # Fields without a form widget are sent with the fixed values below.
    input_dict = {
        'gender': 'Male', 'SeniorCitizen': senior_citizen, 'Partner': partner, 'Dependents': dependents,
        'tenure': tenure, 'PhoneService': 'Yes', 'MultipleLines': 'No', 'InternetService': internet_service,
        'OnlineSecurity': online_security, 'OnlineBackup': 'No', 'DeviceProtection': 'No',
        'TechSupport': tech_support, 'StreamingTV': 'No', 'StreamingMovies': 'No',
        'Contract': contract, 'PaperlessBilling': paperless_billing, 'PaymentMethod': payment_method,
        'MonthlyCharges': monthly_charges, 'TotalCharges': total_charges
    }

    input_df = pd.DataFrame([input_dict])
    # One-hot encode the single row, then align it to the saved column list;
    # columns this row did not produce are filled with 0.
    input_encoded = pd.get_dummies(input_df).reindex(columns=model_columns, fill_value=0)
    # Convert the NumPy scalar to a built-in float so st.progress accepts it
    # regardless of the dtype the model returns.
    prediction_proba = float(model.predict_proba(input_encoded)[0][1])

    st.subheader("Prediction Result")
    st.progress(prediction_proba)

    if prediction_proba > 0.5:
        st.error(f"High Risk: There is a {prediction_proba:.1%} probability of churn.", icon="🚨")
    elif prediction_proba > 0.25:
        st.warning(f"Moderate Risk: There is a {prediction_proba:.1%} probability of churn.", icon="⚠️")
    else:
        st.success(f"Low Risk: There is a {prediction_proba:.1%} probability of churn.", icon="✅")


# Render whichever page is selected in the sidebar.
if page == "Dashboard Overview":
    _render_dashboard_overview(df_viz)
elif page == "Churn Risk Predictor":
    _render_churn_predictor(model, model_columns)

Overwriting app.py


In [8]:
# Add your ngrok authtoken
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the token in the notebook: anyone who can read the file could
# open tunnels on your account. Read it from the NGROK_AUTHTOKEN environment
# variable when set; otherwise paste it at the hidden prompt, which keeps it
# out of both the source and the saved cell output.
authtoken = (os.environ.get("NGROK_AUTHTOKEN") or getpass("Paste your ngrok authtoken: ")).strip()
if not authtoken:
    raise ValueError("An ngrok authtoken is required to open the tunnel.")
ngrok.set_auth_token(authtoken)

In [None]:
# Run the app using pyngrok
public_url = ngrok.connect(8501)
print('Your Streamlit app is live at:', public_url)
!streamlit run app.py --server.port 8501

Your Streamlit app is live at: NgrokTunnel: "https://8f5fb6b702a6.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.221.231.212:8501[0m
[0m




https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
