In [11]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
from sklearn.impute import SimpleImputer


In [3]:
import pandas as pd

df = pd.read_csv("/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv")
df.head()


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [4]:
df.drop('customerID', axis=1, inplace=True)
df.dropna(inplace=True)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

X = df.drop("Churn", axis=1)
y = df["Churn"].map({'Yes': 1, 'No': 0})  # Encode target


In [5]:
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()


In [12]:
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ("imputer", SimpleImputer(strategy="mean"))
])

categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ("imputer", SimpleImputer(strategy="most_frequent"))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)


In [13]:
from imblearn.pipeline import Pipeline  # Notice this is imblearn, not sklearn
from imblearn.over_sampling import SMOTE

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('smote', SMOTE(random_state=42)),
    ('classifier', LogisticRegression())
])

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [15]:
param_grid = [
    {
        'classifier': [LogisticRegression(solver='liblinear')],
        'classifier__C': [0.1, 1, 10]
    },
    {
        'classifier': [RandomForestClassifier(random_state=42)],
        'classifier__n_estimators': [50, 100],
        'classifier__max_depth': [None, 10, 20]
    }
]

grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring='recall',   # Focus on finding churners
    cv=5,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)



In [16]:
y_pred = grid_search.predict(X_test)
print("Best Parameters:\n", grid_search.best_params_)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Best Parameters:
 {'classifier': LogisticRegression(solver='liblinear'), 'classifier__C': 10}

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.73      0.81      1036
           1       0.53      0.83      0.64       373

    accuracy                           0.76      1409
   macro avg       0.72      0.78      0.73      1409
weighted avg       0.82      0.76      0.77      1409



In [17]:
final_threshold = 0.55
y_prob = grid_search.predict_proba(X_test)[:, 1]
y_final = (y_prob >= final_threshold).astype(int)


In [19]:
joblib.dump(grid_search.best_estimator_, 'telco_churn_pipeline.pkl')


['telco_churn_pipeline.pkl']

In [None]:
import gradio as gr
import joblib
import pandas as pd
import numpy as np

# --- 1. Load the trained pipeline ---
try:
    # Adjust path if your model is located elsewhere (e.g., '/kaggle/working/telco_churn_pipeline.pkl')
    pipeline = joblib.load("telco_churn_pipeline.pkl")
    print("Model pipeline loaded successfully.")
except FileNotFoundError:
    print("Error: 'telco_churn_pipeline.pkl' not found. Please ensure your trained model is in the correct directory.")
    exit()

# --- 2. Define the prediction function ---
def predict_churn(
    gender, SeniorCitizen, Partner, Dependents,
    tenure, PhoneService, MultipleLines, InternetService,
    OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
    StreamingTV, StreamingMovies,
    Contract, PaperlessBilling, PaymentMethod,
    MonthlyCharges, TotalCharges
):
    try:
        # Create a dictionary for the raw inputs
        raw_input_data = {
            "gender": gender,
            "SeniorCitizen": SeniorCitizen,
            "Partner": Partner,
            "Dependents": Dependents,
            "tenure": tenure,
            "PhoneService": PhoneService,
            "MultipleLines": MultipleLines,
            "InternetService": InternetService,
            "OnlineSecurity": OnlineSecurity,
            "OnlineBackup": OnlineBackup,
            "DeviceProtection": DeviceProtection,
            "TechSupport": TechSupport,
            "StreamingTV": StreamingTV,
            "StreamingMovies": StreamingMovies,
            "Contract": Contract,
            "PaperlessBilling": PaperlessBilling,
            "PaymentMethod": PaymentMethod,
            "MonthlyCharges": MonthlyCharges,
            "TotalCharges": TotalCharges
        }

        # Convert raw inputs to a Pandas DataFrame
        input_df = pd.DataFrame([raw_input_data])

        # --- Robustly handle numerical inputs using pd.to_numeric ---
        numerical_cols = ["SeniorCitizen", "tenure", "MonthlyCharges", "TotalCharges"]
        for col in numerical_cols:
            # Convert to numeric, coercing any errors (like empty strings) to NaN
            input_df[col] = pd.to_numeric(input_df[col], errors='coerce')

            # Fill NaN values with 0. This is crucial for new customers or missing inputs.
            # You might replace 0 with the mean/median from your training data for better imputation.
            input_df[col].fillna(0, inplace=True)

        # Ensure SeniorCitizen and tenure are integer types as they represent counts
        input_df["SeniorCitizen"] = input_df["SeniorCitizen"].astype(int)
        input_df["tenure"] = input_df["tenure"].astype(int)

        # --- ADDED LINE: Print the processed input DataFrame ---
        print("\n--- Processed Input DataFrame for Prediction ---")
        print(input_df)
        print("-----------------------------------------------\n")

        # --- Make prediction ---
        prob = pipeline.predict_proba(input_df)[0][1]
        pred = "Churn" if prob >= 0.55 else "No Churn" # Using 0.55 as a threshold
        return f"📊 **Prediction:** {pred}", f"🔢 **Churn Probability:** {prob:.2f}"

    except Exception as e:
        return "❌ **Error during prediction**", f"An unexpected error occurred: {str(e)}"

# --- 3. Define Gradio Interface Components ---

# Define choices for categorical features based on common Telco Churn datasets
GENDER_CHOICES = ["Male", "Female"]
YES_NO_CHOICES = ["Yes", "No"]
INTERNET_SERVICE_CHOICES = ["DSL", "Fiber optic", "No"]
MULTIPLE_LINES_CHOICES = ["No phone service", "No", "Yes"]
CONTRACT_CHOICES = ["Month-to-month", "One year", "Two year"]
PAYMENT_METHOD_CHOICES = ["Electronic check", "Mailed check", "Bank transfer (automatic)", "Credit card (automatic)"]

inputs = [
    gr.Radio(GENDER_CHOICES, label="Gender", value="Male"),
    gr.Radio(YES_NO_CHOICES, label="Senior Citizen", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Partner", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Dependents", value="No"),
    gr.Slider(minimum=0, maximum=72, step=1, label="Tenure (Months)", value=1),
    gr.Radio(YES_NO_CHOICES, label="Phone Service", value="Yes"),
    gr.Radio(MULTIPLE_LINES_CHOICES, label="Multiple Lines", value="No"),
    gr.Radio(INTERNET_SERVICE_CHOICES, label="Internet Service", value="DSL"),
    gr.Radio(YES_NO_CHOICES, label="Online Security", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Online Backup", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Device Protection", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Tech Support", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Streaming TV", value="No"),
    gr.Radio(YES_NO_CHOICES, label="Streaming Movies", value="No"),
    gr.Radio(CONTRACT_CHOICES, label="Contract", value="Month-to-month"),
    gr.Radio(YES_NO_CHOICES, label="Paperless Billing", value="Yes"),
    gr.Radio(PAYMENT_METHOD_CHOICES, label="Payment Method", value="Electronic check"),
    gr.Number(label="Monthly Charges", value=29.85),
    gr.Number(label="Total Charges", value=29.85)
]

outputs = [
    gr.Markdown(label="Prediction Result"),
    gr.Markdown(label="Churn Probability")
]

# --- 4. Create and launch the Gradio Interface ---
with gr.Blocks(title="Telco Churn Prediction") as demo:
    gr.Markdown(
        """
        # 📊 Telco Customer Churn Prediction
        Enter customer details to predict churn probability.
        """
    )
    with gr.Row():
        with gr.Column():
            for i in range(len(inputs) // 2): # Display first half of inputs in one column
                inputs[i].render()
        with gr.Column():
            for i in range(len(inputs) // 2, len(inputs)): # Display second half in another column
                inputs[i].render()
    predict_button = gr.Button("Predict Churn")
    with gr.Row():
        outputs[0].render()
        outputs[1].render()

    predict_button.click(
        fn=predict_churn,
        inputs=inputs,
        outputs=outputs
    )

if __name__ == "__main__":
    demo.launch(debug=True)

Model pipeline loaded successfully.
* Running on local URL:  http://127.0.0.1:7860
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://2660fd7c89dbb32ff1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  input_df[col].fillna(0, inplace=True)



--- Processed Input DataFrame for Prediction ---
   gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  Female              0     Yes         No      12          Yes   

  MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection  \
0            No     Fiber optic            Yes           No               No   

  TechSupport StreamingTV StreamingMovies        Contract PaperlessBilling  \
0         Yes          No             Yes  Month-to-month              Yes   

      PaymentMethod  MonthlyCharges  TotalCharges  
0  Electronic check            75.3         898.2  
-----------------------------------------------



In [24]:
import gradio as gr
import joblib
import pandas as pd

# ✅ Load your trained pipeline
pipeline = joblib.load("telco_churn_pipeline.pkl")

# ✅ Prediction function
def predict_churn(
    gender, SeniorCitizen, Partner, Dependents,
    tenure, PhoneService, MultipleLines, InternetService,
    OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
    StreamingTV, StreamingMovies,
    Contract, PaperlessBilling, PaymentMethod,
    MonthlyCharges, TotalCharges
):
    try:
        input_data = pd.DataFrame({
            "gender": [gender],
            "SeniorCitizen": [SeniorCitizen],
            "Partner": [Partner],
            "Dependents": [Dependents],
            "tenure": [tenure],
            "PhoneService": [PhoneService],
            "MultipleLines": [MultipleLines],
            "InternetService": [InternetService],
            "OnlineSecurity": [OnlineSecurity],
            "OnlineBackup": [OnlineBackup],
            "DeviceProtection": [DeviceProtection],
            "TechSupport": [TechSupport],
            "StreamingTV": [StreamingTV],
            "StreamingMovies": [StreamingMovies],
            "Contract": [Contract],
            "PaperlessBilling": [PaperlessBilling],
            "PaymentMethod": [PaymentMethod],
            "MonthlyCharges": [MonthlyCharges],
            "TotalCharges": [TotalCharges]
        })

        prob = pipeline.predict_proba(input_data)[0][1]
        pred = "Churn" if prob >= 0.55 else "No Churn"
        return f"📊 Prediction: {pred}", f"🔢 Churn Probability: {prob:.2f}"

    except Exception as e:
        return "❌ Error", str(e)

# ✅ Gradio app using Blocks
with gr.Blocks() as demo:
    gr.Markdown("## 📉 Telco Customer Churn Predictor")
    gr.Markdown("Enter customer details to predict the likelihood of churn.")

    with gr.Row():
        gender = gr.Radio(["Male", "Female"], label="Gender")
        SeniorCitizen = gr.Radio([0, 1], label="Senior Citizen (0 = No, 1 = Yes)")
        Partner = gr.Radio(["Yes", "No"], label="Partner")
        Dependents = gr.Radio(["Yes", "No"], label="Dependents")

    with gr.Row():
        tenure = gr.Slider(0, 72, step=1, label="Tenure (months)")
        PhoneService = gr.Radio(["Yes", "No"], label="Phone Service")
        MultipleLines = gr.Radio(["Yes", "No", "No phone service"], label="Multiple Lines")
        InternetService = gr.Radio(["DSL", "Fiber optic", "No"], label="Internet Service")

    with gr.Row():
        OnlineSecurity = gr.Radio(["Yes", "No", "No internet service"], label="Online Security")
        OnlineBackup = gr.Radio(["Yes", "No", "No internet service"], label="Online Backup")
        DeviceProtection = gr.Radio(["Yes", "No", "No internet service"], label="Device Protection")
        TechSupport = gr.Radio(["Yes", "No", "No internet service"], label="Tech Support")

    with gr.Row():
        StreamingTV = gr.Radio(["Yes", "No", "No internet service"], label="Streaming TV")
        StreamingMovies = gr.Radio(["Yes", "No", "No internet service"], label="Streaming Movies")
        Contract = gr.Radio(["Month-to-month", "One year", "Two year"], label="Contract")
        PaperlessBilling = gr.Radio(["Yes", "No"], label="Paperless Billing")

    with gr.Row():
        PaymentMethod = gr.Radio([
            "Electronic check", "Mailed check",
            "Bank transfer (automatic)", "Credit card (automatic)"
        ], label="Payment Method")

    with gr.Row():
        MonthlyCharges = gr.Number(label="Monthly Charges")
        TotalCharges = gr.Number(label="Total Charges")

    predict_btn = gr.Button("Predict")

    # Output below input
    output_label = gr.Textbox(label="Prediction")
    output_prob = gr.Textbox(label="Probability")

    # Button click handler
    predict_btn.click(
        predict_churn,
        inputs=[
            gender, SeniorCitizen, Partner, Dependents,
            tenure, PhoneService, MultipleLines, InternetService,
            OnlineSecurity, OnlineBackup, DeviceProtection, TechSupport,
            StreamingTV, StreamingMovies,
            Contract, PaperlessBilling, PaymentMethod,
            MonthlyCharges, TotalCharges
        ],
        outputs=[output_label, output_prob]
    )

demo.launch()


* Running on local URL:  http://127.0.0.1:7863
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://0285b3f5694fae20dd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


