<a href="https://colab.research.google.com/github/RajeshJoshi8848/BankChurn/blob/main/FinalBankChurn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# %pip installs into the environment of the running kernel (unlike !pip, which
# uses whichever pip is first on the shell PATH). The version is pinned to the
# release recorded in this notebook's last run so that re-runs are reproducible.
%pip install -q gradio==5.22.0

Collecting gradio
  Downloading gradio-5.22.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import gradio as gr

# Load the dataset
df = pd.read_csv('https://github.com/YBI-Foundation/Dataset/raw/refs/heads/main/Bank%20Churn%20Modelling.csv')

# Preprocessing the dataset: index rows by customer id and discard the surname.
# Chained (non-inplace) calls keep this step a single, re-runnable expression.
df = df.set_index('CustomerId').drop(columns='Surname')

# Encoding categorical variables.
# Series.map is used for the string columns because Series.replace on an object
# column emits a FutureWarning (deprecated silent downcasting) in recent pandas.
df['Geography'] = df['Geography'].map({'France': 2, 'Germany': 1, 'Spain': 0})
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
# map() yields NaN for labels missing from the dictionaries, so fail fast here
# instead of letting NaN reach the scaler and the model.
assert df[['Geography', 'Gender']].notna().all().all(), "Unmapped Geography/Gender value in dataset"
# Collapse the product count into a binary flag: one product -> 0, several -> 1.
df['Num Of Products'] = df['Num Of Products'].replace({1: 0, 2: 1, 3: 1, 4: 1})

# Creating a new feature for customers with zero bank balance
df['zero_balance'] = np.where(df['Balance'] == 0, 1, 0)

# Splitting the data into features and target variable
X = df.drop('Churn', axis=1)
y = df['Churn']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling the features. The scaler is fitted on the training split only and is
# reused later to transform the held-out split and individual user inputs.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handling imbalanced data using random undersampling
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=42)
X_train_rus, y_train_rus = rus.fit_resample(X_train, y_train)

# Training the SVM model.
# probability=True fits an internal cross-validated calibration that shuffles
# the data; random_state pins that shuffle so predicted probabilities are
# reproducible across kernel restarts.
best_svm_model = SVC(
    C=1,
    gamma=0.1,
    kernel="rbf",
    class_weight="balanced",
    probability=True,
    random_state=42,
)
best_svm_model.fit(X_train_rus, y_train_rus)

# Function to plot churn distribution
def plot_churn_distribution():
    """Save a count plot of the ``Churn`` column of ``df`` and return its path.

    The chart is written to ``churn_distribution.png`` in the current working
    directory, overwriting any previous file of that name.

    Returns:
        str: The file name of the saved image (``'churn_distribution.png'``).
    """
    # Explicit figure/axes instead of the pyplot state machine, so the figure
    # can be closed reliably; this function runs on every prediction request and
    # an unclosed figure per call would accumulate in memory.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.countplot(x='Churn', data=df, palette='Set2', ax=ax)
        ax.set_title('Churn Distribution')
        ax.set_xlabel('Churn (0 = Not Churn, 1 = Churn)')
        ax.set_ylabel('Count')
        fig.tight_layout()
        fig.savefig('churn_distribution.png')
    finally:
        plt.close(fig)
    return 'churn_distribution.png'

# Function to make predictions based on user input
def predict_churn(CreditScore, Geography, Gender, Age, Tenure, Balance, NumOfProducts, HasCreditCard, IsActiveMember, EstimatedSalary):
    """Predict churn for a single customer and render the supporting plots.

    The raw form values are validated, encoded with the same mappings used for
    the training data, scaled with the fitted ``scaler`` and passed to
    ``best_svm_model``.

    Args:
        CreditScore: Numeric credit score.
        Geography: One of ``'France'``, ``'Germany'``, ``'Spain'``.
        Gender: ``'Male'`` or ``'Female'``.
        Age: Numeric age.
        Tenure: Numeric tenure.
        Balance: Numeric account balance; also drives the ``zero_balance`` flag.
        NumOfProducts: Product count, 1 to 4 (binarised as in training).
        HasCreditCard: 0/1 flag; may arrive as the string ``"0"``/``"1"``.
        IsActiveMember: 0/1 flag; may arrive as the string ``"0"``/``"1"``.
        EstimatedSalary: Numeric estimated salary.

    Returns:
        tuple: ``(message, probability_plot_path, distribution_plot_path)``.
        On any failure the tuple is ``("Error in prediction: <reason>", None, None)``
        so the UI shows the reason instead of raising.
    """
    geography_codes = {'France': 2, 'Germany': 1, 'Spain': 0}
    gender_codes = {'Male': 0, 'Female': 1}
    product_codes = {1: 0, 2: 1, 3: 1, 4: 1}

    try:
        # Empty form fields arrive as None; report them by name rather than
        # letting them surface later as an opaque NaN error from the model.
        supplied = {
            'Credit Score': CreditScore,
            'Geography': Geography,
            'Gender': Gender,
            'Age': Age,
            'Tenure': Tenure,
            'Balance': Balance,
            'Number of Products': NumOfProducts,
            'Has Credit Card': HasCreditCard,
            'Is Active Member': IsActiveMember,
            'Estimated Salary': EstimatedSalary,
        }
        missing = [label for label, value in supplied.items() if value is None]
        if missing:
            raise ValueError("Missing value for: " + ", ".join(missing))

        if Geography not in geography_codes:
            raise ValueError(f"Geography must be one of {sorted(geography_codes)}, got {Geography!r}")
        if Gender not in gender_codes:
            raise ValueError(f"Gender must be one of {sorted(gender_codes)}, got {Gender!r}")
        # The model only ever saw the binarised 1-4 product counts; anything
        # else would be fed in unencoded and produce a meaningless prediction.
        if NumOfProducts not in product_codes:
            raise ValueError(f"Number of Products must be 1, 2, 3 or 4, got {NumOfProducts!r}")

        # Build the feature row already encoded. Column names and order mirror
        # the frame built here originally, which is what ``scaler`` receives.
        input_data = pd.DataFrame({
            'CreditScore': [CreditScore],
            'Geography': [geography_codes[Geography]],
            'Gender': [gender_codes[Gender]],
            'Age': [Age],
            'Tenure': [Tenure],
            'Balance': [Balance],
            'Num Of Products': [product_codes[NumOfProducts]],
            # The dropdowns deliver the strings "0"/"1"; cast explicitly
            # instead of relying on implicit string-to-float coercion.
            'Has Credit Card': [int(HasCreditCard)],
            'Is Active Member': [int(IsActiveMember)],
            'Estimated Salary': [EstimatedSalary],
            'zero_balance': [1 if Balance == 0 else 0]
        })

        # Scale the input data using the same scaler
        input_data_scaled = scaler.transform(input_data)

        # Make prediction.
        # NOTE(review): scikit-learn documents that SVC.predict_proba can
        # disagree slightly with SVC.predict, so the label and the bar chart
        # may diverge for borderline customers.
        prediction = best_svm_model.predict(input_data_scaled)[0]
        proba = best_svm_model.predict_proba(input_data_scaled)[0][1]  # Probability of churn

        # Plot probability as a bar; the figure is always closed so repeated
        # requests do not accumulate open matplotlib figures.
        fig, ax = plt.subplots(figsize=(5, 3))
        try:
            ax.bar(['Not Churn', 'Churn'], [1 - proba, proba], color=['green', 'red'])
            ax.set_ylim(0, 1)
            ax.set_title('Churn Prediction Probability')
            ax.set_xlabel('Prediction')
            ax.set_ylabel('Probability')
            fig.tight_layout()
            fig.savefig('churn_probability.png')
        finally:
            plt.close(fig)

        # Return the prediction result
        if prediction == 1:
            message = "The customer is likely to churn."
        else:
            message = "The customer is not likely to churn."
        return message, 'churn_probability.png', plot_churn_distribution()
    except Exception as e:
        # Deliberate best-effort: show the failure reason in the UI textbox.
        return f"Error in prediction: {e}", None, None

# Gradio interface
# Input widgets, in the positional order expected by predict_churn.
inputs = [
    gr.Number(label="Credit Score"),
    gr.Dropdown(choices=["France", "Germany", "Spain"], label="Geography"),
    gr.Dropdown(choices=["Male", "Female"], label="Gender"),
    gr.Number(label="Age"),
    gr.Number(label="Tenure"),
    gr.Number(label="Balance"),
    # The encoding used for training and prediction maps product counts 1-4,
    # so the field is labelled and constrained to that range; precision=0
    # makes the component deliver an integer.
    gr.Number(label="Number of Products (1-4)", precision=0, minimum=1, maximum=4),
    gr.Dropdown(choices=["0", "1"], label="Has Credit Card (0=No, 1=Yes)"),
    gr.Dropdown(choices=["0", "1"], label="Is Active Member (0=No, 1=Yes)"),
    gr.Number(label="Estimated Salary"),
]

# Output widgets, matching the (message, probability plot, distribution plot)
# tuple returned by predict_churn.
outputs = [
    gr.Textbox(label="Churn Prediction"),
    gr.Image(label="Prediction Probability Plot"),
    gr.Image(label="Churn Distribution in Dataset")  # Add Churn Distribution plot
]

iface = gr.Interface(
    fn=predict_churn,
    inputs=inputs,
    outputs=outputs,
    title="Customer Churn Prediction with Probability & Churn Distribution",
    description="Enter customer details to predict churn likelihood, see prediction probability, and view churn distribution.",
)

# Launch the Gradio app (share=True also exposes a temporary public URL)
iface.launch(share=True)

  df['Geography'] = df['Geography'].replace({'France': 2, 'Germany': 1, 'Spain': 0})
  df['Gender'] = df['Gender'].replace({'Male': 0, 'Female': 1})


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://66eddf24465e105b1a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


