<a href="https://colab.research.google.com/github/MananPoojara/DSEProject/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
!pip install streamlit pandas numpy scikit-learn joblib



In [16]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
import joblib
import os

# Load the raw dataset (cached by Streamlit)
@st.cache_data
def load_data():
    """Read the Telco churn CSV and discard incomplete rows.

    Returns:
        A DataFrame built from ``./data/WA_Fn-UseC_-Telco-Customer-Churn.csv``
        with every row that contains a missing value removed.
    """
    raw = pd.read_csv("./data/WA_Fn-UseC_-Telco-Customer-Churn.csv")
    return raw.dropna()

def preprocess_data(data):
    """Encode the churn dataset into numeric columns without mutating the input.

    Every object-dtype column except ``Churn`` is replaced by the integer
    codes of its own ``LabelEncoder``. ``Churn`` becomes 1 where the value is
    ``'Yes'`` and 0 for any other value.

    The encoding is applied to a copy, so the frame passed in keeps its
    original values.

    Args:
        data: DataFrame that contains a ``Churn`` column.

    Returns:
        A ``(encoded, label_encoders)`` tuple: ``encoded`` is the new
        DataFrame, ``label_encoders`` maps each encoded column name to the
        fitted ``LabelEncoder`` for that column.

    Raises:
        KeyError: if ``data`` has no ``Churn`` column.
    """
    # Work on a copy: overwriting columns on the argument would silently
    # change the caller's frame.
    encoded = data.copy()

    # NOTE(review): any numeric column that was read as text (object dtype)
    # is label-encoded as categories here -- confirm this is intended.
    categorical_cols = [
        col
        for col in encoded.select_dtypes(include=['object']).columns
        if col != 'Churn'  # Exclude target variable
    ]

    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        encoded[col] = le.fit_transform(encoded[col])
        label_encoders[col] = le

    # Encode target variable: 'Yes' -> 1, anything else -> 0
    encoded['Churn'] = (encoded['Churn'] == 'Yes').astype(int)
    return encoded, label_encoders

# Fit the classifier and persist it
def train_model(data):
    """Train a random forest on the encoded churn data and save it to disk.

    ``customerID`` and ``Churn`` are removed from the feature matrix, and
    ``Churn`` is used as the target. The data is split 80/20 with a fixed
    seed. The fitted model is written to ``churn_model.pkl`` and the list of
    feature column names to ``features.pkl``.

    Args:
        data: Encoded DataFrame containing ``customerID`` and ``Churn``.

    Returns:
        ``(model, X_train, y_train, X_test, y_test)``.
    """
    target = data["Churn"]
    predictors = data.drop(columns=["customerID", "Churn"])

    split = train_test_split(predictors, target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    classifier = RandomForestClassifier(random_state=42)
    classifier.fit(X_train, y_train)

    # Persist both artifacts so the prediction page can rebuild its inputs
    feature_names = predictors.columns.tolist()
    joblib.dump(classifier, "churn_model.pkl")
    joblib.dump(feature_names, "features.pkl")

    return classifier, X_train, y_train, X_test, y_test

# Load the saved model
def load_model():
    """Load the persisted model and its feature list, if both exist.

    Returns:
        ``(model, features)`` loaded from ``churn_model.pkl`` and
        ``features.pkl``, or ``(None, None)`` when either file is missing.
    """
    # Both artifacts are required: with only the model file present, loading
    # the feature list would fail instead of reporting "not trained".
    required_files = ("churn_model.pkl", "features.pkl")
    if not all(os.path.exists(path) for path in required_files):
        return None, None

    # NOTE: joblib.load unpickles the files; only load artifacts this app
    # (or another trusted source) produced.
    model = joblib.load("churn_model.pkl")
    features = joblib.load("features.pkl")
    return model, features

# Plot churn distribution
def plot_churn_distribution(data):
    """Render a bar chart of churn counts in the Streamlit app.

    Args:
        data: DataFrame with a ``Churn`` column encoded as 0 (no churn) and
            1 (churn), as produced by ``preprocess_data``.
    """
    fig, ax = plt.subplots()
    # Pin the category order so the bar at position 0 is always class 0 and
    # the bar at position 1 is always class 1, even if one class is absent;
    # otherwise the positional tick labels below could be attached to the
    # wrong bar.
    sns.countplot(x='Churn', data=data, ax=ax, palette="coolwarm", order=[0, 1])
    ax.set_title("Churn Distribution")
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['No Churn', 'Churn'])
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates until it is closed;
    # close it so figures do not accumulate across script reruns.
    plt.close(fig)

# Plot feature importance
def plot_feature_importance(model, features):
    """Render a horizontal bar chart of feature importances, largest first.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        features: Feature names, in the same order as the importances.
    """
    importances = model.feature_importances_
    feature_importance_df = pd.DataFrame({
        "Feature": features,
        "Importance": importances
    }).sort_values(by="Importance", ascending=False)

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(x="Importance", y="Feature", data=feature_importance_df, ax=ax, palette="viridis")
    ax.set_title("Feature Importance")
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates until it is closed;
    # close it so figures do not accumulate across script reruns.
    plt.close(fig)

# Plot confusion matrix
def plot_confusion_matrix(y_true, y_pred):
    """Render a 2x2 confusion-matrix heatmap in the Streamlit app.

    Args:
        y_true: Actual labels, encoded as 0 (no churn) / 1 (churn).
        y_pred: Predicted labels, same encoding.
    """
    class_names = ['No Churn', 'Churn']
    # Fix the label set so the matrix is always 2x2 and lines up with the two
    # tick labels, even when one class is missing from y_true / y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots()
    # Draw on the axes created above rather than on whatever pyplot
    # considers the current axes.
    sns.heatmap(cm, annot=True, fmt='d', cmap="Blues",
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates until it is closed;
    # close it so figures do not accumulate across script reruns.
    plt.close(fig)

# Streamlit App
# Three pages selected from the sidebar: train the model, predict churn for
# one customer, and visualize the dataset.
st.title("Telco Customer Churn Prediction")

menu = ["Train Model", "Predict Churn", "Visualize Data"]
choice = st.sidebar.selectbox("Menu", menu)

if choice == "Train Model":
    st.header("Train the Model")

    # Load and preprocess data
    st.write("Loading dataset...")
    data = load_data()
    st.write("Dataset loaded successfully! Here's a preview:")
    st.write(data.head())

    data, encoders = preprocess_data(data)

    if st.button("Train Model"):
        model, X_train, y_train, X_test, y_test = train_model(data)
        st.success("Model trained successfully!")
        st.write("Training accuracy:", model.score(X_train, y_train))
        st.write("Test accuracy:", model.score(X_test, y_test))

        # Show Feature Importance
        st.subheader("Feature Importance")
        plot_feature_importance(model, X_train.columns)

        # Confusion Matrix
        y_pred = model.predict(X_test)
        st.subheader("Confusion Matrix")
        plot_confusion_matrix(y_test, y_pred)

elif choice == "Predict Churn":
    st.header("Make Predictions")

    # Load model and features
    model, features = load_model()
    if model is None:
        st.error("Model not found! Train the model first.")
    else:
        st.write("Provide customer details to predict churn:")

        # One numeric input per feature, in the order the model was trained on.
        # Categorical features are expected as their label-encoded codes.
        user_input = [
            st.number_input(f"{feature}", value=0.0) for feature in features
        ]

        if st.button("Predict"):
            # The model was fitted on a DataFrame, so pass a one-row frame
            # with the same column names instead of a bare array; this keeps
            # the feature names attached to the values.
            input_frame = pd.DataFrame([user_input], columns=features)
            prediction = model.predict(input_frame)
            probability = model.predict_proba(input_frame)[:, 1]
            st.write(f"Prediction: **{'Yes' if prediction[0] == 1 else 'No'}**")
            st.write(f"Churn Probability: **{probability[0]:.2f}**")

elif choice == "Visualize Data":
    st.header("Data Visualization")

    # Load and preprocess data
    data = load_data()
    data, _ = preprocess_data(data)

    st.subheader("Churn Distribution")
    plot_churn_distribution(data)

    st.subheader("Correlations")
    fig, ax = plt.subplots(figsize=(10, 8))
    # customerID is an identifier, not a feature (training drops it too), so
    # its label-encoded codes are left out of the correlation matrix.
    correlations = data.drop(columns=["customerID"], errors="ignore").corr()
    sns.heatmap(correlations, annot=True, cmap="coolwarm", ax=ax)
    st.pyplot(fig)
    # Close the figure so pyplot does not keep it alive across reruns.
    plt.close(fig)


2024-12-09 13:28:26.550 No runtime found, using MemoryCacheStorageManager
