In [5]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.3.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.26.1-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.wh

In [None]:
import gradio as gr
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import classification_report

In [9]:
def segment_users(row, median_sessions, median_sessions_2, median_sessions_3, median_sessions_4):
    """Label one user row as 'High Engagement' or 'Low Engagement'.

    The row is 'High Engagement' only when every condition holds:
    ``sessions`` is strictly greater than ``median_sessions``,
    ``total_navigations_fav1`` is at least ``median_sessions_2``,
    ``n_days_after_onboarding`` is at least ``median_sessions_3`` and
    ``drives`` is at most ``median_sessions_4``.
    Any failed (or NaN) comparison yields 'Low Engagement'.
    """
    # Guard clauses, evaluated in the same order as the thresholds are passed in.
    if not row['sessions'] > median_sessions:
        return 'Low Engagement'
    if not row['total_navigations_fav1'] >= median_sessions_2:
        return 'Low Engagement'
    if not row['n_days_after_onboarding'] >= median_sessions_3:
        return 'Low Engagement'
    if not row['drives'] <= median_sessions_4:
        return 'Low Engagement'
    return 'High Engagement'

In [10]:
def segment_driving_days(row, median_sessions, median_sessions_2):
    """Label one user row as 'High day' or 'Low day'.

    'High day' is returned when ``activity_days`` is at most ``median_sessions``
    and ``driving_days`` is at most ``median_sessions_2``; otherwise 'Low day'.
    """
    activity_ok = row['activity_days'] <= median_sessions
    # `and` short-circuits, so driving_days is only inspected when the first test passed.
    both_ok = activity_ok and row['driving_days'] <= median_sessions_2
    return 'High day' if both_ok else 'Low day'

In [15]:
# Restore the fitted preprocessing objects and the classifier from disk.
# pickle.load can execute arbitrary code, so these files must come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


scaler = _load_pickle('/content/scaler.pkl')
selector = _load_pickle('/content/SelectFromModel.pkl')
logistic_regression_model = _load_pickle('/content/logistic_regression_model.pkl')



In [None]:
def preprocess_dataframe(df):
    """Return a copy of ``df`` without the non-feature columns 'ID' and 'device'.

    Columns that are not present are skipped silently. The input frame is
    left untouched, so callers must use the returned frame (as the call site
    in this notebook already does) rather than rely on in-place modification.

    Args:
        df: pandas DataFrame of user records.

    Returns:
        A new DataFrame with 'ID' and 'device' removed where they existed.
    """
    # errors='ignore' replaces the manual "does the column exist" filter.
    return df.drop(columns=['ID', 'device'], errors='ignore')

In [37]:
def make_predictions(file=None, sessions=None, drives=None, total_sessions=None,
                     n_days_after_onboarding=None, total_navigations_fav1=None,
                     total_navigations_fav2=None, driven_km_drives=None,
                     duration_minutes_drives=None, activity_days=None,
                     driving_days=None, device=None):
    """Predict churn for an uploaded CSV or for a single manually entered user.

    Args:
        file: Uploaded CSV. Either an object whose ``.name`` is the file path
            or a plain path string. When given, the remaining arguments are
            ignored. An optional 'label' column is used as ground truth.
        sessions, drives, total_sessions, n_days_after_onboarding,
        total_navigations_fav1, total_navigations_fav2, driven_km_drives,
        duration_minutes_drives, activity_days, driving_days, device:
            Feature values for one user, used only when ``file`` is None.

    Returns:
        A ``(results, report)`` tuple. ``results`` is a DataFrame with the
        columns 'Actual' and 'Predicted', one row per input record.
        ``report`` is the ``classification_report`` dict when labels are
        available, ``{"Prediction": label}`` for a single unlabelled record,
        or ``{"Prediction counts": {...}}`` for several unlabelled records.
    """
    y_test = None
    if file is not None:
        # Accept both an upload object exposing `.name` and a bare path string.
        csv_path = getattr(file, 'name', file)
        df = pd.read_csv(csv_path)
        X_test = df.drop('label', axis=1, errors='ignore')
        y_test = df.get('label', None)
    else:
        feature_names = ['sessions', 'drives', 'total_sessions', 'n_days_after_onboarding',
                         'total_navigations_fav1', 'total_navigations_fav2',
                         'driven_km_drives', 'duration_minutes_drives',
                         'activity_days', 'driving_days', 'device']

        input_data = [sessions, drives, total_sessions, n_days_after_onboarding,
                      total_navigations_fav1, total_navigations_fav2,
                      driven_km_drives, duration_minutes_drives,
                      activity_days, driving_days, device]

        X_test = pd.DataFrame([input_data], columns=feature_names)

    X_test = preprocess_dataframe(X_test)

    # Nothing to score: return empty outputs instead of failing inside pandas/sklearn.
    if X_test.empty:
        empty_results = pd.DataFrame({'Actual': [], 'Predicted': []})
        return empty_results, {"Prediction counts": {'churned': 0, 'retained': 0}}

    # Segmenting engagement levels and day levels.
    # NOTE(review): the thresholds are medians of the submitted batch, not of the
    # training data; confirm this matches how the model's features were built.
    X_test['engagement_level'] = X_test.apply(segment_users, axis=1, args=(
        X_test['sessions'].median(),
        X_test['total_navigations_fav1'].median(),
        X_test['n_days_after_onboarding'].median(),
        X_test['drives'].median()
    ))

    X_test['day_level'] = X_test.apply(segment_driving_days, axis=1, args=(
        X_test['activity_days'].median(),
        X_test['driving_days'].median()
    ))

    # Calculating new features; zero denominators become NaN so no inf is produced.
    X_test['activity_ratio'] = X_test['driving_days'] / X_test['activity_days'].replace(0, np.nan)
    X_test['avg_distance_per_drive'] = X_test['driven_km_drives'] / X_test['drives'].replace(0, np.nan)
    X_test['engagement_ratio'] = X_test['total_sessions'] / X_test['driving_days'].replace(0, np.nan)
    X_test['avg_navigations_fav'] = (X_test['total_navigations_fav1'] + X_test['total_navigations_fav2']) / 2

    # Filling NaN values with the column median. The result is assigned back
    # because in-place fillna on a column selection does not reliably update the frame.
    for col in ['activity_ratio', 'avg_distance_per_drive', 'engagement_ratio', 'avg_navigations_fav']:
        fill_value = X_test[col].median()
        if pd.isna(fill_value):
            # Every value is undefined (e.g. a single row with a zero denominator).
            # NOTE(review): 0.0 is an assumed neutral fallback; confirm against training.
            fill_value = 0.0
        X_test[col] = X_test[col].fillna(fill_value)

    # Mapping engagement and day levels to numerical values
    X_test['engagement_level'] = X_test['engagement_level'].map({'Low Engagement': 0, 'High Engagement': 1})
    X_test['day_level'] = X_test['day_level'].map({'Low day': 0, 'High day': 1})

    # Prepare the DataFrame for scaling
    temp_X_test = X_test.copy()

    # Rescale by fixed constants: days / 365 and minutes / (60 * 24).
    temp_X_test['n_days_after_onboarding'] = (X_test['n_days_after_onboarding'] / 365).astype(float)
    temp_X_test['duration_minutes_drives'] = (X_test['duration_minutes_drives'] / (60 * 24)).astype(float)

    # Only these columns go through the fitted scaler.
    scaled_columns = ['total_navigations_fav1', 'total_navigations_fav2', 'total_sessions', 'driven_km_drives']
    temp_X_test[scaled_columns] = scaler.transform(temp_X_test[scaled_columns])

    # Feature Selection
    X_test_selected = selector.transform(temp_X_test)

    # Make predictions
    # NOTE(review): assumes the model encodes class 0 as 'churned'; confirm.
    y_pred = logistic_regression_model.predict(X_test_selected)
    y_pred_label = ['churned' if pred == 0 else 'retained' for pred in y_pred]

    # Labelled upload: score only the rows that actually carry a label,
    # since missing labels cannot be compared against predictions.
    if y_test is not None:
        labelled = y_test.notna()
        if labelled.any():
            results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred_label})
            predicted = pd.Series(y_pred_label, index=y_test.index)
            report = classification_report(y_test[labelled], predicted[labelled], output_dict=True)
            return results, report

    # Unlabelled input: return every prediction, not just the first one.
    results = pd.DataFrame({'Actual': [None] * len(y_pred_label), 'Predicted': y_pred_label})
    if len(y_pred_label) == 1:
        report = {"Prediction": y_pred_label[0]}
    else:
        report = {"Prediction counts": {label: y_pred_label.count(label)
                                        for label in ('churned', 'retained')}}
    return results, report


In [40]:
# Build the UI pieces first, then wire them to the prediction function.
csv_upload = gr.File(label="Upload CSV File")
results_table = gr.DataFrame(label="Results")
report_view = gr.JSON(label="Custom Report")

# Only the CSV upload is exposed; make_predictions' per-field arguments keep their defaults.
iface = gr.Interface(
    fn=make_predictions,
    inputs=[csv_upload],
    outputs=[results_table, report_view],
    title="Waze App",
    description="Predict user churn for the Waze app. You can upload a CSV file with multiple user records.",
)

# debug=True keeps the cell running so errors and logs stay visible.
iface.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://fbc20fb18f19ad3ffb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://fbc20fb18f19ad3ffb.gradio.live




In [41]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m81.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.3/79.3 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[

In [42]:
import streamlit as st
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import classification_report

# Restore the fitted preprocessing objects and the classifier from the working directory.
# pickle.load can execute arbitrary code, so these files must come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


scaler = _load_pickle('scaler.pkl')
selector = _load_pickle('SelectFromModel.pkl')
logistic_regression_model = _load_pickle('logistic_regression_model.pkl')

def segment_users(row, median_sessions, median_sessions_2, median_sessions_3, median_sessions_4):
    """Classify one user row by engagement.

    Returns 'High Engagement' when ``sessions`` exceeds ``median_sessions``,
    ``total_navigations_fav1`` reaches ``median_sessions_2``,
    ``n_days_after_onboarding`` reaches ``median_sessions_3`` and ``drives``
    does not exceed ``median_sessions_4``; otherwise 'Low Engagement'.
    """
    # A single boolean, built with short-circuiting `and` in threshold order.
    is_high = (
        row['sessions'] > median_sessions
        and row['total_navigations_fav1'] >= median_sessions_2
        and row['n_days_after_onboarding'] >= median_sessions_3
        and row['drives'] <= median_sessions_4
    )
    return 'High Engagement' if is_high else 'Low Engagement'

def segment_driving_days(row, median_sessions, median_sessions_2):
    """Classify one user row by day counts.

    Returns 'High day' when ``activity_days`` is at most ``median_sessions``
    and ``driving_days`` is at most ``median_sessions_2``; otherwise 'Low day'.
    """
    # Guard clauses in the original check order.
    if not row['activity_days'] <= median_sessions:
        return 'Low day'
    if not row['driving_days'] <= median_sessions_2:
        return 'Low day'
    return 'High day'

def preprocess_dataframe(df):
    """Return a copy of ``df`` without the non-feature columns 'ID' and 'device'.

    Columns that are not present are skipped silently. The input frame is
    left untouched, so callers must use the returned frame (as the call site
    in this script already does) rather than rely on in-place modification.

    Args:
        df: pandas DataFrame of user records.

    Returns:
        A new DataFrame with 'ID' and 'device' removed where they existed.
    """
    # errors='ignore' replaces the manual "does the column exist" filter.
    return df.drop(columns=['ID', 'device'], errors='ignore')

def make_predictions(df):
    """Predict churn for every record in ``df``.

    Args:
        df: pandas DataFrame of user records. 'ID', 'device' and an optional
            'label' column are ignored. The frame itself is not modified.

    Returns:
        A DataFrame with a single 'Predicted' column holding 'churned' or
        'retained', one row per input record (empty when there is no input).
    """
    # Dropping 'label' yields a fresh frame, so the caller's data (already shown
    # in the preview) is never mutated and a ground-truth column is not fed to the model.
    X_test = preprocess_dataframe(df.drop(columns=['label'], errors='ignore'))

    # Nothing to score: return an empty result instead of failing inside pandas/sklearn.
    if X_test.empty:
        return pd.DataFrame({'Predicted': []})

    # Segmenting engagement levels and day levels.
    # NOTE(review): the thresholds are medians of the uploaded batch, not of the
    # training data; confirm this matches how the model's features were built.
    X_test['engagement_level'] = X_test.apply(segment_users, axis=1, args=(
        X_test['sessions'].median(),
        X_test['total_navigations_fav1'].median(),
        X_test['n_days_after_onboarding'].median(),
        X_test['drives'].median()
    ))

    X_test['day_level'] = X_test.apply(segment_driving_days, axis=1, args=(
        X_test['activity_days'].median(),
        X_test['driving_days'].median()
    ))

    # Calculating new features; zero denominators become NaN so no inf is produced.
    X_test['activity_ratio'] = X_test['driving_days'] / X_test['activity_days'].replace(0, np.nan)
    X_test['avg_distance_per_drive'] = X_test['driven_km_drives'] / X_test['drives'].replace(0, np.nan)
    X_test['engagement_ratio'] = X_test['total_sessions'] / X_test['driving_days'].replace(0, np.nan)
    X_test['avg_navigations_fav'] = (X_test['total_navigations_fav1'] + X_test['total_navigations_fav2']) / 2

    # Filling NaN values with the column median. The result is assigned back
    # because in-place fillna on a column selection does not reliably update the frame.
    for col in ['activity_ratio', 'avg_distance_per_drive', 'engagement_ratio', 'avg_navigations_fav']:
        fill_value = X_test[col].median()
        if pd.isna(fill_value):
            # Every value is undefined (e.g. a single row with a zero denominator).
            # NOTE(review): 0.0 is an assumed neutral fallback; confirm against training.
            fill_value = 0.0
        X_test[col] = X_test[col].fillna(fill_value)

    # Mapping engagement and day levels to numerical values
    X_test['engagement_level'] = X_test['engagement_level'].map({'Low Engagement': 0, 'High Engagement': 1})
    X_test['day_level'] = X_test['day_level'].map({'Low day': 0, 'High day': 1})

    # Prepare the DataFrame for scaling
    temp_X_test = X_test.copy()

    # Rescale by fixed constants: days / 365 and minutes / (60 * 24).
    temp_X_test['n_days_after_onboarding'] = (X_test['n_days_after_onboarding'] / 365).astype(float)
    temp_X_test['duration_minutes_drives'] = (X_test['duration_minutes_drives'] / (60 * 24)).astype(float)

    # Only these columns go through the fitted scaler.
    scaled_columns = ['total_navigations_fav1', 'total_navigations_fav2', 'total_sessions', 'driven_km_drives']
    temp_X_test[scaled_columns] = scaler.transform(temp_X_test[scaled_columns])

    # Feature Selection
    X_test_selected = selector.transform(temp_X_test)

    # Make predictions
    # NOTE(review): assumes the model encodes class 0 as 'churned'; confirm.
    y_pred = logistic_regression_model.predict(X_test_selected)
    y_pred_label = ['churned' if pred == 0 else 'retained' for pred in y_pred]

    return pd.DataFrame({'Predicted': y_pred_label})

# Streamlit page. Run this code as a script with `streamlit run <file>.py`;
# executing it inside a notebook cell does not start the app.
st.title("Waze App User Churn Prediction")
st.write("Upload a CSV file with user records for churn prediction.")

# The uploader widget only offers CSV files and yields None until one is chosen.
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    uploaded_df = pd.read_csv(uploaded_file)

    # Show the first rows so the user can sanity-check the upload.
    st.write("Data Preview:")
    st.dataframe(uploaded_df.head())

    # Score every record and display the predicted labels.
    predictions = make_predictions(uploaded_df)
    st.write("Prediction Results:")
    st.dataframe(predictions)


2024-10-22 12:21:17.497 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
