In [24]:
import pandas as pd
import numpy as np
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

df = pd.read_csv("/Users/levankikadze/Desktop/pandas/Bolt Food/churningUsers/Data/NewUserBehaviour.csv")
df.rename(columns={
    '2. User Information User ID': 'User ID',
    '2. User Information First Provider Viewed Event Ts Time': 'First Provider Viewed Time',
    '2. User Information First Cart Viewed Event Ts Time': 'First Cart Viewed Time',
    '2. User Information Food activation Ts Date': 'Activation Date',
    '2. User Information First Food Order ID': 'First Order ID',
    '2. User Information Last Food Order Created Ts Date': 'Last Order Date',
    '2. User Information Last Food Order ID': 'Last Order ID',
}, inplace=True)


In [28]:
# --- Prep datetime + recency buckets ---
today = pd.Timestamp(dt.datetime.today().date())
df['First Provider Viewed Time'] = pd.to_datetime(df['First Provider Viewed Time'])
df['First Cart Viewed Time']     = pd.to_datetime(df['First Cart Viewed Time'])
df['Activation Date']            = pd.to_datetime(df['Activation Date'])
df['Last Order Date']            = pd.to_datetime(df['Last Order Date'])

df['days_since_fpv'] = (today - df['First Provider Viewed Time']).dt.days
bins   = [0, 30, 60, 90, 120, np.inf]
labels = ['0-30', '31-60', '61-90', '91-120', '120+']
df['fpv_recency'] = pd.cut(df['days_since_fpv'], bins=bins, labels=labels, right=True)

# --- Function to compute your funnel stats inside each cohort ---
def funnel_stats(g):
    provider_viewed = g['First Provider Viewed Time'].notna()
    cart_viewed     = provider_viewed & g['First Cart Viewed Time'].notna()
    first_order     = cart_viewed & g['First Order ID'].notna()
    repeat_order    = (
        first_order &
        g['Last Order ID'].notna() &
        (g['First Order ID'] != g['Last Order ID'])
    )

    total_pv   = provider_viewed.sum()
    total_cv   = cart_viewed.sum()
    total_fo   = first_order.sum()
    total_rep  = repeat_order.sum()

    # Avoid /0
    def pct(x): return round((x / total_pv * 100), 1) if total_pv else 0.0

    # Avg time to first order
    mask_time = g['First Provider Viewed Time'].notna() & g['Activation Date'].notna()
    avg_time  = (g.loc[mask_time, 'Activation Date'] - g.loc[mask_time, 'First Provider Viewed Time']).mean()

    return pd.Series({
        'Provider Viewed (n)': total_pv,
        'Cart Viewed (n)':     total_cv,
        'First Order (n)':     total_fo,
        'Repeat Order (n)':    total_rep,
        'Cart Viewed (%)':     pct(total_cv),
        'First Order (%)':     pct(total_fo),
        'Repeat Order (%)':    pct(total_rep),
        'Avg Time to First Order': avg_time
    })

# --- Apply per cohort & overall ---
funnel_by_cohort = df.groupby('fpv_recency', dropna=False).apply(funnel_stats)

# Optional: overall row
overall = funnel_stats(df)
funnel_by_cohort.loc['Overall'] = overall

print(funnel_by_cohort)

             Provider Viewed (n)  Cart Viewed (n)  First Order (n)  \
fpv_recency                                                          
0-30                        1568             1090              806   
31-60                      11787             7802             5630   
61-90                      13042             8993             6823   
91-120                     13809             9779             7280   
120+                      134736            96635            75393   
Overall                   174942           124299            95932   

             Repeat Order (n)  Cart Viewed (%)  First Order (%)  \
fpv_recency                                                       
0-30                      314             69.5             51.4   
31-60                    2708             66.2             47.8   
61-90                    3321             69.0             52.3   
91-120                   3746             70.8             52.7   
120+                    45581        

  funnel_by_cohort = df.groupby('fpv_recency', dropna=False).apply(funnel_stats)
  funnel_by_cohort = df.groupby('fpv_recency', dropna=False).apply(funnel_stats)


In [52]:
plot_df = funnel_by_cohort.reset_index().rename(columns={'fpv_recency': 'Cohort'})
plot_df['Avg Time (hrs)'] = plot_df['Avg Time to First Order'].dt.total_seconds() / 3600

# Long format for bars
counts_cols = ['Provider Viewed (n)', 'Cart Viewed (n)', 'First Order (n)', 'Repeat Order (n)']
pct_cols    = ['Cart Viewed (%)', 'First Order (%)', 'Repeat Order (%)']  # Provider Viewed % would always be 100
# For parity, create a Provider Viewed (%) = 100
plot_df['Provider Viewed (%)'] = 100.0
pct_cols = ['Provider Viewed (%)'] + pct_cols

counts_long = plot_df.melt(id_vars=['Cohort', 'Avg Time (hrs)'], value_vars=counts_cols,
                           var_name='Stage', value_name='Value')
pct_long    = plot_df.melt(id_vars=['Cohort', 'Avg Time (hrs)'], value_vars=pct_cols,
                           var_name='Stage', value_name='Value')

# Order stages for consistent display
stage_order_counts = ['Provider Viewed (n)', 'Cart Viewed (n)', 'First Order (n)', 'Repeat Order (n)'][::-1]
stage_order_pct    = ['Provider Viewed (%)', 'Cart Viewed (%)', 'First Order (%)', 'Repeat Order (%)'][::-1]

counts_long['Stage'] = pd.Categorical(counts_long['Stage'], stage_order_counts, ordered=True)
pct_long['Stage']    = pd.Categorical(pct_long['Stage'], stage_order_pct, ordered=True)

# Build figures
fig = go.Figure()

def add_traces(df_long, mode_name):
    for stage in df_long['Stage'].cat.categories:
        sub = df_long[df_long['Stage'] == stage]
        fig.add_trace(go.Bar(
            x=sub['Value'],
            y=sub['Cohort'],
            orientation='h',
            name=stage,
            customdata=np.stack([sub['Avg Time (hrs)']], axis=-1),
            hovertemplate=(
                "<b>%{y}</b><br>" +
                stage + ": %{x}<br>" +
                "Avg Time→Order: %{customdata[0]:.1f} hrs" +
                "<extra></extra>"
            ),
            visible=True if mode_name == 'counts' else False
        ))

# Counts traces (default visible)
add_traces(counts_long, mode_name='counts')

# Percent traces (initially hidden)
for stage in pct_long['Stage'].cat.categories:
    sub = pct_long[pct_long['Stage'] == stage]
    fig.add_trace(go.Bar(
        x=sub['Value'],
        y=sub['Cohort'],
        orientation='h',
        name=stage,
        customdata=np.stack([sub['Avg Time (hrs)']], axis=-1),
        hovertemplate=(
            "<b>%{y}</b><br>" +
            stage + ": %{x:.1f}%<br>" +
            "Avg Time→Order: %{customdata[0]:.1f} hrs" +
            "<extra></extra>"
        ),
        visible=False
    ))

# Buttons to toggle
n_counts = len(stage_order_counts)
n_pct    = len(stage_order_pct)
btn_counts = dict(
    label="Counts",
    method="update",
    args=[
        {"visible": [True]*n_counts + [False]*n_pct},
        {"xaxis": {"title": "Users (n)"}}
    ]
)
btn_pct = dict(
    label="Percentages",
    method="update",
    args=[
        {"visible": [False]*n_counts + [True]*n_pct},
        {"xaxis": {"title": "Share of Provider Viewers (%)"}}
    ]
)

fig.update_layout(
    barmode='group',
    updatemenus=[dict(type="buttons", buttons=[btn_counts, btn_pct], direction="left", x=0.0, y=1.1)],
    xaxis_title="Users (n)",
    yaxis_title="Provider View Recency Cohort (days)",
    title="Funnel by Provider-View Recency Cohort",
    hovermode="y unified",
    legend_title="Stage"
)

fig.show()