In [1]:
import socket
import datetime
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

import dash
from dash import dcc, html, Input, Output, State
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# ---------------------------
# Helper function: get random free port
# ---------------------------
def get_free_port():
    """Ask the operating system for a currently unused TCP port on 127.0.0.1.

    A temporary socket is bound to port 0, which makes the OS assign a free
    port; the assigned number is read back and the socket is closed.

    Returns:
        int: The port number the OS assigned to the temporary socket.

    Note:
        The socket is closed before this function returns, so nothing
        reserves the port afterwards; another process could claim it before
        the caller binds to it.
    """
    # The context manager guarantees the socket is closed even if bind()
    # or getsockname() raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(('127.0.0.1', 0))
        return probe.getsockname()[1]

In [3]:
# ---------------------------
# Data generation (synthetic)
# ---------------------------
def generate_synthetic_events(start_date='2024-01-01', end_date=None, n_users=50000, seed=42):
    """Generate a synthetic user table and a synthetic event log.

    Each user gets a signup date (exponentially distributed offset from
    ``start_date``, clipped to ``end_date``), a platform, a country and an
    acquisition channel. For each user a ``register`` event is emitted, then,
    with some probability, ``activate``, ``first_txn``, a Poisson number of
    ``repeat_txn`` events and feature-usage events. Events dated after
    ``end_date`` are not emitted.

    Args:
        start_date: First possible signup date (anything ``pd.to_datetime`` accepts).
        end_date: Last allowed event date. Defaults to ``start_date`` + 18 months.
        n_users: Number of users to simulate.
        seed: Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's global random state as a side effect.

    Returns:
        tuple: ``(users, events_df)`` where ``users`` has columns
        ``user_id, signup_date, platform, country, acq_channel`` and
        ``events_df`` has columns ``user_id, event, event_date`` sorted by
        ``event_date``. With ``n_users=0`` both frames are empty but keep
        their columns.
    """
    if end_date is None:
        end_date = (pd.to_datetime(start_date) + pd.DateOffset(months=18)).strftime('%Y-%m-%d')

    np.random.seed(seed)
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    user_ids = np.arange(1, n_users + 1)
    # One scalar draw per user; signups that would fall after `end` are clipped to `end`.
    signup_dates = [start + pd.Timedelta(days=int(np.random.exponential(scale=180))) for _ in user_ids]
    signup_dates = [d if d <= end else end for d in signup_dates]

    platforms = np.random.choice(['Android', 'iOS', 'Web'], size=n_users, p=[0.7, 0.25, 0.05])
    countries = np.random.choice(['ZA', 'NG', 'KE', 'GH'], size=n_users, p=[0.6, 0.2, 0.12, 0.08])
    channels = np.random.choice(['organic', 'ads', 'referral', 'partnership'], size=n_users, p=[0.5, 0.3, 0.15, 0.05])

    users = pd.DataFrame({
        'user_id': user_ids,
        'signup_date': signup_dates,
        'platform': platforms,
        'country': countries,
        'acq_channel': channels
    })

    feature_list = ['scan_to_pay', 'bill_payment', 'airtime_topup']
    events = []

    # itertuples avoids building a Series per row (as iterrows does) while
    # visiting users in the same order, so the sequence of random draws is unchanged.
    for u in users.itertuples(index=False):
        uid = u.user_id
        reg = u.signup_date
        events.append({'user_id': uid, 'event': 'register', 'event_date': reg})

        base_act_prob = 0.65 if u.acq_channel == 'organic' else 0.6
        if u.platform == 'iOS':
            base_act_prob += 0.03
        if np.random.rand() >= base_act_prob:
            continue  # user never activates

        act_delay = int(np.random.exponential(scale=3))
        act_date = reg + pd.Timedelta(days=act_delay)
        if act_date > end:
            continue  # activation would fall outside the simulated window
        events.append({'user_id': uid, 'event': 'activate', 'event_date': act_date})

        if np.random.rand() < 0.7:
            first_txn_delay = int(np.random.exponential(scale=4))
            first_txn_date = act_date + pd.Timedelta(days=first_txn_delay)
            if first_txn_date <= end:
                events.append({'user_id': uid, 'event': 'first_txn', 'event_date': first_txn_date})

                n_repeat = np.random.poisson(lam=2)
                for r in range(n_repeat):
                    # Gap is scaled by (r + 1), so later repeats land further from the first transaction.
                    delta = int(np.random.exponential(scale=30)) + 1
                    txn_date = first_txn_date + pd.Timedelta(days=delta * (r + 1))
                    if txn_date <= end:
                        events.append({'user_id': uid, 'event': 'repeat_txn', 'event_date': txn_date})

        # Feature usage is drawn for every activated user, independent of transactions.
        for f in feature_list:
            if np.random.rand() < 0.4:
                feature_delay = int(np.random.exponential(scale=5))
                f_date = act_date + pd.Timedelta(days=feature_delay)
                if f_date <= end:
                    events.append({'user_id': uid, 'event': f, 'event_date': f_date})

    # Explicit columns keep the schema intact when no events were generated
    # (e.g. n_users=0); otherwise the 'event_date' lookup below raises KeyError.
    events_df = pd.DataFrame(events, columns=['user_id', 'event', 'event_date'])
    events_df['event_date'] = pd.to_datetime(events_df['event_date'])
    events_df = events_df.sort_values('event_date')
    return users, events_df

# Build the module-level demo dataset (user table + event log) once.
users_df, events_df = generate_synthetic_events(
    n_users=50000,
    end_date='2025-09-30',
    start_date='2024-01-01',
)

In [5]:
# ---------------------------
# Analytics functions
# ---------------------------
def compute_dau_wa_mau(events, date_col='event_date', active_event_types=None):
    """Compute daily active users plus 7-day and 30-day rolling averages.

    A user counts as active on a day if they have at least one event whose
    type is in ``active_event_types`` on that calendar day.

    Args:
        events: DataFrame with columns ``user_id``, ``event`` and ``date_col``
            (``date_col`` must be datetime-like so ``.dt.floor`` works).
        date_col: Name of the timestamp column.
        active_event_types: Event names that count as activity. Defaults to
            ``['first_txn', 'repeat_txn']``.

    Returns:
        DataFrame with one row per calendar day between the first and last
        active day (inclusive) and columns ``date``, ``DAU``, ``WAU`` and
        ``MAU``. ``WAU`` / ``MAU`` are the rolling *means* of ``DAU`` over up
        to 7 / 30 days (not distinct-user counts). Empty if there is no
        matching activity.
    """
    if active_event_types is None:
        active_event_types = ['first_txn', 'repeat_txn']

    is_active = events['event'].isin(active_event_types)
    act = events.loc[is_active, ['user_id', date_col]].copy()
    act['date'] = act[date_col].dt.floor('D')
    daily = act.groupby('date')['user_id'].nunique()

    # Rolling windows below are row-based. Insert explicit zero rows for days
    # with no activity so that 7 rows really are 7 calendar days; otherwise
    # sparse data would average over a longer, irregular time span.
    if not daily.empty:
        all_days = pd.date_range(daily.index.min(), daily.index.max(), freq='D')
        daily = daily.reindex(all_days, fill_value=0)

    dau = daily.rename_axis('date').reset_index(name='DAU')
    dau['WAU'] = dau['DAU'].rolling(window=7, min_periods=1).mean()
    dau['MAU'] = dau['DAU'].rolling(window=30, min_periods=1).mean()
    return dau

def make_cohort_table(users, events):
    """Build a monthly cohort retention matrix.

    Users are grouped into cohorts by signup month. For every cohort and
    every month offset (0 = signup month), retention is the share of the
    cohort with at least one ``first_txn``, ``repeat_txn`` or ``activate``
    event in that month.

    Args:
        users: DataFrame with ``user_id`` and a datetime ``signup_date``.
        events: DataFrame with ``user_id``, ``event`` and a datetime
            ``event_date``. Events whose ``user_id`` is not in ``users`` are
            ignored.

    Returns:
        DataFrame indexed by cohort month start (newest first), with one
        column per observed month offset and retention fractions in [0, 1]
        as values. Cohort/offset pairs with no activity are filled with 0.
    """
    users2 = users.copy()
    users2['cohort'] = users2['signup_date'].dt.to_period('M').dt.to_timestamp()

    active = events[events['event'].isin(['first_txn', 'repeat_txn', 'activate'])]
    # Inner join: an event for a user missing from `users` has no cohort, and
    # the resulting NaN offset would make the integer cast below raise.
    merged = active.merge(users2[['user_id', 'cohort']], on='user_id', how='inner')
    merged['event_month'] = merged['event_date'].dt.to_period('M').dt.to_timestamp()
    year_gap = merged['event_month'].dt.year - merged['cohort'].dt.year
    month_gap = merged['event_month'].dt.month - merged['cohort'].dt.month
    merged['month_offset'] = (year_gap * 12 + month_gap).astype(int)

    cohort_counts = merged.groupby(['cohort', 'month_offset'])['user_id'].nunique().reset_index()
    cohort_sizes = users2.groupby('cohort')['user_id'].nunique().reset_index(name='cohort_size')
    cohort = cohort_counts.merge(cohort_sizes, on='cohort')
    cohort['retention'] = cohort['user_id'] / cohort['cohort_size']

    pivot = cohort.pivot(index='cohort', columns='month_offset', values='retention').fillna(0)
    pivot.index = pd.to_datetime(pivot.index)
    pivot = pivot.sort_index(ascending=False)
    return pivot

def funnel_counts(users, events):
    """Count how many recently signed-up users reached each funnel stage.

    "Recent" means ``signup_date`` is within the six months before the
    latest ``event_date`` found in ``events``.

    Args:
        users: DataFrame with ``user_id`` and ``signup_date``.
        events: DataFrame with ``user_id``, ``event`` and ``event_date``.

    Returns:
        DataFrame with columns ``stage`` and ``count``, one row each for
        ``registered``, ``activated``, ``first_txn`` and ``repeat_txn``.
    """
    window_start = events['event_date'].max() - pd.DateOffset(months=6)
    cohort_ids = users.loc[users['signup_date'] >= window_start, 'user_id']

    def distinct_users_with(event_name):
        # Distinct recent users that have at least one event of this type.
        hit = (events['user_id'].isin(cohort_ids)) & (events['event'] == event_name)
        return len(set(events.loc[hit, 'user_id']))

    tallies = [len(set(cohort_ids))]
    for event_name in ('activate', 'first_txn', 'repeat_txn'):
        tallies.append(distinct_users_with(event_name))

    stage_names = ['registered', 'activated', 'first_txn', 'repeat_txn']
    return pd.DataFrame({'stage': stage_names, 'count': tallies})

def compute_total_user_churn(users, events):
    """Compute the month-over-month percentage drop in active users.

    A user is active in a month if they have any event in it. The reported
    ``churn_rate`` is ``(1 - active_users / prev_active) * 100`` where
    ``prev_active`` is the previous row's count, i.e. the net change in the
    active-user count; it is negative when the count grew.

    Args:
        users: Not used by this function.
        events: DataFrame with ``user_id`` and a datetime ``event_date``.

    Returns:
        DataFrame with columns ``month``, ``active_users``, ``prev_active``
        and ``churn_rate``. The first month is dropped because it has no
        previous month to compare with.
    """
    activity = events[['user_id', 'event_date']].drop_duplicates()
    activity['month'] = activity['event_date'].dt.to_period('M').dt.to_timestamp()

    per_month = (
        activity
        .groupby('month')['user_id']
        .nunique()
        .reset_index(name='active_users')
    )
    per_month['prev_active'] = per_month['active_users'].shift(1)
    ratio = per_month['active_users'] / per_month['prev_active']
    per_month['churn_rate'] = (1 - ratio) * 100
    return per_month.dropna()

In [6]:
# ---------------------------
# Dash Layout
# ---------------------------
app = dash.Dash(__name__, title='VodaPay — Decline Analysis (Demo)')
server = app.server  # the app's underlying server object, exposed at module level


def _filter_dropdown(label, component_id, column, width):
    """Return a labelled dropdown offering the sorted unique values of users_df[column] plus 'All'."""
    options = [{'label': v, 'value': v} for v in sorted(users_df[column].unique())]
    options.append({'label': 'All', 'value': 'All'})
    return html.Div(
        [html.Label(label), dcc.Dropdown(id=component_id, options=options, value='All')],
        style={'width': width, 'display': 'inline-block', 'padding': '6px'},
    )


def _graph_row(left_id, right_id):
    """Return a row holding two side-by-side graphs, each 48% wide."""
    half = {'width': '48%', 'display': 'inline-block'}
    return html.Div([
        html.Div([dcc.Graph(id=left_id)], style=dict(half)),
        html.Div([dcc.Graph(id=right_id)], style=dict(half)),
    ])


app.layout = html.Div([
    html.H2('VodaPay — Decline Analysis (Plotly Dash Demo)'),

    # Filter bar. Dash style dictionaries use camelCased CSS property names,
    # hence 'flexWrap' rather than 'flex-wrap'.
    html.Div([
        _filter_dropdown('Country', 'country-filter', 'country', '20%'),
        _filter_dropdown('Platform', 'platform-filter', 'platform', '20%'),
        _filter_dropdown('Acquisition Channel', 'channel-filter', 'acq_channel', '25%'),
    ], style={'display': 'flex', 'flexWrap': 'wrap'}),

    html.Hr(),

    _graph_row('dau-chart', 'mau-chart'),
    _graph_row('funnel-chart', 'cohort-heatmap'),
    _graph_row('feature-adoption-chart', 'churn-trend-chart'),

    html.Hr(),
    html.Div([html.H4('Retention Table (cohort view)')]),
    html.Div(id='cohort-table')
])

In [7]:
# ---------------------------
# Callbacks
# ---------------------------
@app.callback(
    Output('dau-chart', 'figure'),
    Output('mau-chart', 'figure'),
    Output('funnel-chart', 'figure'),
    Output('cohort-heatmap', 'figure'),
    Output('feature-adoption-chart', 'figure'),
    Output('churn-trend-chart', 'figure'),
    Output('cohort-table', 'children'),
    Input('country-filter', 'value'),
    Input('platform-filter', 'value'),
    Input('channel-filter', 'value')
)
def update_dashboard(country, platform, channel):
    """Rebuild every dashboard figure for the selected filters.

    Args:
        country: Selected country code, ``'All'``, or ``None``.
        platform: Selected platform, ``'All'``, or ``None``.
        channel: Selected acquisition channel, ``'All'``, or ``None``.
            ``None`` (e.g. a cleared dropdown) is treated like ``'All'``.

    Returns:
        tuple: DAU/WAU figure, MAU figure, funnel figure, cohort heatmap,
        feature-adoption bar chart, churn line chart, and the children for
        the ``cohort-table`` container (currently an empty placeholder).
    """
    # Narrow the user table by each active filter; None/'All' means "no filter".
    u = users_df
    for column, selected in (('country', country), ('platform', platform), ('acq_channel', channel)):
        if selected is not None and selected != 'All':
            u = u[u[column] == selected]

    e = events_df[events_df['user_id'].isin(u['user_id'])]

    dau = compute_dau_wa_mau(e)
    dau_fig = go.Figure()
    dau_fig.add_trace(go.Scatter(x=dau['date'], y=dau['DAU'], mode='lines', name='DAU'))
    dau_fig.add_trace(go.Scatter(x=dau['date'], y=dau['WAU'], mode='lines', name='WAU'))
    dau_fig.update_layout(title='DAU / WAU (7-day avg)')

    mau_fig = go.Figure()
    mau_fig.add_trace(go.Scatter(x=dau['date'], y=dau['MAU'], mode='lines', name='MAU (30-day avg)'))
    mau_fig.update_layout(title='MAU (30-day avg)')

    funnel = funnel_counts(u, e)
    funnel_fig = go.Figure(go.Funnel(y=funnel['stage'], x=funnel['count']))
    funnel_fig.update_layout(title='Acquisition -> Activation -> First Txn -> Repeat Txn')

    cohort_pv = make_cohort_table(u, e)
    heatmap_fig = go.Figure()
    if not cohort_pv.empty:
        z = (cohort_pv * 100).round(2)
        heatmap_fig = go.Figure(data=go.Heatmap(
            z=z.values,
            # Label columns with their real month offsets; positional labels
            # would be wrong whenever an offset is missing from the pivot.
            x=[f'Month+{offset}' for offset in z.columns],
            y=[d.strftime('%Y-%m') for d in z.index],
            hovertemplate='Cohort: %{y}<br>Offset %{x}<br>Retention %{z}%',
            zmin=0,
            zmax=100,
        ))
        heatmap_fig.update_layout(title='Cohort Retention (%)', yaxis={'autorange': 'reversed'})

    feature_list = ['scan_to_pay', 'bill_payment', 'airtime_topup']
    total_users = u['user_id'].nunique()  # same denominator for every feature
    fa_list = []
    for f in feature_list:
        users_with_feature = e[e['event'] == f]['user_id'].nunique()
        adoption_pct = users_with_feature / total_users * 100 if total_users > 0 else 0
        fa_list.append({'feature': f, 'adoption_pct': adoption_pct})
    fa_df = pd.DataFrame(fa_list)
    feature_fig = px.bar(fa_df, x='feature', y='adoption_pct', text='adoption_pct', title='Feature Adoption %')
    # Show bar labels with one decimal instead of the raw float repr.
    feature_fig.update_traces(texttemplate='%{y:.1f}%')

    churn_df = compute_total_user_churn(u, e)
    churn_fig = px.line(churn_df, x='month', y='churn_rate', title='Total User Churn Rate (%)')

    # Placeholder: no retention table is rendered yet.
    table = html.Div('')

    return dau_fig, mau_fig, funnel_fig, heatmap_fig, feature_fig, churn_fig, table

In [8]:
# ---------------------------
# Run server
# ---------------------------
if __name__ == '__main__':
    free_port = get_free_port()
    print(f"Running on port {free_port}")
    # The port is picked at random on every execution. The debug auto-reloader
    # re-executes this script in a child process, which would pick a different
    # port than the one printed above, so the reloader is switched off.
    app.run(debug=True, port=free_port, use_reloader=False)

Running on port 53351
