In [88]:
#import and clean data
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go 
import plotly.subplots as make_subplots

df = pd.read_csv(
    '/Users/levankikadze/Desktop/pandas/Bolt Food/churningUsers/Data/UserOrdersTotal_past1yr_Breakdown_ShulaGiven.csv',
    dtype={'Spend Objective': str, 'Discount Value Local': 'float'},
    thousands=',',
    )

df.dropna(subset=['Order ID'], inplace=True)
df[['Order ID', 'User ID', 'Provider ID', 'Vendor ID']] = df[['Order ID', 'User ID', 'Provider ID', 'Vendor ID']].astype(int)
df['Order Created Date'] = pd.to_datetime(df['Order Created Date'])

#the data was fetched since 2024-01-01, so we filter out orders before July 28, 2024
cutoff = pd.Timestamp('2024-07-28')
df = df.loc[df['Order Created Date'] >= cutoff].copy()

In [81]:
df.sort_values(by='Order Created Date', ascending=True)

Unnamed: 0,Order Created Date,Order ID,User ID,Provider ID,Provider Name,Vendor ID,Vendor Name,Spend Objective,Discount Value Local
2029342,2024-07-28,185129653,209252714,50764,Madart Varketili,35016,Ltd Arbo 2009,provider_campaign,39.08
1992878,2024-07-28,185144889,225647933,50764,Madart Varketili,35016,Ltd Arbo 2009,provider_campaign,27.96
1873044,2024-07-28,185142386,215567016,50764,Madart Varketili,35016,Ltd Arbo 2009,provider_campaign,16.96
824606,2024-07-28,185282557,225809573,105076,DELICATE,73751,LTD Delicate 90,activation,2.00
576221,2024-07-28,185228030,219741668,92103,Wendy's Vake,63142,Wendy's,,0.00
...,...,...,...,...,...,...,...,...,...
909300,2025-07-25,249086814,111114515,74815,McDonald's Saburtalo,50081,MC,bolt_plus_campaign,2.20
1284269,2025-07-25,249082731,273364487,150235,KFC Batumi,111794,KFC GEORGIA Batumi,acquisition,5.01
893235,2025-07-25,249080153,226599109,146967,McDonald's Eristavi,50081,MC,bolt_plus_campaign,2.00
117035,2025-07-25,249088148,199153759,111349,Subway Sanapiro,10628,Subway Georgia 1,,0.00


In [89]:
campaigns = df['Spend Objective'].astype(str).str.lower()

conditions = [
    campaigns.isin(['activation', 'reactivation', 'churn', 'engagement', 'acquisition', 'experiment']),
    campaigns == 'provider_campaign_portal',
    campaigns.str.startswith('provider_campaign_'),
    campaigns == 'marketing',
    campaigns.str.startswith('sp_'),
    campaigns == 'bolt_plus_campaign',
    campaigns == 'other',
    campaigns == 'NaN'
]

choices = [
    'marketplace_campaigns',
    'portal_campaigns',
    'am_campaigns',
    'marketing',
    'smart_promotions',
    'bolt_plus',
    'other_campaigns',
    'no_campaign'
]

df['Spend Objective'] = np.select(conditions, choices, default='no_campaign')


In [83]:
len(df)

1375721

In [90]:
#Deduplicate the DataFrame
#Assign priority to each campaign type
priority = {
    'marketing': 1,
    'am_campaigns': 2,
    'portal_campaigns': 3, 
    'smart_promotions': 4,
    'marketplace_campaigns': 5,
    'bolt_plus': 6,
    'other_campaigns': 7,
    'no_campaign': 8,
}

# Map the priority to the DataFrame
df['Priority'] = df['Spend Objective'].map(priority)

# Sort the DataFrame by 'User ID' and 'Priority'
df.sort_values(by=['Order ID', 'Priority'], inplace=True)

# Drop duplicates, keeping the first occurrence
df_deduplicated = (
    df.drop_duplicates(subset=['Order ID'], keep='first')
      .copy()
)


In [95]:
df_deduplicated['Priority'].value_counts()
len(df_deduplicated)

1265915

In [91]:
#Run the last order analysis on the deduplicated DataFrame
df_last_orders = (df_deduplicated.sort_values(by=['User ID', 'Order Created Date'], ascending=False).drop_duplicates(subset=['User ID'], keep='first'))
# Make the needed columns
df_last_orders['days_since_last_order'] = (
    pd.Timestamp(datetime.today().date()) -
    df_last_orders['Order Created Date']
).dt.days
df_last_orders['used_discount'] = df_last_orders['Discount Value Local'] > 0

# Define bins & labels
bins   = [0, 30, 60, 90, 120, np.inf]
labels = ['0-30', '31-60', '61-90', '91-120', '120+']

df_last_orders['recency_group'] = pd.cut(
    df_last_orders['days_since_last_order'],
    bins=bins,
    labels=labels,
    right=True
)

# Group & pivot for counts
counts = (
    df_last_orders
      .groupby(['recency_group', 'used_discount'], observed=True)
      .size()
      .unstack(fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)

# (4) Calculate percentages
pct = counts.div(counts.sum(axis=1), axis=0).mul(100).round(1)
pct.columns = [c + ' %' for c in pct.columns]

# (5) Combine into one clean table
summary = counts.join(pct)
print(summary)


# (6) Breakdown of discount types among those who used a discount
discount_breakdown = (
    df_last_orders[df_last_orders['used_discount']]       # filter to only “used discount”
      .groupby(['recency_group', 'Spend Objective'])
      .size()
      .unstack(fill_value=0)
)

# (7) Turn those into percentages of the “used discount” total per bucket
discount_pct = (
    discount_breakdown
      .div(discount_breakdown.sum(axis=1), axis=0)
      .mul(100)
      .round(1)
      .add_suffix(' %')
)

# (8) Put counts and % side by side
discount_summary = discount_breakdown.join(discount_pct)

print(discount_summary)


               No Discount  Used Discount  No Discount %  Used Discount %
recency_group                                                            
0-30                 12635          35139           26.4             73.6
31-60                 6743          19767           25.4             74.6
61-90                 4757          14805           24.3             75.7
91-120                4223          12438           25.3             74.7
120+                 24565          66930           26.8             73.2
Spend Objective  am_campaigns  bolt_plus  marketing  marketplace_campaigns  \
recency_group                                                                
0-30                    15689       2136        285                  11643   
31-60                    5427        769        255                  10475   
61-90                    5867         70        765                   6009   
91-120                   3436          0        679                   6520   
120+          





In [92]:
# run the same analysis on one time users
# — Identify users with exactly one order —
order_counts = df_deduplicated['User ID'].value_counts()
single_users = order_counts[order_counts == 1].index

# — Filter to only those single‑order rows —
df_single = df_deduplicated[df_deduplicated['User ID'].isin(single_users)].copy()

# — Now rerun your “last order” steps on df_single —
# (since each user has only one order, it already is their “last”)
df_single['days_since_last_order'] = (
    pd.Timestamp(datetime.today().date()) -
    df_single['Order Created Date']
).dt.days
df_single['used_discount'] = df_single['Discount Value Local'] > 0

# — Bin into recency groups —
df_single['recency_group'] = pd.cut(
    df_single['days_since_last_order'],
    bins=[0,30,60,90,120,np.inf],
    labels=['0-30','31-60','61-90','91-120','120+'],
    right=True
)

# — 1) Counts & percentages of discount usage —
counts_single = (
    df_single
      .groupby(['recency_group','used_discount'], observed=True)
      .size()
      .unstack(fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)
pct_single = counts_single.div(counts_single.sum(axis=1), axis=0).mul(100).round(1)
pct_single.columns = [c + ' %' for c in pct_single.columns]
summary_single = counts_single.join(pct_single)
print(summary_single)

# — 2) Breakdown of which campaign‑type discounts were used —
discount_breakdown_single = (
    df_single[df_single['used_discount']]
      .groupby(['recency_group','Spend Objective'], observed=True)
      .size()
      .unstack(fill_value=0)
)
discount_pct_single = (
    discount_breakdown_single
      .div(discount_breakdown_single.sum(axis=1), axis=0)
      .mul(100)
      .round(1)
      .add_suffix(' %')
)
discount_summary_single = discount_breakdown_single.join(discount_pct_single)
print(discount_summary_single)



               No Discount  Used Discount  No Discount %  Used Discount %
recency_group                                                            
0-30                  1448           5863           19.8             80.2
31-60                 1205           4671           20.5             79.5
61-90                  870           4908           15.1             84.9
91-120                 763           5171           12.9             87.1
120+                  7750          37140           17.3             82.7
Spend Objective  am_campaigns  bolt_plus  marketing  marketplace_campaigns  \
recency_group                                                                
0-30                     2113         46        150                   2933   
31-60                    1022         45        197                   2851   
61-90                    1552         15        693                   2129   
91-120                   1071          0        638                   2900   
120+          

In [93]:
# 0) Ensure clean categories (optional but neat)
spend_order = [
    'marketplace_campaigns','portal_campaigns','am_campaigns','marketing',
    'smart_promotions','bolt_plus','other_campaigns','no_campaign'
]
df_last_orders['Spend Objective'] = (
    df_last_orders['Spend Objective']
    .fillna('no_campaign')
    .astype(pd.CategoricalDtype(spend_order, ordered=True))
)

# 1) Counts table (no filter!)
counts_so = (
    df_last_orders
      .groupby(['recency_group','Spend Objective','used_discount'], observed=True)
      .size()
      .unstack('used_discount', fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)

# 2) % table (row-wise within each recency_group × spend objective)
pct_so = (
    counts_so
      .div(counts_so.sum(axis=1), axis=0)
      .mul(100).round(1)
      .add_suffix(' %')
)

# 3) Final summary
summary_so = counts_so.join(pct_so)
print(summary_so)          # MultiIndex rows
summary_so_reset = summary_so.reset_index()
print(summary_so_reset)


used_discount                        No Discount  Used Discount  \
recency_group Spend Objective                                     
0-30          marketplace_campaigns            0          11643   
              portal_campaigns                 0           4553   
              am_campaigns                     0          15689   
              marketing                        0            285   
              smart_promotions                 0            682   
              bolt_plus                        0           2136   
              other_campaigns                  0             44   
              no_campaign                  12635            107   
31-60         marketplace_campaigns            0          10475   
              portal_campaigns                 0           2526   
              am_campaigns                     0           5427   
              marketing                        0            255   
              smart_promotions                 0            27

In [96]:
import pandas as pd
import numpy as np
from datetime import datetime

# ---- 0) Isolate single-order users ----
order_counts  = df_deduplicated['User ID'].value_counts()
single_users  = order_counts[order_counts == 1].index
df_single     = df_deduplicated[df_deduplicated['User ID'].isin(single_users)].copy()

# ---- 1) Last-order fields (it's their only order anyway) ----
df_single['days_since_last_order'] = (
    pd.Timestamp(datetime.today().date()) - df_single['Order Created Date']
).dt.days
df_single['used_discount'] = df_single['Discount Value Local'] > 0

# ---- 2) Recency buckets ----
bins   = [0, 30, 60, 90, 120, np.inf]
labels = ['0-30','31-60','61-90','91-120','120+']
df_single['recency_group'] = pd.cut(df_single['days_since_last_order'],
                                    bins=bins, labels=labels, right=True)

# ---- 3) Clean Spend Objective cats ----
spend_order = [
    'marketplace_campaigns','portal_campaigns','am_campaigns','marketing',
    'smart_promotions','bolt_plus','other_campaigns','no_campaign'
]
df_single['Spend Objective'] = (
    df_single['Spend Objective']
    .fillna('no_campaign')
    .astype(pd.CategoricalDtype(spend_order, ordered=True))
)

# ---- 4) Counts & % (NO filtering!) ----
counts_so_single = (
    df_single
      .groupby(['recency_group','Spend Objective','used_discount'], observed=True)
      .size()
      .unstack('used_discount', fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)

pct_so_single = (
    counts_so_single
      .div(counts_so_single.sum(axis=1), axis=0)
      .mul(100).round(1)
      .add_suffix(' %')
)

summary_so_single = counts_so_single.join(pct_so_single)

print("=== SINGLE-ORDER USERS: summary_so_single ===")
print(summary_so_single)
print("\nAs flat df:\n", summary_so_single.reset_index())


=== SINGLE-ORDER USERS: summary_so_single ===
used_discount                        No Discount  Used Discount  \
recency_group Spend Objective                                     
0-30          marketplace_campaigns            0           2933   
              portal_campaigns                 0            580   
              am_campaigns                     0           2113   
              marketing                        0            150   
              smart_promotions                 0             20   
              bolt_plus                        0             46   
              no_campaign                   1448             21   
31-60         marketplace_campaigns            0           2851   
              portal_campaigns                 0            526   
              am_campaigns                     0           1022   
              marketing                        0            197   
              smart_promotions                 0             19   
              bo

In [97]:
import plotly.graph_objects as go
import plotly.io as pio

# ---------- INPUT DFS YOU ALREADY BUILT ----------
# counts, pct
# counts_single, pct_single
# counts_so, pct_so
# counts_so_single, pct_so_single

# ---------- CONFIG ----------
RECENCY_ORDER_BOTTOM_FIRST = ['120+', '91-120', '61-90', '31-60', '0-30']
RECENCY_ORDER_TOP_FIRST    = RECENCY_ORDER_BOTTOM_FIRST[::-1]  # ['120+','91-120','61-90','31-60','0-30']
LABEL_HIDE_THRESHOLD = 3  # percent
DASHBOARD_FILE = 'discount_dashboard.html'

# ---------- HELPERS ----------
def make_discount_vs_nodiscount_fig(counts_df, pct_df, title, x_label):
    """ Horizontal stacked bars: Discount vs No Discount per recency bucket. """
    plot_df = counts_df.copy()
    for col in ['No Discount', 'Used Discount']:
        plot_df[col + ' %'] = pct_df[col + ' %']

    long = (
        plot_df.reset_index()
               .melt(id_vars='recency_group',
                     value_vars=['No Discount', 'Used Discount'],
                     var_name='discount_flag',
                     value_name='count')
    )
    pct_long = (
        plot_df.reset_index()
               .melt(id_vars='recency_group',
                     value_vars=['No Discount %', 'Used Discount %'],
                     var_name='discount_flag_pct',
                     value_name='pct')
    )
    long['pct'] = pct_long['pct'].values
    long['recency_group'] = pd.Categorical(long['recency_group'],
                                           RECENCY_ORDER_BOTTOM_FIRST,
                                           ordered=True)

    fig = go.Figure()
    for flag, chunk in long.groupby('discount_flag', observed=True):  # Added observed=True
        texts = chunk['pct'].astype(str) + '%'
        texts = [t if float(t.strip('%')) >= LABEL_HIDE_THRESHOLD else '' for t in texts]
        fig.add_trace(
            go.Bar(
                y=chunk['recency_group'],
                x=chunk['count'],
                name=flag,
                orientation='h',
                text=texts,
                textposition='inside',
                insidetextanchor='middle',
                hovertemplate=(
                    'Recency: %{y}<br>' +
                    flag + ' count: %{x}<br>' +
                    'Share: %{text}<extra></extra>'
                )
            )
        )

    fig.update_layout(
        barmode='stack',
        title=f'<b>{title}</b>',
        xaxis_title=x_label,
        yaxis_title='Recency Group (days since last order)',
        hovermode='y',
        legend=dict(
            orientation='h',
            yanchor='top',
            y=-0.25,
            xanchor='center',
            x=0.5
        ),
        margin=dict(l=90, r=40, t=70, b=150),
    )
    fig.update_yaxes(categoryorder='array', categoryarray=RECENCY_ORDER_TOP_FIRST)
    return fig


def collapse_so(counts_so_df):
    """ Collapse discount flags -> total count per (recency_group, SO). """
    collapsed = counts_so_df.sum(axis=1).rename('count').reset_index()
    collapsed['pct_bar'] = (
        collapsed['count'] /
        collapsed.groupby('recency_group', observed=True)['count'].transform('sum')  # Added observed=True
    ).mul(100).round(1)

    collapsed['label'] = collapsed['pct_bar'].astype(str) + '%'
    collapsed.loc[collapsed['pct_bar'] < LABEL_HIDE_THRESHOLD, 'label'] = ''
    collapsed['recency_group'] = pd.Categorical(collapsed['recency_group'],
                                                RECENCY_ORDER_BOTTOM_FIRST,
                                                ordered=True)
    return collapsed


def make_spend_objective_fig(collapsed_df, title, x_label):
    """ Horizontal stacked bars: Spend Objective segments only. """
    fig = go.Figure()
    for so, df_so in collapsed_df.groupby('Spend Objective', observed=True):  # Added observed=True
        fig.add_trace(
            go.Bar(
                y=df_so['recency_group'],
                x=df_so['count'],
                name=so,
                orientation='h',
                text=df_so['label'],
                textposition='inside',
                insidetextanchor='middle',
                customdata=np.stack([df_so['pct_bar']], axis=-1),
                hovertemplate=(
                    'Recency: %{y}<br>'
                    'Spend Objective: ' + so + '<br>'
                    'Count: %{x}<br>'
                    'Share of bar: %{customdata[0]}%<extra></extra>'
                )
            )
        )

    fig.update_layout(
        barmode='stack',
        title=f'<b>{title}</b>',
        xaxis_title=x_label,
        yaxis_title='Recency Group (days since last order)',
        hovermode='y',
        legend=dict(
            orientation='h',
            yanchor='top',
            y=-0.28,
            xanchor='center',
            x=0.5
        ),
        margin=dict(l=110, r=40, t=70, b=150),
    )
    fig.update_yaxes(categoryorder='array', categoryarray=RECENCY_ORDER_TOP_FIRST)

    # Dropdown
    buttons = []
    all_visible = [True] * len(fig.data)
    buttons.append(dict(
        label='All',
        method='update',
        args=[{'visible': all_visible},
              {'title': f'<b>{title} — All</b>'}]
    ))

    unique_sos = collapsed_df['Spend Objective'].unique()
    for so in unique_sos:
        vis = [tr.name == so for tr in fig.data]
        buttons.append(dict(
            label=so,
            method='update',
            args=[{'visible': vis},
                  {'title': f'<b>{title} — {so}</b>'}]
        ))

    fig.update_layout(
        updatemenus=[dict(
            type='dropdown',
            x=1.02, y=1,
            xanchor='left', yanchor='top',
            buttons=buttons,
            showactive=True
        )]
    )
    return fig


# ---------- BUILD FIGURES ----------
fig_all_simple = make_discount_vs_nodiscount_fig(
    counts, pct,
    title='Discount vs No Discount by Recency Group (All Users)',
    x_label='Count of Orders/Users'
)

fig_single_simple = make_discount_vs_nodiscount_fig(
    counts_single, pct_single,
    title='Discount vs No Discount by Recency Group (Single-Order Users)',
    x_label='Count of Orders (single-order users)'
)

collapsed_all = collapse_so(counts_so)
fig_all_so = make_spend_objective_fig(
    collapsed_all,
    title='Last-Order Split by Spend Objective (All Users)',
    x_label='Count of Orders (last order per user)'
)

collapsed_single = collapse_so(counts_so_single)
fig_single_so = make_spend_objective_fig(
    collapsed_single,
    title='Last-Order Split by Spend Objective (Single-Order Users)',
    x_label='Count of Orders (single-order users)'
)

# ---------- EXPORT ONE HTML ----------
html1 = pio.to_html(fig_all_simple,   include_plotlyjs='cdn',  full_html=False)
html2 = pio.to_html(fig_single_simple, include_plotlyjs=False, full_html=False)
html3 = pio.to_html(fig_all_so,        include_plotlyjs=False, full_html=False)
html4 = pio.to_html(fig_single_so,     include_plotlyjs=False, full_html=False)

full_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Discount Dashboard</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 0 20px 40px; }}
h1 {{ margin-top: 30px; }}
.nav {{
  position: fixed; top: 10px; right: 10px;
  background: #f7f7f7; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px;
  font-size: 14px; line-height: 1.6;
}}
.nav a {{ text-decoration: none; color: #0077cc; }}
.nav a:hover {{ text-decoration: underline; }}
.section {{ margin: 80px 0; }}
</style>
</head>
<body>

<div class="nav">
  <b>Jump to:</b><br>
  <a href="#fig1">1. All Users – Disc vs No Disc</a><br>
  <a href="#fig2">2. Single Order – Disc vs No Disc</a><br>
  <a href="#fig3">3. All Users – Spend Objective</a><br>
  <a href="#fig4">4. Single Order – Spend Objective</a>
</div>

<h1>Discount Dashboard</h1>

<div id="fig1" class="section">
  <h2>1. All Users – Discount vs No Discount</h2>
  {html1}
</div>

<div id="fig2" class="section">
  <h2>2. Single-Order Users – Discount vs No Discount</h2>
  {html2}
</div>

<div id="fig3" class="section">
  <h2>3. All Users – Split by Spend Objective</h2>
  {html3}
</div>

<div id="fig4" class="section">
  <h2>4. Single-Order Users – Split by Spend Objective</h2>
  {html4}
</div>

</body>
</html>
"""

with open(DASHBOARD_FILE, 'w', encoding='utf-8') as f:
    f.write(full_html)

print(f"Saved all charts to {DASHBOARD_FILE}")

Saved all charts to discount_dashboard.html
