In [20]:
#import and clean data
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go 
import plotly.subplots as make_subplots

df = pd.read_csv(
    '/Users/levankikadze/Desktop/pandas/Bolt Food/cherningUsers/Data/UserOrdersTotal_past1yr_Breakdown_ShulaGiven.csv',
    dtype={'Spend Objective': str, 'Discount Value Local': 'float'},
    thousands=',',
    )

df.dropna(subset=['Order ID'], inplace=True)
df[['Order ID', 'User ID', 'Provider ID', 'Vendor ID']] = df[['Order ID', 'User ID', 'Provider ID', 'Vendor ID']].astype(int)
df['Order Created Date'] = pd.to_datetime(df['Order Created Date'])

In [8]:
from datetime import datetime


In [21]:
campaigns = df['Spend Objective'].astype(str).str.lower()

conditions = [
    campaigns.isin(['activation', 'reactivation', 'churn', 'engagement', 'acquisition', 'experiment']),
    campaigns == 'provider_campaign_portal',
    campaigns.str.startswith('provider_campaign_'),
    campaigns == 'marketing',
    campaigns.str.startswith('sp_'),
    campaigns == 'bolt_plus_campaign',
    campaigns == 'other',
    campaigns == 'NaN'
]

choices = [
    'marketplace_campaigns',
    'portal_campaigns',
    'am_campaigns',
    'marketing',
    'smart_promotions',
    'bolt_plus',
    'other_campaigns',
    'no_campaign'
]

df['Spend Objective'] = np.select(conditions, choices, default='no_campaign')


In [None]:
df.tail(30)

Unnamed: 0,Order Created Date,Order ID,User ID,Provider ID,Provider Name,Vendor ID,Vendor Name,Spend Objective,Discount Value Local
2053331,2025-02-08,218131437,222096030,96368,Dipstrip,66468,I/E Lasha Qituashvili,am_campaigns,220.2
2053332,2025-05-09,234590932,18690110,50761,Madart Pekini,35016,Ltd Arbo 2009,am_campaigns,220.32
2053333,2025-06-06,240165962,225620726,58799,Sushi Hut Saburtalo,7487,Ltd Sushi Hut,portal_campaigns,227.0
2053334,2025-06-24,243542256,268976876,109756,Bao Sushi,77815,I/E Beka Kontselidze,portal_campaigns,228.0
2053335,2025-06-25,243793678,208499389,36424,Arigato,25555,Entrepreneur Omari Kvintradze,portal_campaigns,228.4
2053336,2024-09-05,191331597,219442205,50763,Madart Gldani,35016,Ltd Arbo 2009,am_campaigns,233.28
2053337,2024-06-14,177963289,149918345,50761,Madart Pekini,35016,Ltd Arbo 2009,no_campaign,234.4
2053338,2025-06-10,240822814,111650954,69487,Gochit's Monster Burger Vazha-Pshavela,35278,Gochit's Burger,am_campaigns,236.25
2053339,2024-10-26,199884641,41683996,44652,Domino's Pizza Kostava,30938,Domino's Pizza,portal_campaigns,239.4
2053340,2024-07-27,184982221,205902952,50763,Madart Gldani,35016,Ltd Arbo 2009,no_campaign,239.76


In [22]:
#Deduplicate the DataFrame
#Assign priority to each campaign type
priority = {
    # the ranking of top 3 does not matter cause they don't overlap and they should be prioritized over campaigns that overlap
    'marketing': 1,
    'am_campaigns': 2,
    'portal_campaigns': 3, 
    'smart_promotions': 4,
    'marketplace_campaigns': 5,
    'bolt_plus': 6,
    'other_campaigns': 7,
    'no_campaign': 8,
}

# Map the priority to the DataFrame
df['Priority'] = df['Spend Objective'].map(priority)

# Sort the DataFrame by 'User ID' and 'Priority'
df.sort_values(by=['Order ID', 'Priority'], inplace=True)

# Drop duplicates, keeping the first occurrence
df_deduplicated = (
    df.drop_duplicates(subset=['Order ID'], keep='first')
      .drop(columns=['Priority'])
      .copy()
)




In [12]:
df_deduplicated.sort_values(by=['User ID', 'Order Created Date'], ascending=False)


Unnamed: 0,Order Created Date,Order ID,User ID,Provider ID,Provider Name,Vendor ID,Vendor Name,Spend Objective,Discount Value Local
873820,2025-07-25,249086428,273796998,92130,Wendy's Batumi,63152,Wendy's Batumi,am_campaigns,2.00
1302025,2025-07-25,249080598,273788568,150190,KFC Isani Mall,111675,KFC GEORGIA Tbilisi,marketplace_campaigns,5.26
1406713,2025-07-25,249078802,273785506,150235,KFC Batumi,111794,KFC GEORGIA Batumi,marketplace_campaigns,6.61
1923579,2025-07-25,249078543,273783263,123177,Woki Box Vazha Pshavela,88608,JSC Sushi Mania 2,marketplace_campaigns,20.04
1354332,2025-07-25,249074218,273781726,150191,KFC Gamrekeli,111675,KFC GEORGIA Tbilisi,marketplace_campaigns,5.99
...,...,...,...,...,...,...,...,...,...
1436306,2024-05-10,172222933,3146,10566,Nocco,7592,Ltd Nocco Tbilisi,no_campaign,7.00
1288308,2024-05-08,171835565,3146,88972,Shaurma inn,60727,შპს სთრით ფუდი,portal_campaigns,5.10
191848,2024-08-27,189967641,2050,76106,McDonald's Batumi Grand Mall,50089,MC\tBatumi,no_campaign,0.00
477393,2024-08-25,189713343,2050,11838,Fire Wok Dadiani,8577,Fire Wok,no_campaign,0.00


In [23]:
#Run the last order analysis on the deduplicated DataFrame
df_last_orders = (df_deduplicated.sort_values(by=['User ID', 'Order Created Date'], ascending=False).drop_duplicates(subset=['User ID'], keep='first'))
# Make the needed columns
df_last_orders['days_since_last_order'] = (
    pd.Timestamp(datetime.today().date()) -
    df_last_orders['Order Created Date']
).dt.days
df_last_orders['used_discount'] = df_last_orders['Discount Value Local'] > 0

# Define bins & labels
bins   = [0, 30, 60, 90, 120, np.inf]
labels = ['0-30', '31-60', '61-90', '91-120', '120+']

df_last_orders['recency_group'] = pd.cut(
    df_last_orders['days_since_last_order'],
    bins=bins,
    labels=labels,
    right=True
)

# Group & pivot for counts
counts = (
    df_last_orders
      .groupby(['recency_group', 'used_discount'], observed=True)
      .size()
      .unstack(fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)

# (4) Calculate percentages
pct = counts.div(counts.sum(axis=1), axis=0).mul(100).round(1)
pct.columns = [c + ' %' for c in pct.columns]

# (5) Combine into one clean table
summary = counts.join(pct)
print(summary)


# (6) Breakdown of discount types among those who used a discount
discount_breakdown = (
    df_last_orders[df_last_orders['used_discount']]       # filter to only “used discount”
      .groupby(['recency_group', 'Spend Objective'])
      .size()
      .unstack(fill_value=0)
)

# (7) Turn those into percentages of the “used discount” total per bucket
discount_pct = (
    discount_breakdown
      .div(discount_breakdown.sum(axis=1), axis=0)
      .mul(100)
      .round(1)
      .add_suffix(' %')
)

# (8) Put counts and % side by side
discount_summary = discount_breakdown.join(discount_pct)

print(discount_summary)


               No Discount  Used Discount  No Discount %  Used Discount %
recency_group                                                            
0-30                 12635          35139           26.4             73.6
31-60                 6743          19767           25.4             74.6
61-90                 4757          14805           24.3             75.7
91-120                4223          12438           25.3             74.7
120+                 36653         104206           26.0             74.0
Spend Objective  am_campaigns  bolt_plus  marketing  marketplace_campaigns  \
recency_group                                                                
0-30                    15689       2136        285                  11643   
31-60                    5427        769        255                  10475   
61-90                    5867         70        765                   6009   
91-120                   3436          0        679                   6520   
120+          





In [24]:
# visualize the results


# Merge counts & pct so we can show % labels on each bar section
plot_df = counts.copy()
for col in ['No Discount', 'Used Discount']:
    plot_df[col + ' %'] = pct[col + ' %']

# Long format for easier plotting
long = (
    plot_df
    .reset_index()  # recency_group back to column
    .melt(id_vars=['recency_group'],
          value_vars=['No Discount', 'Used Discount'],
          var_name='discount_flag',
          value_name='count')
)

# Attach % for each segment
pct_long = (
    plot_df
    .reset_index()
    .melt(id_vars=['recency_group'],
          value_vars=['No Discount %', 'Used Discount %'],
          var_name='discount_flag_pct',
          value_name='pct')
)

# Align rows
long['pct'] = pct_long['pct'].values

# Order bars top→bottom same as labels list
cat_order = ['0-30', '31-60', '61-90', '91-120', '120+']
long['recency_group'] = pd.Categorical(long['recency_group'], cat_order, ordered=True)
long = long.sort_values('recency_group', ascending=False)

# Build stacked horizontal bar
fig = go.Figure()

for flag, df_part in long.groupby('discount_flag'):
    fig.add_trace(
        go.Bar(
            y=df_part['recency_group'],
            x=df_part['count'],
            name=flag,
            orientation='h',
            text=df_part['pct'].astype(str) + '%',
            textposition='inside',
            insidetextanchor='middle',
            hovertemplate=(
                'Recency: %{y}<br>'
                + f'{flag} Count: ' + '%{x}<br>'
                + 'Share: %{text}<extra></extra>'
            ),
            customdata=np.stack([df_part['pct']], axis=-1)
        )
    )

fig.update_layout(
    barmode='stack',
    title='<b>Discount vs No Discount by Recency Group</b>',
    xaxis_title='Count of Orders/Users',
    yaxis_title='Recency Group (days since last order)',
    hovermode='y',
    legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5),
    margin=dict(l=80, r=40, t=60, b=80)
)

# Optional: add % labels only if segment > 3% to avoid clutter
fig.for_each_trace(
    lambda t: t.update(
        text=[txt if float(txt.strip('%')) >= 3 else '' for txt in t.text]
    )
)

# Save to HTML and open
out_file = 'discount_usage_recency.html'
fig.write_html(out_file, include_plotlyjs='cdn', full_html=True)
print(f"Saved to {out_file}")
# fig.show()  # Uncomment if running locally / Jupyter


Saved to discount_usage_recency.html


In [27]:
# run the same analysis on one time users
# — Identify users with exactly one order —
order_counts = df_deduplicated['User ID'].value_counts()
single_users = order_counts[order_counts == 1].index

# — Filter to only those single‑order rows —
df_single = df_deduplicated[df_deduplicated['User ID'].isin(single_users)].copy()

# — Now rerun your “last order” steps on df_single —
# (since each user has only one order, it already is their “last”)
df_single['days_since_last_order'] = (
    pd.Timestamp(datetime.today().date()) -
    df_single['Order Created Date']
).dt.days
df_single['used_discount'] = df_single['Discount Value Local'] > 0

# — Bin into recency groups —
df_single['recency_group'] = pd.cut(
    df_single['days_since_last_order'],
    bins=[0,30,60,90,120,np.inf],
    labels=['0-30','31-60','61-90','91-120','120+'],
    right=True
)

# — 1) Counts & percentages of discount usage —
counts_single = (
    df_single
      .groupby(['recency_group','used_discount'], observed=True)
      .size()
      .unstack(fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)
pct_single = counts_single.div(counts_single.sum(axis=1), axis=0).mul(100).round(1)
pct_single.columns = [c + ' %' for c in pct_single.columns]
summary_single = counts_single.join(pct_single)
print(summary_single)

# — 2) Breakdown of which campaign‑type discounts were used —
discount_breakdown_single = (
    df_single[df_single['used_discount']]
      .groupby(['recency_group','Spend Objective'], observed=True)
      .size()
      .unstack(fill_value=0)
)
discount_pct_single = (
    discount_breakdown_single
      .div(discount_breakdown_single.sum(axis=1), axis=0)
      .mul(100)
      .round(1)
      .add_suffix(' %')
)
discount_summary_single = discount_breakdown_single.join(discount_pct_single)
print(discount_summary_single)



               No Discount  Used Discount  No Discount %  Used Discount %
recency_group                                                            
0-30                  1319           5404           19.6             80.4
31-60                 1105           4230           20.7             79.3
61-90                  725           4575           13.7             86.3
91-120                 648           4752           12.0             88.0
120+                 10227          50102           17.0             83.0
Spend Objective  am_campaigns  bolt_plus  marketing  marketplace_campaigns  \
recency_group                                                                
0-30                     1855         39        149                   2783   
31-60                     923         38        196                   2560   
61-90                    1389         11        692                   2003   
91-120                    963          0        637                   2642   
120+          

In [None]:
import plotly.graph_objects as go

# ---- build a plotting DataFrame ----
plot_df = counts_single.copy()
for col in ['No Discount', 'Used Discount']:
    plot_df[col + ' %'] = pct_single[col + ' %']

long = (
    plot_df.reset_index()
           .melt(id_vars='recency_group',
                 value_vars=['No Discount', 'Used Discount'],
                 var_name='discount_flag',
                 value_name='count')
)

pct_long = (
    plot_df.reset_index()
           .melt(id_vars='recency_group',
                 value_vars=['No Discount %', 'Used Discount %'],
                 var_name='discount_flag_pct',
                 value_name='pct')
)

long['pct'] = pct_long['pct'].values

# keep your preferred order (top to bottom) 
cat_order = ['0-30','31-60','61-90','91-120','120+']
long['recency_group'] = pd.Categorical(long['recency_group'], cat_order, ordered=True)
long = long.sort_values('recency_group', ascending=False)

# ---- plot ----
fig = go.Figure()

for flag, chunk in long.groupby('discount_flag'):
    fig.add_trace(
        go.Bar(
            y=chunk['recency_group'],
            x=chunk['count'],
            name=flag,
            orientation='h',
            text=chunk['pct'].astype(str) + '%',
            textposition='inside',
            insidetextanchor='middle',
            hovertemplate=(
                'Recency: %{y}<br>' +
                f'{flag} count: %{x}<br>' +
                'Share: %{text}<extra></extra>'
            )
        )
    )

# hide tiny % labels (<3%) to avoid clutter
fig.for_each_trace(lambda t: t.update(
    text=[txt if float(txt.strip('%')) >= 3 else '' for txt in t.text]
))

fig.update_layout(
    barmode='stack',
    title='<b>Single-Order Users: Discount vs No Discount by Recency Group</b>',
    xaxis_title='Count of Orders (users with exactly one order)',
    yaxis_title='Recency Group (days since last order)',
    hovermode='y',
    legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5),
    margin=dict(l=90, r=40, t=60, b=90)
)

# ---- save ----
out_file = 'single_order_discount_usage.html'
fig.write_html(out_file, include_plotlyjs='cdn', full_html=True)
print(f"Saved to {out_file}")
# fig.show()  # if you’re in a notebook


Saved to single_order_discount_usage.html


In [None]:
# 0) Ensure clean categories (optional but neat)
spend_order = [
    'marketplace_campaigns','portal_campaigns','am_campaigns','marketing',
    'smart_promotions','bolt_plus','other_campaigns','no_campaign'
]
df_last_orders['Spend Objective'] = (
    df_last_orders['Spend Objective']
    .fillna('no_campaign')
    .astype(pd.CategoricalDtype(spend_order, ordered=True))
)

# 1) Counts table (no filter!)
counts_so = (
    df_last_orders
      .groupby(['recency_group','Spend Objective','used_discount'], observed=True)
      .size()
      .unstack('used_discount', fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)

# 2) % table (row-wise within each recency_group × spend objective)
pct_so = (
    counts_so
      .div(counts_so.sum(axis=1), axis=0)
      .mul(100).round(1)
      .add_suffix(' %')
)

# 3) Final summary
summary_so = counts_so.join(pct_so)
print(summary_so)          # MultiIndex rows
summary_so_reset = summary_so.reset_index()
print(summary_so_reset)


used_discount                        No Discount  Used Discount  \
recency_group Spend Objective                                     
0-30          marketplace_campaigns            0          11643   
              portal_campaigns                 0           4553   
              am_campaigns                     0          15689   
              marketing                        0            285   
              smart_promotions                 0            682   
              bolt_plus                        0           2136   
              other_campaigns                  0             44   
              no_campaign                  12635            107   
31-60         marketplace_campaigns            0          10475   
              portal_campaigns                 0           2526   
              am_campaigns                     0           5427   
              marketing                        0            255   
              smart_promotions                 0            27

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# -------------------------------------------------
# Assumes you already have counts_so & pct_so from your snippet
# counts_so index: (recency_group, Spend Objective)
# counts_so cols : ['No Discount','Used Discount']
# -------------------------------------------------

# 1) Collapse discount flags -> total count per (recency_group, SO)
collapsed = counts_so.sum(axis=1).rename('count').reset_index()  # -> recency_group, SO, count

# 2) % of full bar (per recency_group across *all* SOs)
collapsed['pct_bar'] = (
    collapsed['count'] /
    collapsed.groupby('recency_group')['count'].transform('sum')
).mul(100).round(1)

# Hide tiny labels
collapsed['label'] = collapsed['pct_bar'].astype(str) + '%'
collapsed.loc[collapsed['pct_bar'] < 3, 'label'] = ''

# 3) Order buckets (top to bottom)
recency_order = ['0-30','31-60','61-90','91-120','120+']
collapsed['recency_group'] = pd.Categorical(collapsed['recency_group'], recency_order, ordered=True)

# 4) Figure: one trace per Spend Objective (no patterns, no flags)
fig = go.Figure()

for so, df_so in collapsed.groupby('Spend Objective'):
    fig.add_trace(
        go.Bar(
            y=df_so['recency_group'],
            x=df_so['count'],
            name=so,
            orientation='h',
            text=df_so['label'],
            textposition='inside',
            insidetextanchor='middle',
            customdata=np.stack([df_so['pct_bar']], axis=-1),
            hovertemplate=(
                'Recency: %{y}<br>'
                f'Spend Objective: {so}<br>'
                'Count: %{x}<br>'
                'Share of bar: %{customdata[0]}%<extra></extra>'
            )
        )
    )

fig.update_layout(
    barmode='stack',
    title='<b>Last-Order Discount Usage Split by Spend Objective</b>',
    xaxis_title='Count of Orders (last order per user)',
    yaxis_title='Recency Group (days since last order)',
    hovermode='y',
    legend=dict(orientation='h', yanchor='bottom', y=-0.18, xanchor='center', x=0.5),
    margin=dict(l=110, r=40, t=70, b=120),
)

# 5) Dropdown to show only one SO (optional but handy)
buttons = []
all_visible = [True] * len(fig.data)
buttons.append(dict(
    label='All',
    method='update',
    args=[{'visible': all_visible},
          {'title':'<b>Last-Order Discount Usage Split by Spend Objective — All</b>'}]
))

unique_sos = collapsed['Spend Objective'].unique()
for so in unique_sos:
    vis = [tr.name == so for tr in fig.data]
    buttons.append(dict(
        label=so,
        method='update',
        args=[{'visible': vis},
              {'title': f'<b>{so}</b>'}]
    ))

fig.update_layout(
    updatemenus=[dict(
        type='dropdown',
        x=1.02, y=1,
        xanchor='left', yanchor='top',
        buttons=buttons,
        showactive=True
    )]
)

# 6) Save to HTML
out_file = 'discount_by_recency_SO_no_flag.html'
fig.write_html(out_file, include_plotlyjs='cdn', full_html=True)
print(f'Saved to {out_file}')
# fig.show()


Saved to discount_by_recency_SO_no_flag.html








In [44]:
import pandas as pd
import numpy as np
from datetime import datetime

# ---- 0) Isolate single-order users ----
order_counts  = df_deduplicated['User ID'].value_counts()
single_users  = order_counts[order_counts == 1].index
df_single     = df_deduplicated[df_deduplicated['User ID'].isin(single_users)].copy()

# ---- 1) Last-order fields (it's their only order anyway) ----
df_single['days_since_last_order'] = (
    pd.Timestamp(datetime.today().date()) - df_single['Order Created Date']
).dt.days
df_single['used_discount'] = df_single['Discount Value Local'] > 0

# ---- 2) Recency buckets ----
bins   = [0, 30, 60, 90, 120, np.inf]
labels = ['0-30','31-60','61-90','91-120','120+']
df_single['recency_group'] = pd.cut(df_single['days_since_last_order'],
                                    bins=bins, labels=labels, right=True)

# ---- 3) Clean Spend Objective cats ----
spend_order = [
    'marketplace_campaigns','portal_campaigns','am_campaigns','marketing',
    'smart_promotions','bolt_plus','other_campaigns','no_campaign'
]
df_single['Spend Objective'] = (
    df_single['Spend Objective']
    .fillna('no_campaign')
    .astype(pd.CategoricalDtype(spend_order, ordered=True))
)

# ---- 4) Counts & % (NO filtering!) ----
counts_so_single = (
    df_single
      .groupby(['recency_group','Spend Objective','used_discount'], observed=True)
      .size()
      .unstack('used_discount', fill_value=0)
      .rename(columns={False:'No Discount', True:'Used Discount'})
)

pct_so_single = (
    counts_so_single
      .div(counts_so_single.sum(axis=1), axis=0)
      .mul(100).round(1)
      .add_suffix(' %')
)

summary_so_single = counts_so_single.join(pct_so_single)

print("=== SINGLE-ORDER USERS: summary_so_single ===")
print(summary_so_single)
print("\nAs flat df:\n", summary_so_single.reset_index())


=== SINGLE-ORDER USERS: summary_so_single ===
used_discount                        No Discount  Used Discount  \
recency_group Spend Objective                                     
0-30          marketplace_campaigns            0           2783   
              portal_campaigns                 0            541   
              am_campaigns                     0           1855   
              marketing                        0            149   
              smart_promotions                 0             17   
              bolt_plus                        0             39   
              no_campaign                   1319             20   
31-60         marketplace_campaigns            0           2560   
              portal_campaigns                 0            486   
              am_campaigns                     0            923   
              marketing                        0            196   
              smart_promotions                 0             18   
              bo

In [45]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# -------------------------------------------------
# Assumes you already built:
#   counts_so_single  (index: recency_group, Spend Objective; cols: ['No Discount','Used Discount'])
# -------------------------------------------------

# 1) Collapse discount flags -> total count per (recency_group, SO)
collapsed_single = (
    counts_so_single
    .sum(axis=1)                       # sum No Discount + Used Discount
    .rename('count')
    .reset_index()
)

# 2) % of full bar (per recency_group across *all* SOs)
collapsed_single['pct_bar'] = (
    collapsed_single['count'] /
    collapsed_single.groupby('recency_group')['count'].transform('sum')
).mul(100).round(1)

# Hide tiny labels
collapsed_single['label'] = collapsed_single['pct_bar'].astype(str) + '%'
collapsed_single.loc[collapsed_single['pct_bar'] < 3, 'label'] = ''

# 3) Order buckets (top to bottom)
recency_order = ['0-30','31-60','61-90','91-120','120+']
collapsed_single['recency_group'] = pd.Categorical(
    collapsed_single['recency_group'],
    recency_order, ordered=True
)

# 4) Figure: one trace per Spend Objective
fig_single = go.Figure()

for so, df_so in collapsed_single.groupby('Spend Objective'):
    fig_single.add_trace(
        go.Bar(
            y=df_so['recency_group'],
            x=df_so['count'],
            name=so,
            orientation='h',
            text=df_so['label'],
            textposition='inside',
            insidetextanchor='middle',
            customdata=np.stack([df_so['pct_bar']], axis=-1),
            hovertemplate=(
                'Recency: %{y}<br>'
                f'Spend Objective: {so}<br>'
                'Count: %{x}<br>'
                'Share of bar: %{customdata[0]}%<extra></extra>'
            )
        )
    )

fig_single.update_layout(
    barmode='stack',
    title='<b>Single-Order Users — Last-Order Split by Spend Objective</b>',
    xaxis_title='Count of Orders (single-order users)',
    yaxis_title='Recency Group (days since last order)',
    hovermode='y',
    legend=dict(orientation='h', yanchor='bottom', y=-0.18, xanchor='center', x=0.5),
    margin=dict(l=110, r=40, t=70, b=120),
)

# 5) Dropdown to show only one SO (optional)
buttons = []
all_visible = [True] * len(fig_single.data)
buttons.append(dict(
    label='All',
    method='update',
    args=[{'visible': all_visible},
          {'title':'<b>Single-Order Users — All Spend Objectives</b>'}]
))

unique_sos_single = collapsed_single['Spend Objective'].unique()
for so in unique_sos_single:
    vis = [tr.name == so for tr in fig_single.data]
    buttons.append(dict(
        label=so,
        method='update',
        args=[{'visible': vis},
              {'title': f'<b>Single-Order Users — {so}</b>'}]
    ))

fig_single.update_layout(
    updatemenus=[dict(
        type='dropdown',
        x=1.02, y=1,
        xanchor='left', yanchor='top',
        buttons=buttons,
        showactive=True
    )]
)

# 6) Save to HTML
out_file_single = 'single_order_discount_by_recency_SO.html'
fig_single.write_html(out_file_single, include_plotlyjs='cdn', full_html=True)
print(f'Saved to {out_file_single}')
# fig_single.show()


Saved to single_order_discount_by_recency_SO.html








In [47]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

# ---------- CONFIG ----------
RECENCY_ORDER = ['0-30','31-60','61-90','91-120','120+']
LABEL_HIDE_THRESHOLD = 3  # percent
DASHBOARD_FILE = 'discount_dashboard.html'

# ---------- HELPERS ----------
def make_discount_vs_nodiscount_fig(counts_df, pct_df, title, x_label):
    """
    Horizontal stacked bars: Discount vs No Discount per recency bucket (no spend objective split).
    """
    plot_df = counts_df.copy()
    for col in ['No Discount', 'Used Discount']:
        plot_df[col + ' %'] = pct_df[col + ' %']

    long = (plot_df.reset_index()
                   .melt(id_vars='recency_group',
                         value_vars=['No Discount', 'Used Discount'],
                         var_name='discount_flag',
                         value_name='count'))
    pct_long = (plot_df.reset_index()
                        .melt(id_vars='recency_group',
                              value_vars=['No Discount %', 'Used Discount %'],
                              var_name='discount_flag_pct',
                              value_name='pct'))
    long['pct'] = pct_long['pct'].values
    long['recency_group'] = pd.Categorical(long['recency_group'], RECENCY_ORDER, ordered=True)
    long = long.sort_values('recency_group', ascending=False)

    fig = go.Figure()
    for flag, chunk in long.groupby('discount_flag'):
        texts = chunk['pct'].astype(str) + '%'
        texts = [t if float(t.strip('%')) >= LABEL_HIDE_THRESHOLD else '' for t in texts]
        fig.add_trace(
            go.Bar(
                y=chunk['recency_group'],
                x=chunk['count'],
                name=flag,
                orientation='h',
                text=texts,
                textposition='inside',
                insidetextanchor='middle',
                hovertemplate=(
                    'Recency: %{y}<br>' +
                    '{flag} count: %{x}<br>' +
                    'Share: %{text}<extra></extra>'
                )
            )
        )

    fig.update_layout(
        barmode='stack',
        title=f'<b>{title}</b>',
        xaxis_title=x_label,
        yaxis_title='Recency Group (days since last order)',
        hovermode='y',
        legend=dict(orientation='h', yanchor='bottom', y=-0.15, xanchor='center', x=0.5),
        margin=dict(l=90, r=40, t=70, b=90),
    )
    return fig


def collapse_so(counts_so_df):
    """
    Collapse discount flags so each Spend Objective has a single value (count sum).
    Return collapsed df with pct_bar and label computed.
    """
    collapsed = (
        counts_so_df.sum(axis=1).rename('count').reset_index()
    )
    collapsed['pct_bar'] = (
        collapsed['count'] /
        collapsed.groupby('recency_group')['count'].transform('sum')
    ).mul(100).round(1)

    collapsed['label'] = collapsed['pct_bar'].astype(str) + '%'
    collapsed.loc[collapsed['pct_bar'] < LABEL_HIDE_THRESHOLD, 'label'] = ''
    collapsed['recency_group'] = pd.Categorical(collapsed['recency_group'], RECENCY_ORDER, ordered=True)
    return collapsed


def make_spend_objective_fig(collapsed_df, title, x_label):
    """
    Horizontal stacked bars: Spend Objective segments (no discount flag shown).
    Dropdown to isolate one SO.
    """
    fig = go.Figure()
    for so, df_so in collapsed_df.groupby('Spend Objective'):
        fig.add_trace(
            go.Bar(
                y=df_so['recency_group'],
                x=df_so['count'],
                name=so,
                orientation='h',
                text=df_so['label'],
                textposition='inside',
                insidetextanchor='middle',
                customdata=np.stack([df_so['pct_bar']], axis=-1),
                hovertemplate=(
                    'Recency: %{y}<br>'
                    f'Spend Objective: {so}<br>'
                    'Count: %{x}<br>'
                    'Share of bar: %{customdata[0]}%<extra></extra>'
                )
            )
        )

    fig.update_layout(
        barmode='stack',
        title=f'<b>{title}</b>',
        xaxis_title=x_label,
        yaxis_title='Recency Group (days since last order)',
        hovermode='y',
        legend=dict(orientation='h', yanchor='bottom', y=-0.18, xanchor='center', x=0.5),
        margin=dict(l=110, r=40, t=70, b=120),
    )

    # Dropdown
    buttons = []
    all_visible = [True] * len(fig.data)
    buttons.append(dict(
        label='All',
        method='update',
        args=[{'visible': all_visible},
              {'title': f'<b>{title} — All</b>'}]
    ))

    unique_sos = collapsed_df['Spend Objective'].unique()
    for so in unique_sos:
        vis = [tr.name == so for tr in fig.data]
        buttons.append(dict(
            label=so,
            method='update',
            args=[{'visible': vis},
                  {'title': f'<b>{title} — {so}</b>'}]
        ))

    fig.update_layout(
        updatemenus=[dict(
            type='dropdown',
            x=1.02, y=1,
            xanchor='left', yanchor='top',
            buttons=buttons,
            showactive=True
        )]
    )
    return fig


# ---------- BUILD FIGURES ----------
# Fig 1: All users — discount vs no discount (recency)
fig_all_simple = make_discount_vs_nodiscount_fig(
    counts, pct,
    title='Discount vs No Discount by Recency Group (All Users)',
    x_label='Count of Orders/Users'
)

# Fig 2: Single-order users — discount vs no discount (recency)
fig_single_simple = make_discount_vs_nodiscount_fig(
    counts_single, pct_single,
    title='Discount vs No Discount by Recency Group (Single-Order Users)',
    x_label='Count of Orders (single-order users)'
)

# Fig 3: All users — spend objective split
collapsed_all = collapse_so(counts_so)
fig_all_so = make_spend_objective_fig(
    collapsed_all,
    title='Last-Order Split by Spend Objective (All Users)',
    x_label='Count of Orders (last order per user)'
)

# Fig 4: Single-order users — spend objective split
collapsed_single = collapse_so(counts_so_single)
fig_single_so = make_spend_objective_fig(
    collapsed_single,
    title='Last-Order Split by Spend Objective (Single-Order Users)',
    x_label='Count of Orders (single-order users)'
)

# ---------- EXPORT ONE HTML ----------
# Only include plotly.js once (in the first figure)
html1 = pio.to_html(fig_all_simple, include_plotlyjs='cdn', full_html=False)
html2 = pio.to_html(fig_single_simple, include_plotlyjs=False, full_html=False)
html3 = pio.to_html(fig_all_so, include_plotlyjs=False, full_html=False)
html4 = pio.to_html(fig_single_so, include_plotlyjs=False, full_html=False)

# Simple header & anchors
full_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Discount Dashboard</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 0 20px 40px; }}
h1 {{ margin-top: 30px; }}
.nav {{
  position: fixed; top: 10px; right: 10px;
  background: #f7f7f7; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px;
  font-size: 14px; line-height: 1.6;
}}
.nav a {{ text-decoration: none; color: #0077cc; }}
.nav a:hover {{ text-decoration: underline; }}
.section {{ margin: 80px 0; }}
</style>
</head>
<body>

<div class="nav">
  <b>Jump to:</b><br>
  <a href="#fig1">1. All Users – Disc vs No Disc</a><br>
  <a href="#fig2">2. Single Order – Disc vs No Disc</a><br>
  <a href="#fig3">3. All Users – Spend Objective</a><br>
  <a href="#fig4">4. Single Order – Spend Objective</a>
</div>

<h1>Discount Dashboard</h1>

<div id="fig1" class="section">
  <h2>1. All Users – Discount vs No Discount</h2>
  {html1}
</div>

<div id="fig2" class="section">
  <h2>2. Single-Order Users – Discount vs No Discount</h2>
  {html2}
</div>

<div id="fig3" class="section">
  <h2>3. All Users – Split by Spend Objective</h2>
  {html3}
</div>

<div id="fig4" class="section">
  <h2>4. Single-Order Users – Split by Spend Objective</h2>
  {html4}
</div>

</body>
</html>
"""

with open(DASHBOARD_FILE, 'w', encoding='utf-8') as f:
    f.write(full_html)

print(f"Saved all charts to {DASHBOARD_FILE}")


Saved all charts to discount_dashboard.html












In [56]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

# ---------- INPUT DFS YOU ALREADY BUILT ----------
# counts, pct
# counts_single, pct_single
# counts_so, pct_so
# counts_so_single, pct_so_single

# ---------- CONFIG ----------
RECENCY_ORDER_BOTTOM_FIRST = ['0-30','31-60','61-90','91-120','120+']
RECENCY_ORDER_TOP_FIRST    = RECENCY_ORDER_BOTTOM_FIRST[::-1]  # ['120+','91-120','61-90','31-60','0-30']
LABEL_HIDE_THRESHOLD = 3  # percent
DASHBOARD_FILE = 'discount_dashboard.html'

# ---------- HELPERS ----------
def make_discount_vs_nodiscount_fig(counts_df, pct_df, title, x_label):
    """ Horizontal stacked bars: Discount vs No Discount per recency bucket. """
    plot_df = counts_df.copy()
    for col in ['No Discount', 'Used Discount']:
        plot_df[col + ' %'] = pct_df[col + ' %']

    long = (
        plot_df.reset_index()
               .melt(id_vars='recency_group',
                     value_vars=['No Discount', 'Used Discount'],
                     var_name='discount_flag',
                     value_name='count')
    )
    pct_long = (
        plot_df.reset_index()
               .melt(id_vars='recency_group',
                     value_vars=['No Discount %', 'Used Discount %'],
                     var_name='discount_flag_pct',
                     value_name='pct')
    )
    long['pct'] = pct_long['pct'].values
    long['recency_group'] = pd.Categorical(long['recency_group'],
                                           RECENCY_ORDER_BOTTOM_FIRST,
                                           ordered=True)

    fig = go.Figure()
    for flag, chunk in long.groupby('discount_flag'):
        texts = chunk['pct'].astype(str) + '%'
        texts = [t if float(t.strip('%')) >= LABEL_HIDE_THRESHOLD else '' for t in texts]
        fig.add_trace(
            go.Bar(
                y=chunk['recency_group'],
                x=chunk['count'],
                name=flag,
                orientation='h',
                text=texts,
                textposition='inside',
                insidetextanchor='middle',
                hovertemplate=(
                    'Recency: %{y}<br>' +
                    flag + ' count: %{x}<br>' +
                    'Share: %{text}<extra></extra>'
                )
            )
        )

    fig.update_layout(
        barmode='stack',
        title=f'<b>{title}</b>',
        xaxis_title=x_label,
        yaxis_title='Recency Group (days since last order)',
        hovermode='y',
        legend=dict(
            orientation='h',
            yanchor='top',
            y=-0.25,          # moved lower
            xanchor='center',
            x=0.5
        ),
        margin=dict(l=90, r=40, t=70, b=150),  # extra bottom space
    )
    fig.update_yaxes(categoryorder='array', categoryarray=RECENCY_ORDER_TOP_FIRST)
    return fig


def collapse_so(counts_so_df):
    """ Collapse discount flags -> total count per (recency_group, SO). """
    collapsed = counts_so_df.sum(axis=1).rename('count').reset_index()
    collapsed['pct_bar'] = (
        collapsed['count'] /
        collapsed.groupby('recency_group')['count'].transform('sum')
    ).mul(100).round(1)

    collapsed['label'] = collapsed['pct_bar'].astype(str) + '%'
    collapsed.loc[collapsed['pct_bar'] < LABEL_HIDE_THRESHOLD, 'label'] = ''
    collapsed['recency_group'] = pd.Categorical(collapsed['recency_group'],
                                                RECENCY_ORDER_BOTTOM_FIRST,
                                                ordered=True)
    return collapsed


def make_spend_objective_fig(collapsed_df, title, x_label):
    """ Horizontal stacked bars: Spend Objective segments only. """
    fig = go.Figure()
    for so, df_so in collapsed_df.groupby('Spend Objective'):
        fig.add_trace(
            go.Bar(
                y=df_so['recency_group'],
                x=df_so['count'],
                name=so,
                orientation='h',
                text=df_so['label'],
                textposition='inside',
                insidetextanchor='middle',
                customdata=np.stack([df_so['pct_bar']], axis=-1),
                hovertemplate=(
                    'Recency: %{y}<br>'
                    'Spend Objective: ' + so + '<br>'
                    'Count: %{x}<br>'
                    'Share of bar: %{customdata[0]}%<extra></extra>'
                )
            )
        )

    fig.update_layout(
        barmode='stack',
        title=f'<b>{title}</b>',
        xaxis_title=x_label,
        yaxis_title='Recency Group (days since last order)',
        hovermode='y',
        legend=dict(
            orientation='h',
            yanchor='top',
            y=-0.28,          # moved lower
            xanchor='center',
            x=0.5
        ),
        margin=dict(l=110, r=40, t=70, b=150),
    )
    fig.update_yaxes(categoryorder='array', categoryarray=RECENCY_ORDER_TOP_FIRST)

    # Dropdown
    buttons = []
    all_visible = [True] * len(fig.data)
    buttons.append(dict(
        label='All',
        method='update',
        args=[{'visible': all_visible},
              {'title': f'<b>{title} — All</b>'}]
    ))

    unique_sos = collapsed_df['Spend Objective'].unique()
    for so in unique_sos:
        vis = [tr.name == so for tr in fig.data]
        buttons.append(dict(
            label=so,
            method='update',
            args=[{'visible': vis},
                  {'title': f'<b>{title} — {so}</b>'}]
        ))

    fig.update_layout(
        updatemenus=[dict(
            type='dropdown',
            x=1.02, y=1,
            xanchor='left', yanchor='top',
            buttons=buttons,
            showactive=True
        )]
    )
    return fig


# ---------- BUILD FIGURES ----------
fig_all_simple = make_discount_vs_nodiscount_fig(
    counts, pct,
    title='Discount vs No Discount by Recency Group (All Users)',
    x_label='Count of Orders/Users'
)

fig_single_simple = make_discount_vs_nodiscount_fig(
    counts_single, pct_single,
    title='Discount vs No Discount by Recency Group (Single-Order Users)',
    x_label='Count of Orders (single-order users)'
)

collapsed_all = collapse_so(counts_so)
fig_all_so = make_spend_objective_fig(
    collapsed_all,
    title='Last-Order Split by Spend Objective (All Users)',
    x_label='Count of Orders (last order per user)'
)

collapsed_single = collapse_so(counts_so_single)
fig_single_so = make_spend_objective_fig(
    collapsed_single,
    title='Last-Order Split by Spend Objective (Single-Order Users)',
    x_label='Count of Orders (single-order users)'
)

# ---------- EXPORT ONE HTML ----------
html1 = pio.to_html(fig_all_simple,   include_plotlyjs='cdn',  full_html=False)
html2 = pio.to_html(fig_single_simple, include_plotlyjs=False, full_html=False)
html3 = pio.to_html(fig_all_so,        include_plotlyjs=False, full_html=False)
html4 = pio.to_html(fig_single_so,     include_plotlyjs=False, full_html=False)

full_html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Discount Dashboard</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 0 20px 40px; }}
h1 {{ margin-top: 30px; }}
.nav {{
  position: fixed; top: 10px; right: 10px;
  background: #f7f7f7; padding: 10px 14px; border: 1px solid #ddd; border-radius: 8px;
  font-size: 14px; line-height: 1.6;
}}
.nav a {{ text-decoration: none; color: #0077cc; }}
.nav a:hover {{ text-decoration: underline; }}
.section {{ margin: 80px 0; }}
</style>
</head>
<body>

<div class="nav">
  <b>Jump to:</b><br>
  <a href="#fig1">1. All Users – Disc vs No Disc</a><br>
  <a href="#fig2">2. Single Order – Disc vs No Disc</a><br>
  <a href="#fig3">3. All Users – Spend Objective</a><br>
  <a href="#fig4">4. Single Order – Spend Objective</a>
</div>

<h1>Discount Dashboard</h1>

<div id="fig1" class="section">
  <h2>1. All Users – Discount vs No Discount</h2>
  {html1}
</div>

<div id="fig2" class="section">
  <h2>2. Single-Order Users – Discount vs No Discount</h2>
  {html2}
</div>

<div id="fig3" class="section">
  <h2>3. All Users – Split by Spend Objective</h2>
  {html3}
</div>

<div id="fig4" class="section">
  <h2>4. Single-Order Users – Split by Spend Objective</h2>
  {html4}
</div>

</body>
</html>
"""

with open(DASHBOARD_FILE, 'w', encoding='utf-8') as f:
    f.write(full_html)

print(f"Saved all charts to {DASHBOARD_FILE}")


Saved all charts to discount_dashboard.html










