# Data Loading

In [None]:
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

In [None]:
# Render floats with two decimals in every DataFrame displayed below.
pd.set_option('display.float_format','{:.2f}'.format)

In [None]:
# Non-eSales input table. Later cells read 'global_id', 'flag_gl_id' and
# 'YYYY-MM' month columns from it.
# NOTE(review): presumably one row per global_id - confirm against the source.
nonesales = pd.read_parquet('../Data/nonesales_pivoted_2025_cleaned_v2.parquet')

In [None]:
# eSales input table. Later cells read 'global_id', 'flag_gl_id' and
# 'YYYY-MM' month columns from it.
# NOTE(review): presumably one row per global_id - confirm against the source.
esales = pd.read_parquet('../Data/esales_pivoted_2025_cleaned_v2.parquet')

In [None]:
def get_month_list(tp):
    """Return the 'YYYY-MM' labels covered by a time-period code.

    ``tp`` has the form ``'<yy>_<mon>_<k>'`` (for example ``'24_jun_12'``):
    a two-digit year, an English three-letter month abbreviation (any case)
    and a month count.  The window starts in the month *after* ``<mon>`` of
    year ``2000 + yy - 1`` and runs for ``k`` consecutive months.  With
    ``k == 12`` it therefore ends in ``<mon>`` of ``2000 + yy``; a smaller
    ``k`` keeps the same start and ends earlier.

    Returns a list of ``k`` strings (empty when ``k <= 0``).

    Raises ``ValueError`` when ``tp`` does not have three '_'-separated
    parts, the month abbreviation is unknown, or ``yy`` / ``k`` are not
    integers.
    """
    # Explicit table instead of strptime('%b'), whose accepted month names
    # depend on the process locale.
    month_numbers = {
        'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
        'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
    }

    parts = tp.split('_')
    if len(parts) != 3:
        raise ValueError(f"time period {tp!r} must look like '<yy>_<mon>_<k>'")
    yy, mon_str, k_str = parts

    anchor_month = month_numbers.get(mon_str.lower())
    if anchor_month is None:
        raise ValueError(f"unknown month abbreviation {mon_str!r} in time period {tp!r}")

    k = int(k_str)
    anchor_year = 2000 + int(yy) - 1

    # Months are counted on a single 0-based axis (year * 12 + month index).
    # The 1-based anchor month equals the 0-based index of the month after it.
    first = anchor_year * 12 + anchor_month
    return [f'{(first + i) // 12:04d}-{(first + i) % 12 + 1:02d}' for i in range(k)]

# Time Level 12 months: jul-23 to jun-24 vs jul-24 to jun-25

## Time Level Definition

In [None]:
# Period codes in the '<yy>_<mon>_<k>' form parsed by get_month_list.
# tp1 is the baseline period, tp2 the period it is compared against.
tp1 = '24_jun_12'
tp2 = '25_jun_12'


# 'YYYY-MM' column labels belonging to each period.
tp1_cols = get_month_list(tp1)
tp2_cols = get_month_list(tp2)

In [None]:
# Inspect the available columns of the eSales table.
esales.columns

In [None]:
# Keep only the identifier columns and the months of the two periods.
window_cols = ['global_id', 'flag_gl_id', *tp1_cols, *tp2_cols]
esales_df = esales.loc[:, window_cols]
nonesales_df = nonesales.loc[:, window_cols]

## Pre Analysis

### Analysis Pre steps 

In [None]:
def calc_metrics(df, tp1, tp2, tp1_cols, tp2_cols):
    """Add per-row totals and active-month statistics for two periods.

    A month is "active" when its value is non-zero.  Missing values are
    treated as zero (no sales), so a single NaN month neither turns the row
    total into NaN nor gets counted as an active month.

    Parameters
    ----------
    df : DataFrame containing the columns listed in ``tp1_cols`` / ``tp2_cols``.
    tp1, tp2 : period labels used to build the names of the new columns.
    tp1_cols, tp2_cols : lists of month column names for each period.

    Returns
    -------
    A copy of ``df`` (the input is not modified) with these columns added
    for each period ``tp``:
    ``{tp}_Total``, ``months_active_{tp}``, ``avg_act_monthly_{tp}`` (total
    divided by active months, 0 when there are none) and ``std_dev_{tp}``
    (population standard deviation over active months, NaN when there are
    none).
    """
    def _period_stats(cols):
        values = df[cols].fillna(0).to_numpy()
        is_active = values != 0

        total = values.sum(axis=1)
        n_active = is_active.sum(axis=1)
        # Rows without any active month keep the pre-filled 0.0 average.
        avg = np.divide(
            total,
            n_active,
            out=np.zeros_like(total, dtype=float),
            where=n_active != 0,
        )
        # Inactive months are masked out so they do not pull the spread down;
        # fully masked rows become NaN.
        std = np.ma.masked_array(values, mask=~is_active).std(axis=1, ddof=0)
        return total, n_active, avg, np.ma.filled(std, np.nan)

    tp1_total, tp1_active, tp1_avg, tp1_std = _period_stats(tp1_cols)
    tp2_total, tp2_active, tp2_avg, tp2_std = _period_stats(tp2_cols)

    result = df.copy()
    result[f'{tp1}_Total'] = tp1_total
    result[f'{tp2}_Total'] = tp2_total
    result[f'months_active_{tp1}'] = tp1_active
    result[f'months_active_{tp2}'] = tp2_active
    result[f'avg_act_monthly_{tp1}'] = tp1_avg
    result[f'avg_act_monthly_{tp2}'] = tp2_avg
    result[f'std_dev_{tp1}'] = tp1_std
    result[f'std_dev_{tp2}'] = tp2_std

    return result

In [None]:
# Add totals / active-month statistics to both channel tables.
esales_df, nonesales_df = (
    calc_metrics(channel_df, tp1, tp2, tp1_cols, tp2_cols)
    for channel_df in (esales_df, nonesales_df)
)

In [None]:
# Sanity check: overall eSales total of the comparison period.  The column
# name is derived from tp2 so the check follows the period definition.
esales_df[f'{tp2}_Total'].sum()

In [None]:
# One row per global_id seen in either channel; shared column names get a
# channel suffix.
merged_df = nonesales_df.merge(
    esales_df,
    on='global_id',
    how='outer',
    suffixes=('_nonesales', '_esales'),
)
# Single flag column: the eSales value wins, the non-eSales value fills gaps.
merged_df['flag_gl_id'] = merged_df['flag_gl_id_esales'].combine_first(merged_df['flag_gl_id_nonesales'])
# Customers missing from one channel have no values there; treat those as 0.
merged_df = merged_df.drop(columns=['flag_gl_id_esales', 'flag_gl_id_nonesales']).fillna(0)

usag_df = merged_df

In [None]:
# Sanity check: the eSales total of the comparison period after the merge.
# The column name is derived from tp2 so the check follows the period definition.
usag_df[f'{tp2}_Total_esales'].sum()

## Analysis

### Distribution analysis

In [None]:
def assign_segment(df, k, stype, atype, tp1, tp2):
    """Label every row with a trend segment for one channel.

    The average monthly value of period ``tp2`` is compared with that of
    ``tp1`` using a band of ``k`` standard deviations of ``tp1``.  Rules are
    checked in order and the first match wins:

    1. both averages are 0                      -> "Not in Channel"
    2. only the ``tp1`` average is 0            -> "New"
    3. only the ``tp2`` average is 0            -> "Declining"
    4. at most one active month in ``tp1``      -> "Unallocated"
    5. ``tp2`` average below ``tp1`` - k * std  -> "Declining"
    6. ``tp2`` average above ``tp1`` + k * std  -> "Growing"
    otherwise                                   -> "Stable"

    ``stype`` is the channel suffix and ``atype`` the average type used in
    the column names.  Returns a NumPy array of labels aligned with ``df``;
    the column sums of the inputs are printed as a side effect.
    """
    base_avg = df[f'avg_{atype}_monthly_{tp1}_{stype}']
    curr_avg = df[f'avg_{atype}_monthly_{tp2}_{stype}']
    base_std = df[f'std_dev_{tp1}_{stype}']
    base_months = df[f'months_active_{tp1}_{stype}']

    band = k * base_std
    no_base = base_avg == 0
    no_curr = curr_avg == 0

    # (mask, label) pairs in priority order.
    rules = (
        (no_base & no_curr, "Not in Channel"),
        (no_base, "New"),
        (no_curr, "Declining"),
        (base_months <= 1, "Unallocated"),
        (curr_avg < base_avg - band, "Declining"),
        (curr_avg > base_avg + band, "Growing"),
    )
    masks = [mask for mask, _ in rules]
    labels = [label for _, label in rules]

    print(base_avg.sum(), curr_avg.sum(), base_std.sum(), base_months.sum())
    return np.select(masks, labels, default="Stable")


In [None]:
stypes = ['esales', 'nonesales']   # channels
atypes = ['act']                   # average types
ks = [1]                           # band widths in standard deviations

# One segment column per (channel, average type, band width) combination.
for stype in stypes:
    for atype in atypes:
        for k in ks:
            segment_col = f'{tp1}_{tp2}_{atype}_mon_{stype}_sd_{k}_segment'
            usag_df[segment_col] = assign_segment(usag_df, k, stype, atype, tp1, tp2)
            print(f'{segment_col} done')


In [None]:
# Customer id plus both segment labels for this period pair; the 9-month
# section later reuses this table as `usag_df_proxy`.
usag_segments = usag_df[['global_id',f'{tp1}_{tp2}_act_mon_esales_sd_1_segment',f'{tp1}_{tp2}_act_mon_nonesales_sd_1_segment']]

### YoY assign

In [None]:
def compute_yoy(df, stype, tp1, tp2, min_total=2):
    """Return the year-over-year change of the period totals for one channel.

    Parameters
    ----------
    df : DataFrame with ``{tp1}_Total_{stype}`` and ``{tp2}_Total_{stype}``.
    stype : channel suffix used in the column names.
    tp1, tp2 : baseline and comparison period labels.
    min_total : totals at or below this value are treated as "no meaningful
        revenue" (default 2, the threshold this notebook has always used).

    Returns
    -------
    NumPy array aligned with ``df``:
    * 0 when both totals are at or below ``min_total``;
    * 1 when only the baseline total is at or below ``min_total``;
    * ``tp2_total / tp1_total - 1`` otherwise.
    """
    total_tp1 = df[f'{tp1}_Total_{stype}']
    total_tp2 = df[f'{tp2}_Total_{stype}']

    negligible_base = total_tp1 <= min_total
    negligible_curr = total_tp2 <= min_total

    conditions = [
        negligible_base & negligible_curr,
        negligible_base & ~negligible_curr,
    ]
    choices = [0, 1]

    # The ratio is evaluated for every row, but rows with a negligible
    # baseline (including 0) are overridden by the two conditions above.
    return np.select(conditions, choices, default=(total_tp2 / total_tp1) - 1)

In [None]:
# Customer-level YoY for each channel.
for channel in ('esales', 'nonesales'):
    usag_df[f'{tp1}_{tp2}_{channel}_yoy'] = compute_yoy(usag_df, channel, tp1, tp2)

## New Data Load

In [None]:
# Revenue buckets on the baseline eSales total.  With right=False every
# interval is closed on the left, so '<0' holds negative totals, '0' holds
# [0, 0.001) and '0-500' starts at 0.001.
bucket_bins = [-np.inf,0,0.001,500,1000,2500, 5000, 10000, 25000, np.inf]
buckets = ['<0','0','0-500','500-1000','1000-2500','2500-5000', '5000-10000', '10000-25000', '25000+']

usag_df[f'{tp1}_bucket'] = pd.cut(usag_df[f'{tp1}_Total_esales'], bins=bucket_bins, labels=buckets, right=False)

## Inc Analysis

In [None]:
# Same values as in the segmentation cell above: channels, average types
# and band widths.
stypes = ['esales', 'nonesales']
atypes = ['act']
ks = [1]

In [None]:
# Convert the categorical bucket to plain strings; compute_increment later
# matches the labels '<0' and '0' by string.
usag_df[f'{tp1}_bucket'] = usag_df[f'{tp1}_bucket'].astype(str)

In [None]:
# eSales YoY of each baseline bucket as a whole.
esales_buckets = usag_df.groupby([f'{tp1}_bucket'])[[f'{tp1}_Total_esales',f'{tp2}_Total_esales']].sum().reset_index()
esales_buckets['comp_bucket_YoY'] = (esales_buckets[f'{tp2}_Total_esales']/esales_buckets[f'{tp1}_Total_esales'])-1
esales_buckets = esales_buckets[[f'{tp1}_bucket','comp_bucket_YoY']]
# Drop a 'comp_bucket_YoY' left by an earlier run of this cell.  Without it,
# re-running would produce 'comp_bucket_YoY_x' / '_y' and compute_increment
# could no longer find the column.
usag_df = (
    usag_df
    .drop(columns=['comp_bucket_YoY'], errors='ignore')
    .merge(esales_buckets, how='left', on=[f'{tp1}_bucket'])
)

In [None]:
# Number of rows with a non-null global_id in each baseline bucket.
cust_buckets = usag_df.groupby([f'{tp1}_bucket'])['global_id'].count().reset_index()

In [None]:
# Per-bucket sums of both channels' totals for both periods.
rev_buckets = usag_df.groupby([f'{tp1}_bucket'])[[f'{tp1}_Total_esales',f'{tp2}_Total_esales',f'{tp1}_Total_nonesales',f'{tp2}_Total_nonesales']].sum().reset_index()

In [None]:
# Attach the customer count, then the bucket-level YoY of each channel.
rev_buckets = rev_buckets.merge(cust_buckets, on=[f'{tp1}_bucket'], how='inner')
for channel in ('esales', 'nonesales'):
    channel_tp1 = rev_buckets[f'{tp1}_Total_{channel}']
    channel_tp2 = rev_buckets[f'{tp2}_Total_{channel}']
    rev_buckets[f'{channel}_yoy'] = (channel_tp2 / channel_tp1) - 1

In [None]:
# Display the per-bucket revenue summary.
rev_buckets

In [None]:
# Export the summary to the working directory (the row index is written too).
rev_buckets.to_excel('revenue buckets summary.xlsx')

In [None]:
# Display the bucket-level eSales YoY table.
# esales_buckets.to_excel('esales_buckets.xlsx')
esales_buckets

In [None]:
# Export the bucket-level YoY table to the working directory (the row index
# is written too).
esales_buckets.to_excel('esales_buckets_segment.xlsx')

In [None]:
def compute_increment(df, seg_col, tp1, tp2):
    """Return each row's incremental eSales growth rate.

    Rules, first match wins:

    * segment (``df[seg_col]``) is 'Not in Channel' -> 0.0
    * baseline bucket is '<0' or '0'                -> the row's own YoY
    * otherwise -> the row's YoY minus the YoY of its bucket
      (``comp_bucket_YoY``)

    Reads ``{tp1}_{tp2}_esales_yoy``, ``{tp1}_bucket`` and
    ``comp_bucket_YoY`` from ``df`` and returns a NumPy array aligned with it.
    """
    customer_yoy = df[f'{tp1}_{tp2}_esales_yoy']
    bucket_yoy = df['comp_bucket_YoY']

    not_in_channel = df[seg_col] == 'Not in Channel'
    no_positive_base = df[f'{tp1}_bucket'].isin(['<0', '0'])

    return np.select(
        [not_in_channel, no_positive_base],
        [0.0, customer_yoy],
        default=customer_yoy - bucket_yoy,
    )

In [None]:
seg_col = f'{tp1}_{tp2}_act_mon_esales_sd_1_segment'
# Customers whose eSales segment is anything other than 'Not in Channel'.
cust_count = int((usag_df[seg_col] != 'Not in Channel').sum())
cust_count

In [None]:
inc_pct_col = f'{tp1}_{tp2}_esales_incremental_pct'
inc_abs_col = f'{tp1}_{tp2}_esales_incremental_abs'

usag_df[inc_pct_col] = compute_increment(usag_df, seg_col, tp1, tp2)
# Absolute increment = incremental rate applied to the baseline eSales total.
usag_df[inc_abs_col] = usag_df[inc_pct_col] * usag_df[f'{tp1}_Total_esales']
print(usag_df[inc_abs_col].sum())

In [None]:
# Number of customers in each eSales segment.
usag_df.groupby(f'{tp1}_{tp2}_act_mon_esales_sd_1_segment')['global_id'].count()

### Placeholder

In [None]:
# Persist the customer-level table.  The target folder is created first so
# the write does not fail when 'Outputs' does not exist yet.
output_path = Path('Outputs') / f'usag_{tp1}_{tp2}_bucket_isocos_peinc_updated.parquet'
output_path.parent.mkdir(parents=True, exist_ok=True)
usag_df.to_parquet(output_path, index=False)

In [None]:
# Display the customer-level table.
usag_df

In [None]:
# usag_df[usag_df['24_jun_12_bucket']=='25000+'][['24_jun_12_bucket','global_id','STO__OWNERSHIP_TYPE_CODE']+[item + '_esales' for item in tp1_cols]+[item + '_esales' for item in tp1_cols]].to_excel('25000+ customers.xlsx')

## Post Analysis

### Crosstab creation

In [None]:

# Segment labels in the order used for the cross-tab rows and columns;
# crosstab_func reads this list as a global.
segments = ['Unallocated', 'New', 'Growing', 'Stable', 'Declining', 'Not in Channel']
segments

In [None]:
def crosstab_func(usag_df, atype, k, tp1, tp2, tp_proxy_1=None, tp_proxy_2=None, segment_labels=None):
    """Build the eSales-segment x non-eSales-segment summary table.

    Rows are (eSales segment, metric) pairs and columns are (non-eSales
    segment, metric) pairs.  For every pair of segments the table holds the
    customer count, the summed eSales incremental value, and the eSales /
    non-eSales totals of both periods together with their share of the
    overall total.  Amounts are stored as pre-formatted strings ('.0f' or
    '.2%'); the customer count is stored as an integer.

    Parameters
    ----------
    usag_df : customer-level DataFrame with the ``*_Total_*``, ``*_segment``
        and ``*_esales_incremental_abs`` columns.
    atype, k : average type and band width used in the segment column names.
    tp1, tp2 : period labels of the totals and of the incremental column.
    tp_proxy_1, tp_proxy_2 : period labels used in the *segment* column
        names when the segments were computed on a different period pair.
        Each defaults to ``tp1`` / ``tp2``.
    segment_labels : segment names in display order.  Defaults to the
        notebook-level ``segments`` list.

    Returns
    -------
    DataFrame with a two-level index and two-level columns; cells that are
    never written stay NaN.
    """
    seg_tp1 = tp1 if tp_proxy_1 is None else tp_proxy_1
    seg_tp2 = tp2 if tp_proxy_2 is None else tp_proxy_2
    # The global is only looked up when no explicit list is given.
    labels = segments if segment_labels is None else list(segment_labels)

    metric_rows = ['# of Customers', 'eSales_Inc', f'eSales {tp1}', f'eSales {tp2}']
    metric_cols = ['.', '..', f'non-eSales {tp1}', f'non-eSales {tp2}']

    cross_tab = pd.DataFrame(
        index=pd.MultiIndex.from_product([labels, metric_rows]),
        columns=pd.MultiIndex.from_product([labels, metric_cols]),
    )

    e_seg_col = f'{seg_tp1}_{seg_tp2}_{atype}_mon_esales_sd_{k}_segment'
    n_seg_col = f'{seg_tp1}_{seg_tp2}_{atype}_mon_nonesales_sd_{k}_segment'
    inc_col = f'{tp1}_{tp2}_esales_incremental_abs'

    # Grand totals, used as denominators of the share cells.
    total_nonesales_tp1 = usag_df[f'{tp1}_Total_nonesales'].sum()
    total_nonesales_tp2 = usag_df[f'{tp2}_Total_nonesales'].sum()
    total_esales_tp1 = usag_df[f'{tp1}_Total_esales'].sum()
    total_esales_tp2 = usag_df[f'{tp2}_Total_esales'].sum()

    for e_seg in labels:
        in_e_seg = usag_df[e_seg_col] == e_seg
        for n_seg in labels:
            subset = usag_df[in_e_seg & (usag_df[n_seg_col] == n_seg)]

            num_customers = subset.shape[0]
            esales_tp1 = subset[f'{tp1}_Total_esales'].sum()
            esales_tp2 = subset[f'{tp2}_Total_esales'].sum()
            nonesales_tp1 = subset[f'{tp1}_Total_nonesales'].sum()
            nonesales_tp2 = subset[f'{tp2}_Total_nonesales'].sum()
            esales_inc = subset[inc_col].sum()

            # Non-eSales totals share the '# of Customers' row and their
            # shares share the 'eSales_Inc' row; the layout is kept as is.
            cross_tab.loc[(e_seg, '# of Customers'), (n_seg, '.')] = num_customers
            cross_tab.loc[(e_seg, '# of Customers'), (n_seg, f'non-eSales {tp1}')] = f"{nonesales_tp1:.0f}"
            cross_tab.loc[(e_seg, '# of Customers'), (n_seg, f'non-eSales {tp2}')] = f"{nonesales_tp2:.0f}"
            cross_tab.loc[(e_seg, 'eSales_Inc'), (n_seg, f'non-eSales {tp1}')] = f"{(nonesales_tp1/total_nonesales_tp1):.2%}"
            cross_tab.loc[(e_seg, 'eSales_Inc'), (n_seg, f'non-eSales {tp2}')] = f"{(nonesales_tp2/total_nonesales_tp2):.2%}"

            cross_tab.loc[(e_seg, 'eSales_Inc'), (n_seg, '.')] = f"{esales_inc:.0f}"

            cross_tab.loc[(e_seg, f'eSales {tp1}'), (n_seg, '.')] = f"{esales_tp1:.0f}"
            cross_tab.loc[(e_seg, f'eSales {tp2}'), (n_seg, '.')] = f"{esales_tp2:.0f}"
            cross_tab.loc[(e_seg, f'eSales {tp1}'), (n_seg, '..')] = f"{(esales_tp1/total_esales_tp1):.2%}"
            cross_tab.loc[(e_seg, f'eSales {tp2}'), (n_seg, '..')] = f"{(esales_tp2/total_esales_tp2):.2%}"

    return cross_tab

In [None]:
# One cross-tab per (average type, band width); keys double as sheet names.
sheet_dict = {
    f"cross_tab_{tp1}_{tp2}_{atype}_mon_sd_{k}": crosstab_func(usag_df, atype, k, tp1, tp2)
    for atype in atypes
    for k in ks
}
print("done")

In [None]:
# Display the cross-tab for atype 'act' and a one-standard-deviation band.
sheet_dict[f'cross_tab_{tp1}_{tp2}_act_mon_sd_1']

In [None]:
# One worksheet per cross-tab; names are cut to 31 characters.
excel_path = f"crosstab_output_bucket_pe_{tp1}_{tp2}.xlsx"
with pd.ExcelWriter(excel_path) as writer:
    for sheet_name, sheet_df in sheet_dict.items():
        sheet_df.to_excel(writer, sheet_name=sheet_name[:31])

### Placeholder

# Time Level 9 months: jul-23 to mar-24 vs jul-24 to mar-25

In [None]:
# Reuse the segment labels computed earlier in the notebook (usag_segments)
# instead of reading them back from a CSV.
usag_df_proxy = usag_segments#pd.read_csv(f'usag_{tp1}_{tp2}_bucket_peinc_updated.csv')

In [None]:
# Remember the current period labels before tp1 / tp2 are redefined below;
# they are needed to address the segment columns carried over.
tp_proxy_1 = tp1
tp_proxy_2 = tp2

In [None]:
# usag_df_proxy.to_csv(f'usag_{tp_proxy_1}_{tp_proxy_2}_peinc_updated.csv')

## Time Level Definition

In [None]:
# Same start months as the 12-month codes, but only 9 months long
# (see get_month_list).
tp1 = '24_jun_9'
tp2 = '25_jun_9'


# 'YYYY-MM' column labels belonging to each period.
tp1_cols = get_month_list(tp1)
tp2_cols = get_month_list(tp2)

In [None]:
# Select the window from the raw tables rather than from the frames processed
# in the previous section, so this cell does not rely on the new window being
# a subset of the earlier one.
esales_df = esales[['global_id','flag_gl_id'] + tp1_cols + tp2_cols]
nonesales_df = nonesales[['global_id','flag_gl_id'] + tp1_cols + tp2_cols]

## Pre Analysis

### Analysis Pre steps 

In [None]:
def calc_metrics(df, tp1, tp2, tp1_cols, tp2_cols):
    """Add per-row totals and active-month statistics for two periods.

    NOTE: this redefines the ``calc_metrics`` declared earlier in the
    notebook; the definition executed last is the one in effect.

    A month is "active" when its value is non-zero.  Missing values are
    treated as zero (no sales), so a single NaN month neither turns the row
    total into NaN nor gets counted as an active month.

    Parameters
    ----------
    df : DataFrame containing the columns listed in ``tp1_cols`` / ``tp2_cols``.
    tp1, tp2 : period labels used to build the names of the new columns.
    tp1_cols, tp2_cols : lists of month column names for each period.

    Returns
    -------
    A copy of ``df`` (the input is not modified) with these columns added
    for each period ``tp``:
    ``{tp}_Total``, ``months_active_{tp}``, ``avg_act_monthly_{tp}`` (total
    divided by active months, 0 when there are none) and ``std_dev_{tp}``
    (population standard deviation over active months, NaN when there are
    none).
    """
    def _period_stats(cols):
        values = df[cols].fillna(0).to_numpy()
        is_active = values != 0

        total = values.sum(axis=1)
        n_active = is_active.sum(axis=1)
        # Rows without any active month keep the pre-filled 0.0 average.
        avg = np.divide(
            total,
            n_active,
            out=np.zeros_like(total, dtype=float),
            where=n_active != 0,
        )
        # Inactive months are masked out so they do not pull the spread down;
        # fully masked rows become NaN.
        std = np.ma.masked_array(values, mask=~is_active).std(axis=1, ddof=0)
        return total, n_active, avg, np.ma.filled(std, np.nan)

    tp1_total, tp1_active, tp1_avg, tp1_std = _period_stats(tp1_cols)
    tp2_total, tp2_active, tp2_avg, tp2_std = _period_stats(tp2_cols)

    result = df.copy()
    result[f'{tp1}_Total'] = tp1_total
    result[f'{tp2}_Total'] = tp2_total
    result[f'months_active_{tp1}'] = tp1_active
    result[f'months_active_{tp2}'] = tp2_active
    result[f'avg_act_monthly_{tp1}'] = tp1_avg
    result[f'avg_act_monthly_{tp2}'] = tp2_avg
    result[f'std_dev_{tp1}'] = tp1_std
    result[f'std_dev_{tp2}'] = tp2_std

    return result

In [None]:
# Add totals / active-month statistics to both channel tables.
esales_df, nonesales_df = (
    calc_metrics(channel_df, tp1, tp2, tp1_cols, tp2_cols)
    for channel_df in (esales_df, nonesales_df)
)

In [None]:
# One row per global_id seen in either channel; shared column names get a
# channel suffix.
merged_df = nonesales_df.merge(
    esales_df,
    on='global_id',
    how='outer',
    suffixes=('_nonesales', '_esales'),
)
# Single flag column: the eSales value wins, the non-eSales value fills gaps.
merged_df['flag_gl_id'] = merged_df['flag_gl_id_esales'].combine_first(merged_df['flag_gl_id_nonesales'])
# Customers missing from one channel have no values there; treat those as 0.
merged_df = merged_df.drop(columns=['flag_gl_id_esales', 'flag_gl_id_nonesales']).fillna(0)

usag_df = merged_df

In [None]:
# usag_df.to_csv(f"usag_{tp1}_{tp2}_unflagged.csv")

In [None]:
# months_2022, months_2023,months_2024,months_2025 = get_year_columns(nonesales_df)
# months = months_2022 + months_2023 + months_2024 + months_2025



# for month in months:
#     usag_df[month] = usag_df[f'{month}_nonesales'] + usag_df[f'{month}_esales']

# # usag_df = usag_df[['global_id'] + months]


In [None]:
# Display the merged customer-level table for the 9-month periods.
usag_df


## Analysis

### Distribution analysis

In [None]:
def get_segment(row,k,stype,atype,tp1,tp2):
    """Return the trend segment of a single row (row-wise classifier).

    ``row`` is indexed by column name.  The ``tp2`` average is compared with
    the ``tp1`` average using a band of ``k`` ``tp1`` standard deviations:

    * both averages 0                     -> "Not in Channel"
    * only the ``tp1`` average 0          -> "New"
    * only the ``tp2`` average 0          -> "Declining"
    * at most one active ``tp1`` month    -> "Unallocated"
    * below the band                      -> "Declining"
    * above the band                      -> "Growing"
    * otherwise                           -> "Stable"
    """
    base_avg = row[f'avg_{atype}_monthly_{tp1}_{stype}']
    curr_avg = row[f'avg_{atype}_monthly_{tp2}_{stype}']

    if base_avg == 0:
        return "Not in Channel" if curr_avg == 0 else "New"
    if curr_avg == 0:
        return "Declining"

    # Months and standard deviation are only read once they are needed.
    if row[f'months_active_{tp1}_{stype}'] <= 1:
        return "Unallocated"

    band = k * row[f'std_dev_{tp1}_{stype}']
    if curr_avg < base_avg - band:
        return "Declining"
    if curr_avg > base_avg + band:
        return "Growing"
    return "Stable"


In [None]:
# Channels, average types and band widths.
stypes = ['esales', 'nonesales']
atypes = ['act']
ks = [1]

# Segmentation is not recomputed for this period pair (the loop below is
# disabled); segment columns are merged in from usag_df_proxy further down.
# for stype in stypes:
#     for atype in atypes:
#         for k in ks:
#             usag_df[f'{tp1}_{tp2}_{atype}_mon_{stype}_sd_{str(k)}_segment'] = usag_df.apply(lambda row: get_segment(row,k, stype,atype,tp1,tp2), axis=1 )
#             print(f'{tp1}_{tp2}_{atype}_mon_{stype}_sd_{str(k)}_segment done')


### YoY assign

In [None]:
def compute_yoy(df, stype, tp1, tp2, min_total=2):
    """Return the year-over-year change of the period totals for one channel.

    NOTE: this redefines the ``compute_yoy`` declared earlier in the
    notebook; the definition executed last is the one in effect.

    Parameters
    ----------
    df : DataFrame with ``{tp1}_Total_{stype}`` and ``{tp2}_Total_{stype}``.
    stype : channel suffix used in the column names.
    tp1, tp2 : baseline and comparison period labels.
    min_total : totals at or below this value are treated as "no meaningful
        revenue" (default 2, the threshold this notebook has always used).

    Returns
    -------
    NumPy array aligned with ``df``:
    * 0 when both totals are at or below ``min_total``;
    * 1 when only the baseline total is at or below ``min_total``;
    * ``tp2_total / tp1_total - 1`` otherwise.
    """
    total_tp1 = df[f'{tp1}_Total_{stype}']
    total_tp2 = df[f'{tp2}_Total_{stype}']

    negligible_base = total_tp1 <= min_total
    negligible_curr = total_tp2 <= min_total

    conditions = [
        negligible_base & negligible_curr,
        negligible_base & ~negligible_curr,
    ]
    choices = [0, 1]

    # The ratio is evaluated for every row, but rows with a negligible
    # baseline (including 0) are overridden by the two conditions above.
    return np.select(conditions, choices, default=(total_tp2 / total_tp1) - 1)

In [None]:
# Customer-level YoY for each channel.
for channel in ('esales', 'nonesales'):
    usag_df[f'{tp1}_{tp2}_{channel}_yoy'] = compute_yoy(usag_df, channel, tp1, tp2)

In [None]:
# usag_df.to_csv(f"usag_{tp1}_{tp2}_flagged.csv")

## Inc Analysis

In [None]:
# usag_df = pd.read_csv(f"usag_{tp1}_{tp2}_flagged.csv")

# Channels, average types and band widths.
stypes = ['esales', 'nonesales']
atypes = ['act']
ks = [1]

In [None]:
# Display the segment labels carried over from the earlier section.
usag_segments

In [None]:
# Customer id plus the two segment columns named after the proxy period labels.
usag_df_proxy_2 = usag_df_proxy[['global_id',f'{tp_proxy_1}_{tp_proxy_2}_act_mon_esales_sd_1_segment',f'{tp_proxy_1}_{tp_proxy_2}_act_mon_nonesales_sd_1_segment']]

In [None]:
# Attach the carried-over segment labels.  Copies left by an earlier run of
# this cell are dropped first; otherwise re-running would create '_x' / '_y'
# duplicates and the segment columns could no longer be found by name.
proxy_segment_cols = [col for col in usag_df_proxy_2.columns if col != 'global_id']
usag_df = (
    usag_df
    .drop(columns=proxy_segment_cols, errors='ignore')
    .merge(usag_df_proxy_2, how='inner', on='global_id')
)

In [None]:
# Revenue buckets on the baseline eSales total.  With right=False every
# interval is closed on the left, so '<0' holds negative totals, '0' holds
# [0, 0.001) and '0-500' starts at 0.001.
bucket_bins = [-np.inf,0,0.001,500,1000,2500, 5000, 10000, 25000, np.inf]
buckets = ['<0','0','0-500','500-1000','1000-2500','2500-5000', '5000-10000', '10000-25000', '25000+']

usag_df[f'{tp1}_bucket'] = pd.cut(usag_df[f'{tp1}_Total_esales'], bins=bucket_bins, labels=buckets, right=False)

In [None]:
# Convert the categorical bucket to plain strings; compute_increment later
# matches the labels '<0' and '0' by string.
usag_df[f'{tp1}_bucket'] = usag_df[f'{tp1}_bucket'].astype(str)

In [None]:
# eSales YoY of each baseline bucket as a whole.
esales_buckets = usag_df.groupby([f'{tp1}_bucket'])[[f'{tp1}_Total_esales',f'{tp2}_Total_esales']].sum().reset_index()
esales_buckets['comp_bucket_YoY'] = (esales_buckets[f'{tp2}_Total_esales']/esales_buckets[f'{tp1}_Total_esales'])-1
esales_buckets = esales_buckets[[f'{tp1}_bucket','comp_bucket_YoY']]
# Drop a 'comp_bucket_YoY' left by an earlier run of this cell.  Without it,
# re-running would produce 'comp_bucket_YoY_x' / '_y' and compute_increment
# could no longer find the column.
usag_df = (
    usag_df
    .drop(columns=['comp_bucket_YoY'], errors='ignore')
    .merge(esales_buckets, how='left', on=[f'{tp1}_bucket'])
)

In [None]:
# Display the bucket-level eSales YoY table.
# esales_buckets.to_excel('esales_buckets_9months.xlsx')
esales_buckets

In [None]:
# Overall eSales growth across all customers, relative and absolute.
esales_total_tp1 = usag_df[f'{tp1}_Total_esales'].sum()
esales_total_tp2 = usag_df[f'{tp2}_Total_esales'].sum()

esales_yoy = (esales_total_tp2 / esales_total_tp1) - 1

esales_ov_inc_abs = esales_total_tp2 - esales_total_tp1

In [None]:
def compute_increment(df, seg_col, tp1, tp2):
    """Return each row's incremental eSales growth rate.

    NOTE: this redefines the ``compute_increment`` declared earlier in the
    notebook; the definition executed last is the one in effect.

    Rules, first match wins:

    * segment (``df[seg_col]``) is 'Not in Channel' -> 0.0
    * baseline bucket is '<0' or '0'                -> the row's own YoY
    * otherwise -> the row's YoY minus the YoY of its bucket
      (``comp_bucket_YoY``)

    Reads ``{tp1}_{tp2}_esales_yoy``, ``{tp1}_bucket`` and
    ``comp_bucket_YoY`` from ``df`` and returns a NumPy array aligned with it.
    """
    customer_yoy = df[f'{tp1}_{tp2}_esales_yoy']
    bucket_yoy = df['comp_bucket_YoY']

    not_in_channel = df[seg_col] == 'Not in Channel'
    no_positive_base = df[f'{tp1}_bucket'].isin(['<0', '0'])

    return np.select(
        [not_in_channel, no_positive_base],
        [0.0, customer_yoy],
        default=customer_yoy - bucket_yoy,
    )

In [None]:
seg_col = f'{tp_proxy_1}_{tp_proxy_2}_act_mon_esales_sd_1_segment'
# Customers whose eSales segment is anything other than 'Not in Channel'.
cust_count = int((usag_df[seg_col] != 'Not in Channel').sum())
cust_count

In [None]:
inc_pct_col = f'{tp1}_{tp2}_esales_incremental_pct'
inc_abs_col = f'{tp1}_{tp2}_esales_incremental_abs'

usag_df[inc_pct_col] = compute_increment(usag_df, seg_col, tp1, tp2)
# Absolute increment = incremental rate applied to the baseline eSales total.
usag_df[inc_abs_col] = usag_df[inc_pct_col] * usag_df[f'{tp1}_Total_esales']
print(usag_df[inc_abs_col].sum())

### Placeholder

In [None]:
# usag_df['STO__OWNERSHIP_TYPE_CODE'] = usag_df['STO__OWNERSHIP_TYPE_CODE'].astype(str)

In [None]:
# Persist the customer-level table.  The target folder is created first so
# the write does not fail when 'Outputs' does not exist yet.
output_path = Path('Outputs') / f'usag_{tp1}_{tp2}_bucket_isocos_peinc_updated.parquet'
output_path.parent.mkdir(parents=True, exist_ok=True)
usag_df.to_parquet(output_path, index=False)

## Post Analysis

### Crosstab creation

In [None]:

# Segment labels in the order used for the cross-tab rows and columns;
# crosstab_func reads this list as a global.
segments = ['Unallocated', 'New', 'Growing', 'Stable', 'Declining', 'Not in Channel']
segments

In [None]:
def crosstab_func(usag_df, atype, k, tp1, tp2, tp_proxy_1=None, tp_proxy_2=None, segment_labels=None):
    """Build the eSales-segment x non-eSales-segment summary table.

    NOTE: this redefines the ``crosstab_func`` declared earlier in the
    notebook.  The proxy arguments are optional so that calls written for
    the earlier five-argument form keep working after this cell has run.

    Rows are (eSales segment, metric) pairs and columns are (non-eSales
    segment, metric) pairs.  For every pair of segments the table holds the
    customer count, the summed eSales incremental value, and the eSales /
    non-eSales totals of both periods together with their share of the
    overall total.  Amounts are stored as pre-formatted strings ('.0f' or
    '.2%'); the customer count is stored as an integer.

    Parameters
    ----------
    usag_df : customer-level DataFrame with the ``*_Total_*``, ``*_segment``
        and ``*_esales_incremental_abs`` columns.
    atype, k : average type and band width used in the segment column names.
    tp1, tp2 : period labels of the totals and of the incremental column.
    tp_proxy_1, tp_proxy_2 : period labels used in the *segment* column
        names when the segments were computed on a different period pair.
        Each defaults to ``tp1`` / ``tp2``.
    segment_labels : segment names in display order.  Defaults to the
        notebook-level ``segments`` list.

    Returns
    -------
    DataFrame with a two-level index and two-level columns; cells that are
    never written stay NaN.
    """
    seg_tp1 = tp1 if tp_proxy_1 is None else tp_proxy_1
    seg_tp2 = tp2 if tp_proxy_2 is None else tp_proxy_2
    # The global is only looked up when no explicit list is given.
    labels = segments if segment_labels is None else list(segment_labels)

    metric_rows = ['# of Customers', 'eSales_Inc', f'eSales {tp1}', f'eSales {tp2}']
    metric_cols = ['.', '..', f'non-eSales {tp1}', f'non-eSales {tp2}']

    cross_tab = pd.DataFrame(
        index=pd.MultiIndex.from_product([labels, metric_rows]),
        columns=pd.MultiIndex.from_product([labels, metric_cols]),
    )

    e_seg_col = f'{seg_tp1}_{seg_tp2}_{atype}_mon_esales_sd_{k}_segment'
    n_seg_col = f'{seg_tp1}_{seg_tp2}_{atype}_mon_nonesales_sd_{k}_segment'
    inc_col = f'{tp1}_{tp2}_esales_incremental_abs'

    # Grand totals, used as denominators of the share cells.
    total_nonesales_tp1 = usag_df[f'{tp1}_Total_nonesales'].sum()
    total_nonesales_tp2 = usag_df[f'{tp2}_Total_nonesales'].sum()
    total_esales_tp1 = usag_df[f'{tp1}_Total_esales'].sum()
    total_esales_tp2 = usag_df[f'{tp2}_Total_esales'].sum()

    for e_seg in labels:
        in_e_seg = usag_df[e_seg_col] == e_seg
        for n_seg in labels:
            subset = usag_df[in_e_seg & (usag_df[n_seg_col] == n_seg)]

            num_customers = subset.shape[0]
            esales_tp1 = subset[f'{tp1}_Total_esales'].sum()
            esales_tp2 = subset[f'{tp2}_Total_esales'].sum()
            nonesales_tp1 = subset[f'{tp1}_Total_nonesales'].sum()
            nonesales_tp2 = subset[f'{tp2}_Total_nonesales'].sum()
            esales_inc = subset[inc_col].sum()

            # Non-eSales totals share the '# of Customers' row and their
            # shares share the 'eSales_Inc' row; the layout is kept as is.
            cross_tab.loc[(e_seg, '# of Customers'), (n_seg, '.')] = num_customers
            cross_tab.loc[(e_seg, '# of Customers'), (n_seg, f'non-eSales {tp1}')] = f"{nonesales_tp1:.0f}"
            cross_tab.loc[(e_seg, '# of Customers'), (n_seg, f'non-eSales {tp2}')] = f"{nonesales_tp2:.0f}"
            cross_tab.loc[(e_seg, 'eSales_Inc'), (n_seg, f'non-eSales {tp1}')] = f"{(nonesales_tp1/total_nonesales_tp1):.2%}"
            cross_tab.loc[(e_seg, 'eSales_Inc'), (n_seg, f'non-eSales {tp2}')] = f"{(nonesales_tp2/total_nonesales_tp2):.2%}"

            cross_tab.loc[(e_seg, 'eSales_Inc'), (n_seg, '.')] = f"{esales_inc:.0f}"

            cross_tab.loc[(e_seg, f'eSales {tp1}'), (n_seg, '.')] = f"{esales_tp1:.0f}"
            cross_tab.loc[(e_seg, f'eSales {tp2}'), (n_seg, '.')] = f"{esales_tp2:.0f}"
            cross_tab.loc[(e_seg, f'eSales {tp1}'), (n_seg, '..')] = f"{(esales_tp1/total_esales_tp1):.2%}"
            cross_tab.loc[(e_seg, f'eSales {tp2}'), (n_seg, '..')] = f"{(esales_tp2/total_esales_tp2):.2%}"

    return cross_tab

In [None]:
# One cross-tab per (average type, band width); keys double as sheet names.
# Segment columns are addressed through the proxy period labels.
sheet_dict = {
    f"cross_tab_{tp1}_{tp2}_{atype}_mon_sd_{k}": crosstab_func(
        usag_df, atype, k, tp1, tp2, tp_proxy_1, tp_proxy_2
    )
    for atype in atypes
    for k in ks
}
print("done")

In [None]:
# Display the cross-tab for atype 'act' and a one-standard-deviation band.
sheet_dict[f'cross_tab_{tp1}_{tp2}_act_mon_sd_1']

In [None]:
# One worksheet per cross-tab; names are cut to 31 characters.
excel_path = f"crosstab_output_bucket_pe_{tp1}_{tp2}.xlsx"
with pd.ExcelWriter(excel_path) as writer:
    for sheet_name, sheet_df in sheet_dict.items():
        sheet_df.to_excel(writer, sheet_name=sheet_name[:31])

### Placeholder