### Summarise all products and promotions for the paper

In [1]:
import numpy as np
import uuid
import pandas as pd
import os
import pickle
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib
import fcn_helpers as fhelp
import glob

# Root folder of the Corporacion Favorita data; '~' is expanded to the
# current user's home directory.
cfav_data_path = '~/Google Drive/order/Machine Learning Part/data/CorporacionFavorita'
dataFolder = os.path.expanduser(cfav_data_path)

In [2]:
# Lookup from department (family) name to its parent category, built by the
# project helper. Presumably a dict keyed by department name: it is queried
# with .get(dept_id, '') in the summary loop further down, and the result is
# used as a folder name. TODO confirm against fcn_helpers.
mapper_family_to_category = fhelp.cfav_get_map_dept_to_cat()

In [3]:
# Departments included in the summary (note that 'LIQUOR,WINE,BEER' is a
# single department whose name contains commas).
selected_departments = [
    'LIQUOR,WINE,BEER',
    'GROCERY_II',
    'GROCERY_I',
    'BEVERAGES',
    'DAIRY',
    'DELI',
    'EGGS',
    'FROZEN_FOODS',
    'MEATS',
    'POULTRY',
    'PREPARED_FOODS',
    'PRODUCE',
    'BREAD_BAKERY',
]

In [4]:
# Stores included in the summary. Each name is also the stem of that store's
# pickle file inside every department's 'store_sales' folder.
# NOTE(review): names look like <state>_<store number>_<type>_<cluster> - confirm.
stores = [
    'Pichincha_44_A_5',
    'Pichincha_47_A_14',
    'Pichincha_46_A_14',
    'Pichincha_48_A_14',
    'Pichincha_3_D_8',
    'Pichincha_4_D_9',
    'Pichincha_45_A_11',
    'Pichincha_49_A_11',
    'Pichincha_6_D_13',
    'Pichincha_7_D_8',
    'Pichincha_8_D_8',
]

In [5]:
# A product counts as "on promotion" only when its promotion-flag total is
# strictly greater than this threshold.
min_num_days_on_promo = 10

# Parallel accumulators: the summary loop appends one entry to each of these
# per (department, store) pair, so index i across all lists describes one row.
all_stores, all_skus, all_skus_promo = [], [], []
all_category_id, all_dept_id, all_skus_avg_sales = [], [], []

In [6]:
# Collect one summary record per (department, store) pair into the parallel
# all_* lists: number of SKUs, number of SKUs on promotion, and average sales.
for dept_id in selected_departments:
    # The category and the sales folder depend only on the department, so they
    # are resolved once per department rather than once per store.
    # NOTE: a department missing from the mapper falls back to '' and the
    # category level is then simply absent from the folder path.
    category_id = mapper_family_to_category.get(dept_id, '')
    foldername = os.path.join(dataFolder, category_id, dept_id, 'store_sales')

    for store_name in stores:
        # Read department sales for the current store
        filename = os.path.join(foldername, f'{store_name}.pickle')
        df_store = fhelp.readPickleFile(filename)

        # One 'sales-*' and one 'promotion_flag-*' column per product
        # (assumes rows are days and flags are 0/1 - TODO confirm).
        sales_vars = [iVar for iVar in df_store.columns if 'sales-' in iVar]
        promotion_vars = [iVar for iVar in df_store.columns if 'promotion_flag-' in iVar]

        num_skus = len(sales_vars)
        # Products whose promotion-flag total is strictly greater than
        # min_num_days_on_promo
        promo_totals = df_store[promotion_vars].sum(axis=0)
        num_products_on_promo = (promo_totals > min_num_days_on_promo).sum()

        # Mean over rows of the sales summed across all products
        avg_total_sales = df_store[sales_vars].sum(axis=1).mean()

        all_stores.append(store_name)
        all_skus.append(num_skus)
        all_skus_promo.append(num_products_on_promo)
        all_skus_avg_sales.append(avg_total_sales)
        all_category_id.append(category_id)
        all_dept_id.append(dept_id)

In [7]:
# Assemble the per-(department, store) records gathered in the parallel
# all_* lists into a single table, one column per list.
summary_columns = {
    'category': all_category_id,
    'department': all_dept_id,
    'stores': all_stores,
    'num_skus': all_skus,
    'all_skus_promo': all_skus_promo,
    'all_skus_avg_sales': all_skus_avg_sales,
}
df_summary = pd.DataFrame(summary_columns)

In [8]:
# Average the per-store figures within each (category, department) pair.
aggregations = {'num_skus': 'mean', 'all_skus_promo': 'mean', 'all_skus_avg_sales': 'mean'}
df_results_agg = df_summary.groupby(['category', 'department'], as_index=False).agg(aggregations)
df_results_agg.sort_values(by=['category', 'department'], inplace=True)

# Row label "<category>-<department>" for the LaTeX table. Built with
# label-based, vectorised string concatenation: indexing each row Series by
# integer position (st[0], st[1]) is deprecated in recent pandas releases.
df_results_agg['cat_dep'] = df_results_agg['category'] + '-' + df_results_agg['department']

# Columns exported to the paper's table, in display order.
vars_to_save = ['cat_dep', 'num_skus', 'all_skus_promo', 'all_skus_avg_sales']

In [9]:
# Render the selected summary columns as a LaTeX tabular (no index column,
# floats printed with two decimals) and show it so it can be pasted into the paper.
two_decimals = '{:3.2f}'.format
table_for_paper = df_results_agg[vars_to_save]
str_latex = table_for_paper.to_latex(index=False, float_format=two_decimals)
print(str_latex)

\begin{tabular}{lrrr}
\toprule
                 cat\_dep &  num\_skus &  all\_skus\_promo &  all\_skus\_avg\_sales \\
\midrule
        DRINKS-BEVERAGES &       613 &          191.91 &             7088.31 \\
 DRINKS-LIQUOR,WINE,BEER &        73 &           19.27 &              170.33 \\
       FOOD-BREAD\_BAKERY &       134 &           43.00 &             1083.31 \\
              FOOD-DAIRY &       242 &          146.27 &             1943.90 \\
               FOOD-DELI &        91 &           74.36 &              516.36 \\
               FOOD-EGGS &        41 &           31.36 &              346.59 \\
       FOOD-FROZEN\_FOODS &        55 &           13.82 &              276.08 \\
              FOOD-MEATS &        84 &           53.82 &              816.60 \\
            FOOD-POULTRY &        54 &           45.55 &              977.83 \\
     FOOD-PREPARED\_FOODS &        26 &            6.73 &              225.08 \\
            FOOD-PRODUCE &       306 &          240.82 &             5

In [11]:
# Total number of SKUs analysed: the per-department num_skus values added up
df_results_agg.loc[:, 'num_skus'].sum()

3067

In [13]:
# Number of departments: one row per (category, department) pair
len(df_results_agg.index)

13

In [14]:
# Number of distinct categories (missing values, if any, count as one value)
df_results_agg['category'].nunique(dropna=False)

3