In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pprint as pp
import IPython
from IPython.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell

# Render the value of every bare top-level expression in a cell, not just the last.
# NOTE(review): this is assigned on the class, so it presumably only applies to
# cells executed after this one - confirm against the IPython version in use.
InteractiveShell.ast_node_interactivity = "all"

# Lay multiple outputs of one cell out side by side instead of stacked.
CSS = """
.output {
    flex-direction: row;
}
"""
# Explicit display(): a bare HTML(...) in the middle of a cell is silently dropped
# whenever only the last expression is rendered, so the style would never be injected.
display(HTML('<style>{}</style>'.format(CSS)))

# Never truncate wide frames horizontally.
pd.set_option('display.max_columns', None)

---

## Mapping categories to DF1 below

In [2]:
# Load the export and the store master; lower-case both headers so the column
# names used below are consistent.
df1 = pd.read_csv('./data/0826export_column_filter_1.csv')
df1.columns = df1.columns.str.lower()

df2 = pd.read_excel('./documentation/store_master.xlsx')
df2.columns = df2.columns.str.lower()

# column creation and mapping
#############################
# One store -> attribute lookup per store-master column. If a store appears more
# than once in df2, the last row wins (plain dict semantics).
df2_store_dict = {store: value for store, value in zip(df2['store'], df2['class'])}
df2_city_dict = {store: value for store, value in zip(df2['store'], df2['store_city'])}
df2_state_dict = {store: value for store, value in zip(df2['store'], df2['store_state'])}
df2_closed_dict = {store: value for store, value in zip(df2['store'], df2['close_date'])}
df2_ft_dict = {store: value for store, value in zip(df2['store'], df2['gross_feet'])}

# Attach the attributes to df1 by store; stores missing from the master get NaN.
# A dict is unpacked because `class` is a reserved word and cannot be a keyword.
df1 = df1.assign(**{
    'class': df1['store'].map(df2_store_dict),         # store class
    'city': df1['store'].map(df2_city_dict),           # store city
    'state': df1['store'].map(df2_state_dict),         # store state
    'close_date': df1['store'].map(df2_closed_dict),   # store close date
    'gross_feet': df1['store'].map(df2_ft_dict),
})

---

### Remove outlier store, index closed stores, PivotTable

In [3]:
# Drop every row belonging to store 2084 (the outlier named in the heading above).
df1 = df1.loc[df1['store'] != 2084]

# Rows whose store has a close date recorded.
closed = df1.loc[df1['close_date'].notna()]

# comp_pct per closed store (rows) and fiscal year (columns);
# pivot_table aggregates with the mean by default.
closed.pivot_table(index=['store'], columns='fiscal_year', values='comp_pct')

fiscal_year,2017,2018,2019,2020
store,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1643,-0.181329,-0.123526,,
1652,0.259425,-0.03705,0.153779,-0.055691
1670,0.460118,-0.38194,-0.070992,
1711,-0.042366,-0.033682,0.101881,0.099241
1717,-0.04028,-0.056802,-0.036926,-0.406107
1803,-0.090754,-0.046967,-0.063759,-0.1423
1820,0.096273,0.005422,,
1832,-0.10647,0.088853,0.154067,
2065,-0.076738,,,


---

## Making dataframes to concat and compare below by `store`:

Group each DataFrame by `store` and take the `mean()` of:
 - `df1`: full original data frame
 - `top_avg`: top by `comp_pct`
 - `bot_avg`: bottom by `comp_pct`
 - `closed_avg`: by closed store

In [5]:
# all_avg = df1.groupby(['store','fiscal_year','fiscal_month']).mean().median().round(3)
# top_avg = df1.groupby(['store','fiscal_year','fiscal_month']).mean().nlargest(27, 'comp_pct').median().round(3)
# bot_avg = df1.groupby(['store','fiscal_year','fiscal_month']).mean().nsmallest(27, 'comp_pct').median().round(3)
# closed_avg = closed.groupby(['store','fiscal_year','fiscal_month']).mean().median().round(3)

# # Concatenating
# for_compare = pd.concat([
#     all_avg.rename('all_means').to_frame(),
#     top_avg.rename('top_means').to_frame(),
#     bot_avg.rename('bot_means').to_frame(),
#     closed_avg.rename('closed_means').to_frame()
# ], axis=1).iloc[1:, :]

# for_compare

# Per-store means of the numeric columns, computed once and reused below.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops
# non-numeric columns silently in GroupBy.mean() and raises TypeError instead;
# df1 carries mapped-in columns such as class/city/state that are presumably text.
store_means = df1.groupby(['store']).mean(numeric_only=True)
closed_store_means = closed.groupby(['store']).mean(numeric_only=True)

# Average of the per-store means: all stores, the 20 stores with the highest and
# lowest mean comp_pct, and the closed stores.
all_avg_by_store = store_means.mean().round(4)
top_avg_by_store = store_means.nlargest(20, 'comp_pct').mean().round(4)
bot_avg_by_store = store_means.nsmallest(20, 'comp_pct').mean().round(4)
closed_avg_by_store = closed_store_means.mean().round(4)

# Concatenating
# The first three rows are dropped positionally.
# NOTE(review): this assumes they are non-metric columns that precede comp_pct -
# confirm against the export's column order.
for_compare_by_store = pd.concat([
#     all_avg_by_store.rename('all_means').to_frame(),
    top_avg_by_store.rename('top_avgs').to_frame(),
    bot_avg_by_store.rename('bot_avgs').to_frame(),
    closed_avg_by_store.rename('closed_avgs').to_frame()
], axis=1).iloc[3:, :]

for_compare_by_store

Unnamed: 0,top_avgs,bot_avgs,closed_avgs
comp_pct,0.1587,-0.0535,-0.0148
mark_down_amt_ty,2124.6918,1711.4393,1144.2139
payroll_adj_hours,143.3224,129.9134,106.2597
strak_traffic,1084.4834,905.5472,779.3127
strak_sales_amt,20370.378,16344.4602,15008.3269
special_order_amt,3087.8884,2167.4445,2039.8378
sales_value,20368.617,16340.8817,15003.0004
shoes_units,94.8406,80.0852,65.7381
shoes_value,12554.7852,10419.3385,9758.1475
multi_value,5018.2321,4499.7414,3764.7171


---
## Making dataframes to concat and compare below by `class`:

### Commented-out classes do not have enough stores for a top-10 vs. bottom-10 store comparison

In [7]:
# Distinct store classes, in order of first appearance in df1.
# NOTE(review): the positional lookups below (classes[0], classes[4], ...) depend
# on that order - confirm that each index still points at the intended class.
classes = df1['class'].unique()


def _class_extreme_avg(class_label, column_name, largest, n_stores=10):
    """Average the per-store means of the extreme stores within one class.

    Parameters
    ----------
    class_label : value of df1['class'] selecting the stores to include.
    column_name : name given to the single column of the returned frame.
    largest : True keeps the stores with the highest mean comp_pct,
        False keeps those with the lowest.
    n_stores : how many stores to keep (default 10).

    Returns
    -------
    One-column DataFrame indexed by variable name; the first three columns of
    the per-store means are skipped positionally before averaging.
    """
    # numeric_only=True: pandas >= 2.0 raises TypeError on non-numeric columns
    # in GroupBy.mean() instead of silently dropping them.
    store_means = (
        df1[df1['class'] == class_label]
        .groupby(['store'])
        .mean(numeric_only=True)
    )
    select = store_means.nlargest if largest else store_means.nsmallest
    return select(n_stores, 'comp_pct').iloc[:, 3:].mean().rename(column_name).to_frame()


mall_top = _class_extreme_avg(classes[0], 'mall_top', largest=True)
mall_bot = _class_extreme_avg(classes[0], 'mall_bot', largest=False)

# The commented-out classes below do not have enough stores for a
# top-10 vs. bottom-10 comparison.
# open_air_top = _class_extreme_avg(classes[1], 'open_air_top', largest=True)
# open_air_bot = _class_extreme_avg(classes[1], 'open_air_bot', largest=False)

# downtown_top = _class_extreme_avg(classes[2], 'downtown_top', largest=True)
# downtown_bot = _class_extreme_avg(classes[2], 'downtown_bot', largest=False)

# street_top = _class_extreme_avg(classes[3], 'street_top', largest=True)
# street_bot = _class_extreme_avg(classes[3], 'street_bot', largest=False)

airport_top = _class_extreme_avg(classes[4], 'airport_top', largest=True)
airport_bot = _class_extreme_avg(classes[4], 'airport_bot', largest=False)

# lifestyle_top = _class_extreme_avg(classes[5], 'lifestyle_top', largest=True)
# lifestyle_bot = _class_extreme_avg(classes[5], 'lifestyle_bot', largest=False)

outlet_top = _class_extreme_avg(classes[6], 'outlet_top', largest=True)
outlet_bot = _class_extreme_avg(classes[6], 'outlet_bot', largest=False)

# Side-by-side frame of the top/bottom averages for the classes that were computed.
for_compare_by_class = pd.concat([mall_top, mall_bot, airport_top, airport_bot, outlet_top, outlet_bot], axis=1)
# for_compare_by_class

---

## MALL STORES:
 - #### Absolute percentage(%) differences between averages of top and bot mall stores' variables:

In [8]:
# Symmetric percentage difference per variable: |bot - top| divided by the mean
# of the two values, times 100. The first row of each frame is skipped.
(
    mall_bot.iloc[1:, 0]
    .sub(mall_top.iloc[1:, 0])
    .abs()
    .div(mall_bot.iloc[1:, 0].add(mall_top.iloc[1:, 0]).mul(0.5))
    .mul(100)
    .abs()  # the midpoint can be negative, so take the magnitude of the ratio
)

mark_down_amt_ty                 25.493565
payroll_adj_hours                 1.693765
strak_traffic                     7.404775
strak_sales_amt                  23.350967
special_order_amt                35.176158
sales_value                      23.348071
shoes_units                      25.909604
shoes_value                      27.212221
multi_value                      33.870609
payroll_units                     0.937128
payroll_value                     7.840776
accessory_value                  16.330514
trans_cnt_returns                26.260592
ty_mark_down_pct                  5.786198
strak_conversion_pct             11.951084
strak_sls_per_shopper            16.666606
strak_avg_trans_size              4.348530
strak_shopper_to_assoc_ratio      7.546524
strak_comp_traffic_delta        255.203971
shoe_multi_pct                   11.425645
special_order_pct_to_sls         11.391806
footwear_units_pct                3.883224
nonfootwear_units_pct             1.812479
avg_unit_pr

---

## AIRPORT STORES:
 - #### Absolute percentage(%) differences between averages of top and bot airport stores' variables:

In [9]:
# Symmetric percentage difference per variable: |bot - top| divided by the mean
# of the two values, times 100. The first row of each frame is skipped.
(
    airport_bot.iloc[1:, 0]
    .sub(airport_top.iloc[1:, 0])
    .abs()
    .div(airport_bot.iloc[1:, 0].add(airport_top.iloc[1:, 0]).mul(0.5))
    .mul(100)
    .abs()  # the midpoint can be negative, so take the magnitude of the ratio
)

mark_down_amt_ty                 33.349293
payroll_adj_hours                18.660824
strak_traffic                    30.956370
strak_sales_amt                  30.115526
special_order_amt                19.930650
sales_value                      30.088844
shoes_units                      30.580276
shoes_value                      28.356516
multi_value                      32.473314
payroll_units                    19.255683
payroll_value                    24.373565
accessory_value                  31.384410
trans_cnt_returns                56.637767
ty_mark_down_pct                  6.412140
strak_conversion_pct              5.533689
strak_sls_per_shopper             6.021613
strak_avg_trans_size              0.250370
strak_shopper_to_assoc_ratio     10.307315
strak_comp_traffic_delta        400.170782
shoe_multi_pct                    4.104888
special_order_pct_to_sls          0.347771
footwear_units_pct                3.969352
nonfootwear_units_pct             0.989802
avg_unit_pr

---

## OUTLET STORES: 
 - #### Absolute percentage(%) differences between averages of top and bot outlet stores' variables:

In [10]:
# outlet store differences
# Symmetric percentage difference per variable: |bot - top| divided by the mean
# of the two values, times 100. The first row of each frame is skipped.
(
    outlet_bot.iloc[1:, 0]
    .sub(outlet_top.iloc[1:, 0])
    .abs()
    .div(outlet_bot.iloc[1:, 0].add(outlet_top.iloc[1:, 0]).mul(0.5))
    .mul(100)
    .abs()  # the midpoint can be negative, so take the magnitude of the ratio
)

mark_down_amt_ty                  2.549816
payroll_adj_hours                13.399895
strak_traffic                    12.273005
strak_sales_amt                  16.188806
special_order_amt                10.274374
sales_value                      16.211427
shoes_units                      19.149166
shoes_value                      20.087996
multi_value                      11.542833
payroll_units                     4.084939
payroll_value                     5.536708
accessory_value                   8.141200
trans_cnt_returns                24.976449
ty_mark_down_pct                 12.566542
strak_conversion_pct              5.377097
strak_sls_per_shopper             5.498228
strak_avg_trans_size              0.497400
strak_shopper_to_assoc_ratio     15.427341
strak_comp_traffic_delta        174.740099
shoe_multi_pct                    5.691858
special_order_pct_to_sls          6.960984
footwear_units_pct                0.421821
nonfootwear_units_pct             0.209528
avg_unit_pr