In [1]:
import datetime
from collections import defaultdict
import numpy as np
import time
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
df_18 = pd.read_csv('2018.csv')
df_19 = pd.read_csv('2019.csv')

In [3]:
##extract data for the contribution of each product per store in next 4 weeks last year
format='%Y-%m-%d'
df_18.drop(columns=['Brand','Category','sku', 'Family'], inplace = True)
df_18['date'] = pd.to_datetime(df_18['week'], format='%Y-%m-%d')
df_18['week'] = df_18['date'].dt.week
df_18['year'] = df_18['date'].dt.year
df_18.drop(columns=['date'], inplace = True)
g18 = df_18.groupby(['store','Name','year','week']).sum()
g18.reset_index(inplace = True)

LY4W = g18[g18['week'].isin([46,47,48,49,50])].drop(['  v','year','week'], axis = 1)
LY4W = LY4W.groupby(['store','Name']).sum()

In [4]:
LY4W.head(2)

In [5]:
#extract data for the contribution of each product per store in last four weeks before the forecast
format='%Y-%m-%d'
df_19.drop(columns=['Brand','Category','sku', 'Family'], inplace = True)
df_19['date'] = pd.to_datetime(df_19['week'], format='%Y-%m-%d')
df_19['week'] = df_19['date'].dt.week
df_19['year'] = df_19['date'].dt.year
df_19.drop(columns=['date'], inplace = True)
g19 = df_19.groupby(['store','Name','year','week']).sum()
g19.reset_index(inplace = True)

TYL4W = g19[g19['week'].isin([41,42,43,44,45])].drop(['  v','year','week'], axis = 1)
TYL4W = TYL4W.groupby(['store','Name']).sum()

In [6]:
TY4W.head(2)

In [7]:
#calculate contribution name-store combination LY next 4 weeks
LY = LY4W.groupby(['store','Name']).agg({'  u':['sum']})
LY = LY.groupby(level=0).apply(lambda x:100 * x / float(x.sum()))
LY.columns = LY.columns.droplevel(1)

In [8]:
#calculate contribution name-store combination TY last 4 weeks
TY = TYL4W.groupby(['store','Name']).agg({'  u':['sum']})
TY = TY.groupby(level=0).apply(lambda x:100 * x / float(x.sum()))
TY.columns = TY.columns.droplevel(1)

In [9]:
#merge 2 contributions and compute average
merge = TY.merge(LY, left_index = True, right_index = True, how = 'left')
merge['avg']= merge.mean(axis=1)
merge = merge.drop(columns = ['  u_x', '  u_y'])
merge.reset_index(inplace = True)
merge['Name'] = merge['Name'].astype(str)

In [10]:
#name forecast next 6 weeks
fcst = pd.read_csv('fcst/name_predictions.csv')

In [11]:
fcst['week'] = fcst['Unnamed: 0']
fcst.drop('Unnamed: 0', axis=1, inplace = True)
#first 6 weeks of fcst
fcst = fcst[:5]

In [12]:
#unpivot and sum forecast
df_fcst = pd.melt(fcst, id_vars=['week'], var_name='Name', value_name='fcst')
df_fcst.drop('week', axis =1, inplace = True)
df_fcst = df_fcst.groupby(['Name']).sum()

In [13]:
fcst_zero_name = df_fcst[df_fcst['fcst']==0]
fcst_name = df_fcst.loc[df_fcst.ne(0).all(axis=1)]

In [15]:
fcst_name.head(2)

Unnamed: 0_level_0,fcst
Name,Unnamed: 1_level_1
1004,2997.479184
1006,2250.519646


In [19]:
#merge contribution average store-name and forecast by name
df = merge.merge(fcst_name, left_on = 'Name', right_index = True, how = 'inner')

In [20]:
#compute forecast
df['fcst_store'] = df['fcst']*df['avg']

In [21]:
df.head()

Unnamed: 0,store,Name,avg,fcst,fcst_store
0,1,14,0.149862,5790.502818,867.773657
500,2,14,0.029169,5790.502818,168.901093
982,3,14,0.037962,5790.502818,219.818014
1583,4,14,0.054153,5790.502818,313.573148
2309,5,14,0.011606,5790.502818,67.206393


#### Create list of combination store-name that don't have a fcst at name level

In [22]:
df['concat'] = df['store'].astype(str)+df['Name'].astype(str)

In [23]:
no_fcst_list = fcst_zero_name.index.tolist()

In [24]:
dictionary = pd.read_csv('fcst/name_dictionary.csv')

In [25]:
dict_0 = dictionary[dictionary['Name'].isin(no_fcst_list)]

In [26]:
dict_0['Category'].value_counts()

178    518
109    245
22     118
34      92
127     89
      ... 
58       1
52       1
107      1
45       1
79       1
Name: Category, Length: 149, dtype: int64

When there is no Name fcst we will use the % growth from the category fcst and apply it to every Name.

In [27]:
cat_fcst = pd.read_csv('fcst/category_predictions.csv')

same as before but with category

In [29]:
df_19 = pd.read_csv('2019.csv')
format='%Y-%m-%d'
df_19.drop(columns=['Brand','Name','sku', 'Family','store'], inplace = True)
df_19['date'] = pd.to_datetime(df_19['week'], format='%Y-%m-%d')
df_19['week'] = df_19['date'].dt.week
df_19['year'] = df_19['date'].dt.year
df_19.drop(columns=['date'], inplace = True)
g19 = df_19.groupby(['Category','year','week']).sum()
g19.reset_index(inplace = True)

g19 = g19[g19['week'].isin([41,42,43,44])]
g19 = g19.drop(columns = ['year','week', '  v'])
g19 = g19.groupby(['Category']).mean()
g19.index = g19.index.map(str)

In [31]:
cat_fcst['week'] = cat_fcst['Unnamed: 0']
cat_fcst.drop('Unnamed: 0', axis=1, inplace = True)
#first 6 weeks of fcst
cat_fcst = cat_fcst[:6]
#unpivot columns and check average sold per category in the last 6 weeks of 2019
cat_fcst = pd.melt(cat_fcst, id_vars=['week'], var_name='Category', value_name='fcst')
cat_fcst.drop('week', axis =1, inplace = True)
cat_fcst = cat_fcst.groupby(['Category']).mean()

In [32]:
#merge 2019 category data with fcst category and calculate growth of the category
merge_cat = cat_fcst.merge(g19, left_index = True, right_index = True)
merge_cat = merge_cat[merge_cat.fcst.notnull()]
merge_cat.columns = ['fcst','u']
merge_cat['growth_cat'] = merge_cat['fcst']/merge_cat['u']-1

In [33]:
merge_cat.head(3)

Unnamed: 0_level_0,fcst,u,growth_cat
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,7383.903337,5727.25,0.289258
10,6318.127155,5105.25,0.237574
109,6606.665126,5494.625,0.202387


In [34]:
#anything that didn't have a name fcst, here we check the category they are associated with
dict_0['Name'] = dict_0['Name'].astype(str)

fcst_zero = fcst_zero_name.merge(dict_0, left_index = True, right_on = 'Name')

fcst_zero['Category'] = fcst_zero['Category'].astype(str)
merge_cat.index = merge_cat.index.map(str)

df_cat = fcst_zero.merge(merge_cat, left_on = 'Category', right_index = True)

In [35]:
#re-do name-store extract data 
df_19 = pd.read_csv('2019.csv')
format='%Y-%m-%d'
df_19.drop(columns=['Brand','Category','sku', 'Family'], inplace = True)
df_19['date'] = pd.to_datetime(df_19['week'], format='%Y-%m-%d')
df_19['week'] = df_19['date'].dt.week
df_19['year'] = df_19['date'].dt.year
df_19.drop(columns=['date'], inplace = True)
g19 = df_19.groupby(['store','Name','year','week']).sum()
g19.reset_index(inplace = True)

In [40]:
#2019 data to fcst at name level
TYL4W = g19[g19['week'].isin([41,42,43,44,45])].drop(['  v','year','week'], axis = 1)
TYL4W = TYL4W.groupby(['store','Name']).mean()
TYL4W.reset_index(inplace = True)
TYL4W['concat'] = TYL4W['store'].astype(str)+TYL4W['Name'].astype(str)

In [36]:
df_final_cat = df.merge(df_cat, left_on = 'Name', right_on = 'Name', how='inner')

In [37]:
df.reset_index(inplace = True)
df_final_cat.reset_index(inplace = True)

In [38]:
#excluding the names that got a fcst already from before
df_final_cat = df_final_cat[~df_final_cat.concat.isin(df.concat)]
df_final_cat = merge.merge(df_cat, left_on = 'Name', right_on = 'Name', how = 'inner')
df_final_cat['concat'] = df_final_cat['store'].astype(str)+df_final_cat['Name'].astype(str)

In [39]:
df.reset_index(inplace = True)
df_final_cat.reset_index(inplace = True)

In [41]:
#merge 2019 store data with category fcst
df_final_name_cat = df_final_cat.merge(TYL4W, left_on = 'concat', right_on = 'concat', how='left')

In [42]:
#create fcst growth category * average units sold last 4 weeks * 6
df_final_name_cat['fcst_store'] = df_final_name_cat['growth_cat']*df_final_name_cat['u']*6
#rename columns
df_final_name_cat.columns = ['index', 'store_x', 'Name_x', 'avg', 'fcst_x', 'Family', 'Category',
       'Brand', 'fcst_y', 'u', 'growth_cat', 'concat', 'store_y', 'Name_y',
       'u_name', 'fcst_store']

In [43]:
df_final_name_cat['fcst_store'] = (1+df_final_name_cat['growth_cat'])*df_final_name_cat['u_name']
df_2 = df_final_name_cat[['store_x','Name_x','fcst_store','concat']]

In [44]:
#lists of succesfull fcst combination name-store
lista2 = df_2.concat
lista = df.concat

#### Are we still missing some?

In [46]:
lista3 = TYL4W.concat

In [48]:
#check which combinations are still in lista3 that are not in the previous 2 dataframes
missing = TYL4W[~TYL4W.concat.isin(df_2.concat)]
#missing = missing[~missing.concat.isin(df.concat)]

In [50]:
#18447 are still missing fcst
missing

Unnamed: 0,store,Name,u,concat
24,1,131,5.00,1131
29,1,195,2.00,1195
49,1,250,2.00,1250
51,1,296,2.50,1296
79,1,493,3.50,1493
...,...,...,...,...
229237,410,2614,1.00,4102614
229243,410,2657,1.00,4102657
229268,410,2727,3.00,4102727
229269,410,2728,1.75,4102728


In [51]:
family_fcst = pd.read_csv('fcst/family_predictions.csv')

In [53]:
df_19 = pd.read_csv('2019.csv')
#same as before but with family
format='%Y-%m-%d'
df_19.drop(columns=['Brand','Name','sku', 'Category','store'], inplace = True)
df_19['date'] = pd.to_datetime(df_19['week'], format='%Y-%m-%d')
df_19['week'] = df_19['date'].dt.week
df_19['year'] = df_19['date'].dt.year
df_19.drop(columns=['date'], inplace = True)
g19 = df_19.groupby(['Family','year','week']).sum()
g19.reset_index(inplace = True)

g19 = g19[g19['week'].isin([41,42,43,44])]
g19 = g19.drop(columns = ['year','week', '  v'])
g19 = g19.groupby(['Family']).mean()
g19.index = g19.index.map(str)

In [54]:
g19.head(2)

Unnamed: 0_level_0,u
Family,Unnamed: 1_level_1
1,159636.625
2,69507.625


In [55]:
family_fcst['week'] = family_fcst['Unnamed: 0']
family_fcst.drop('Unnamed: 0', axis=1, inplace = True)
#first 6 weeks of fcst
family_fcst = family_fcst[:6]
#unpivot columns and check average sold per category in the last 6 weeks of 2019
family_fcst = pd.melt(family_fcst, id_vars=['week'], var_name='Category', value_name='fcst')
family_fcst.drop('week', axis =1, inplace = True)
family_fcst = family_fcst.groupby(['Category']).mean()

In [56]:
#merge 2019 famegory data with fcst famegory and calculate growth of the famegory
merge_fam = family_fcst.merge(g19, left_index = True, right_index = True)
merge_fam = merge_fam[merge_fam.fcst.notnull()]
merge_fam.columns = ['fcst','u']
merge_fam['growth_fam'] = merge_fam['fcst']/merge_fam['u']-1

In [57]:
merge_fam.head(3)

Unnamed: 0,fcst,u,growth_fam
1,173720.712652,159636.625,0.088226
10,7401.313737,6968.625,0.062091
12,201681.665611,189569.625,0.063892


In [58]:
#anything that didn't have a name fcst, here we check the category they are associated with
dict_0['Name'] = dict_0['Name'].astype(str)

fcst_zero = fcst_zero_name.merge(dict_0, left_index = True, right_on = 'Name')

fcst_zero['Family'] = fcst_zero['Family'].astype(str)
merge_cat.index = merge_cat.index.map(str)

df_fam = fcst_zero.merge(merge_fam, left_on = 'Family', right_index = True)

In [59]:
#re-do name-store extract data 
df_19 = pd.read_csv('2019.csv')
format='%Y-%m-%d'
df_19.drop(columns=['Brand','Category','sku', 'Family'], inplace = True)
df_19['date'] = pd.to_datetime(df_19['week'], format='%Y-%m-%d')
df_19['week'] = df_19['date'].dt.week
df_19['year'] = df_19['date'].dt.year
df_19.drop(columns=['date'], inplace = True)
g19 = df_19.groupby(['store','Name','year','week']).sum()
g19.reset_index(inplace = True)

In [60]:
df_final_fam = df.merge(df_fam, left_on = 'Name', right_on = 'Name', how='inner')

df.reset_index(inplace = True)
df_final_fam.reset_index(inplace = True)

In [61]:
#excluding the names that got a fcst already from before
df_final_fam = df_final_fam[~df_final_fam.concat.isin(df.concat)]
df_final_fam = merge.merge(df_fam, left_on = 'Name', right_on = 'Name', how = 'inner')
df_final_fam['concat'] = df_final_fam['store'].astype(str)+df_final_fam['Name'].astype(str)

In [63]:
#df.reset_index(inplace = True)
df_final_fam.reset_index(inplace = True)

In [64]:
#2019 data to fcst at name level
TYL4W = g19[g19['week'].isin([41,42,43,44,45])].drop(['  v','year','week'], axis = 1)
TYL4W = TYL4W.groupby(['store','Name']).mean()
TYL4W.reset_index(inplace = True)
TYL4W['concat'] = TYL4W['store'].astype(str)+TYL4W['Name'].astype(str)

In [65]:
#merge 2019 store data with category fcst
df_final_name_fam = df_final_fam.merge(TYL4W, left_on = 'concat', right_on = 'concat', how='left')

In [66]:
#create fcst growth category * average units sold last 4 weeks * 6
df_final_name_fam['fcst_store'] = df_final_name_fam['growth_fam']*df_final_name_cat['u']*6
#rename columns
df_final_name_fam.columns = ['index', 'store_x', 'Name_x', 'avg', 'fcst_x', 'Family', 'Category',
       'Brand', 'fcst_y', 'u', 'growth_fam', 'concat', 'store_y', 'Name_y',
       'u_name', 'fcst_store']

In [67]:
df_final_name_fam['fcst_store'] = (1+df_final_name_fam['growth_fam'])*df_final_name_fam['u_name']
df_3 = df_final_name_fam[['store_x','Name_x','fcst_store','concat']]

In [68]:
#fcst for anything that was missing from the category fcst
df_3_final = df_3[df_3.concat.isin(missing.concat)]

### all fcst together

In [71]:
df_3_final

Unnamed: 0,store_x,Name_x,fcst_store,concat
1740,1,131,4.895435,1131
1741,2,131,1.631812,2131
1742,3,131,3.426804,3131
1743,4,131,1.468630,4131
1744,7,131,0.979087,7131
...,...,...,...,...
122325,389,2464,1.059436,3892464
122328,391,2164,1.059436,3912164
122332,398,2689,1.059436,3982689
122333,399,637,1.059436,399637


In [72]:
df = df[['store', 'Name','fcst_store','concat']]

In [81]:
df_2.columns = df.columns
df_3_final.columns = df.columns

In [85]:
final_fcst = df.append([df_2, df_3_final])

In [89]:
final_fcst

Unnamed: 0,store,Name,fcst_store,concat
0,1,14,867.773657,114
1,2,14,168.901093,214
2,3,14,219.818014,314
3,4,14,313.573148,414
4,5,14,67.206393,514
...,...,...,...,...
122325,389,2464,1.059436,3892464
122328,391,2164,1.059436,3912164
122332,398,2689,1.059436,3982689
122333,399,637,1.059436,399637
