In [1]:
# Libraries
import os
import pandas as pd
from scipy.stats import zscore
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
user = os.getenv('USERPROFILE')
data_path = os.path.join(
    user, 'OneDrive - National University of Singapore\EBAC\Year 1 Semester 1\Project\Data')
os.chdir(data_path)
pd.set_option('max_columns', None)  

In [2]:
retail_df = pd.read_csv('Retail_Week_NUS.csv', parse_dates=[0])
retail_df.head()

Unnamed: 0,DATE,CUSTNAME,MATERIAL,BASEUOM,MSTAE,H1,H2,H3,QTY_SOLD,CLUSTER
0,2018-06-25,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,5.0,B
1,2018-06-25,Store 4,10550,PAC,AC,"Nuts, Seeds & Beans",Nuts,Pumpkin Kernel,3.0,B
2,2018-06-25,Store 4,11485,PAC,AC,"Flour, Grain & Flakes",Premix,Others,1.0,B
3,2018-06-25,Store 4,1201,PAC,AC,Bakery,Functional,Starches,1.0,B
4,2018-06-25,Store 4,12085,PAC,AC,Grocery,Seasoning,Herbs & Spices,1.0,B


# Data Cleaning

**We only look at active skus and remove General and Service in H1**

In [3]:
retail_df = retail_df.loc[(retail_df['MSTAE'] == 'AC') & (
    (retail_df['H1'] != 'General') & (retail_df['H1'] != 'Service'))]

**Create Year and Week**

In [4]:
retail_df['YEAR'] = retail_df['DATE'].dt.year
retail_df['MONTH'] = retail_df['DATE'].dt.month
retail_df['WEEK'] = retail_df['DATE'].dt.strftime('%W')
print(retail_df.shape)
retail_df.head()

(1391820, 13)


Unnamed: 0,DATE,CUSTNAME,MATERIAL,BASEUOM,MSTAE,H1,H2,H3,QTY_SOLD,CLUSTER,YEAR,MONTH,WEEK
0,2018-06-25,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,5.0,B,2018,6,26
1,2018-06-25,Store 4,10550,PAC,AC,"Nuts, Seeds & Beans",Nuts,Pumpkin Kernel,3.0,B,2018,6,26
2,2018-06-25,Store 4,11485,PAC,AC,"Flour, Grain & Flakes",Premix,Others,1.0,B,2018,6,26
3,2018-06-25,Store 4,1201,PAC,AC,Bakery,Functional,Starches,1.0,B,2018,6,26
4,2018-06-25,Store 4,12085,PAC,AC,Grocery,Seasoning,Herbs & Spices,1.0,B,2018,6,26


**Insert weeks with no sales (Put 0)<br>Did not put currently as it affects standard deviation**

In [None]:
retail_group = retail_df.groupby(['YEAR', 'WEEK', 'MATERIAL'])[
    'QTY_SOLD'].sum().reset_index()

In [None]:
retail_group.head()

In [None]:
# retail_group = retail_df.pivot_table(index=['YEAR', 'WEEK'], columns=[
#     'MATERIAL'], values='QTY_SOLD', fill_value=0).reset_index()

In [None]:
# retail_group = retail_group.melt(
#     id_vars=['YEAR', 'WEEK'], value_vars=retail_group.columns[2:], value_name='QTY_SOLD')

In [None]:
# print(retail_group.shape)
# retail_group.head()

# Seasonal Products

**Sales of items affected by**
- Economy at times
- Nature of Item (Seasonal or Non-seasonal)
- Promotions or Discounts
- Competition from a rival
- Competition from a substitute product
- Special events like Super Bowl, Thanksgiving, New Year, etc varying from places to places

In [None]:
avg_df = retail_group.groupby(['MATERIAL', 'YEAR'])[
    'QTY_SOLD'].mean().reset_index()
avg_df = avg_df.rename(columns={'QTY_SOLD': 'AVG_QTY'})
avg_df.head()

In [None]:
retail_merged = retail_group.merge(avg_df, how='left', on=['YEAR', 'MATERIAL'])
retail_merged.head()

**Create seasonal index**

In [None]:
retail_merged['SEASONAL_INDEX'] = retail_merged['QTY_SOLD'] / \
    retail_merged['AVG_QTY']

In [None]:
std_df = retail_merged.groupby(['MATERIAL', 'YEAR'])[
    'SEASONAL_INDEX'].std().reset_index()
std_df = std_df.rename(columns={'SEASONAL_INDEX': 'STD'})

In [None]:
retail_merged = retail_merged.merge(
    std_df, how='left', on=['MATERIAL', 'YEAR'])

**Remove materials that are not sold, it may be cause of lack of inventory**

In [None]:
retail_merged = retail_merged.dropna(subset=['STD'])

retail_merged = retail_merged.groupby(['YEAR', 'MATERIAL'])['STD'].mean(
).to_frame().sort_values(by=['YEAR', 'STD'], ascending=False)

In [None]:
high_std = retail_merged.loc[retail_merged['STD'] >= 2]
low_std = retail_merged.loc[retail_merged['STD'] < 1]

**Find common seasonality items for 2019 and 2020**

In [None]:
s_products = high_std.loc[2019].merge(
    high_std.loc[2020], left_index=True, right_index=True)
non_s_products = low_std.loc[2019].merge(
    low_std.loc[2020], left_index=True, right_index=True)

In [None]:
s_products

In [None]:
def plot(mat, year):
    df = retail_df.loc[(retail_df['MATERIAL'] == mat)
                       & (retail_df['YEAR'] == year)]
    df.groupby(['DATE'])['QTY_SOLD'].sum().plot()

In [None]:
plot(225, 2018)

In [None]:
test = retail_df.loc[retail_df['MATERIAL'] == 52289]

In [None]:
test.groupby(['DATE'])['QTY_SOLD'].sum().plot()

# Regression

In [5]:
retail_df['DATE'] = pd.to_datetime(retail_df['DATE']).dt.date

In [6]:
retail_df['DATE_LY'] = retail_df['DATE'] - relativedelta(years=1)
retail_df['DATE_LM'] = retail_df['DATE'] - relativedelta(months=1)
retail_df['DATE_LW'] = retail_df['DATE'] - relativedelta(weeks=1)

**Change to datetime**

In [7]:
retail_df['DATE_LY'] = retail_df['DATE_LY'].apply(pd.to_datetime)
retail_df['DATE_LM'] = retail_df['DATE_LM'].apply(pd.to_datetime)
retail_df['DATE_LW'] = retail_df['DATE_LW'].apply(pd.to_datetime)

In [8]:
retail_df['YEAR_LY'] = retail_df['DATE_LY'].dt.year
retail_df['MONTH_LY'] = retail_df['DATE_LY'].dt.month
retail_df['WEEK_LY'] = retail_df['DATE_LY'].dt.strftime('%W')

retail_df['YEAR_LM'] = retail_df['DATE_LM'].dt.year
retail_df['MONTH_LM'] = retail_df['DATE_LM'].dt.month
retail_df['WEEK_LM'] = retail_df['DATE_LM'].dt.strftime('%W')

retail_df['YEAR_LW'] = retail_df['DATE_LW'].dt.year
retail_df['MONTH_LW'] = retail_df['DATE_LW'].dt.month
retail_df['WEEK_LW'] = retail_df['DATE_LW'].dt.strftime('%W')

In [9]:
retail_df

Unnamed: 0,DATE,CUSTNAME,MATERIAL,BASEUOM,MSTAE,H1,H2,H3,QTY_SOLD,CLUSTER,YEAR,MONTH,WEEK,DATE_LY,DATE_LM,DATE_LW,YEAR_LY,MONTH_LY,WEEK_LY,YEAR_LM,MONTH_LM,WEEK_LM,YEAR_LW,MONTH_LW,WEEK_LW
0,2018-06-25,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,5.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
1,2018-06-25,Store 4,10550,PAC,AC,"Nuts, Seeds & Beans",Nuts,Pumpkin Kernel,3.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
2,2018-06-25,Store 4,11485,PAC,AC,"Flour, Grain & Flakes",Premix,Others,1.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
3,2018-06-25,Store 4,1201,PAC,AC,Bakery,Functional,Starches,1.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
4,2018-06-25,Store 4,12085,PAC,AC,Grocery,Seasoning,Herbs & Spices,1.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1487642,2021-04-05,Store 3,934,PAC,AC,"Nuts, Seeds & Beans",Nuts,Cashew,14.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13
1487643,2021-04-05,Store 3,9760,PC,AC,Non Food,Non-Consumables,"Baking Tin,Pan&Mould",5.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13
1487644,2021-04-05,Store 3,980,PAC,AC,"Nuts, Seeds & Beans",Nuts,Almond,9.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13
1487645,2021-04-05,Store 3,9817,TIN,AC,Filling & Jam,Filling,Fruits,1.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13


In [10]:
# Get slice of dataframe first
retail_ly = retail_df[['YEAR_LY', 'MONTH_LY',
                    'WEEK_LY', 'CUSTNAME', 'MATERIAL', 'QTY_SOLD']]
retail_lm = retail_df[['YEAR_LM', 'MONTH_LM',
                    'WEEK_LM', 'CUSTNAME', 'MATERIAL', 'QTY_SOLD']]
retail_lw = retail_df[['YEAR_LW', 'MONTH_LW',
                    'WEEK_LW', 'CUSTNAME', 'MATERIAL', 'QTY_SOLD']]

In [11]:
retail_created = retail_df[['DATE','CUSTNAME','MATERIAL','QTY_SOLD','YEAR','MONTH','WEEK','DATE_LY']]

In [28]:
retail_df

Unnamed: 0,DATE,CUSTNAME,MATERIAL,BASEUOM,MSTAE,H1,H2,H3,QTY_SOLD,CLUSTER,YEAR,MONTH,WEEK,DATE_LY,DATE_LM,DATE_LW,YEAR_LY,MONTH_LY,WEEK_LY,YEAR_LM,MONTH_LM,WEEK_LM,YEAR_LW,MONTH_LW,WEEK_LW
0,2018-06-25,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,5.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
1,2018-06-25,Store 4,10550,PAC,AC,"Nuts, Seeds & Beans",Nuts,Pumpkin Kernel,3.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
2,2018-06-25,Store 4,11485,PAC,AC,"Flour, Grain & Flakes",Premix,Others,1.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
3,2018-06-25,Store 4,1201,PAC,AC,Bakery,Functional,Starches,1.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
4,2018-06-25,Store 4,12085,PAC,AC,Grocery,Seasoning,Herbs & Spices,1.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1487642,2021-04-05,Store 3,934,PAC,AC,"Nuts, Seeds & Beans",Nuts,Cashew,14.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13
1487643,2021-04-05,Store 3,9760,PC,AC,Non Food,Non-Consumables,"Baking Tin,Pan&Mould",5.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13
1487644,2021-04-05,Store 3,980,PAC,AC,"Nuts, Seeds & Beans",Nuts,Almond,9.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13
1487645,2021-04-05,Store 3,9817,TIN,AC,Filling & Jam,Filling,Fruits,1.0,B,2021,4,14,2020-04-05,2021-03-05,2021-03-29,2020,4,13,2021,3,09,2021,3,13


In [13]:
retail_ly

Unnamed: 0,YEAR_LY,MONTH_LY,WEEK_LY,CUSTNAME,MATERIAL,QTY_SOLD
0,2017,6,25,Store 4,10091,5.0
1,2017,6,25,Store 4,10550,3.0
2,2017,6,25,Store 4,11485,1.0
3,2017,6,25,Store 4,1201,1.0
4,2017,6,25,Store 4,12085,1.0
...,...,...,...,...,...,...
1487642,2020,4,13,Store 3,934,14.0
1487643,2020,4,13,Store 3,9760,5.0
1487644,2020,4,13,Store 3,980,9.0
1487645,2020,4,13,Store 3,9817,1.0


**Get Retail Last Year**

In [15]:
retail_ly = retail_created.merge(retail_ly,how='left',left_on=['YEAR', 'MONTH', 'WEEK','CUSTNAME','MATERIAL'],
                    right_on=['YEAR_LY', 'MONTH_LY', 'WEEK_LY','CUSTNAME','MATERIAL'])
retail_ly = retail_ly.fillna(0)
retail_ly = retail_ly.rename(columns={'QTY_SOLD_x':'QTY_SOLD', 'QTY_SOLD_y':'QTY_SOLD_LY',
                                      'YEAR_x':'YEAR','MONTH_x':'MONTH','WEEK_x':'WEEK'})

In [24]:
retail_ly

Unnamed: 0,DATE,CUSTNAME,MATERIAL,QTY_SOLD,YEAR,MONTH,WEEK,DATE_LY,YEAR_LY,MONTH_LY,WEEK_LY,QTY_SOLD_LY
0,2018-06-25,Store 4,10091,5.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0
1,2018-06-25,Store 4,10550,3.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0
2,2018-06-25,Store 4,11485,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0
3,2018-06-25,Store 4,1201,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0
4,2018-06-25,Store 4,12085,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1391815,2021-04-05,Store 3,934,14.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0
1391816,2021-04-05,Store 3,9760,5.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0
1391817,2021-04-05,Store 3,980,9.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0
1391818,2021-04-05,Store 3,9817,1.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0


In [25]:
retail_lm

Unnamed: 0,YEAR_LM,MONTH_LM,WEEK_LM,CUSTNAME,MATERIAL,QTY_SOLD
0,2018,5,21,Store 4,10091,5.0
1,2018,5,21,Store 4,10550,3.0
2,2018,5,21,Store 4,11485,1.0
3,2018,5,21,Store 4,1201,1.0
4,2018,5,21,Store 4,12085,1.0
...,...,...,...,...,...,...
1487642,2021,3,09,Store 3,934,14.0
1487643,2021,3,09,Store 3,9760,5.0
1487644,2021,3,09,Store 3,980,9.0
1487645,2021,3,09,Store 3,9817,1.0


In [26]:
retail_ly_lm = retail_ly.merge(retail_lm,how='left',left_on=['YEAR', 'MONTH', 'WEEK','CUSTNAME','MATERIAL'],
                    right_on=['YEAR_LM', 'MONTH_LM', 'WEEK_LM','CUSTNAME','MATERIAL'])
# retail_ly_lm = retail_ly_lm.fillna(0)
# retail_ly_lm = retail_ly_lm.rename(columns={'QTY_SOLD_x':'QTY_SOLD', 'QTY_SOLD_y':'QTY_SOLD_LM',
#                                       'YEAR_x':'YEAR','MONTH_x':'MONTH','WEEK_x':'WEEK'})

In [27]:
retail_ly_lm

Unnamed: 0,DATE,CUSTNAME,MATERIAL,QTY_SOLD_x,YEAR,MONTH,WEEK,DATE_LY,YEAR_LY,MONTH_LY,WEEK_LY,QTY_SOLD_LY,YEAR_LM,MONTH_LM,WEEK_LM,QTY_SOLD_y
0,2018-06-25,Store 4,10091,5.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,33.0
1,2018-06-25,Store 4,10550,3.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,12.0
2,2018-06-25,Store 4,11485,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,1.0
3,2018-06-25,Store 4,1201,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,10.0
4,2018-06-25,Store 4,12085,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397112,2021-04-05,Store 3,934,14.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,,,,
1397113,2021-04-05,Store 3,9760,5.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,,,,
1397114,2021-04-05,Store 3,980,9.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,,,,
1397115,2021-04-05,Store 3,9817,1.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,,,,


In [23]:
retail_ly_lm

Unnamed: 0,DATE,CUSTNAME,MATERIAL,QTY_SOLD,YEAR,MONTH,WEEK,DATE_LY,YEAR_LY,MONTH_LY,WEEK_LY,QTY_SOLD_LY,YEAR_LM,MONTH_LM,WEEK_LM,QTY_SOLD_LM
0,2018-06-25,Store 4,10091,5.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,33.0
1,2018-06-25,Store 4,10550,3.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,12.0
2,2018-06-25,Store 4,11485,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,1.0
3,2018-06-25,Store 4,1201,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,10.0
4,2018-06-25,Store 4,12085,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397112,2021-04-05,Store 3,934,14.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0
1397113,2021-04-05,Store 3,9760,5.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0
1397114,2021-04-05,Store 3,980,9.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0
1397115,2021-04-05,Store 3,9817,1.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0


In [19]:
retail_ly_lm_ly = retail_ly_lm.merge(retail_lw,how='left',left_on=['YEAR', 'MONTH', 'WEEK','CUSTNAME','MATERIAL'],
                    right_on=['YEAR_LW', 'MONTH_LW', 'WEEK_LW','CUSTNAME','MATERIAL'])
retail_ly_lm_ly = retail_ly_lm_ly.fillna(0)
retail_ly_lm_ly = retail_ly_lm_ly.rename(columns={'QTY_SOLD_x':'QTY_SOLD', 'QTY_SOLD_y':'QTY_SOLD_LW',
                                      'YEAR_x':'YEAR','MONTH_x':'MONTH','WEEK_x':'WEEK'})

In [20]:
retail_ly_lm_ly

Unnamed: 0,DATE,CUSTNAME,MATERIAL,QTY_SOLD,YEAR,MONTH,WEEK,DATE_LY,YEAR_LY,MONTH_LY,WEEK_LY,QTY_SOLD_LY,YEAR_LM,MONTH_LM,WEEK_LM,QTY_SOLD_LM,YEAR_LW,MONTH_LW,WEEK_LW,QTY_SOLD_LW
0,2018-06-25,Store 4,10091,5.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,33.0,2018.0,6.0,26,31.0
1,2018-06-25,Store 4,10550,3.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,12.0,2018.0,6.0,26,8.0
2,2018-06-25,Store 4,11485,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,1.0,0.0,0.0,0,0.0
3,2018-06-25,Store 4,1201,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,10.0,2018.0,6.0,26,10.0
4,2018-06-25,Store 4,12085,1.0,2018,6,26,2017-06-25,0.0,0.0,0,0.0,2018.0,6.0,26,2.0,2018.0,6.0,26,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1397112,2021-04-05,Store 3,934,14.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0
1397113,2021-04-05,Store 3,9760,5.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0
1397114,2021-04-05,Store 3,980,9.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0
1397115,2021-04-05,Store 3,9817,1.0,2021,4,14,2020-04-05,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0


In [None]:
retail_ly_lm

In [None]:
retail_ly = retail_get_df.merge(retail,how='left',left_on=['YEAR', 'MONTH', 'WEEK','CUSTNAME','MATERIAL'],
                    right_on=['YEAR_LY', 'MONTH_LY', 'WEEK_LY','CUSTNAME','MATERIAL'])

In [None]:
retail_df.dtypes

In [22]:
retail_df.loc[(retail_df['CUSTNAME'] == 'Store 4') & (retail_df['MATERIAL'] == 10091) & (retail_df['YEAR'] == 2018)]

Unnamed: 0,DATE,CUSTNAME,MATERIAL,BASEUOM,MSTAE,H1,H2,H3,QTY_SOLD,CLUSTER,YEAR,MONTH,WEEK,DATE_LY,DATE_LM,DATE_LW,YEAR_LY,MONTH_LY,WEEK_LY,YEAR_LM,MONTH_LM,WEEK_LM,YEAR_LW,MONTH_LW,WEEK_LW
0,2018-06-25,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,5.0,B,2018,6,26,2017-06-25,2018-05-25,2018-06-18,2017,6,25,2018,5,21,2018,6,25
2473,2018-07-02,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,31.0,B,2018,7,27,2017-07-02,2018-06-02,2018-06-25,2017,7,26,2018,6,22,2018,6,26
13855,2018-07-09,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,17.0,B,2018,7,28,2017-07-09,2018-06-09,2018-07-02,2017,7,27,2018,6,23,2018,7,27
20719,2018-07-16,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,33.0,B,2018,7,29,2017-07-16,2018-06-16,2018-07-09,2017,7,28,2018,6,24,2018,7,28
34570,2018-07-23,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,20.0,B,2018,7,30,2017-07-23,2018-06-23,2018-07-16,2017,7,29,2018,6,25,2018,7,29
41575,2018-07-30,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,33.0,B,2018,7,31,2017-07-30,2018-06-30,2018-07-23,2017,7,30,2018,6,26,2018,7,30
53644,2018-08-06,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,25.0,B,2018,8,32,2017-08-06,2018-07-06,2018-07-30,2017,8,31,2018,7,27,2018,7,31
63611,2018-08-13,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,31.0,B,2018,8,33,2017-08-13,2018-07-13,2018-08-06,2017,8,32,2018,7,28,2018,8,32
76203,2018-08-20,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,32.0,B,2018,8,34,2017-08-20,2018-07-20,2018-08-13,2017,8,33,2018,7,29,2018,8,33
83964,2018-08-27,Store 4,10091,PAC,AC,Dairy,Cream Compound,Whipping,13.0,B,2018,8,35,2017-08-27,2018-07-27,2018-08-20,2017,8,34,2018,7,30,2018,8,34


In [None]:
test[test['QTY_SOLD_y'].notnull()]

In [None]:
retail_df.loc[retail_df['DATE'] == '2020-04-05']

In [None]:
test = retail_df.merge(retail, how='left', left_on=['YEAR', 'MONTH', 'WEEK','CUSTNAME','MATERIAL'],
                       right_on=['YEAR_LY', 'MONTH_LY', 'WEEK_LY','CUSTNAME','MATERIAL'])

In [None]:
test[~test['QTY_SOLD_y'].isnull()]

In [None]:
retail_df.dtypes

In [None]:
retail.dtypes

In [None]:
test['QTY_SOLDQTY_LY'].unique()

In [None]:
retail_df.loc[retail_df['DATE_LY'] == retail_df['DATE']]