In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
#from sklearn.multioutput import RegressorChain
from sklearn.preprocessing import LabelEncoder
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
store_sales = pd.read_csv('train.csv',
    usecols=['store_nbr', 'family', 'date', 'sales', 'onpromotion'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'sales': 'float32',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
    infer_datetime_format=True,
)
store_sales['date'] = store_sales.date.dt.to_period('D')
store_sales = store_sales.set_index(['store_nbr', 'family', 'date']).sort_index()

store_sales.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sales,onpromotion
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2013-01-01,0.0,0
1,AUTOMOTIVE,2013-01-02,2.0,0
1,AUTOMOTIVE,2013-01-03,3.0,0
1,AUTOMOTIVE,2013-01-04,3.0,0
1,AUTOMOTIVE,2013-01-05,5.0,0


In [3]:
nbr_index=np.unique(np.array([i[0] for i in store_sales.index]))
nbr_index[:5]

array(['1', '10', '11', '12', '13'], dtype='<U2')

In [11]:
index_date_0=[]
for every in store_sales.index:
    index_date_0.append(every[2])

index_date=list(set(index_date_0))
index_date.sort()
type(index_date)

list

In [13]:
len(index_date)

1684

In [4]:
test_data = pd.read_csv('test.csv',
    usecols=['store_nbr', 'family', 'date', 'id', 'onpromotion'],
    dtype={
        'store_nbr': 'category',
        'family': 'category',
        'id': 'uint32',
        'onpromotion': 'uint32',
    },
    parse_dates=['date'],
    infer_datetime_format=True,
)
test_data['date'] = test_data.date.dt.to_period('D')
test_data = test_data.set_index(['store_nbr', 'family', 'date']).sort_index()
test_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,onpromotion
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,0
1,AUTOMOTIVE,2017-08-17,3002670,0
1,AUTOMOTIVE,2017-08-18,3004452,0
1,AUTOMOTIVE,2017-08-19,3006234,0
1,AUTOMOTIVE,2017-08-20,3008016,0
...,...,...,...,...
9,SEAFOOD,2017-08-27,3022271,0
9,SEAFOOD,2017-08-28,3024053,0
9,SEAFOOD,2017-08-29,3025835,0
9,SEAFOOD,2017-08-30,3027617,0


In [5]:
oil=pd.read_csv('oil.csv', parse_dates=['date'], infer_datetime_format=True)
oil['date'] = oil.date.dt.to_period('D')
oil = oil.set_index('date').sort_index()
oil

Unnamed: 0_level_0,dcoilwtico
date,Unnamed: 1_level_1
2013-01-01,
2013-01-02,93.14
2013-01-03,92.97
2013-01-04,93.12
2013-01-07,93.20
...,...
2017-08-25,47.65
2017-08-28,46.40
2017-08-29,46.46
2017-08-30,45.96


In [6]:
holidays=pd.read_csv('holidays_events.csv',usecols=['date','type'], parse_dates=['date'], infer_datetime_format=True)
holidays['date'] = holidays.date.dt.to_period('D')
holidays=holidays.drop_duplicates(subset='date')
holidays = holidays.set_index('date').sort_index()
holidays['event']=1
holidays=holidays.drop('type', axis=1)
holidays

Unnamed: 0_level_0,event
date,Unnamed: 1_level_1
2012-03-02,1
2012-04-01,1
2012-04-12,1
2012-04-14,1
2012-04-21,1
...,...
2017-12-22,1
2017-12-23,1
2017-12-24,1
2017-12-25,1


In [95]:
group_1=['AUTOMOTIVE', 'BEAUTY', 'BEVERAGES','BREAD/BAKERY', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'GROCERY I', 'LIQUOR,WINE,BEER','PERSONAL CARE', 'HARDWARE', 'PET SUPPLIES']
group_2=['CELEBRATION','LAWN AND GARDEN', 'MEATS', 'POULTRY', 'PREPARED FOODS', 'SEAFOOD','FROZEN FOODS', 'GROCERY II']
group_3=['HOME AND KITCHEN I', 'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE', 'LADIESWEAR','PLAYERS AND ELECTRONICS', 'PRODUCE']
group_4=['BABY CARE','LINGERIE', 'MAGAZINES']
group_5=['SCHOOL AND OFFICE SUPPLIES']
group_6=['BOOKS']

## Make X_1_train for every Group

In [22]:
def join_oil_hol(df):
    df=df.join(holidays.loc['2013-01-01':'2017-08-15']).fillna(0)
    df=df.join(oil.loc['2013-01-01':'2017-08-15']).fillna(method='bfill', axis=0)
    return df
def join_oil_hol_test(df):
    df=df.join(holidays.loc['2017-08-16':'2017-08-31']).fillna(0)
    df=df.join(oil.loc['2017-08-16':'2017-08-31']).fillna(method='bfill', axis=0)
    return df

In [23]:
#Group 1
from statsmodels.tsa.deterministic import DeterministicProcess

dp_1=DeterministicProcess(index=index_date, constant=True, order=1, seasonal=True, drop=True)
X_1_train_1=join_oil_hol(dp_1.in_sample())
X_1_train_1['trend']=(X_1_train_1['trend']-X_1_train_1['trend'].mean())/X_1_train_1['trend'].std()
X_1_train_1

Unnamed: 0,const,trend,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)",event,dcoilwtico
2013-01-01,1.0,-1.730509,0.0,0.0,0.0,0.0,0.0,0.0,1.0,93.14
2013-01-02,1.0,-1.728452,1.0,0.0,0.0,0.0,0.0,0.0,0.0,93.14
2013-01-03,1.0,-1.726396,0.0,1.0,0.0,0.0,0.0,0.0,0.0,92.97
2013-01-04,1.0,-1.724339,0.0,0.0,1.0,0.0,0.0,0.0,0.0,93.12
2013-01-05,1.0,-1.722283,0.0,0.0,0.0,1.0,0.0,0.0,1.0,93.20
...,...,...,...,...,...,...,...,...,...,...
2017-08-11,1.0,1.722283,0.0,0.0,0.0,0.0,0.0,1.0,1.0,48.81
2017-08-12,1.0,1.724339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.59
2017-08-13,1.0,1.726396,1.0,0.0,0.0,0.0,0.0,0.0,0.0,47.59
2017-08-14,1.0,1.728452,0.0,1.0,0.0,0.0,0.0,0.0,0.0,47.59


In [24]:
#Group 2
dp_2=DeterministicProcess(index=index_date, constant=True, seasonal=True, drop=True)
X_1_train_2=join_oil_hol(dp_2.in_sample())
X_1_train_2

Unnamed: 0,const,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)",event,dcoilwtico
2013-01-01,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,93.14
2013-01-02,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,93.14
2013-01-03,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,92.97
2013-01-04,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,93.12
2013-01-05,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,93.20
...,...,...,...,...,...,...,...,...,...
2017-08-11,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,48.81
2017-08-12,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.59
2017-08-13,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,47.59
2017-08-14,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,47.59


In [25]:
from statsmodels.tsa.deterministic import CalendarFourier
#Group 3
fourier=CalendarFourier(freq='A', order=6)
dp_3=DeterministicProcess(index=index_date, constant=True, seasonal=True, additional_terms=[fourier], drop=True)
X_1_train_3=join_oil_hol(dp_3.in_sample())
X_1_train_3

Unnamed: 0,const,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)","sin(1,freq=A-DEC)","cos(1,freq=A-DEC)","sin(2,freq=A-DEC)",...,"sin(3,freq=A-DEC)","cos(3,freq=A-DEC)","sin(4,freq=A-DEC)","cos(4,freq=A-DEC)","sin(5,freq=A-DEC)","cos(5,freq=A-DEC)","sin(6,freq=A-DEC)","cos(6,freq=A-DEC)",event,dcoilwtico
2013-01-01,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.000000,0.000000,...,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,1.0,93.14
2013-01-02,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.017213,0.999852,0.034422,...,0.051620,0.998667,0.068802,0.997630,0.085965,0.996298,0.103102,0.994671,0.0,93.14
2013-01-03,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.034422,0.999407,0.068802,...,0.103102,0.994671,0.137279,0.990532,0.171293,0.985220,0.205104,0.978740,0.0,92.97
2013-01-04,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.051620,0.998667,0.103102,...,0.154309,0.988023,0.205104,0.978740,0.255353,0.966848,0.304921,0.952378,0.0,93.12
2013-01-05,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.068802,0.997630,0.137279,...,0.205104,0.978740,0.271958,0.962309,0.337523,0.941317,0.401488,0.915864,1.0,93.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-08-11,1.0,0.0,0.0,0.0,0.0,0.0,1.0,-0.628763,-0.777597,0.977848,...,-0.891981,0.452072,0.409356,-0.912375,0.255353,0.966848,-0.806480,-0.591261,1.0,48.81
2017-08-12,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.642055,-0.766659,0.984474,...,-0.867456,0.497513,0.345612,-0.938377,0.337523,0.941317,-0.863142,-0.504961,0.0,47.59
2017-08-13,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.655156,-0.755493,0.989932,...,-0.840618,0.541628,0.280231,-0.959933,0.417194,0.908818,-0.910605,-0.413279,0.0,47.59
2017-08-14,1.0,0.0,1.0,0.0,0.0,0.0,0.0,-0.668064,-0.744104,0.994218,...,-0.811539,0.584298,0.213521,-0.976938,0.493776,0.869589,-0.948362,-0.317191,0.0,47.59


In [21]:
#Group 4
fourier_4=CalendarFourier(freq='A', order=4)
dp_4=DeterministicProcess(index=index_date, constant=True, seasonal=True, additional_terms=[fourier_4], drop=True)
X_1_train_4=join_oil_hol(dp_4.in_sample())
X_1_train_4

Unnamed: 0,const,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)","sin(1,freq=A-DEC)","cos(1,freq=A-DEC)","sin(2,freq=A-DEC)","cos(2,freq=A-DEC)","sin(3,freq=A-DEC)","cos(3,freq=A-DEC)","sin(4,freq=A-DEC)","cos(4,freq=A-DEC)",event,dcoilwtico
2013-01-01,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,1.0,93.14
2013-01-02,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.017213,0.999852,0.034422,0.999407,0.051620,0.998667,0.068802,0.997630,0.0,93.14
2013-01-03,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.034422,0.999407,0.068802,0.997630,0.103102,0.994671,0.137279,0.990532,0.0,92.97
2013-01-04,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.051620,0.998667,0.103102,0.994671,0.154309,0.988023,0.205104,0.978740,0.0,93.12
2013-01-05,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.068802,0.997630,0.137279,0.990532,0.205104,0.978740,0.271958,0.962309,1.0,93.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-08-11,1.0,0.0,0.0,0.0,0.0,0.0,1.0,-0.628763,-0.777597,0.977848,0.209315,-0.891981,0.452072,0.409356,-0.912375,1.0,48.81
2017-08-12,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.642055,-0.766659,0.984474,0.175531,-0.867456,0.497513,0.345612,-0.938377,0.0,47.59
2017-08-13,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.655156,-0.755493,0.989932,0.141540,-0.840618,0.541628,0.280231,-0.959933,0.0,47.59
2017-08-14,1.0,0.0,1.0,0.0,0.0,0.0,0.0,-0.668064,-0.744104,0.994218,0.107381,-0.811539,0.584298,0.213521,-0.976938,0.0,47.59


In [27]:
#Group 5
fourier_5=CalendarFourier(freq='A', order=2)
dp_5=DeterministicProcess(index=index_date, constant=True, order=1, additional_terms=[fourier_5], drop=True)
X_1_train_5=join_oil_hol(dp_5.in_sample())
X_1_train_5['trend']=(X_1_train_5['trend']-X_1_train_5['trend'].mean())/X_1_train_5['trend'].std()
X_1_train_5

Unnamed: 0,const,trend,"sin(1,freq=A-DEC)","cos(1,freq=A-DEC)","sin(2,freq=A-DEC)","cos(2,freq=A-DEC)",event,dcoilwtico
2013-01-01,1.0,-1.730509,0.000000,1.000000,0.000000,1.000000,1.0,93.14
2013-01-02,1.0,-1.728452,0.017213,0.999852,0.034422,0.999407,0.0,93.14
2013-01-03,1.0,-1.726396,0.034422,0.999407,0.068802,0.997630,0.0,92.97
2013-01-04,1.0,-1.724339,0.051620,0.998667,0.103102,0.994671,0.0,93.12
2013-01-05,1.0,-1.722283,0.068802,0.997630,0.137279,0.990532,1.0,93.20
...,...,...,...,...,...,...,...,...
2017-08-11,1.0,1.722283,-0.628763,-0.777597,0.977848,0.209315,1.0,48.81
2017-08-12,1.0,1.724339,-0.642055,-0.766659,0.984474,0.175531,0.0,47.59
2017-08-13,1.0,1.726396,-0.655156,-0.755493,0.989932,0.141540,0.0,47.59
2017-08-14,1.0,1.728452,-0.668064,-0.744104,0.994218,0.107381,0.0,47.59


In [32]:
#Group 6
dp_6=DeterministicProcess(index=index_date[1366:], constant=True, order=2, drop=True)
X_1_train_6=join_oil_hol(dp_6.in_sample())
X_1_train_6['trend']=(X_1_train_6['trend']-X_1_train_6['trend'].mean())/X_1_train_6['trend'].std()
X_1_train_6

Unnamed: 0,const,trend,trend_squared,event,dcoilwtico
2016-10-01,1.0,-1.723896,1.0,0.0,48.80
2016-10-02,1.0,-1.713019,4.0,0.0,48.80
2016-10-03,1.0,-1.702143,9.0,0.0,48.80
2016-10-04,1.0,-1.691267,16.0,0.0,48.67
2016-10-05,1.0,-1.680390,25.0,0.0,49.75
...,...,...,...,...,...
2017-08-11,1.0,1.680390,98596.0,1.0,48.81
2017-08-12,1.0,1.691267,99225.0,0.0,47.59
2017-08-13,1.0,1.702143,99856.0,0.0,47.59
2017-08-14,1.0,1.713019,100489.0,0.0,47.59


In [30]:
index_date[1366:1367]

[Period('2016-10-01', 'D')]

## Make X_1_test for every Group

In [33]:
def make_X_1_test(dp, trend=True):
    X_1_test=join_oil_hol_test(dp.out_of_sample(steps=16))
    if trend:
        X_1_test['trend']=(X_1_test['trend']-X_1_test['trend'].mean())/X_1_test['trend'].std()
        
    return X_1_test

In [34]:
make_X_1_test(dp_1)

Unnamed: 0,const,trend,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)",event,dcoilwtico
2017-08-16,1.0,-1.575315,0.0,0.0,0.0,1.0,0.0,0.0,0.0,46.8
2017-08-17,1.0,-1.365273,0.0,0.0,0.0,0.0,1.0,0.0,0.0,47.07
2017-08-18,1.0,-1.155231,0.0,0.0,0.0,0.0,0.0,1.0,0.0,48.59
2017-08-19,1.0,-0.945189,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.39
2017-08-20,1.0,-0.735147,1.0,0.0,0.0,0.0,0.0,0.0,0.0,47.39
2017-08-21,1.0,-0.525105,0.0,1.0,0.0,0.0,0.0,0.0,0.0,47.39
2017-08-22,1.0,-0.315063,0.0,0.0,1.0,0.0,0.0,0.0,0.0,47.65
2017-08-23,1.0,-0.105021,0.0,0.0,0.0,1.0,0.0,0.0,0.0,48.45
2017-08-24,1.0,0.105021,0.0,0.0,0.0,0.0,1.0,0.0,1.0,47.24
2017-08-25,1.0,0.315063,0.0,0.0,0.0,0.0,0.0,1.0,0.0,47.65


In [35]:
make_X_1_test(dp_2, trend=False)

Unnamed: 0,const,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)",event,dcoilwtico
2017-08-16,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,46.8
2017-08-17,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,47.07
2017-08-18,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,48.59
2017-08-19,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.39
2017-08-20,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,47.39
2017-08-21,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,47.39
2017-08-22,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,47.65
2017-08-23,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,48.45
2017-08-24,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,47.24
2017-08-25,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,47.65


## Make X_2_train for every Group

In [53]:
def make_lags(ts, lags):
    return pd.concat(
        {
            f'y_lag_{i}': ts.shift(i).fillna(method='bfill', axis=0)
            for i in range(1, lags + 1)
        },
        axis=1)


def make_X_2_train(nbr, category):
    X_2 = make_lags(store_sales.loc[nbr,category]['sales'], lags=6)
    X_2=X_2.join(store_sales.loc[nbr,category]['onpromotion'])
    if category == "BOOKS":
        X_2=X_2.loc['2016-10-01':]
    return X_2

In [56]:
make_X_2_train('1','AUTOMOTIVE')

Unnamed: 0_level_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,y_lag_6,onpromotion
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0
2013-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0
2013-01-03,2.0,0.0,0.0,0.0,0.0,0.0,0
2013-01-04,3.0,2.0,0.0,0.0,0.0,0.0,0
2013-01-05,3.0,3.0,2.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...
2017-08-11,9.0,7.0,4.0,7.0,6.0,5.0,0
2017-08-12,1.0,9.0,7.0,4.0,7.0,6.0,0
2017-08-13,6.0,1.0,9.0,7.0,4.0,7.0,0
2017-08-14,1.0,6.0,1.0,9.0,7.0,4.0,0


In [57]:
make_X_2_train('1','BOOKS')

Unnamed: 0_level_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,y_lag_6,onpromotion
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-10-01,0.0,0.0,0.0,0.0,0.0,0.0,0
2016-10-02,0.0,0.0,0.0,0.0,0.0,0.0,0
2016-10-03,0.0,0.0,0.0,0.0,0.0,0.0,0
2016-10-04,0.0,0.0,0.0,0.0,0.0,0.0,0
2016-10-05,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...
2017-08-11,0.0,0.0,0.0,0.0,0.0,0.0,0
2017-08-12,0.0,0.0,0.0,0.0,0.0,0.0,0
2017-08-13,0.0,0.0,0.0,0.0,0.0,0.0,0
2017-08-14,0.0,0.0,0.0,0.0,0.0,0.0,0


## Make X_2_test for every Group

In [60]:
def make_lags_test(ts, lags):
    return pd.concat(
        {
            f'y_lag_{i}': ts.shift(i).fillna(method='bfill', axis=0)
            for i in range(lags )
        },
        axis=1)

def make_X_2_test(nbr, category):
    X_2_test = make_lags_test(store_sales.loc[nbr,category]['sales'], lags=6)
    X_2_test=X_2_test.join(store_sales.loc[nbr,category]['onpromotion'])
    
    return X_2_test.iloc[1683:]

In [61]:
make_X_2_test('1','BOOKS')

Unnamed: 0_level_0,y_lag_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,onpromotion
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-08-15,0.0,0.0,0.0,0.0,0.0,0.0,0


In [62]:
make_X_2_test('1','AUTOMOTIVE')

Unnamed: 0_level_0,y_lag_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,onpromotion
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-08-15,4.0,1.0,1.0,6.0,1.0,9.0,0


## Make Y for every Group

In [66]:
def y_(nbr, category):
    y=store_sales.loc[nbr,category]['sales']
    if category == "BOOKS":
        y=y.loc['2016-10-01':]
    return y

In [67]:
y_('1','AUTOMOTIVE')

date
2013-01-01    0.0
2013-01-02    2.0
2013-01-03    3.0
2013-01-04    3.0
2013-01-05    5.0
             ... 
2017-08-11    1.0
2017-08-12    6.0
2017-08-13    1.0
2017-08-14    1.0
2017-08-15    4.0
Freq: D, Name: sales, Length: 1684, dtype: float32

In [68]:
y_('1','BOOKS')

date
2016-10-01    0.0
2016-10-02    0.0
2016-10-03    0.0
2016-10-04    0.0
2016-10-05    0.0
             ... 
2017-08-11    0.0
2017-08-12    0.0
2017-08-13    0.0
2017-08-14    0.0
2017-08-15    0.0
Freq: D, Name: sales, Length: 318, dtype: float32

In [99]:
test_result=test_data.copy()
test_result.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,onpromotion
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,0
1,AUTOMOTIVE,2017-08-17,3002670,0
1,AUTOMOTIVE,2017-08-18,3004452,0
1,AUTOMOTIVE,2017-08-19,3006234,0
1,AUTOMOTIVE,2017-08-20,3008016,0


In [100]:
test_result['sales']=0
test_result=test_result.drop('onpromotion', axis=1)
test_result.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,0
1,AUTOMOTIVE,2017-08-17,3002670,0
1,AUTOMOTIVE,2017-08-18,3004452,0
1,AUTOMOTIVE,2017-08-19,3006234,0
1,AUTOMOTIVE,2017-08-20,3008016,0


# Model

In [71]:
class BoostedHybrid:
    def __init__(self, model_1, model_2):
        self.model_1 = model_1
        self.model_2 = model_2
        self.y_columns = None
        
def fit(self, X_1, X_2, y):
    # YOUR CODE HERE: fit self.model_1
    self.model_1.fit(X_1,y)

    y_fit = pd.Series(
        # YOUR CODE HERE: make predictions with self.model_1
        self.model_1.predict(X_1),
        index=X_1.index
    )

    # YOUR CODE HERE: compute residuals
    y_resid = y-y_fit
    
    y_resid_2= make_multistep_target(y_resid, steps=16).dropna()

    y_resid_2, X_2 = y_resid_2.align(X_2, join='inner', axis=0)

    # YOUR CODE HERE: fit self.model_2 on residuals
    self.model_2.fit(X_2, y_resid_2)

    self.y_fit = y_fit
    self.y_resid = y_resid


# Add method to class
BoostedHybrid.fit = fit

def predict(self, X_1, X_2):
    y_pred = pd.Series(
        # YOUR CODE HERE: predict with self.model_1
        self.model_1.predict(X_1),
        index=X_1.index)
   
    # YOUR CODE HERE: add self.model_2 predictions to y_pred
    y_pred = y_pred + pd.Series(self.model_2.predict(X_2)[0], index=X_1.index)
    
    return y_pred


# Add method to class
BoostedHybrid.predict = predict

In [101]:
model = BoostedHybrid(LinearRegression(fit_intercept=False), MultiOutputRegressor(RandomForestRegressor()))

In [74]:
def make_multistep_target(ts, steps):
    return pd.concat(
        {f'y_step_{i + 1}': ts.shift(-i)
         for i in range(steps)},
        axis=1)

## Group 1 prediction:

In [102]:
for nbr in nbr_index:
    for category in group_1:
        model.fit(X_1_train_1, make_X_2_train(nbr, category), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_1), make_X_2_test(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [103]:
test_result.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,2.380082
1,AUTOMOTIVE,2017-08-17,3002670,2.519703
1,AUTOMOTIVE,2017-08-18,3004452,2.186947
1,AUTOMOTIVE,2017-08-19,3006234,4.289573
1,AUTOMOTIVE,2017-08-20,3008016,1.64036


## Group 2 prediction:

In [104]:
for nbr in nbr_index:
    for category in group_2:
        model.fit(X_1_train_2, make_X_2_train(nbr, category), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_2, trend=False), make_X_2_test(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [105]:
test_result.loc['9', 'SEAFOOD']

Unnamed: 0_level_0,id,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-16,3002669,19.925194
2017-08-17,3004451,15.900421
2017-08-18,3006233,17.836256
2017-08-19,3008015,15.278502
2017-08-20,3009797,16.781258
2017-08-21,3011579,18.065486
2017-08-22,3013361,16.274791
2017-08-23,3015143,16.207225
2017-08-24,3016925,14.135041
2017-08-25,3018707,18.69695


## Group 3 prediction:

In [106]:
for nbr in nbr_index:
    for category in group_3:
        model.fit(X_1_train_3, make_X_2_train(nbr, category), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_3, trend=False), make_X_2_test(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

## Group 4 prediction:

In [107]:
for nbr in nbr_index:
    for category in group_4:
        model.fit(X_1_train_4, make_X_2_train(nbr, category), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_4, trend=False), make_X_2_test(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

## Group 5 prediction:

In [108]:
for nbr in nbr_index:
    for category in group_5:
        model.fit(X_1_train_5, make_X_2_train(nbr, category), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_5), make_X_2_test(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

## Group 6 prediction:

In [109]:
for nbr in nbr_index:
    for category in group_6:
        model.fit(X_1_train_6, make_X_2_train(nbr, category), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_6), make_X_2_test(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [110]:
test_result.loc['8', 'BOOKS']

Unnamed: 0_level_0,id,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-16,3002608,1.716014
2017-08-17,3004390,1.677043
2017-08-18,3006172,1.701471
2017-08-19,3007954,1.526487
2017-08-20,3009736,1.459729
2017-08-21,3011518,1.355327
2017-08-22,3013300,1.271268
2017-08-23,3015082,1.195659
2017-08-24,3016864,0.896109
2017-08-25,3018646,0.916679


In [111]:
test_result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,2.380082
1,AUTOMOTIVE,2017-08-17,3002670,2.519703
1,AUTOMOTIVE,2017-08-18,3004452,2.186947
1,AUTOMOTIVE,2017-08-19,3006234,4.289573
1,AUTOMOTIVE,2017-08-20,3008016,1.640360
...,...,...,...,...
9,SEAFOOD,2017-08-27,3022271,18.300960
9,SEAFOOD,2017-08-28,3024053,19.036737
9,SEAFOOD,2017-08-29,3025835,18.004789
9,SEAFOOD,2017-08-30,3027617,20.822170


## Submision

In [113]:
sammple_submission = pd.read_csv('sample_submission.csv')
My_sammple_submission=sammple_submission.copy()
My_sammple_submission.head()

Unnamed: 0,id,sales
0,3000888,0.0
1,3000889,0.0
2,3000890,0.0
3,3000891,0.0
4,3000892,0.0


In [114]:
merged=pd.merge(My_sammple_submission, test_result, on='id')
merged

Unnamed: 0,id,sales_x,sales_y
0,3000888,0.0,2.380082
1,3000889,0.0,0.000000
2,3000890,0.0,1.923885
3,3000891,0.0,1032.241384
4,3000892,0.0,-0.295942
...,...,...,...
28507,3029395,0.0,379.181533
28508,3029396,0.0,117.528563
28509,3029397,0.0,1287.645618
28510,3029398,0.0,20.169329


In [115]:
merged.rename(columns={'sales_y':'sales'}, inplace=True)
My_sammple_submission=merged.drop('sales_x', axis=1)
My_sammple_submission

Unnamed: 0,id,sales
0,3000888,2.380082
1,3000889,0.000000
2,3000890,1.923885
3,3000891,1032.241384
4,3000892,-0.295942
...,...,...
28507,3029395,379.181533
28508,3029396,117.528563
28509,3029397,1287.645618
28510,3029398,20.169329


In [117]:
My_sammple_submission.to_csv('My_sammple_submission.csv', index=False)

## Without 'onpromotion'

In [119]:
make_X_2_train('1','BOOKS').drop('onpromotion', axis=1)

Unnamed: 0_level_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,y_lag_6
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-10-01,0.0,0.0,0.0,0.0,0.0,0.0
2016-10-02,0.0,0.0,0.0,0.0,0.0,0.0
2016-10-03,0.0,0.0,0.0,0.0,0.0,0.0
2016-10-04,0.0,0.0,0.0,0.0,0.0,0.0
2016-10-05,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
2017-08-11,0.0,0.0,0.0,0.0,0.0,0.0
2017-08-12,0.0,0.0,0.0,0.0,0.0,0.0
2017-08-13,0.0,0.0,0.0,0.0,0.0,0.0
2017-08-14,0.0,0.0,0.0,0.0,0.0,0.0


In [120]:
make_X_2_test('1','AUTOMOTIVE').drop('onpromotion', axis=1)

Unnamed: 0_level_0,y_lag_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-08-15,4.0,1.0,1.0,6.0,1.0,9.0


In [121]:
for nbr in nbr_index:
    for category in group_1:
        model.fit(X_1_train_1, make_X_2_train(nbr, category).drop('onpromotion', axis=1), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_1), make_X_2_test(nbr, category).drop('onpromotion', axis=1))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [122]:
test_result.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,1.588569
1,AUTOMOTIVE,2017-08-17,3002670,2.518159
1,AUTOMOTIVE,2017-08-18,3004452,2.144832
1,AUTOMOTIVE,2017-08-19,3006234,4.473063
1,AUTOMOTIVE,2017-08-20,3008016,1.892241


In [123]:
for nbr in nbr_index:
    for category in group_2:
        model.fit(X_1_train_2, make_X_2_train(nbr, category).drop('onpromotion', axis=1), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_2, trend=False), make_X_2_test(nbr, category).drop('onpromotion', axis=1))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [124]:
test_result.loc['9', 'SEAFOOD']

Unnamed: 0_level_0,id,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-16,3002669,19.931374
2017-08-17,3004451,17.67662
2017-08-18,3006233,15.68273
2017-08-19,3008015,14.823598
2017-08-20,3009797,19.359189
2017-08-21,3011579,18.076021
2017-08-22,3013361,17.450217
2017-08-23,3015143,17.286863
2017-08-24,3016925,15.304138
2017-08-25,3018707,18.207654


In [125]:
for nbr in nbr_index:
    for category in group_3:
        model.fit(X_1_train_3, make_X_2_train(nbr, category).drop('onpromotion', axis=1), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_3, trend=False), make_X_2_test(nbr, category).drop('onpromotion', axis=1))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [126]:
for nbr in nbr_index:
    for category in group_4:
        model.fit(X_1_train_4, make_X_2_train(nbr, category).drop('onpromotion', axis=1), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_4, trend=False), make_X_2_test(nbr, category).drop('onpromotion', axis=1))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [127]:
for nbr in nbr_index:
    for category in group_5:
        model.fit(X_1_train_5, make_X_2_train(nbr, category).drop('onpromotion', axis=1), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_5), make_X_2_test(nbr, category).drop('onpromotion', axis=1))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [128]:
for nbr in nbr_index:
    for category in group_6:
        model.fit(X_1_train_6, make_X_2_train(nbr, category).drop('onpromotion', axis=1), y_(nbr, category))
        prediction=model.predict(make_X_1_test(dp_6), make_X_2_test(nbr, category).drop('onpromotion', axis=1))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [129]:
test_result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,1.588569
1,AUTOMOTIVE,2017-08-17,3002670,2.518159
1,AUTOMOTIVE,2017-08-18,3004452,2.144832
1,AUTOMOTIVE,2017-08-19,3006234,4.473063
1,AUTOMOTIVE,2017-08-20,3008016,1.892241
...,...,...,...,...
9,SEAFOOD,2017-08-27,3022271,18.463166
9,SEAFOOD,2017-08-28,3024053,18.720720
9,SEAFOOD,2017-08-29,3025835,17.326507
9,SEAFOOD,2017-08-30,3027617,20.476583


In [130]:
My_sammple_submission_2=sammple_submission.copy()
merged_2=pd.merge(My_sammple_submission_2, test_result, on='id')
merged_2.rename(columns={'sales_y':'sales'}, inplace=True)
My_sammple_submission_2=merged.drop('sales_x', axis=1)
My_sammple_submission_2

Unnamed: 0,id,sales
0,3000888,2.380082
1,3000889,0.000000
2,3000890,1.923885
3,3000891,1032.241384
4,3000892,-0.295942
...,...,...
28507,3029395,379.181533
28508,3029396,117.528563
28509,3029397,1287.645618
28510,3029398,20.169329


In [156]:
My_sammple_submission_2=merged_2.drop('sales_x', axis=1)
My_sammple_submission_2=My_sammple_submission_2.clip(0.)

In [157]:
My_sammple_submission_2.to_csv('My_sammple_submission_2.csv', index=False)

In [134]:
My_sammple_submission_2[My_sammple_submission_2['sales']<0]

Unnamed: 0,id,sales
4,3000892,-0.295942
26,3000914,-0.665354
53,3000941,-0.006358
56,3000944,-0.102216
59,3000947,-1.077847
...,...,...
28186,3029074,-0.262766
28318,3029206,-0.132539
28381,3029269,-0.001872
28414,3029302,-0.015094


In [139]:
My_sammple_submission_3=My_sammple_submission_2.copy()
#My_sammple_submission_3[My_sammple_submission_3['sales']<0].clip(0.)
My_sammple_submission_3=My_sammple_submission_3.clip(0.)
My_sammple_submission_3.loc[[4,26]]

Unnamed: 0,id,sales
4,3000892,0.0
26,3000914,0.0


In [140]:
My_sammple_submission_3.to_csv('My_sammple_submission_3.csv', index=False)

In [141]:
My_sammple_submission_4=My_sammple_submission.copy()
My_sammple_submission_4=My_sammple_submission_4.clip(0.)
My_sammple_submission_4.to_csv('My_sammple_submission_4.csv', index=False)

## Train on 1 year:

In [142]:
X_1_train_1.loc['2017-02-01':]

Unnamed: 0,const,trend,"s(2,7)","s(3,7)","s(4,7)","s(5,7)","s(6,7)","s(7,7)",event,dcoilwtico
2017-02-01,1.0,1.329499,0.0,0.0,0.0,1.0,0.0,0.0,0.0,53.90
2017-02-02,1.0,1.331556,0.0,0.0,0.0,0.0,1.0,0.0,0.0,53.55
2017-02-03,1.0,1.333612,0.0,0.0,0.0,0.0,0.0,1.0,0.0,53.81
2017-02-04,1.0,1.335669,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.01
2017-02-05,1.0,1.337725,1.0,0.0,0.0,0.0,0.0,0.0,0.0,53.01
...,...,...,...,...,...,...,...,...,...,...
2017-08-11,1.0,1.722283,0.0,0.0,0.0,0.0,0.0,1.0,1.0,48.81
2017-08-12,1.0,1.724339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.59
2017-08-13,1.0,1.726396,1.0,0.0,0.0,0.0,0.0,0.0,0.0,47.59
2017-08-14,1.0,1.728452,0.0,1.0,0.0,0.0,0.0,0.0,0.0,47.59


In [144]:
# make_X_1_test --same

def make_X_2_train_year(nbr, category):
    X_2 = make_lags(store_sales.loc[nbr,category]['sales'], lags=6)
    if category == "BOOKS":
        X_2=X_2.loc['2016-10-01':]
    else:
        X_2=X_2.loc['2017-02-01':]
    return X_2

make_X_2_train_year('1', 'AUTOMOTIVE')

Unnamed: 0_level_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5,y_lag_6
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-02-01,5.0,3.0,0.0,6.0,2.0,12.0
2017-02-02,7.0,5.0,3.0,0.0,6.0,2.0
2017-02-03,3.0,7.0,5.0,3.0,0.0,6.0
2017-02-04,1.0,3.0,7.0,5.0,3.0,0.0
2017-02-05,4.0,1.0,3.0,7.0,5.0,3.0
...,...,...,...,...,...,...
2017-08-11,9.0,7.0,4.0,7.0,6.0,5.0
2017-08-12,1.0,9.0,7.0,4.0,7.0,6.0
2017-08-13,6.0,1.0,9.0,7.0,4.0,7.0
2017-08-14,1.0,6.0,1.0,9.0,7.0,4.0


In [145]:
def make_X_2_test_year(nbr, category):
    X_2_test = make_lags_test(store_sales.loc[nbr,category]['sales'], lags=6)
    #X_2_test=X_2_test.join(store_sales.loc[nbr,category]['onpromotion'])
    
    return X_2_test.iloc[1683:]

make_X_2_test_year('1', 'AUTOMOTIVE')

Unnamed: 0_level_0,y_lag_0,y_lag_1,y_lag_2,y_lag_3,y_lag_4,y_lag_5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-08-15,4.0,1.0,1.0,6.0,1.0,9.0


In [146]:
def y_year(nbr, category):
    y=store_sales.loc[nbr,category]['sales']
    if category == "BOOKS":
        y=y.loc['2016-10-01':]
    else:
        y=y.loc['2017-02-01':]
    return y

y_year('1', 'AUTOMOTIVE')

date
2017-02-01    7.0
2017-02-02    3.0
2017-02-03    1.0
2017-02-04    4.0
2017-02-05    3.0
             ... 
2017-08-11    1.0
2017-08-12    6.0
2017-08-13    1.0
2017-08-14    1.0
2017-08-15    4.0
Freq: D, Name: sales, Length: 196, dtype: float32

In [147]:
for nbr in nbr_index:
    for category in group_1:
        model.fit(X_1_train_1.loc['2017-02-01':], make_X_2_train_year(nbr, category), y_year(nbr, category))
        prediction=model.predict(make_X_1_test(dp_1), make_X_2_test_year(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [148]:
for nbr in nbr_index:
    for category in group_2:
        model.fit(X_1_train_2.loc['2017-02-01':], make_X_2_train_year(nbr, category), y_year(nbr, category))
        prediction=model.predict(make_X_1_test(dp_2, trend=False), make_X_2_test_year(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [149]:
for nbr in nbr_index:
    for category in group_3:
        model.fit(X_1_train_3.loc['2017-02-01':], make_X_2_train_year(nbr, category), y_year(nbr, category))
        prediction=model.predict(make_X_1_test(dp_3, trend=False), make_X_2_test_year(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [150]:
for nbr in nbr_index:
    for category in group_4:
        model.fit(X_1_train_4.loc['2017-02-01':], make_X_2_train_year(nbr, category), y_year(nbr, category))
        prediction=model.predict(make_X_1_test(dp_4, trend=False), make_X_2_test_year(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [151]:
for nbr in nbr_index:
    for category in group_5:
        model.fit(X_1_train_5.loc['2017-02-01':], make_X_2_train_year(nbr, category), y_year(nbr, category))
        prediction=model.predict(make_X_1_test(dp_5), make_X_2_test_year(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [152]:
for nbr in nbr_index:
    for category in group_6:
        model.fit(X_1_train_6, make_X_2_train_year(nbr, category), y_year(nbr, category))
        prediction=model.predict(make_X_1_test(dp_6), make_X_2_test_year(nbr, category))
        
        test_result.loc[(nbr, category), 'sales']=prediction.values

In [153]:
test_result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id,sales
store_nbr,family,date,Unnamed: 3_level_1,Unnamed: 4_level_1
1,AUTOMOTIVE,2017-08-16,3000888,-5.609006
1,AUTOMOTIVE,2017-08-17,3002670,-7.403855
1,AUTOMOTIVE,2017-08-18,3004452,-3.060327
1,AUTOMOTIVE,2017-08-19,3006234,-2.699441
1,AUTOMOTIVE,2017-08-20,3008016,-6.568272
...,...,...,...,...
9,SEAFOOD,2017-08-27,3022271,24.189349
9,SEAFOOD,2017-08-28,3024053,17.926952
9,SEAFOOD,2017-08-29,3025835,18.333311
9,SEAFOOD,2017-08-30,3027617,16.467850


In [154]:
My_sammple_submission_5=sammple_submission.copy()
merged_5=pd.merge(My_sammple_submission_5, test_result, on='id')
merged_5.rename(columns={'sales_y':'sales'}, inplace=True)
My_sammple_submission_5=merged_5.drop('sales_x', axis=1)
My_sammple_submission_5

Unnamed: 0,id,sales
0,3000888,-5.609006
1,3000889,0.000000
2,3000890,4.609996
3,3000891,4280.031768
4,3000892,-0.317552
...,...,...
28507,3029395,355.156226
28508,3029396,112.682665
28509,3029397,-3987.301569
28510,3029398,-463.125230


In [155]:
My_sammple_submission_5=My_sammple_submission_5.clip(0.)
My_sammple_submission_5.to_csv('My_sammple_submission_5.csv', index=False)

In [None]:
# Best result : #2 without onpromotion