In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import pickle

In [2]:
def reduce_mem_usage(df, verbose=False):
    '''
    reduce memory usage by downcasting data types
    from https://www.kaggle.com/harupy/m5-baseline
    '''
    
    start_mem = df.memory_usage().sum() / 1024 ** 2
    int_columns = df.select_dtypes(include=["int"]).columns
    float_columns = df.select_dtypes(include=["float"]).columns

    for col in int_columns:
        df[col] = pd.to_numeric(df[col], downcast="integer")

    for col in float_columns:
        df[col] = pd.to_numeric(df[col], downcast="float")

    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        print(
            "Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(
                end_mem, 100 * (start_mem - end_mem) / start_mem
            )
        )
    return df

In [3]:
data=pd.read_pickle('/notebooks/final_data.pkl').pipe(reduce_mem_usage)
data.head(2)

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,demand,date,wm_yr_wk,...,lag_21,lag_28,lag_30,lag_45,lag_60,lag_80,expanding_sold_mean,daily_avg_sold,avg_sold,selling_trend
6098000,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,1501,0,2015-03-09,11506,...,0.0,0.0,2.0,0.0,2.0,0.0,0.646973,0.0,0.669434,-0.669434
6098001,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,1501,1,2015-03-09,11506,...,0.0,0.0,0.0,0.0,0.0,1.0,0.318359,1.0,0.304932,0.695312


In [4]:
from tqdm import tqdm
for col in tqdm(['id','item_id','dept_id','cat_id','store_id','state_id','event_name_1','event_type_1','event_type_2','event_name_2','weekday']):
    data[col] = data[col].astype('category')

100%|██████████| 11/11 [00:00<00:00, 73.80it/s]


In [5]:
d_id = dict(zip(data.id.cat.codes, data.id))
d_item_id = dict(zip(data.item_id.cat.codes, data.item_id))
d_dept_id = dict(zip(data.dept_id.cat.codes, data.dept_id))
d_cat_id = dict(zip(data.cat_id.cat.codes, data.cat_id))
d_store_id = dict(zip(data.store_id.cat.codes, data.store_id))
d_state_id = dict(zip(data.state_id.cat.codes, data.state_id))

In [6]:
cols = data.dtypes.index.tolist()
d_types = data.dtypes.values.tolist()
for i,type in enumerate(d_types):
    if type.name == 'category':
        data[cols[i]] = data[cols[i]].cat.codes

# MLP

In [7]:
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Flatten,Reshape
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
import csv
from tensorflow.keras.models import Model
#import keras
from tensorflow.keras import backend as k
from tensorflow.keras.layers import Input,Concatenate,Dropout,Dense,BatchNormalization,Conv1D
from tensorflow.keras.layers import Input
from tensorflow.keras.initializers import he_normal,glorot_normal
from tensorflow.keras.regularizers import l1,l2
from tensorflow.python.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import Callback, EarlyStopping, ModelCheckpoint,LearningRateScheduler,ReduceLROnPlateau
from time import time
from tensorflow.keras.utils import plot_model

In [8]:
try:
    os.mkdir('MLP_Model/store_wise_model')
except OSError:
    pass

In [22]:
os.chdir('../..')

In [23]:
os.chdir('MLP_Model/store_wise_model')

In [24]:
!pwd

/notebooks/MLP_Model/store_wise_model


In [25]:
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import *
import datetime
def simple_mlp_model1(input_shape):   
    model_in = Input(shape=(input_shape,), dtype='float32')
    out = Dense(1024,activation='relu',kernel_regularizer=regularizers.l2(0.001))(model_in)
    out = BatchNormalization()(out)
    out = Dropout(0.60)(out)
    out = Dense(512,activation='relu',kernel_regularizer=regularizers.l2(0.001))(out)
    out = BatchNormalization()(out)
    out = Dropout(0.50)(out)
    out = Dense(256,activation='relu',kernel_regularizer=regularizers.l2(0.001))(out)
    out = BatchNormalization()(out)
    out = Dropout(0.25)(out)
    out = Dense(128,activation='relu',kernel_regularizer=regularizers.l2(0.001))(out)
    out = BatchNormalization()(out)
    out = Dropout(0.30)(out)
    out = Dense(64, activation='relu',kernel_regularizer=regularizers.l2(0.001))(out)
    out = BatchNormalization()(out)
    out = Dropout(0.50)(out)
    out = Dense(1,activation='linear')(out)
    model = Model(model_in, out)
    return model

In [26]:
data.drop(['date'],axis=1,inplace=True)

In [27]:
def callback(filename):
    checkpoint = ModelCheckpoint(filename, monitor='val_root_mean_squared_error', verbose=1, mode='max')
    earlystop = EarlyStopping(monitor = 'val_root_mean_squared_error',  mode="max",min_delta = 0, patience = 3,verbose = 1)
    reduce_lr = ReduceLROnPlateau(monitor = 'val_root_mean_squared_error', factor = 0.25, patience = 2, verbose = 1)
    callbacks = [checkpoint, earlystop,reduce_lr]
    return callbacks

In [28]:
from tqdm import tqdm
import joblib
import gc
import warnings
warnings.filterwarnings('ignore')
evaluation=pd.DataFrame()
validation=pd.DataFrame()
valid_preds={}
eval_preds={}
data_new=pd.DataFrame()
stores = d_store_id.keys()
stores = d_store_id.keys()
for store in tqdm(stores):
    df = data[data['store_id']==store]
    print('********Prediction for Store:{}**********'.format(d_store_id[store]))
    X_train, y_train = df[df['d']<1914].drop('demand',axis=1), df[df['d']<1914]['demand']
    X_valid, y_valid = df[(df['d']>=1914) & (df['d']<1942)].drop('demand',axis=1), df[(df['d']>=1914) & (df['d']<1942)]['demand']
    X_test = df[df['d']>=1942].drop('demand',axis=1)
    model = simple_mlp_model1(X_train.shape[1])
    filename = 'model_'+str(d_store_id[store])+'.pkl'
    model.compile( optimizer='adam',loss='mean_squared_error',metrics=[tf.keras.metrics.RootMeanSquaredError()])
    model.fit(x=X_train, y=y_train,epochs=50,verbose=1,batch_size=512, callbacks=callback(filename), validation_data=(X_valid, y_valid))
    y_pred_valid=model.predict(X_valid)
    y_pred_eval=model.predict(X_test)
    X_valid['demand']=y_pred_valid
    X_test['demand']=y_pred_eval
    X_valid=X_valid[['id','d','demand']]
    X_test=X_test[['id','d','demand']]
    validation = validation.append(X_valid)
    evaluation = evaluation.append(X_test)
    del X_train, y_train, y_valid, X_valid
    # save model
    #joblib.dump(model, filename)
    gc.collect()   

  0%|          | 0/10 [00:00<?, ?it/s]

********Prediction for Store:CA_1**********
Epoch 1/50
Epoch 1: saving model to model_CA_1.pkl
INFO:tensorflow:Assets written to: model_CA_1.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_CA_1.pkl
INFO:tensorflow:Assets written to: model_CA_1.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_CA_1.pkl
INFO:tensorflow:Assets written to: model_CA_1.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_CA_1.pkl
INFO:tensorflow:Assets written to: model_CA_1.pkl/assets

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/50
Epoch 5: saving model to model_CA_1.pkl
INFO:tensorflow:Assets written to: model_CA_1.pkl/assets
Epoch 6/50
Epoch 6: saving model to model_CA_1.pkl
INFO:tensorflow:Assets written to: model_CA_1.pkl/assets

Epoch 6: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 6: early stopping


 10%|█         | 1/10 [02:19<20:56, 139.56s/it]

********Prediction for Store:CA_2**********
Epoch 1/50
Epoch 1: saving model to model_CA_2.pkl
INFO:tensorflow:Assets written to: model_CA_2.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_CA_2.pkl
INFO:tensorflow:Assets written to: model_CA_2.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_CA_2.pkl
INFO:tensorflow:Assets written to: model_CA_2.pkl/assets

Epoch 3: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 4/50
Epoch 4: saving model to model_CA_2.pkl
INFO:tensorflow:Assets written to: model_CA_2.pkl/assets
Epoch 5/50
Epoch 5: saving model to model_CA_2.pkl
INFO:tensorflow:Assets written to: model_CA_2.pkl/assets
Epoch 5: early stopping


 20%|██        | 2/10 [04:18<17:00, 127.55s/it]

********Prediction for Store:CA_3**********
Epoch 1/50
Epoch 1: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets
Epoch 5/50
Epoch 5: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets

Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 6/50
Epoch 6: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets
Epoch 7/50
Epoch 7: saving model to model_CA_3.pkl
INFO:tensorflow:Assets written to: model_CA_3.pkl/assets

Epoch 7: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 8/50
Epoch 8: saving model to model_CA_3.

 30%|███       | 3/10 [07:24<17:58, 154.04s/it]

********Prediction for Store:CA_4**********
Epoch 1/50
Epoch 1: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/50
Epoch 5: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets
Epoch 6/50
Epoch 6: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets
Epoch 7/50
Epoch 7: saving model to model_CA_4.pkl
INFO:tensorflow:Assets written to: model_CA_4.pkl/assets

Epoch 7: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 7: early stopping


 40%|████      | 4/10 [10:02<15:33, 155.51s/it]

********Prediction for Store:TX_1**********
Epoch 1/50
Epoch 1: saving model to model_TX_1.pkl
INFO:tensorflow:Assets written to: model_TX_1.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_TX_1.pkl
INFO:tensorflow:Assets written to: model_TX_1.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_TX_1.pkl
INFO:tensorflow:Assets written to: model_TX_1.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_TX_1.pkl
INFO:tensorflow:Assets written to: model_TX_1.pkl/assets

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/50
Epoch 5: saving model to model_TX_1.pkl
INFO:tensorflow:Assets written to: model_TX_1.pkl/assets
Epoch 6/50
Epoch 6: saving model to model_TX_1.pkl
INFO:tensorflow:Assets written to: model_TX_1.pkl/assets

Epoch 6: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 6: early stopping


 50%|█████     | 5/10 [12:40<13:03, 156.72s/it]

********Prediction for Store:TX_2**********
Epoch 1/50
Epoch 1: saving model to model_TX_2.pkl
INFO:tensorflow:Assets written to: model_TX_2.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_TX_2.pkl
INFO:tensorflow:Assets written to: model_TX_2.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_TX_2.pkl
INFO:tensorflow:Assets written to: model_TX_2.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_TX_2.pkl
INFO:tensorflow:Assets written to: model_TX_2.pkl/assets
Epoch 5/50
Epoch 5: saving model to model_TX_2.pkl
INFO:tensorflow:Assets written to: model_TX_2.pkl/assets

Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5: early stopping


 60%|██████    | 6/10 [14:47<09:46, 146.57s/it]

********Prediction for Store:TX_3**********
Epoch 1/50
Epoch 1: saving model to model_TX_3.pkl
INFO:tensorflow:Assets written to: model_TX_3.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_TX_3.pkl
INFO:tensorflow:Assets written to: model_TX_3.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_TX_3.pkl
INFO:tensorflow:Assets written to: model_TX_3.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_TX_3.pkl
INFO:tensorflow:Assets written to: model_TX_3.pkl/assets
Epoch 5/50
Epoch 5: saving model to model_TX_3.pkl
INFO:tensorflow:Assets written to: model_TX_3.pkl/assets
Epoch 6/50
Epoch 6: saving model to model_TX_3.pkl
INFO:tensorflow:Assets written to: model_TX_3.pkl/assets

Epoch 6: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 6: early stopping


 70%|███████   | 7/10 [17:10<07:16, 145.35s/it]

********Prediction for Store:WI_1**********
Epoch 1/50
Epoch 1: saving model to model_WI_1.pkl
INFO:tensorflow:Assets written to: model_WI_1.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_WI_1.pkl
INFO:tensorflow:Assets written to: model_WI_1.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_WI_1.pkl
INFO:tensorflow:Assets written to: model_WI_1.pkl/assets

Epoch 3: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 4/50
Epoch 4: saving model to model_WI_1.pkl
INFO:tensorflow:Assets written to: model_WI_1.pkl/assets
Epoch 5/50
Epoch 5: saving model to model_WI_1.pkl
INFO:tensorflow:Assets written to: model_WI_1.pkl/assets
Epoch 6/50
Epoch 6: saving model to model_WI_1.pkl
INFO:tensorflow:Assets written to: model_WI_1.pkl/assets

Epoch 6: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 6: early stopping


 80%|████████  | 8/10 [19:48<04:58, 149.29s/it]

********Prediction for Store:WI_2**********
Epoch 1/50
Epoch 1: saving model to model_WI_2.pkl
INFO:tensorflow:Assets written to: model_WI_2.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_WI_2.pkl
INFO:tensorflow:Assets written to: model_WI_2.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_WI_2.pkl
INFO:tensorflow:Assets written to: model_WI_2.pkl/assets
Epoch 4/50
Epoch 4: saving model to model_WI_2.pkl
INFO:tensorflow:Assets written to: model_WI_2.pkl/assets

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/50
Epoch 5: saving model to model_WI_2.pkl
INFO:tensorflow:Assets written to: model_WI_2.pkl/assets
Epoch 6/50
Epoch 6: saving model to model_WI_2.pkl
INFO:tensorflow:Assets written to: model_WI_2.pkl/assets
Epoch 6: early stopping


 90%|█████████ | 9/10 [22:25<02:31, 151.64s/it]

********Prediction for Store:WI_3**********
Epoch 1/50
Epoch 1: saving model to model_WI_3.pkl
INFO:tensorflow:Assets written to: model_WI_3.pkl/assets
Epoch 2/50
Epoch 2: saving model to model_WI_3.pkl
INFO:tensorflow:Assets written to: model_WI_3.pkl/assets
Epoch 3/50
Epoch 3: saving model to model_WI_3.pkl
INFO:tensorflow:Assets written to: model_WI_3.pkl/assets

Epoch 3: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 4/50
Epoch 4: saving model to model_WI_3.pkl
INFO:tensorflow:Assets written to: model_WI_3.pkl/assets
Epoch 5/50
Epoch 5: saving model to model_WI_3.pkl
INFO:tensorflow:Assets written to: model_WI_3.pkl/assets
Epoch 5: early stopping


100%|██████████| 10/10 [24:44<00:00, 148.50s/it]


In [None]:
def Time_series_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=150, return_sequences=True, input_shape = (input_shape,1)))
    model.add(TimeDistributed(Dense(100)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(LSTM(units=50))
    model.add(Dense(1))
    return model

In [29]:
model.summary()

NameError: name 'model' is not defined

In [30]:
actual = True
if actual == False:
    #Get the validation results(We already have them as less than one month left for competition to end)
    validation = data[(data['d']>=1914) & (data['d']<=1941)][['id','d','demand']]
    validation = pd.pivot(validation, index='id', columns='d', values='demand').reset_index()
    validation.columns=['id'] + ['F' + str(i + 1) for i in range(28)]
    validation.id = validation.id.map(d_id).str.replace('evaluation','validation')
else:
    #Get the actual validation results
    validation = validation[['id','d','demand']]
    validation = pd.pivot(validation, index='id', columns='d', values='demand').reset_index()
    validation.columns=['id'] + ['F' + str(i + 1) for i in range(28)]
    validation.id = validation.id.map(d_id).str.replace('evaluation','validation')
validation.head()

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_validation,3.385223,0.177855,0.1809,0.182519,0.180347,0.559285,0.552798,0.176354,5.601048,...,3.323679,4.901747,0.579963,0.175248,0.179316,0.182661,0.568013,0.175423,0.178061,0.181291
1,FOODS_1_001_CA_2_validation,0.607253,2.673632,0.617448,0.60905,0.601412,1.747644,3.039139,0.595659,0.604057,...,1.731321,0.577709,0.687048,1.772819,1.774816,0.650303,0.64879,1.76778,2.294627,0.624159
2,FOODS_1_001_CA_3_validation,1.111589,0.371144,1.09795,0.363455,6.414598,1.081586,0.370281,0.372259,1.076083,...,1.0779,2.548054,2.529314,0.370183,0.367104,1.080994,0.36755,3.336484,2.532734,2.541372
3,FOODS_1_001_CA_4_validation,0.042745,0.15031,0.042698,0.042753,0.042773,0.042725,0.150749,0.042863,0.042849,...,0.147282,0.042848,0.042802,0.042814,0.042765,0.148333,0.154829,0.042808,0.042808,0.042731
4,FOODS_1_001_TX_1_validation,-0.120492,-0.120628,-0.122393,-0.120607,-0.122693,-0.12012,-0.122867,-0.122941,-0.123015,...,-0.123239,-0.123345,-0.123275,-0.123336,2.469488,-0.118953,-0.076217,-0.07654,-0.118474,-0.076678


In [31]:
evaluation=evaluation[['id','d','demand']]
evaluation['id']=evaluation['id'].map(d_id)
evaluation = pd.pivot(evaluation, index='id', columns='d', values='demand').reset_index()
evaluation.columns = ['id'] + ['F' + str(i + 1) for i in range(28)]
evaluation.head()

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,FOODS_1_001_CA_1_evaluation,0.18168,0.177707,0.182351,0.182565,0.180001,0.180146,0.180733,0.183419,0.177576,...,0.179876,0.17903,0.181452,0.182021,0.181885,0.182415,0.180776,0.182066,0.180762,0.182355
1,FOODS_1_001_CA_2_evaluation,0.615857,0.595173,0.616473,0.614017,0.623826,0.621705,0.614656,0.606868,0.618891,...,0.609058,0.60006,0.595301,0.600238,0.583094,0.591264,0.578389,0.594111,0.593043,0.589946
2,FOODS_1_001_CA_3_evaluation,0.367973,0.366203,0.357821,0.368774,0.367252,0.365048,0.369783,0.377025,0.370403,...,0.368617,0.364288,0.365313,0.357447,0.354943,0.36847,0.356551,0.364431,0.361906,0.369686
3,FOODS_1_001_CA_4_evaluation,0.042808,0.042838,0.042854,0.04288,0.042779,0.042805,0.042868,0.042771,0.042845,...,0.042933,0.042943,0.042919,0.042921,0.042949,0.042967,0.042944,0.042838,0.042893,0.042838
4,FOODS_1_001_TX_1_evaluation,-0.118566,-0.118713,-0.119553,-0.119588,-0.119646,-0.119836,-0.119846,-0.120095,-0.1199,...,-0.120148,-0.120035,-0.120101,-0.119985,-0.120249,-0.12073,-0.120312,-0.120315,-0.120466,-0.120242
