In [79]:
import warnings
import numpy as np
import pandas as pd
import pickle
import time
import pymongo
# import matplotlib.pyplot as plt
# import plotly.graph_objs as go
# from plotly.offline import init_notebook_mode, iplot
# from sklearn.metrics import mean_squared_error
# from sklearn.model_selection import train_test_split

from keras import optimizers
from keras.utils import plot_model
from keras.models import Sequential, Model
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# %matplotlib inline
warnings.filterwarnings("ignore")

# Set seeds to make the experiment more reproducible.
# from tensorflow import set_random_seed
from numpy.random import seed
# set_random_seed(1)
seed(1)

In [80]:
training_data = pd.read_csv('./test/train.csv', parse_dates=['date'])
testing_data = pd.read_csv('./test/test.csv', parse_dates=['date'])

In [81]:
testing_data

Unnamed: 0,id,date,store,item
0,0,2018-01-01,1,1
1,1,2018-01-02,1,1
2,2,2018-01-03,1,1
3,3,2018-01-04,1,1
4,4,2018-01-05,1,1
...,...,...,...,...
44995,44995,2018-03-27,10,50
44996,44996,2018-03-28,10,50
44997,44997,2018-03-29,10,50
44998,44998,2018-03-30,10,50


In [82]:
print('Min date from train set: %s' % training_data['date'].min().date())
print('Max date from train set: %s' % training_data['date'].max().date())

Min date from train set: 2013-01-01
Max date from train set: 2013-06-03


In [96]:
lag_size = 180
print('Max date from train set: %s' % training_data['date'].max().date())
print('Max date from test set: %s' % testing_data['date'].max().date())
print('Forecast lag size', lag_size)

Max date from train set: 2013-06-03
Max date from test set: 2018-03-31
Forecast lag size 180


In [97]:
train_gp = training_data.sort_values('date').groupby(['item', 'store', 'date'], as_index=False)
train_gp = train_gp.agg({'sales':['mean']})
train_gp.columns = ['item', 'store', 'date', 'sales']
train_gp.head()


Unnamed: 0,item,store,date,sales
0,1,1,2013-01-01,13
1,1,1,2013-01-02,11
2,1,1,2013-01-03,14
3,1,1,2013-01-04,13
4,1,1,2013-01-05,10


In [98]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    print(agg)
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [99]:
window = (training_data['date'].max().date() - training_data['date'].min().date()).days
lag = lag_size
series = series_to_supervised(train_gp.drop('date', axis=1), window=window, lag=lag)
series

       item(t-153)  store(t-153)  sales(t-153)  item(t-152)  store(t-152)  \
0              NaN           NaN           NaN          NaN           NaN   
1              NaN           NaN           NaN          NaN           NaN   
2              NaN           NaN           NaN          NaN           NaN   
3              NaN           NaN           NaN          NaN           NaN   
4              NaN           NaN           NaN          NaN           NaN   
...            ...           ...           ...          ...           ...   
76503         50.0           9.0          63.0         50.0           9.0   
76504         50.0           9.0          61.0         50.0           9.0   
76505         50.0           9.0          71.0         50.0           9.0   
76506         50.0           9.0          68.0         50.0           9.0   
76507         50.0           9.0          72.0         50.0          10.0   

       sales(t-152)  item(t-151)  store(t-151)  sales(t-151)  item(t-150)  

Unnamed: 0,item(t-153),store(t-153),sales(t-153),item(t-152),store(t-152),sales(t-152),item(t-151),store(t-151),sales(t-151),item(t-150),...,sales(t-2),item(t-1),store(t-1),sales(t-1),item(t),store(t),sales(t),item(t+180),store(t+180),sales(t+180)
153,1.0,1.0,13.0,1.0,1.0,11.0,1.0,1.0,14.0,1.0,...,26.0,1.0,1.0,22.0,1,1,12,1.0,3.0,14.0
154,1.0,1.0,11.0,1.0,1.0,14.0,1.0,1.0,13.0,1.0,...,22.0,1.0,1.0,12.0,1,2,12,1.0,3.0,13.0
155,1.0,1.0,14.0,1.0,1.0,13.0,1.0,1.0,10.0,1.0,...,12.0,1.0,2.0,12.0,1,2,16,1.0,3.0,5.0
156,1.0,1.0,13.0,1.0,1.0,10.0,1.0,1.0,12.0,1.0,...,12.0,1.0,2.0,16.0,1,2,16,1.0,3.0,20.0
157,1.0,1.0,10.0,1.0,1.0,12.0,1.0,1.0,10.0,1.0,...,16.0,1.0,2.0,16.0,1,2,20,1.0,3.0,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76323,50.0,8.0,71.0,50.0,8.0,83.0,50.0,8.0,85.0,50.0,...,53.0,50.0,9.0,48.0,50,9,59,50.0,10.0,57.0
76324,50.0,8.0,83.0,50.0,8.0,85.0,50.0,8.0,85.0,50.0,...,48.0,50.0,9.0,59.0,50,9,74,50.0,10.0,70.0
76325,50.0,8.0,85.0,50.0,8.0,85.0,50.0,8.0,62.0,50.0,...,59.0,50.0,9.0,74.0,50,9,64,50.0,10.0,73.0
76326,50.0,8.0,85.0,50.0,8.0,62.0,50.0,8.0,75.0,50.0,...,74.0,50.0,9.0,64.0,50,9,94,50.0,10.0,82.0


In [100]:
# series.to_excel("output.xlsx") 

In [101]:
last_item = 'item(t-%d)' % window
last_store = 'store(t-%d)' % window
series = series[(series['store(t)'] == series[last_store])]
series = series[(series['item(t)'] == series[last_item])]

In [122]:
series

Unnamed: 0,item(t-153),store(t-153),sales(t-153),item(t-152),store(t-152),sales(t-152),item(t-151),store(t-151),sales(t-151),item(t-150),...,store(t-2),sales(t-2),item(t-1),store(t-1),sales(t-1),item(t),store(t),sales(t),item(t+180),store(t+180)
153,1.0,1.0,13.0,1.0,1.0,11.0,1.0,1.0,14.0,1.0,...,1.0,26.0,1.0,1.0,22.0,1,1,12,1.0,3.0
307,1.0,2.0,12.0,1.0,2.0,16.0,1.0,2.0,16.0,1.0,...,2.0,44.0,1.0,2.0,44.0,1,2,23,1.0,4.0
461,1.0,3.0,19.0,1.0,3.0,8.0,1.0,3.0,10.0,1.0,...,3.0,26.0,1.0,3.0,28.0,1,3,17,1.0,5.0
615,1.0,4.0,10.0,1.0,4.0,12.0,1.0,4.0,8.0,1.0,...,4.0,25.0,1.0,4.0,28.0,1,4,22,1.0,6.0
769,1.0,5.0,11.0,1.0,5.0,9.0,1.0,5.0,12.0,1.0,...,5.0,16.0,1.0,5.0,22.0,1,5,12,1.0,7.0
923,1.0,6.0,20.0,1.0,6.0,6.0,1.0,6.0,11.0,1.0,...,6.0,21.0,1.0,6.0,16.0,1,6,13,1.0,8.0
1077,1.0,7.0,7.0,1.0,7.0,4.0,1.0,7.0,8.0,1.0,...,7.0,22.0,1.0,7.0,23.0,1,7,12,1.0,9.0
1231,1.0,8.0,16.0,1.0,8.0,10.0,1.0,8.0,12.0,1.0,...,8.0,29.0,1.0,8.0,35.0,1,8,20,1.0,10.0


In [103]:
# Label
labels_col = 'sales(t+%d)' % lag_size
labels = series[labels_col]
series_copy = series
series = series.drop(labels_col, axis=1)

X_train, X_valid, Y_train, Y_valid = train_test_split(series, labels.values, test_size=0.4, random_state=0)
print('Train set shape', X_train.shape)
print('Validation set shape', X_valid.shape)

Train set shape (4, 464)
Validation set shape (4, 464)


In [114]:
epochs = 50
batch = 256
lr = 0.0003
adam = optimizers.Adam(lr)

In [115]:
X_train_series = X_train.values.reshape((X_train.shape[0], X_train.shape[1], 1))
X_valid_series = X_valid.values.reshape((X_valid.shape[0], X_valid.shape[1], 1))
print('Train set shape', X_train_series.shape)
print('Validation set shape', X_valid_series.shape)

Train set shape (4, 464, 1)
Validation set shape (4, 464, 1)


In [116]:
model_cnn = Sequential()
model_cnn.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_cnn.add(MaxPooling1D(pool_size=2))
model_cnn.add(Flatten())
model_cnn.add(Dense(50, activation='relu'))
model_cnn.add(Dense(1))
model_cnn.compile(loss='mse', optimizer=adam)
model_cnn.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 463, 64)           192       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 231, 64)           0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 14784)             0         
_________________________________________________________________
dense_11 (Dense)             (None, 50)                739250    
_________________________________________________________________
dense_12 (Dense)             (None, 1)                 51        
Total params: 739,493
Trainable params: 739,493
Non-trainable params: 0
_________________________________________________________________


In [117]:
cnn_history = model_cnn.fit(X_train_series, Y_train, validation_data=(X_valid_series, Y_valid), epochs=epochs, verbose=2)

Train on 4 samples, validate on 4 samples
Epoch 1/50
 - 0s - loss: 167.4106 - val_loss: 343.4272
Epoch 2/50
 - 0s - loss: 141.6586 - val_loss: 303.4067
Epoch 3/50
 - 0s - loss: 121.1820 - val_loss: 92.7270
Epoch 4/50
 - 0s - loss: 31.6627 - val_loss: 20.4178
Epoch 5/50
 - 0s - loss: 26.3578 - val_loss: 35.2254
Epoch 6/50
 - 0s - loss: 54.4919 - val_loss: 40.6273
Epoch 7/50
 - 0s - loss: 60.5975 - val_loss: 25.8380
Epoch 8/50
 - 0s - loss: 41.8027 - val_loss: 19.5841
Epoch 9/50
 - 0s - loss: 20.3836 - val_loss: 42.9798
Epoch 10/50
 - 0s - loss: 14.5872 - val_loss: 86.3188
Epoch 11/50
 - 0s - loss: 23.9328 - val_loss: 118.4786
Epoch 12/50
 - 0s - loss: 33.7402 - val_loss: 117.6732
Epoch 13/50
 - 0s - loss: 32.4169 - val_loss: 88.5584
Epoch 14/50
 - 0s - loss: 21.7385 - val_loss: 52.1808
Epoch 15/50
 - 0s - loss: 11.6632 - val_loss: 26.8833
Epoch 16/50
 - 0s - loss: 9.6398 - val_loss: 17.0118
Epoch 17/50
 - 0s - loss: 14.5506 - val_loss: 15.4617
Epoch 18/50
 - 0s - loss: 19.2039 - val_los

In [118]:
cnn_train_pred = model_cnn.predict(X_train_series)
cnn_valid_pred = model_cnn.predict(X_valid_series)
print('Train rmse:', np.sqrt(mean_squared_error(Y_train, cnn_train_pred)))
print('Validation rmse:', np.sqrt(mean_squared_error(Y_valid, cnn_valid_pred)))
X_valid

Train rmse: 0.8034655163333494
Validation rmse: 5.510421570793481


Unnamed: 0,item(t-153),store(t-153),sales(t-153),item(t-152),store(t-152),sales(t-152),item(t-151),store(t-151),sales(t-151),item(t-150),...,store(t-2),sales(t-2),item(t-1),store(t-1),sales(t-1),item(t),store(t),sales(t),item(t+180),store(t+180)
1077,1.0,7.0,7.0,1.0,7.0,4.0,1.0,7.0,8.0,1.0,...,7.0,22.0,1.0,7.0,23.0,1,7,12,1.0,9.0
461,1.0,3.0,19.0,1.0,3.0,8.0,1.0,3.0,10.0,1.0,...,3.0,26.0,1.0,3.0,28.0,1,3,17,1.0,5.0
307,1.0,2.0,12.0,1.0,2.0,16.0,1.0,2.0,16.0,1.0,...,2.0,44.0,1.0,2.0,44.0,1,2,23,1.0,4.0
1231,1.0,8.0,16.0,1.0,8.0,10.0,1.0,8.0,12.0,1.0,...,8.0,29.0,1.0,8.0,35.0,1,8,20,1.0,10.0


In [125]:
cnn_valid_pred

array([[13.432692],
       [14.027822],
       [16.73235 ],
       [18.563726]], dtype=float32)

In [173]:
Y_train

array([ 7., 44., 38., ..., 45., 45., 52.])

In [174]:
cnn_train_pred

array([[ 9.402388],
       [29.246765],
       [40.400177],
       ...,
       [41.854942],
       [41.60445 ],
       [47.43506 ]], dtype=float32)

In [175]:
X_valid_series

array([[[28.],
        [ 5.],
        [29.],
        ...,
        [25.],
        [28.],
        [ 5.]],

       [[34.],
        [ 3.],
        [12.],
        ...,
        [19.],
        [34.],
        [ 3.]],

       [[25.],
        [10.],
        [67.],
        ...,
        [79.],
        [25.],
        [10.]],

       ...,

       [[ 5.],
        [10.],
        [12.],
        ...,
        [10.],
        [ 5.],
        [10.]],

       [[14.],
        [ 7.],
        [27.],
        ...,
        [30.],
        [14.],
        [ 7.]],

       [[36.],
        [ 5.],
        [35.],
        ...,
        [20.],
        [36.],
        [ 5.]]])

In [30]:
def save_model_to_db2(model, client, db, dbconnection, model_name):
    pickled_model = pickle.dumps(model)
    myclient = pymongo.MongoClient(client)
    mydb = myclient[db]
    mycon = mydb[dbconnection]
    info = mycon.insert_one({model_name: pickled_model, 'name': model_name, 'created_time':time.time()})

In [31]:
def save_model_to_db(model, model_type, user):
    client ='mongodb://root:root123@ds353378.mlab.com:53378/cross-entropy?authSource=cross-entropy&retryWrites=false'
    db = 'cross-entropy'
    dbconnection = 'predictive_models' 
    pickled_model = pickle.dumps(model)
    myclient = pymongo.MongoClient(client)
    mydb = myclient[db]
    mycon = mydb[dbconnection]
    info = mycon.insert_one({ 'model': pickled_model, 'type': model_type, 'created_time': time.time(), 'user': user })

In [32]:
save_model_to_db(model = model_cnn, model_type = 'Prediccion de inventario', user='escruz')

In [33]:
def load_saved_model_from_db(model_type, user):
    client ='mongodb://root:root123@ds353378.mlab.com:53378/cross-entropy?authSource=cross-entropy&retryWrites=false'
    db = 'cross-entropy'
    dbconnection = 'predictive_models' 
    json_data = {}
    myclient = pymongo.MongoClient(client)
    mydb = myclient[db]
    mycon = mydb[dbconnection]
    data = mycon.find({'type': model_type, 'user': user })
    
    for i in data:
        json_data = i
    pickled_model = json_data['model']
    return pickle.loads(pickled_model)

In [34]:
loaded_model  = load_saved_model_from_db(model_type='cnn', user='escruz')

KeyError: 'model'

In [None]:
def load_saved_model_from_db2(model_name, client, db, dbconnection):
    json_data = {}
    myclient = pymongo.MongoClient(client)
    mydb = myclient[db]
    mycon = mydb[dbconnection]
    data = mycon.find({'name': model_name})
    
    for i in data:
        json_data = i
    pickled_model = json_data[model_name]
    return pickle.loads(pickled_model)

In [None]:
loaded_model  = load_saved_model_from_db2(client ='mongodb://root:root123@ds353378.mlab.com:53378/cross-entropy?authSource=cross-entropy&retryWrites=false', 
                           db = 'cross-entropy', 
                           dbconnection = 'predictive_models', model_name = 'cnn')

In [35]:
cnn_train_pred = model_cnn.predict(X_train_series)
cnn_valid_pred = model_cnn.predict(X_valid_series)
cnn2_train_pred = loaded_model.predict(X_train_series)
cnn2_valid_pred = loaded_model.predict(X_valid_series)
print('Train rmse:', np.sqrt(mean_squared_error(Y_train, cnn_train_pred)))
print('Validation rmse:', np.sqrt(mean_squared_error(Y_valid, cnn_valid_pred)))
print('Train rmse2:', np.sqrt(mean_squared_error(Y_train, cnn2_train_pred)))
print('Validation rmse2:', np.sqrt(mean_squared_error(Y_valid, cnn2_valid_pred)))


NameError: name 'loaded_model' is not defined

In [None]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [229]:
window = (training_data['date'].max().date() - training_data['date'].min().date()).days    
lag_size = 1
training_data['date'] = pd.to_datetime(training_data['date'])
train_gp = training_data.sort_values('date').groupby(
    ['item', 'store', 'date'], as_index=False)
train_gp = train_gp.agg({'sales': ['mean']})
train_gp.columns = ['item', 'store', 'date', 'sales']
window = 120     
lag = lag_size
series = series_to_supervised(train_gp.drop(
    'date', axis=1), window=window, lag=lag)
last_item = 'item(t-%d)' % window
last_store = 'store(t-%d)' % window
series = series[(series['store(t)'] == series[last_store])]
series = series[(series['item(t)'] == series[last_item])]
# aca borro columnas
# columns_to_drop = [('%s(t+%d)' % (col, lag)) for col in []]
# for i in range(window, 0, -1):
#     columns_to_drop += [('%s(t-%d)' % (col, i))
#                         for col in ['item', 'store']]
# series.drop(columns_to_drop, axis=1, inplace=True)
# series.drop(['item(t)', 'store(t)'], axis=1, inplace=True)
labels_col = 'sales(t+%d)' % lag_size
labels = series[labels_col]
series = series.drop(labels_col, axis=1)

X_train, X_valid, Y_train, Y_valid = train_test_split(
    series, labels.values, test_size=0.4, random_state=0)
X_train_series = X_train.values.reshape(
    (X_train.shape[0], X_train.shape[1], 1))
X_valid_series = X_valid.values.reshape(
    (X_valid.shape[0], X_valid.shape[1], 1))
trained_data = {'data':{'X_train_series': X_train_series, 'Y_train': Y_train, 'X_valid_series': X_valid_series, 'Y_valid': Y_valid}}

       item(t-120)  store(t-120)  sales(t-120)  item(t-119)  store(t-119)  \
0              NaN           NaN           NaN          NaN           NaN   
1              NaN           NaN           NaN          NaN           NaN   
2              NaN           NaN           NaN          NaN           NaN   
3              NaN           NaN           NaN          NaN           NaN   
4              NaN           NaN           NaN          NaN           NaN   
...            ...           ...           ...          ...           ...   
76503         50.0          10.0          36.0         50.0          10.0   
76504         50.0          10.0          40.0         50.0          10.0   
76505         50.0          10.0          43.0         50.0          10.0   
76506         50.0          10.0          46.0         50.0          10.0   
76507         50.0          10.0          46.0         50.0          10.0   

       sales(t-119)  item(t-118)  store(t-118)  sales(t-118)  item(t-117)  

In [230]:
def train_model(X_train_series, X_valid_series, Y_train, Y_valid):
    epochs = 40
    lr = 0.0003
    adam = optimizers.Adam(lr)
    model_cnn = Sequential()
    model_cnn.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(
        X_train_series.shape[1], X_train_series.shape[2])))
    model_cnn.add(MaxPooling1D(pool_size=2))
    model_cnn.add(Flatten())
    model_cnn.add(Dense(50, activation='relu'))
    model_cnn.add(Dense(1))
    model_cnn.compile(loss='mse', optimizer=adam)
    model_cnn.summary()

    cnn_history = model_cnn.fit(X_train_series, Y_train, validation_data=(
        X_valid_series, Y_valid), epochs=epochs, verbose=2)
    return model_cnn

In [231]:
t = pd.io.json.json_normalize(trained_data['data'])

In [232]:
df = t.iloc[0]

In [233]:
model = train_model(df['X_train_series'], df['X_valid_series'], df['Y_train'], df['Y_valid'])

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_14 (Conv1D)           (None, 364, 64)           192       
_________________________________________________________________
max_pooling1d_14 (MaxPooling (None, 182, 64)           0         
_________________________________________________________________
flatten_14 (Flatten)         (None, 11648)             0         
_________________________________________________________________
dense_27 (Dense)             (None, 50)                582450    
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 51        
Total params: 582,693
Trainable params: 582,693
Non-trainable params: 0
_________________________________________________________________
Train on 9904 samples, validate on 6603 samples
Epoch 1/40
 - 5s - loss: 116.8699 - val_loss: 73.5122
Epoch 2/40


In [234]:
X_train.values.shape

(9904, 365)

In [235]:
X_train

Unnamed: 0,item(t-120),store(t-120),sales(t-120),item(t-119),store(t-119),sales(t-119),item(t-118),store(t-118),sales(t-118),item(t-117),...,store(t-2),sales(t-2),item(t-1),store(t-1),sales(t-1),item(t),store(t),sales(t),item(t+1),store(t+1)
73882,49.0,3.0,14.0,49.0,3.0,24.0,49.0,3.0,14.0,49.0,...,3.0,33.0,49.0,3.0,29.0,49,3,31,49.0,3.0
70977,47.0,4.0,17.0,47.0,4.0,12.0,47.0,4.0,11.0,47.0,...,4.0,20.0,47.0,4.0,31.0,47,4,25,47.0,4.0
39455,26.0,8.0,29.0,26.0,8.0,29.0,26.0,8.0,30.0,26.0,...,8.0,67.0,26.0,8.0,45.0,26,8,49,26.0,8.0
6735,5.0,4.0,10.0,5.0,4.0,12.0,5.0,4.0,7.0,5.0,...,4.0,15.0,5.0,4.0,18.0,5,4,14,5.0,4.0
68698,45.0,9.0,49.0,45.0,9.0,39.0,45.0,9.0,35.0,45.0,...,9.0,82.0,45.0,9.0,89.0,45,9,60,45.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42825,28.0,10.0,42.0,28.0,10.0,63.0,28.0,10.0,81.0,28.0,...,10.0,91.0,28.0,10.0,85.0,28,10,99,28.0,10.0
60883,40.0,8.0,21.0,40.0,8.0,21.0,40.0,8.0,21.0,40.0,...,8.0,21.0,40.0,8.0,29.0,40,8,43,40.0,8.0
45725,30.0,9.0,24.0,30.0,9.0,23.0,30.0,9.0,24.0,30.0,...,9.0,39.0,30.0,9.0,39.0,30,9,43,30.0,9.0
50159,33.0,8.0,49.0,33.0,8.0,50.0,33.0,8.0,39.0,33.0,...,8.0,52.0,33.0,8.0,64.0,33,8,82,33.0,8.0


In [236]:
v = X_train.values.reshape(
    (X_train.shape[0], X_train.shape[1],1)).reshape((X_train.shape[0], X_train.shape[1],))
df1 = pd.DataFrame(v, columns=X_train.columns)
df1

Unnamed: 0,item(t-120),store(t-120),sales(t-120),item(t-119),store(t-119),sales(t-119),item(t-118),store(t-118),sales(t-118),item(t-117),...,store(t-2),sales(t-2),item(t-1),store(t-1),sales(t-1),item(t),store(t),sales(t),item(t+1),store(t+1)
0,49.0,3.0,14.0,49.0,3.0,24.0,49.0,3.0,14.0,49.0,...,3.0,33.0,49.0,3.0,29.0,49.0,3.0,31.0,49.0,3.0
1,47.0,4.0,17.0,47.0,4.0,12.0,47.0,4.0,11.0,47.0,...,4.0,20.0,47.0,4.0,31.0,47.0,4.0,25.0,47.0,4.0
2,26.0,8.0,29.0,26.0,8.0,29.0,26.0,8.0,30.0,26.0,...,8.0,67.0,26.0,8.0,45.0,26.0,8.0,49.0,26.0,8.0
3,5.0,4.0,10.0,5.0,4.0,12.0,5.0,4.0,7.0,5.0,...,4.0,15.0,5.0,4.0,18.0,5.0,4.0,14.0,5.0,4.0
4,45.0,9.0,49.0,45.0,9.0,39.0,45.0,9.0,35.0,45.0,...,9.0,82.0,45.0,9.0,89.0,45.0,9.0,60.0,45.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9899,28.0,10.0,42.0,28.0,10.0,63.0,28.0,10.0,81.0,28.0,...,10.0,91.0,28.0,10.0,85.0,28.0,10.0,99.0,28.0,10.0
9900,40.0,8.0,21.0,40.0,8.0,21.0,40.0,8.0,21.0,40.0,...,8.0,21.0,40.0,8.0,29.0,40.0,8.0,43.0,40.0,8.0
9901,30.0,9.0,24.0,30.0,9.0,23.0,30.0,9.0,24.0,30.0,...,9.0,39.0,30.0,9.0,39.0,30.0,9.0,43.0,30.0,9.0
9902,33.0,8.0,49.0,33.0,8.0,50.0,33.0,8.0,39.0,33.0,...,8.0,52.0,33.0,8.0,64.0,33.0,8.0,82.0,33.0,8.0


In [237]:
cnn_train_pred = model.predict(X_train_series)
cnn_valid_pred = model.predict(X_valid_series)

In [238]:
v = X_valid_series.reshape((X_valid_series.shape[0], X_valid_series.shape[1]))
df1 = pd.DataFrame(v, columns=X_valid.columns)
df1['predicted_sales'] = cnn_valid_pred.round()

In [239]:
df1

Unnamed: 0,item(t-120),store(t-120),sales(t-120),item(t-119),store(t-119),sales(t-119),item(t-118),store(t-118),sales(t-118),item(t-117),...,sales(t-2),item(t-1),store(t-1),sales(t-1),item(t),store(t),sales(t),item(t+1),store(t+1),predicted_sales
0,6.0,7.0,21.0,6.0,7.0,15.0,6.0,7.0,19.0,6.0,...,26.0,6.0,7.0,31.0,6.0,7.0,46.0,6.0,7.0,40.0
1,2.0,7.0,27.0,2.0,7.0,31.0,2.0,7.0,23.0,2.0,...,40.0,2.0,7.0,42.0,2.0,7.0,44.0,2.0,7.0,28.0
2,24.0,9.0,32.0,24.0,9.0,29.0,24.0,9.0,37.0,24.0,...,81.0,24.0,9.0,47.0,24.0,9.0,54.0,24.0,9.0,59.0
3,7.0,7.0,19.0,7.0,7.0,25.0,7.0,7.0,23.0,7.0,...,37.0,7.0,7.0,48.0,7.0,7.0,31.0,7.0,7.0,39.0
4,38.0,7.0,28.0,38.0,7.0,24.0,38.0,7.0,33.0,38.0,...,65.0,38.0,7.0,46.0,38.0,7.0,48.0,38.0,7.0,48.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6598,4.0,1.0,11.0,4.0,1.0,7.0,4.0,1.0,9.0,4.0,...,24.0,4.0,1.0,24.0,4.0,1.0,22.0,4.0,1.0,24.0
6599,22.0,1.0,34.0,22.0,1.0,40.0,22.0,1.0,42.0,22.0,...,73.0,22.0,1.0,74.0,22.0,1.0,74.0,22.0,1.0,80.0
6600,20.0,9.0,19.0,20.0,9.0,29.0,20.0,9.0,27.0,20.0,...,52.0,20.0,9.0,49.0,20.0,9.0,52.0,20.0,9.0,39.0
6601,48.0,10.0,31.0,48.0,10.0,25.0,48.0,10.0,23.0,48.0,...,65.0,48.0,10.0,33.0,48.0,10.0,39.0,48.0,10.0,50.0


In [150]:
X_train

Unnamed: 0,sales(t-120),sales(t-119),sales(t-118),sales(t-117),sales(t-116),sales(t-115),sales(t-114),sales(t-113),sales(t-112),sales(t-111),...,sales(t-9),sales(t-8),sales(t-7),sales(t-6),sales(t-5),sales(t-4),sales(t-3),sales(t-2),sales(t-1),sales(t)
43744,40.0,38.0,28.0,24.0,27.0,23.0,33.0,43.0,41.0,23.0,...,44.0,57.0,60.0,35.0,44.0,48.0,34.0,38.0,54.0,62
46515,33.0,25.0,25.0,38.0,41.0,47.0,36.0,40.0,38.0,36.0,...,52.0,57.0,61.0,54.0,59.0,50.0,69.0,56.0,49.0,44
35488,67.0,63.0,55.0,42.0,37.0,46.0,49.0,55.0,47.0,62.0,...,67.0,104.0,84.0,95.0,58.0,66.0,78.0,76.0,98.0,89
1975,46.0,35.0,35.0,37.0,34.0,45.0,49.0,39.0,41.0,32.0,...,67.0,76.0,66.0,43.0,53.0,58.0,73.0,64.0,72.0,69
62428,17.0,12.0,13.0,22.0,22.0,9.0,8.0,18.0,17.0,23.0,...,18.0,21.0,30.0,25.0,32.0,28.0,15.0,17.0,21.0,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42825,42.0,63.0,81.0,34.0,52.0,55.0,55.0,53.0,55.0,67.0,...,93.0,104.0,111.0,97.0,60.0,83.0,84.0,91.0,85.0,99
60883,21.0,21.0,21.0,16.0,28.0,17.0,12.0,20.0,16.0,18.0,...,26.0,34.0,25.0,33.0,34.0,34.0,59.0,21.0,29.0,43
45725,24.0,23.0,24.0,28.0,24.0,19.0,32.0,36.0,25.0,27.0,...,29.0,35.0,46.0,36.0,23.0,40.0,38.0,39.0,39.0,43
50159,49.0,50.0,39.0,59.0,59.0,51.0,46.0,31.0,49.0,55.0,...,70.0,71.0,61.0,77.0,70.0,69.0,98.0,52.0,64.0,82


In [152]:
X_valid_series

array([[[ 14.],
        [ 10.],
        [ 15.],
        ...,
        [ 17.],
        [ 15.],
        [ 15.]],

       [[ 84.],
        [ 57.],
        [ 45.],
        ...,
        [100.],
        [103.],
        [107.]],

       [[ 35.],
        [ 28.],
        [ 23.],
        ...,
        [ 55.],
        [ 50.],
        [ 54.]],

       ...,

       [[ 29.],
        [ 30.],
        [ 30.],
        ...,
        [ 38.],
        [ 56.],
        [ 58.]],

       [[ 52.],
        [ 49.],
        [ 55.],
        ...,
        [ 72.],
        [ 79.],
        [106.]],

       [[ 30.],
        [ 37.],
        [ 44.],
        ...,
        [ 46.],
        [ 48.],
        [ 45.]]])

In [153]:
X_valid

Unnamed: 0,sales(t-120),sales(t-119),sales(t-118),sales(t-117),sales(t-116),sales(t-115),sales(t-114),sales(t-113),sales(t-112),sales(t-111),...,sales(t-9),sales(t-8),sales(t-7),sales(t-6),sales(t-5),sales(t-4),sales(t-3),sales(t-2),sales(t-1),sales(t)
6578,14.0,10.0,15.0,13.0,5.0,9.0,8.0,11.0,10.0,21.0,...,20.0,24.0,27.0,20.0,17.0,18.0,21.0,17.0,15.0,15
22936,84.0,57.0,45.0,48.0,67.0,47.0,56.0,67.0,64.0,46.0,...,113.0,122.0,107.0,68.0,96.0,93.0,86.0,100.0,103.0,107
54451,35.0,28.0,23.0,35.0,41.0,33.0,30.0,27.0,32.0,39.0,...,53.0,54.0,42.0,47.0,56.0,70.0,42.0,55.0,50.0,54
68089,28.0,37.0,41.0,49.0,45.0,30.0,44.0,28.0,33.0,45.0,...,64.0,48.0,66.0,41.0,65.0,77.0,37.0,42.0,40.0,61
8849,36.0,43.0,39.0,49.0,41.0,38.0,26.0,28.0,36.0,40.0,...,46.0,60.0,70.0,57.0,68.0,53.0,73.0,57.0,64.0,65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4580,15.0,19.0,27.0,27.0,37.0,20.0,23.0,31.0,20.0,20.0,...,35.0,41.0,40.0,26.0,49.0,57.0,24.0,33.0,47.0,35
51842,19.0,10.0,15.0,14.0,17.0,14.0,8.0,13.0,16.0,15.0,...,14.0,20.0,23.0,32.0,18.0,26.0,23.0,16.0,21.0,21
38714,29.0,30.0,30.0,48.0,27.0,26.0,22.0,33.0,34.0,39.0,...,48.0,44.0,52.0,71.0,54.0,29.0,38.0,38.0,56.0,58
27217,52.0,49.0,55.0,54.0,45.0,47.0,54.0,70.0,52.0,76.0,...,84.0,82.0,113.0,108.0,121.0,118.0,87.0,72.0,79.0,106


In [None]:
def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    print(agg)
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [255]:
testing_data = testing_data.drop('id', axis=1)
data = series_to_supervised(testing_data)
data_without_date = data.drop(['date(t)', 'date(t-1)', 'date(t+1)'], axis=1)
data_without_date

       date(t-1)  store(t-1)  item(t-1)    date(t)  store(t)  item(t)  \
0            NaT         NaN        NaN 2018-01-01         1        1   
1     2018-01-01         1.0        1.0 2018-01-02         1        1   
2     2018-01-02         1.0        1.0 2018-01-03         1        1   
3     2018-01-03         1.0        1.0 2018-01-04         1        1   
4     2018-01-04         1.0        1.0 2018-01-05         1        1   
...          ...         ...        ...        ...       ...      ...   
44995 2018-03-26        10.0       50.0 2018-03-27        10       50   
44996 2018-03-27        10.0       50.0 2018-03-28        10       50   
44997 2018-03-28        10.0       50.0 2018-03-29        10       50   
44998 2018-03-29        10.0       50.0 2018-03-30        10       50   
44999 2018-03-30        10.0       50.0 2018-03-31        10       50   

       date(t+1)  store(t+1)  item(t+1)  
0     2018-01-02         1.0        1.0  
1     2018-01-03         1.0        1.0

Unnamed: 0,store(t-1),item(t-1),store(t),item(t),store(t+1),item(t+1)
1,1.0,1.0,1,1,1.0,1.0
2,1.0,1.0,1,1,1.0,1.0
3,1.0,1.0,1,1,1.0,1.0
4,1.0,1.0,1,1,1.0,1.0
5,1.0,1.0,1,1,1.0,1.0
...,...,...,...,...,...,...
44994,10.0,50.0,10,50,10.0,50.0
44995,10.0,50.0,10,50,10.0,50.0
44996,10.0,50.0,10,50,10.0,50.0
44997,10.0,50.0,10,50,10.0,50.0


In [258]:
model_cnn.predict(data_without_date.values.reshape(
    (data_without_date.shape[0], data_without_date.shape[1], 1)))

ValueError: Error when checking input: expected conv1d_6_input to have shape (464, 1) but got array with shape (6, 1)

In [257]:

data_without_date.values.reshape(
    (data_without_date.shape[0], data_without_date.shape[1], 1))

array([[[ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.]],

       [[ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.]],

       [[ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.],
        [ 1.]],

       ...,

       [[10.],
        [50.],
        [10.],
        [50.],
        [10.],
        [50.]],

       [[10.],
        [50.],
        [10.],
        [50.],
        [10.],
        [50.]],

       [[10.],
        [50.],
        [10.],
        [50.],
        [10.],
        [50.]]])