In [5]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
plt.style.use('ggplot')

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import LSTM





import sklearn
from sklearn import preprocessing, model_selection
import tensorflow as tf

import src.lstm_helper as lh
import src.time_series_helpers as th

In [6]:
#reference:https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/
# https://heartbeat.fritz.ai/building-a-neural-network-from-scratch-using-python-part-1-6d399df8d432
# https://www.kaggle.com/alexdance/store-item-combination-part-6-deep-learning
# https://www.tensorflow.org/tutorials/structured_data/time_series

In [7]:
!ls src/

[34m__pycache__[m[m            lstm_helper.py         time_series_helpers.py


In [8]:
!ls data/

lstm_data.csv         sample_submission.csv test.csv
processed_train.csv   store_item.csv        train.csv


### Predict future sales but non time-series models.

#### Fearture engineering

In [9]:
#load dataset that is processed by store by item
path = 'data/store_item.csv'
df = pd.read_csv(path)
df.shape

(1826, 501)

In [10]:
df.head()

Unnamed: 0,date,s1_i1,s1_i2,s1_i3,s1_i4,s1_i5,s1_i6,s1_i7,s1_i8,s1_i9,...,s10_i41,s10_i42,s10_i43,s10_i44,s10_i45,s10_i46,s10_i47,s10_i48,s10_i49,s10_i50
0,2013-01-01,13,33,15,10,11,31,25,33,18,...,12,20,24,17,45,37,11,25,17,33
1,2013-01-02,11,43,30,11,6,36,23,37,23,...,10,22,29,10,45,33,13,24,13,37
2,2013-01-03,14,23,14,8,8,18,34,38,25,...,17,27,43,20,56,28,16,29,19,46
3,2013-01-04,13,18,10,19,9,19,36,54,22,...,13,24,27,12,50,44,11,39,23,51
4,2013-01-05,10,34,23,12,8,31,38,51,29,...,14,25,31,25,62,45,16,34,22,41


In [11]:
#set datatime to index
df['date'] =  pd.to_datetime(df['date'])
df = df.set_index('date')

#### Build a  Single-output model
Single-output, multi-time-step predictions

In [12]:
# extract columns names
columns = df.columns.tolist()
columns[:5]

['s1_i1', 's1_i2', 's1_i3', 's1_i4', 's1_i5']

In [13]:
columns[0]

's1_i1'

In [18]:
# try to predict 1 year sales of item 1 at store 1
output_cols = columns[:1]

def prepare_data(df,output_cols):
    #load item 1 at store 1 sales values
    prep_df = df[output_cols]
    # convert df_1 to a dataframe
    prep_df = pd.DataFrame(prep_df)
    return prep_df

prep_df = prepare_data(df,output_cols)
prep_df.shape

(1826, 1)

In [19]:
prep_df.head()

Unnamed: 0_level_0,s1_i1
date,Unnamed: 1_level_1
2013-01-01,13
2013-01-02,11
2013-01-03,14
2013-01-04,13
2013-01-05,10


In [135]:
# split into train and valid sets

output_length =365 # the number days we would like to predict
#time_stepts in LSTM: the recurrent cell gets unrolled to a specified length 
time_steps = 14    #recurrent cell numbers,two weeks

train_size = prep_df.shape[0] - (output_length+time_steps)
test_size  = prep_df.shape[0] - train_size
train = prep_df.iloc[:train_size,:]
test  = prep_df.iloc[-test_size:,:]
valid = prep_df.iloc[-output_length:,:]

In [138]:
print('All shapes are:    (train, ,test,valid)')
print(f'train dataset shape: {train.shape}')
print(f'test dataset shape: {test.shape}')
print(f'valid dataset shape: {valid.shape}')


All shapes are:    (train, ,test,valid)
train dataset shape: (1447, 1)
test dataset shape: (379, 1)
valid dataset shape: (365, 1)


In [139]:
# Genarate window datasets
# One of the most difficult parts of Deep Learning modelling is to get the dataset in the right format 
# The function completes that process
X_train, y_train = lh.window_generator(train, train.iloc[:,:len(output_cols)],time_steps)
X_test,  y_test  = lh.window_generator(test, test.iloc[:,:len(output_cols)],time_steps)


In [140]:
print('All shapes are: (X_train, y_train, X_test,  y_test)')
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_test  shape: {X_test.shape}')
print(f'y_test  shape: {y_test.shape}')

All shapes are: (X_train, y_train, X_test,  y_test)
X_train shape: (1433, 14, 1)
y_train shape: (1433, 1)
X_test  shape: (365, 14, 1)
y_test  shape: (365, 1)


In [141]:
# call lstm model

model_1 = lh.lstm_model(X_train)

In [142]:
history = model_1.fit(
    X_train, y_train,
    epochs=30,
    batch_size=16,
    validation_split=0.1,
    verbose=1,
    shuffle=False)

Train on 1289 samples, validate on 144 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [143]:
y_pred = model_1.predict(X_test)
y_pred.shape

(365, 1)

In [144]:
type(y_pred)

numpy.ndarray

In [146]:
df_forecast = pd.DataFrame(y_pred, index=valid.index, columns=valid.columns + '_forecast')
df_forecast

Unnamed: 0_level_0,s1_i1_forecast
date,Unnamed: 1_level_1
2017-01-01,18.347805
2017-01-02,17.651955
2017-01-03,16.559359
2017-01-04,14.314113
2017-01-05,16.391363
...,...
2017-12-27,15.027835
2017-12-28,14.296826
2017-12-29,15.513728
2017-12-30,15.294107


In [149]:
valid

Unnamed: 0_level_0,s1_i1
date,Unnamed: 1_level_1
2017-01-01,19
2017-01-02,15
2017-01-03,10
2017-01-04,16
2017-01-05,14
...,...
2017-12-27,14
2017-12-28,19
2017-12-29,15
2017-12-30,27


In [150]:
RMSE  = th.rmse_calculate(df_forecast,valid)
print(RMSE)

5.623309595468694


In [151]:
df_forecast_3months = df_forecast[-92:]
valid_3months = valid[-92:]

In [152]:
RMSE_3months  = th.rmse_calculate(df_forecast_3months,valid_3months)
print(RMSE_3months)

5.377468583055339


In [None]:
#plot forecast VS actuals



#### Build a  multi-output predictions
Single-output multi-output predictions.