In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder

from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, Bidirectional, Input, Flatten 
from keras.layers.core import Reshape

from keras.layers.convolutional import Conv1D, MaxPooling1D

import tensorflow as tf
import keras.backend as K



In [None]:
# Load the raw competition CSV files from the data directory.
# (for now, sell_prices & calendar are not used)

data_dir = 'data/'

train_sales = pd.read_csv(f'{data_dir}sales_train_validation.csv')
sell_prices = pd.read_csv(f'{data_dir}sell_prices.csv')
calendar = pd.read_csv(f'{data_dir}calendar.csv')
submission_file = pd.read_csv(f'{data_dir}sample_submission.csv')

In [None]:
## List the distinct stores; it might be interesting to train one LSTM per store.
pd.unique(train_sales['store_id'])

In [None]:
# Show only the sales rows whose category is HOBBIES.
train_sales.loc[train_sales['cat_id'].eq("HOBBIES")]

In [None]:
## I was wondering how many categories there are.
train_sales.loc[:, 'cat_id'].unique()

In [None]:
## I wanted to try indexing based on column values.
sell_prices.loc[sell_prices['item_id'].eq('HOBBIES_1_001')]

In [None]:
## Example events: calendar rows whose first event is 'Ramadan starts'.
has_event = calendar['event_name_1'].notna()
is_ramadan_start = calendar['event_name_1'] == 'Ramadan starts'
calendar[has_event & is_ramadan_start]

In [None]:
## Another example: calendar rows whose first event is 'ValentinesDay'.
has_event = calendar['event_name_1'].notna()
is_valentines_day = calendar['event_name_1'] == 'ValentinesDay'
calendar[has_event & is_valentines_day]

## Notes

I'm thinking of two things that might be interesting to implement. The first is the event type on a certain day: if we can train a CNN to learn the relation between a day and its events, then we could use that information in the LSTM. The second is product prices: maybe we can train the CNN on days and product prices.

I think it would also be interesting to train the CNN on the relation between events and prices.

I'm going to try to train the CNN on the relation between events and item prices. 

The dataset would consist of 
day, wm_yr_wk, item_id,  event , sell_price

the input for the CNN would be
X = day,wm_yr_wk, item_id, event
Y = sell_price

We first train the CNN to predict the sell prices, the goal is to learn the relation between events and sell prices


The next step is then to combine the CNN with the LSTM. We would create a new model that starts with the CNN and ends with the LSTM. The CNN is the same as the previous CNN, except that we remove the last layer (the prediction layer) and continue on to the LSTM.

The input would be 
X = day, wm_yr_wk, item_id,  event 
Y = number_of_sales

The goal is that this time the LSTM uses information about the relation of event and sell price when predicting number of sales.

In [None]:
## The dataset...
# the code below turned out not to be needed after all
# new_dataset = pd.concat([calendar,sell_prices])


In [None]:
# Keep only the id columns and the per-day sales; the hierarchy columns are dropped.
hierarchy_columns = ["dept_id", "cat_id", "store_id", "state_id"]
modified_sales = train_sales.drop(columns=hierarchy_columns)


In [None]:
# Display the sales frame after the dept/cat/store/state id columns were dropped.
modified_sales

## Sales and Event
The first dataset will represent days, sales and events,
thus 1913, 30490, 1913 --> maybe better as follows: 1913 rows and 30491 columns, where the first 30490 columns
are item sales and the last column indicates whether that day was an event.

In [None]:
# Drop the two remaining identifier columns and transpose, so that the former
# columns (presumably one per day -- confirm against the displayed frame)
# become the rows and every item becomes a column.
modified_sales_v2 = modified_sales.drop(columns=['id', 'item_id']).T

# Append the first event name of each day as an extra column. The number of
# calendar rows to take is derived from the frame itself instead of the
# hard-coded 1913, so the cell keeps working when the number of day rows changes.
# NOTE(review): assumes the calendar rows are in the same order as the day rows.
n_days = len(modified_sales_v2)
modified_sales_v2['event_name_1'] = calendar['event_name_1'].iloc[:n_days].to_numpy()

In [None]:
# Display the transposed sales frame with the appended event_name_1 column.
modified_sales_v2

We need to deal with the NaNs as well as with the event_name_1 values. Maybe we can just use 0 and 1 to indicate whether there was an event. However, this would mean neglecting the available information about the relation between event type and products, which is quite important. Therefore it might be interesting to use one-hot encodings?

In [None]:
# Show only the days that have an event name.
modified_sales_v2[modified_sales_v2['event_name_1'].notna()]

In [None]:
# Replace missing event names with the placeholder 'none'.
# The result is assigned back to the column: calling fillna(inplace=True) on
# a column selected with df['col'] is chained in-place mutation, which warns
# on recent pandas versions and does not update the frame under Copy-on-Write.
modified_sales_v2['event_name_1'] = modified_sales_v2['event_name_1'].fillna('none')

# Distinct event names in order of first appearance, computed once.
unique_events = modified_sales_v2['event_name_1'].unique()

# Running integer code per distinct event, kept as strings ('0', '1', ...).
numerical_event_values = [str(code) for code in range(len(unique_events))]

# Two-column lookup table: event name next to its code.
event_array = np.column_stack((unique_events, numerical_event_values))

In [None]:

# Lookup from event name to its code (the codes are the strings built above).
event_names = modified_sales_v2['event_name_1'].unique()
event_dict = dict(zip(event_names, numerical_event_values))


In [None]:
# Display the event-name -> code lookup.
event_dict

In [None]:
# NOTE(review): this import sits mid-notebook; the other imports are in the first cell.
from keras.utils import to_categorical


# One-hot encode the event codes (numerical_event_values holds one code per
# distinct event). `encoded` is not referenced by any later cell visible in
# this part of the notebook.
encoded = to_categorical(numerical_event_values)


In [None]:
# Spot checks: the event name stored at positional row 1912, and the code
# assigned to the 'none' placeholder.
# NOTE: a notebook cell only displays its last expression, so the value of the
# first lookup is evaluated but not shown.
modified_sales_v2['event_name_1'].iloc[1912]
event_dict['none']

In [None]:
## In the end one-hot encoding is not used: the model complains when the
## column holds a list per row instead of numeric values.
# Replace every event name by its integer code in one vectorised step.
# Assigning the mapped column back replaces the former per-row
# `df['col'].iloc[i] = ...` writes, which are chained assignment (they raise
# SettingWithCopyWarning and do not update the frame under Copy-on-Write) and
# relied on a hard-coded row count of 1913.
# astype(int) turns the string codes into real integers, so the column is
# numeric instead of object dtype; it also fails loudly if an event name is
# missing from event_dict.
modified_sales_v2['event_name_1'] = (
    modified_sales_v2['event_name_1'].map(event_dict).astype(int)
)


In [None]:
# Build the supervised data set with a sliding window over the day rows:
# every sample is `timesteps` consecutive rows, its target is the row that
# directly follows the window.

timesteps = 28
prediction_steps = 1
len_window = timesteps + prediction_steps

nr_training_days = len(modified_sales_v2)
# Number of complete windows (history + target) that fit into the data.
nr_sets = nr_training_days - len_window + 1

# Input windows: rows [start, start + timesteps).
X = np.array([
    modified_sales_v2.iloc[start:start + timesteps].to_numpy()
    for start in range(nr_sets)
])

# Targets: the single row right after each window.
y = np.array([
    modified_sales_v2.iloc[start + timesteps].to_numpy()
    for start in range(nr_sets)
])

In [None]:
## I am still unsure about one-hot encoding the events..

## Model
We kunnen eerst een 1D conv laag proberen. In [dit artikel](https://machinelearningmastery.com/how-to-develop-convolutional-neural-network-models-for-time-series-forecasting/) staat interessante info over hoe je meerdere CNNs kunt combineren. Misschien kunnen wij dat ook doen, dus meerdere CNNs en dan de output van alle drie als input gebruiken voor de LSTM. De output shape moet dan wel passen op de input shape van de LSTM.

Maar eerst kunnen we een 1D Conv laag proberen.






In [None]:
# Functional-API graph: Conv1D -> MaxPooling1D -> Dense -> Dense -> Reshape.
# The prints show the symbolic shape after each stage.

# Values per day; presumably the 30490 item columns plus the event column.
n_features = 30491

# One sample is a window of `timesteps` days with n_features values per day.
visible1 = Input(shape=(timesteps, n_features))
print(visible1.shape)

# Convolution over the time axis, looking at 2 consecutive days at a time.
cnn1 = Conv1D(filters=64, kernel_size=2, activation='relu')(visible1)
print(cnn1.shape)

# Pool over windows of 14 steps along the time axis.
# NOTE(review): with timesteps = 28 this should leave one time step -- confirm
# against the printed shape.
cnn1 = MaxPooling1D(pool_size=14)(cnn1)
print(cnn1.shape)

dense = Dense(50, activation='relu')(cnn1)
print(dense.shape[1])

# One output value per feature; reuse n_features instead of repeating the
# literal 30491 so the input and output widths cannot drift apart.
output = Dense(n_features)(dense)
print(output.shape)

# Swap the last two axes' sizes: (steps, features) -> (features, steps).
reshaped_output = Reshape((output.shape[2], output.shape[1]))(output)


In [None]:
# Wrap the graph into a model: visible1 is the input tensor and
# reshaped_output the prediction tensor.
model = Model(inputs=visible1, outputs=reshaped_output)
# Train with the Adam optimizer on a mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Sanity check: shapes of the input windows and of the targets, one per line.
print(X.shape, y.shape, sep='\n')

In [None]:
# Train for one epoch. The targets get a trailing axis of size 1 so they match
# the model output built by the Reshape layer. The target shape is derived
# from y itself instead of the hard-coded (1885, 30491, 1), so the cell keeps
# working when the window length or the number of features changes.
model.fit(X, y.reshape(*y.shape, 1), batch_size=32, epochs=1, verbose=1)


Het netwerk traint nu wel, maar de loss is erg hoog. Ik moet nog even kijken of dit gewoon een kwestie van lagen toevoegen is, of dat er iets anders aan de hand is. Ik gebruik nu ook geen one hot encoding voor de evenementen omdat Keras dan begint te klagen dat er iets mis is met de data.

## Multiple LSTMS

Ik bekeek de data en toen zag ik dat de 30490 items niet unieke producten zijn. Het zijn dezelfde producten maar dan over verschillende stores. Misschien is het interessant om per store een LSTM te trainen. Dan heb je veel minder features per LSTM. Vervolgens kun je tijdens het predicten per item bepalen welk LSTM je moet gebruiken omdat **id** een combinatie is van **item_id**, **dept_id** en **store_id**.


## Sales and ItemPrices
The second dataset will represent days, sales and item prices. 
This dataset is a bit trickier, since we have 30490 item sales and 30490 item prices, so the result would be something like

1913 rows (days)
each column would ideally represent number_of_sales, price
I think the end result would be something along the lines of

1913,30490,2

