## Imports

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# from pandas_profiling import ProfileReport

## Preprocessing the datasets

### Training Dataset

In [3]:
# Read the raw training CSV and keep only the first copy of any fully duplicated row.
train_path = "../input/data-storm-30/train_data.csv"
train_dataset = pd.read_csv(train_path).drop_duplicates(keep='first')

In [4]:
# Sanity summary of the training data: column names and the cardinality of the two identifiers.
# nunique(dropna=False) counts a missing value as its own entry, exactly like len(unique()).
print(train_dataset.columns)
print('No. of different categories: {}'.format(train_dataset['CategoryCode'].nunique(dropna=False)))
print('No. of different items: {}'.format(train_dataset['ItemCode'].nunique(dropna=False)))


In [5]:
# Parse DateID into datetimes and order the rows chronologically, so that the
# positional slices taken further down follow time order.
train_dataset = train_dataset.assign(
    DateID=pd.to_datetime(train_dataset['DateID'])
).sort_values(by='DateID')
print(train_dataset.dtypes)

In [6]:
# Preview the first five rows of the date-sorted training frame.
train_dataset.head(n=5)

In [7]:
# Check that every ItemCode maps to exactly one CategoryCode.
#
# A single grouped pass replaces the per-item rescan of the whole frame
# (which cost O(items x rows)). dropna=False counts a missing CategoryCode as
# its own value, matching the earlier unique()-based comparison. Rows whose
# ItemCode is missing are skipped by groupby; the old loop compared them with
# `==`, matched no rows at all and therefore reported a spurious failure.
categories_per_item = train_dataset.groupby('ItemCode')['CategoryCode'].nunique(dropna=False)

# flag stays an int: 0 = mapping is consistent, 1 = some item has several categories.
flag = int((categories_per_item != 1).any())
if flag:
    print('CategoryCode for an ItemCode is not unique')

### Validation Dataset

In [8]:
# Read the validation CSV and keep only the first copy of any fully duplicated row.
valid_path = '../input/data-storm-30/validation_data.csv'
validation_dataset = pd.read_csv(valid_path).drop_duplicates(keep='first')

In [9]:
# Sanity summary of the validation data: column names and identifier cardinalities.
# nunique(dropna=False) counts a missing value as its own entry, exactly like len(unique()).
print(validation_dataset.columns)
print('No. of different categories: {}'.format(validation_dataset['CategoryCode'].nunique(dropna=False)))
print('No. of different items: {}'.format(validation_dataset['ItemCode'].nunique(dropna=False)))

### Test Dataset

In [10]:
# Read the test CSV and keep only the first copy of any fully duplicated row.
test_path = '../input/data-storm-30/test_data.csv'
test_dataset = pd.read_csv(test_path).drop_duplicates(keep='first')

In [11]:
# Sanity summary of the test data: column names and identifier cardinalities.
# nunique(dropna=False) counts a missing value as its own entry, exactly like len(unique()).
print(test_dataset.columns)
print('No. of different categories: {}'.format(test_dataset['CategoryCode'].nunique(dropna=False)))
print('No. of different items: {}'.format(test_dataset['ItemCode'].nunique(dropna=False)))

## Defining the model


### Picking an item

In [12]:
# Isolate the rows of one item; the cells below model this single sales series.
item_num = 174436
tmp_dataset = train_dataset[train_dataset['ItemCode'].eq(item_num)]
# pop() hands back the DateID values and removes that column from tmp_dataset.
dates = np.array(tmp_dataset.pop('DateID'))
sales = np.array(tmp_dataset['DailySales'])

In [13]:
# Report how many observations the selected item has and build a matching
# integer time axis (0, 1, 2, ...).
print(sales.shape[0])
time = np.arange(sales.shape[0])

### Windowing function

In [14]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Build a shuffled, batched dataset of (features, label) windows.

    A trailing axis is added to ``series`` inside this helper, which is what
    convolutional layers need as their channel dimension.

    Args:
        series: Sequence of values to slide the window over.
        window_size: Number of consecutive steps used as features.
        batch_size: Number of windows per emitted batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` of ``(features, label)`` batches. ``features``
        holds the first ``window_size`` steps of a window and ``label`` is the
        single step that follows, kept as a length-1 slice.
        NOTE(review): the label is one step only -- confirm this matches the
        output shape of the model being trained.
    """
    span = window_size + 1  # the features plus the one step used as the label

    def to_example(window):
        # All steps but the last are inputs; the last is kept as a length-1 slice.
        return window[:-1], window[-1:]

    with_channel = tf.expand_dims(series, axis=-1)
    windows = (
        tf.data.Dataset.from_tensor_slices(with_channel)
        .window(span, shift=1, drop_remainder=True)
        # window() yields nested datasets; batch(span) turns each into one tensor.
        .flat_map(lambda sub: sub.batch(span))
    )
    examples = windows.shuffle(shuffle_buffer).map(to_example)
    return examples.batch(batch_size).prefetch(1)

In [15]:
# Windowing / batching hyper-parameters.
window_size = 7
batch_size = 5
shuffle_buffer_size = 10

# The first 100 observations are used for training, the remainder is held out.
x_train, x_valid = sales[:100], sales[100:]
train_set = windowed_dataset(
    series=x_train,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer_size,
)

In [16]:
# Work with positional indices [0, 1, 2, ...] rather than calendar dates,
# split at position 100.
time_train, time_valid = time[:100], time[100:]

In [17]:
# Clear Keras' global state first, then fix the TensorFlow and NumPy seeds to
# make repeated runs more comparable. Keep this order: the session is cleared
# before the seeds are set.
# NOTE(review): the earlier comment here spoke of first finding an optimum
# learning rate with fewer epochs; no such search is visible in this part of
# the notebook -- confirm whether it is still planned.
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)

### Model

In [18]:
# Conv1D front-end -> two stacked LSTMs -> Dense(1) head.
# Both LSTMs use return_sequences=True, so the network emits one value per
# input timestep.
# NOTE(review): confirm the training labels are shaped for per-timestep outputs.
model = tf.keras.models.Sequential([
    # 'causal' padding keeps each output step from depending on later inputs.
    tf.keras.layers.Conv1D(filters = 32, kernel_size = 3, strides =1 , padding = 'causal', activation = 'relu', input_shape = [None,1]),
    tf.keras.layers.LSTM(32, return_sequences = True),
    tf.keras.layers.LSTM(32, return_sequences = True),
    tf.keras.layers.Dense(1),
    # Fixed x200 rescaling of the output; presumably chosen to match the
    # magnitude of DailySales -- TODO confirm.
    tf.keras.layers.Lambda(lambda x: x*200.0)
])

# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and rejected outright by newer Keras releases.
optimizer = tf.keras.optimizers.SGD(learning_rate = 1e-5, momentum = 0.9)
loss = tf.keras.losses.Huber()

model.compile(loss = loss, optimizer = optimizer, metrics = ['mae'])

history = model.fit(train_set, epochs = 500)

In [20]:
def plot_series(time, series, format="-", start=0, end=None):
    """Plot ``series`` against ``time`` on the current matplotlib figure.

    Args:
        time: Sliceable x-axis values.
        series: Sliceable y-axis values, aligned with ``time``.
        format: Matplotlib format string for the line.
        start: First position to draw (inclusive).
        end: Position to stop at (exclusive); ``None`` means to the end.
    """
    span = slice(start, end)  # same sub-range applied to both axes
    plt.plot(time[span], series[span], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)

In [21]:
def model_forecast(model, series, window_size):
    """Run ``model`` over every sliding window of ``series``.

    Used after training: the series is cut into overlapping windows of
    ``window_size`` consecutive steps (stride 1, incomplete tail dropped),
    grouped into batches of 32 and passed to ``model.predict``.

    Args:
        model: Object exposing ``predict`` that accepts a ``tf.data.Dataset``.
        series: Values to forecast from; sliced along the first axis.
        window_size: Number of steps per input window.

    Returns:
        Whatever ``model.predict`` returns for the windowed dataset.
    """
    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        .window(window_size, shift=1, drop_remainder=True)
        # window() yields nested datasets; batch() turns each into one tensor.
        .flat_map(lambda sub: sub.batch(window_size))
        .batch(32)
        .prefetch(1)
    )
    return model.predict(windows)

In [22]:
# Forecast over the full series, then keep only the validation part.
# Window i covers sales[i : i + window_size], so window (100 - window_size) is
# the first one whose following step is position 100; the final window is
# dropped because the step after it lies beyond the data. The trailing
# [-1, 0] picks the last timestep and the first output channel of each prediction.
lstm_forecast = model_forecast(
    model, np.expand_dims(sales, axis=-1), window_size
)[100 - window_size:-1, -1, 0]

In [23]:
# Overlay the actual validation sales and the model forecast on one figure.
plt.figure(figsize=(10, 6))
plot_series(time=time_valid, series=x_valid)
plot_series(time=time_valid, series=lstm_forecast)

In [24]:
# Mean absolute error of the forecast over the validation span (displayed as the cell output).
tf.keras.metrics.mean_absolute_error(y_true=x_valid, y_pred=lstm_forecast).numpy()