In [1]:
import numpy as np
import pandas as pd
import neptune
import os
import datetime
import matplotlib.pyplot as plt

In [2]:
timesteps = 1


d1_col = 6              # First day column
n_days = 1911            # Number of days to analyse

filename = '../data/'   # Data directory

state = ''            # Data subdivision to analyse, leave blank study entire dataset

if state:
    filename += '%s/%s_data.csv' % (state, state)
else:
    filename += 'sales_train_validation.csv'

df = pd.read_csv(filename, index_col=0)                 # Read data
calendar_df = pd.read_csv('../data/calendar.csv')       # Read the calendar csv for notable dates

In [3]:
def prepare_data(raw_data, y, timesteps):
    n_frames = raw_data.shape[0]
    n_bands = raw_data.shape[1]
    
    data = np.empty((n_frames - timesteps + 1, timesteps, n_bands))
    targets = np.empty((n_frames - timesteps + 1, 1))
    for i in range(timesteps, n_frames + 1):
        entry = np.empty((timesteps, n_bands))
        for j in range(timesteps):
            entry[timesteps - j - 1, :] = scaled_data[i - j - 1, :]
        data[i - timesteps, :, :] = entry
        targets[i - timesteps] = y[i - 1]
        
    return data, targets

In [4]:
# Plots for CA
data_divisions = {
    'state_id': ['CA', 'TX', 'WI'],
    'store_id': [['CA_1', 'CA_2', 'CA_3', 'CA_4'], ['TX_1', 'TX_2', 'TX_3'], ['WI_1', 'WI_2', 'WI_3']],
    'cat_id': ['HOBBIES', 'HOUSEHOLD', 'FOODS'],
    'dept_id': [['HOBBIES_1', 'HOBBIES_2'], ['HOUSEHOLD_1', 'HOUSEHOLD_2'], ['FOODS_1', 'FOODS_2', 'FOODS_3']]
}

# store sum series will store the sum of all items in a specific department i.e 'HOBBIES_1' in the form: 
# store_sum_series[store, deptartment], where each entry is a 1D time series of sales across the whole department
# Each time series can be formatted into LSTM friendly formats and input to neural network for training
# Intial training/validation split could take entries from 8/10 stores for training and remaining 2 for val?

store_series = [] 
store_sum_series = []
store_ids = []
depart_ids = []

for state in data_divisions['store_id']:
    for store in state:
        time_series = []
        sum_series = []
        store_ids_state = []
        for dept in data_divisions['dept_id']:
            for item in dept:
                entry = df.loc[df['store_id'] == str(store)].loc[df['dept_id'] == str(item)].iloc[:, d1_col:d1_col+n_days]
                time_series.append(entry)
                sum_series.append(np.sum(entry, axis=0))
                store_ids_state.append(str(store))
                depart_ids.append(str(item))
        store_series.append(time_series)
        store_sum_series.append(sum_series)
        store_ids.append(store_ids_state)


In [39]:
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if this is longer then the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [53]:
from numpy import array

n_steps = 1

X_LSTM = np.empty((0, n_steps))
X_emb = np.empty((0, n_steps))
targets = np.empty((0, 1))

for i in range(len(store_sum_series) - 1):
    input_sequence = store_sum_series[i][0]
    X_tmp, y_tmp = split_sequence(input_sequence, n_steps)
    X_LSTM = np.concatenate((X_LSTM, X_tmp), axis=0)
    y_tmp = np.reshape(y_tmp, (y_tmp.shape[0], 1))
    targets = np.concatenate((targets, y_tmp), axis=0)
    print(X_LSTM.shape)
    print(targets.shape)
    
X_emb = np.ones(targets.shape)
X_LSTM_val, targets_val = split_sequence(store_sum_series[len(store_sum_series)-1][0], n_steps)
X_emb_val = np.ones(targets_val.shape)

X_LSTM = np.reshape(X_LSTM, (X_LSTM.shape[0], X_LSTM.shape[1], 1))
X_LSTM_val = np.reshape(X_LSTM_val, (X_LSTM_val.shape[0], X_LSTM_val.shape[1], 1))

print(X_LSTM.shape)
print(targets.shape)
print(X_emb.shape)
print(X_LSTM_val.shape)
print(targets_val.shape)
print(X_emb_val.shape)

(1910, 1)
(1910, 1)
(3820, 1)
(3820, 1)
(5730, 1)
(5730, 1)
(7640, 1)
(7640, 1)
(9550, 1)
(9550, 1)
(11460, 1)
(11460, 1)
(13370, 1)
(13370, 1)
(15280, 1)
(15280, 1)
(17190, 1)
(17190, 1)
(17190, 1, 1)
(17190, 1)
(17190, 1)
(1910, 1, 1)
(1910,)
(1910,)


[1 2]


In [56]:
import tensorflow as tf

n_embed_features = 1
n_features = 1

embed_output_dim = 1
# define model
optimiser = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1, clipvalue=0.5)

input_layer_embed = tf.keras.Input(shape=(n_embed_features,))
input_layer_lstm = tf.keras.Input(shape=(n_features, timesteps,))

x = tf.keras.layers.Embedding(input_dim=n_embed_features, output_dim=embed_output_dim)(input_layer_embed)
x = tf.keras.layers.Reshape((1, embed_output_dim))(x)
x = tf.keras.Model(inputs=input_layer_embed, outputs=x)

y = input_layer_lstm
y = tf.keras.Model(inputs=input_layer_lstm, outputs=y)

combined = tf.keras.layers.concatenate([x.output, y.output])

#z = tf.keras.layers.Reshape((n_steps, n_features))
z = tf.keras.layers.Reshape((n_steps, n_features + 2))(combined)
z = tf.keras.layers.LSTM(50, activation='relu')(z)
z = tf.keras.layers.Dense(1, activation='relu')(z)

z = tf.keras.Model(inputs=[x.input, y.input], outputs=z)

z.compile(optimizer='adam', loss='mse', metrics=['accuracy', 'mse'])
z.summary()
history = z.fit(x=[X_emb, X_LSTM], y=targets, validation_data=[[X_emb_val, X_LSTM_val], targets_val], epochs=2000, verbose=2)

#model.compile(optimizer='adam', loss='mse', metrics=['accuracy', 'mse'])

#model.summary()

#history = model.fit(X, y, validation_data=[X_val, y_val], epochs=2000, verbose=2)

Model: "model_27"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_24 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 1, 1)         1           input_24[0][0]                   
__________________________________________________________________________________________________
reshape_17 (Reshape)            (None, 1, 1)         0           embedding_9[0][0]                
__________________________________________________________________________________________________
input_25 (InputLayer)           [(None, 1, 1)]       0                                            
___________________________________________________________________________________________

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  indices[0,0] = 1 is not in [0, 1)
	 [[node model_27/embedding_9/embedding_lookup (defined at <ipython-input-56-598d89e29c84>:31) ]]
	 [[VariableShape_3/_24]]
  (1) Invalid argument:  indices[0,0] = 1 is not in [0, 1)
	 [[node model_27/embedding_9/embedding_lookup (defined at <ipython-input-56-598d89e29c84>:31) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_4840]

Errors may have originated from an input operation.
Input Source operations connected to node model_27/embedding_9/embedding_lookup:
 model_27/embedding_9/embedding_lookup/4034 (defined at C:\Users\handr\AppData\Local\Programs\Python\Python37\lib\contextlib.py:112)

Input Source operations connected to node model_27/embedding_9/embedding_lookup:
 model_27/embedding_9/embedding_lookup/4034 (defined at C:\Users\handr\AppData\Local\Programs\Python\Python37\lib\contextlib.py:112)

Function call stack:
distributed_function -> distributed_function


In [8]:
print(X_val)
print(X_val.shape)

[[342 228 183 ... 349 172 120]
 [228 183  70 ... 172 120 122]
 [183  70 285 ... 120 122 146]
 ...
 [151 180 258 ... 149 249 202]
 [180 258 325 ... 249 202 182]
 [258 325 257 ... 202 182 264]]
(1881, 30)


In [9]:
print(y_val)
print(y_val.shape)

[122 146 247 ... 182 264 381]
(1881,)


In [10]:
from keras.utils.vis_utils import plot_model

plot_model(z, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

Using TensorFlow backend.


OSError: `pydot` failed to call GraphViz.Please install GraphViz (https://www.graphviz.org/) and ensure that its executables are in the $PATH.

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense

def scale_data(x, y, min_x, max_x):
    x_scaled = np.empty(x.shape)
    y_scaled = np.empty(y.shape)
    n_entries = x.shape[0]
    n_filts = x.shape[1]

    for i in range(0, n_entries):
        for j in range(0, n_filts):
            val = x[i, j]
            if val > max_x:
                x_scaled[i, j] = 1
            elif val < min_x:
                x_scaled[i, j] = 0
            else:
                x_scaled[i, j] = (val - min_x) / (max_x - min_x)
        y_scaled[i] = (y[i] - min_x) / (max_x - min_x)

    return x_scaled, y_scaled

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the first GPU
  try:
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

# scale the data
X, y = scale_data(X, y, 0, 1000)
X_val, y_val = scale_data(X_val, y_val, 0, 1000)
print(X)
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], n_features))


# define model
model = Sequential()
optimiser = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1, clipvalue=0.5)
input_layer = tf.keras.layer.InputLayer(input_shape=(n_embed_features))
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse', metrics=['accuracy', 'mse'])

model.summary()

history = model.fit(X, y, validation_data=[X_val, y_val], epochs=2000, verbose=2)

1 Physical GPUs, 1 Logical GPU
[[0.489 0.409 0.383 ... 0.598 0.196 0.169]
 [0.409 0.383 0.263 ... 0.196 0.169 0.217]
 [0.383 0.263 0.453 ... 0.169 0.217 0.187]
 ...
 [0.203 0.197 0.216 ... 0.252 0.217 0.25 ]
 [0.197 0.216 0.286 ... 0.217 0.25  0.256]
 [0.216 0.286 0.25  ... 0.25  0.256 0.266]]
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 50)                10400     
_________________________________________________________________
dense (Dense)                (None, 1)                 51        
Total params: 10,451
Trainable params: 10,451
Non-trainable params: 0
_________________________________________________________________
Train on 16929 samples, validate on 1881 samples
Epoch 1/2000


KeyboardInterrupt: 