### Require Dependencies

In [1]:
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LSTM, Dense
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np 

### Preprocess Data

In [2]:
df = pd.read_csv('data.csv', header=0)
df.head()


Unnamed: 0,date,onpromotion,storeNbr,AUTOMOTIVE,BABY CARE,BEAUTY,BEVERAGES,BOOKS,BREAD/BAKERY,CELEBRATION,...,MEATS,PERSONAL CARE,PET SUPPLIES,PLAYERS AND ELECTRONICS,POULTRY,PREPARED FOODS,PRODUCE,SCHOOL AND OFFICE SUPPLIES,SEAFOOD,totalSales
0,2013-01-01,0,1,0,0,0,0,0,0.0,0,...,0.0,0,0,0,0.0,0.0,0.0,0,0.0,0.0
1,2013-01-01,0,2,0,0,0,0,0,0.0,0,...,0.0,0,0,0,0.0,0.0,0.0,0,0.0,0.0
2,2013-01-01,0,3,0,0,0,0,0,0.0,0,...,0.0,0,0,0,0.0,0.0,0.0,0,0.0,0.0
3,2013-01-01,0,4,0,0,0,0,0,0.0,0,...,0.0,0,0,0,0.0,0.0,0.0,0,0.0,0.0
4,2013-01-01,0,5,0,0,0,0,0,0.0,0,...,0.0,0,0,0,0.0,0.0,0.0,0,0.0,0.0


In [3]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:(i + look_back), :])
        dataY.append(dataset[i + look_back, 3:-1])
    return np.array(dataX), np.array(dataY)
    


### Define Architecture

In [4]:
look_back = 8
num_categories = len(df.columns) - 4  # Excluding date, storeNbr, onpromotion, and totalSales

model = Sequential()
model.add(LSTM(50, input_shape=(look_back, num_categories + 3)))  # +3 for storeNbr, onpromotion, and totalSales
model.add(Dense(32))
model.compile(loss='mean_squared_error', optimizer='adam')


### Train the model

In [5]:
# Prepare the dataset
df['date'] = pd.to_datetime(df['date'])
df = df[df['storeNbr'] == 1] # start with only the first store
data = df.sort_values(by=['date']).drop(columns=['date']).values


# Create input-output pairs
X, y = create_dataset(data, look_back)

# Split the data into training and testing sets
trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Needs a TON of epochs
model.fit(trainX, trainY, epochs=10000, batch_size=32, verbose=2)

Epoch 1/10000
90/90 - 4s - loss: 102500.3906 - 4s/epoch - 47ms/step
Epoch 2/10000
90/90 - 0s - loss: 102212.5703 - 455ms/epoch - 5ms/step
Epoch 3/10000
90/90 - 0s - loss: 101674.6562 - 469ms/epoch - 5ms/step
Epoch 4/10000
90/90 - 1s - loss: 100892.1875 - 534ms/epoch - 6ms/step
Epoch 5/10000
90/90 - 0s - loss: 100142.0781 - 473ms/epoch - 5ms/step
Epoch 6/10000
90/90 - 0s - loss: 99490.9609 - 414ms/epoch - 5ms/step
Epoch 7/10000
90/90 - 0s - loss: 98858.2734 - 392ms/epoch - 4ms/step
Epoch 8/10000
90/90 - 0s - loss: 98192.9297 - 346ms/epoch - 4ms/step
Epoch 9/10000
90/90 - 0s - loss: 97620.3984 - 380ms/epoch - 4ms/step
Epoch 10/10000
90/90 - 0s - loss: 97124.7266 - 377ms/epoch - 4ms/step
Epoch 11/10000
90/90 - 0s - loss: 96648.9375 - 326ms/epoch - 4ms/step
Epoch 12/10000
90/90 - 0s - loss: 96199.9219 - 441ms/epoch - 5ms/step
Epoch 13/10000
90/90 - 0s - loss: 95779.4453 - 472ms/epoch - 5ms/step
Epoch 14/10000
90/90 - 0s - loss: 95376.1641 - 438ms/epoch - 5ms/step
Epoch 15/10000
90/90 - 0s 

<keras.callbacks.History at 0x20e96cfd5d0>

### Test Performance

In [19]:
# Test the model and make predictions
testPredictions = model.predict(testX)

# Calculate the Mean Squared Error (MSE) on the test data
test_mse = mean_squared_error(testY, testPredictions)
print("Mean Squared Error: ", test_mse)

test_mae = mean_absolute_error(testY, testPredictions)
test_r2 = r2_score(testY, testPredictions)

print("Mean Absolute Error:", test_mae)
print("R-squared score:", test_r2)
"""
Mean Squared Error:  66174.43491272551
Mean Absolute Error: 81.56172771348042
R-squared score: 0.13775325212931822
oof
"""

Mean Squared Error:  66174.43491272551
Mean Absolute Error: 81.56172771348042
R-squared score: 0.13775325212931822


In [16]:
x = np.array([testX[-1]]) # predict last point
model.predict(x)


"""
array([[-6.1686362e-05,  4.6692055e-01,  3.8359444e+01, -2.6546391e-05,
         1.8567648e+02,  1.6154854e+00,  4.1970105e+02,  3.5754965e+02,
         3.5898205e+01,  1.7267962e+01,  9.8091194e+01, -2.0850977e+02,
         5.2151585e+00, -4.8492447e-02,  1.9163357e+00,  4.5457201e+00,
        -1.4824487e-02,  3.1117912e+01,  7.9827392e-01,  2.3482652e+00,
         1.7561276e+00,  2.7935734e+01,  9.0150878e-02,  8.9242462e+01,
         5.5166054e+01,  2.0977840e-01,  1.1819119e+00,  3.2852470e+01,
         3.1793232e+00,  4.8103748e+01,  1.4592125e-01,  3.9945490e+00]
"""




array([[-6.1686362e-05,  4.6692055e-01,  3.8359444e+01, -2.6546391e-05,
         1.8567648e+02,  1.6154854e+00,  4.1970105e+02,  3.5754965e+02,
         3.5898205e+01,  1.7267962e+01,  9.8091194e+01, -2.0850977e+02,
         5.2151585e+00, -4.8492447e-02,  1.9163357e+00,  4.5457201e+00,
        -1.4824487e-02,  3.1117912e+01,  7.9827392e-01,  2.3482652e+00,
         1.7561276e+00,  2.7935734e+01,  9.0150878e-02,  8.9242462e+01,
         5.5166054e+01,  2.0977840e-01,  1.1819119e+00,  3.2852470e+01,
         3.1793232e+00,  4.8103748e+01,  1.4592125e-01,  3.9945490e+00]],
      dtype=float32)

### Notes

- Model takes way too long to train
- Most likely need to normalize
- Considerations for null data?
- Architecture for store specific model
    - Encapsulate BST
- Grid search hyperparams
    - window size
    - optimizer