Credits to https://www.kaggle.com/bountyhunters/baseline-lstm-with-keras-0-7 for setup, general structure, and pieces of code

In [None]:
# imports & guarantee reproducible values
# One seed is pushed into every random source the notebook touches
# (Python hashing, `random`, NumPy, TensorFlow).
seed_value= 2

import os
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes, not this kernel -- confirm.
os.environ['PYTHONHASHSEED']=str(seed_value)

import random
random.seed(seed_value)

import numpy as np
np.random.seed(seed_value)

import tensorflow as tf
tf.random.set_seed(seed_value)

from keras import backend as K
# Restrict TensorFlow to one intra-op and one inter-op thread and register a
# TF1-compat session built with that configuration as the Keras session.
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

import pandas as pd
from scipy.sparse import csr_matrix


In [None]:
# Directory holding the competition CSV files. The read_csv calls below append
# "/<file>.csv", so the resulting paths contain a double slash.
dataPath = "/kaggle/input/m5-forecasting-accuracy/"

# Hyperparameters
timesteps = 15      # length (in days) of each input window fed to the LSTM
startDay = 350      # number of leading days dropped from the training data
RMSE = False        # True: train with root_mean_squared_error; False: 'mean_squared_error'
EPOCH_NO = 32       # number of training epochs

# Features
# Each flag decides whether the matching feature frame is concatenated to the
# training data and appended to every forecast step.
DBE = True          # "one day before event" indicator
WDAY = True         # one-hot day of the week
STORE_AVG = False   # per-store average sales
STATE_AVG = False   # per-state average sales

# Load Data


In [None]:
# Read the evaluation sales table once, then split it column-wise:
#   dt      -> the first 1919 columns (used for training)
#   dt_eval -> the remaining columns (held out and scored with WRMSSE later)
dt_eval = pd.read_csv(dataPath + "/sales_train_evaluation.csv")
dt, dt_eval = dt_eval.iloc[:, :1919], dt_eval.iloc[:, 1919:]


In [None]:
#To reduce memory usage
def downcast_dtypes(df):
    """Shrink numeric columns of *df* in place to reduce memory usage.

    float64 columns become float32. int64/int32 columns become int16, but only
    when every value in the column fits in int16; columns holding larger
    values are left untouched instead of silently wrapping around.

    Args:
        df: pandas DataFrame to convert. It is modified in place.

    Returns:
        The same DataFrame object, for call chaining.
    """
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_candidates = [c for c in df if df[c].dtype in ("int64", "int32")]

    if int_candidates and len(df) > 0:
        limits = np.iinfo(np.int16)
        lows = df[int_candidates].min()
        highs = df[int_candidates].max()
        # Casting an out-of-range integer to int16 wraps around without any
        # error, so only columns whose full value range fits are converted.
        int_cols = [
            c for c in int_candidates
            if lows[c] >= limits.min and highs[c] <= limits.max
        ]
    else:
        int_cols = int_candidates

    if float_cols:
        df[float_cols] = df[float_cols].astype('float32')
    if int_cols:
        df[int_cols] = df[int_cols].astype('int16')
    return df

In [None]:
#Reduce memory usage
# downcast_dtypes converts the numeric columns of each frame and returns it.
dt = downcast_dtypes(dt)
dt_eval = downcast_dtypes(dt_eval)

# Features

In [None]:
### STORE AVERAGE ###
# Mean sales per store for every day column (everything after the six leading
# id columns). Grouping by 'store_id' yields one row per store in sorted key
# order; transposing gives days as rows and one column per store.
store_avg = dt.groupby('store_id')[dt.columns[6:]].mean().T
# Plain column labels taken from the data instead of a hard-coded store list.
store_avg.columns = store_avg.columns.tolist()
# Drop the first `startDay` days. The row index keeps the original day labels,
# which is what the feature concat needs once `dt` is transposed and trimmed;
# the former `dt.index[StartDay:1913]` reassignment raised NameError and would
# have replaced the day labels with integer item positions.
store_avg = store_avg[startDay:]

# Save store_id for prediction
store_id = pd.DataFrame(dt['store_id'])
store_id.columns = ['store_id']

In [None]:
### STATE AVERAGE ###
# Mean sales per state for every day column (everything after the six leading
# id columns). Grouping by 'state_id' yields one row per state in sorted key
# order; transposing gives days as rows and one column per state.
state_avg = dt.groupby('state_id')[dt.columns[6:]].mean().T
# Plain column labels taken from the data instead of a hard-coded state list.
state_avg.columns = state_avg.columns.tolist()
# Drop the first `startDay` days. The row index keeps the original day labels,
# which is what the feature concat needs once `dt` is transposed and trimmed;
# the former `dt.index[StartDay:1913]` reassignment raised NameError and would
# have replaced the day labels with integer item positions.
state_avg = state_avg[startDay:]

# Save state_id column for prediction loop
state_id = pd.DataFrame(dt['state_id'])
state_id.columns = ['state_id']

In [None]:
# Transpose so that items become columns and days become rows.
dt = dt.T    
dt_eval = dt_eval.T


In [None]:
# After the transpose the six id fields (id, item_id, dept_id, cat_id,
# store_id, state_id) are the first six rows. Drop them together with the
# first `startDay` day rows.
dt = dt[6 + startDay:]

In [None]:
# Calendar table; the feature cells below read its "event_name_1" and "wday" columns.
calendar = pd.read_csv(dataPath + "/calendar.csv")

In [None]:
### DAY BEFORE EVENT ###

# One row per calendar day: 1.0 when the *following* day has an event
# (non-null "event_name_1"), otherwise 0.0.
# Built on a NumPy array with positional indexing, which avoids chained
# DataFrame assignment and a hard-coded calendar length.
day_before_flags = np.zeros((len(calendar), 1))
event_rows = np.flatnonzero(calendar["event_name_1"].notna().to_numpy())
# An event on the very first calendar day has no preceding day to flag.
event_rows = event_rows[event_rows > 0]
day_before_flags[event_rows - 1, 0] = 1
daysBeforeEvent = pd.DataFrame(day_before_flags)

#"daysBeforeEventTest" will be used as input for predicting (We will forecast the days 1913-1941)
# It keeps the integer row labels 1913..1940 and the column label 0, which the
# prediction loop uses for lookups.
daysBeforeEventTest = daysBeforeEvent[1913:1941]

#"daysBeforeEvent" will be used for training as a feature.
daysBeforeEvent = daysBeforeEvent[startDay:1913].copy()

# Before concatenation with the main data "dt", give the feature the same index
# and name its column "oneDayBeforeEvent".
daysBeforeEvent.columns = ["oneDayBeforeEvent"]
daysBeforeEvent.index = dt.index[:1913]



In [None]:
### DAY OF THE WEEK ###
# One Hot Encoding
# Encode the whole calendar once and slice afterwards, so the training and the
# forecast slices are guaranteed to expose the same wday_* columns (encoding
# each slice separately only yields a column for weekdays present in it).
weekday = pd.get_dummies(calendar[["wday"]], columns=['wday'])

# Forecast slice keeps the integer row labels 1913..1940 used by the
# prediction loop.
weekdayTest = weekday[1913:1941]

# Training slice, re-indexed to match "dt" for the feature concat.
weekday = weekday[startDay:1913].copy()
weekday.index = dt.index[:1913]

In [None]:
### ADD FEATURES ###
# Append each enabled feature frame to "dt" as extra columns (axis=1).
# The order here (event flag, weekday, store average, state average) is the
# order in which the prediction loop appends the same features to each
# forecast step. A disabled feature's frame is never referenced.
if DBE:
    dt = pd.concat([dt, daysBeforeEvent], axis = 1)
if WDAY:
    dt = pd.concat([dt, weekday], axis = 1)
if STORE_AVG:
    dt = pd.concat([dt, store_avg], axis = 1)
if STATE_AVG:
    dt = pd.concat([dt, state_avg], axis = 1)

In [None]:
# "calendar" is not used past this point; drop the reference so its memory can be reclaimed.
del calendar

In [None]:
#Feature Scaling
#Scale the features using min-max scaler in range 0-1
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
# "dt" mixes integer item columns with string-named feature columns; recent
# scikit-learn versions reject DataFrames whose column labels are of mixed
# types, so the scaler is fitted on the plain float array instead.
dt_scaled = sc.fit_transform(dt.to_numpy(dtype="float64"))

In [None]:
# Sliding windows over the scaled data: each sample is the `timesteps` rows
# preceding day `end`; its target is the first 30490 columns of row `end`.
window_ends = range(timesteps, 1913 - startDay)
X_train = [dt_scaled[end - timesteps:end] for end in window_ends]
y_train = [dt_scaled[end][0:30490] for end in window_ends]

In [None]:
# The windows have been built; drop the name for the full scaled matrix.
del dt_scaled

In [None]:
#Convert to np array to be able to feed the LSTM model
# X_train stacks the per-sample windows; y_train stacks the per-sample targets.
X_train = np.array(X_train)
y_train = np.array(y_train)

# Create Model

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Loss: square root of the mean squared difference of y_pred and y_true.

    Computed with Keras backend ops (`K`) so it can be passed to `compile`.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

In [None]:
# Keras building blocks for the network
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Layer sizes and training settings
layer_1_units = 40
layer_2_units = 300
layer_3_units = 300
epoch_no = EPOCH_NO
batch_size_RNN = 44

# Three stacked LSTM layers, each followed by 20% dropout. The first two
# return full sequences so the next LSTM receives one vector per timestep.
regressor = Sequential()
regressor.add(LSTM(units=layer_1_units, return_sequences=True,
                   input_shape=(X_train.shape[1], X_train.shape[2])))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=layer_2_units, return_sequences=True))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=layer_3_units))
regressor.add(Dropout(0.2))

# Output layer: one unit per target column
regressor.add(Dense(units=30490))

# Loss is chosen by the RMSE switch; the optimizer is always Adam.
loss_fn = root_mean_squared_error if RMSE else 'mean_squared_error'
regressor.compile(optimizer='adam', loss=loss_fn)

# Train on the whole training set (no validation split)
regressor.fit(X_train, y_train, epochs=epoch_no, batch_size=batch_size_RNN,
              validation_split=0.0)



# Prediction

In [None]:
# Seed window for forecasting: the last `timesteps` rows of the training data,
# scaled with the already-fitted scaler.
inputs = dt[-timesteps:]
# Pass a plain float array: "dt" mixes integer and string column labels, which
# recent scikit-learn versions reject for DataFrame input.
inputs = sc.transform(inputs.to_numpy(dtype="float64"))

In [None]:
# Recursive multi-step forecast: predict one day, append the prediction (plus
# that day's known features) to the input sequence, and repeat.
forecast_horizon = 28
n_features = inputs.shape[1]

# X_test has shape (1, steps, n_features) and grows by one step per iteration.
X_test = np.array([inputs[0:timesteps]])
predictions = []

for j in range(timesteps, timesteps + forecast_horizon):
    # The most recent `timesteps` rows form the model input.
    window = X_test[:, j - timesteps:j].reshape(1, timesteps, n_features)
    scaled_sales = np.array(regressor.predict(window))  # (1, number of items)
    n_items = scaled_sales.shape[1]

    # Row label of the forecast day in the *Test feature frames (1913..1940).
    day = 1913 + j - timesteps

    # Assemble the feature row in the same column order as the training data:
    # sales, event flag, weekday one-hot, store averages, state averages.
    feature_parts = [scaled_sales]

    # Add DayBeforeEvent
    if DBE:
        feature_parts.append(daysBeforeEventTest[0][day])

    # Add weekday
    if WDAY:
        feature_parts.extend(weekdayTest['wday_' + str(i)][day] for i in range(1, 8))

    # Add store average (only computed when the feature is enabled)
    # NOTE(review): these are means of the *scaled* predictions, while the
    # training feature was the scaler-transformed mean of raw sales -- confirm
    # the two are meant to be comparable.
    if STORE_AVG:
        store_means = pd.Series(scaled_sales[0]).groupby(store_id['store_id'].to_numpy()).mean()
        feature_parts.extend(store_means.to_numpy())

    # Add state average (only computed when the feature is enabled)
    if STATE_AVG:
        state_means = pd.Series(scaled_sales[0]).groupby(state_id['state_id'].to_numpy()).mean()
        feature_parts.extend(state_means.to_numpy())

    testInput = np.column_stack(feature_parts)  # (1, n_features)

    # Extend the sequence with the new step and store the un-scaled sales.
    X_test = np.concatenate((X_test, testInput[np.newaxis]), axis=1)
    predictions.append(sc.inverse_transform(testInput)[:, 0:n_items])
    


In [None]:
# Forecasting is finished; drop the model and the training frame.
del regressor
del dt

# WRMSSE
Credits to https://www.kaggle.com/jeffzi/fast-clear-wrmsse-18ms for this part

In [None]:
# Folder holding the pre-computed WRMSSE helper files
# (alternative location: '/kaggle/input/fast-wrmsse-and-sw-frame/').
# NOTE(review): read_pickle executes whatever the pickle contains -- only
# point this at files from a trusted source.
file_pass = '/kaggle/input/wrmsse/'

# S and W weights (and their combination SW) for the WRMSSE calculation
sw_df = pd.read_pickle(file_pass+'sw_df.pkl')
S, W, SW = (sw_df[column].values for column in ('s', 'w', 'sw'))

# Roll-up matrix used to calculate the aggregates; kept as a sparse CSR matrix
roll_mat_df = pd.read_pickle(file_pass+'roll_mat_df.pkl')
roll_index = roll_mat_df.index
roll_mat_csr = csr_matrix(roll_mat_df.values)

# The dense frame is only needed to build the sparse matrix.
del roll_mat_df

# Function to do quick rollups:
def rollup(v):
    '''
    Aggregate item-level values with the sparse roll-up matrix.

    v - np.array of size (30490 rows, n day columns)
    returns roll_mat_csr multiplied by v: one row per row of the roll-up
    matrix (presumably 42840 aggregated series), n day columns
    '''
    return roll_mat_csr.dot(v)


# Function to calculate WRMSSE:
def wrmsse(preds, y_true, score_only=True, s = S, w = W, sw=SW):
    '''
    Calculate the WRMSSE of item-level forecasts.

    preds - Predictions: array or pd.DataFrame of size (30490 rows, N day columns)
    y_true - True values: array or pd.DataFrame of the same size as preds
    score_only - if True return only the score, otherwise (score, score_matrix)
    s - per-series scale: np.array with one entry per rolled-up series
    w - per-series weight: np.array with one entry per rolled-up series
    sw - combined per-series factor applied to each series' RMSE when score_only

    Both inputs are converted to float arrays and compared positionally, so
    row/column labels of DataFrames are ignored (label alignment would turn
    non-matching column names into NaNs).

    Raises ValueError when preds and y_true differ in shape.
    '''
    preds_arr = np.asarray(preds, dtype=float)
    true_arr = np.asarray(y_true, dtype=float)
    if preds_arr.shape != true_arr.shape:
        raise ValueError(
            f"preds and y_true must have the same shape, got "
            f"{preds_arr.shape} and {true_arr.shape}"
        )

    # Squared error of every rolled-up series for every day.
    squared_error = np.square(rollup(preds_arr - true_arr))

    # NOTE(review): the division by 12 is kept from the original kernel;
    # presumably the number of aggregation levels -- confirm.
    if score_only:
        return np.sum(np.sqrt(np.mean(squared_error, axis=1)) * sw)/12

    score_matrix = (squared_error * np.square(w)[:, None]) / s[:, None]
    score = np.sum(np.sqrt(np.mean(score_matrix, axis=1)))/12
    return score, score_matrix

In [None]:
# Stack the 28 per-day predictions into (28, n) and transpose so that rows and
# columns line up with dt_eval.T, then score.
score = wrmsse(np.array(predictions).reshape((28,-1)).T, dt_eval.T)
print(score)

# Submission File

In [None]:
import time

# One row per item, one column per forecast day.
forecast = pd.DataFrame(data=np.array(predictions).reshape(28, 30490)).T

# The same forecast block is written twice, one copy below the other, to fill
# every row of the sample submission.
submission = pd.concat((forecast, forecast), ignore_index=True)

sample_submission = pd.read_csv(dataPath + "/sample_submission.csv")
idColumn = sample_submission[["id"]]

# Put the ids in front of the forecast columns, then apply the required header.
submission.insert(0, "id", idColumn["id"])
colsdeneme = ["id"] + [f"F{i}" for i in range(1, 29)]
submission.columns = colsdeneme

currentDateTime = time.strftime("%d%m%Y_%H%M%S")

submission.to_csv("submission.csv", index=False)