In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Bidirectional, Conv1D, MaxPooling1D, Flatten, TimeDistributed

In [None]:
# Load the input CSV files into DataFrames.

data_dir = 'data/'

train_sales = pd.read_csv(f'{data_dir}sales_train_validation.csv')
# sell_prices.csv is currently not loaded:
# sell_prices = pd.read_csv(f'{data_dir}sell_prices.csv')
calendar = pd.read_csv(f'{data_dir}calendar.csv')
submission_file = pd.read_csv(f'{data_dir}sample_submission.csv')
# Note: the event encoding is read from the working directory, not from data_dir.
events_feature = pd.read_csv('eventencoding.csv')

In [None]:
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the narrowest dtype covering their value range.

    Only columns whose dtype is one of int16/int32/int64/float16/float32/float64
    are considered; every other column is left untouched. Integer columns are
    tried against int8, int16, int32, int64 (in that order); float columns
    against float16, float32 and otherwise become float64.

    The range checks are inclusive, so a column whose minimum or maximum is
    exactly a dtype limit (e.g. 127 or -128 for int8) still gets that dtype.

    Note: for floats only the value *range* is checked, not precision, so a
    downcast to float16/float32 can round the stored values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    verbose : bool, default True
        If True, print the final memory usage and the relative reduction.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # Pick the first (narrowest) integer type whose limits enclose the data.
            # If min/max are NaN (empty column) no candidate matches and the
            # column keeps its dtype.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, infinite, or NaN min/max: fall back to float64.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the ratio so a frame reporting zero bytes does not produce NaN.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [None]:
# Downcast the numeric columns of the sales table (takes about 4 minutes).
train_sales = reduce_mem_usage(train_sales, verbose=True)

In [None]:
# Build the training matrix: drop the identifier columns and transpose what remains.
sales = train_sales.drop(
    columns=["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
).T

# Min-max scale each column, then wrap the result in a DataFrame with default integer labels.
scaler = MinMaxScaler()
sales = pd.DataFrame(scaler.fit_transform(sales))

In [None]:
# add extra features

# event feature: append the event encoding column-wise, aligned by row position.
# Only the first len(sales) rows of events_feature are used; the rows after that
# are read later when forecasting beyond the training period.
# Columns already present in `sales` are skipped so that re-running this cell
# does not append the event columns a second time.
new_event_cols = [col for col in events_feature.columns if col not in sales.columns]
if new_event_cols:
    sales = pd.concat([sales, events_feature.iloc[:len(sales)][new_event_cols]], axis=1)

In [None]:
# Sliding-window geometry: `timesteps` input rows followed by `prediction_steps` target rows.
timesteps, prediction_steps = 28, 1
len_window = timesteps + prediction_steps

# Number of rows in `sales` and how many complete windows fit into them.
nr_training_days = len(sales)
nr_sets = nr_training_days - len_window + 1

In [None]:
# Build the supervised samples.
# X[k] holds rows k .. k+timesteps-1 of `sales` with all columns;
# y[k] holds the row right after that window, restricted to the first 30490 columns.
sales_values = sales.to_numpy()
X = np.array([sales_values[start:start + timesteps] for start in range(nr_sets)])
y = np.array([sales_values[start + timesteps, :30490] for start in range(nr_sets)])

In [None]:
# Sanity check: shape of X on the first line, shape of y on the second.
print(X.shape, y.shape, sep='\n')

In [None]:
# CNN-LSTM model: a 1-D convolution and pooling stage feeding two bidirectional
# LSTM layers, with a dense layer producing one value per output column.

n_features, n_outputs = X.shape[2], y.shape[1]

model = Sequential([
    Conv1D(filters=32, kernel_size=1, activation='relu', input_shape=(timesteps, n_features)),
    MaxPooling1D(pool_size=2),
    Bidirectional(LSTM(20, activation='relu', return_sequences=True)),
    Bidirectional(LSTM(10, activation='relu')),
    Dense(n_outputs),
])
model.compile(loss='mse', optimizer='adam')

In [None]:
# Train the model on all windows (1 epoch takes about 1.5 minutes).
# Note: no validation data or validation split is passed to fit().

model.fit(
    x=X,
    y=y,
    batch_size=32,
    epochs=10,
    verbose=1,
)

In [None]:
# test model: recursive multi-day forecast
#
# Each predicted day is fed back as input for the next prediction. The rolling
# input window is kept in a NumPy array, so `sales` itself is never modified
# and this cell can be re-run (or run after the training cells again) without
# predictions leaking into the history.

forecast_horizon = 28
# Row position in events_feature of the first day after the training history.
n_history_days = sales.shape[0]

# Start from the last `timesteps` rows of the (scaled sales + event) history.
window = sales.iloc[-timesteps:].to_numpy()
scaled_forecasts = []

for i in range(forecast_horizon):
    # get prediction for the next day; the model expects a leading batch axis
    prediction = model.predict(window[np.newaxis, ...])

    # get event feature for predicted day
    events = events_feature.iloc[n_history_days + i].to_numpy()

    # slide the window: drop the oldest day, append prediction+events as the newest day
    next_row = np.append(prediction[0], events)
    window = np.vstack([window[1:], next_row])

    scaled_forecasts.append(prediction[0])

# One row per forecast day, one column per model output; float64 so the
# inverse scaling is computed in double precision.
predictions = np.asarray(scaled_forecasts, dtype=np.float64)
predictions = scaler.inverse_transform(predictions)
# NOTE(review): abs() mirrors negative forecasts to positive values instead of
# clipping them at zero; kept as-is to preserve existing results — confirm intent.
predictions = np.round(np.abs(predictions))
predictions = pd.DataFrame(predictions).T

In [None]:
# Build the submission table: the forecast block is stacked on top of itself
# (presumably to fill both halves of the sample submission — confirm),
# cast to integers, labelled F1..F28 and prefixed with the sample file's ids.

final_submission = pd.concat([predictions, predictions], ignore_index=True).astype(int)
final_submission.columns = [f"F{i}" for i in range(1, 29)]
final_submission.insert(0, 'id', submission_file['id'])

final_submission.to_csv('submission.csv', index=False)