In [None]:
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
from project_functions.sample_feed_v0_single import SampleFeed

In [None]:
# Global parameters
# Tunable constants for the whole notebook; everything below derives from these.

# -- Windowing --
TRAINING_WINDOW_SIZE = 90   # timesteps per input sample (first dim of the model input shape)
PREDICTED_WINDOW_SIZE = 7   # number of values predicted per sample (units of the output layer)

# -- Sampling / training schedule --
N_SAMPLES = 4               # passed to SampleFeed as samples_per_epoch
N_EPOCHS = 30               # maximum number of epochs for model.fit
BATCH_SIZE = 8              # batch size for model.fit; also used to derive steps_per_epoch

In [None]:
# Month+day stamp (e.g. "0131") used to tag the model files written by this run.
today_label = datetime.today().strftime("%m%d")

# Raw data
#
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open.
# Copy every array into a plain dict inside a `with` block so the underlying
# file handle is closed as soon as the data is in memory.
#
# NOTE(security): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code. Only load archives produced by this project.
with np.load("data/features_train.npz", allow_pickle=True) as npz_train:
    features_train = dict(npz_train)
with np.load("data/features_valid.npz", allow_pickle=True) as npz_valid:
    features_valid = dict(npz_valid)

# Calculated parameters

# Number of rows in the training 'visits' array.
n_rows_train = features_train['visits'].shape[0]

# Width of the model input: columns of 'time' + columns of 'page' + 1.
# NOTE(review): the extra 1 is presumably the visits series itself — confirm
# against SampleFeed.
n_features = features_train['time'].shape[1] + features_train['page'].shape[1] + 1

# Batches per epoch so that one epoch covers N_SAMPLES samples per training row.
steps_per_epoch = round(n_rows_train * N_SAMPLES / BATCH_SIZE)

# Samples drawn per row over the full training run (if no early stop occurs).
total_samples_per_page = N_SAMPLES * N_EPOCHS

In [None]:
# Sample Feed
#
# Configure the project's sampler with the input/output window lengths and the
# number of samples per epoch; it is used below to build the training and
# validation data.
sample_feed = SampleFeed(
    training_window_size=TRAINING_WINDOW_SIZE,
    predicted_window_size=PREDICTED_WINDOW_SIZE,
    samples_per_epoch=N_SAMPLES,
)

In [None]:
# Prepare data

# Training input: a sample stream built from the training features.
# NOTE(review): presumably a generator yielding (X, y) batches — confirm in SampleFeed.
Xy_train_gen = sample_feed.random_sample_stream(features_train)

# Validation input: an array of samples built from the validation features,
# one sample per page, unshuffled and with a fixed seed.
# NOTE(review): presumably fixed so validation is identical across runs — confirm.
Xy_valid = sample_feed.random_sample_array(
    features_valid,
    samples_per_page=1,
    shuffle=False,
    seed=0,
)

In [None]:
from keras import Sequential
from keras import layers
from keras import losses
from keras import metrics
from keras import optimizers
from keras import callbacks
from keras import regularizers

# Model: two stacked LSTM layers followed by a small dense head.
# Input per sample is (TRAINING_WINDOW_SIZE, n_features); the output layer emits
# PREDICTED_WINDOW_SIZE sigmoid-activated values.
# NOTE(review): sigmoid output implies targets are scaled to [0, 1] — confirm
# against SampleFeed.
model = Sequential([
    layers.InputLayer(input_shape=(TRAINING_WINDOW_SIZE, n_features)),
    # First LSTM returns the full sequence so the second LSTM can consume it.
    layers.LSTM(
        units=64,
        return_sequences=True,
        recurrent_regularizer=regularizers.L2(0.01),
    ),
    # Second LSTM keeps only its final state.
    layers.LSTM(
        units=32,
        return_sequences=False,
        recurrent_regularizer=regularizers.L2(0.01),
    ),
    layers.Dense(units=16, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(units=PREDICTED_WINDOW_SIZE, activation='sigmoid'),
])

model.compile(
    loss=losses.Huber(delta=0.25),
    optimizer=optimizers.Adam(learning_rate=1e-3),
    # `metrics` must be a list (or tuple/dict): Keras 3 rejects a bare metric
    # object. The metric keeps its default name, so the
    # `val_root_mean_squared_error` key used in the checkpoint path is unchanged.
    metrics=[metrics.RootMeanSquaredError()],
)

model.summary()

model_callbacks = [
    # Cut the learning rate by 10x after 4 epochs without val_loss improvement,
    # never going below 1e-5.
    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4, min_lr=1e-5),
    # Stop after 8 epochs without val_loss improvement. restore_best_weights
    # rolls the model back to the best epoch; without it the in-memory model
    # (and anything saved from it after training) holds the weights of the
    # last, worse epoch.
    callbacks.EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
    # Write a checkpoint every epoch. Only the first part of the path is an
    # f-string; the second part is a plain template whose {epoch} and
    # {val_root_mean_squared_error} placeholders Keras fills in at save time.
    callbacks.ModelCheckpoint(
        filepath=f"models/checkpoints/{today_label}" + "{epoch:02d}-{val_root_mean_squared_error:.4f}.keras",
        monitor='val_loss',
    ),
]

In [None]:
# Train the model. Validation data is evaluated at the end of every epoch, and
# the callbacks may lower the learning rate, stop training early, and write
# checkpoints.
# NOTE(review): Keras documents that batch_size should not be given when `x` is
# a generator (the generator defines the batches). Whether Xy_train_gen is one
# is not visible here — confirm; batch_size still applies to the validation
# arrays.
model_history = model.fit(
    Xy_train_gen,
    epochs=N_EPOCHS,
    steps_per_epoch=steps_per_epoch,
    batch_size=BATCH_SIZE,
    validation_data=Xy_valid,
    callbacks=model_callbacks,
)

In [None]:
# Persist the trained model. The explicit `.keras` extension matches the
# checkpoint files written during training and is required by Keras 3, which
# rejects extension-less paths.
# overwrite=False: if the target already exists, Keras asks interactively
# before replacing it instead of overwriting silently.
# NOTE(review): any code that loads this model must use the `.keras` path.
model.save(f"models/best_{today_label}.keras", overwrite=False)

In [None]:
# Learning curves: per-epoch training loss vs. validation loss.
# Labelled so the figure can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(model_history.history['loss'], color='black', label='loss')
ax.plot(model_history.history['val_loss'], color='blue', label='val_loss')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Training history')
ax.legend()
plt.show()