In [1]:
import os
import gc
import glob

import numpy as np 
import pandas as pd 

from itertools import islice

from multiprocessing import Pool
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K

from tensorflow.keras.layers import Dense, Lambda, Dot, Activation, Concatenate
from tensorflow.keras.layers import Layer

from sklearn.model_selection import train_test_split

from tqdm.auto import tqdm
tqdm.pandas()

import warnings
warnings.filterwarnings('ignore')

In [2]:
import psutil
psutil.cpu_count()

16

In [3]:
NTHREADS = psutil.cpu_count()-2
SEED = 42
TRAIN_BATCH_SIZE = 256
TEST_BATCH_SIZE = 256
BUCKET_WINDOWS2 = [(0, 100), (100, 200), (200, 300), (300, 400), (400, 500), (500, 600)]

DATA_PATH = 'input/optiver-realized-volatility-prediction'
BOOK_TRAIN_PATH = 'input/optiver-realized-volatility-prediction/book_train.parquet'
TRADE_TRAIN_PATH = 'input/optiver-realized-volatility-prediction/trade_train.parquet'
BOOK_TEST_PATH = 'input/optiver-realized-volatility-prediction/book_test.parquet'
TRADE_TEST_PATH = 'input/optiver-realized-volatility-prediction/trade_test.parquet'
CHECKPOINT = 'model_checkpoint/model_01'

book_skip_columns = trade_skip_columns = ['time_id', 'row_id', 'target']

In [4]:
import pickle
data_input = open('LSTMtemp/np_train.pkl','rb')
np_train = pickle.load(data_input)
data_input.close()

data_input = open('LSTMtemp/targets.pkl','rb')
targets = pickle.load(data_input)
data_input.close()

In [5]:
idx = np.arange(np_train.shape[0])
train_idx, valid_idx = train_test_split(idx, shuffle=False, test_size=0.1, random_state=2021)

# Scaler
transformers = []
for i in tqdm(range(np_train.shape[1])):
    a = np.nan_to_num(np_train[train_idx, i, :])
    b = np.nan_to_num(np_train[valid_idx, i, :])

    transformer = StandardScaler() # StandardScaler is very useful!
    np_train[train_idx, i, :] = transformer.fit_transform(a)
    np_train[valid_idx, i, :] = transformer.transform(b)
    transformers.append(transformer) # Save Scalers for the inference stage

np_train = np.nan_to_num(np_train)

import pickle
data_output = open('LSTMtemp/np_train.pkl','wb')
pickle.dump(np_train,data_output)
data_output.close()

data_output = open('LSTMtemp/targets.pkl','wb')
pickle.dump(targets,data_output)
data_output.close()

In [6]:
# Loss function
def rmspe(y_true, y_pred):
    return K.sqrt(K.mean(K.square((y_true - y_pred) / y_true)))

In [7]:
# https://github.com/philipperemy/keras-attention-mechanism
class Attention(Layer):

    def __init__(self, units=128, **kwargs):
        self.units = units
        super().__init__(**kwargs)

    def __call__(self, inputs):
        """
        Many-to-one attention mechanism for Keras.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @return: 2D tensor with shape (batch_size, 128)
        @author: felixhao28, philipperemy.
        """
        hidden_states = inputs
        hidden_size = int(hidden_states.shape[2])
        print('h_t shape:',hidden_states.shape)
        # Inside dense layer
        #              hidden_states            dot               W            =>           score_first_part
        # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
        # W is the trainable weight matrix of attention Luong's multiplicative style score
        score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
        #            score_first_part           dot        last_hidden_state     => attention_weights
        # (batch_size, time_steps, hidden_size) dot   (batch_size, hidden_size)  => (batch_size, time_steps)
        h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
        score = Dot(axes=[1, 2], name='attention_score')([h_t, score_first_part])
        attention_weights = Activation('softmax', name='attention_weight')(score)
        # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
        context_vector = Dot(axes=[1, 1], name='context_vector')([hidden_states, attention_weights])
        pre_activation = Concatenate(name='attention_output')([context_vector, h_t])
        attention_vector = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
        return attention_vector

    def get_config(self):
        return {'units': self.units}

    @classmethod
    def from_config(cls, config):
        return cls(**config)

In [8]:
class DataGenerator(tf.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, ds, targets, batch_size, shape=(32,32,32), shuffle=True):
        'Initialization'
        self.batch_size = batch_size
        self.targets = targets
        self.shape = shape
        self.ds = ds
        self.ids = np.arange(ds.shape[0])
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.ids[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        ids_temp = [self.ids[k] for k in indexes]


        x = self.ds[ids_temp, :, :]
        y = self.targets[ids_temp]
        
        return x, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.ids = np.arange(self.ds.shape[0])
        if self.shuffle == True:
            np.random.shuffle(self.ids)

In [9]:
def get_model_v1():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(50, input_shape=(np_train.shape[1], np_train.shape[2]), return_sequences=False))
#     model.add(tf.keras.layers.LSTM(50, input_shape=(np_train.shape[1], np_train.shape[2]), return_sequences=False))
#     model.add(Attention(256)) # the gain is small, but ...
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(1))

    model.compile(loss=rmspe, optimizer='adam')
    model.summary()
    return model

In [10]:
training_generator = DataGenerator(np_train[train_idx, :, :], targets[train_idx], batch_size=TRAIN_BATCH_SIZE)
validation_generator = DataGenerator(np_train[valid_idx, :, :], targets[valid_idx], batch_size=TRAIN_BATCH_SIZE)

model = get_model_v1()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 50)                16000     
_________________________________________________________________
flatten (Flatten)            (None, 50)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 51        
Total params: 16,051
Trainable params: 16,051
Non-trainable params: 0
_________________________________________________________________


In [None]:
# training_generator = DataGenerator(np_train[train_idx, :, :], targets[train_idx], batch_size=TRAIN_BATCH_SIZE)
# validation_generator = DataGenerator(np_train[valid_idx, :, :], targets[valid_idx], batch_size=TRAIN_BATCH_SIZE)

# model = get_model_v1()

checkpoint_filepath = CHECKPOINT
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True)

model_earlystopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

NEPOCHS = 25
history = model.fit_generator(generator=training_generator, 
                              callbacks=[model_checkpoint_callback, model_earlystopping_callback], 
                              epochs=NEPOCHS, 
                              validation_data=validation_generator, 
                              use_multiprocessing=False, 
                              workers=NTHREADS)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
  64/1507 [>.............................] - ETA: 27s - loss: 0.2451

In [None]:
import matplotlib.pyplot as plt

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [11]:
a = np.min(history.history['val_loss'])
print(f'The best val_loss is {a:.4f}')

NameError: name 'history' is not defined

In [12]:
del np_train, np_books, np_trades
z = gc.collect()

NameError: name 'np_books' is not defined

In [None]:
%%time
pool = Pool(NTHREADS)
r = pool.map(process_book_test_chunk, book_test_chunks)
pool.close()

a1, _ = zip(*r)
np_books = [np.concatenate(a1[i], axis=0) for i in range(len(a1))]
np_books = np.concatenate(np_books, axis=0)

In [None]:
%%time
pool = Pool(NTHREADS)
r = pool.map(process_trade_test_chunk, trade_test_chunks)
pool.close()

a1, _ = zip(*r)
np_trades = [np.concatenate(a1[i], axis=0) for i in range(len(a1))]
np_trades = np.concatenate(np_trades, axis=0)

In [None]:
print(np_books.shape, np_trades.shape)
np_test = np.concatenate((np_books, np_trades), axis=2)
print(np_test.shape)

In [None]:
# Scaler
for i in tqdm(range(np_test.shape[1])):
    transformer = transformers[i]
    np_test[:, i, :] = transformer.transform(np.nan_to_num(np_test[:, i, :]))


In [None]:
np_test = np.nan_to_num(np_test)

In [None]:
model.load_weights(checkpoint_filepath)
res = model.predict(np_test, batch_size=TEST_BATCH_SIZE)
res = np.clip(res, 0, 1)

In [None]:
import shutil
shutil.rmtree('./model_checkpoint')