In [1]:
import numpy as np

from data_loaders import *

In [2]:
# In this notebook we will try to use a NN model using Keras
# that chooses at what point in the next 10 minutes to buy
# one unit of the stock.

# The model receives as an input the bid/ask prices/volumes
# for the last 50 instants in time, and has to output at
# which of the next 120 points it wants to buy. It then
# receives a loss equal to the price it costs to buy at that time.

# Data reading-- you do not need to understand this

In [3]:
offset = 120 # 10 minutes roughly
data_size = 400000 # size of train+test set
slice_size = 50

X = read_scale(SAMPLE_PATH, no_obs=data_size, slice_size=slice_size)

In [4]:
_, sell = read_bid_ask(SAMPLE_PATH, no_obs=data_size+offset)
y = np.hstack([sell.shift(x).values.reshape(-1, 1) 
               for x in range(offset)])
# Remove the first corrupted rows
y = y[offset+slice_size:].copy()

In [5]:
# CHECK YOU UNDERSTAND WHY THE SHAPES ARE THE WAY THEY ARE
X.shape, y.shape

((399950, 1000), (399950, 120))

In [6]:
X_tr, X_ts = X[:int(0.8*data_size)], X[int(0.8*data_size):]
y_tr, y_ts = y[:int(0.8*data_size)], y[int(0.8*data_size):]

# Defining custom loss and keras model

In [7]:
import tensorflow as tf
import tensorflow.keras.backend as K

In [21]:
# As a loss, take the dot product between the probability vector of selling
# at that time with price at that time
def my_loss(y_true, y_pred):
    return K.mean(
            -K.dot(y_true, K.transpose(y_pred))
    )

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

model = Sequential()
model.add(Dense(128, input_shape=(slice_size*20,))) #1000
model.add(Activation('relu'))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(offset)) #output shape: 120
model.add(Activation('softmax')) #squish output into a probability vector

In [23]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 128)               128128    
_________________________________________________________________
activation_3 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_4 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 120)               15480     
_________________________________________________________________
activation_5 (Activation)    (None, 120)               0         
Total params: 160,120
Trainable params: 160,120
Non-trainable params: 0
________________________________________________

In [24]:
model.compile(loss=my_loss,
              optimizer='sgd')

# Training

In [25]:
# Training takes a while without GPU, 4 mins on my laptop
# It actually converges in just one epoch with this small batch size
hs = model.fit(X_tr, y_tr, epochs=3, batch_size=16, verbose=1)

Train on 320000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [26]:
# Let's check how it performs on the train and test sets
y_tr_pred = model.predict(X_tr[:20000])
y_ts_pred = model.predict(X_ts[:20000])

In [27]:
# On the train set we spend an average of
np.dot(y_tr_pred, y_tr[:20000].T).mean()

3181.599906042315

In [28]:
# Versus an overall mean price of
y_tr[:20000].mean().mean()

3181.678803583334

In [None]:
# Which is slighly lower!