In [15]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tqdm import tqdm
tqdm.pandas()


# Load every per-session AAPL array from ./data and concatenate them along the
# minute axis into one cube `d` of shape (layer, price level, minute).
npy_data_path = os.path.join('data', 'AAPL*.npy')
# Sorted so sessions are appended in file-name order
# (presumably chronological -- confirm against the file naming scheme).
files_to_load = sorted(glob.glob(npy_data_path))

if not files_to_load:
    sys.exit('Files to load not found')

d_num_layers = 6  # Price, Ordered volume, Filled volume, Canceled volume, Pending volume, Time index
d_num_price_levels = 10 * 2 * 2  # price level ($10) per 50 cents per level (*2) per side (*2)
d_minutes_per_day = int(6.5 * 60)  # 6 hours 30 minutes of data per trading session, from 9:30 to 16:00
d_total_minutes = d_minutes_per_day * len(files_to_load)

d = np.zeros((d_num_layers, d_num_price_levels, d_total_minutes), np.float32)

# Each file must fill exactly one session-sized slice of `d`. Checking the
# shape up front avoids NumPy silently broadcasting a smaller array into the
# slice, and names the offending file when a session has the wrong length.
expected_day_shape = (d_num_layers, d_num_price_levels, d_minutes_per_day)

load_pointer = 0  # first minute index of the session currently being written
for file in files_to_load:
    day_data = np.load(file)
    if day_data.shape != expected_day_shape:
        raise ValueError(
            f'{file}: expected array of shape {expected_day_shape}, '
            f'got {day_data.shape}'
        )
    d[:, :, load_pointer:load_pointer + d_minutes_per_day] = day_data
    load_pointer += d_minutes_per_day


# make X and y
#
# Cut `d` into consecutive, non-overlapping windows of `x_block_length`
# minutes. Each window becomes one sample of X; its target y is the change of
# the layer-0 (price) value at row `highest_bid_position` between the last
# minute of the window and the minute `y_block_length` minutes later, scaled
# by `error_severity_multiplier`.

d_pointer = 0  # first minute of the window currently being built
x_block_length = 10 # in minutes
y_block_length = 1 # in minutes
# Row of `d` used as the reference price level (the highest bid, going by the
# name -- TODO confirm against the code that produced the .npy files).
highest_bid_position = d_num_price_levels // 2
error_severity_multiplier = 1000
X_y_pointer = 0  # index of the next sample to write

# A window starting at p covers minutes [p, p + x_block_length), so its last
# observed minute is p + x_block_length - 1 and the target that lies
# y_block_length minutes after it is at p + x_block_length + y_block_length - 1.
y_offset = x_block_length + y_block_length - 1

# Number of windows whose target minute still lies inside `d`. Derived from
# the same bound as the loop below so the preallocated arrays always match
# the number of iterations, whatever the block lengths are.
X_y_entries_count = max(0, (d_total_minutes - y_offset - 1) // x_block_length + 1)


X = np.zeros((X_y_entries_count, d_num_price_levels, x_block_length, d_num_layers), np.float32)
y = np.zeros((X_y_entries_count, 1), np.float32)

while d_pointer + y_offset < d_total_minutes:
    new_X = d[:, :, d_pointer:d_pointer + x_block_length]

    last_X_price = new_X[0, highest_bid_position, -1]
    raw_new_y = d[0, highest_bid_position, d_pointer + y_offset]
    new_y = (raw_new_y - last_X_price) * error_severity_multiplier

    # (layer, level, minute) -> (level, minute, layer): layers go last.
    X[X_y_pointer] = new_X.transpose(1, 2, 0)
    y[X_y_pointer] = new_y

    X_y_pointer += 1
    d_pointer += x_block_length
    # d_pointer += 60 # 5 min


# Chronological split: the first ~90% of the samples are used for training and
# the remaining ~10% for testing (no shuffling, so the test set is the most
# recent data).
train_data_pointer = len(X) - int(len(X) / 10) # 10%

X_train = X[:train_data_pointer]
y_train = y[:train_data_pointer]

# Slice to the end of the arrays: a trailing `-1` bound would silently drop
# the last valid sample from the test set.
X_test = X[train_data_pointer:]
y_test = y[train_data_pointer:]

# X_train, y_train = shuffle(X_train, y_train)

In [16]:
def _leaky_relu(x):
    """Leaky ReLU with alpha=0.01, the activation of every hidden layer."""
    return tf.nn.leaky_relu(x, alpha=0.01)


# Widths of the hidden Dense layers, from the input side to the output side.
_hidden_layer_widths = (2400, 2400, 1200, 600, 300, 50)

# Fully connected regressor: flatten each sample, pass it through the hidden
# stack, and emit a single linear output.
model = Sequential()
model.add(Flatten())
for _width in _hidden_layer_widths:
    model.add(Dense(_width, activation=_leaky_relu))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')

In [None]:
batch_size = 32

# Trim both sets to a whole number of batches (drop the trailing remainder).
train_data_slice = len(X_train) - len(X_train) % batch_size
test_data_slice = len(X_test) - len(X_test) % batch_size

# Train for a fixed 200 epochs, evaluating on the trimmed test set after each
# epoch; the fit call stays the last expression so the cell shows its result.
model.fit(
    x=X_train[:train_data_slice],
    y=y_train[:train_data_slice],
    validation_data=(X_test[:test_data_slice], y_test[:test_data_slice]),
    batch_size=batch_size,
    epochs=200,
)

In [None]:
# Plot the metrics recorded by the last fit() call, one line per metric.
training_history = pd.DataFrame(model.history.history)
training_history.plot()

In [8]:
# Run the trained model on the held-out samples; `predictions` is reused by
# the evaluation and plotting cells that follow.
predictions = model.predict(X_test)

In [None]:
# Display the raw prediction array as the cell output (shows the whole array;
# a slice such as predictions[:5] is easier to read when it is large).
predictions

In [None]:
# Mean absolute error on the test set. y was multiplied by
# error_severity_multiplier when it was built, so this value is in those
# scaled units, not in raw price units.
mean_absolute_error(y_test, predictions)

In [None]:
# Overlay the model output and the true targets for the test set.
# plt.scatter(y_test,predictions)
fig, ax = plt.subplots(figsize=(20,12))

# Our predictions
ax.plot(predictions, 'purple')

# Perfect predictions
ax.plot(y_test, 'green')

In [None]:
# Training targets, in sample order.
fig, ax = plt.subplots(figsize=(20,12))
ax.plot(y_train, 'green')

In [None]:
# Model predictions on the test set, on their own scale.
fig, ax = plt.subplots(figsize=(20,12))
ax.plot(predictions, 'green')

In [11]:
# plt.imsave('layer_0.png', d[0], cmap='hot')
# plt.imsave('layer_1.png', d[1], cmap='hot')
# plt.imsave('layer_2.png', d[2], cmap='hot')
# plt.imsave('layer_3.png', d[3], cmap='hot')
# plt.imsave('layer_4.png', d[4], cmap='hot')
# plt.imsave('layer_5.png', d[5], cmap='hot')
#
# d.shape

In [None]:
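# Scaling (currently disabled). Two alternative min-max normalisations of
# X_train / X_test are kept below for reference: a single global min/max over
# all axes, and a per-feature MinMaxScaler fitted on the flattened training
# samples. MinMaxScaler is not among the imports in the first cell and would
# have to be imported before the second variant is re-enabled.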

# v_min = X_train.min(axis=(0, 1, 2, 3), keepdims=True)
# v_max = X_train.max(axis=(0, 1, 2, 3), keepdims=True)
#
# X_train = (X_train - v_min)/(v_max - v_min)
# X_test = (X_test - v_min)/(v_max - v_min)

# scaler = MinMaxScaler()
#
# n_train_samples, n_train_x, n_train_y, n_train_z = X_train.shape
# X_train = X_train.reshape((n_train_samples, n_train_x * n_train_y * n_train_z))
#
# n_test_samples, n_test_x, n_test_y, n_test_z = X_test.shape
# X_test = X_test.reshape((n_test_samples, n_test_x * n_test_y * n_test_z))
#
# scaler.fit(X_train)
#
# X_train = scaler.transform(X_train)
# X_test = scaler.transform(X_test)
#
# X_train = X_train.reshape((n_train_samples, n_train_x, n_train_y, n_train_z))
# X_test = X_test.reshape((n_test_samples, n_test_x, n_test_y, n_test_z))