## Imports

In [1]:
# from src.config.config import Config
# from src.db_writer.db import DB
import pandas as pd
import numpy as np
# Never truncate wide frames when displaying them: show every column.
pd.options.display.max_columns = None

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
# Default size (width, height) for every figure created after this point.
plt.rcParams["figure.figsize"] = (12.8, 7.2)

## Preparation

In [3]:
# df_total = pd.read_csv('data/df_total.csv', index_col=[0])
# df_razmetka = pd.read_csv('data/df_razmetka.csv', index_col=[0])

# df_fin = df_total.copy()
# df_fin['extr2'] = df_razmetka['extr2']
# df_fin.drop(columns=['symbol_perp','time_diff', 'diff', 'diff_shift1', 'is_extremum'], inplace=True)

In [4]:
# Load the pre-computed feature table; the first CSV column is used as the index.
df_fin = pd.read_csv('data/all_features.csv', index_col=[0])

Let's assume the price range in which a contract is opened lies between

(high + max(open, close))/2

and

(low + min(open, close))/2

In [5]:
# df_fin.loc[~df_fin['extr7'], ['ismaxima', 'isminima']] = False

In [6]:
# Entry band per bar: each border is the midpoint between the bar's extreme
# (low / high) and the nearer of its open/close prices.
open_close = df_fin[['open_perp', 'close_perp']]
df_fin['low_border'] = (df_fin['low_perp'] + open_close.min(axis=1)) / 2
df_fin['up_border'] = (df_fin['high_perp'] + open_close.max(axis=1)) / 2

# target1 is defined only on confirmed extrema (extr7): the upper border on
# maxima, the lower border on minima. Every other row stays NaN for now.
confirmed_max = df_fin['ismaxima'] & df_fin['extr7']
confirmed_min = df_fin['isminima'] & df_fin['extr7']
df_fin.loc[confirmed_max, 'target1'] = df_fin.loc[confirmed_max, 'up_border']
df_fin.loc[confirmed_min, 'target1'] = df_fin.loc[confirmed_min, 'low_border']

# target2 on an extremum row is the target1 of the *following* extremum row
# (the shift is applied within the extr7 subset only).
df_fin.loc[df_fin['extr7'], 'target2'] = df_fin.loc[df_fin['extr7'], 'target1'].shift(-1)

# Move the targets one row up and back-fill, so every row carries the values of
# the next extremum strictly after it. `.bfill()` replaces the deprecated
# `fillna(method='bfill')` and yields the same result.
df_fin[['target1', 'target2']] = df_fin[['target1', 'target2']].shift(-1).bfill()

# Rows after the last usable extremum have no target; they are dropped here
# together with any row that has a NaN in any other column.
# NOTE(review): this cell is not idempotent - re-running it shifts the targets again.
df_fin = df_fin.dropna()

In [7]:
# Sanity check: first 20 rows of the last four columns
# (low_border, up_border, target1, target2).
df_fin.iloc[:, -4:].head(20)

Unnamed: 0,low_border,up_border,target1,target2
0,21422.2,21445.0,21445.9,21344.8
1,21437.5,21445.9,21344.8,21392.75
2,21430.6,21446.8,21344.8,21392.75
3,21423.5,21431.95,21344.8,21392.75
4,21404.0,21438.35,21344.8,21392.75
5,21359.0,21406.95,21344.8,21392.75
6,21345.95,21369.75,21344.8,21392.75
7,21323.5,21355.75,21344.8,21392.75
8,21344.8,21364.7,21392.75,21324.0
9,21364.35,21385.5,21392.75,21324.0


## Train test

In [8]:
# Regression targets, and the helper columns that must not leak into the features.
target_cols = ['target1', 'target2']
helper_cols = ['ismaxima', 'isminima', 'extr7', 'low_border', 'up_border']

y = df_fin[target_cols]
X = df_fin.drop(columns=helper_cols + target_cols)

In [9]:
from sklearn.preprocessing import StandardScaler

# Keep the fitted scalers instead of throwing them away: y_scaler is needed to
# map model outputs back to prices via y_scaler.inverse_transform(...).
X_scaler = StandardScaler()
y_scaler = StandardScaler()

# NOTE(review): both scalers are fitted on *all* rows, i.e. before the
# chronological train/val/test split further down, so validation and test
# statistics leak into the scaling. Fitting on the training rows only would fix
# this, but requires regenerating the cached LSTM tensors - confirm before changing.
X_scaled = X_scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y)

In [10]:
# Peek at the standardised targets from row 61 onwards.
# NOTE(review): the offset 61 is unexplained - create_lstm_data with the default
# look_back=60 starts its targets at row 59; confirm which offset was intended.
y_scaled[61:]

array([[-0.08593977, -0.06863393],
       [-0.08593977, -0.06863393],
       [-0.08593977, -0.06863393],
       ...,
       [ 1.37833471,  1.3602327 ],
       [ 1.37833471,  1.3602327 ],
       [ 1.37833471,  1.3602327 ]])

In [11]:
def create_lstm_data(X, y, look_back=60):
    """Turn a row-aligned feature/target pair into look-back windows.

    Window ``i`` consists of rows ``i .. i + look_back - 1`` of ``X`` and is
    paired with the target of its last row, ``y[i + look_back - 1]``.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_features)
        Feature rows in chronological order.
    y : array-like, shape (n_rows, ...)
        Targets aligned row-by-row with ``X``.
    look_back : int, default 60
        Number of consecutive rows per window.

    Returns
    -------
    lstm_data : np.ndarray, shape (n_rows - look_back + 1, look_back, n_features)
    lstm_target : np.ndarray, shape (n_rows - look_back + 1, ...)

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or ``look_back`` is smaller than 1
        or larger than the number of rows.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if len(X) != len(y):
        raise ValueError(f'X and y must have the same number of rows, got {len(X)} and {len(y)}')

    n_windows = len(X) - look_back + 1
    if look_back < 1 or n_windows < 1:
        raise ValueError(f'look_back must be between 1 and len(X)={len(X)}, got {look_back}')

    lstm_data = np.stack([X[start:start + look_back] for start in range(n_windows)])
    # Use the `y` argument: the previous version sliced the notebook-global
    # `y_scaled` here and silently ignored whatever the caller passed in.
    lstm_target = y[look_back - 1:]

    print('initial X shape: ', X.shape, 'y shape: ', y.shape, 'look_back: ', look_back)
    print('output X shape: ', lstm_data.shape, 'y shape:', lstm_target.shape)
    return lstm_data, lstm_target


In [12]:
# X_lstm, y_lstm = create_lstm_data(X_scaled, y_scaled)
# np.save('X_lstm_60.npy', X_lstm)
# np.save('y_lstm_60.npy', y_lstm)

In [13]:
# Read the cached window tensors from disk instead of rebuilding them.
X_lstm, y_lstm = map(np.load, ('LSTM_data/X_lstm_60.npy', 'LSTM_data/y_lstm_60.npy'))

In [14]:
# Chronological split: the last 15000 windows are the test set, the 15000
# before them the validation set, and everything earlier is used for training.
train_rows = slice(None, -30000)
val_rows = slice(-30000, -15000)
test_rows = slice(-15000, None)

X_train, y_train = X_lstm[train_rows], y_lstm[train_rows]
X_val, y_val = X_lstm[val_rows], y_lstm[val_rows]
X_test, y_test = X_lstm[test_rows], y_lstm[test_rows]

print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape)

(351329, 60, 101) (351329, 2) (15000, 60, 101) (15000, 2) (15000, 60, 101) (15000, 2)


## Simple Keras LSTM model

In [15]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [16]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.activations import relu
from tensorflow.keras.layers import LSTM, Dense, Activation

In [17]:
# Show which GPU devices TensorFlow has detected before building the model.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [18]:
# Build the network: one LSTM layer reading (timesteps, features) windows,
# followed by two ReLU dense layers and a 2-unit linear head (target1, target2).
# Wider/deeper dense stacks (256/512 units) were tried earlier and are not used.
n_timesteps, n_features = X_train.shape[1:]

model = keras.models.Sequential([
    LSTM(32, input_shape=(n_timesteps, n_features)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='linear'),
])

In [19]:
model.compile(loss='mean_squared_error', optimizer='adam')

# Stop automatically once the validation loss has not improved for a while and
# roll back to the best weights, instead of interrupting the kernel by hand
# (which leaves the cell in an error state).
# NOTE(review): patience=10 is a starting point, not a tuned value.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Keep the History object so the loss curves can be inspected after training.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=2048,
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100

KeyboardInterrupt: 