## Imports

In [29]:
# from src.config.config import Config
# from src.db_writer.db import DB
import pandas as pd
import numpy as np
# Never truncate wide frames: show every column when a DataFrame is displayed.
pd.options.display.max_columns = None

In [30]:
import matplotlib.pyplot as plt
import seaborn as sns
# Default canvas for every figure in this notebook: 12.8 x 7.2 (a 16:9 aspect ratio).
plt.rcParams.update({"figure.figsize": (12.8, 7.2)})

## Preparation

In [31]:
# Read both source tables; the first CSV column is used as the index in each of them.
df_total, df_razmetka = (
    pd.read_csv(csv_path, index_col=[0])
    for csv_path in ('data/df_total.csv', 'data/df_razmetka.csv')
)

In [32]:
# Working frame: df_total plus the 'extr2' flag taken from df_razmetka (aligned on the index),
# minus the helper columns that are not needed further down.
df_fin = (
    df_total
    .assign(extr2=df_razmetka['extr2'])
    .drop(columns=['symbol_perp', 'time_diff', 'diff', 'diff_shift1', 'is_extremum'])
)

Let's assume that the price range for opening a contract lies between

(high + max(open, close))/2

and

(low + min(open, close))/2

In [33]:
# Keep the 'ismaxima' / 'isminima' flags only on rows marked in 'extr2':
# on every other row both flags are overwritten with False (in place).
df_fin.loc[~df_fin['extr2'], ['ismaxima', 'isminima']] = False

In [38]:
# Price band per row:
#   low_border = (low  + min(open, close)) / 2
#   up_border  = (high + max(open, close)) / 2
df_fin['low_border'] = (df_fin['low_perp'] + df_fin[['open_perp', 'close_perp']].min(axis=1)) / 2
df_fin['up_border'] = (df_fin['high_perp'] + df_fin[['open_perp', 'close_perp']].max(axis=1)) / 2

# On flagged rows target1 is the band edge on the extremum's side:
# the upper border for a maximum, the lower border for a minimum.
df_fin.loc[df_fin['ismaxima'], 'target1'] = df_fin.loc[df_fin['ismaxima'], 'up_border']
df_fin.loc[df_fin['isminima'], 'target1'] = df_fin.loc[df_fin['isminima'], 'low_border']

# On 'extr2' rows target2 is the target1 of the following 'extr2' row
# (the shift is applied inside the extr2 subset, not over all rows).
df_fin.loc[df_fin['extr2'], 'target2'] = df_fin.loc[df_fin['extr2'], 'target1'].shift(-1)

# Move both targets one row up and back-fill the gaps, so every row carries the values
# of the next flagged row strictly after it.
# `.bfill()` is the direct replacement for the deprecated `fillna(method='bfill')`.
df_fin[['target1', 'target2']] = df_fin[['target1', 'target2']].shift(-1).bfill()

# Remove every row that still contains a NaN in any column.
# NOTE: this cell mutates df_fin in place (targets are overwritten and shifted, rows are
# dropped), so running it a second time on its own output produces different targets.
# Rebuild df_fin from df_total before re-running.
df_fin.dropna(inplace=True)

In [39]:
# Preview the first 20 rows, restricted to the five right-most columns.
df_fin.head(20).iloc[:, -5:]

Unnamed: 0,extr2,low_border,up_border,target1,target2
0,False,21455.3,21478.05,21445.9,21344.8
1,False,21430.2,21459.7,21445.9,21344.8
2,False,21422.2,21445.0,21445.9,21344.8
3,True,21437.5,21445.9,21344.8,21392.75
4,False,21430.6,21446.8,21344.8,21392.75
5,False,21423.5,21431.95,21344.8,21392.75
6,False,21404.0,21438.35,21344.8,21392.75
7,False,21359.0,21406.95,21344.8,21392.75
8,False,21345.95,21369.75,21344.8,21392.75
9,False,21323.5,21355.75,21344.8,21392.75


## Train test

In [24]:
# Feature matrix X: df_fin without the listed identifier, flag, band and label columns.
# Label y: the 'target' column.
# NOTE(review): no visible cell creates a 'target' column (only 'target1' / 'target2' are
# built above) — presumably it comes from df_total.csv; confirm, otherwise this raises
# KeyError on a fresh kernel.
# NOTE(review): 'target1' / 'target2' are not in the drop list, so if they exist at this
# point they remain in X as features — confirm this is intended.
X = df_fin.drop(columns= ['timestamp', 'symbol_cq', 'ismaxima', 'isminima', 'extr2', 'low_border', 'up_border', 'target'])
y = df_fin['target']

In [25]:
from sklearn.preprocessing import StandardScaler

# Keep the fitted scalers instead of discarding them, so that values in scaled units
# (e.g. model predictions) can be mapped back with `y_scaler.inverse_transform(...)`.
x_scaler = StandardScaler()
y_scaler = StandardScaler()

# NOTE(review): both scalers are fitted on the full dataset, i.e. before the
# train / validation / test split, so held-out rows contribute to the scaling statistics —
# confirm this is acceptable.
X_scaled = x_scaler.fit_transform(X)
# The target is reshaped to a single column because the scaler works on 2-D input.
y_scaled = y_scaler.fit_transform(y.values.reshape(-1, 1))

In [26]:
def create_lstm_data(X, y, look_back=60):
    """Build overlapping sliding windows for an LSTM and align one target per window.

    Window ``i`` holds rows ``i .. i + look_back - 1`` of ``X``; its target is the value
    of ``y`` at the window's last row (``i + look_back - 1``).

    Parameters
    ----------
    X : array-like, first axis = samples
        Feature rows in their original order.
    y : array-like with the same number of rows as ``X``
        One target value per row (1-D, or 2-D with a single column).
    look_back : int, default 60
        Number of consecutive rows in each window.

    Returns
    -------
    lstm_data : np.ndarray, shape ``(len(X) - look_back + 1, look_back, ...)``
    lstm_target : np.ndarray, shape ``(len(X) - look_back + 1,)``

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or ``look_back`` is smaller than 1 or
        larger than the number of rows.
    """
    features = np.asarray(X)
    targets = np.asarray(y)

    if len(features) != len(targets):
        raise ValueError(
            f"X and y must have the same number of rows, got {len(features)} and {len(targets)}"
        )
    n_windows = len(features) - look_back + 1
    if look_back < 1 or n_windows < 1:
        raise ValueError(
            f"look_back must be between 1 and the number of rows ({len(features)}), got {look_back}"
        )

    lstm_data = np.stack([features[start : start + look_back] for start in range(n_windows)])
    # Use the `y` argument here; the previous version read the global `y_scaled` instead,
    # which ignored whatever the caller passed in.
    lstm_target = targets[look_back - 1:].reshape(n_windows)

    print('initial X shape: ', features.shape, 'y shape: ', targets.shape, 'look_back: ', look_back)
    print('output X shape: ', lstm_data.shape, 'y shape:', lstm_target.shape)
    return lstm_data, lstm_target


In [27]:
# Turn the scaled rows into windows (default look_back) with one target per window.
X_lstm, y_lstm = create_lstm_data(X=X_scaled, y=y_scaled)

initial X shape:  (381405, 22) y shape:  (381405, 1) look_back:  60
output X shape:  (381346, 60, 22) y shape: (381346,)


In [17]:
# Chronological split at fixed window positions:
# [0, 350000) -> train, [350000, 365000) -> validation, the rest -> test.
X_train, X_val, X_test = np.split(X_lstm, [350000, 365000])
y_train, y_val, y_test = np.split(y_lstm, [350000, 365000])

print(X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape)

(350000, 60, 22) (350000,) (15000, 60, 22) (15000,) (16346, 60, 22) (16346,)


## Simple Keras LSTM model

In [18]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [29]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.activations import relu
from tensorflow.keras.layers import LSTM, Dense, Activation

ModuleNotFoundError: No module named 'tensorflow'

In [20]:
# List the GPU devices TensorFlow can see (an empty list means no GPU is visible).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [21]:
# create and fit the LSTM network
model = keras.models.Sequential()
model.add(LSTM(128, input_shape=(60, 22)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

In [22]:
# Train with mean-squared-error loss and the Adam optimizer for 20 epochs,
# passing the validation windows so they are evaluated alongside training.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(
    X_train,
    y_train,
    batch_size=1024,
    epochs=20,
    validation_data=(X_val, y_val),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x20c0af4b7c0>