## Imports

In [1]:
# from src.config.config import Config
# from src.db_writer.db import DB
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams["figure.figsize"] = (12.8, 7.2)

## Preparation

In [3]:
# Load the two input tables; the first CSV column is used as the row index in both.
# df_total holds the per-minute market data, df_razmetka supplies the `extr2`
# column that is attached to the working frame below.
df_total = pd.read_csv('data/df_total.csv', index_col=[0])
df_razmetka = pd.read_csv('data/df_razmetka.csv', index_col=[0])

In [4]:
# Work on a copy so the frame loaded from disk (df_total) stays untouched.
df_fin = df_total.copy()

In [5]:
# Attach the `extr2` flag from the second table. pandas aligns the assignment on
# index labels, so rows missing from df_razmetka would become NaN.
# NOTE(review): assumes both frames share the same index - confirm.
df_fin['extr2'] = df_razmetka['extr2']

In [6]:
# Columns that are not needed for the rest of the notebook.
UNUSED_COLUMNS = [
    'symbol_perp',
    'time_diff',
    'diff',
    'diff_shift1',
    'is_extremum',
]
df_fin.drop(columns=UNUSED_COLUMNS, inplace=True)

In [7]:
# First and last timestamp per quarterly contract, shown as datetimes
# (timestamps are stored as Unix seconds).
contract_spans = df_fin.groupby('symbol_cq').agg({'timestamp': ['min', 'max']})
contract_spans.apply(pd.to_datetime, unit='s')

Unnamed: 0_level_0,timestamp,timestamp
Unnamed: 0_level_1,min,max
symbol_cq,Unnamed: 1_level_2,Unnamed: 2_level_2
BTCUSDT_220930,2022-08-23 09:58:00,2022-09-30 08:00:00
BTCUSDT_221230,2022-09-30 08:01:00,2022-12-30 08:00:00
BTCUSDT_230331,2022-12-30 08:01:00,2023-03-31 08:00:00
BTCUSDT_230630,2023-03-31 08:01:00,2023-05-15 08:16:00


Let's assume that the price range for opening a contract lies between

(high + max(open, close))/2

and

(low + min(open, close))/2

In [8]:
# Row-wise bottom and top of the candle body (open/close) of the perpetual.
candle_body = df_fin[['open_perp', 'close_perp']]
body_bottom = candle_body.min(axis=1)
body_top = candle_body.max(axis=1)

# Each border is the midpoint between the wick extreme and the matching body edge.
df_fin['low_border'] = (df_fin['low_perp'] + body_bottom) / 2
df_fin['up_border'] = (df_fin['high_perp'] + body_top) / 2

In [9]:
# Preview the frame with the newly added low_border / up_border columns.
df_fin.head()

Unnamed: 0,timestamp,open_perp,close_perp,low_perp,high_perp,volume_perp,trades_perp,symbol_cq,open_cq,close_cq,low_cq,high_cq,volume_cq,trades_cq,bidPrice_perp,bidQty_perp,askPrice_perp,askQty_perp,funding_rate,bidPrice_cq,bidQty_cq,askPrice_cq,askQty_cq,time_left,ismaxima,isminima,extr2,low_border,up_border
0,1661248680,21476.3,21459.6,21451.0,21479.8,413.33,3596,BTCUSDT_220930,21511.1,21483.9,21483.9,21511.1,1.659,22,21476.2,8.013,21476.3,6.975,1.1e-05,21502.4,0.019,21508.0,0.071,3276120,False,False,False,21455.3,21478.05
1,1661248740,21459.7,21430.2,21430.2,21459.7,378.801,3206,BTCUSDT_220930,21493.2,21453.5,21453.5,21493.2,0.579,40,21459.1,3.42,21459.2,14.399,1.2e-05,21487.4,0.019,21493.3,0.05,3276060,False,False,False,21430.2,21459.7
2,1661248800,21430.3,21445.0,21414.1,21445.0,619.156,5519,BTCUSDT_220930,21459.9,21478.3,21440.6,21478.4,1.392,46,21432.6,0.82,21434.3,0.004,1.2e-05,21456.4,0.169,21462.8,0.071,3276000,False,True,False,21422.2,21445.0
3,1661248860,21445.0,21445.0,21430.0,21446.8,311.295,3116,BTCUSDT_220930,21476.7,21470.2,21460.0,21476.7,1.419,41,21444.9,25.023,21445.0,1.005,1.1e-05,21476.7,0.1,21478.3,0.054,3275940,True,False,True,21437.5,21445.9
4,1661248920,21445.0,21431.2,21430.0,21448.6,250.049,2848,BTCUSDT_220930,21479.4,21463.8,21460.4,21479.5,0.4,24,21444.9,17.361,21445.0,1.806,1e-05,21471.6,0.019,21476.5,0.121,3275880,False,False,False,21430.6,21446.8


In [10]:
# Rows flagged by `extr2` that are also a local maximum / minimum.
is_extr2 = df_fin['extr2']
is_peak = is_extr2 & df_fin['ismaxima']
is_trough = is_extr2 & df_fin['isminima']

# Target price at a flagged maximum is the upper border, at a flagged minimum the
# lower border; every other row keeps NaN. The minimum assignment runs last, so it
# wins if a row carries both flags.
df_fin.loc[is_peak, 'target'] = df_fin.loc[is_peak, 'up_border']
df_fin.loc[is_trough, 'target'] = df_fin.loc[is_trough, 'low_border']

In [11]:
# Move every target one row earlier and back-fill, so each row carries the target
# of the *next* flagged extremum (a flagged row itself points at the one after it).
# `.bfill()` replaces `fillna(method='bfill')`, whose `method` argument is
# deprecated in recent pandas releases.
# NOTE: this cell is not idempotent - re-running it shifts the target again.
df_fin['target'] = df_fin['target'].shift(-1).bfill()

# Rows after the last flagged extremum have no target and are dropped here, as is
# any row with a missing value in another column.
df_fin.dropna(inplace=True)

In [12]:
# Preview the frame with the propagated `target` column.
df_fin.head()

Unnamed: 0,timestamp,open_perp,close_perp,low_perp,high_perp,volume_perp,trades_perp,symbol_cq,open_cq,close_cq,low_cq,high_cq,volume_cq,trades_cq,bidPrice_perp,bidQty_perp,askPrice_perp,askQty_perp,funding_rate,bidPrice_cq,bidQty_cq,askPrice_cq,askQty_cq,time_left,ismaxima,isminima,extr2,low_border,up_border,target
0,1661248680,21476.3,21459.6,21451.0,21479.8,413.33,3596,BTCUSDT_220930,21511.1,21483.9,21483.9,21511.1,1.659,22,21476.2,8.013,21476.3,6.975,1.1e-05,21502.4,0.019,21508.0,0.071,3276120,False,False,False,21455.3,21478.05,21445.9
1,1661248740,21459.7,21430.2,21430.2,21459.7,378.801,3206,BTCUSDT_220930,21493.2,21453.5,21453.5,21493.2,0.579,40,21459.1,3.42,21459.2,14.399,1.2e-05,21487.4,0.019,21493.3,0.05,3276060,False,False,False,21430.2,21459.7,21445.9
2,1661248800,21430.3,21445.0,21414.1,21445.0,619.156,5519,BTCUSDT_220930,21459.9,21478.3,21440.6,21478.4,1.392,46,21432.6,0.82,21434.3,0.004,1.2e-05,21456.4,0.169,21462.8,0.071,3276000,False,True,False,21422.2,21445.0,21445.9
3,1661248860,21445.0,21445.0,21430.0,21446.8,311.295,3116,BTCUSDT_220930,21476.7,21470.2,21460.0,21476.7,1.419,41,21444.9,25.023,21445.0,1.005,1.1e-05,21476.7,0.1,21478.3,0.054,3275940,True,False,True,21437.5,21445.9,21344.8
4,1661248920,21445.0,21431.2,21430.0,21448.6,250.049,2848,BTCUSDT_220930,21479.4,21463.8,21460.4,21479.5,0.4,24,21444.9,17.361,21445.0,1.806,1e-05,21471.6,0.019,21476.5,0.121,3275880,False,False,False,21430.6,21446.8,21344.8


## Train test

In [13]:
# Identifier, label and helper columns that must not be fed to the model.
NON_FEATURE_COLUMNS = [
    'timestamp',
    'symbol_cq',
    'ismaxima',
    'isminima',
    'extr2',
    'low_border',
    'up_border',
    'target',
]

y = df_fin['target']
X = df_fin.drop(columns=NON_FEATURE_COLUMNS)

In [14]:
from sklearn.preprocessing import StandardScaler

# Keep the fitted scalers: y_scaler.inverse_transform is needed to turn model
# predictions back into price units, and x_scaler to transform new data in the
# same way as the training data.
x_scaler = StandardScaler()
y_scaler = StandardScaler()

# NOTE(review): both scalers are fitted on the full data set, before the
# train/validation/test split below, so validation and test statistics leak
# into the scaling. Consider fitting on the training rows only.
X_scaled = x_scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y.values.reshape(-1, 1))

In [15]:
def create_lstm_data(X, y, look_back=60):
    """Build overlapping look-back windows for a sequence model.

    Window ``i`` contains rows ``i .. i + look_back - 1`` of ``X`` and is paired
    with the target of its last row, ``y[i + look_back - 1]``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature rows in time order.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        One target per row of ``X``.
    look_back : int, default 60
        Number of consecutive rows in each window.

    Returns
    -------
    lstm_data : np.ndarray, shape (n_samples - look_back + 1, look_back, n_features)
    lstm_target : np.ndarray, shape (n_samples - look_back + 1,)

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length, or ``look_back`` is not in
        ``1 .. n_samples``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)

    if len(y) != n_samples:
        raise ValueError(
            f'X and y must have the same length, got {n_samples} and {len(y)}'
        )
    if not 1 <= look_back <= n_samples:
        raise ValueError(
            f'look_back must be between 1 and {n_samples}, got {look_back}'
        )

    n_windows = n_samples - look_back + 1
    lstm_data = np.stack([X[i : i + look_back] for i in range(n_windows)])
    # Use the `y` argument: the previous version read the global `y_scaled`
    # here and silently ignored whatever the caller passed in.
    lstm_target = y[look_back - 1:].reshape(n_windows)

    print('initial X shape: ', X.shape, 'y shape: ', y.shape, 'look_back: ', look_back)
    print('output X shape: ', lstm_data.shape, 'y shape:', lstm_target.shape)
    return lstm_data, lstm_target


In [16]:
# Turn the scaled rows into look-back windows, using the function's default window of 60.
X_lstm, y_lstm = create_lstm_data(X=X_scaled, y=y_scaled, look_back=60)

initial X shape:  (381405, 22) y shape:  (381405, 1) look_back:  60
output X shape:  (381346, 60, 22) y shape: (381346,)


In [17]:
# Chronological split points (window indices): train | validation | test.
TRAIN_END, VAL_END = 350_000, 365_000

X_train, X_val, X_test = np.split(X_lstm, [TRAIN_END, VAL_END])
y_train, y_val, y_test = np.split(y_lstm, [TRAIN_END, VAL_END])

split_parts = (X_train, y_train, X_val, y_val, X_test, y_test)
print(*(part.shape for part in split_parts))

(350000, 60, 22) (350000,) (15000, 60, 22) (15000,) (16346, 60, 22) (16346,)


## Simple Keras LSTM model

In [18]:
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [19]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.activations import relu
from tensorflow.keras.layers import LSTM, Dense, Activation

In [20]:
# Show the GPUs TensorFlow can see; an empty list means no GPU is visible.
tf.config.list_physical_devices('GPU')

[]

In [21]:
# Seed the Python, NumPy and backend RNGs so weight initialisation is repeatable
# between runs.
keras.utils.set_random_seed(42)

# Create the LSTM network: one LSTM layer followed by a small dense regression head.
# The input shape (look_back, n_features) is taken from the training windows, so
# it cannot drift out of sync with the window length or the feature set.
model = keras.models.Sequential([
    LSTM(128, input_shape=X_train.shape[1:]),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])

In [22]:
model.compile(loss='mean_squared_error', optimizer='adam')

# Stop once the validation loss has not improved for 10 epochs and roll back to
# the best weights, instead of always running all 100 epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Keep the History object so the loss curves can be inspected afterwards.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=1024,
    callbacks=[early_stopping],
)


Epoch 1/100
 40/342 [==>...........................] - ETA: 1:21 - loss: 0.0833

KeyboardInterrupt: 