In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
from tensorflow.keras.layers import MaxPooling1D, Flatten, GRU, GlobalMaxPooling1D
from tensorflow.keras.models import Sequential
from keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
from sqlalchemy import create_engine, text
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Notebook-wide pandas setting: None disables the chained-assignment
# (SettingWithCopy) check entirely instead of warning or raising.
pd.options.mode.chained_assignment = None

2024-01-28 22:15:14.370527: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-28 22:15:14.394535: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6,
              target_column=3):
    """Endlessly yield ``(samples, targets)`` batches of sliding windows over ``data``.

    Parameters
    ----------
    data : 2-D numpy array indexed as ``data[row][column]``.
    lookback : how many rows before the anchor row each window reaches back.
    delay : how many rows after the anchor row the target is read from.
    min_index, max_index : anchor rows are drawn from
        ``[min_index + lookback, max_index)``. ``max_index=None`` resolves to
        ``len(data) - delay - 1``.
    shuffle : if True, anchor rows are sampled at random (with replacement);
        otherwise they are walked in order, wrapping back to the start once
        the next full batch would reach ``max_index``.
    batch_size : number of anchor rows per batch.
    step : stride between the rows taken inside one window.
    target_column : column of ``data`` used as the prediction target
        (defaults to 3, the value that was previously hard-coded).

    Yields
    ------
    samples : array of shape ``(len(rows), ceil(lookback / step), data.shape[-1])``.
    targets : array of shape ``(len(rows),)`` holding
        ``data[row + delay][target_column]`` for every anchor row.
    """
    if max_index is None:
        max_index = len(data) - delay - 1

    # range(row - lookback, row, step) produces ceil(lookback / step) rows, not
    # lookback // step; sizing the buffer from the range itself keeps the two in
    # sync when lookback is not a multiple of step.
    timesteps = len(range(0, lookback, step))

    i = min_index + lookback
    while True:
        if shuffle:
            rows = np.random.randint(min_index + lookback, max_index, size=batch_size)
        else:
            # Restart from the first valid anchor once a full batch no longer fits.
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        samples = np.zeros((len(rows), timesteps, data.shape[-1]))
        targets = np.zeros((len(rows),))

        for j, row in enumerate(rows):
            indices = np.arange(row - lookback, row, step)
            samples[j] = data[indices]
            targets[j] = data[row + delay][target_column]

        yield samples, targets

In [3]:
# Tables to load; their rows are stacked in list order.
TABLES = ["BTC_KLINES_MONTHLY_12_2020_PER_5m"]

# The connection URL can be supplied through the CRYPTO_DB_URL environment
# variable so credentials need not live in the notebook; the previous local
# URL is kept as the default so existing setups keep working.
engine = create_engine(os.environ.get('CRYPTO_DB_URL',
                                      'postgresql://postgres:postgres@127.0.0.1/CryptoData'))

# Collect one frame per table and concatenate once at the end, instead of
# repeatedly concatenating onto an (initially empty) DataFrame inside the loop.
frames = []
rows_loaded = 0
for table in TABLES:
    with engine.begin() as connection:
        # Table names come from the constant list above; identifiers cannot be
        # bound as query parameters, so the name is quoted and interpolated.
        query = text("SELECT * FROM \"" + table + "\" ORDER BY \"ISOTimestampKlineCLOSE\" ASC")
        df1 = pd.read_sql(query, con=connection)

    frames.append(df1)
    rows_loaded += len(df1)
    print(table, rows_loaded)

df = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()

# Drop the named bookkeeping/indicator columns first, then keep only the first
# five remaining columns by position (same two-stage trim as before, without
# inplace mutation).
df = df.drop(columns=['ISOInsertTimestamp', 'ISOTimestampKlineCLOSE', 'UNIXTimestampKlineOPEN', 'UNIXTimestampKlineCLOSE',
                      'close_minus99_d', 'close_80_roc', 'close_99_roc', 'close_150_roc', 'mfi_80', 'mfi_99', 'mfi_150',
                      'ftr_80', 'ftr_99', 'ftr_150', 'vr_6'])
df = df.iloc[:, :5].copy()
print(df)

BTC_KLINES_MONTHLY_12_2020_PER_5m 8938
      openPrice  highPrice  lowPrice  closePrice  volume
0      18880.71   18911.46  18859.38    18887.13  223.97
1      18887.13   18979.99  18887.13    18919.62  295.66
2      18919.62   18940.05  18883.32    18892.30  215.83
3      18892.31   18933.68  18855.19    18902.11  188.29
4      18902.10   18983.46  18902.10    18941.38  182.43
...         ...        ...       ...         ...     ...
8933   29189.60   29204.66  29145.49    29145.49  101.61
8934   29145.49   29177.41  29127.60    29172.83  129.65
8935   29172.84   29199.00  29163.69    29197.48   76.62
8936   29197.93   29199.00  29160.02    29174.49  100.82
8937   29174.49   29202.67  29173.98    29182.99  123.60

[8938 rows x 5 columns]


In [4]:
print(df.dtypes)
# copy=True: to_numpy() may hand back a view of the frame's data, and the
# in-place arithmetic below must not write through into df.
np_array = df.to_numpy(copy=True)
print(np_array)

# Row boundaries of the splits: [0, train_max_range) is used for the
# normalisation statistics, val_max_range marks the end of the next 2000 rows.
train_max_range = 6000
val_max_range = train_max_range + 2000

# np_array[:, 3] alone is a view, so the in-place normalisation below would
# silently turn these values into z-scores; copy to keep column 3 as loaded.
close_price = np_array[:, 3].copy()
print(close_price)

# Standardise every column using statistics from the first train_max_range
# rows only. The std is taken after centring, which does not change its value.
mean = np_array[:train_max_range].mean(axis=0)
np_array -= mean
std = np_array[:train_max_range].std(axis=0)
np_array /= std
print('std[3]', std[3])

print(np_array)

openPrice     float64
highPrice     float64
lowPrice      float64
closePrice    float64
volume        float64
dtype: object
[[18880.71 18911.46 18859.38 18887.13   223.97]
 [18887.13 18979.99 18887.13 18919.62   295.66]
 [18919.62 18940.05 18883.32 18892.3    215.83]
 ...
 [29172.84 29199.   29163.69 29197.48    76.62]
 [29197.93 29199.   29160.02 29174.49   100.82]
 [29174.49 29202.67 29173.98 29182.99   123.6 ]]
[18887.13 18919.62 18892.3  ... 29197.48 29174.49 29182.99]
std[3] 1928.5890690107578
[[-0.61949825 -0.61538368 -0.6184909  -0.61642158 -0.12391699]
 [-0.61616862 -0.58000954 -0.60401848 -0.59957507  0.16001007]
 [-0.59931819 -0.60062596 -0.60600551 -0.61374087 -0.15615533]
 ...
 [ 4.71835286  4.69488658  4.75550074  4.72963692 -0.70749424]
 [ 4.73136539  4.69488658  4.75358673  4.71771629 -0.61165054]
 [ 4.7192086   4.69678098  4.76086727  4.72212366 -0.52143073]]


In [5]:
# Window settings for a single-sample, in-order generator used to eyeball one batch.
lookback, step, delay, batch_size = 60, 2, 1, 1

train_gen = generator(
    np_array,
    lookback=lookback,
    delay=delay,
    min_index=0,
    max_index=train_max_range,
    shuffle=False,
    step=step,
    batch_size=batch_size,
)

In [6]:
# Pull one (samples, targets) batch from the generator and print it as-is.
first_batch = next(train_gen)
print(first_batch)

(array([[[-6.19498253e-01, -6.15383676e-01, -6.18490899e-01,
         -6.16421583e-01, -1.23916995e-01],
        [-5.99318194e-01, -6.00625957e-01, -6.06005506e-01,
         -6.13740867e-01, -1.56155331e-01],
        [-6.08404666e-01, -5.78218381e-01, -5.96211200e-01,
         -5.88292211e-01, -2.88435480e-01],
        [-5.76394561e-01, -5.26073408e-01, -5.73477772e-01,
         -5.66986480e-01,  3.40568513e-01],
        [-4.98277870e-01, -4.99897684e-01, -5.12067787e-01,
         -5.13413636e-01,  7.24041736e-02],
        [-5.21434889e-01, -4.92252989e-01, -5.26383745e-01,
         -4.86160552e-01,  4.60571164e-01],
        [-5.43222714e-01, -5.51335485e-01, -5.77091964e-01,
         -5.86912964e-01,  2.00921864e-01],
        [-5.90159113e-01, -5.74661869e-01, -5.79908218e-01,
         -5.79892287e-01, -3.93744737e-01],
        [-5.64668240e-01, -5.76685313e-01, -6.01723757e-01,
         -5.98895817e-01, -3.17941082e-01],
        [-6.07844541e-01, -6.06840820e-01, -5.98949225e-01,
   

In [7]:
# Window settings shared by the three generators below.
lookback = 60
step = 2
delay = 1
batch_size = 128

# Training batches are sampled at random from rows [0, train_max_range).
train_gen = generator(np_array, lookback=lookback, delay=delay, min_index=0, max_index=train_max_range, shuffle=True,
                      step=step, batch_size=batch_size)
# Validation and test batches are walked in order (shuffle=False): with random
# sampling the reported losses differ from run to run, and the step counts
# below (which size one pass over each split) would have no meaning.
val_gen = generator(np_array, lookback=lookback, delay=delay, min_index=train_max_range+1, max_index=val_max_range,
                    shuffle=False, step=step, batch_size=batch_size)
test_gen = generator(np_array, lookback=lookback, delay=delay, min_index=val_max_range+1, max_index=None, shuffle=False, step=step,
                     batch_size=batch_size)

# Number of full batches in each split: anchor rows span
# [min_index + lookback, max_index).
val_steps = (val_max_range - train_max_range - 1 - lookback) // batch_size
# With max_index=None the generator stops at len(data) - delay - 1, so the
# test split is that much shorter than len(np_array).
test_max_index = len(np_array) - delay - 1
test_steps = (test_max_index - val_max_range - 1 - lookback) // batch_size

In [None]:
# Two stacked GRU layers followed by a single linear output unit.
model = Sequential()
# return_sequences=True so the second GRU receives the full sequence of
# hidden states rather than only the last one.
model.add(GRU(32,
              dropout=0.1, recurrent_dropout=0.5, return_sequences=True, input_shape=(None, np_array.shape[-1])))
model.add(GRU(64, activation='relu', dropout=0.1, recurrent_dropout=0.5))
model.add(Dense(1))

# Take the optimizer from tf.keras, the same package the layers and model come
# from, instead of the standalone `keras` package.
model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')
# Model.fit accepts Python generators directly; fit_generator is deprecated.
history = model.fit(train_gen, steps_per_epoch=100, epochs=5,
                    validation_data=val_gen, validation_steps=val_steps)