In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
from tensorflow.keras.layers import MaxPooling1D, Flatten, GRU, GlobalMaxPooling1D, LSTM
from tensorflow.keras.models import Sequential
from keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
from sqlalchemy import create_engine, text
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None


In [38]:
TABLES = [ "BTC_KLINES_YEARLY_2018_PER_30m", "BTC_KLINES_YEARLY_2019_PER_30m", "BTC_KLINES_YEARLY_2020_PER_30m", "BTC_KLINES_YEARLY_2021_PER_30m", "BTC_KLINES_YEARLY_2022_PER_30m", "BTC_KLINES_YEARLY_2023_PER_30m"]
LIMITED_DOWN_TABLES = []

engine = create_engine('postgresql://postgres:postgres@127.0.0.1/CryptoData')
df = pd.DataFrame()
for table in TABLES:
    if table in LIMITED_DOWN_TABLES:
        with engine.begin() as connection:
            query = text("SELECT * FROM \"" + table + "\" ORDER BY \"ISOTimestampKlineCLOSE\" ASC OFFSET 155")
            df1 = pd.read_sql(query, con=connection)
    else:
        with engine.begin() as connection:
            query = text("SELECT * FROM \"" + table + "\" ORDER BY \"ISOTimestampKlineCLOSE\" ASC")
            df1 = pd.read_sql(query, con=connection)

    df = pd.concat([df, df1], axis=0, ignore_index=True)
    print(table, len(df))


df.drop(columns=['ISOInsertTimestamp', 'ISOTimestampKlineCLOSE', 'UNIXTimestampKlineOPEN', 'UNIXTimestampKlineCLOSE',
                 'close_minus99_d', 'close_80_roc', 'close_99_roc', 'close_150_roc', 'mfi_80', 'mfi_99', 'mfi_150',
                 'ftr_80', 'ftr_99', 'ftr_150','vr_6' ],
        inplace=True)

#df.drop(df.columns[15:], axis=1, inplace=True)
print(df)

print(df.select_dtypes(exclude=['float64']).columns)
df['tradesAmount'] = df['tradesAmount'].astype('float64')
print(df.select_dtypes(exclude=['float64']).columns)
print( df.columns[df.isna().any()].tolist())
print('len before', len(df))
df = df.dropna(axis=0)
print('len after', len(df))

# print('len before', len(df))
# # Convert 'tradesAmount' column to numeric, coerce errors to NaN
# df['tradesAmount'] = pd.to_numeric(df['tradesAmount'], errors='coerce')
# # Drop rows where 'tradesAmount' is NaN
# df = df.dropna(subset=['tradesAmount'])
# print('len after', len(df))

np_array = df.to_numpy()
print(np_array)

BTC_KLINES_YEARLY_2018_PER_30m 17374
BTC_KLINES_YEARLY_2019_PER_30m 34817
BTC_KLINES_YEARLY_2020_PER_30m 52276
BTC_KLINES_YEARLY_2021_PER_30m 69745
BTC_KLINES_YEARLY_2022_PER_30m 87245
BTC_KLINES_YEARLY_2023_PER_30m 104743
        openPrice  highPrice  lowPrice  closePrice  volume  quoteAssetVolume  \
0        16214.92   16372.99  16190.10    16298.00  483.71        7880124.16   
1        16298.00   16333.00  16011.21    16168.00  408.26        6613629.87   
2        16159.69   16400.00  16145.00    16369.80  525.96        8555658.67   
3        16369.80   16488.98  16335.01    16403.01  363.64        5964801.34   
4        16419.99   16639.00  16335.03    16612.02  475.17        7836295.46   
...           ...        ...       ...         ...     ...               ...   
104738   45061.34   45111.10  44840.01    44981.73  580.11       26090537.98   
104739   44981.72   45049.98  44928.00    44967.48  480.02       21593326.90   
104740   44967.47   44982.71  44802.01    44946.91  600.2

In [52]:
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index + lookback
    while 1:
        if shuffle:
            rows = np.random.randint(min_index + lookback, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)

        samples = np.zeros((len(rows), lookback // step, data.shape[-1]))
        targets = np.zeros((len(rows), delay // step,))

        for j, row in enumerate(rows):
            indices = range(rows[j] - lookback, rows[j], step)
            futures = range(rows[j], rows[j] + delay, step)
            samples[j] = data[indices]
            #print('data[indices][-1][3]', data[indices][-1][3])
            targets[j] = [1 if inner_list[3] > data[indices][-1][3] else 0 for inner_list in data[futures]]
            #print(samples,targets )
            #print(samples.shape, targets.shape)

        yield samples, targets

In [31]:
# lookback = 4
# step = 3
# delay = 24
# batch_size = 2
# 
# train_gen = generator(np_array, lookback=lookback, delay=delay, min_index=0, max_index=None, shuffle=False,
#                       step=step, batch_size=batch_size)
# print(next(train_gen))

ValueError: could not broadcast input array from shape (2,277) into shape (1,277)

In [55]:
train_max_range = int(0.9 * len(df))
val_max_range = int(train_max_range + 0.1 * len(df) - 100)

lookback = 144
step = 1
delay = 24
batch_size = 128

train_gen = generator(np_array, lookback=lookback, delay=delay, min_index=0, max_index=train_max_range, shuffle=True,
                      step=step, batch_size=batch_size)
val_gen = generator(np_array, lookback=lookback, delay=delay, min_index=train_max_range+1, max_index=val_max_range,
                    shuffle=True, step=step, batch_size=batch_size)
test_gen = generator(np_array, lookback=lookback, delay=delay, min_index=val_max_range+1, max_index=None, shuffle=True, step=step,
                     batch_size=batch_size)

val_steps = (val_max_range - train_max_range - 1 - lookback) // batch_size
test_steps = (len(np_array) - val_max_range - 1 - lookback) // batch_size


In [56]:
model = Sequential()
model.add(LSTM(128, dropout=0.0, recurrent_dropout=0.3, return_sequences=True, input_shape=(None, np_array.shape[-1])))
model.add(LSTM(256, activation='relu', dropout=0.0, recurrent_dropout=0.0))
#model.add(LSTM(32, dropout=0.0, recurrent_dropout=0.5))
model.add(Dense(delay/step, activation='sigmoid'))

model.compile(optimizer=RMSprop(), loss='binary_crossentropy', metrics=['acc'])
#200-60
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=30,
                              validation_data=val_gen, validation_steps=val_steps)

Epoch 1/30


  history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=30,


 73/500 [===>..........................] - ETA: 1:11 - loss: 0.6937 - acc: 0.0473


KeyboardInterrupt

