# Constants:

In [2]:
SEQ_LEN = 240   # how many past candles to use to predict
CANDLES_SHIFT = 1 # how many candles to shift between sequences
NAME = "r40t075"
VALIDATION_PCT = 0.2

DF initialisation:

In [3]:
import pandas as pd
main_df = pd.read_csv("HistoricalDataClassified_2016_2023.csv")
main_df

Unnamed: 0,BTC_close,BTC_low,BTC_high,BTC_volume,BTC_average,BTC_HLPercent,target
0,675.50,671.00,675.50,0.0001,674.66,0.006662,0
1,667.00,667.00,671.00,2764.2000,668.04,0.005961,0
2,670.10,670.10,671.99,8873.0000,671.99,0.002813,0
3,667.00,667.00,672.00,204.4600,671.90,0.007440,0
4,667.00,667.00,672.00,0.0001,671.90,0.007440,0
...,...,...,...,...,...,...,...
711758,28423.92,28416.67,28440.89,4032.0000,28424.22,0.000852,0
711759,28402.64,28401.90,28426.65,1226.3600,28409.15,0.000871,0
711760,28418.48,28403.11,28418.48,4559.1800,28406.68,0.000541,0
711761,28412.62,28407.49,28422.84,388.8300,28413.01,0.000540,0


# Functions:

split df

In [4]:
def splitDf(df):
    
    res = []
    print("")
    print("splitDf")
    while len(df) >= SEQ_LEN + len(df.columns) -1:
        first = df.head(SEQ_LEN + len(df.columns) -1).copy()
        first.index = np.arange(0, len(first))
        res.append(first)
        df = df.tail(len(df) - CANDLES_SHIFT)
        df.index = np.arange(0, len(df))

    print("-done")
    print("")
    return res

balance

In [5]:
def balance(dfs):
    
    buys = []
    sells = []
    holds = []
    for df in dfs:
        if df.at[len(df)-1, 'target'] == 0:
            sells.append(df)
        elif df.at[len(df)-1, 'target'] == 1:
            buys.append(df)
        else:
          holds.append(df)

    print("before balancing:")
    print("buys:", len(buys), ", sells:", len(sells), ", holds:", len(holds))

    smallest = min(len(buys), len(sells), len(holds))
    buys = buys[:smallest]
    sells = sells[:smallest]
    holds = holds[:smallest]

    dfsBalanced = buys+sells+holds
    return dfsBalanced

preprocessing

In [6]:
from sklearn import preprocessing
from tqdm import tqdm

def preprocess(dfs):
    
    for df in dfs:
        for col in df.columns:
            if col != "target":
                df[col] = df[col].pct_change()
                df.dropna(inplace=True)
                df[col] = preprocessing.scale(df[col].values)
                df.index = np.arange(0, len(df))

    return dfs

sequences

In [7]:
def buildSequences(dfs):
    
    sequences = []
    for df in dfs:
        if(len(df) == SEQ_LEN):
            label = df.at[SEQ_LEN-1, 'target']
            df = df.iloc[:, :-1]
            dfArray = df.values.tolist()
            sequences.append([np.array(dfArray), label])
    
    return sequences

split

In [8]:
def split(seqWithTarget):

    X = []
    y = []
    for seq, target in seqWithTarget:
        X.append(seq)
        y.append(target)

    return np.array(X),np.array(y)

# DF manipulation, build training sets:

split into dfs with SEQ_LEN rows

In [9]:
import numpy as np
splittedDfs = splitDf(main_df)


splitDf
-done



seperate training and validation

In [10]:
dfsTraining = splittedDfs[:(int(len(splittedDfs) * (1-VALIDATION_PCT)))].copy()
dfsValidation = splittedDfs[(int(len(splittedDfs) * (1-VALIDATION_PCT))):].copy()

shuffle

In [11]:
import random

random.shuffle(dfsTraining)
random.shuffle(dfsValidation)

balance buys/sells/holds

In [12]:
dfsTrainingBalanced = balance(dfsTraining)
dfsValidationBalanced = dfsValidation # balance(dfsValidation) , validation data does not have to be balanced   

before balancing:
buys: 45775 , sells: 44747 , holds: 478692


In [14]:
dfsValidationBalanced = balance(dfsValidation)

before balancing:
buys: 7634 , sells: 7136 , holds: 127534


shuffle

In [15]:
random.shuffle(dfsTrainingBalanced)
random.shuffle(dfsValidationBalanced)

In [16]:
import pickle

In [1]:

with open('dfsTrainingBalanced.pkl', 'wb') as file:
    pickle.dump(dfsTrainingBalanced, file)
with open('dfsValidationBalanced.pkl', 'wb') as file:
    pickle.dump(dfsValidationBalanced, file)

NameError: name 'pickle' is not defined

preprocessing

In [None]:
dfsTrainingPreprocessed = preprocess(dfsTrainingBalanced)
dfsValidationPreprocessed = preprocess(dfsValidationBalanced)



build sequences

In [None]:
sequencesTraining = buildSequences(dfsTrainingPreprocessed)
sequencesValidation = buildSequences(dfsValidationPreprocessed)

shuffle

In [None]:
random.shuffle(sequencesTraining)
random.shuffle(sequencesValidation)

split sequence from label

In [None]:
train_x, train_y = split(sequencesTraining)
validation_x, validation_y = split(sequencesValidation)

# Model:

hyper parameters bounds

In [None]:
EPOCHS = 50

batchSize = 32
layers = 2
nodes = 256
denseNodes = 128
dropOut = 0.8
learningRate = 0.0001
decay = 1e-06


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint
import os



# model
model = Sequential()

for _ in range(layers-1):
  model.add(LSTM(nodes, activation="tanh", recurrent_activation = 'sigmoid', recurrent_dropout = 0, unroll = False, use_bias = True, input_shape=(train_x.shape[1:]), return_sequences=True))
  model.add(Dropout(dropOut))
  model.add(BatchNormalization())

model.add(LSTM(nodes, activation="tanh", recurrent_activation = 'sigmoid', recurrent_dropout = 0, unroll = False, use_bias = True, input_shape=(train_x.shape[1:])))
model.add(Dropout(dropOut))
model.add(BatchNormalization())

model.add(Dense(denseNodes, activation="relu"))
model.add(Dropout(dropOut))

model.add(Dense(3, activation="softmax"))



# opt  
opt = tf.keras.optimizers.Adam(lr=learningRate, decay=decay)


model.compile(loss="sparse_categorical_crossentropy",
              optimizer = opt,
              metrics=["accuracy"])


filename = NAME + "-{epoch:02d}"
filepath = f"models/{filename}.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1) # saves every epoch


# train
history = model.fit(
  train_x, train_y,
  batch_size = batchSize,
  epochs = EPOCHS,
  validation_data=(validation_x, validation_y),
  callbacks=[checkpoint])