# Constants:

In [1]:
SEQ_LEN = 180 #240   # how many past candles to use to predict
CANDLES_SHIFT = 2 #5 # how many candles to shift between sequences
NAME = "pp3_m5_ov40th004p_shift2_seq180"
VALIDATION_PCT = 0.2

# Functions:

sequence split

In [2]:
import numpy as np

def splitDf_new(df):
    
    res = []
    print("")
    print("splitDf")
    while len(df) >= SEQ_LEN:
        first = df.head(SEQ_LEN).copy()
        first.index = np.arange(0, len(first))
        res.append(first)
        df = df.tail(len(df) - CANDLES_SHIFT)
        df.index = np.arange(0, len(df))

    print("-done")
    print("")
    return res

balance

In [3]:
def balance(dfs):
    
    buys = []
    sells = []
    holds = []
    for df in dfs:
        if df.at[len(df)-1, 'target'] == 0:
            sells.append(df)
        elif df.at[len(df)-1, 'target'] == 1:
            buys.append(df)
        else:
          holds.append(df)

    print("before balancing:")
    print("buys:", len(buys), ", sells:", len(sells), ", holds:", len(holds))

    smallest = min(len(buys), len(sells), len(holds))
    buys = buys[:smallest]
    sells = sells[:smallest]
    holds = holds[:smallest]

    dfsBalanced = buys+sells+holds
    return dfsBalanced

preprocessing

In [4]:
from sklearn.preprocessing import StandardScaler


# Function to preprocess data
def preprocess3_train(df):
    # before sequencing
    #
    # log and pctchange transform price columns ('low', 'high', 'open', 'close')
    # scale every colum (center mean and unit variance)

    scaler_dict = {}
    for col in df.columns:
        if col != 'target':
            if col != 'quantity_baseUnits' and col != 'hl_percent':
                df[col] = np.log(df[col])
                df[col] = df[col].pct_change()
                df.dropna(inplace=True)
            scaler = StandardScaler()
            df[col] = scaler.fit_transform(df[col].values.reshape(-1, 1))
            scaler_dict[col] = scaler
    df.index = np.arange(0, len(df))
    return df, scaler_dict

# Function to apply saved preprocessing to new data
def apply_preprocess3_val(df, scaler_dict):
    # before sequencing
    #
    # pct.change transform price columns ('low', 'high', 'open', 'close')
    # scale every colum (center mean and unit variance)
    
    for col in df.columns:
        if col != 'target':
            if col != 'quantity_baseUnits' and col != 'hl_percent':
                df[col] = np.log(df[col])
                df[col] = df[col].pct_change()
                df.dropna(inplace=True)
            scaler = scaler_dict[col]
            df[col] = scaler.transform(df[col].values.reshape(-1, 1))
    df.index = np.arange(0, len(df))
    return df

sequences

In [5]:
def buildSequences(dfs):
    
    sequences = []
    for df in dfs:
        if(len(df) == SEQ_LEN):
            label = df.at[SEQ_LEN-1, 'target']
            df = df.iloc[:, :-1]
            dfArray = df.values.tolist()
            sequences.append([np.array(dfArray), label])
    
    return sequences

X y split

In [6]:
def xySplit(seqWithTarget):

    X = []
    y = []
    for seq, target in seqWithTarget:
        X.append(seq)
        y.append(target)

    return np.array(X),np.array(y)

# DF manipulation, build training sets:

In [7]:
import pandas as pd
df = pd.read_csv("../historicalData/labeled/HistoricalDataLabeled_BTC_USDT_01072016_01072023_MINUTE_5_ov40_th004p.csv")
df = df[['close', 'weightedAverage', 'hl_percent', 'quantity_baseUnits', 'target']]
df

Unnamed: 0,close,low,high,weightedAverage,hl_percent,quantity_baseUnits,target
0,681.84,678.01,681.84,679.87,0.005617,0.488352,0
1,683.40,678.12,683.53,682.69,0.007915,0.239165,0
2,681.95,681.95,683.37,683.01,0.002078,0.103008,0
3,678.86,678.86,684.89,684.00,0.008804,9.084392,0
4,684.39,684.39,684.73,684.55,0.000497,0.236453,0
...,...,...,...,...,...,...,...
736051,30408.88,30405.38,30415.48,30409.02,0.000332,0.091832,0
736052,30413.16,30408.38,30414.80,30409.77,0.000211,0.128622,0
736053,30414.06,30409.65,30420.01,30413.24,0.000341,0.090499,0
736054,30426.45,30411.67,30427.69,30419.85,0.000526,0.205135,0


In [8]:
# Split data into train and validation sets
train_size = int((1-VALIDATION_PCT) * len(df))
train_df = df.iloc[:train_size].copy()
val_df = df.iloc[train_size:].copy()

In [9]:
# Preprocess the training data and save the scaling parameters
train_df, scaler_dict = preprocess3_train(train_df)
# Apply saved preprocessing to validation data
val_df = apply_preprocess3_val(val_df, scaler_dict)

split into dfs with SEQ_LEN rows

In [10]:
train_dfs = splitDf_new(train_df)
val_dfs = splitDf_new(val_df)


splitDf
-done


splitDf
-done



balance buys/sells/holds

In [11]:
# balance buys, sells, and holds
import random

random.shuffle(train_dfs)
random.shuffle(val_dfs)
train_dfs = balance(train_dfs)
val_dfs = balance(val_dfs)
random.shuffle(train_dfs)
random.shuffle(val_dfs)

before balancing:
buys: 104849 , sells: 101730 , holds: 87752
before balancing:
buys: 24630 , sells: 24825 , holds: 24060


build sequences

In [12]:
sequencesTraining = buildSequences(train_dfs)
sequencesValidation = buildSequences(val_dfs)
random.shuffle(sequencesTraining)
random.shuffle(sequencesValidation)

split sequence from label

In [13]:
train_x, train_y = xySplit(sequencesTraining)
validation_x, validation_y = xySplit(sequencesValidation)

In [14]:
print(type(train_x))
print(train_x.shape)
print(type(validation_x))
print(validation_x.shape)

<class 'numpy.ndarray'>
(263256, 180, 6)
<class 'numpy.ndarray'>
(72180, 180, 6)


In [15]:
import pickle

with open(f'../trainData/{NAME}_train_x.pkl', 'wb') as file:
    pickle.dump(train_x, file)
with open(f'../trainData/{NAME}_train_y.pkl', 'wb') as file:
    pickle.dump(train_y, file)
with open(f'../trainData/{NAME}_validation_x.pkl', 'wb') as file:
    pickle.dump(validation_x, file)
with open(f'../trainData/{NAME}_validation_y.pkl', 'wb') as file:
    pickle.dump(validation_y, file)

In [2]:
import pickle

with open(f"../trainData/{NAME}_train_x.pkl", "rb") as file:
    train_x = pickle.load(file)
with open(f"../trainData/{NAME}_train_y.pkl", "rb") as file:
    train_y = pickle.load(file)
with open(f"../trainData/{NAME}_validation_x.pkl", "rb") as file:
    validation_x = pickle.load(file)
with open(f"../trainData/{NAME}_validation_y.pkl", "rb") as file:
    validation_y = pickle.load(file)

# Model:

In [3]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


Num GPUs Available:  1


hyper parameters bounds

In [6]:
EPOCHS = 120

batchSize = 96
layers = 2
nodes = 64#256
denseNodes = 32#128

dropOut = 0.4#0.8 #0.92 #0.88
rec_dropout = 0
l1l2_reg = 0 #1e-5#1e-3

learningRate = 0.00001
decay = 0

hp_suffix = f"bs({batchSize})_layers({layers})_noded({nodes})_dNodes({denseNodes})_do({int(dropOut*100)}%)_recdo({rec_dropout})_l1l2({l1l2_reg})_lr({learningRate})_decay({decay})"

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
import os
from tqdm.keras import TqdmCallback
import datetime
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras import regularizers
import json
# model
model = Sequential()

for _ in range(layers-1):
  model.add(LSTM(nodes, 
               activation="tanh", 
               recurrent_activation = 'sigmoid', 
               recurrent_dropout = rec_dropout, 
               unroll = False, 
               use_bias = True, 
               input_shape=(train_x.shape[1:]), 
               return_sequences=True,
               kernel_regularizer=regularizers.l1_l2(l1=l1l2_reg/10, l2=l1l2_reg),
               #bias_regularizer=regularizers.l2(l1l2_reg),
               activity_regularizer=regularizers.l2(l1l2_reg)
               ))
  model.add(Dropout(dropOut))
  #model.add(BatchNormalization())

model.add(LSTM(nodes, 
             activation="tanh", 
             recurrent_activation = 'sigmoid', 
             recurrent_dropout = rec_dropout, 
             unroll = False, 
             use_bias = True, 
             input_shape=(train_x.shape[1:]),
             kernel_regularizer=regularizers.l1_l2(l1=l1l2_reg/10, l2=l1l2_reg),
             #bias_regularizer=regularizers.l2(l1l2_reg),
             activity_regularizer=regularizers.l2(l1l2_reg)
             ))
model.add(Dropout(dropOut))
#model.add(BatchNormalization())

model.add(Dense(denseNodes, 
                activation="relu",
                kernel_regularizer=regularizers.l1_l2(l1=l1l2_reg/10, l2=l1l2_reg),
                #bias_regularizer=regularizers.l2(l1l2_reg), 
                activity_regularizer=regularizers.l2(l1l2_reg)))
model.add(Dropout(dropOut))

model.add(Dense(3, activation="softmax"))



# opt  
opt = tf.keras.optimizers.Adam(lr=learningRate) # decay?


model.compile(loss="sparse_categorical_crossentropy",
              optimizer = opt,
              metrics=["accuracy"])


filename = "" + NAME + "-{epoch:02d}"
filepath = f"models/{filename}.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1) # saves every epoch

# Prepare TensorBoard callback
log_dir = "../logs/fit/" + f"{NAME}" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)


# store hyper-parameters
# Path to the JSON file
file_path = "models/hps/hps.json"
# Load the JSON file into a dictionary
with open(file_path, "r") as json_file:
    data = json.load(json_file)
# Add a new key-value pair to the dictionary
data[f"{NAME}" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")] = hp_suffix
# Write the updated dictionary back to the file
with open(file_path, "w") as json_file:
    json.dump(data, json_file, indent=4)




# train
history = model.fit(
  train_x, train_y,
  batch_size = batchSize,
  epochs = EPOCHS,
  validation_data=(validation_x, validation_y),
  callbacks=[checkpoint, TqdmCallback(verbose=0), tensorboard_callback])
  #callbacks=[checkpoint])



  super().__init__(name, **kwargs)


Epoch 1/120



Epoch 2/120



Epoch 3/120



Epoch 4/120



Epoch 5/120



Epoch 6/120



Epoch 7/120



Epoch 8/120



Epoch 9/120



Epoch 10/120



Epoch 11/120



Epoch 12/120



Epoch 13/120



Epoch 14/120



Epoch 15/120



Epoch 16/120



Epoch 17/120



Epoch 18/120



Epoch 19/120



Epoch 20/120



Epoch 21/120



Epoch 22/120



Epoch 23/120



Epoch 24/120



Epoch 25/120



Epoch 26/120



Epoch 27/120



Epoch 28/120



Epoch 29/120



Epoch 30/120



Epoch 31/120



Epoch 32/120



Epoch 33/120



Epoch 34/120



Epoch 35/120



Epoch 36/120



Epoch 37/120



Epoch 38/120



Epoch 39/120



Epoch 40/120



Epoch 41/120



Epoch 42/120



Epoch 43/120



Epoch 44/120



Epoch 45/120



Epoch 46/120



Epoch 47/120



Epoch 48/120



Epoch 49/120



Epoch 50/120



Epoch 51/120



Epoch 52/120

KeyboardInterrupt: 