In [1]:
import numpy as np
import time
import os

import keras

import matplotlib.pyplot as plt
import pandas as pd
import random

import tensorflow as tf
from sklearn.metrics import confusion_matrix,accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ["TF_ENABLE_GPU_GARBAGE_COLLECTION"] = 'false'
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

physical_devices = tf.config.experimental.list_physical_devices('GPU')
print(physical_devices)
tf.config.experimental.set_memory_growth(physical_devices[0], True)

tensorboard = TensorBoard(log_dir="./logs")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-01-21 21:16:15.780466: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-21 21:16:15.807043: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-21 21:16:15.807217: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [2]:
column_names = ['timestamp','open','close','high','low','volume','adosc','atr','macd','macd_signal','macd_hist','mfi','upper_band','middle_band','lower_band','rsi','difference_low_high','difference_open_close','target']

data_directory = '/home/joren/Coding/cryptodata/Normalized_labelled/'

#####################################
frame_size = 120
layers = 2
layer_sizes = [512, 512]
dropouts = [0.1, 0.1]
batchnormalizations = [0, 0]
optimizer = 'adam'
#####################################

In [3]:
field_info = [
    { "type": np.uint64, "count": 1 },
    { "type": np.double, "count": 17 },
    { "type": np.int64, "count": 1 }
]
BYTES_EIGHT = 8

def read_bin_full_file(file):
    f = open(file, 'rb')
    b = f.read(-1)

    BYTES_TO_READ = 0
    for field in field_info:
        BYTES_TO_READ += BYTES_EIGHT * field["count"]

    data = []
    BYTES_READ = 0
    for i in range(0, int(os.path.getsize(file) / BYTES_TO_READ)):
        row = []

        for idx, field in enumerate(field_info):
            row += np.frombuffer(b, dtype=field["type"], count=field["count"], offset=BYTES_READ).tolist()

            BYTES_READ += BYTES_EIGHT * field["count"]

        data.append(row)
    return data

In [4]:
def random_file():
    filenames = []
    for file in os.listdir(data_directory):
        filename = os.fsdecode(file)
        filenames.append(filename)
        
    randomname = filenames[random.randint(0, len(filenames)-1)]
    if randomname.endswith(".bin"): 
        print(f"reading file: {os.path.join(data_directory, randomname)}")
        return os.path.join(data_directory, randomname)

In [5]:
class DQN(tf.keras.Model):
    def __init__(self, n_actions, feature_size, layers = 2, layer_sizes = [128, 128], dropouts = [0.1, 0], batchnormalizations = [0, 0], optimizer='adam'):
        super().__init__()
        self._n_actions = n_actions
        self._feature_size = feature_size
        self._frame_size = frame_size

        self._model = self.create_model(layers, layer_sizes, dropouts, batchnormalizations, optimizer)
    
    def create_model(self, layers, layer_sizes, dropouts, batchnormalizations, optimizer):
        model = Sequential()

        for i in range(0, layers):
            if i == 0:
                model.add(LSTM(units=layer_sizes[i], return_sequences = True, input_shape = (self._frame_size, self._feature_size)))
            elif i == layers:
                model.add(LSTM(units=layer_sizes[i]))
            elif i >= len(layer_sizes):
                model.add(LSTM(units=layer_sizes[0], return_sequences = True))
            else:
                model.add(LSTM(units=layer_sizes[i], return_sequences = True))


            if i < len(dropouts) and dropouts[i] > 0:
                model.add(Dropout(dropouts[i]))
            if i < len(batchnormalizations) and batchnormalizations[i] == 1:
                model.add(BatchNormalization()) 
        
        model.add(Dense(units=self._n_actions, activation='softmax'))
        
        model.compile(optimizer=optimizer, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

        return model
    
    # def forward(self, observation):
    #     q_values = self._model.predict(observation)
    #     # q_values = self._model.make_predict_function(observation)
    #     # print(q_values.all())
    #     return q_values
    
    # def predict(self, observation, epsilon):
    #     q_values = self.forward(observation)
    #     if np.random.uniform() > epsilon:
    #         action = np.argmax(q_values[0][-1], axis=-1)
    #     else:
    #         action = np.random.randint(self._n_actions)
    #     return action
    
    # def fit(self, observations, targets, batch_size):
    #     self._model.fit(observations, targets, batch_size=batch_size)


In [6]:
# train-test split
def df_split(df):
    X = df.drop(columns=['timestamp','target'], axis=0).to_numpy()
    Y = df['target'].to_numpy()

    X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(X, Y, test_size=0.1, shuffle=False)

    y_train_raw = to_categorical(y_train_raw, 3).tolist()
    y_test_raw = to_categorical(y_test_raw, 3).tolist()

    X_train = []
    y_train = []
    for i in range(frame_size, X_train_raw.shape[0]): #frame size up to size of array
        X_train.append(X_train_raw[i-frame_size:i])
        y_train.append(y_train_raw[i-frame_size:i])
    X_train, y_train = np.array(X_train), np.array(y_train)

    X_test = []
    y_test = []
    for i in range(frame_size, X_test_raw.shape[0]): #frame size up to size of array
        X_test.append(X_test_raw[i-frame_size:i])
        y_test.append(y_test_raw[i-frame_size:i])
    X_test, y_test = np.array(X_test), np.array(y_test)

    return X_train, X_test, y_train, y_test

In [7]:
file = random_file()
data = read_bin_full_file(file)

df = pd.DataFrame(data, columns=column_names) # variable from cell 1

del(data)

X_train, X_test, y_train, y_test = df_split(df)

del(df)

reading file: /home/joren/Coding/cryptodata/Normalized_labelled/MITHUSDT.bin


In [9]:
del(df)

NameError: name 'df' is not defined

In [10]:
# model initialization
dqn = DQN(y_train.shape[2], X_train.shape[2], layers, layer_sizes, dropouts, batchnormalizations, optimizer)


2022-01-21 21:18:17.862040: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-21 21:18:17.872895: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-21 21:18:17.873299: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-21 21:18:17.873429: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
202

In [11]:
# training
print(X_train.shape)
print(y_train.shape)

print(X_train[0].shape)
print(y_train[0].shape)

dqn._model.fit(X_train, y_train, epochs = 1, batch_size = 64, callbacks=[tensorboard])

(1154508, 120, 17)
(1154508, 120, 3)
(120, 17)
(120, 3)


In [4]:
# save model
dqn._model.save_weights(f'./models/model{i_episode}weights')
dqn._model.save(f'models/completemodel{i_episode}')