In [1]:
import numpy as np
import time
import os

import keras

# import matplotlib.pyplot as plt
import pandas as pd
import random

import tensorflow as tf
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
# from sklearn.utils import shuffle
# from sklearn.utils import class_weight
# from sklearn.metrics import r2_score
# from sklearn.metrics import mean_absolute_error
from tensorflow.keras.optimizers import Adam
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard

# Runtime configuration for Keras / TensorFlow.
# NOTE(review): these variables are assigned after keras and tensorflow were
# already imported above — confirm they still take effect at this point.
os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ["TF_ENABLE_GPU_GARBAGE_COLLECTION"] = 'false'
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

physical_devices = tf.config.experimental.list_physical_devices('GPU')
# print(physical_devices)
# Enable memory growth on every detected GPU. Iterating (instead of indexing
# element 0) keeps this cell runnable on a CPU-only machine, where the list
# is empty, and keeps the setting consistent when several GPUs are present.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# Keras callback writing training logs to ./logs; passed to fit() later on.
tensorboard = TensorBoard(log_dir="./logs")

2022-01-22 18:10:40.435159: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 18:10:40.461359: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 18:10:40.461532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [2]:
# Column labels applied to each decoded record when it is loaded into a
# DataFrame (19 names in total).
column_names = [
    'timestamp',
    'open', 'close', 'high', 'low', 'volume',
    'adosc', 'atr',
    'macd', 'macd_signal', 'macd_hist',
    'mfi',
    'upper_band', 'middle_band', 'lower_band',
    'rsi',
    'difference_low_high', 'difference_open_close',
    'target',
]

# Directory scanned for .bin data files.
data_directory = '/home/joren/Coding/cryptodata/Normalized_labelled/'
# Upper bound on the number of rows kept from one file per training round.
max_df_length = 25000

#####################################
# Model / training hyper-parameters.
frame_size = 120  # number of consecutive rows in one input window
layers = 5        # number of stacked LSTM layers
layer_sizes = [512 for _ in range(layers)]
dropouts = [0.1 for _ in range(layers)]
batchnormalizations = [0 for _ in range(layers)]
learning_rate = 0.0001
optimizer = Adam(learning_rate)
#####################################

In [3]:
# Layout of one binary record, in file order: each entry gives the numpy
# element type and how many consecutive elements of that type follow.
field_info = [
    { "type": np.uint64, "count": 1 },
    { "type": np.double, "count": 17 },
    { "type": np.int64, "count": 1 }
]
# Size in bytes of a single element; every type in field_info is 8 bytes wide.
BYTES_EIGHT = 8

def read_bin_full_file(file):
    """Decode a whole binary record file into a 2-D float array.

    The file is interpreted as a sequence of fixed-size records laid out as
    described by ``field_info``. Any trailing bytes that do not make up a
    complete record are ignored.

    Parameters
    ----------
    file : str or path-like
        Path of the binary file to read.

    Returns
    -------
    numpy.ndarray
        ``float64`` array of shape ``(n_records, n_columns)`` where
        ``n_columns`` is the sum of all ``count`` values in ``field_info``.
        An empty file yields an array of shape ``(0, n_columns)``.
    """
    # One structured dtype describing a full record lets numpy decode the
    # entire buffer in a single call instead of one frombuffer per field.
    record_dtype = np.dtype([
        (f"f{idx}", np.dtype(field["type"]), (field["count"],))
        for idx, field in enumerate(field_info)
    ])
    n_columns = sum(field["count"] for field in field_info)

    # The context manager guarantees the handle is closed after reading.
    with open(file, 'rb') as f:
        raw = f.read()

    n_rows = len(raw) // record_dtype.itemsize
    if n_rows == 0:
        return np.empty((0, n_columns), dtype=np.float64)

    records = np.frombuffer(raw, dtype=record_dtype, count=n_rows)

    # Mixed integer/float columns are unified as float64, one block per field.
    blocks = [
        records[name].astype(np.float64).reshape(n_rows, field["count"])
        for name, field in zip(record_dtype.names, field_info)
    ]
    return np.concatenate(blocks, axis=1)

In [4]:
def random_file(directory=None):
    """Pick a random ``.bin`` file and return its full path.

    Only entries whose name ends with ``.bin`` are candidates, so a path is
    always returned (never ``None``) as long as at least one such entry exists.

    Parameters
    ----------
    directory : str, optional
        Directory to search. Defaults to the module-level ``data_directory``.

    Returns
    -------
    str
        ``directory`` joined with the chosen file name.

    Raises
    ------
    FileNotFoundError
        If ``directory`` contains no entry ending in ``.bin``.
    """
    if directory is None:
        directory = data_directory

    # Sorted so the candidate order does not depend on os.listdir ordering.
    candidates = sorted(
        name
        for name in map(os.fsdecode, os.listdir(directory))
        if name.endswith(".bin")
    )
    if not candidates:
        raise FileNotFoundError(f"no .bin files found in {directory}")

    path = os.path.join(directory, random.choice(candidates))
    print(f"reading file: {path}")
    return path

In [5]:
class DQN(tf.keras.Model):
    """Holder for a stacked-LSTM ``Sequential`` classifier stored in ``_model``.

    The class itself defines no ``call``; training and prediction in this
    notebook go through ``self._model`` directly.

    Parameters
    ----------
    n_actions : int
        Number of units of the final softmax ``Dense`` layer.
    feature_size : int
        Number of features per time step (last dimension of the LSTM input).
    layers : int
        Number of LSTM layers to stack.
    layer_sizes : list of int
        Units per LSTM layer; layers beyond the end of the list reuse
        ``layer_sizes[0]``.
    dropouts : list of float
        Dropout rate added after layer ``i`` when ``dropouts[i] > 0``.
    batchnormalizations : list of int
        A ``BatchNormalization`` layer is added after layer ``i`` when
        ``batchnormalizations[i] == 1``.
    optimizer : str or optimizer instance
        Passed to ``model.compile``.
    """

    def __init__(self, n_actions, feature_size, layers = 2, layer_sizes = [128, 128], dropouts = [0.1, 0], batchnormalizations = [0, 0], optimizer='adam'):
        super().__init__()
        self._n_actions = n_actions
        self._feature_size = feature_size
        # NOTE: the window length is read from the module-level `frame_size`,
        # not from a constructor argument.
        self._frame_size = frame_size

        self._model = self.create_model(layers, layer_sizes, dropouts, batchnormalizations, optimizer)
    
    def create_model(self, layers, layer_sizes, dropouts, batchnormalizations, optimizer):
        """Build and compile the stacked-LSTM network.

        Every LSTM layer returns full sequences, so the softmax ``Dense`` head
        is applied at each time step. The previous ``i == layers`` branch
        (an LSTM without ``return_sequences``) could never run inside
        ``range(0, layers)`` and has been removed; the resulting model is
        unchanged.

        Returns
        -------
        Sequential
            Model compiled with categorical cross-entropy loss and an
            accuracy metric.
        """
        model = Sequential()

        for i in range(layers):
            # Fall back to the first configured size when the list is shorter
            # than the requested number of layers.
            units = layer_sizes[i] if i < len(layer_sizes) else layer_sizes[0]

            if i == 0:
                # Only the first layer declares the input shape.
                model.add(LSTM(units=units, return_sequences = True, input_shape = (self._frame_size, self._feature_size)))
            else:
                model.add(LSTM(units=units, return_sequences = True))

            if i < len(dropouts) and dropouts[i] > 0:
                model.add(Dropout(dropouts[i]))
            if i < len(batchnormalizations) and batchnormalizations[i] == 1:
                model.add(BatchNormalization())

        model.add(Dense(units=self._n_actions, activation='softmax'))

        model.compile(optimizer=optimizer, loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

        return model
    
    # def forward(self, observation):
    #     q_values = self._model.predict(observation)
    #     # q_values = self._model.make_predict_function(observation)
    #     # print(q_values.all())
    #     return q_values
    
    # def predict(self, observation, epsilon):
    #     q_values = self.forward(observation)
    #     if np.random.uniform() > epsilon:
    #         action = np.argmax(q_values[0][-1], axis=-1)
    #     else:
    #         action = np.random.randint(self._n_actions)
    #     return action
    
    # def fit(self, observations, targets, batch_size):
    #     self._model.fit(observations, targets, batch_size=batch_size)


In [6]:
# train-test split
def df_split(df):
    """Split a frame chronologically and cut both parts into sliding windows.

    The 'timestamp' and 'target' columns are removed from the features and
    'target' is one-hot encoded into 3 classes. The last 10% of the rows
    (no shuffling) form the test part. Each sample is a window of
    ``frame_size`` consecutive rows, paired with the one-hot labels of those
    same rows.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as numpy arrays.
    """
    features = df.drop(columns=['timestamp','target'], axis=0).to_numpy()
    labels = df['target'].to_numpy()

    feat_train, feat_test, lab_train, lab_test = train_test_split(
        features, labels, test_size=0.1, shuffle=False
    )

    onehot_train = to_categorical(lab_train, 3).tolist()
    onehot_test = to_categorical(lab_test, 3).tolist()

    def _windows(rows, onehot):
        # Window end indices run from frame_size up to (not including) the row count.
        ends = range(frame_size, rows.shape[0])
        xs = [rows[end - frame_size:end] for end in ends]
        ys = [onehot[end - frame_size:end] for end in ends]
        return np.array(xs), np.array(ys)

    X_train, y_train = _windows(feat_train, onehot_train)
    X_test, y_test = _windows(feat_test, onehot_test)

    return X_train, X_test, y_train, y_test

In [7]:
# model initialization: 3 output classes, 17 input features per time step
dqn = DQN(
    n_actions=3,
    feature_size=17,
    layers=layers,
    layer_sizes=layer_sizes,
    dropouts=dropouts,
    batchnormalizations=batchnormalizations,
    optimizer=optimizer,
)

def test_accuracy(model, X=None, y=None):
    """Print a classification report, confusion matrix and accuracy (in %).

    Predictions and one-hot targets are reduced to class indices with an
    argmax over the last axis and flattened, so every time step of every
    window counts as one sample.

    Parameters
    ----------
    model : object with a ``predict`` method
        Model to evaluate.
    X : array-like, optional
        Inputs passed to ``model.predict``. Defaults to the global ``X_test``.
    y : array-like, optional
        One-hot targets matching ``X``. Defaults to the global ``y_test``.
    """
    # Fall back to the notebook-level test split when no data is supplied.
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    y_pred = np.argmax(np.asarray(model.predict(X)), axis=-1).ravel()
    y_test_2 = np.argmax(y, axis=-1).ravel()

    # zero_division=0 reports 0.0 for classes that are never predicted
    # without emitting a warning for each affected metric.
    print(f"""
    Class. report:
    {classification_report(y_test_2, y_pred, zero_division=0)}
    """)

    cf = confusion_matrix(y_test_2, y_pred)

    print(cf)
    print(accuracy_score(y_test_2, y_pred) * 100)


2022-01-22 18:10:40.591674: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-22 18:10:40.592494: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 18:10:40.592691: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-22 18:10:40.592832: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [8]:
# Training rounds: each round loads one randomly chosen file, trains for a
# few epochs on its windows and reports accuracy on that file's test split.
for i in range(100):
    file = random_file()
    if file is None:
        # No usable file was picked this round; try again on the next one.
        continue
    data = read_bin_full_file(file)

    df = pd.DataFrame(data, columns=column_names) # column_names comes from the configuration cell

    # Missing values are replaced by 0 rather than skipping the file.
    df = df.fillna(0)
    # if df.isnull().values.any():
    #     print('nan values found')
    #     continue

    # Drop the first 120 rows of every file.
    df = df.iloc[120:]

    # Cap the amount of data per round by taking a random contiguous slice.
    if len(df) > max_df_length:
        randstart = random.randint(0, len(df)-max_df_length)
        df = df.iloc[randstart:randstart+max_df_length]

    X_train, X_test, y_train, y_test = df_split(df)

    # Files too short to yield at least one window in both splits cannot be
    # trained or evaluated on.
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"skipping {file}: not enough rows for a {frame_size}-step window")
        continue

    dqn._model.fit(X_train, y_train, epochs = 3, batch_size = 256, callbacks=[tensorboard])
    test_accuracy(dqn._model)

reading file: /home/joren/Coding/cryptodata/Normalized_labelled/ZILUSDT.bin
Epoch 1/3


2022-01-22 18:10:56.894183: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8300


Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       1.00      1.00      1.00    284942
           1       0.00      0.00      0.00       298
           2       0.00      0.00      0.00       360

    accuracy                           1.00    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       1.00      1.00      1.00    285600

    
[[284942      0      0]
 [   298      0      0]
 [   360      0      0]]
99.76960784313725
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/ATMUSDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       0.98      1.00      0.99    279279
           1       0.00      0.00      0.00      3158
           2       0.00      0.00      0.00      3163

    accuracy                           0.98    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       0.96      0.98      0.97    285600

    
[[279279      0      0]
 [  3158      0      0]
 [  3163      0      0]]
97.78676470588236
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/BCHUPUSDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3

    Class. report:
                  precision    recall  f1-score   support

           0       1.00      1.00      1.00    285600

    accuracy                           1.00    285600
   macro avg       1.00      1.00      1.00    285600
weighted avg       1.00      1.00      1.00    285600

    
[[285600]]
100.0
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/TCTUSDT.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       1.00      1.00      1.00    284520
           1       0.00      0.00      0.00       600
           2       0.00      0.00      0.00       480

    accuracy                           1.00    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       0.99      1.00      0.99    285600

    
[[284520      0      0]
 [   600      0      0]
 [   480      0      0]]
99.6218487394958
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/GNOUSDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       1.00      1.00      1.00    285120
           1       0.00      0.00      0.00       240
           2       0.00      0.00      0.00       240

    accuracy                           1.00    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       1.00      1.00      1.00    285600

    
[[285120      0      0]
 [   240      0      0]
 [   240      0      0]]
99.83193277310924
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/C98USDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       0.99      1.00      1.00    282963
           1       0.00      0.00      0.00      1320
           2       0.00      0.00      0.00      1317

    accuracy                           0.99    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       0.98      0.99      0.99    285600

    
[[282963      0      0]
 [  1320      0      0]
 [  1317      0      0]]
99.0766806722689
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/CHRUSDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       0.99      1.00      0.99    281819
           1       0.00      0.00      0.00      1871
           2       0.00      0.00      0.00      1910

    accuracy                           0.99    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       0.97      0.99      0.98    285600

    
[[281819      0      0]
 [  1871      0      0]
 [  1910      0      0]]
98.67612044817928
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/RIFUSDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       0.99      1.00      1.00    283246
           1       0.00      0.00      0.00      1200
           2       0.00      0.00      0.00      1154

    accuracy                           0.99    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       0.98      0.99      0.99    285600

    
[[283246      0      0]
 [  1200      0      0]
 [  1154      0      0]]
99.17577030812325
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/WTCUSDT.bin
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



    Class. report:
                  precision    recall  f1-score   support

           0       1.00      1.00      1.00    285360
           1       0.00      0.00      0.00       120
           2       0.00      0.00      0.00       120

    accuracy                           1.00    285600
   macro avg       0.33      0.33      0.33    285600
weighted avg       1.00      1.00      1.00    285600

    
[[285360      0      0]
 [   120      0      0]
 [   120      0      0]]
99.91596638655463
reading file: /home/joren/Coding/cryptodata/Normalized_labelled/UNIUPUSDT.bin
Epoch 1/3
Epoch 2/3
 6/88 [=>............................] - ETA: 42s - loss: 0.1558 - accuracy: 0.9700

KeyboardInterrupt: 

In [9]:
# persist the trained Keras model under models/model1
dqn._model.save('models/model1')

2022-01-22 16:37:14.665630: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: models/model1/assets


INFO:tensorflow:Assets written to: models/model1/assets
