In [7]:
# Mount Google Drive at /drive; every CSV, scaler and model path used below lives under "/drive/My Drive/".
from google.colab import drive
# force_remount=True asks Colab to remount even when /drive is already mounted.
drive.mount('/drive', force_remount=True)

Mounted at /drive
time: 1.29 s (started: 2021-06-12 09:42:46 +00:00)


In [8]:
!pip install ipython-autotime
%load_ext autotime
import pickle
import os
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
import matplotlib.pyplot as plt
# Default (width, height) applied to every matplotlib figure created after this cell.
plt.rcParams["figure.figsize"] = (20,10)

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 3.04 s (started: 2021-06-12 09:42:48 +00:00)


In [9]:
import tensorflow as tf
# Print the Keras version bundled with TensorFlow so the run can be tied to a library version.
print("Using Keras",tf.keras.__version__)

Using Keras 2.5.0
time: 1.5 ms (started: 2021-06-12 09:42:51 +00:00)


In [10]:
# Number of consecutive rows flattened into one input sample; it is the default
# `window_length` of extend_dataset_with_window_length and part of the output file names.
window_length = 10
# Timeframe label; in this notebook it is only used to build file names.
timeframe = "minute"
# Mini-batch size passed to `network.fit`.
batch_size = 64
# `classes` and `candles` are only used to select the input CSV files by name.
classes = 2
candles = 1
# Placeholders; both names are rebound later (scaler when it is fitted, network inside the tuning loop).
network = None
scaler = None

# Hyper-parameter grid: every combination of the three lists below is trained once.
layer_tune = [1, 2]
neurons_tune = [128, 256]
lr_tune = [0.001, 0.0001, 0.00001]
# CSV that receives the grid-search results table.
# NOTE(review): the arguments are (window_length, timeframe), giving "..._nn_10_window_minute.csv";
# the other file names in this notebook put timeframe first - confirm this order is intended.
tune_results_file = "/drive/My Drive/disertation/tunning_results_nn_{}_window_{}.csv".format(window_length, timeframe)

time: 3.23 ms (started: 2021-06-12 09:42:51 +00:00)


In [11]:
def extend_dataset_with_window_length(X, Y, window_length=window_length):
    """Flatten every run of `window_length` consecutive rows of X into one sample.

    Output row ``i`` is the concatenation of ``X[i], X[i+1], ...,
    X[i+window_length-1]``. The labels are trimmed from the front so that
    label ``i`` of the result is the original label of the *last* row of
    window ``i``.

    Note: the default for `window_length` is captured from the module-level
    variable when this function is defined; changing that variable afterwards
    has no effect unless the cell is re-run.

    Returns:
        (windowed_X, trimmed_Y): a NumPy array built from the flattened
        windows, and ``Y[window_length-1:]``.
    """
    window_count = len(X) - window_length + 1
    flattened_windows = [
        [value for row_idx in range(start, start + window_length) for value in X[row_idx]]
        for start in range(window_count)
    ]
    return np.array(flattened_windows), Y[window_length - 1:]

def get_equal_class_distribution(X, Y):
    """Downsample so that every class in Y has as many samples as the rarest class.

    For each distinct label, exactly ``min(class counts)`` positions are drawn
    without replacement using a fixed ``random_state=1``, so the selection is
    reproducible. The selected positions are grouped by class (in
    ``value_counts`` order), i.e. the result is NOT shuffled.

    Args:
        X: array indexable by a list of integer positions (e.g. a NumPy array).
        Y: label array of the same length, indexable the same way.

    Returns:
        (X_balanced, Y_balanced) restricted to the selected positions.
    """
    labels = pd.Series(Y)
    class_counts = labels.value_counts()
    per_class = class_counts.min()
    selected_positions = [
        position
        for label in class_counts.index
        for position in labels[labels == label]
        .sample(n=per_class, replace=False, random_state=1)
        .index
    ]
    return X[selected_positions], Y[selected_positions]

time: 9.76 ms (started: 2021-06-12 09:42:51 +00:00)


In [None]:
# Load the training split, index it by candle open time and discard the "close" column.
train_df = (
    pd.read_csv("/drive/My Drive/disertation/train_df_{}_{}_candles_{}_class.csv".format(timeframe, candles, classes))
    .set_index("open_time")
    .drop(columns=["close"])
)
train_df.index = pd.to_datetime(train_df.index)

# The last column is the label; all preceding columns are features.
data = train_df.to_numpy()
X, Y = extend_dataset_with_window_length(data[:, :-1], data[:, -1])

In [None]:
# Fit the min-max scaler on the windowed training features, then scale those features in place of X.
scaler = MinMaxScaler().fit(X)
X = scaler.transform(X)

# Persist the fitted scaler next to the models.
scaler_path = "/drive/My Drive/disertation/nns/nn_scaler_{}_{}window.save".format(timeframe, window_length)
joblib.dump(scaler, scaler_path)

In [None]:
# Label counts and fractions of the training set before balancing.
a, b = np.unique(Y, return_counts=True)
print("buy sell %", b, b / sum(b), sep="\n")

# Downsample every class to the size of the rarest one.
X, Y = get_equal_class_distribution(X, Y)

# Same report after balancing.
a, b = np.unique(Y, return_counts=True)
print("buy sell %", b, b / sum(b), sep="\n")

In [None]:
def _load_windowed_split(split_name):
    """Load one evaluation split and prepare it exactly like the training data.

    Reads ``<split_name>_df_<timeframe>_<candles>_candles_<classes>_class.csv``
    from Drive, indexes it by "open_time", drops the "close" column, treats the
    last remaining column as the label, applies the sliding window and scales
    the features with the already-fitted notebook-level `scaler`.

    Args:
        split_name: file-name prefix of the split, e.g. "tune" or "test".

    Returns:
        (df, X_split, Y_split): the loaded DataFrame, the scaled windowed
        features and the labels aligned with them.
    """
    path = "/drive/My Drive/disertation/{}_df_{}_{}_candles_{}_class.csv".format(
        split_name, timeframe, candles, classes
    )
    df = pd.read_csv(path).set_index("open_time").drop(columns=["close"])
    values = df.to_numpy()
    features, labels = extend_dataset_with_window_length(values[:, :-1], values[:, -1])
    # Only transform here: the scaler must stay fitted on the training data alone.
    return df, scaler.transform(features), labels


tune_df, X_tune, Y_tune = _load_windowed_split("tune")
test_df, X_test, Y_test = _load_windowed_split("test")

# Keep `data` bound to the raw test array, as before, in case other cells read it.
data = test_df.to_numpy()

In [None]:
# Label counts and fractions of the tuning split.
a, b = np.unique(Y_tune, return_counts=True)
print("buy sell %", b, b / sum(b), sep="\n")
# Fraction of the smallest label value (np.unique returns labels sorted);
# presumably that label means "buy" - verify against the dataset encoding.
tune_buy_percentage = (b / sum(b))[0]

In [None]:
# Label counts and fractions of the test split.
a, b = np.unique(Y_test, return_counts=True)
print("buy sell %", b, b / sum(b), sep="\n")
# Fraction of the smallest label value (np.unique returns labels sorted);
# presumably that label means "buy" - verify against the dataset encoding.
test_buy_percentage = (b / sum(b))[0]

In [None]:
def create_model(layers_count, neurons, lr, input_dim=None):
    """Build and compile a fully connected binary classifier.

    Architecture: one input Dense layer of `neurons` sigmoid units, followed by
    `layers_count` further Dense layers of the same width, followed by a single
    sigmoid output unit. The network therefore has ``layers_count + 1`` hidden
    layers in total.

    Args:
        layers_count: number of additional hidden layers after the input layer.
        neurons: width of every hidden layer.
        lr: learning rate for the Adam optimizer.
        input_dim: number of input features. Defaults to the width of the
            notebook-level training matrix `X`.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    if input_dim is None:
        # Fall back to the feature count of the notebook-level training data.
        input_dim = len(X[0])

    network = models.Sequential()
    network.add(layers.Dense(neurons, activation='sigmoid', input_shape=(input_dim,)))
    for _ in range(layers_count):
        network.add(layers.Dense(neurons, activation='sigmoid'))
    network.add(layers.Dense(1, activation="sigmoid"))

    # Pass the optimizer *instance*: the string 'adam' would build a default
    # Adam and silently ignore `lr`, making the learning-rate grid meaningless.
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    network.compile(optimizer=opt, loss="binary_crossentropy", metrics=['accuracy'])

    return network

In [None]:
def train_network(network):
    """Fit `network` on the notebook-level training data with early stopping.

    Trains on ``(X, Y)`` for at most 400 epochs using the global `batch_size`,
    validating on ``(X_tune, Y_tune)`` after each epoch. Training stops once
    validation accuracy has not improved for 20 epochs, and the weights of the
    best epoch are restored.

    Returns:
        The Keras ``History`` object returned by ``fit``;
        ``pd.DataFrame(history.history).plot(lw=2)`` plots the curves.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=20,
        restore_best_weights=True,
    )
    return network.fit(
        X,
        Y,
        epochs=400,
        batch_size=batch_size,
        validation_data=(X_tune, Y_tune),
        callbacks=[early_stopping],
    )

In [None]:
def _predict_labels(model, features):
    """Return hard 0/1 predictions of a model with a single sigmoid output.

    Replacement for ``Sequential.predict_classes`` (deprecated in TF 2.5 and
    removed in 2.6): probabilities above 0.5 map to class 1. The result is
    flattened to a 1-D int32 array.
    """
    return (model.predict(features) > 0.5).astype("int32").ravel()


def _accuracy_and_weighted_f1(y_true, y_pred):
    """Return ``(accuracy, weighted-average F1)`` taken from sklearn's classification report."""
    report = metrics.classification_report(y_true, y_pred, digits=3, output_dict=True)
    return report["accuracy"], report['weighted avg']['f1-score']


# One dict per trained configuration; turned into a DataFrame for display and export.
results = []

attempt_number = 0
total_attempts = len(layer_tune) * len(neurons_tune) * len(lr_tune)

# Exhaustive grid search over (layers, neurons, learning rate).
for layers_count in layer_tune:
    for neurons in neurons_tune:
        for lr in lr_tune:
            attempt_number += 1
            print("Attempt {}/{}".format(attempt_number, total_attempts))

            model_file = "/drive/My Drive/disertation/nns/nn_{}__{}_{}window.h5".format(attempt_number, timeframe, window_length)

            # Refuse to overwrite an existing model, and do so BEFORE training
            # so a full training run is not wasted on a name collision.
            if os.path.exists(model_file):
                raise Exception("file exists!")

            network = create_model(layers_count=layers_count, neurons=neurons, lr=lr)
            t = time.time()
            history = train_network(network)
            train_time = time.time() - t

            network.save(model_file)

            # Score the trained network on the train, tune and test splits.
            Y_predicted = _predict_labels(network, X)
            acc_train, f1_train = _accuracy_and_weighted_f1(Y, Y_predicted)

            Y_tune_predicted = _predict_labels(network, X_tune)
            acc_tune, f1_tune = _accuracy_and_weighted_f1(Y_tune, Y_tune_predicted)

            Y_test_predicted = _predict_labels(network, X_test)
            acc_test, f1_test = _accuracy_and_weighted_f1(Y_test, Y_test_predicted)

            r = {
                "attempt": attempt_number,
                "layers": layers_count,
                "neurons": neurons,
                "lr": lr,
                "train_time": train_time,
                # Number of epochs actually run (early stopping may end training before 400).
                "epochs": len(history.epoch),
                "train_acc": acc_train,
                "train_f1": f1_train,
                "tune_acc": acc_tune,
                "tune_f1": f1_tune,
                "test_acc": acc_test,
                "test_f1": f1_test,
                "tune_buy_percentage": tune_buy_percentage,
                "test_buy_percentage": test_buy_percentage
            }
            results.append(r)
            # Running summary of all attempts so far.
            print(pd.DataFrame(results)[["train_acc", "train_f1", "tune_acc", "tune_f1", "test_acc", "test_f1"]])

In [None]:
# Collect every grid-search attempt into one table and write it to Drive.
# Note: this rebinds `r`, which held the last attempt's dict inside the tuning loop.
r = pd.DataFrame(results)
# With default arguments, to_csv also writes the DataFrame index as the first column.
r.to_csv(tune_results_file)
# Bare last expression: renders the results table as the cell output.
r