In [None]:
import os
import sys
import time
os.environ['TF_ENABLE_ONEDNN_OPTS'] = str(0)

import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow import keras  # tf.keras
import matplotlib as mpl
import matplotlib.pyplot as plt

import  ipyparams

In [None]:
# Report the interpreter version, then "<module name> <module version>" for
# each of the main libraries this notebook relies on.
print("python", sys.version)
for module in (mpl, np, pd, sklearn, tf, keras):
    print("{} {}".format(module.__name__, module.__version__))

In [None]:
# Fail fast on unsupported runtimes.
assert sys.version_info >= (3, 5), "Python ≥3.5 required"
# Compare (major, minor) numerically: comparing the raw version strings is
# lexicographic, so e.g. "10.0" >= "2.0" would be False.
_tf_major_minor = tuple(int(part) for part in tf.__version__.split(".")[:2])
assert _tf_major_minor >= (2, 0), "TensorFlow ≥2.0 required"

In [None]:
from test_utility import *

In [None]:
# Derive the data/model identifier from this notebook's own file name.
# The name is expected to be "<8 dot-separated fields>.<train version>.<extension>".
#
# `__vsc_ipynb_file__` is looked up with .get(): when the global is absent
# (the original comment associates it with VSCode), a plain subscript would
# raise KeyError and the ipyparams fallback below could never run.
_vsc_ipynb_file = globals().get('__vsc_ipynb_file__')
if isinstance(_vsc_ipynb_file, str) and '.' in os.path.basename(_vsc_ipynb_file):  # looks like running on VSCode
    notebookName = os.path.basename(_vsc_ipynb_file)
else:  # looks like running on a browser
    notebookName = ipyparams.notebook_name
assert isinstance(notebookName, str) and len(notebookName.split('.')) > 2, \
    "Cannot derive the model name from notebook name {!r}".format(notebookName)
notebookName = ".".join(notebookName.split('.')[:-2])   # -2: gets rid of train version and file extension.
print(notebookName)

data_model = notebookName  #"vm03.05.250.11.80.100.16.14.1"
assert len(notebookName.split('.')) == 8, \
    "Expected 8 dot-separated fields in {!r}".format(notebookName)

In [None]:
# Root folder for all inputs and outputs of this notebook (candles, checkpoints, CSV logs).
# NOTE(review): absolute, machine-specific path.
dir_data = "/mnt/data/Trading/"

#===================================================================== Dataset

# Nx, Ny and Ns are passed to get_sample_anchors_2; Nx and Ny also go to get_datasets_2.
# Nx and Ny must match fields 2 and 3 of the notebook name (see the checksum section).
Nx = 300 # ------------- test
Ny = 11
Ns = 5 #--------------------- test
BatchSize = 64

# Extra leading rows dropped from Candles/Times after the event-free transform.
Shift = 4 # past: 2, 3

# Base name of the candle files: "table-<CandleFile>.npy" and "reports-<CandleFile>.json".
# The number in the trailing "5m" must match field 1 of the notebook name.
CandleFile = "18-01-01-00-00-23-05-20-20-23-5m"
# SmallSigma/LargeSigma are handed to get_eFree_with_plot and also determine how many
# leading rows the event-free transform discards.
SmallSigma = 1
LargeSigma = 30
eFreeNoLog = True   # forwarded as noLog= to get_eFree_with_plot

shuffle_batch = 100  # Keep it small to speed up model loading.

dir_candles = os.path.join(dir_data, "Candles")

# The *_x settings describe model inputs and the *_y settings describe targets
# (presumably; they are forwarded to get_formed_data — confirm there).
min_true_candle_percent_x = 80   # must match field 4 of the notebook name
chosen_markets_x = []            # NOTE(review): not referenced by any other cell of this notebook
chosen_fields_names_x = ['ClosePrice'] #, 'BaseVolume']
min_true_candle_percent_y = 80
assert min_true_candle_percent_x == min_true_candle_percent_y
chosen_markets_y = []            # NOTE(review): not referenced by any other cell of this notebook
chosen_fields_names_y = ['ClosePrice']

# Target selection: either an explicit list of market names, or None, in which case the
# checksum section expects the notebook name to carry a numeric top-percent field instead.
target_market_names = None
# target_market_names = ['NEOUSDT', 'LTCUSDT', 'BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'QTUMUSDT', 'ADAUSDT', 'XRPUSDT']
target_market_names = ['ETHUSDT', 'BNBUSDT']
# NOTE(review): "tarket" is a misspelling of "target", but the checksum section and the
# get_formed_data call use this exact spelling, so a rename must touch all three places.
tarket_market_top_percent = 15

Standardization = True  # when True, Candles is passed through standardize_2
Kill_Irregulars = True  # ----------------- pls implement it (not referenced anywhere else in this notebook)
Time_into_X = True      # forwarded to get_datasets_2
Time_into_Y = False     # forwarded to get_datasets_2
eFreeNoPlot = True      # forwarded as noPlot= to get_eFree_with_plot

#======================================================================== Model

# Model hyper-parameters; all five are passed positionally to build_model_2.
# Num_Layers and repComplexity must match fields 6 and 7 of the notebook name
# (see the checksum section).
Num_Layers = 16 # Wow
Num_Heads = 1   # Single head: we have a single GPU and want exhaustive attention.
Factor_FF = 4
repComplexity = 12  # Wower
Dropout_Rate = 0.1

# Checkpoint and CSV-log paths are keyed by the model identifier derived from the notebook name.
dir_Checkpoint = os.path.join(dir_data, "Checkpoints")
checkpoint_filepath = os.path.join(dir_Checkpoint, data_model)
dir_CSVLogs = os.path.join(dir_data, "CSVLogs")
csvLogger_filepath = os.path.join(dir_CSVLogs, data_model)

#======================================================================== Train

# NOTE(review): Epochs_Initial is not used by the model.fit calls in this notebook;
# they hard-code their epoch counts and only mention this name in a comment.
Epochs_Initial = 5000
HuberThreshold = 4.0                     # passed to build_model_2
Checkpoint_Monitor = "val_loss"          # passed to get_callbacks
EarlyStopping_Min_Monitor = "val_loss"   # passed to get_callbacks
EarlyStopping_Patience = 30              # passed to get_callbacks

Optimizer = "adam"
# Passed to build_model_2; the GPU-setup cell rescales it when more than one GPU is found.
Learning_Rate = 0.00001  # default: 0.001

#=============================================================== Checksum

# Cross-check the settings above against the fields encoded in the notebook name:
#   <id>.<candle interval>.<Nx>.<Ny>.<min candle %>.<targets>.<Num_Layers>.<repComplexity>
# Every assert carries a message so a mismatch names the offending setting.
params = data_model.split('.')
assert int(params[1]) == int(CandleFile.split('-')[-1][:-1]), \
    "Candle interval in the notebook name ({}) does not match CandleFile ({})".format(params[1], CandleFile)
assert int(params[2]) == Nx, "Nx={} does not match the notebook name ({})".format(Nx, params[2])
assert int(params[3]) == Ny, "Ny={} does not match the notebook name ({})".format(Ny, params[3])
assert int(params[4]) == min_true_candle_percent_x, \
    "min_true_candle_percent_x={} does not match the notebook name ({})".format(min_true_candle_percent_x, params[4])
assert int(params[6]) == Num_Layers, \
    "Num_Layers={} does not match the notebook name ({})".format(Num_Layers, params[6])
assert int(params[7]) == repComplexity, \
    "repComplexity={} does not match the notebook name ({})".format(repComplexity, params[7])
targets = params[5]
if targets.isnumeric():
    # A numeric target field means "top N percent of markets": no explicit list is allowed.
    assert target_market_names is None, \
        "Notebook name selects targets by top percent, but target_market_names is set"
    assert int(targets) == tarket_market_top_percent, \
        "tarket_market_top_percent={} does not match the notebook name ({})".format(tarket_market_top_percent, targets)
else:
    # Otherwise the field is a comma-separated list of base symbols (without the USDT suffix).
    # Guard against None explicitly: `in None` would raise TypeError instead of a clear failure.
    assert target_market_names is not None, \
        "Notebook name lists explicit targets, but target_market_names is None"
    for target in targets.split(','):
        assert (target+'usdt').upper() in target_market_names, \
            "Target {!r} from the notebook name is missing from target_market_names".format(target)

In [None]:
# Detect GPUs, enable on-demand memory growth on each one, and choose a distribution strategy.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    print(gpus)
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            # tf.config.experimental.set_virtual_device_configuration(
            #     gpu,[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)]) # why 5120?
            # logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            # print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

# mirrored_strategy stays None with zero or one GPU; the model-building cell uses that to
# decide whether to build inside a strategy scope.
mirrored_strategy = None
if len(gpus) > 1: 
    mirrored_strategy = tf.distribute.MirroredStrategy()
    # Scale the configured learning rate by 3/4 of the GPU count.
    # NOTE(review): this rebinds Learning_Rate in place, so re-running this cell without
    # re-running the config cell compounds the scaling.
    Learning_Rate = Learning_Rate * len(gpus) * 3 / 4

# tf.config.experimental.set_virtual_device_configuration(
#     gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])

In [None]:
# NOTE(review): datetime/timedelta are not used by any cell of this notebook.
from datetime import datetime, timedelta
import sys  # NOTE(review): already imported in the first cell
# NOTE(review): the parent directory is added to the import path only here, after
# `from test_utility import *` has already run in an earlier cell. If test_utility lives
# in '..', this line must move before that import — confirm.
sys.path.append('..')
import json

In [None]:
def upgrade_file(path):
    """Overwrite the file at ``path`` with a two-line placeholder notice.

    The file is opened in a truncating text write mode, so any previous
    content is discarded, and the file is created if it does not exist.
    """
    notice_parts = (
        "# Sorry, the content is removed.",
        "\n# Please ask Mike for the content.",
    )
    with open(path, "+tw") as handle:
        for part in notice_parts:
            handle.write(part)

In [None]:
# Make sure every working directory exists before anything reads from or writes to it.
# os.makedirs(..., exist_ok=True) also creates missing parent directories (os.mkdir would
# raise FileNotFoundError) and removes the check-then-create race of isdir() + mkdir().
folders = [dir_data, dir_candles, dir_Checkpoint, dir_CSVLogs]
for folder in folders:
    os.makedirs(folder, exist_ok=True)

In [None]:
#==================== Load candle data into 'Candles' with shape of (time, markets, 10 fields) ====================
# The first two axes of the stored table are swapped after loading, and the values are
# cast to float32.
_candle_table_path = os.path.join(dir_candles, "table-" + CandleFile + ".npy")
Candles = np.load(_candle_table_path).swapaxes(0, 1).astype(np.float32)
print("Candles: {}".format(Candles.shape))

In [None]:
# Visual sanity check of one market (column index 5 of Candles' market axis).
# NOTE(review): the meaning of the arguments 1, 1, 500 is defined by Show_Price_Volume_10
# in test_utility — confirm there. `market` is also read by the next cell.
market = 5
Show_Price_Volume_10(Candles[:, market, :], 1, 1, 500)

In [None]:
# Demonstrates the event-free scheme on the market chosen in the previous cell (`market`).
# NOTE(review): the arguments 3, 30, 500 are interpreted by Event_Free_Learning_Scheme_10
# in test_utility — confirm their meaning there.
Event_Free_Learning_Scheme_10(Candles[:, market, :], 3, 30, 500)

In [None]:
#==================== Delete 7 candle fields from 'Candles'. ====================
# Candles.shape becomes (time, markets, ['ClosePrice', 'BaseVolume', 'BuyerBaseVolume'] )

# Keep field 9 (CandleMarks) for later use. It is copied rather than sliced as a view:
# a view would keep the whole 10-field table alive after `Candles` is rebound below.
CandleMarks = Candles[:, :, 9].copy()
Candles = np.delete(Candles, [0, 1, 2, 5, 6, 8, 9], axis = 2) # delete Open, High, Low, qVolume, #Trades, bQVolume, CandleMarks
all_field_names = ['ClosePrice', 'BaseVolume', 'BuyerBaseVolume']

assert np.isfinite(Candles).all(), "Candles contains NaN or infinite values"

table_markets = []
# The reports file is a JSON list of strings; the first two are printed as a sample.
with open( os.path.join( dir_candles, "reports-" + CandleFile + ".json"), "r") as f:
    reports = json.load(f)
print(reports[:2])

# A market name is the text before the first ':' of every report mentioning 'Success'.
# partition() returns the whole string when there is no ':', whereas slicing with
# find() == -1 would silently drop the last character.
all_market_names = [ s.partition(':')[0] for s in reports if 'Success' in s ]
assert Candles.shape[1] == len(all_market_names), \
    "Candles has {} markets but the reports list {} successful markets".format(Candles.shape[1], len(all_market_names))
print(Candles.shape, len(all_market_names), all_market_names[:2])

In [None]:
# Let get_formed_data (test_utility) select markets and fields for inputs (x) and targets (y).
# It rebinds Candles, CandleMarks and all_market_names, and returns the x/y index sets, the
# chosen market/field names and the resolved target markets.
# NOTE(review): the 13 return values are unpacked positionally — the order must match the
# helper exactly. Also note the near-identical names: `target_market_names` (config input)
# versus `target_markets_names` (returned here).
# NOTE(review): all_field_names is not rebound here; confirm it still lines up with
# Candles' field axis after this call, since later cells index it by field position.
Candles, CandleMarks, all_market_names, x_indices, y_indices, \
chosen_market_names_x, chosen_field_names_x, chosen_market_names_y, chosen_field_names_y, \
chosen_market_names, chosen_field_names, \
target_markets_names, target_markets = \
get_formed_data( Candles, CandleMarks, all_market_names, all_field_names, 
        min_true_candle_percent_x, chosen_fields_names_x, min_true_candle_percent_y, chosen_fields_names_y,
        target_market_names, tarket_market_top_percent
)

# Dump every returned value for inspection.
print(Candles.shape)
print(CandleMarks.shape)
print(len(all_market_names))
print(x_indices)
print(y_indices)
print(chosen_market_names_x)
print(chosen_field_names_x)
print(chosen_market_names_y)
print(chosen_field_names_y)
print(chosen_market_names)
print(chosen_field_names)
print(target_markets_names)
print(target_markets)
print(len(chosen_market_names_x), len(chosen_market_names_y), len(target_markets_names))

In [None]:
# Build one timestamp per candle row, then derive time features from them.
start_ts, interval_s, timestamps_abs = get_timestamps_2(CandleFile, len(Candles))
print(start_ts, interval_s, timestamps_abs.shape, timestamps_abs[:3])

# Time features share Candles' dtype; size_time is the number of feature columns.
Times = get_time_features(timestamps_abs).astype(Candles.dtype)
size_time = Times.shape[1]

# One row of time features per candle row.
assert len(Times) == len(Candles)
print(Candles.shape, Times.shape)

In [None]:
#==================== Generate event-free data into Data ====================
# Data loses heading items.
# Do it before: Permute Data in time
#
# Every (market, field) series in Candles is replaced in place by its event-free version
# from get_eFree_with_plot; the leading rows that the transform cannot produce are then
# dropped from both Candles and Times.
# NOTE(review): this cell mutates and re-slices Candles/Times, so running it twice
# transforms already-transformed data — re-run from the loading cell instead.

# NOTE(review): `beta` is not referenced anywhere else in this notebook; whether the helper
# can actually see it depends on how test_utility is written — confirm.
alpha = 3; beta = 3 # beta is used in 'get_eFree_with_plot'. Ugly coupling.
# Number of leading rows lost to the transform.
event_free_data_loss = 3 * ( alpha * SmallSigma + LargeSigma)
# NOTE(review): eFree is allocated here but never written to or read by any cell of this notebook.
eFree = np.zeros( (Candles.shape[0] - event_free_data_loss, len(chosen_market_names), len(chosen_field_names)), dtype = Candles.dtype )

for market in range(Candles.shape[1]):
    for field in range(Candles.shape[2]):
        # 'BaseVolume' series get a small sigma that is alpha times larger.
        # NOTE(review): field names are looked up in all_field_names by position; confirm
        # that Candles' field axis still follows that order after get_formed_data.
        sSigma = SmallSigma
        if all_field_names[field] == 'BaseVolume': sSigma = SmallSigma * alpha
        # Only eventFree is used below; the other five return values are ignored.
        P, maP, logP, log_maP, event, eventFree = \
        get_eFree_with_plot(all_market_names[market], all_field_names[field], Candles[:, market, field], sSigma,
                            LargeSigma, Candles.shape[0] - event_free_data_loss, noPlot=eFreeNoPlot, noLog=eFreeNoLog)
        assert Candles.shape[0] - event_free_data_loss == eventFree.shape[0]
        eventFree = eventFree.astype(Candles.dtype)
        # Write the result back over the tail of the series; the first
        # event_free_data_loss rows keep their original values until they are sliced off below.
        Candles[event_free_data_loss:, market, field] = eventFree

# Drop the untransformed head plus `Shift` extra rows, identically for candles and time features.
Candles = Candles[event_free_data_loss + Shift:]
Times = Times[event_free_data_loss + Shift:]
assert Candles.shape[0] == Times.shape[0]

print(Candles.shape, Times.shape)

In [None]:
# Optionally standardize the features. `Standard` holds the second value returned by
# standardize_2 (presumably the statistics needed to undo the scaling — confirm in
# test_utility), or None when standardization is disabled.
Standard = None

if Standardization:
    Candles, Standard = standardize_2(Candles)

In [None]:
# Plot every (market, field) series of Candles on one wide axis, one line per series.
fig, ax = plt.subplots(figsize=(16, 3))
if Standardization:
    ax.set_title("Features are custom-standardized")
else:
    ax.set_title("Features are not standardized")
for market in range(Candles.shape[1]):
    for field in range(Candles.shape[2]):
        # Legend entry: "<field> @ <market name without its last len('USDT') characters>".
        series_label = "{} @ {}".format(all_field_names[field], all_market_names[market][:-len('USDT')])
        ax.plot(Candles[:, market, field], label=series_label)
ax.legend(loc='upper left')
plt.show()

In [None]:
# Compute the sample anchors for the two splits consumed by get_datasets_2
# (the _t / _v suffixes presumably mean training / validation — confirm in test_utility).
sample_anchores_t, sample_anchores_v = get_sample_anchors_2(Candles, Nx, Ny, Ns)
print(sample_anchores_t.shape, sample_anchores_v.shape)

In [None]:
# Build the training and validation datasets. dx and dy are later passed to build_model_2
# (presumably the input and output feature sizes — confirm in test_utility).
# Shuffling is requested only when at most one GPU is present.
ds_train, ds_valid, dx, dy = \
get_datasets_2(
    Candles, Time_into_X, Time_into_Y, Times, 
    sample_anchores_t, sample_anchores_v,
    Nx, x_indices, Ny, y_indices, size_time, target_markets,
    BatchSize, shuffle_batch, shuffle=(len(gpus)<=1)
)

In [None]:
# Take a one-element slice of the training dataset and print the resulting dataset object
# (this prints the object itself; it does not iterate over the batch contents).
train = ds_train.take(1)
print(train)

In [None]:
# Build the model. With a MirroredStrategy (multi-GPU) the model is created inside the
# strategy's scope; otherwise it is built directly. Both paths use the same arguments.
model = None

_model_args = (
    dx, dy, Num_Layers, Num_Heads, Factor_FF, repComplexity, Dropout_Rate,
    HuberThreshold, Optimizer, Learning_Rate,
)

if mirrored_strategy is not None:
    with mirrored_strategy.scope():
        model = build_model_2(*_model_args)
else:
    model = build_model_2(*_model_args)

In [None]:
# Create the training callbacks and try to resume from an existing checkpoint.
# model_loaded tells the next cell whether to plot the previous history or start training.
model_loaded = False

callbacks = get_callbacks(
    checkpoint_filepath, Checkpoint_Monitor, 
    csvLogger_filepath, 
    EarlyStopping_Min_Monitor, EarlyStopping_Patience
)

try:
    model.load_weights(checkpoint_filepath)
    print("Loading a checkpoint...")
    model_loaded = True
except Exception as err:
    # `except Exception` (not a bare `except:`) so KeyboardInterrupt/SystemExit propagate.
    # The reason is printed because a failure is not always "file not found": an
    # incompatible checkpoint would otherwise silently lead to training from scratch.
    print("No chekkpoint to load.")
    print("  reason: {}: {}".format(type(err).__name__, err))

In [None]:
# If a checkpoint was loaded, show the training history logged so far;
# otherwise run a first single epoch of training.
if model_loaded:
    try:
        columns = ('loss', 'val_loss', 'mTA', 'val_mTA')
        plot_csv_train_history(csvLogger_filepath, columns, title=data_model)
    except Exception as err:
        # Plotting stays best-effort (e.g. the CSV log may be missing), but the failure is
        # reported instead of silently swallowed, and KeyboardInterrupt is not caught.
        print("Could not plot the training history: {}: {}".format(type(err).__name__, err))
else:
    model.fit(
        ds_train, # x and y_true
        validation_data=ds_valid,
        epochs=1, #Epochs_Initial,
        callbacks=callbacks
    )

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Training round: 20 epochs (hard-coded; Epochs_Initial from the config cell is not used),
# unless one of `callbacks` stops training earlier.
model.fit(
    ds_train, # x and y_true
    validation_data=ds_valid,
    epochs=20, #Epochs_Initial,
    callbacks=callbacks
)

In [None]:
# Plot the loss and mTA columns (training and validation) from the CSV log path for this model.
columns = ('loss', 'val_loss', 'mTA', 'val_mTA')
plot_csv_train_history(csvLogger_filepath, columns, title=data_model)

In [None]:
# Training round: 50 epochs (hard-coded; Epochs_Initial from the config cell is not used),
# unless one of `callbacks` stops training earlier.
model.fit(
    ds_train, # x and y_true
    validation_data=ds_valid,
    epochs=50, #Epochs_Initial,
    callbacks=callbacks
)

In [None]:
# Plot the loss and mTA columns (training and validation) from the CSV log path for this model.
columns = ('loss', 'val_loss', 'mTA', 'val_mTA')
plot_csv_train_history(csvLogger_filepath, columns, title=data_model)

In [None]:
# Training round: 500 epochs (hard-coded; Epochs_Initial from the config cell is not used),
# unless one of `callbacks` stops training earlier.
model.fit(
    ds_train, # x and y_true
    validation_data=ds_valid,
    epochs=500, #Epochs_Initial,
    callbacks=callbacks
)

In [None]:
# Plot the loss and mTA columns (training and validation) from the CSV log path for this model.
columns = ('loss', 'val_loss', 'mTA', 'val_mTA')
plot_csv_train_history(csvLogger_filepath, columns, title=data_model)

In [None]:
# Training round: another 500 epochs with the same datasets and callbacks
# (hard-coded; Epochs_Initial from the config cell is not used).
model.fit(
    ds_train, # x and y_true
    validation_data=ds_valid,
    epochs=500, #Epochs_Initial,
    callbacks=callbacks
)

In [None]:
# Plot the loss and mTA columns (training and validation) from the CSV log path for this model.
columns = ('loss', 'val_loss', 'mTA', 'val_mTA')
plot_csv_train_history(csvLogger_filepath, columns, title=data_model)

In [None]:
# Training round: another 500 epochs with the same datasets and callbacks
# (hard-coded; Epochs_Initial from the config cell is not used).
model.fit(
    ds_train, # x and y_true
    validation_data=ds_valid,
    epochs=500, #Epochs_Initial,
    callbacks=callbacks
)

In [None]:
# Plot the loss and mTA columns (training and validation) from the CSV log path for this model.
columns = ('loss', 'val_loss', 'mTA', 'val_mTA')
plot_csv_train_history(csvLogger_filepath, columns, title=data_model)