In [None]:
%matplotlib inline
%load_ext tensorboard

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
from tensorflow import keras  # tf.keras
import time

In [None]:
# Record the interpreter and library versions this run was executed with.
print("python", sys.version)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)

In [None]:
# Fail fast on runtimes the notebook does not support.
assert sys.version_info >= (3, 5) # Python ≥3.5 required
# Compare the numeric (major, minor) pair: comparing the raw version strings would
# sort "10.0" before "2.0".
assert tuple(int(part) for part in tf.__version__.split(".")[:2]) >= (2, 0)    # TensorFlow ≥2.0 required

In [None]:
# Let TensorFlow grow GPU memory on demand for every visible physical GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Currently, memory growth needs to be the same across GPUs.
    # With no GPU present the loop body simply never runs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

In [None]:
from datetime import datetime, timedelta
import sys 
sys.path.append('..')
import json

In [None]:
#==================== Define 'intervalToMilliseconds' ====================

def intervalToMilliseconds(interval):
    """Convert a Binance interval string to milliseconds

    :param interval: Binance interval string 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w
    :type interval: str

    :return:
        None if unit not one of m, h, d or w
        None if string not in correct format (including empty or non-string input)
        int value of interval in milliseconds
    """
    seconds_per_unit = {
        "m": 60,
        "h": 60 * 60,
        "d": 24 * 60 * 60,
        "w": 7 * 24 * 60 * 60
    }

    # A valid interval needs at least one digit plus the unit letter; this also
    # keeps the empty string from raising IndexError on interval[-1].
    if not isinstance(interval, str) or len(interval) < 2:
        return None

    unit = interval[-1]
    if unit not in seconds_per_unit:
        return None

    try:
        return int(interval[:-1]) * seconds_per_unit[unit] * 1000
    except ValueError:
        # The part before the unit is not an integer.
        return None

In [None]:
#==================== Load candle data into 'table' with shape of (time, markets, 10 fields) ====================

# Dataset identifier; the commented alternatives select other candle intervals.
# path = "18-01-01-00-00-23-05-20-05-55-1h" #
path = "18-01-01-00-00-23-05-20-20-23-5m"
# path = "18-01-01-00-00-23-05-20-09-11-1d"

# Load the saved array and swap its first two axes in one step.
table = np.swapaxes(
    np.load( os.path.join( "/mnt/data/Trading/Candles", "table-" + path + ".npy") ),
    0, 1,
)
print("table: {}".format(table.shape))

In [None]:
# Star-import the project helper module.
# NOTE(review): Show_Price_Volume_10 and other helpers used later (e.g. PoltNormalized)
# are presumably defined there — confirm; a star import hides where names come from.
from Mike_NB_01 import *

# Index along the market axis of `table` used for the exploratory charts.
market = 5
# Pass that market's full (time, fields) slice to the helper; the meaning of the three
# numeric arguments is defined in Mike_NB_01 — TODO confirm.
Show_Price_Volume_10(table[:, market, :], 1, 1, 5000)

In [None]:
# Run the imported helper on the same market slice chosen above.
# NOTE(review): the arguments 3, 30, 5000 presumably are small sigma, large sigma and
# number of latest points — confirm against Mike_NB_01.
Event_Free_Learning_Scheme_10(table[:, market, :], 3, 30, 5000)

In [None]:
#==================== Delete 7 candle fields from 'table'. ====================
# table.shape becomes (time, markets, ['ClosePrice', 'BaseVolume', 'BuyerBaseVolume'] )

# Field 9 holds the per-candle marks; grab it before the field is removed.
marks = table[..., 9] # keep it for later use
# delete Open, High, Low, qVolume, #Trades, bQVolume, Marks
table = np.delete(table, [0, 1, 2, 5, 6, 8, 9], axis = 2)

table_markets = []
with open( os.path.join( "/mnt/data/Trading/Candles", "reports-" + path + ".json"), "r") as f:
    reports = json.load(f)
print(reports[:2])

# Each successful report contributes the text before its first ':' as the market name.
markets = [ report[: report.find(':')] for report in reports if 'Success' in report ]
assert len(markets) == table.shape[1]
print(table.shape, len(markets), markets[:2])

In [None]:
#==================== Restore timestamps. ====================

# The file name starts with "yy-mm-dd-hh-mm": the time of the first candle.
# NOTE(review): `start` is a naive datetime, so timestamp() interprets it in the local
# timezone of this machine — confirm that matches the timezone used in the file name.
start = datetime( 2000+int(path[0:2]), *( int(path[pos: pos+2]) for pos in (3, 6, 9, 12) ) )
start_ts = round(start.timestamp())
# The interval string is whatever follows the last '-' found in the final 4 characters.
interval = path[ path.find('-', len(path) - 4) + 1 : ]
interval_s = round(intervalToMilliseconds(interval) / 1000)
# One timestamp per row of `table`, spaced interval_s seconds apart.
timestamps = np.arange(start_ts, start_ts + table.shape[0] * interval_s, interval_s, dtype=int)
assert timestamps.shape[0] == table.shape[0]
print(start_ts, interval_s, timestamps.shape, timestamps[:3])

In [None]:
# # Find market clusters # temporary
# from sklearn.metrics import pairwise

# distances = np.zeros( (table.shape[1], table.shape[1]), dtype=float)

# # Find dependency distance
# for m in range(table.shape[1]):
#     distances[m, m] = 0.
#     for n in range(m+1, table.shape[1]):
#         mask = (marks[:, m] + marks[:, n] == 0) # marks == 0 : true full candles, marks = -1: price interpolated , marks = -2: coincodex prices
#         pm = table[mask, m, 0][np.newaxis]
#         pn = table[mask, n, 0][np.newaxis]
#         distances[m, n] = sklearn.metrics.pairwise.cosine_distances(pm, pn)
#         distances[n, m] = distances[m, n]

In [None]:
# from sklearn.cluster import OPTICS
# clustering = OPTICS(metric='precomputed', n_jobs=-1).fit(distances)
# print( clustering.labels_ )

# np.reshape(np.argwhere(clustering.labels_ == 1), -1)

# market_clusters = [ [ markets[ id ] for id in np.reshape(np.argwhere(clustering.labels_ == label), -1) ] for label in range(np.max(clustering.labels_))]
# print(market_clusters)

In [None]:
# cluster = 0
# ids = np.reshape(np.argwhere(clustering.labels_ == cluster), -1)
# series = [ [table[:, id, 0], markets[id] ] for id in ids ]
# PoltNormalized("Market cluster 0. Recent prices are mediocre. Shorter history.", series, color = 'auto')

In [None]:
# cluster = 1
# ids = np.reshape(np.argwhere(clustering.labels_ == cluster), -1)
# series = [ [table[:, id, 0], markets[id] ] for id in ids ]
# PoltNormalized("Market cluster 1. Vanished recently. Shorter history, Trends later", series, color = 'auto')

In [None]:
# cluster = 2
# ids = np.reshape(np.argwhere(clustering.labels_ == cluster), -1)
# series = [ [table[:, id, 0], markets[id] ] for id in ids ]
# PoltNormalized("Market cluster 2. Not vanished recently. Longer hostory. Trends earlier", series, color = 'auto')

In [None]:
#==================== Define 'save_to_multiple_csv_files' ====================

def save_to_multiple_csv_files(data, sample_anchors, name_prefix, Nx, x_indices, Ny, y_indices, header=None, n_parts=10):
    """Write sliding-window samples of `data` to `n_parts` CSV files.

    For every anchor, one CSV row is written: the flattened input window
    data[anchor : anchor + Nx] followed by the flattened target window
    data[anchor + Nx : anchor + Nx + Ny]. Each window is restricted to the
    (axis-1 indices, axis-2 indices) pair given by x_indices / y_indices.

    :param data: array indexed as [time, axis 1, axis 2]
    :param sample_anchors: window start positions; split into n_parts groups with np.array_split
    :param name_prefix: output path prefix; files are named "<prefix>_NN.csv"
    :param header: optional first line written to every file
    :return: list of the file names written, in part order
    """
    def flat_csv(lo, hi, indices):
        # Select the time range first, then axis 1, then axis 2, and flatten in C order.
        window = data[lo: hi][:, indices[0]][:, :, indices[1]]
        return ",".join(str(value) for value in np.reshape(window, -1))

    written = []
    for part_no, part_anchors in enumerate(np.array_split(sample_anchors, n_parts)):
        csv_name = "{}_{:02d}.csv".format(name_prefix, part_no)
        written.append(csv_name)
        with open(csv_name, "wt", encoding="utf-8") as out:
            if header is not None:
                out.write(header)
                out.write("\n")
            for begin in part_anchors:
                split = begin + Nx
                out.write(flat_csv(begin, split, x_indices))
                out.write("," + flat_csv(split, split + Ny, y_indices))
                out.write("\n")
    return written

In [None]:
#==================== Define 'parse_csv_line' ====================

def parse_csv_line(line, Nx, size_x, Ny, size_y):
    """Decode one CSV line into an (x, y) pair of float tensors.

    The line must hold Nx * size_x + Ny * size_y comma-separated numbers. The
    record defaults are float constants 0.0, so fields are decoded as floats.

    :param line: scalar string tensor (or bytes) with one CSV record
    :return: x of shape (Nx, size_x) — a sequence of Nx tokens, each of size_x —
             and y as a flat vector of the remaining Ny * size_y values
    """
    n_x = Nx * size_x
    defs = [tf.constant(0.0)] * (n_x + Ny * size_y)
    fields = tf.io.decode_csv(line, record_defaults=defs)
    # The debugging print of `line` was dropped: this function is used as a
    # tf.data map function, where it only printed noise.
    x = tf.reshape(tf.stack(fields[: n_x]), [Nx, -1])
    y = tf.stack(fields[n_x :])
    return x, y

In [None]:
#==================== Define 'csv_reader_dataset' ====================

def csv_reader_dataset(filenames, Nx, size_x, Ny, size_y, n_parse_threads=5, batch_size=32, shuffle_buffer_size=32*128, n_readers=5):
    """Build a batched tf.data pipeline of (x, y) samples from header-less CSV files.

    Steps: list the files, interleave their lines across `n_readers` files,
    shuffle the lines, parse each with parse_csv_line, then batch. The final
    partial batch is kept. No repeat and no prefetch is applied here.
    """
    def read_lines(filename):
        # No .skip(1): the files are written without a header line.
        return tf.data.TextLineDataset(filename)

    def parse(line):
        return parse_csv_line(line, Nx, size_x, Ny, size_y)

    lines = tf.data.Dataset.list_files(filenames).interleave(read_lines, cycle_length=n_readers)
    # Shuffle individual lines before they are parsed and batched.
    samples = lines.shuffle(shuffle_buffer_size).map(parse, num_parallel_calls=n_parse_threads)
    return samples.batch(batch_size, drop_remainder=False)

In [None]:
def get_timepoint_size(indices):
    """Return the product of the lengths of all index groups in `indices`.

    For indices = (markets, fields) this is the number of values one time
    point contributes to a sample. An empty `indices` yields 1.
    """
    lengths = [len(group) for group in indices]
    product = 1
    while lengths:
        product *= lengths.pop()
    return product

In [None]:
# Define toy test data: each cell holds its own flat (C-order) index, so windows are easy to verify by eye.

n_times = 1000; n_markets = 2; n_fields = 2
data = np.arange(n_times * n_markets * n_fields, dtype=float).reshape(n_times, n_markets, n_fields)
print(data.shape)   # time, market, field
print(data[:2, :, :])

In [None]:
# Window layout for the toy data: Nx input steps, Ny target steps, one sample every Ns steps.
Nx = 2
Ny = 2
Ns = 10
BatchSize = 2

# A window starting at `a` needs rows a .. a+Nx+Ny-1, so the largest valid anchor is
# data.shape[0] - Nx - Ny. The "+ 1" makes the range include it, matching the
# train/valid anchor ranges defined further down in this notebook.
sample_anchors = range(0, data.shape[0] - Nx - Ny + 1, Ns)
print(data.shape[0], len(sample_anchors), sample_anchors)

x_indices = ( (0, 1), (0, 1) )    # (market, field)
y_indices = ( (0,), (0, 1) )    # (market, field)
# Show the first input window and the target window that follows it.
print(data[0:2][:, x_indices[0]][:, :, x_indices[1]])
print(data[2:4][:, y_indices[0]][:, :, y_indices[1]])

# Number of values one time point contributes to x and to y.
size_x = get_timepoint_size(x_indices)
size_y = get_timepoint_size(y_indices)
print(size_x, size_y)

In [None]:
# Where the toy CSV files live and how they are named.
dir_datasets = "/mnt/data/Trading/Datasets"
name_plus = path+'_o'
name_prefix = os.path.join(dir_datasets, name_plus)

# True: pick up files written by an earlier run. False: delete them and write new ones.
reuse_files = True

if reuse_files:
    import re
    # re.escape: the file name must be matched literally, not interpreted as a regex.
    filenames = [ os.path.join(dir_datasets, x) for x in os.listdir(dir_datasets) if re.match(re.escape(name_plus), x)]
else:
    # Remove stale files whose name contains name_plus (what `rm dir/*name*` did),
    # without spawning a shell and without silently ignoring failures.
    for x in os.listdir(dir_datasets):
        stale = os.path.join(dir_datasets, x)
        if name_plus in x and os.path.isfile(stale):
            os.remove(stale)
    filenames = save_to_multiple_csv_files(data, sample_anchors, name_prefix, Nx, x_indices, Ny, y_indices, header=None, n_parts=10)

print(filenames)

In [None]:
# shuffle=None means "use the default", and the default shuffles the file names.
# shuffle=False is the way to keep the listing order stable.
filename_dataset = tf.data.Dataset.list_files(filenames, shuffle=False)
print(filename_dataset.cardinality().numpy())
for element in filename_dataset:
    print(element.numpy())

In [None]:
# Peek at the first 20 raw CSV lines of the first file to check the on-disk format.
ds = tf.data.TextLineDataset(filenames[0])
for line in ds.take(20):
    print(line.numpy())

In [None]:
# Read lines from n_readers files at a time and show the first 15 lines produced.
n_readers = 5
dataset = filename_dataset.interleave(
    lambda filename: tf.data.TextLineDataset(filename),
    cycle_length=n_readers, num_parallel_calls=tf.data.AUTOTUNE) # NOTE(review): interleave cycles through the files rather than shuffling; the line order presumably also depends on the order of filename_dataset — confirm.

for line in dataset.take(15):
    print(line.numpy())

In [None]:
# Manual check of the parser: the 12 values equal Nx*size_x + Ny*size_y for the toy settings (2*4 + 2*2).
parse_csv_line(b'0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11.', Nx, size_x, Ny, size_y)

In [None]:
# Build the full toy pipeline (list files -> interleave -> shuffle -> parse -> batch).
dataset = csv_reader_dataset(filenames, Nx, size_x, Ny, size_y,
                             n_parse_threads=5, batch_size=BatchSize, shuffle_buffer_size=100000, n_readers=5)

In [None]:
# Print only the first batch to inspect the tensor shapes.
for element in dataset:
    print(element)
    break

# Expected shapes: x is (batch, Nx, size_x), y is (batch, Ny * size_y).

In [None]:
# Guard: `table` must not contain NaN, -inf or +inf anywhere.

assert np.isfinite(table).all()

In [None]:
#==================== Define 'Get_Event_Free_Feature' ====================

def Get_Event_Free_Feature(feature, smallSigma, largeSigma, nLatest):
    """Split a 1-D series into a slow 'event' component and an 'event-free' residual.

    Steps:
      1. Smooth `feature` with a one-sided Gaussian kernel of 3 * smallSigma points (maP).
      2. Take log2 of the positive smoothed values (log_maP); other entries stay 0.
      3. Smooth log_maP with a one-sided Gaussian kernel of 3 * largeSigma points (event).
      4. eventFree = log_maP - event, on the part where both are defined.

    Both sigmas are capped at a third of the series length. Both convolutions use
    mode="valid", so the aligned results cover feature[smallP + largeP - 2:].

    NOTE(review): np.convolve reverses the kernel, so the kernel's peak (x = 0)
    weights the oldest sample of each window — confirm this is intended.

    :param feature: 1-D numpy array
    :param smallSigma: sigma of the first (short) smoothing kernel
    :param largeSigma: sigma of the second (long) smoothing kernel
    :param nLatest: number of trailing points to return (capped at what is available)
    :return: (P, maP, logP, log_maP, event, eventFree), each holding the latest
             `nLatest` points: raw feature, smoothed feature, log2 of the raw feature
             (0 where not positive), log2 of the smoothed feature, event, residual
    """
    def gaussian( x, s): return 1./np.sqrt( 2. * np.pi * s**2 ) * np.exp( -x**2 / ( 2. * s**2 ) )

    n_points = feature.shape[0]

    # Integer floor division replaces math.floor(n / 3): same value, and the function
    # no longer depends on `math`, which this notebook never imports.
    smallSigma = min(n_points // 3, smallSigma)
    smallP = 3 * smallSigma
    smallKernel = np.fromiter( (gaussian( x , smallSigma ) for x in range(-smallP+1, 1, 1 ) ), float ) # smallP points, incl 0.
    maP = np.convolve(feature, smallKernel, mode="valid") / np.sum(smallKernel) # maps to feature[smallP-1:]

    # log2 only where the smoothed value is positive; the first smallP positive
    # entries are skipped as well, to exclude initial nearly-zero values.
    nzPs = np.where(maP > 0.0)[0] [smallP:]
    log_maP = np.zeros( maP.shape, dtype=maP.dtype)
    log_maP[nzPs] = np.log2(maP[nzPs])

    largeSigma = min(n_points // 3, largeSigma)
    largeP = 3 * largeSigma
    largeKernel = np.fromiter( (gaussian( x , largeSigma ) for x in range(-largeP+1, 1, 1 ) ), float ) # largeP points, incl 0.
    event = np.convolve(log_maP, largeKernel, mode="valid") / np.sum(largeKernel) # maps to log_maP[largeP-1:], so to feature[smallP+largeP-2:]

    # Bring every series onto the common index range feature[smallP+largeP-2:].
    usable = n_points - (smallP+largeP-2)
    assert event.shape[0] == usable
    log_maP1 = log_maP[largeP-1:]
    assert log_maP1.shape[0] == usable
    P1 = feature[smallP+largeP-2:]
    assert P1.shape[0] == usable
    eventFree = log_maP1 - event

    # Keep only the latest points, never more than are available.
    nLatest = min(usable, nLatest)
    P2 = P1[-nLatest:]
    maP2 = maP[-nLatest:]

    # log2 of the raw feature, again leaving non-positive entries at 0.
    nzPs = np.where(P2 > 0.0) [0] [:]
    logP2 = np.zeros( P2.shape, dtype=P2.dtype)
    logP2[nzPs] = np.log2(P2[nzPs])

    log_maP2 = log_maP1[-nLatest:]
    event2 = event[-nLatest:]
    eventFree2 = eventFree[-nLatest:]

    return P2, maP2, logP2, log_maP2, event2, eventFree2    # eventFree = log_maP - event


In [None]:
# Smoke test on market 0, field 0: every derived series must come back with nLatest points.
nLatest = 500
P, maP, logP, log_maP, event, eventFree = Get_Event_Free_Feature(table[:, 0, 0], 1, 30, nLatest)
assert all(series.shape[0] == nLatest for series in (maP, logP, log_maP, event, eventFree))


In [None]:
#==================== Define 'get_plot_log_feature' ====================

def get_plot_log_feature(market, field, feature, smallSigma, largeSigma, nLatest, NoChart = True):
    """Compute the event-free decomposition of `feature` and optionally chart it.

    `market` and `field` are only used to label the chart. The return value is
    the six-tuple produced by Get_Event_Free_Feature, unchanged.
    """
    P, maP, logP, log_maP, event, eventFree = Get_Event_Free_Feature(feature, smallSigma, largeSigma, nLatest)
    if NoChart:
        return P, maP, logP, log_maP, event, eventFree

    # [series, legend label, colour]; the raw feature P is deliberately not plotted.
    series = [
        [maP, "maP", "g"],
        [logP, "logP" ,"m"],
        [log_maP, "log.maP", "b"],
        [event, "event", "c"],
        [eventFree, "e.Free", "brown"],
    ]
    PoltNormalized("Event-free (brown) {} on {}".format(field, market), series)
    return P, maP, logP, log_maP, event, eventFree

In [None]:
# Markets ordered by the number of true candles they have.
# ['NEOUSDT', 'LTCUSDT', 'BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'QTUMUSDT', 'ADAUSDT', 'XRPUSDT', 'EOSUSDT', 'XLMUSDT', 'IOTAUSDT', 'ONTUSDT', 'TRXUSDT', 'ETCUSDT', 'ICXUSDT']
# ['NULSUSDT', 'VETUSDT', 'LINKUSDT', 'WAVESUSDT', 'ONGUSDT', 'HOTUSDT', 'ZILUSDT', 'ZRXUSDT', 'FETUSDT', 'BATUSDT', 'XMRUSDT', 'ZECUSDT', 'IOSTUSDT', 'CELRUSDT', 'DASHUSDT']
# ['OMGUSDT', 'THETAUSDT', 'ENJUSDT', 'MATICUSDT', 'ATOMUSDT', 'TFUELUSDT', 'ONEUSDT', 'FTMUSDT', 'ALGOUSDT', 'DOGEUSDT', 'DUSKUSDT', 'ANKRUSDT', 'WINUSDT', 'COSUSDT', 'COCOSUSDT']
# ['MTLUSDT', 'TOMOUSDT', 'PERLUSDT', 'KEYUSDT', 'WANUSDT', 'FUNUSDT', 'DENTUSDT', 'DOCKUSDT', 'CHZUSDT', 'BANDUSDT', 'BUSDUSDT', 'XTZUSDT', 'RVNUSDT', 'RENUSDT', 'HBARUSDT']
# ['NKNUSDT', 'KAVAUSDT', 'STXUSDT', 'ARPAUSDT', 'IOTXUSDT', 'RLCUSDT', 'CTXCUSDT', 'BCHUSDT', 'TROYUSDT', 'VITEUSDT', 'OGNUSDT', 'DREPUSDT', 'WRXUSDT', 'BTSUSDT', 'LSKUSDT']
# ['BNTUSDT', 'LTOUSDT', 'MBLUSDT', 'COTIUSDT', 'STPTUSDT', 'WTCUSDT', 'DATAUSDT', 'CTSIUSDT', 'HIVEUSDT', 'CHRUSDT', 'ARDRUSDT', 'MDTUSDT', 'STMXUSDT', 'KNCUSDT', 'LRCUSDT']
# ['PNTUSDT', 'COMPUSDT', 'ZENUSDT', 'SCUSDT', 'SNXUSDT', 'VTHOUSDT', 'DGBUSDT', 'SXPUSDT', 'MKRUSDT', 'DCRUSDT', 'STORJUSDT', 'MANAUSDT', 'YFIUSDT', 'KMDUSDT', 'IRISUSDT']
# ['SOLUSDT', 'BLZUSDT', 'BALUSDT', 'JSTUSDT', 'ANTUSDT', 'SANDUSDT', 'CRVUSDT', 'DOTUSDT', 'OCEANUSDT', 'NMRUSDT', 'LUNAUSDT', 'RSRUSDT', 'WNXMUSDT', 'PAXGUSDT', 'TRBUSDT']
# ['SUSHIUSDT', 'YFIIUSDT', 'EGLDUSDT', 'FIOUSDT', 'RUNEUSDT', 'KSMUSDT', 'DIAUSDT', 'UMAUSDT', 'BELUSDT', 'WINGUSDT', 'UNIUSDT', 'OXTUSDT', 'SUNUSDT', 'AVAXUSDT', 'FLMUSDT']
# ['ORNUSDT', 'UTKUSDT', 'XVSUSDT', 'ALPHAUSDT', 'NEARUSDT', 'AAVEUSDT', 'FILUSDT', 'INJUSDT', 'AUDIOUSDT', 'CTKUSDT', 'AKROUSDT', 'AXSUSDT', 'HARDUSDT', 'STRAXUSDT', 'UNFIUSDT']
# ['ROSEUSDT', 'AVAUSDT', 'XEMUSDT', 'SKLUSDT', 'GRTUSDT', 'JUVUSDT', 'PSGUSDT', '1INCHUSDT', 'REEFUSDT', 'ASRUSDT', 'ATMUSDT', 'OGUSDT', 'CELOUSDT', 'RIFUSDT', 'TRUUSDT']
# ['CKBUSDT', 'TWTUSDT', 'FIROUSDT', 'LITUSDT', 'SFPUSDT', 'CAKEUSDT', 'DODOUSDT', 'ACMUSDT', 'BADGERUSDT', 'FISUSDT', 'OMUSDT', 'PONDUSDT', 'DEGOUSDT', 'ALICEUSDT', 'LINAUSDT']
# ['PERPUSDT', 'SUPERUSDT', 'CFXUSDT', 'TKOUSDT', 'PUNDIXUSDT', 'TLMUSDT', 'BARUSDT', 'FORTHUSDT', 'BURGERUSDT', 'SLPUSDT', 'BAKEUSDT', 'SHIBUSDT', 'ICPUSDT', 'ARUSDT', 'POLSUSDT']
# ['MDXUSDT', 'MASKUSDT', 'LPTUSDT', 'XVGUSDT', 'ATAUSDT', 'GTCUSDT', 'ERNUSDT', 'KLAYUSDT', 'PHAUSDT', 'MLNUSDT', 'BONDUSDT', 'DEXEUSDT', 'C98USDT', 'CLVUSDT', 'QNTUSDT']
# ['FLOWUSDT', 'TVKUSDT', 'MINAUSDT', 'RAYUSDT', 'ALPACAUSDT', 'FARMUSDT', 'QUICKUSDT', 'MBOXUSDT', 'REQUSDT', 'FORUSDT', 'GHSTUSDT', 'WAXPUSDT', 'GNOUSDT', 'XECUSDT', 'ELFUSDT']
# ['DYDXUSDT', 'IDEXUSDT', 'VIDTUSDT', 'GALAUSDT', 'ILVUSDT', 'YGGUSDT', 'SYSUSDT', 'DFUSDT', 'FIDAUSDT', 'FRONTUSDT', 'CVPUSDT', 'AGLDUSDT', 'RADUSDT', 'BETAUSDT', 'RAREUSDT']
# ['LAZIOUSDT', 'CHESSUSDT', 'ADXUSDT', 'AUCTIONUSDT', 'DARUSDT', 'BNXUSDT', 'MOVRUSDT', 'ENSUSDT', 'CITYUSDT', 'KP3RUSDT', 'QIUSDT', 'PORTOUSDT', 'POWRUSDT', 'VGXUSDT', 'JASMYUSDT']
# ['PLAUSDT', 'AMPUSDT', 'PYRUSDT', 'RNDRUSDT', 'ALCXUSDT', 'SANTOSUSDT', 'MCUSDT', 'BICOUSDT', 'FLUXUSDT', 'FXSUSDT', 'VOXELUSDT', 'HIGHUSDT', 'PEOPLEUSDT', 'CVXUSDT', 'OOKIUSDT']
# ['SPELLUSDT', 'JOEUSDT', 'IMXUSDT', 'ACHUSDT', 'GLMRUSDT', 'LOKAUSDT', 'API3USDT', 'SCRTUSDT', 'ACAUSDT', 'XNOUSDT', 'WOOUSDT', 'ALPINEUSDT', 'TUSDT', 'ASTRUSDT', 'GMTUSDT']
# ['KDAUSDT', 'APEUSDT', 'BSWUSDT', 'BIFIUSDT', 'MULTIUSDT', 'STEEMUSDT', 'MOBUSDT', 'NEXOUSDT', 'REIUSDT', 'GALUSDT', 'LDOUSDT', 'EPXUSDT', 'OPUSDT', 'LEVERUSDT', 'STGUSDT']
# ['LUNCUSDT', 'GMXUSDT', 'POLYXUSDT', 'APTUSDT', 'OSMOUSDT', 'HFTUSDT', 'PHBUSDT', 'HOOKUSDT', 'MAGICUSDT', 'HIFIUSDT', 'RPLUSDT', 'PROSUSDT', 'AGIXUSDT', 'GNSUSDT', 'SYNUSDT']
# ['SSVUSDT', 'VIBUSDT', 'LQTYUSDT', 'AMBUSDT', 'USTCUSDT', 'UFTUSDT', 'PROMUSDT', 'GLMUSDT', 'GASUSDT', 'QKCUSDT', 'IDUSDT', 'ARBUSDT', 'OAXUSDT', 'LOOMUSDT', 'RDNTUSDT']
# ['WBTCUSDT', 'EDUUSDT', 'SUIUSDT', 'AERGOUSDT', 'FLOKIUSDT', 'PEPEUSDT']

# Market clusters
# 0: [('APTUSDT', 11), ('SUIUSDT', 1), ('DYDXUSDT', 31), ('ANKRUSDT', 71), ('AUDIOUSDT', 48), ('SKLUSDT', 46)]
# 1: [('EOSUSDT', 93), ('AAVEUSDT', 48), ('FLOWUSDT', 33), ('CRVUSDT', 51), ('COMPUSDT', 54), ('SLPUSDT', 38), ('MBOXUSDT', 32), ('BNTUSDT', 61), ('SPELLUSDT', 26), ('AERGOUSDT', 1), ('BAKEUSDT', 38)]
# 2: [('DOTUSDT', 51), ('BTCUSDT', 100), ('ETHUSDT', 100), ('WBTCUSDT', 1), ('LINKUSDT', 81), ('ETCUSDT', 92), ('XLMUSDT', 92), ('TWTUSDT', 43), ('SFPUSDT', 42), ('STPTUSDT', 59), ('STEEMUSDT', 20), ('POWRUSDT', 28)]

In [None]:
# Hand-picked market clusters (base symbols; 'USDT' is appended for the lookup).
clusters = [
    ['APT', 'SUI', 'DYDX', 'ANKR', 'AUDIO', 'SKL'],
    ['EOS', 'AAVE', 'FLOW', 'CRV', 'COMP', 'SLP', 'MBOX', 'BNT', 'SPELL', 'AERGO', 'BAKE'],
    ['DOT', 'BTC', 'ETH', 'WBTC', 'LINK', 'ETC', 'XLM', 'TWT', 'SFP', 'STPT', 'STEEM', 'POWR'],
]

for c, cluster in enumerate(clusters):
    # Translate symbols into positions along the market axis of `table`.
    cluster = [ markets.index(m + 'USDT') for m in cluster ]
    # np.argmax on the boolean mask is the first row with a positive value in field 0;
    # 100 minus its position in percent is the share of rows from that point on.
    check = [
        (markets[m], 100 - round(np.argmax(table[:, m, 0]>0) / table.shape[0] * 100))
        for m in cluster
    ]
    print(check)

In [None]:
#==================== Select markets and fields ====================

enFields = ['ClosePrice', 'BaseVolume', 'BuyerBaseVolume']

# Input markets, converted from names to unique positions along the market axis.
# dot, 1inch, btc, eth, matic, bnb, ada, sol, ltc, avax, wbtc, link, arb, ape, aave, crv, sui, op, gmx, agix, bal, comp, gmt, joe, stg
chosen_markets_x = tuple(set([ markets.index(elem) for elem in [
    'DOTUSDT', 'BTCUSDT', 'ETHUSDT', 'LINKUSDT', 'ETCUSDT', 'XLMUSDT', 'STPTUSDT',   # cluster 2, with over 50% true candles, except 'DOTUSDT'
    'EOSUSDT', 'BNTUSDT', # cluster 1, with over 50% true candles.
    'ANKRUSDT', # cluster 0, with over 50% true candles.
    'BNBUSDT', 'AVAXUSDT', 'COMPUSDT', 'BALUSDT', 'XRPUSDT', '1INCHUSDT'  # etc
] ]))
# Input fields (add 'BaseVolume' to the list to include it).
chosen_fields_x = tuple(set([ enFields.index(elem) for elem in ['ClosePrice'] ]))
x_indices = ( chosen_markets_x, chosen_fields_x )
print(x_indices)

# Target markets and fields, converted the same way.
chosen_markets_y = tuple(set([ markets.index(elem) for elem in [
    'DOTUSDT', 'BTCUSDT', 'ETHUSDT', 'LINKUSDT', 'ETCUSDT', 'XLMUSDT', 'STPTUSDT',   # cluster 2, with over 50% true candles, except 'DOTUSDT'
    'EOSUSDT', 'BNTUSDT', # cluster 1, with over 50% true candles.
    'ANKRUSDT', # cluster 0, with over 50% true candles.
    'BNBUSDT', 'AVAXUSDT', 'COMPUSDT', 'BALUSDT', 'XRPUSDT', '1INCHUSDT'  # etc
] ]))
chosen_fields_y = tuple(set([ enFields.index(elem) for elem in ['ClosePrice'] ]))
y_indices = ( chosen_markets_y, chosen_fields_y )
print(y_indices)

# Values per time point on the input and on the target side.
size_x = get_timepoint_size(x_indices)
size_y = get_timepoint_size(y_indices)
print(size_x, size_y)

# Union of everything used on either side.
chosen_markets = tuple(set(chosen_markets_x + chosen_markets_y))
chosen_fields = tuple(set(chosen_fields_x + chosen_fields_y))
print(chosen_markets, chosen_fields)

In [None]:
# TRY 21
# NOTE: this cell assigns the same names as the selection cell before it, so when both
# are run in order, the values set here are the ones in effect.

# Define Data
enFields = ['ClosePrice', 'BaseVolume', 'BuyerBaseVolume']

# Input markets, converted from names to unique positions along the market axis.
# dot, 1inch, btc, eth, matic, bnb, ada, sol, ltc, avax, wbtc, link, arb, ape, aave, crv, sui, op, gmx, agix, bal, comp, gmt, joe, stg
chosen_markets_x = tuple(set([ markets.index(elem) for elem in [
    'DOTUSDT', '1INCHUSDT', 'BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'ADAUSDT', 'LTCUSDT', 'AVAXUSDT', 'WBTCUSDT', 'LINKUSDT',
    'ARBUSDT', 'APEUSDT', 'SUIUSDT', 'OPUSDT', 'GMXUSDT', 'AGIXUSDT', 'BALUSDT', 'COMPUSDT', 'GMTUSDT', 'JOEUSDT', 'STGUSDT',
] ]))
# Input fields (add 'BaseVolume' to the list to include it).
chosen_fields_x = tuple(set([ enFields.index(elem) for elem in ['ClosePrice'] ]))
x_indices = ( chosen_markets_x, chosen_fields_x )
print(x_indices)

# Target markets: the first ten of the input markets.
chosen_markets_y = tuple(set([ markets.index(elem) for elem in [
    'DOTUSDT', '1INCHUSDT', 'BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'ADAUSDT', 'LTCUSDT', 'AVAXUSDT', 'WBTCUSDT', 'LINKUSDT',
] ]))
chosen_fields_y = tuple(set([ enFields.index(elem) for elem in ['ClosePrice'] ]))
y_indices = ( chosen_markets_y, chosen_fields_y )
print(y_indices)

# Values per time point on the input and on the target side.
size_x = get_timepoint_size(x_indices)
size_y = get_timepoint_size(y_indices)
print(size_x, size_y)

# Union of everything used on either side.
chosen_markets = tuple(set(chosen_markets_x + chosen_markets_y))
chosen_fields = tuple(set(chosen_fields_x + chosen_fields_y))
print(chosen_markets, chosen_fields)

In [None]:
#==================== Define Data ====================

Data = table[:, :, :]   # (time, all markets, 3 remaining fields). A basic slice is a view, not a copy: in-place writes to Data also change `table`.

In [None]:
# For each chosen market: 100 minus the position (in percent of all rows) of the first
# row whose field 0 is positive.
check = [
    (markets[m], "{}%".format(100-int(np.argmax(Data[:, m, 0]>0) / Data.shape[0] * 100)) )
    for m in chosen_markets
]
# Print the report a few markets per line.
batch = 5
for i in range(0, len(check), batch):
    print(check[i: i+batch])

In [None]:
# Position (in percent of all rows) of the first row with a positive field 0, per market.
check = np.array([ np.argmax(Data[:, m, 0]>0) / Data.shape[0] * 100 for m in range(len(markets)) ])
# Markets sorted by that position, earliest first.
permute = np.argsort(check)
marketrank = [ (markets[m], 100 - round(check[m])) for m in permute ]
# marketrank = [ markets[m] for m in permute ]

batch = 10
for i in range(0, len(markets), batch):
    print(marketrank[i: i+batch])

In [None]:
#==================== Generate event-free data into Data ====================
# Data loses heading items.
# Do it before: Permute Data in time
# NOTE(review): Data was created as a slice of `table`, so the in-place writes below
# also change `table`; re-running this cell transforms already-transformed values.

smallSigma = 1
largeSigma = 30

# Original note: "beta is used in 'get_plot_log_feature'. Ugly coupling."
# NOTE(review): the get_plot_log_feature defined in this notebook does not reference beta.
alpha = 3; beta = 3
# Rows dropped from the head. It has to be at least the smallP + largeP - 2 rows that
# Get_Event_Free_Feature cannot produce, for the largest small sigma used below.
head_data_loss = 3 * ( alpha * smallSigma + largeSigma)
# NOTE(review): eFree is allocated here but neither written nor read in this cell.
eFree = np.zeros( (Data.shape[0] - head_data_loss, len(chosen_markets), len(chosen_fields)), dtype = np.float32 )

for market in chosen_markets:
    for field in chosen_fields:
        sSigma = smallSigma
        # 'BaseVolume' gets a wider short kernel than the other fields.
        if enFields[field] == 'BaseVolume': sSigma = smallSigma * alpha
        # NoChart = False: one chart is drawn for every (market, field) pair.
        P, maP, logP, log_maP, event, eventFree = \
        get_plot_log_feature(markets[market], enFields[field], Data[:, market, field], sSigma, largeSigma, Data.shape[0] - head_data_loss, NoChart = False)
        # Overwrite the kept rows of this series with its event-free version.
        Data[head_data_loss:, market, field] = eventFree

# Drop the head rows, which were not overwritten.
Data = Data[head_data_loss: ]

print(Data.shape)

In [None]:
# Trim in time dimension

In [None]:
# TRY 21

# #==================== Standardize Data on chosen markets and fields ====================
# # No need to Do it before: Permute Data in time

# for market in chosen_markets:
#     for field in chosen_fields:
#         nzPs = np.where( Data[:, market, field] != 0.0 ) [0]
#         # Keep record of mu and sigma for later use with inference. ----------------------------------
#         mu = np.average(Data[nzPs, market, field])
#         sigma = np.std(Data[nzPs, market, field])
#         standard = (Data[nzPs, market, field] - mu) / (sigma + 1e-9)
#         Data[nzPs, market, field] = standard

In [None]:
# Plot every chosen (market, field) series of Data on one wide axis.
fig, ax = plt.subplots(figsize=(16,3))
ax.set_title("Features are custom-standardized.")
for market in chosen_markets:
    for field in chosen_fields:
        # [:-4] strips the trailing 'USDT' from the market name.
        series_label = "{} @ {}".format(enFields[field], markets[market][:-4])
        ax.plot(Data[:, market, field], label = series_label)
ax.legend(loc = 'upper left')
plt.show()

In [None]:
#==================== Define time features, to augment Data with ====================
# Do it before: Permute Data in time

# Periodic signals with a period of one hour, day, week and (365-day) year.
hourly = np.sin( 2 * np.pi / (60*60) * timestamps )
daily = np.sin( 2 * np.pi / (60*60*24) * timestamps )
weekly = np.sin( 2 * np.pi / (60*60*24*7) * timestamps )
yearly = np.sin( 2 * np.pi / (60*60*24*365) * timestamps )


# A normalized representation of 'timestamps': one sine/cosine pair per period.
# These used np.sign, which returns the constant 1 for positive timestamps; the
# sine half of each pair needs np.sin.
time_of_year_sin = np.sin( 2 * np.pi / (60*60*24*365) * timestamps )
time_of_year_cos = np.cos( 2 * np.pi / (60*60*24*365) * timestamps )
time_of_week_sin = np.sin( 2 * np.pi / (60*60*24*7) * timestamps )
time_of_week_cos = np.cos( 2 * np.pi / (60*60*24*7) * timestamps )
time_of_day_sin = np.sin( 2 * np.pi / (60*60*24) * timestamps )
time_of_day_cos = np.cos( 2 * np.pi / (60*60*24) * timestamps )

print(table.shape, hourly.shape)

# table = np.insert(table, 0, hourly.reshape(1), axis=0)

In [None]:
#==================== Permute Data in time, and split into train/valid part ====================

# Shuffle the rows (time axis) of Data three times in a row.
# NOTE(review): no random seed is set for np.random, so this order differs between
# runs; one permutation would be statistically equivalent to three.
# NOTE(review): the sample windows built later take Nx + Ny consecutive rows, so after
# this shuffle a window no longer holds consecutive time steps — confirm this is intended.
permute = np.random.permutation(len(Data)); Data = Data[permute]
permute = np.random.permutation(len(Data)); Data = Data[permute]
permute = np.random.permutation(len(Data)); Data = Data[permute]
print(Data.shape)

from sklearn.model_selection import train_test_split
# 70 % / 30 % split of the rows, with a fixed random_state.
Train, Valid = train_test_split(Data, test_size=0.30, random_state=42)
print(Train.shape, Valid.shape)

In [None]:
#==================== Define input sequence and output sequence ====================
# NOTE: the "TRY 21" cell right after this one assigns the same names again.

Nx = 500        # input window length, in rows
Ny = 10         # target window length, in rows
Ns = 10         # step between consecutive window starts
BatchSize = 64

# Window starts for the training rows; "+ 1" keeps the last window that still fits.
sample_anchors_t = range(0, Train.shape[0] - Nx - Ny + 1, Ns)
print(Train.shape[0], len(sample_anchors_t), sample_anchors_t, sample_anchors_t[-1])
# NOTE(review): this prints Data.shape[0] although the anchors index Train — confirm.
print(Data.shape[0], sample_anchors_t[ -1 ], sample_anchors_t[ -1 ] + Nx + Ny, sample_anchors_t[ -1 ] + Ns, sample_anchors_t[ -1 ] + Ns + Nx + Ny)

# Same for the validation rows.
sample_anchors_v = range(0, Valid.shape[0] - Nx - Ny + 1, Ns)
print(Valid.shape[0], len(sample_anchors_v), sample_anchors_v, sample_anchors_v[-1])
# NOTE(review): this prints Data.shape[0] although the anchors index Valid — confirm.
print(Data.shape[0], sample_anchors_v[ -1 ], sample_anchors_v[ -1 ] + Nx + Ny, sample_anchors_v[ -1 ] + Ns, sample_anchors_v[ -1 ] + Ns + Nx + Ny)

In [None]:
# TRY 21
# NOTE: assigns the same names as the sequence-definition cell before it; the only
# value that differs is Ny (5 here, 10 there).

Nx = 500        # input window length, in rows
Ny = 5          # target window length, in rows
Ns = 10         # step between consecutive window starts
BatchSize = 64

# Window starts for the training rows; "+ 1" keeps the last window that still fits.
sample_anchors_t = range(0, Train.shape[0] - Nx - Ny + 1, Ns)
print(Train.shape[0], len(sample_anchors_t), sample_anchors_t, sample_anchors_t[-1])
# NOTE(review): this prints Data.shape[0] although the anchors index Train — confirm.
print(Data.shape[0], sample_anchors_t[ -1 ], sample_anchors_t[ -1 ] + Nx + Ny, sample_anchors_t[ -1 ] + Ns, sample_anchors_t[ -1 ] + Ns + Nx + Ny)

# Same for the validation rows.
sample_anchors_v = range(0, Valid.shape[0] - Nx - Ny + 1, Ns)
print(Valid.shape[0], len(sample_anchors_v), sample_anchors_v, sample_anchors_v[-1])
# NOTE(review): this prints Data.shape[0] although the anchors index Valid — confirm.
print(Data.shape[0], sample_anchors_v[ -1 ], sample_anchors_v[ -1 ] + Nx + Ny, sample_anchors_v[ -1 ] + Ns, sample_anchors_v[ -1 ] + Ns + Nx + Ny)

In [None]:
#==================== Create train/valid datasets ====================

nFiles_t = 70           # number of CSV parts for the training samples
nFiles_v = 30           # number of CSV parts for the validation samples
n_readers = 10          # files read in parallel by the interleave step
suffule_batch = 300     # shuffle buffer size, expressed in batches
prefetch = 3            # batches prepared ahead of the consumer

dir_datasets = "/mnt/data/Trading/Datasets/"

name_plus_t = path+'_t'
name_plus_v = path+'_v'
name_prefix_t = os.path.join(dir_datasets, name_plus_t)
name_prefix_v = os.path.join(dir_datasets, name_plus_v)

# True: pick up files written by an earlier run. False: delete them and write new ones.
reuse_files = True #-------------------------------------------------------------------------------------------------------

if reuse_files:
    import re
    # re.escape: the file name must be matched literally, not interpreted as a regex.
    filenames_train = [ os.path.join(dir_datasets, x) for x in os.listdir(dir_datasets) if re.match(re.escape(name_plus_t), x)]
    filenames_valid = [ os.path.join(dir_datasets, x) for x in os.listdir(dir_datasets) if re.match(re.escape(name_plus_v), x)]
else:
    # Remove stale files whose name contains either prefix (what `rm dir/*name*` did),
    # without spawning a shell and without silently ignoring failures.
    for x in os.listdir(dir_datasets):
        stale = os.path.join(dir_datasets, x)
        if (name_plus_t in x or name_plus_v in x) and os.path.isfile(stale):
            os.remove(stale)
    filenames_train = save_to_multiple_csv_files(Train, sample_anchors_t, name_prefix_t, Nx, x_indices, Ny, y_indices, header=None, n_parts=nFiles_t)
    filenames_valid = save_to_multiple_csv_files(Valid, sample_anchors_v, name_prefix_v, Nx, x_indices, Ny, y_indices, header=None, n_parts=nFiles_v)

Dataset_train = csv_reader_dataset(filenames_train, Nx, size_x, Ny, size_y,
                             n_parse_threads=5, batch_size=BatchSize, shuffle_buffer_size=BatchSize*suffule_batch, n_readers=n_readers)
Dataset_train = Dataset_train.prefetch(prefetch)
# print(len(Dataset_train))

Dataset_valid = csv_reader_dataset(filenames_valid, Nx, size_x, Ny, size_y,
                             n_parse_threads=5, batch_size=BatchSize, shuffle_buffer_size=BatchSize*suffule_batch, n_readers=n_readers)
Dataset_valid = Dataset_valid.prefetch(prefetch)
# print(len(Dataset_valid))


In [None]:
# Print only the first training batch to inspect shapes and values.
for element in Dataset_train:
    print(element)
    break

In [None]:
def mae_last_step(Y_true, Y_pred):
    """Mean absolute error computed on the last index of axis 1 only.

    `keras.mean` / `keras.abs` do not exist on the `tensorflow.keras` module, so
    the TensorFlow ops are used directly.
    """
    return tf.reduce_mean(tf.abs(Y_pred[:, -1] - Y_true[:, -1]))

In [None]:
#==================== Define 'huber_loss' ====================

def huber_loss(y_true, y_pred, max_grad=0.2):   # default: max_grad = 1.0
    """Element-wise Huber loss: quadratic below `max_grad`, linear at and above it.

    NOTE: a later "TRY 21" cell defines huber_loss again with max_grad=1.0; when
    both cells are run in order, that later definition is the one in effect.
    """
    mg = tf.constant(max_grad, name='max_grad')
    err = tf.abs(y_true - y_pred, name='abs')
    return tf.where(
        err < mg,
        0.5 * err * err,           # small errors: quadratic branch
        mg * (err - 0.5 * mg),     # large errors: linear branch
    )

In [None]:
# TRY 21

def huber_loss(y_true, y_pred, max_grad=1.):
    """Element-wise Huber loss with threshold `max_grad`.

    Returns 0.5 * err**2 where err = |y_true - y_pred| is below the threshold,
    and max_grad * (err - 0.5 * max_grad) elsewhere. No reduction is applied.
    """
    residual = tf.abs(y_true - y_pred, name='abs')
    threshold = tf.constant(max_grad, name='max_grad')
    linear_part = threshold * (residual - 0.5 * threshold)
    quadratic_part = 0.5 * residual * residual
    is_small = residual < threshold
    return tf.where(is_small, quadratic_part, linear_part)

In [None]:
#==================== Define RegularizedLSTM ====================

from functools import partial
# LSTM layer factory with shared defaults: returns the full output sequence and applies
# L2 regularization (factor 1e-4) to both the input kernel and the recurrent kernel.
# Call it like keras.layers.LSTM; keyword arguments passed at the call site override these.
RegularizedLSTM = partial(
    keras.layers.LSTM,
    recurrent_regularizer=keras.regularizers.l2(1e-4),
    kernel_regularizer=keras.regularizers.l2(1e-4),
    return_sequences=True,
)

In [None]:
#==================== Define build_model ====================


def build_model(input_dim, output_size, allow_cudnn_kernel=True,
                n_lstm_layers=9, n_dense_layers=8, learning_rate=0.0001):
    """Build and compile the stacked LSTM + Dense functional model.

    Architecture (with the default arguments this is the same layer stack as
    the previous hand-unrolled version):

    * input named "candles" with shape ``(None, input_dim)``
    * ``n_lstm_layers`` recurrent layers, each followed by BatchNormalization;
      all but the last return full sequences and are L2-regularized, the last
      one returns only its final output and is not regularized
    * ``n_dense_layers`` SELU Dense layers, each followed by BatchNormalization
    * a final SELU Dense layer of width ``output_size``

    Every hidden layer has ``max(input_dim, output_size)`` units.

    Note: a later cell of this notebook defines another ``build_model`` that
    takes only ``allow_cudnn_kernel``; whichever cell runs last is in effect.

    :param input_dim: number of features per time step
    :param output_size: width of the output layer
    :param allow_cudnn_kernel: if True use ``keras.layers.LSTM`` (eligible for
        the CuDNN kernel); if False wrap ``LSTMCell`` in ``keras.layers.RNN``,
        which never uses CuDNN
    :param n_lstm_layers: total number of recurrent layers (>= 1)
    :param n_dense_layers: number of hidden Dense layers (>= 0)
    :param learning_rate: Adam learning rate (Keras default would be 0.001)
    :return: compiled ``keras.Model``
    :raises ValueError: if ``n_lstm_layers < 1`` or ``n_dense_layers < 0``
    """
    if n_lstm_layers < 1:
        raise ValueError("n_lstm_layers must be at least 1")
    if n_dense_layers < 0:
        raise ValueError("n_dense_layers must not be negative")

    units = max(input_dim, output_size)

    def recurrent_layer(return_sequences):
        """Create one recurrent layer for the requested kernel type."""
        # CuDNN is only available at the layer level, and not at the cell level.
        # This means `LSTM(units)` will use the CuDNN kernel,
        # while RNN(LSTMCell(units)) will run on non-CuDNN kernel.
        if allow_cudnn_kernel:
            if return_sequences:
                return RegularizedLSTM(units, kernel_initializer="lecun_normal")
            return keras.layers.LSTM(units, kernel_initializer="lecun_normal")

        # Non-CuDNN path: previously this branch built nothing and the
        # function failed at `return model`. Mirror the CuDNN stack, including
        # the L2 settings that RegularizedLSTM applies to sequence layers.
        cell_kwargs = {"kernel_initializer": "lecun_normal"}
        if return_sequences:
            cell_kwargs["kernel_regularizer"] = keras.regularizers.l2(1e-4)
            cell_kwargs["recurrent_regularizer"] = keras.regularizers.l2(1e-4)
        return keras.layers.RNN(
            keras.layers.LSTMCell(units, **cell_kwargs),
            return_sequences=return_sequences,
        )

    inputs = keras.Input(shape=(None, input_dim), name="candles")
    x = inputs

    # Recurrent stack: only the last layer collapses the time axis.
    for layer_index in range(n_lstm_layers):
        is_last = layer_index == n_lstm_layers - 1
        x = recurrent_layer(return_sequences=not is_last)(x)
        x = keras.layers.BatchNormalization(synchronized=True)(x)

    # Fully connected stack.
    for _ in range(n_dense_layers):
        x = keras.layers.Dense(units, activation="selu", kernel_initializer="lecun_normal")(x)
        x = keras.layers.BatchNormalization(synchronized=True)(x)

    outputs = keras.layers.Dense(output_size, activation="selu", kernel_initializer="lecun_normal")(x)

    model = keras.Model(
        inputs=inputs,
        outputs=outputs,
        name="LSTMDense_model"
    )

    model.compile(
        loss=huber_loss,
        optimizer=keras.optimizers.Adam(
            learning_rate=learning_rate,  # Keras default lr = 0.001
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-07
        ),
        # compile() documents `metrics` as a list; newer Keras versions
        # reject a bare callable.
        metrics=[keras.metrics.MSE],
    )

    return model

In [None]:
# TRY 21

# Module-level dimensions read by the TRY 21 `build_model` below.
# `size_x`, `Ny` and `size_y` are defined earlier in the notebook.
input_dim = size_x
output_size = Ny * size_y

# Width of every recurrent layer: the larger of input and output widths.
units = max(input_dim, output_size)

print(input_dim, units, output_size)

def build_model(allow_cudnn_kernel=True):
    """Build and compile the TRY 21 Sequential model.

    Reads the module-level names ``input_dim``, ``units`` and ``output_size``
    (set in this same cell) as well as ``RegularizedLSTM`` and ``huber_loss``.

    Layer stack: ten recurrent layers of ``units`` units (the first nine
    return full sequences, the tenth returns only its final output), followed
    by five SELU Dense layers of width ``output_size``.

    This definition replaces the earlier three-argument ``build_model`` once
    this cell has been executed.

    :param allow_cudnn_kernel: if True use LSTM layers (eligible for the CuDNN
        kernel); if False wrap plain ``LSTMCell`` objects in
        ``keras.layers.RNN``, which never uses CuDNN
    :return: compiled ``keras.models.Sequential`` model
    """
    n_recurrent_layers = 10
    n_dense_layers = 5

    def recurrent_layer(index):
        """Create the recurrent layer at position `index` of the stack."""
        is_last = index == n_recurrent_layers - 1
        # Only the first layer of a Sequential model needs an input shape;
        # the redundant `input_shape` on the other layers was dropped.
        shape_kwargs = {"input_shape": (None, input_dim)} if index == 0 else {}

        # CuDNN is only available at the layer level, and not at the cell level.
        # This means `LSTM(units)` will use the CuDNN kernel,
        # while RNN(LSTMCell(units)) will run on non-CuDNN kernel.
        if allow_cudnn_kernel:
            if is_last:
                return keras.layers.LSTM(units, kernel_initializer="lecun_normal", **shape_kwargs)
            return RegularizedLSTM(units, kernel_initializer="lecun_normal", **shape_kwargs)

        # Wrapping a LSTMCell in a RNN layer will not use CuDNN.
        # Previously only the first layer was created on this path, so
        # assembling the model raised NameError for lstm_layer1..lstm_layer9.
        return keras.layers.RNN(
            keras.layers.LSTMCell(units),
            return_sequences=not is_last,
            **shape_kwargs
        )

    layers = [recurrent_layer(index) for index in range(n_recurrent_layers)]
    layers.extend(
        keras.layers.Dense(output_size, activation="selu", kernel_initializer="lecun_normal")
        for _ in range(n_dense_layers)
    )

    # with mirrored_strategy.scope():
    model = keras.models.Sequential(layers)

    model.compile(
        loss=huber_loss,
        optimizer=keras.optimizers.Adam(
            learning_rate=0.001,  # def lr = 0.001
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-07
        ),
        # compile() documents `metrics` as a list; newer Keras versions
        # reject a bare callable.
        metrics=[keras.metrics.MSE],
    )

    return model

In [None]:
#==================== Build model ====================

# model = build_model(size_x, Ny * size_y, allow_cudnn_kernel=True)
# Uses the most recently executed `build_model` definition (the TRY 21
# variant, which takes its dimensions from module-level variables), then
# prints the layer/parameter summary.
model = build_model(allow_cudnn_kernel=True)
model.summary()
# keras.utils.plot_model(model, show_shapes=True)

In [None]:
#==================== Fit model ====================

# options = tf.data.Options()
# options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
# train_data = Dataset_train.with_options(options)
# val_data = Dataset_valid.with_options(options)

# Train for 5 epochs, evaluating on the validation pipeline after each epoch.
# The returned History object is kept in `history` for plotting.
history = model.fit(Dataset_train, validation_data=Dataset_valid, epochs=5)
# Sketch for resuming with continuous epoch numbering:
# history_2 = model.fit(Dataset_train, validation_data=Dataset_valid, epochs=10, initial_epoch=history_1.epoch[-1] + 1)

In [None]:
#==================== Define plot_history ====================

def plot_history(history, loss="loss"):
    """Plot the training and validation curves of one metric.

    Draws the per-epoch training values (blue), validation values (red) and a
    dashed horizontal line at the lowest validation value, then shows the
    figure.

    :param history: object with a ``history`` dict (metric name -> list of
        per-epoch values) and an ``epoch`` list, e.g. the return value of
        ``model.fit``
    :param loss: metric key to plot; the validation series is looked up under
        ``"val_" + loss``
    :raises KeyError: if either series is missing from ``history.history``
    """
    train_losses = history.history[loss]
    valid_losses = history.history["val_" + loss]
    n_epochs = len(history.epoch)

    # The dashed line is labelled "Min val", so it must sit at the minimum of
    # the validation series only (it used to use the min over both series).
    min_val_loss = np.min(valid_losses)

    # The y-range, in contrast, has to cover both curves.
    lowest = min(min_val_loss, np.min(train_losses))
    highest = max(np.max(valid_losses), np.max(train_losses))

    plt.plot(train_losses, color="b", label="Train")
    plt.plot(valid_losses, color="r", label="Validation")
    plt.plot([0, n_epochs], [min_val_loss, min_val_loss], "k--",
             label="Min val: {:.2f}".format(min_val_loss))
    # 5 % headroom below and above the plotted values.
    plt.axis([0, n_epochs, lowest/1.05, highest*1.05])
    plt.xlabel("Epoch")
    plt.ylabel(loss)
    plt.legend()
    plt.show()

In [None]:
# Learning curves of the fit call above.
plot_history(history)

In [None]:
# history_2 = model.fit(Dataset_train, validation_data=Dataset_valid, epochs=10, initial_epoch=history_1.epoch[-1] + 1)

In [None]:
# Another 5 epochs on the same `model` object. `history` is overwritten, so
# the plot shows only this run's epochs.
history = model.fit(Dataset_train, validation_data=Dataset_valid, epochs=5)
plot_history(history)

In [None]:
# Another 5 epochs on the same `model` object. `history` is overwritten, so
# the plot shows only this run's epochs.
history = model.fit(Dataset_train, validation_data=Dataset_valid, epochs=5)
plot_history(history)

In [None]:
# Another 5 epochs on the same `model` object. `history` is overwritten, so
# the plot shows only this run's epochs.
history = model.fit(Dataset_train, validation_data=Dataset_valid, epochs=5)
plot_history(history)