In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import pyfolio
import json
import numpy as np
import pandas as pd

from tqdm import tqdm
from itertools import combinations
from utils.read2df import read2df
from utils.cointncorr import CointnCorr

In [2]:
'''
Configuration for the download and the train/trade split.

Historical klines for `symbols` are downloaded from `start_date` onwards, at the intervals listed in `freqs`,
using [`binance-public-data`](https://github.com/binance/binance-public-data/tree/master/python).
Data between `start_date` and `trade_date` is used for training; trading starts at `trade_date`.
'''
# Alternative symbol universes, kept for reference (not active):
# symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XMRUSDT', 'BNBUSDT', 'ADAUSDT', 'DOGEUSDT', 'SOLUSDT', 'TRXUSDT']
# symbols = ['USDCUSDT', 'DAIUSDT', 'TUSDUSDT', 'BUSDUSDT', 'USDCTUSD', 'USDCBUSD', 'DAIBUSD', 'TUSDBUSD', 'BUSDDAI']

# Active universe: BTC quoted against several USD-pegged assets.
# NOTE(review): the preview of the loaded data shows only five tickers (no 'BTCUSD') -- confirm that
# 'BTCUSD' is a valid spot symbol, otherwise it contributes no data.
symbols = ['BTCUSDT', 'BTCUSD', 'BTCTUSD', 'BTCUSDC', 'BTCBUSD', 'BTCDAI']


# Dates are ISO 'YYYY-MM-DD' strings; they are later compared against the `datetime` column.
# `start_date` predates the first available archive, so the downloader reports the missing
# months as "File not found" and carries on.
start_date = '2010-01-01'
trade_date = '2022-10-01'

# Alternative interval sets, kept for reference (not active):
# freqs = {'1h':60, '2h':120, '4h':240, '6h':360, '8h':480, '12h':720, '1d':1440}
# freqs = {'1m': 1, '3m':3, '5m':5, '15m':15, '30m':30}
# Because we want as much data as possible, it makes sense to use only 1m.
# Keys are the kline interval labels passed to the downloader;
# the values appear to be the interval length in minutes -- TODO confirm against `read2df` / `CointnCorr`.
freqs = {'1m': 1}


In [3]:
%%capture
# Download spot klines with the binance-public-data script. `%%capture` silences the
# (very long) progress output of this cell.
# The `{...}` parts are Python expressions interpolated by IPython before the shell command runs:
#   -s          space-separated symbols
#   -i          space-separated interval labels (the keys of `freqs`)
#   -startDate  first date to request
# NOTE(review): `-t spot -skip-daily 1` presumably restricts the run to monthly spot archives -- confirm
# against the downloader's README.
if symbols is None:
    # No symbol list configured: call the script without `-s`.
    # NOTE(review): presumably the script then falls back to its own default symbol set -- confirm.
    !python binance-public-data/python/download-kline.py \
        -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1
else:
    # Restrict the download to the configured `symbols`.
    !python binance-public-data/python/download-kline.py \
        -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

Found 6 symbols
[1/6] - start download monthly BTCUSDT klines 

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-01.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-02.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-03.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-04.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-05.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-06.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-07.zip

File Download: C:\Users\hyan212\backtrader-crypto-rl\binance-public-data\python\data/spot/monthly/klines/BTCUSDT/1m/BTCUSDT-1m-2017-08.zip

[..................................................]
[..............

In [3]:
'''
Read the downloaded OHLCV data into `pandas` dataframes
'''

# `read2df` is the project helper imported from `utils.read2df`.
# NOTE(review): its result is indexed by position below, so it is assumed to be a
# sequence of DataFrames -- confirm against the helper.
dfs = read2df(symbols, freqs)

# Preview the first frame (last expression of the cell, so it is rendered as the cell output)
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1597125659999,11858.08,11858.35,11851.77,11854.25,5.052877,BTCBUSD,1m,2020-08-11 06:00:59.999
1,1597125659999,11824.55,11824.55,11824.55,11824.55,0.000900,BTCDAI,1m,2020-08-11 06:00:59.999
2,1597125659999,11861.74,11861.74,11855.84,11855.84,0.078586,BTCTUSD,1m,2020-08-11 06:00:59.999
3,1597125659999,11856.34,11856.34,11852.56,11852.56,0.700986,BTCUSDC,1m,2020-08-11 06:00:59.999
4,1597125659999,11854.56,11854.57,11850.00,11850.10,28.918968,BTCUSDT,1m,2020-08-11 06:00:59.999
...,...,...,...,...,...,...,...,...,...
7283600,1698796799999,34659.99,34664.69,34641.51,34643.94,0.283840,BTCBUSD,1m,2023-10-31 23:59:59.999
7283601,1698796799999,34643.62,34643.62,34643.61,34643.61,0.006860,BTCDAI,1m,2023-10-31 23:59:59.999
7283602,1698796799999,34662.58,34662.58,34648.08,34648.08,3.266090,BTCTUSD,1m,2023-10-31 23:59:59.999
7283603,1698796799999,34671.39,34671.39,34656.42,34656.42,0.064850,BTCUSDC,1m,2023-10-31 23:59:59.999


In [4]:
'''
Split every frame at `trade_date`:
rows strictly after `start_date` and strictly before `trade_date` are training data,
rows at or after `trade_date` are trading (test) data.
'''

trains, tests = [], []
for i in range(len(dfs)):
    frame = dfs[i]
    stamps = frame['datetime']

    # Boolean masks over the same frame; both resulting slices get a fresh 0..n-1 index.
    is_train = (stamps > start_date) & (stamps < trade_date)
    is_trade = stamps >= trade_date

    trains.append(frame[is_train].reset_index(drop=True))
    tests.append(frame[is_trade].reset_index(drop=True))

In [7]:
'''
Find the best pair

Build the cointegration / correlation tables for the training data, log them to a text
file, pick the (pair, interval) with the highest `coint + corr` score, print a summary
and persist the choice as a pickle for later steps.
'''

import pickle

# `tables` is used as a mapping: pair name -> table addressable as `.at['coint' | 'corr', freq]`.
tables = CointnCorr(trains, freqs).tabulate()

# Create the output folders up front so a fresh checkout does not fail on `open`.
os.makedirs("result/gridsearch", exist_ok=True)
cointncorrtxt = "result/gridsearch/cointncorr.txt"

# Append mode is kept on purpose: every run adds its tables to the log,
# so re-running this cell accumulates entries.
with open(cointncorrtxt, "a") as f:
    for k, v in tables.items():
        f.write(f"{k}\n")
        f.write(f"{v}\n\n")

# Only scores strictly above 0 qualify. Start from explicit `None` so that a value
# left over from an earlier kernel run can never be picked up silently.
best_value = 0
best_pair, best_freq = None, None
for key, table in tables.items():
    for freq in freqs:
        rel = table.at['coint', freq] + table.at['corr', freq]
        if rel > best_value:
            best_value = rel
            best_pair = key
            best_freq = freq

if best_pair is None:
    raise ValueError("No pair has a positive coint + corr score; cannot select a best trading pair")

print("===========================================")
print(f"Best trading pairs shall be: {best_pair} under {best_freq} interval")
print(f"the coint is {round(tables[best_pair].at['coint', best_freq]*100, 2)}%")
print(f"and the corr is {round(tables[best_pair].at['corr', best_freq],3)}")
print("===========================================")

# Pair names are joined with "_" (e.g. "BTCBUSD_BTCUSDC"); store ([sym_a, sym_b], interval).
cointncorr = best_pair.split("_"), best_freq

with open('result/cointncorr.pickle', 'wb') as pk:
    pickle.dump(cointncorr, pk)

Best trading pairs shall be: BTCBUSD_BTCUSDC under 1m interval
the coint is 99.74%
and the corr is 1.0
