In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import pyfolio
import json
import numpy as np
import pandas as pd
from params import *

from tqdm import tqdm
from itertools import combinations
from utils.read2df import read2df
from utils.cointncorr import CointnCorr

In [2]:
# %%capture
# Fetch kline archives by shelling out to the binance-public-data download
# script. `symbols`, `freqs` and `start_date` are not defined in this cell
# (presumably they come from `from params import *` — confirm).
# The interval list passed to `-i` is the space-joined keys of `freqs`.
if symbols is None:
    # No explicit symbol list: the `-s` flag is omitted entirely.
    # NOTE(review): presumably the script then falls back to its own default
    # symbol set — confirm against download-kline.py.
    !python binance-public-data/python/download-kline.py \
        -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1
else:
    # Restrict the download to the space-joined `symbols` via `-s`.
    !python binance-public-data/python/download-kline.py \
        -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

In [4]:
# Load the downloaded OHLCV data through `read2df`. The result is indexable;
# per the preview rendered below, each entry is a pandas DataFrame.
dfs = read2df(symbols, freqs)

# Preview the first entry.
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1592978459999,8562.59,8565.32,8561.97,8563.10,0.262898,BTCEUR,1m,2020-06-24 06:00:59.999
1,1592978459999,7718.00,7729.32,7718.00,7729.32,0.032782,BTCGBP,1m,2020-06-24 06:00:59.999
2,1592978459999,4308014.00,4308231.00,4306276.00,4308231.00,0.319138,BTCNGN,1m,2020-06-24 06:00:59.999
3,1592978459999,666818.00,666818.00,666818.00,666818.00,0.000000,BTCRUB,1m,2020-06-24 06:00:59.999
4,1592978459999,66271.00,66271.00,66271.00,66271.00,0.010166,BTCTRY,1m,2020-06-24 06:00:59.999
...,...,...,...,...,...,...,...,...,...
6820567,1698796799999,27977.60,27977.60,27977.60,27977.60,0.000000,BTCGBP,1m,2023-10-31 23:59:59.999
6820568,1698796799999,39509072.00,39509072.00,39381022.00,39498613.00,0.001100,BTCNGN,1m,2023-10-31 23:59:59.999
6820569,1698796799999,3276275.00,3276275.00,3276275.00,3276275.00,0.000000,BTCRUB,1m,2023-10-31 23:59:59.999
6820570,1698796799999,984549.00,984550.00,984250.00,984250.00,0.006690,BTCTRY,1m,2023-10-31 23:59:59.999


In [7]:
# Chronological split of every frame in `dfs` on its 'datetime' column:
#   * training data: rows strictly after `start_date` and strictly before `trade_date`
#   * trading (test) data: rows at or after `trade_date`
# Both slices are re-indexed from 0.
trains, tests = [], []
for frame in dfs:
    stamps = frame['datetime']
    in_train_window = (stamps > start_date) & (stamps < trade_date)
    in_trade_window = stamps >= trade_date
    trains.append(frame[in_train_window].reset_index(drop=True))
    tests.append(frame[in_trade_window].reset_index(drop=True))

In [8]:
'''
Find the best pair

Tabulates the training data with `CointnCorr`, writes every table to a text
report, picks the (pair, interval) whose 'coint' + 'corr' entries sum to the
highest strictly positive score, prints it, and pickles the selection.

Raises ValueError when no (pair, interval) has a score above 0.
'''

import pickle

# `tables` is used as a mapping: pair key -> table indexed by .at[row, freq].
# NOTE(review): presumably each table is a pandas DataFrame with 'coint' and
# 'corr' rows and one column per interval — confirm against CointnCorr.tabulate.
tables = CointnCorr(trains, freqs).tabulate()
cointncorrtxt = "result/gridsearch/cointncorr.txt"

# Make sure the report directory exists so a fresh checkout does not fail with
# FileNotFoundError. This also creates the parent "result" directory that the
# pickle below is written into.
os.makedirs(os.path.dirname(cointncorrtxt), exist_ok=True)

# Mode "w" truncates an existing report, so no explicit delete is needed, and
# the context manager closes the file on exit.
with open(cointncorrtxt, "w") as f:
    for k, v in tables.items():
        f.write(f"{k}\n")
        f.write(f"{v}\n\n")

# Strict ">" keeps the first candidate on ties; starting at 0 means only
# strictly positive scores can be selected.
best_value = 0
best_pair, best_freq = None, None
for key in tables.keys():
    for freq in freqs:
        rel = tables[key].at['coint', freq] + tables[key].at['corr', freq]
        if rel > best_value:
            best_value = rel
            best_pair = key
            best_freq = freq

# Fail with an explicit message instead of a NameError further down when
# nothing qualified (empty `tables`, or no score above 0).
if best_pair is None:
    raise ValueError(
        "No pair/interval has a positive coint + corr score; "
        f"see {cointncorrtxt} for the full tables."
    )

print("===========================================")
print(f"Best trading pairs shall be: {best_pair} under {best_freq} interval")
print(f"the coint is {round(tables[best_pair].at['coint', best_freq]*100, 2)}%")
print(f"and the corr is {round(tables[best_pair].at['corr', best_freq],3)}")
print("===========================================")

# Persist the selection as ([part, part, ...], interval); the pair key is
# split on "_" to recover its parts.
cointncorr = best_pair.split("_"), best_freq

with open('result/cointncorr.pickle', 'wb') as pk:
    pickle.dump(cointncorr, pk)

Best trading pairs shall be: BTCEUR_BTCUAH under 1m interval
the coint is 70.65%
and the corr is 0.926
