Run the preliminary steps: download the market data and find a suitable pair for pairs trading.

Please re-execute if `params.py` is modified.

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import json
import numpy as np
import pandas as pd
from params import *

from tqdm import tqdm
from itertools import combinations
from utils.read2df import read2df
from utils.cointncorr import CointnCorr

In [2]:
# %%capture
if symbols is None:
    !python binance-public-data/python/download-kline.py \
        -i {" ".join(list(freqs.keys()))} -startDate {start_date} -endDate {end_date} -t spot -skip-daily 1
else:
    !python binance-public-data/python/download-kline.py \
        -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

Found 7 symbols
[1/7] - start download monthly BTCUSD klines 

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSD/1m/BTCUSD-1m-2023-08.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSD/1m/BTCUSD-1m-2023-09.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSD/1m/BTCUSD-1m-2023-10.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSD/1m/BTCUSD-1m-2023-11.zip

File not found: https://data.binance.vision/data/spot/monthly/klines/BTCUSD/1m/BTCUSD-1m-2023-12.zip
[2/7] - start download monthly BTCGBP klines 

File Download: C:\Users\hyan212\backtrader-crypto-rl\binance-public-data\python\data/spot/monthly/klines/BTCGBP/1m/BTCGBP-1m-2023-08.zip

[..................................................]
[..................................................]
[#.................................................]
[#.................................................]
[##............................

In [3]:
# Load the downloaded OHLCV data for the configured symbols and intervals
# into `pandas` dataframes.
dfs = read2df(symbols, freqs)

# Show the first dataframe as a quick preview of what was loaded.
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1690848059999,26676.54,26677.00,26674.68,26675.16,0.13061,BTCEUR,1m,2023-08-01 00:00:59.999
1,1690848059999,22846.83,22850.59,22845.00,22845.00,0.71648,BTCGBP,1m,2023-08-01 00:00:59.999
2,1690848059999,2694077.00,2694077.00,2694077.00,2694077.00,0.00000,BTCRUB,1m,2023-08-01 00:00:59.999
3,1690848059999,1693.91,1693.95,1693.79,1693.79,1.36210,ETHEUR,1m,2023-08-01 00:00:59.999
4,1690848059999,1451.21,1451.33,1451.16,1451.33,0.27500,ETHGBP,1m,2023-08-01 00:00:59.999
...,...,...,...,...,...,...,...,...,...
1054075,1701388799999,29987.54,29987.54,29987.54,29987.54,0.00000,BTCGBP,1m,2023-11-30 23:59:59.999
1054076,1701388799999,3386180.00,3386180.00,3386180.00,3386180.00,0.00000,BTCRUB,1m,2023-11-30 23:59:59.999
1054077,1701388799999,1885.94,1885.94,1884.17,1885.70,3.59330,ETHEUR,1m,2023-11-30 23:59:59.999
1054078,1701388799999,1627.07,1627.07,1627.07,1627.07,0.00000,ETHGBP,1m,2023-11-30 23:59:59.999


In [4]:
# Chronological split of every dataframe in `dfs`:
#   * rows strictly after `start_date` and strictly before `trade_date`
#     become the training data;
#   * rows at or after `trade_date` become the trading (test) data.
# Each slice gets a fresh 0-based index.
trains = [
    frame.loc[(frame['datetime'] > start_date) & (frame['datetime'] < trade_date)].reset_index(drop=True)
    for frame in dfs
]
tests = [
    frame.loc[frame['datetime'] >= trade_date].reset_index(drop=True)
    for frame in dfs
]

In [5]:
# Find the best pair: tabulate the cointegration / correlation scores of the
# training data, dump the tables to a text file, pick the (pair, interval)
# with the highest combined score and persist that choice as a pickle.

import pickle

# Weight of the cointegration score relative to the correlation score when
# the two are combined into a single ranking value.
COINT_WEIGHT = 3

tables = CointnCorr(trains, freqs).tabulate()
cointncorrtxt = "result/gridsearch/cointncorr.txt"

# Make sure the output directories exist so a fresh checkout does not fail
# on open(); this also creates `result/`, which the pickle below is written to.
os.makedirs(os.path.dirname(cointncorrtxt), exist_ok=True)

# Mode "w" truncates any previous report, so no explicit removal is needed,
# and the `with` block closes the file on exit.
with open(cointncorrtxt, "w") as f:
    for k, v in tables.items():
        f.write(f"{k}\n")
        f.write(f"{v}\n\n")

# Start from -inf (not 0) so that a winner is still selected when every
# combined score is zero or negative.
best_value = float("-inf")
best_pair = None
best_freq = None
for key, table in tables.items():
    for freq in freqs:
        rel = table.at['coint', freq] * COINT_WEIGHT + table.at['corr', freq]
        # NaN scores compare False here and are therefore skipped.
        if rel > best_value:
            best_value = rel
            best_pair = key
            best_freq = freq

# Fail with a clear message instead of a NameError further down.
if best_pair is None:
    raise ValueError(
        "No trading pair could be selected: the score tables are empty "
        "or contain no comparable values"
    )

print("===========================================")
print(f"Best trading pairs shall be: {best_pair} under {best_freq} interval")
print(f"the coint is {round(tables[best_pair].at['coint', best_freq]*100, 2)}%")
print(f"and the corr is {round(tables[best_pair].at['corr', best_freq],3)}")
print("===========================================")

# Persist ([symbol_a, symbol_b], interval); the table key is split on "_"
# to obtain the two symbols.
cointncorr = best_pair.split("_"), best_freq

with open('result/cointncorr.pickle', 'wb') as pk:
    pickle.dump(cointncorr, pk)

Best trading pairs shall be: ETHEUR_ETHGBP under 1m interval
the coint is 50.0%
and the corr is 0.742
