Run the preliminary steps: download the market data and find a suitable pair for pairs trading.

Re-run this notebook from the top whenever `params.py` is modified, because its settings are loaded only once, by the import cell.

In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import json
import numpy as np
import pandas as pd
from params import *

from tqdm import tqdm
from itertools import combinations
from utils.read2df import read2df
from utils.cointncorr import CointnCorr

In [2]:
# %%capture
if symbols is None:
    !python binance-public-data/python/download-kline.py \
        -i {" ".join(list(freqs.keys()))} -startDate {start_date} -endDate {end_date} -t spot -skip-daily 1
else:
    !python binance-public-data/python/download-kline.py \
        -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

usage: download-kline.py [-h] [-s SYMBOLS [SYMBOLS ...]]
                         [-y {2017,2018,2019,2020,2021,2022,2023} [{2017,2018,2019,2020,2021,2022,2023} ...]]
                         [-m {1,2,3,4,5,6,7,8,9,10,11,12} [{1,2,3,4,5,6,7,8,9,10,11,12} ...]]
                         [-d DATES [DATES ...]] [-startDate STARTDATE]
                         [-endDate ENDDATE] [-folder FOLDER]
                         [-skip-monthly {0,1}] [-skip-daily {0,1}] [-c {0,1}]
                         -t {spot,um,cm}
                         [-i {1s,1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,1mo} [{1s,1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,1mo} ...]]
download-kline.py: error: unrecognized arguments: 00:00:00


In [3]:
'''
Load the downloaded spot-market OHLCV files into `pandas` dataframes.
`dfs` is a sequence of frames (presumably one per configured interval -- TODO confirm in `read2df`).
'''

dfs = read2df(symbols, freqs, marketType='spot')

# Show the first frame as a quick sanity check of the columns and date range.
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1672531259999,15425.08,15426.91,15422.74,15426.08,0.72493,BTCEUR,1m,2023-01-01 00:00:59.999
1,1672531259999,13678.76,13678.76,13678.76,13678.76,0.01089,BTCGBP,1m,2023-01-01 00:00:59.999
2,1672531259999,1195551.00,1195551.00,1195551.00,1195551.00,0.00000,BTCRUB,1m,2023-01-01 00:00:59.999
3,1672531259999,1196.13,1196.14,1195.92,1196.13,65.50030,ETHUSDT,1m,2023-01-01 00:00:59.999
4,1672531319999,15425.24,15429.79,15423.96,15424.21,0.19929,BTCEUR,1m,2023-01-01 00:01:59.999
...,...,...,...,...,...,...,...,...,...
2085511,1703818739999,2341.62,2341.63,2340.96,2341.20,131.65150,ETHUSDT,1m,2023-12-29 02:58:59.999
2085512,1703818799999,38506.92,38506.92,38506.92,38506.92,0.00439,BTCEUR,1m,2023-12-29 02:59:59.999
2085513,1703818799999,35747.92,35986.75,35747.92,35986.75,0.00599,BTCGBP,1m,2023-12-29 02:59:59.999
2085514,1703818799999,3722975.00,3722975.00,3722975.00,3722975.00,0.00000,BTCRUB,1m,2023-12-29 02:59:59.999


In [4]:
'''
Split every frame chronologically around `trade_date`:
rows strictly after `start_date` and before `trade_date` form the training set,
rows from `trade_date` up to (but excluding) `end_date` form the trading/test set.
'''

trains = [
    df[(df['datetime'] > start_date) & (df['datetime'] < trade_date)].reset_index(drop=True)
    for df in dfs
]
tests = [
    df[(df['datetime'] >= trade_date) & (df['datetime'] < end_date)].reset_index(drop=True)
    for df in dfs
]

In [5]:
'''
Find the best pair:
score every (pair, interval) combination on the training data, write the full
tables to a text report, and pickle the winning pair and interval.
'''

import pickle

# `tables` maps a pair key to a table addressed below as
# table.at[<'coint' | 'corr'>, <interval>].
tables = CointnCorr(trains, freqs).tabulate()
cointncorrtxt = "result/gridsearch/cointncorr.txt"

# Create the output folders on a fresh checkout. Opening with "w" already
# truncates any report left over from a previous run.
os.makedirs(os.path.dirname(cointncorrtxt), exist_ok=True)

with open(cointncorrtxt, "w") as f:
    for k, v in tables.items():
        f.write(f"{k}\n")
        f.write(f"{v}\n\n")

# Reset the winner explicitly so that a re-run can never silently reuse a
# `best_pair` / `best_freq` left in the kernel by an earlier execution.
best_value, best_pair, best_freq = 0, None, None
for key, table in tables.items():
    for freq in freqs:
        # The cointegration figure is weighted three times as much as the correlation.
        rel = table.at['coint', freq]*3 + table.at['corr', freq]
        if rel > best_value:
            best_value, best_pair, best_freq = rel, key, freq

if best_pair is None:
    # Previously this surfaced as a NameError (or as stale values) further down.
    raise ValueError(
        "No pair/interval combination scored above 0; "
        "check the downloaded data and the settings in params.py."
    )

print("===========================================")
print(f"Best trading pairs shall be: {best_pair} under {best_freq} interval")
print(f"the coint is {round(tables[best_pair].at['coint', best_freq]*100, 2)}%")
print(f"and the corr is {round(tables[best_pair].at['corr', best_freq],3)}")
print("===========================================")

# Persist ([symbol_a, symbol_b], interval); the pair key joins the two symbols with "_".
cointncorr = best_pair.split("_"), best_freq

with open('result/cointncorr.pickle', 'wb') as pk:
    pickle.dump(cointncorr, pk)

Best trading pairs shall be: BTCEUR_BTCGBP under 1m interval
the coint is 56.67%
and the corr is 0.876
