In [1]:
import os
import numpy as np
import pandas as pd
from utils.gridsearch import gridsearch
from utils.read2df import read2df
from utils.cointncorr import CointnCorr
import statsmodels.api as sm

import backtrader as bt
from itertools import combinations

The `symbols` are the trading pairs we are interested in the pair trading.

`start_date` means we will start trading from the marked date.

`freqs` is a dictionary of all trading intervals to be considered

In [2]:
symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XMRUSDT', 'BNBUSDT', 'ADAUSDT', 'DOGEUSDT', 'SOLUSDT', 'TRXUSDT']
# symbols = ['BTCUSDT', 'BNBUSDT', 'ADAUSDT']
start_date = '2023-01-01'

# freqs = {'1h':60, '2h':120, '4h':240, '6h':360, '8h':480, '12h':720, '1d':1440}
freqs = {'3m':3, '5m':5, '15m':15, '30m':30}

# Download Data from [binance-public-data](https://github.com/binance/binance-public-data/tree/master/python)

Download BTCUSDT and ETHUSDT for histories after `start_date` with interested intervals.


In [3]:
%%capture
if symbols is None:
    !python binance-public-data/python/download-kline.py -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1
else:
    !python binance-public-data/python/download-kline.py -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

Read the downloaded OHLCV data into `pandas` dataframe

In [4]:
# dfs = read2df(symbols, freqs)
dfs = read2df(symbols, freqs)
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1672531379999,0.24580,0.24590,0.24560,0.24580,86919.8000,ADAUSDT,3m,2023-01-01 00:02:59.999
1,1672531379999,246.30000,246.40000,246.20000,246.20000,291.8060,BNBUSDT,3m,2023-01-01 00:02:59.999
2,1672531379999,16541.77000,16544.76000,16534.52000,16536.43000,226.4364,BTCUSDT,3m,2023-01-01 00:02:59.999
3,1672531379999,0.07024,0.07034,0.07024,0.07024,926758.0000,DOGEUSDT,3m,2023-01-01 00:02:59.999
4,1672531379999,1196.13000,1196.25000,1195.85000,1195.85000,159.6038,ETHUSDT,3m,2023-01-01 00:02:59.999
...,...,...,...,...,...,...,...,...,...
1049512,1693526399999,1646.26000,1646.26000,1645.76000,1645.76000,203.5577,ETHUSDT,3m,2023-08-31 23:59:59.999
1049513,1693526399999,63.90000,63.90000,63.83000,63.89000,511.5890,LTCUSDT,3m,2023-08-31 23:59:59.999
1049514,1693526399999,19.74000,19.74000,19.72000,19.74000,1188.7800,SOLUSDT,3m,2023-08-31 23:59:59.999
1049515,1693526399999,0.07671,0.07673,0.07671,0.07673,80794.0000,TRXUSDT,3m,2023-08-31 23:59:59.999


# Cointegration and Correlation

Calculate daily coint and corr for all the pairs
Consider 1 day with 1440 minutes.

In [5]:
res = CointnCorr(dfs, freqs)
tables = res.tabulate()
for k, v in tables.items():
    print(k)
    print(v)

ADAUSDT_BNBUSDT
             3m        5m       15m       30m
coint  0.144033  0.144033  0.152263  0.156379
corr   0.690733  0.690790  0.691616  0.691488
ADAUSDT_BTCUSDT
             3m        5m       15m       30m
coint  0.148148  0.127572  0.168724  0.185185
corr   0.727269  0.726920  0.726703  0.726476
ADAUSDT_DOGEUSDT
             3m        5m       15m       30m
coint  0.156379  0.176955  0.172840  0.205761
corr   0.703392  0.703164  0.703266  0.705280
ADAUSDT_ETHUSDT
             3m        5m       15m       30m
coint  0.135802  0.119342  0.135802  0.176955
corr   0.752624  0.752703  0.753029  0.752102
ADAUSDT_LTCUSDT
             3m        5m       15m       30m
coint  0.139918  0.131687  0.135802  0.168724
corr   0.625637  0.625538  0.628556  0.629957
ADAUSDT_SOLUSDT
             3m        5m       15m       30m
coint  0.148148  0.135802  0.172840  0.176955
corr   0.701388  0.701036  0.701412  0.701888
ADAUSDT_TRXUSDT
             3m        5m       15m       30m
coint  0.1111

In [6]:
best_value = 0
for key in tables.keys():
    for freq in freqs:
        rel = tables[key].at['coint', freq] + tables[key].at['corr', freq]
        if rel > best_value:
            best_value = rel
            best_pair = key
            best_freq = freq
print("===========================================")
print(f"Best trading pairs shall be: {best_pair} under {best_freq} interval")
print(f"the coint is {round(tables[best_pair].at['coint', best_freq]*100, 2)}% and the corr is {round(tables[best_pair].at['corr', best_freq],3)}")
print("===========================================")

best_freq = '30m'

Best trading pairs shall be: BTCUSDT_ETHUSDT under 15m interval
the coint is 14.4% and the corr is 0.843


In [7]:
# best_pairs = ()
# best_corr = 0.0
# FREQUENCY = None
# for i, (freq, f) in enumerate(freqs.items()):
#     for comb in combinations(set(dfs[i]['tic']), 2):
#         first_ele = dfs[i][dfs[i]['tic'] == comb[0]]['close']
#         second_ele = dfs[i][dfs[i]['tic'] == comb[1]]['close']
#         _, pvalue, _ = coint(first_ele, second_ele)
#         corr = np.corrcoef(first_ele, second_ele)[0][1]
#         if pvalue <= 0.05 and corr >= 0.8:
#             print(f"{comb[0]} and {comb[1]} are cointegrated under {freq} interval (correlation {corr})")
#             if corr > best_corr:
#                 best_pairs = (comb[0], comb[1])
#                 best_corr = corr
#                 FREQUENCY = freq
#         else:
#             # print(f"{comb[0]} and {comb[1]} are NOT cointegrated under {FREQUENCY} interval")

# print("===========================================")
# print(f"Best trading pairs shall be: {best_pairs[0]} and {best_pairs[1]} under {FREQUENCY} interval")
# print("===========================================")

# Define Trading Strategy

Firstly define a sizer based on [Kelly Criterion](https://www.wikiwand.com/en/Kelly_criterion)

In [8]:
# Seems that Sizer can only be executed when self.buy(size=None). 
# We need to purchase amount in a certain ratio in Pair Trading.
# Therefore the Sizer is hard to implemented.

class KellyCriterionSizer(bt.Sizer):
    params = (('period', 30),)

    def _getsizing(self, comminfo, cash, data, isbuy):
        position = self.broker.getposition(data).size

        close_prices = data.close.get(size=self.p.period)
        returns = np.log(close_prices / close_prices.shift(1)).dropna()

        p = len(returns[returns > 0]) / len(returns)
        a = (returns[returns > 0].mean() + 1) if len(returns[returns > 0]) > 0 else 1.0
        b = (-returns[returns > 0].mean() + 1) if len(returns[returns < 0]) > 0 else 1.0
        q = 1 - p

        f = min(max((p/a - q/b), 0), 1)

        if isbuy:
            size = cash * f / data.close[0]
        else:
            size = position * f

        return size

Define a custom indicator for [Kelly Criterion](https://www.wikiwand.com/en/Kelly_criterion)

In [9]:
class KellyCriterionIndicator(bt.indicators.PeriodN):
    _mindatas = 1

    packages = (
        ('pandas', 'pd'),
    )
    lines = ('kc_f',)
    params = (
        ('period', 30),
    )

    def next(self):
        spreads = pd.Series(self.data.get(size=self.p.period))
        returns = spreads.pct_change()

        kc_p = len(returns[returns > 0]) / len(returns)
        kc_a = (returns[returns > 0].mean() + 1) if len(returns[returns > 0]) > 0 else 1
        kc_b = (returns[returns < 0].mean() + 1) if len(returns[returns < 0]) > 0 else 1
        kc_q = 1 - kc_p
        
        kc_f = min(max((kc_p/kc_a - kc_q/kc_b), 0), 1)
        self.lines.kc_f[0] = kc_f

In [10]:
# Incomplete

class ZscoreIndicator(bt.indicators.PeriodN):
    _mindatas = 2 # ensure at least 2 data feeds are passed

    packages = (
        ('pandas', 'pd'),
        ('statsmodels.api', 'sm'),
    )
    lines = ('zscore',)
    params = (
        ('period', 30),
    )

    def next(self):
        p0 = pd.Series(self.data0.get(size=self.p.period))
        p1 = pd.Series(self.data1.get(size=self.p.period))
        sm.add_constant(X)

Define custom CommissionInfo (*Not in use*)

In [11]:
class PairTradingCommInfo(bt.CommInfoBase):
    params = (
        ('commission', 0.0), ('mult', 10), ('margin', 1000),
        ('stocklike', False),
        ('commtype', bt.CommInfoBase.COMM_PERC),
        ('percabs', True),
    )

The strategy with fixed ordersize

In [12]:
class PairTrading(bt.Strategy):
    params = dict(
        OPEN_THRE=2,
        CLOS_THRE=0.1,
        period=300
    )

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            return

        if order.status == order.Completed:
            if order.isbuy():
                print(f"Buy {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")
            else:
                print(f"Sell {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")

        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            print('%s ,' % order.Status[order.status])
            pass

    def __init__(self):
        self.data0 = self.datas[0]
        self.data1 = self.datas[1]

        # Calculate zscore of the ratio
        transform = bt.indicators.OLS_TransformationN(self.data1, self.data0, period=self.p.period)
        spread = transform.spread
        self.zscore = transform.zscore

        self.kc_f = KellyCriterionIndicator(spread, period=self.p.period)

        # -1 for short data1/data0, 1 for long data1/data0, 0 for no position

    def next(self):
        # print(f'Right now the zscore is {self.transform.zscore[0]}, and the position is {self.position_status}')
        
        # Calculate the ratio between the 2 assets
        ratio = self.data1.close[0] / self.data0.close[0]
        cash = self.broker.get_cash()
        position = self.broker.getposition(self.data0).size + self.broker.getposition(self.data1).size

        if abs(self.zscore[0]) < self.p.CLOS_THRE and position != 0:
            print("------")
            print("close position")
            self.close(data=self.data0)
            self.close(data=self.data1)
    
        elif self.zscore[0] < -self.p.OPEN_THRE and position == 0:
            print("------")
            print("open position")
            # print(f"long {self.data0.alias} and short {self.data1.alias}")

            purchase_amount = self.broker.get_cash()/self.data0.close[0]*self.kc_f[0]

            self.sell(data=self.data1, size=purchase_amount/ratio)
            self.buy(data=self.data0, size=purchase_amount)

        elif self.zscore[0] > self.p.OPEN_THRE and position == 0:
            print("------")
            print("open position")
            # print(f"long {self.data1.alias} and short {self.data0.alias}")
            
            purchase_amount = self.broker.get_cash()/self.data1.close[0]*self.kc_f[0]

            self.sell(data=self.data0, size=purchase_amount*ratio)
            self.buy(data=self.data1, size=purchase_amount)

    def stop(self):
        self.close(data=self.data0)
        self.close(data=self.data1)
        
        print('==================================================')
        print('Starting Value - %.2f' % self.broker.startingcash)
        print('Ending   Value - %.2f' % self.broker.getvalue())
        print('==================================================')

# Execute the Strategy

Load the data

In [13]:
datafeeds_0 = []
datafeeds_1 = []

for idx, freq in enumerate(freqs):
    datafeeds_0.append(
        bt.feeds.PandasData(
            dataname=dfs[idx][dfs[idx]['tic']==best_pair.split('_')[0]],
            datetime='datetime',
            open='open',
            high='high',
            low='low',
            close='close',
            volume='volume',
            openinterest=None,
        )
    )
    datafeeds_1.append(
        bt.feeds.PandasData(
            dataname=dfs[idx][dfs[idx]['tic']==best_pair.split('_')[1]],
            datetime='datetime',
            open='open',
            high='high',
            low='low',
            close='close',
            volume='volume',
            openinterest=None,
        )
    )

freq_position = list(freqs.keys()).index(best_freq)
datafeed0 = datafeeds_0[freq_position]
datafeed1 = datafeeds_1[freq_position]
datafeeds = [datafeed0, datafeed1]

The main strategy engine

In [14]:
param = {'OPEN_THRE':1, 'CLOS_THRE':0.1, 'period':100}

def cerebro_run(datafeeds, param):
    # Create a Cerebro instance and add the data feed
    cerebro = bt.Cerebro()
    # TODO: should I include `best_pair` as a parameter?
    cerebro.adddata(datafeeds[0])
    cerebro.adddata(datafeeds[1])

    # Set up other parameters for backtest
    cerebro.broker.set_cash(100000)  # Set initial capital

    # comminfo = PairTradingCommInfo(commission=0.002, margin=1000, mult=10)
    # cerebro.broker.addcommissioninfo(comminfo)

    # cerebro.addanalyzer(bt.analyzers.TimeReturn, _name='timereturns')
    cerebro.addanalyzer(bt.analyzers.Returns, _name='Returns')
    # cerebro.addsizer(KellyCriterionSizer)

    cerebro.addstrategy(PairTrading, **param)
    strats = cerebro.run()
    return strats

# cerebro_run(datafeeds, param)

# Grid Search the Strategy
Define scoring and param_grid

In [15]:
# param_grid = {
#     'OPEN_THRE':np.arange(1, 3, 1), 
#     'CLOS_THRE':np.arange(0.2, 1.0, 0.2), 
#     'period': np.arange(30, 60, 10)
# }

param_grid = {
    'OPEN_THRE': np.arange(2, 3, 1), 
    'CLOS_THRE': np.arange(0.1, 0.3, 0.2), 
    'period': [30, 40]
}

def scoring(strats):
    score = strats[0].analyzers.Returns.get_analysis()['rtot']
    return score

Grid Searching

In [16]:
gridsearch(cerebro_run, param_grid, scoring, datafeeds)

------
open position
Sell  @ price: 1200.16 for Qty: -67.12764389644762
Buy  @ price: 16608.0 for Qty: 4.850775163180494
------
close position
Sell  @ price: 16583.83 for Qty: -4.850775163180494
Buy  @ price: 1196.9 for Qty: 67.12764389644762
------
open position
Sell  @ price: 16721.27 for Qty: -5.98647907836053
Buy  @ price: 1213.08 for Qty: 82.51786172782593
------
close position
Buy  @ price: 16677.6 for Qty: 5.98647907836053
Sell  @ price: 1214.13 for Qty: -82.51786172782593
------
open position
Sell  @ price: 1213.55 for Qty: -52.802785905514455
Buy  @ price: 16685.21 for Qty: 3.840325741642395
------
close position
Sell  @ price: 16725.5 for Qty: -3.840325741642395
Buy  @ price: 1217.85 for Qty: 52.802785905514455
------
open position
Sell  @ price: 1215.19 for Qty: -32.093487030660484
Buy  @ price: 16719.9 for Qty: 2.332601780053544
------
close position
Sell  @ price: 16741.22 for Qty: -2.332601780053544
Buy  @ price: 1217.49 for Qty: 32.093487030660484
------
open position
Se

(0.16050079069053635, {'OPEN_THRE': 2, 'CLOS_THRE': 0.1, 'period': 40})