In [1]:
import os
import numpy as np
import pandas as pd
from utils.gridsearch import gridsearch
from utils.read2df import read2df
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint

import backtrader as bt
from itertools import combinations

The `symbols` are the trading pairs we are interested in the pair trading.

`start_date` means we will start trading from the marked date.

`freqs` is a dictionary of all trading intervals to be considered

In [2]:
symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XMRUSDT', 'BNBUSDT', 'ADAUSDT', 'DOGEUSDT', 'SOLUSDT', 'TRXUSDT']
start_date = '2023-01-01'

freqs = {'1h':60, '2h':120, '4h':240, '6h':360, '8h':480, '12h':720, '1d':1440}

# Download Data from [binance-public-data](https://github.com/binance/binance-public-data/tree/master/python)

Download BTCUSDT and ETHUSDT for histories after `start_date` with interested intervals.


In [None]:
%%capture
if symbols is None:
    !python binance-public-data/python/download-kline.py -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1
else:
    !python binance-public-data/python/download-kline.py -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

Read the downloaded OHLCV data into `pandas` dataframe

In [3]:
# dfs = read2df(symbols, freqs)
dfs = read2df(symbols, freqs)
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1672534799999,0.24580,0.24590,0.24410,0.24450,4.476440e+06,ADAUSDT,1h,2023-01-01 00:59:59.999
1,1672534799999,246.30000,246.70000,245.50000,245.70000,4.233967e+03,BNBUSDT,1h,2023-01-01 00:59:59.999
2,1672534799999,16541.77000,16545.70000,16508.39000,16529.67000,4.364836e+03,BTCUSDT,1h,2023-01-01 00:59:59.999
3,1672534799999,0.07024,0.07034,0.06941,0.06980,4.061193e+07,DOGEUSDT,1h,2023-01-01 00:59:59.999
4,1672534799999,1196.13000,1196.70000,1192.72000,1194.09000,5.889384e+03,ETHUSDT,1h,2023-01-01 00:59:59.999
...,...,...,...,...,...,...,...,...,...
52465,1693526399999,1649.66000,1649.66000,1644.02000,1645.76000,6.182704e+03,ETHUSDT,1h,2023-08-31 23:59:59.999
52466,1693526399999,64.12000,64.13000,63.83000,63.89000,1.948685e+04,LTCUSDT,1h,2023-08-31 23:59:59.999
52467,1693526399999,19.79000,19.81000,19.70000,19.74000,3.812562e+04,SOLUSDT,1h,2023-08-31 23:59:59.999
52468,1693526399999,0.07692,0.07693,0.07668,0.07673,3.984715e+06,TRXUSDT,1h,2023-08-31 23:59:59.999


# Cointegration and Correlation

Check cointegration and correlations among every two pairs from the given symbols.
Find the best pair to be traded which has cointegration and highest correlation.

In [4]:
best_pairs = ()
best_corr = 0.0
FREQUENCY = None
for i, (freq, f) in enumerate(freqs.items()):
    for comb in combinations(set(dfs[i]['tic']), 2):
        first_ele = dfs[i][dfs[i]['tic'] == comb[0]]['close']
        second_ele = dfs[i][dfs[i]['tic'] == comb[1]]['close']
        _, pvalue, _ = coint(first_ele, second_ele)
        corr = np.corrcoef(first_ele, second_ele)[0][1]
        if pvalue <= 0.05 and corr >= 0.8:
            print(f"{comb[0]} and {comb[1]} are cointegrated under {freq} interval (correlation {corr})")
            if corr > best_corr:
                best_pairs = (comb[0], comb[1])
                best_corr = corr
                FREQUENCY = freq
        else:
            print(f"{comb[0]} and {comb[1]} are NOT cointegrated under {FREQUENCY} interval")

print("===========================================")
print(f"Best trading pairs shall be: {best_pairs[0]} and {best_pairs[1]} under {FREQUENCY} interval")
print("===========================================")

BNBUSDT and ETHUSDT are NOT cointegrated under None interval
BNBUSDT and TRXUSDT are NOT cointegrated under None interval
BNBUSDT and ADAUSDT are cointegrated under 1h interval (correlation 0.9177383346796679)
BNBUSDT and DOGEUSDT are NOT cointegrated under 1h interval
BNBUSDT and XMRUSDT are NOT cointegrated under 1h interval
BNBUSDT and BTCUSDT are NOT cointegrated under 1h interval
BNBUSDT and LTCUSDT are NOT cointegrated under 1h interval
BNBUSDT and SOLUSDT are NOT cointegrated under 1h interval
ETHUSDT and TRXUSDT are NOT cointegrated under 1h interval
ETHUSDT and ADAUSDT are NOT cointegrated under 1h interval
ETHUSDT and DOGEUSDT are NOT cointegrated under 1h interval
ETHUSDT and XMRUSDT are NOT cointegrated under 1h interval
ETHUSDT and BTCUSDT are cointegrated under 1h interval (correlation 0.9497573581917488)
ETHUSDT and LTCUSDT are NOT cointegrated under 1h interval
ETHUSDT and SOLUSDT are NOT cointegrated under 1h interval
TRXUSDT and ADAUSDT are NOT cointegrated under 1h i

# Define Trading Strategy

Firstly define a sizer based on [Kelly Criterion](https://www.wikiwand.com/en/Kelly_criterion)

In [5]:
# Seems that Sizer can only be executed when self.buy(size=None). 
# We need to purchase amount in a certain ratio in Pair Trading.
# Therefore the Sizer is hard to implemented.

class KellyCriterionSizer(bt.Sizer):
    params = (('period', 30),)

    def _getsizing(self, comminfo, cash, data, isbuy):
        position = self.broker.getposition(data).size

        close_prices = data.close.get(size=self.p.period)
        returns = np.log(close_prices / close_prices.shift(1)).dropna()

        p = len(returns[returns > 0]) / len(returns)
        a = (returns[returns > 0].mean() + 1) if len(returns[returns > 0]) > 0 else 1.0
        b = (-returns[returns > 0].mean() + 1) if len(returns[returns < 0]) > 0 else 1.0
        q = 1 - p

        f = min(max((p/a - q/b), 0), 1)

        if isbuy:
            size = cash * f / data.close[0]
        else:
            size = position * f

        return size

Define a custom indicator for [Kelly Criterion](https://www.wikiwand.com/en/Kelly_criterion)

In [6]:
class KellyCriterionIndicator(bt.indicators.PeriodN):
    '''
    Uses ``pandas``
    '''
    _mindatas = 1

    packages = (
        ('pandas', 'pd'),
    )
    lines = ('kc_f',)
    params = (
        ('period', 30),
    )

    def next(self):
        spreads = pd.Series(self.data.get(size=self.p.period))
        returns = spreads.pct_change()

        kc_p = len(returns[returns > 0]) / len(returns)
        kc_a = (returns[returns > 0].mean() + 1) if len(returns[returns > 0]) > 0 else 1
        kc_b = (returns[returns < 0].mean() + 1) if len(returns[returns < 0]) > 0 else 1
        kc_q = 1 - kc_p
        
        kc_f = min(max((kc_p/kc_a - kc_q/kc_b), 0), 1)
        self.lines.kc_f[0] = kc_f

Define custom CommissionInfo

In [7]:
class PairTradingCommInfo(bt.CommInfoBase):
    params = (
        ('commission', 0.0), ('mult', 10), ('margin', 1000),
        ('stocklike', False),
        ('commtype', bt.CommInfoBase.COMM_PERC),
        ('percabs', True),
    )

The strategy with fixed ordersize

In [8]:
class PairTrading(bt.Strategy):
    params = dict(
        OPEN_THRE=2,
        CLOS_THRE=0.1,
        period=30
    )

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            return

        if order.status == order.Completed:
            if order.isbuy():
                print(f"Buy {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")
            else:
                print(f"Sell {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")

        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            print('%s ,' % order.Status[order.status])
            pass

    def __init__(self):
        self.data0 = self.datas[0]
        self.data1 = self.datas[1]

        # Calculate zscore of the ratio
        transform = bt.indicators.OLS_TransformationN(self.data1, self.data0, period=self.p.period)
        spread = transform.spread
        self.zscore = transform.zscore

        self.kc_f = KellyCriterionIndicator(spread, period=self.p.period)

        # -1 for short data1/data0, 1 for long data1/data0, 0 for no position
        self.position_status = 0

    def next(self):
        # print(f'Right now the zscore is {self.transform.zscore[0]}, and the position is {self.position_status}')
        
        # Calculate the ratio between the 2 assets
        ratio = self.data1.close[0] / self.data0.close[0]
        cash = self.broker.get_cash()

        if abs(self.zscore[0]) < self.p.CLOS_THRE and self.position_status != 0:
            print("------")
            print("close position")
            self.position_status = 0
            self.close(data=self.data0)
            self.close(data=self.data1)
    
        elif self.zscore[0] < -self.p.OPEN_THRE and self.position_status == 0:
            print("------")
            print(f"long {self.data0.alias} and short {self.data1.alias}")
            self.position_status = 1

            purchase_amount = self.broker.get_cash()/self.data0.close[0]*self.kc_f[0]

            self.sell(data=self.data1, size=purchase_amount/ratio)
            self.buy(data=self.data0, size=purchase_amount)

        elif self.zscore[0] > self.p.OPEN_THRE and self.position_status == 0:
            print("------")
            print(f"long {self.data1.alias} and short {self.data0.alias}")
            self.position_status = -1
            
            purchase_amount = self.broker.get_cash()/self.data1.close[0]*self.kc_f[0]

            self.sell(data=self.data0, size=purchase_amount*ratio)
            self.buy(data=self.data1, size=purchase_amount)

    def stop(self):
        print('==================================================')
        print('Starting Value - %.2f' % self.broker.startingcash)
        print('Ending   Value - %.2f' % self.broker.getvalue())
        print('==================================================')

# Execute the Strategy

Load the data

In [9]:
datafeeds_0 = []
datafeeds_1 = []

for idx, freq in enumerate(freqs):
    datafeeds_0.append(
        bt.feeds.PandasData(
            dataname=dfs[idx][dfs[idx]['tic']==best_pairs[0]],
            datetime='datetime',
            open='open',
            high='high',
            low='low',
            close='close',
            volume='volume',
            openinterest=None
        )
    )
    datafeeds_1.append(
        bt.feeds.PandasData(
            dataname=dfs[idx][dfs[idx]['tic']==best_pairs[1]],
            datetime='datetime',
            open='open',
            high='high',
            low='low',
            close='close',
            volume='volume',
            openinterest=None
        )
    )
    print(freq)

1h
2h
4h
6h
8h
12h
1d


The main strategy engine

In [10]:
freq_position = list(freqs.keys()).index(FREQUENCY)
datafeed0 = datafeeds_0[freq_position]
datafeed1 = datafeeds_1[freq_position]
datafeeds = [datafeed0, datafeed1]
param = {'OPEN_THRE':1, 'CLOS_THRE':0.1, 'period':30}

In [11]:
def cerebro_run(datafeeds, param):
    # Create a Cerebro instance and add the data feed
    cerebro = bt.Cerebro()
    cerebro.adddata(datafeeds[0], name=best_pairs[0])
    cerebro.adddata(datafeeds[1], name=best_pairs[1])

    # Set up other parameters for backtest
    cerebro.broker.set_cash(100000)  # Set initial capital
    
    comminfo = PairTradingCommInfo(commission=0.002, margin=1000, mult=10)
    cerebro.broker.addcommissioninfo(comminfo)

    # cerebro.addanalyzer(bt.analyzers.TimeReturn, _name='timereturns')
    cerebro.addanalyzer(bt.analyzers.Returns, _name='Returns')
    # cerebro.addsizer(KellyCriterionSizer)

    cerebro.addstrategy(PairTrading, **param)
    strats = cerebro.run()
    return strats

# cerebro_run(datafeeds, param)

# Grid Search the Strategy
Define scoring and param_grid

In [12]:
# param_grid = {
#     'OPEN_THRE':np.arange(1, 3, 1), 
#     'CLOS_THRE':np.arange(0.2, 1.0, 0.2), 
#     'period': np.arange(30, 60, 10)
# }

param_grid = {
    'OPEN_THRE': np.arange(2, 5, 1), 
    'CLOS_THRE': np.arange(0.1, 1.0, 0.2), 
    'period': [30, 40, 50]
}

def scoring(strats):
    score = strats[0].analyzers.Returns.get_analysis()['rtot']
    return score

Grid Searching

In [13]:
gridsearch(cerebro_run, param_grid, scoring, datafeeds)

------
long () and short ()
Sell ETHUSDT @ price: 1647.04 for Qty: -33.169373213308205
Buy BTCUSDT @ price: 22907.01 for Qty: 2.3850571565202094
------
close position
Buy ETHUSDT @ price: 1578.84 for Qty: 33.169373213308205
Sell BTCUSDT @ price: 22967.48 for Qty: -2.3850571565202094
------
long () and short ()
------
close position
------
long () and short ()
Sell BTCUSDT @ price: 20455.73 for Qty: -3.774222505355107
Buy ETHUSDT @ price: 1471.9300000000003 for Qty: 52.44976224343405
------
close position
Sell ETHUSDT @ price: 1761.13 for Qty: -52.44976224343405
Buy BTCUSDT @ price: 26920.11 for Qty: 3.774222505355107
------
long () and short ()
Sell ETHUSDT @ price: 1780.67 for Qty: -9.018312762138438
Buy BTCUSDT @ price: 27972.87 for Qty: 0.574082286489898
------
close position
Buy ETHUSDT @ price: 1874.4499999999998 for Qty: 9.018312762138438
Sell BTCUSDT @ price: 27642.89 for Qty: -0.574082286489898
------
long () and short ()
Sell ETHUSDT @ price: 1880.9299999999998 for Qty: -10.71

(0.3126500925380934, {'OPEN_THRE': 3, 'CLOS_THRE': 0.1, 'period': 30})