In [1]:
import os
import pyfolio
import numpy as np
import pandas as pd
from utils.gridsearch import gridsearch
from utils.read2df import read2df
from utils.cointncorr import CointnCorr
import statsmodels.api as sm

import backtrader as bt
from itertools import combinations



The `symbols` are the trading pairs we are interested in the pair trading.

`start_date` means we will start trading from the marked date.

`freqs` is a dictionary of all trading intervals to be considered

In [2]:
symbols = ['BTCUSDT', 'ETHUSDT', 'LTCUSDT', 'XMRUSDT', 'BNBUSDT', 'ADAUSDT', 'DOGEUSDT', 'SOLUSDT', 'TRXUSDT']
# symbols = ['BTCUSDT', 'BNBUSDT', 'ADAUSDT']
start_date = '2023-01-01'

# freqs = {'1h':60, '2h':120, '4h':240, '6h':360, '8h':480, '12h':720, '1d':1440}
freqs = {'3m':3, '5m':5, '15m':15, '30m':30}

# Download Data from [binance-public-data](https://github.com/binance/binance-public-data/tree/master/python)

Download BTCUSDT and ETHUSDT for histories after `start_date` with interested intervals.


In [3]:
%%capture
if symbols is None:
    !python binance-public-data/python/download-kline.py -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1
else:
    !python binance-public-data/python/download-kline.py -s {" ".join(symbols)} -i {" ".join(list(freqs.keys()))} -startDate {start_date} -t spot -skip-daily 1

Read the downloaded OHLCV data into `pandas` dataframe

In [4]:
# dfs = read2df(symbols, freqs)
dfs = read2df(symbols, freqs)
dfs[0]

Unnamed: 0,time,open,high,low,close,volume,tic,itvl,datetime
0,1597125779999,0.142880,0.142880,0.142740,0.142870,400254.500000,ADAUSDT,3m,2020-08-11 06:02:59.999
1,1597125779999,22.418300,22.418600,22.360000,22.395800,7928.640000,BNBUSDT,3m,2020-08-11 06:02:59.999
2,1597125779999,11854.560000,11854.570000,11842.000000,11850.140000,90.373673,BTCUSDT,3m,2020-08-11 06:02:59.999
3,1597125779999,0.003556,0.003559,0.003556,0.003559,431600.000000,DOGEUSDT,3m,2020-08-11 06:02:59.999
4,1597125779999,395.100000,395.100000,394.530000,394.950000,607.335330,ETHUSDT,3m,2020-08-11 06:02:59.999
...,...,...,...,...,...,...,...,...,...
4945333,1696118399999,1671.100000,1671.150000,1670.890000,1670.890000,58.111300,ETHUSDT,3m,2023-09-30 23:59:59.999
4945334,1696118399999,65.960000,65.980000,65.950000,65.970000,101.543000,LTCUSDT,3m,2023-09-30 23:59:59.999
4945335,1696118399999,21.390000,21.390000,21.360000,21.370000,2700.670000,SOLUSDT,3m,2023-09-30 23:59:59.999
4945336,1696118399999,0.088610,0.088610,0.088590,0.088600,259647.200000,TRXUSDT,3m,2023-09-30 23:59:59.999


# Cointegration and Correlation

Calculate daily coint and corr for all the pairs
Consider 1 day with 1440 minutes.

In [5]:
res = CointnCorr(dfs, freqs)
tables = res.tabulate()
for k, v in tables.items():
    print(k)
    print(v)

ADAUSDT_BNBUSDT
             3m        5m       15m       30m
coint  0.213100  0.217467  0.217467  0.227074
corr   0.693539  0.693341  0.693219  0.692706
ADAUSDT_BTCUSDT
             3m        5m       15m       30m
coint  0.199127  0.193886  0.211354  0.222707
corr   0.703440  0.703408  0.703274  0.702010
ADAUSDT_DOGEUSDT
             3m        5m       15m       30m
coint  0.245415  0.255895  0.232314  0.238428
corr   0.664965  0.664983  0.665139  0.664156
ADAUSDT_ETHUSDT
             3m        5m       15m       30m
coint  0.204367  0.187773  0.203493  0.228821
corr   0.748309  0.748282  0.747662  0.746482
ADAUSDT_LTCUSDT
             3m        5m       15m       30m
coint  0.218341  0.199127  0.205240  0.221834
corr   0.692195  0.691953  0.692348  0.691979
ADAUSDT_SOLUSDT
            3m        5m       15m       30m
coint  0.19476  0.199127  0.217467  0.230568
corr   0.63650  0.636350  0.636068  0.635069
ADAUSDT_TRXUSDT
             3m        5m       15m       30m
coint  0.191266 

In [6]:
best_value = 0
for key in tables.keys():
    for freq in freqs:
        rel = tables[key].at['coint', freq] + tables[key].at['corr', freq]
        if rel > best_value:
            best_value = rel
            best_pair = key
            best_freq = freq
print("===========================================")
print(f"Best trading pairs shall be: {best_pair} under {best_freq} interval")
print(f"the coint is {round(tables[best_pair].at['coint', best_freq]*100, 2)}% and the corr is {round(tables[best_pair].at['corr', best_freq],3)}")
print("===========================================")

Best trading pairs shall be: BTCUSDT_ETHUSDT under 30m interval
the coint is 23.14% and the corr is 0.832


In [7]:
# best_pairs = ()
# best_corr = 0.0
# FREQUENCY = None
# for i, (freq, f) in enumerate(freqs.items()):
#     for comb in combinations(set(dfs[i]['tic']), 2):
#         first_ele = dfs[i][dfs[i]['tic'] == comb[0]]['close']
#         second_ele = dfs[i][dfs[i]['tic'] == comb[1]]['close']
#         _, pvalue, _ = coint(first_ele, second_ele)
#         corr = np.corrcoef(first_ele, second_ele)[0][1]
#         if pvalue <= 0.05 and corr >= 0.8:
#             print(f"{comb[0]} and {comb[1]} are cointegrated under {freq} interval (correlation {corr})")
#             if corr > best_corr:
#                 best_pairs = (comb[0], comb[1])
#                 best_corr = corr
#                 FREQUENCY = freq
#         else:
#             # print(f"{comb[0]} and {comb[1]} are NOT cointegrated under {FREQUENCY} interval")

# print("===========================================")
# print(f"Best trading pairs shall be: {best_pairs[0]} and {best_pairs[1]} under {FREQUENCY} interval")
# print("===========================================")

# Define Trading Strategy

Firstly define a sizer based on [Kelly Criterion](https://www.wikiwand.com/en/Kelly_criterion)

In [8]:
# Seems that Sizer can only be executed when self.buy(size=None). 
# We need to purchase amount in a certain ratio in Pair Trading.
# Therefore the Sizer is hard to implemented.

class KellyCriterionSizer(bt.Sizer):
    params = (('period', 30),)

    def _getsizing(self, comminfo, cash, data, isbuy):
        position = self.broker.getposition(data).size

        close_prices = data.close.get(size=self.p.period)
        returns = np.log(close_prices / close_prices.shift(1)).dropna()

        p = len(returns[returns > 0]) / len(returns)
        a = (returns[returns > 0].mean() + 1) if len(returns[returns > 0]) > 0 else 1.0
        b = (-returns[returns > 0].mean() + 1) if len(returns[returns < 0]) > 0 else 1.0
        q = 1 - p

        f = min(max((p/a - q/b), 0), 1)

        if isbuy:
            size = cash * f / data.close[0]
        else:
            size = position * f

        return size

Define a custom indicator for [Kelly Criterion](https://www.wikiwand.com/en/Kelly_criterion)

In [9]:
class KellyCriterionIndicator(bt.indicators.PeriodN):
    _mindatas = 1

    packages = (
        ('pandas', 'pd'),
    )
    lines = ('kc_f',)
    params = (
        ('period', 30),
    )

    def next(self):
        spreads = pd.Series(self.data.get(size=self.p.period))
        returns = spreads.pct_change()

        kc_p = len(returns[returns > 0]) / len(returns)
        kc_a = (returns[returns > 0].mean() + 1) if len(returns[returns > 0]) > 0 else 1
        kc_b = (returns[returns < 0].mean() + 1) if len(returns[returns < 0]) > 0 else 1
        kc_q = 1 - kc_p
        
        kc_f = min(max((kc_p/kc_a - kc_q/kc_b), 0), 1)
        self.lines.kc_f[0] = kc_f

In [10]:
# # Incomplete

# class ZscoreIndicator(bt.indicators.PeriodN):
#     _mindatas = 2 # ensure at least 2 data feeds are passed

#     packages = (
#         ('pandas', 'pd'),
#         ('statsmodels.api', 'sm'),
#     )
#     lines = ('zscore',)
#     params = (
#         ('period', 30),
#     )

#     def next(self):
#         p0 = pd.Series(self.data0.get(size=self.p.period))
#         p1 = pd.Series(self.data1.get(size=self.p.period))
#         sm.add_constant(X)

Define custom CommissionInfo (*Not in use*)

In [11]:
class PairTradingCommInfo(bt.CommInfoBase):
    params = (
        ('commission', 0.0), ('mult', 10), ('margin', 1000),
        ('stocklike', False),
        ('commtype', bt.CommInfoBase.COMM_PERC),
        ('percabs', True),
    )

The strategy with fixed ordersize

In [12]:
class PairTrading(bt.Strategy):
    params = dict(
        OPEN_THRE=2,
        CLOS_THRE=0.1,
        period=300
    )

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            return

        if order.status == order.Completed:
            if order.isbuy():
                print(f"Buy {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")
            else:
                print(f"Sell {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")

        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            print('%s ,' % order.Status[order.status])
            pass

    def __init__(self):
        self.data0 = self.datas[0]
        self.data1 = self.datas[1]

        # Calculate zscore of the ratio
        transform = bt.indicators.OLS_TransformationN(self.data1, self.data0, period=self.p.period)
        spread = transform.spread
        self.zscore = transform.zscore

        self.kc_f = KellyCriterionIndicator(spread, period=self.p.period)

        # -1 for short data1/data0, 1 for long data1/data0, 0 for no position

    def next(self):
        # print(f'Right now the zscore is {self.transform.zscore[0]}, and the position is {self.position_status}')
        
        # Calculate the ratio between the 2 assets
        ratio = self.data1.close[0] / self.data0.close[0]
        cash = self.broker.get_cash()
        position = self.broker.getposition(self.data0).size + self.broker.getposition(self.data1).size

        if abs(self.zscore[0]) < self.p.CLOS_THRE and position != 0:
            print("------")
            print("close position")
            self.close(data=self.data0)
            self.close(data=self.data1)
    
        elif self.zscore[0] < -self.p.OPEN_THRE and position == 0:
            print("------")
            print("open position")
            # print(f"long {self.data0.alias} and short {self.data1.alias}")

            purchase_amount = self.broker.get_cash()/self.data0.close[0]*self.kc_f[0]

            self.sell(data=self.data1, size=purchase_amount/ratio)
            self.buy(data=self.data0, size=purchase_amount)

        elif self.zscore[0] > self.p.OPEN_THRE and position == 0:
            print("------")
            print("open position")
            # print(f"long {self.data1.alias} and short {self.data0.alias}")
            
            purchase_amount = self.broker.get_cash()/self.data1.close[0]*self.kc_f[0]

            self.sell(data=self.data0, size=purchase_amount*ratio)
            self.buy(data=self.data1, size=purchase_amount)

    def stop(self):
        self.close(data=self.data0)
        self.close(data=self.data1)
        
        print('==================================================')
        print('Starting Value - %.2f' % self.broker.startingcash)
        print('Ending   Value - %.2f' % self.broker.getvalue())
        print('==================================================')

# Execute the Strategy

Load the data

In [13]:
datafeeds_0 = []
datafeeds_1 = []

for idx, freq in enumerate(freqs):
    datafeeds_0.append(
        bt.feeds.PandasData(
            dataname=dfs[idx][dfs[idx]['tic']==best_pair.split('_')[0]],
            datetime='datetime',
            open='open',
            high='high',
            low='low',
            close='close',
            volume='volume',
            openinterest=None,
        )
    )
    datafeeds_1.append(
        bt.feeds.PandasData(
            dataname=dfs[idx][dfs[idx]['tic']==best_pair.split('_')[1]],
            datetime='datetime',
            open='open',
            high='high',
            low='low',
            close='close',
            volume='volume',
            openinterest=None,
        )
    )

freq_position = list(freqs.keys()).index(best_freq)
datafeed0 = datafeeds_0[freq_position]
datafeed1 = datafeeds_1[freq_position]
datafeeds = [datafeed0, datafeed1]

The main strategy engine

In [14]:
param = {'OPEN_THRE':1, 'CLOS_THRE':0.1, 'period':1000}

def cerebro_run(datafeeds, param):
    # Create a Cerebro instance and add the data feed
    cerebro = bt.Cerebro()
    # TODO: should I include `best_pair` as a parameter?
    cerebro.adddata(datafeeds[0], name=best_pair.split('_')[0])
    cerebro.adddata(datafeeds[1], name=best_pair.split('_')[1])

    # Set up other parameters for backtest
    cerebro.broker.set_cash(100000)  # Set initial capital

    # comminfo = PairTradingCommInfo(commission=0.002, margin=1000, mult=10)
    # cerebro.broker.addcommissioninfo(comminfo)

    cerebro.addanalyzer(bt.analyzers.TimeReturn, _name='timereturns', compression=60)
    cerebro.addanalyzer(bt.analyzers.Returns, _name='Returns')
    cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')
    # cerebro.addsizer(KellyCriterionSizer)

    cerebro.addstrategy(PairTrading, **param)
    strats = cerebro.run()
    return strats

# cerebro_run(datafeeds, param)

# Grid Search the Strategy
Define scoring and param_grid

In [15]:
# param_grid = {
#     'OPEN_THRE':np.arange(1, 3, 1), 
#     'CLOS_THRE':np.arange(0.2, 1.0, 0.2), 
#     'period': np.arange(30, 60, 10)
# }

freq_adjust = freqs[best_freq]

param_grid = {
    'OPEN_THRE': np.arange(2, 3, 1), 
    'CLOS_THRE': np.arange(0.1, 1, 0.2), 
    'period': np.arange(int(500/freq_adjust), int(600/freq_adjust), int(100/freq_adjust))
}

def scoring(strats):
    score = strats[0].analyzers.Returns.get_analysis()['rtot']
    return score

Grid Searching

In [16]:
best_score, best_params, best_res = gridsearch(cerebro_run, param_grid, scoring, datafeeds)

------
open position
Sell ETHUSDT @ price: 381.92 for Qty: -261.8349392542941
Buy BTCUSDT @ price: 11449.23 for Qty: 8.734211820358226
------
close position
Sell BTCUSDT @ price: 11295.33 for Qty: -8.734211820358226
Buy ETHUSDT @ price: 375.0 for Qty: 261.8349392542941
------
open position
Sell BTCUSDT @ price: 11561.55 for Qty: -8.689812575345572
Buy ETHUSDT @ price: 386.99 for Qty: 259.6131749670187
------
close position
Buy BTCUSDT @ price: 11521.77 for Qty: 8.689812575345572
Sell ETHUSDT @ price: 392.18 for Qty: -259.6131749670187
------
open position
------
open position
Sell ETHUSDT @ price: 424.54 for Qty: -51.103188508631895
Buy BTCUSDT @ price: 11779.77 for Qty: 1.8417103267788273
------
close position
Sell BTCUSDT @ price: 11723.24 for Qty: -1.8417103267788273
Buy ETHUSDT @ price: 421.02 for Qty: 51.103188508631895
------
open position
Sell BTCUSDT @ price: 11714.72 for Qty: -1.1058423787013585
Buy ETHUSDT @ price: 424.59 for Qty: 30.508795838736194
------
close position
Buy 

In [17]:
best_pair

'BTCUSDT_ETHUSDT'

# Analyze with [Pyfolio](https://pyfolio.ml4trading.io/api-reference.html)


In [18]:
res_pyfolio = best_res[0].analyzers.pyfolio.get_analysis()
res_pyfolio = pd.Series(res_pyfolio['returns'])
res_pyfolio.index = pd.to_datetime(res_pyfolio.index)
res_pyfolio = res_pyfolio = res_pyfolio.astype('float32')
res_pyfolio

2020-08-11    0.000000
2020-08-12    0.000251
2020-08-13    0.004193
2020-08-14    0.009189
2020-08-15   -0.005605
                ...   
2023-09-26   -0.000940
2023-09-27    0.001825
2023-09-28    0.002080
2023-09-29   -0.002028
2023-09-30    0.000000
Length: 1146, dtype: float32

In [19]:
# The error message is a known issue lack of maintainence, requires to modify the package manually
# https://github.com/quantopian/pyfolio/issues/652
pyfolio.tears.create_full_tear_sheet(returns=pd.Series(res_pyfolio))

  stats = pd.Series()
  for stat, value in perf_stats[column].iteritems():


Start date,2020-08-11,2020-08-11
End date,2023-09-30,2023-09-30
Total months,54,54
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,14.6%,
Cumulative returns,85.8%,
Annual volatility,15.1%,
Sharpe ratio,0.98,
Calmar ratio,0.60,
Stability,0.92,
Max drawdown,-24.4%,
Omega ratio,1.23,
Sortino ratio,1.57,
Skew,0.85,


AttributeError: 'numpy.int64' object has no attribute 'to_pydatetime'