In [1]:
import os, zipfile, math, logging, datetime
import pandas as pd
import backtrader as bt
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pyfolio as pf
from collections import OrderedDict

matplotlib.use('Qt5Agg')
plt.switch_backend('Qt5Agg')



# Read Data

In [2]:
# List of symbols to merge
symbols = ['BTCUSDT', 'ETHUSDT']

# List to store individual DataFrames
rawdfs = []

# Loop through each symbol
for symbol in symbols:
    directory = f'../mdt_utils/binance-public-data/python/data/spot/monthly/klines/{symbol}/1h/'
    
    # Loop through each zip file in the directory
    for file_name in os.listdir(directory):
        if file_name.endswith('.zip'):
            with zipfile.ZipFile(os.path.join(directory, file_name), 'r') as zip_ref:
                # only one CSV file in each zip archive
                csv_file = zip_ref.namelist()[0]
                with zip_ref.open(csv_file) as csv_fp:
                    # Read the CSV data into a DataFrame
                    temp_df = pd.read_csv(csv_fp, header=None)
                    temp_df.columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore']
                    # temp_df['date'] = pd.to_datetime(temp_df['close_time'], unit='ms').dt.strftime('%Y-%m-%d')
                    temp_df = temp_df.rename(columns={"close_time": "time"})
                    temp_df['tic'] = symbol
                    rawdfs.append(temp_df[['time', 'open', 'high', 'low', 'close', 'volume', 'tic']])

# Concatenate all DataFrames into a single DataFrame
rawdf = pd.concat(rawdfs, ignore_index=True)

# Count the number of unique 'tic' values per date
tic_counts = rawdf.groupby('time')['tic'].nunique()

# Filter the DataFrame to keep only rows where all 'tic' values participate
df = rawdf[rawdf['time'].isin(tic_counts[tic_counts == len(rawdf['tic'].unique())].index)]
# Only wanted columns
df = df[['time', 'open', 'high', 'low', 'close', 'volume', 'tic']]
df['datetime'] = pd.to_datetime(df['time'], unit='ms')

df = df.sort_values(['time', 'tic'],ignore_index=True)

# df = df.head(100000)
df

Unnamed: 0,time,open,high,low,close,volume,tic,datetime
0,1502945999999,4261.48,4313.62,4261.32,4308.83,47.181009,BTCUSDT,2017-08-17 04:59:59.999
1,1502945999999,301.13,302.57,298.00,301.61,125.668770,ETHUSDT,2017-08-17 04:59:59.999
2,1502949599999,4308.83,4328.69,4291.37,4315.32,23.234916,BTCUSDT,2017-08-17 05:59:59.999
3,1502949599999,301.61,303.28,300.00,303.10,377.672460,ETHUSDT,2017-08-17 05:59:59.999
4,1502953199999,4330.29,4345.45,4309.37,4324.35,7.229691,BTCUSDT,2017-08-17 06:59:59.999
...,...,...,...,...,...,...,...,...
104025,1690840799999,1853.51,1858.77,1851.02,1856.87,5720.590800,ETHUSDT,2023-07-31 21:59:59.999
104026,1690844399999,29243.32,29243.32,29201.00,29209.99,441.450670,BTCUSDT,2023-07-31 22:59:59.999
104027,1690844399999,1856.86,1857.58,1855.01,1856.40,2950.512300,ETHUSDT,2023-07-31 22:59:59.999
104028,1690847999999,29210.00,29240.50,29190.00,29232.25,437.205740,BTCUSDT,2023-07-31 23:59:59.999


In [3]:
class PairTrading(bt.Strategy):
    params = dict(
        OPEN_THRE=5,
        CLOS_THRE=0.1,
        period=60
    )

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            return  # Do nothing for submitted or accepted orders

        if order.status == order.Completed:
            if order.isbuy():
                print(f"Buy {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")
            else:
                print(f"Sell {order.data._name} @ price: {order.executed.price} for Qty: {order.executed.size}")

        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            print('%s ,' % order.Status[order.status])
            pass  # Simply log

        # executed_price = order.executed.price
        # action = 'BUY' if order.isbuy() else 'SELL'
        # data_name = order.data._name
        # quantity = order.size

        # if order.status == order.Completed:
        #     if not self.position:
        #         action = 'SELL'

        #     print(f'{action} {data_name} @ price: {executed_price:.2f} for (Qty: {quantity})')
        # else:
        #     print(f'{order.Status[order.status]} for {data_name}')

    
    def __init__(self):
        self.data0 = self.datas[0]
        self.data1 = self.datas[1]

        # self.qty0 = self.qty1 = 0

        self.transform = bt.indicators.OLS_TransformationN(self.data1, self.data0, period=self.p.period)
        # self.ols = bt.indicators.OLS_Slope_InterceptN(self.data1, self.data0, period=self.p.period)

        # self.slope = self.ols.slope
        # self.intercept = self.ols.intercept

        # self.spread = self.transform.spread
        # self.spread_std = self.transform.spread_std
        # self.spread_mean = self.transform.spread_mean
        self.zscore = self.transform.zscore

        # -1 for short data1/data0, 1 for long data1/data0, 0 for no position
        self.position_status = 0

    def next(self):
        # print(f'Right now the zscore is {self.transform.zscore[0]}, and the position is {self.position_status}')

        ratio = self.data1.close[0] / self.data0.close[0]

        if self.zscore[0] > self.params.OPEN_THRE and self.position_status == 0:
            print("------")
            print("long data1 and short data0")
            self.position_status = -1

            self.sell(data=self.data0, size=ratio)
            self.buy(data=self.data1, size=1)
            
            # self.qty0 = -ratio
            # self.qty1 = 1

        elif self.zscore[0] < -self.params.OPEN_THRE and self.position_status == 0:
            print("------")
            print("long data0 and short data1")
            self.position_status = 1
            self.sell(data=self.data0, size=ratio)
            self.buy(data=self.data1, size=1)

            # self.qty0 = -ratio
            # self.qty1 = 1

        elif abs(self.zscore[0]) < self.params.CLOS_THRE and self.position_status != 0:
            print("------")
            print("close position")
            self.position_status = 0
            self.close(data=self.data0)
            self.close(data=self.data1)

            # self.qty0 = self.qty1 = 0

    def stop(self):
        print('==================================================')
        print('Starting Value - %.2f' % self.broker.startingcash)
        print('Ending   Value - %.2f' % self.broker.getvalue())
        print('==================================================')


In [4]:
datafeed_eth = bt.feeds.PandasData(
    dataname=df[df['tic']=='ETHUSDT'],
    datetime='datetime',
    open='open',
    high='high',
    low='low',
    close='close',
    volume='volume',
    openinterest=None  # You can specify open interest if your data includes it, otherwise set it to None
)

datafeed_btc = bt.feeds.PandasData(
    dataname=df[df['tic']=='BTCUSDT'],
    datetime='datetime',
    open='open',
    high='high',
    low='low',
    close='close',
    volume='volume',
    openinterest=None  # You can specify open interest if your data includes it, otherwise set it to None
)

In [5]:
# Create a Cerebro instance and add the data feed
cerebro = bt.Cerebro()
cerebro.adddata(datafeed_eth, name='eth')
cerebro.adddata(datafeed_btc, name='btc')

# Set up other parameters for your backtest
cerebro.broker.set_cash(100000)  # Set your initial capital
cerebro.broker.setcommission(commission=0.001)  # Set commission rate

# Add your trading strategy to Cerebro and run the backtest
cerebro.addstrategy(PairTrading)
cerebro.addanalyzer(bt.analyzers.TimeReturn, _name='timereturns')

strats = cerebro.run()

------
long data1 and short data0
Sell eth @ price: 730.96 for Qty: -20.09595613433859
Buy btc @ price: 14686.98 for Qty: 1
------
close position
Buy eth @ price: 744.45 for Qty: 20.09595613433859
Sell btc @ price: 15787.95 for Qty: -1
------
long data1 and short data0
Sell eth @ price: 854.71 for Qty: -17.05402781514276
Buy btc @ price: 14576.87 for Qty: 1
------
close position
Buy eth @ price: 868.77 for Qty: 17.05402781514276
Sell btc @ price: 15002.59 for Qty: -1
------
long data0 and short data1
Sell eth @ price: 402.7 for Qty: -16.84132108269183
Buy btc @ price: 6782.99 for Qty: 1
------
close position
Buy eth @ price: 400.85 for Qty: 16.84132108269183
Sell btc @ price: 6778.0 for Qty: -1
------
long data1 and short data0
Sell eth @ price: 460.31 for Qty: -16.705769899200558
Buy btc @ price: 7685.55 for Qty: 1
------
close position
Buy eth @ price: 492.63 for Qty: 16.705769899200558
Sell btc @ price: 7890.0 for Qty: -1
------
long data0 and short data1
Sell eth @ price: 684.97 fo

In [6]:
# The error message is an open issue on Github:
# https://github.com/quantopian/pyfolio/issues/652

strat = strats[0]
timereturns_analyzer = strat.analyzers.getbyname('timereturns')
timereturns = timereturns_analyzer.get_analysis()
series_timereturns = pd.Series(timereturns)
pf.create_full_tear_sheet(series_timereturns)

  stats = pd.Series()
  return np.abs(np.percentile(returns, 95)) / \
  for stat, value in perf_stats[column].iteritems():


Start date,2017-08-17,2017-08-17
End date,2023-07-31,2023-07-31
Total months,103,103
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,0.1%,
Cumulative returns,0.5%,
Annual volatility,1.5%,
Sharpe ratio,0.05,
Calmar ratio,0.02,
Stability,0.19,
Max drawdown,-3.1%,
Omega ratio,1.07,
Sortino ratio,0.06,
Skew,-11.07,


AttributeError: 'numpy.int64' object has no attribute 'to_pydatetime'

In [None]:
cerebro.plot(stdstats=False, iplot=False)