## Backtesting an ML strategy with Backtrader

In [1]:
import warnings
# Silence every warning for the rest of the session to keep notebook output
# clean. NOTE(review): this also hides deprecation warnings from the libraries
# imported below — consider narrowing the filter when debugging.
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline

from pathlib import Path
import csv
from time import time
import datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import seaborn as sns

import backtrader as bt
from backtrader.feeds import PandasData

import pyfolio as pf

In [3]:
# Print wide DataFrames on one line instead of wrapping their columns
pd.set_option('display.expand_frame_repr', False)
# Seed NumPy's global random number generator so random draws are repeatable
np.random.seed(42)
# Apply seaborn's 'darkgrid' theme to subsequent plots
sns.set_style('darkgrid')

In [4]:
def format_time(t):
    """Render a duration given in seconds as an ``HH:MM:SS`` string.

    Each field is rounded to a whole number and zero-padded to at least two
    digits; hours are not capped, so long durations simply widen that field.
    """
    total_minutes, seconds = divmod(t, 60)
    hours, minutes = divmod(total_minutes, 60)
    fields = (hours, minutes, seconds)
    return ':'.join(f'{field:>02.0f}' for field in fields)

### Backtrader Setup

Custom Commission Schema

### DataFrame Loader

In [5]:
# Price and volume column names; reused by the SignalData feed definition
OHLCV = ['open', 'high', 'low', 'close', 'volume']

In [6]:
class SignalData(PandasData):
    """
    Pandas data feed exposing OHLCV bars plus the model's ``predicted`` signal
    """
    cols = OHLCV + ['predicted']

    # one backtrader line per DataFrame column
    lines = tuple(cols)

    # every column is mapped with -1 and 'datetime' with None; per the
    # backtrader PandasData docs, -1 auto-detects the column by name and
    # None takes the timestamps from the DataFrame index
    params = tuple((col, -1) for col in cols) + (('datetime', None),)

### Strategy

In [24]:
class MLStrategy(bt.Strategy):
    """Long-short strategy driven by each data feed's ``predicted`` line.

    On every bar the feeds that have a bar for the current date are split by
    the sign of their prediction. The ``n_positions`` largest positive
    predictions become long candidates and the ``n_positions`` most negative
    ones become short candidates. Trades are only placed when both sides have
    at least ``min_positions`` candidates; otherwise all open positions are
    closed.

    Params:
        n_positions: maximum number of names on each side; also sets the
            per-position weight (1 / n_positions of portfolio value).
        min_positions: minimum number of candidates required on BOTH sides
            before any position is taken.
        verbose: when true, order executions and failures are logged from
            ``notify_order``. Order-creation messages are always logged.
        log_file: path of the CSV file that ``log`` appends to.
    """
    params = (('n_positions', 10),
              ('min_positions', 5),
              ('verbose', False),
              ('log_file', 'backtest.csv'))

    def log(self, txt, dt=None):
        """Append one row to the CSV log.

        Args:
            txt: comma-separated message; each field becomes one CSV column.
            dt: timestamp for the row; defaults to the current bar of the
                first data feed.
        """
        dt = dt or self.datas[0].datetime.datetime(0)
        # newline='' is what the csv module requires of files it writes to;
        # without it rows end in '\r\r\n' on Windows.
        with Path(self.p.log_file).open('a', newline='') as f:
            log_writer = csv.writer(f)
            log_writer.writerow([dt.isoformat()] + txt.split(','))

    def notify_order(self, order):
        """Log completed and failed orders when ``verbose`` is enabled."""
        # Submitted/Accepted are intermediate states - nothing to report yet
        if order.status in [order.Submitted, order.Accepted]:
            return

        if not self.p.verbose:
            return

        name = order.data._name
        if order.status == order.Completed:
            price = order.executed.price
            if order.isbuy():
                self.log(f"{name}, BUY executed, {price:.2f}")
            elif order.issell():
                self.log(f"{name}, SELL executed, {price:.2f}")
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            # e.g. the broker rejects an order when there is not enough cash
            self.log(f"{name}, Order Canceled/Margin/Rejected")

    def prenext(self):
        """Run the trading logic even before every feed has delivered data."""
        self.next()

    def next(self):
        """Rank today's predictions and rebalance the long-short portfolio."""
        today = self.datas[0].datetime.date()
        positions = [d._name for d, pos in self.getpositions().items() if pos]

        # Collect predictions by sign, considering only feeds whose current
        # bar belongs to today's date. Zero (and NaN) predictions are ignored.
        up, down = {}, {}
        for data in self.datas:
            if data.datetime.date() != today:
                continue
            prediction = data.predicted[0]
            if prediction > 0:
                up[data._name] = prediction
            elif prediction < 0:
                down[data._name] = prediction

        # Ticker names ordered by prediction: most negative first for shorts,
        # most positive first for longs; keep at most n_positions per side.
        shorts = sorted(down, key=down.get)[:self.p.n_positions]
        longs = sorted(up, key=up.get, reverse=True)[:self.p.n_positions]
        n_shorts, n_longs = len(shorts), len(longs)

        # only take positions if at least min_positions longs and shorts
        if n_shorts < self.p.min_positions or n_longs < self.p.min_positions:
            longs, shorts = [], []

        # Close every open position that is no longer a target. The set is
        # built once instead of concatenating the lists per open position.
        selected = set(longs) | set(shorts)
        for ticker in positions:
            if ticker not in selected:
                self.order_target_percent(data=ticker, target=0)
                self.log(f'{ticker},CLOSING ORDER CREATED')

        # Equal weights per side: negative for shorts, positive for longs.
        short_target = -1 / max(self.p.n_positions, n_shorts)
        long_target = 1 / max(self.p.n_positions, n_longs)
        for ticker in shorts:
            self.order_target_percent(data=ticker, target=short_target)
            self.log(f'{ticker},SHORT ORDER CREATED')
        for ticker in longs:
            self.order_target_percent(data=ticker, target=long_target)
            self.log(f'{ticker},LONG ORDER CREATED')

### Create and Configure Cerebro Instance

In [25]:
# Create the backtrader engine that the data feeds, analyzer and strategy
# are attached to in the following cells
cerebro = bt.Cerebro()
# Starting cash for the simulated broker account
cash = 10000
cerebro.broker.setcash(cash)

In [26]:
### Add Input Data
idx = pd.IndexSlice
# Load the backtest data set stored under key 'data' and sort its index so
# the per-ticker slices below are taken from a sorted MultiIndex
data = pd.read_hdf('00_data/backtest.h5', 'data').sort_index()
# Distinct values of the first index level, one per asset
tickers = data.index.get_level_values(0).unique()

# Register one SignalData feed per ticker, named after the ticker
for ticker in tickers:
    # Select this ticker's rows and drop the 'ticker' index level
    df = data.loc[idx[ticker, :], :].droplevel('ticker', axis=0)
    df.index.name = 'datetime'
    bt_data = SignalData(dataname=df)
    cerebro.adddata(bt_data, name=ticker)

### Run Strategy Backtest

In [27]:
# Attach the PyFolio analyzer under the name 'pyfolio' so it can be looked up
# on the strategy results after the run
cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')
# Override the strategy defaults: up to 25 names per side, at least 20
# candidates on each side, verbose order logging to 'bt_log.csv'
cerebro.addstrategy(MLStrategy, n_positions=25, min_positions=20, verbose=True, log_file='bt_log.csv')

# Time the backtest; the interval also covers reading the final portfolio value
start = time()
results = cerebro.run()
ending_value = cerebro.broker.get_value()
duration = time() - start 

print(f'Final Portfolio Value: {ending_value:,.2f}')
print(f'Duration: {format_time(duration)}')
