# Backtesting

This is going to be just an example of how to perform backtesting using backtrader.

At the end, we will have incredible results which are not common, this is because we are using the actual predictions to perform the backtesting. Thus, when we are going to perform an actual backtest using our own model predictions, we have to add these predictions to the "direction" column in the dataframe that we pass to the SignalData object.

In [1]:
import pandas as pd
import numpy as np
import pyfolio as pf
import backtrader as bt
import matplotlib.pyplot as plt
import seaborn as sns

from backtrader.feeds import PandasData
from backtrader.strategy import SignalStrategy
from pandas.core.frame import DataFrame
from app import load_scaler


OHLCV = ["open", "high", "low", "close", "volume"]

# class to define the columns we will provide
class SignalData(PandasData):
    """
    Define pandas DataFrame structure
    """

    cols = OHLCV + ["direction"]

    # create lines
    lines = tuple(cols)

    # define parameters
    params = {c: -1 for c in cols}
    params.update({"datetime": None})
    params = tuple(params.items())


# Define backtesting strategy class
class MLStrategy(bt.Strategy):
    """
    Strategy: 
    1.	Buy when the predicted value is +1 and sell (only if stock is in possession) when the predicted value is 0.
    2.	All-in strategy—when creating a buy order, buy as many shares as possible.
    3.	Short selling is not allowed
    """

    params = dict()

    def __init__(self):
        """
        prediction: str. It is the name of the target column
            it should be 1 for buying and 0 for selling
        """

        # keep track of the data
        self.data_predicted = self.datas[0].direction
        self.data_open = self.datas[0].open
        self.data_close = self.datas[0].close

        # keep track of pending orders/buy price/buy commission
        self.order = None
        self.price = None
        self.comm = None

    # logging function
    def log(self, txt):
        """Logging function"""
        dt = self.datas[0].datetime.date(0).isoformat()
        print(f"{dt}, {txt}")

    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # order already submitted/accepted - no action required
            return

        # report executed order
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(
                    f"BUY EXECUTED --- Price: {order.executed.price:.2f}, Cost: {order.executed.value:.2f},Commission: {order.executed.comm:.2f}"
                )
                self.price = order.executed.price
                self.comm = order.executed.comm
            else:
                self.log(
                    f"SELL EXECUTED --- Price: {order.executed.price:.2f}, Cost: {order.executed.value:.2f},Commission: {order.executed.comm:.2f}"
                )

        # report failed order
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log("Order Failed")

        # set no pending order
        self.order = None

    def notify_trade(self, trade):
        if not trade.isclosed:
            return
        self.log(
            f"OPERATION RESULT --- Gross: {trade.pnl:.2f}, Net: {trade.pnlcomm:.2f}"
        )

    # We have set cheat_on_open = True.This means that we calculated the signals on day t's close price,
    # but calculated the number of shares we wanted to buy based on day t+1's open price.
    def next_open(self):

        if not self.position:
            if self.data_predicted == 1:
                # calculate the max number of shares ('all-in')
                size = int(self.broker.getcash() / self.datas[0].open)

                # buy order
                self.log(
                    f"BUY CREATED --- Size: {size}, Cash: {self.broker.getcash():.2f}, Open: {self.data_open[0]}, Close: {self.data_close[0]}"
                )
                self.buy()
        else:
            if self.data_predicted == 0:
                # sell order
                self.log(f"SELL CREATED --- Size: {self.position.size}")
                self.sell(size=self.position.size)


def plot_cumulative_returns(probs=False, threshold=0.5):

    if probs:
        df["position_strategy"] = np.where(df.direction > threshold, 1, -1)
    else:
        df["position_strategy"] = np.where(df.direction > 0, 1, -1)

    df["strategy_returns"] = df["position_strategy"] * df["returns"]

    fig, ax = plt.subplots(1, 1, sharex=True, figsize=(14, 6))
    ax.plot(df.returns.cumsum().apply(np.exp), label="Buy and Hold")
    ax.plot(df.strategy_returns.cumsum().apply(np.exp), label="Strategy returns")
    ax.set(title="BTCUSDT Buy and Hold vs. Strategy", ylabel="Cumulative Returns")
    ax.grid(True)
    ax.legend()
    plt.yscale("log")
    plt.show()

# Create Data

The data that we must pass to the SignalData class must contain an datetime object as index and between the dataframe columns, we have to have ["open", "high", "low", "close", "volume"] and ["direction"] which is going to be used as the signal in the strategy.

In [None]:
symbol = "BNBUSDT"
cash = 250.0
commission = 0.001

X = pd.read_csv("data\\bnb\\X_valid.csv")
y = pd.read_csv("data\\bnb\\y_valid.csv")

# re-scale the data
scaler = load_scaler("data\\bnb\\bnb_scaler.pickle")
columns = X.columns
X = scaler.inverse_transform(X.values)

X = pd.DataFrame(X, columns=columns)

# Create the returns, directions and open_time columns necessary for backtesting
X["returns"] = y["returns_1"]
X["direction"] = y["signal_1"]
# transform open_time to datetime object and set as index
X["open_time"] = pd.to_datetime(y.open_time)
X.set_index("open_time", drop=True, inplace=True)


## Load the data and perform backtesting

In [2]:
# load the data
data = SignalData(dataname=X)

# instantiate Cerebro, add strategy, data, initial cash, commission and pyfolio for performance analysis
cerebro = bt.Cerebro(stdstats=False, cheat_on_open=True)
cerebro.addstrategy(MLStrategy)
cerebro.adddata(data, name=symbol)
cerebro.broker.setcash(cash)
cerebro.broker.setcommission(commission=commission)
cerebro.addanalyzer(bt.analyzers.PyFolio, _name="pyfolio")

# run the backtest
print("Starting Portfolio Value: %.2f" % cerebro.broker.getvalue())
backtest_result = cerebro.run()
print("Final Portfolio Value: %.2f" % cerebro.broker.getvalue())


.19, Cost: 74.19,Commission: 0.07
2021-02-08, SELL CREATED --- Size: 1
2021-02-08, SELL EXECUTED --- Price: 74.26, Cost: 74.19,Commission: 0.07
2021-02-08, OPERATION RESULT --- Gross: 0.07, Net: -0.08
2021-02-08, BUY CREATED --- Size: 10, Cash: 754.00, Open: 74.2903, Close: 74.2865
2021-02-08, BUY EXECUTED --- Price: 74.29, Cost: 74.29,Commission: 0.07
2021-02-08, SELL CREATED --- Size: 1
2021-02-08, SELL EXECUTED --- Price: 74.29, Cost: 74.29,Commission: 0.07
2021-02-08, OPERATION RESULT --- Gross: -0.00, Net: -0.15
2021-02-08, BUY CREATED --- Size: 10, Cash: 753.85, Open: 74.21900000000001, Close: 74.2451
2021-02-08, BUY EXECUTED --- Price: 74.22, Cost: 74.22,Commission: 0.07
2021-02-08, SELL CREATED --- Size: 1
2021-02-08, SELL EXECUTED --- Price: 74.25, Cost: 74.22,Commission: 0.07
2021-02-08, OPERATION RESULT --- Gross: 0.03, Net: -0.11
2021-02-08, BUY CREATED --- Size: 10, Cash: 753.73, Open: 74.232, Close: 74.36080000000001
2021-02-08, BUY EXECUTED --- Price: 74.23, Cost: 74.23,

## Get more insigths using pyfolio

In [3]:
# Extract inputs for pyfolio
strat = backtest_result[0]
pyfoliozer = strat.analyzers.getbyname("pyfolio")
returns, positions, transactions, gross_lev = pyfoliozer.get_pf_items()
returns.name = "Strategy"

# get performance statistics for strategy
pf.show_perf_stats(returns)

Start date,2020-12-11,2020-12-11
End date,2021-02-09,2021-02-09
Total months,2,2
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,13154.0%,
Cumulative returns,226.4%,
Annual volatility,22.0%,
Sharpe ratio,22.54,
Calmar ratio,66943.16,
Stability,0.99,
Max drawdown,-0.2%,
Omega ratio,413.22,
Sortino ratio,1118.41,
Skew,1.00,
