# Advanced Backtesting.py Features

In [None]:
import multiprocessing as mp
mp.set_start_method('fork')

end result?

- prove how we can correct the algorithm to improve the performance/returns

starting elements?

- we have the logic of the strategy
- all we need to do is add the new features

problems?

- we might not get better results
- people might not know these concepts from finance
  - short
  - long

actions to take?

- we need to explain the concepts
  - start with the dashboard
  - what is shorting
  - what is long
  - why is it important to take this into consideration?
    - mitigate the risk; sell when the stock is going down
    - we could catch the flow of the market instead of going in a stratight line
      - which is similar to buy and hold
- load the data
- load the strategies
- change them to use the new features
- evaluate the new features in a report
- show the report
- explain which other features we can use from backtesting.py library

## Load the data

In [32]:
import pandas as pd

df = pd.read_excel('data/Microsoft_LinkedIn_Processed.xlsx', parse_dates=['Date'], index_col=0)
df = df.drop(columns='change_tomorrow_direction')

## Feature selection: X and y

In [33]:
target = df.change_tomorrow
explanatory = df[['Open','High','Low','Close','Volume']]

## Previous Backtesting Strategy

In [34]:
from backtesting import Strategy

In [35]:
class Regression(Strategy):
    model = None

    limit_buy = 1
    limit_sell = -5

    N_TRAIN = 600

    def init(self):
        self.already_bought = False
        
        X_train = self.data.df.iloc[:self.N_TRAIN, :-1]
        y_train = self.data.df.iloc[:self.N_TRAIN, -1]

        self.model.fit(X_train, y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]
        
        # conditions to sell or buy
        if forecast_tomorrow > self.limit_buy and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow < self.limit_sell and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

In [36]:
class WalkForward(Regression):
    def next(self):

        if len(self.data) < self.N_TRAIN:
            return # we don't take any action and move on to the following day
        
        if len(self.data) % 200 != 0:
            return super().next()
        
        X_train = self.data.df.iloc[-self.N_TRAIN:, :-1]
        y_train = self.data.df.iloc[-self.N_TRAIN:, -1]

        self.model.fit(X_train, y_train)

        super().next()

In [37]:
from backtesting import Backtest
from sklearn.tree import DecisionTreeRegressor

model_dt = DecisionTreeRegressor(max_depth=15, random_state=42)

bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)
result = bt.run(N_TRAIN=200, model=model_dt, limit_buy=0, limit_sell=-4)
result.to_frame(name='Value').loc[:'Return [%]'].style

Unnamed: 0,Value
Start,2016-12-08 00:00:00
End,2023-03-15 00:00:00
Duration,2288 days 00:00:00
Exposure Time [%],87.182741
Equity Final [$],41086.639496
Equity Peak [$],49179.374064
Return [%],310.866395


In [8]:
bt.plot(filename='reports_backtesting/regression_walk_forward.html')

## Backtesting.py Advanced Features

https://github.com/kernc/backtesting.py/blob/master/doc/examples/Trading%20with%20Machine%20Learning.ipynb

In [38]:
import numpy as np

In [39]:
price_delta = .004

In [40]:
close = [65,45]

In [41]:
upper, lower = close[-1]*(np.r_[1,-1]*price_delta)

In [42]:
class Regression(Strategy):
    model = None

    limit_buy = 1
    limit_sell = -5

    N_TRAIN = 600
    price_delta = .004

    def init(self):
        
        X_train = self.data.df.iloc[:self.N_TRAIN, :-1]
        y_train = self.data.df.iloc[:self.N_TRAIN, -1]

        self.model.fit(X_train, y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        # conditions to sell or buy
        if forecast_tomorrow > self.limit_buy and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast_tomorrow < self.limit_sell and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)
        else:
            pass
        
        
        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta('2 days'):
                if trade.is_long:
                    trade.sl = max(trade.sl, low)
                else:
                    trade.sl = min(trade.sl, high)

In [43]:
class WalkForward(Regression):
    def next(self):

        if len(self.data) < self.N_TRAIN:
            return # we don't take any action and move on to the following day
        
        if len(self.data) % 200 != 0:
            return super().next()
        
        X_train = self.data.df.iloc[-self.N_TRAIN:, :-1]
        y_train = self.data.df.iloc[-self.N_TRAIN:, -1]

        self.model.fit(X_train, y_train)

        super().next()

In [16]:
bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)
result = bt.run(N_TRAIN=200, model=model_dt, limit_buy=0, limit_sell=-4)
result.to_frame(name='Value').loc[:'Return [%]'].style

Unnamed: 0,Value
Start,2016-12-08 00:00:00
End,2023-03-15 00:00:00
Duration,2288 days 00:00:00
Exposure Time [%],47.144670
Equity Final [$],5745.325525
Equity Peak [$],10000.000000
Return [%],-42.546745


In [17]:
bt.plot(filename='reports_backtesting/regression_walk_forward_sl_tp.html')

## Optimization

In [51]:
class Regression(Strategy):
    
    model = DecisionTreeRegressor(max_depth=15, random_state=42)

    limit_buy = 1
    limit_sell = -5

    N_TRAIN = 600
    price_delta = .004

    n_days_stop_loss = 2
    
    def init(self):
        
        X_train = self.data.df.iloc[:self.N_TRAIN, :-1]
        y_train = self.data.df.iloc[:self.N_TRAIN, -1]

        self.model.fit(X_train, y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        # conditions to sell or buy
        if forecast_tomorrow > self.limit_buy and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast_tomorrow < self.limit_sell and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)
        else:
            pass
        
        
        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta(f'{self.n_days_stop_loss} days'):
                if trade.is_long:
                    trade.sl = max(trade.sl, low)
                else:
                    trade.sl = min(trade.sl, high)

In [52]:
class WalkForward(Regression):
    def next(self):

        if len(self.data) < self.N_TRAIN:
            return # we don't take any action and move on to the following day
        
        if len(self.data) % 200 != 0:
            return super().next()
        
        X_train = self.data.df.iloc[-self.N_TRAIN:, :-1]
        y_train = self.data.df.iloc[-self.N_TRAIN:, -1]

        self.model.fit(X_train, y_train)

        super().next()

In [60]:
%%time

bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)

stats_skopt, heatmap, optimize_result = bt.optimize(
    model=[model_dt],
    limit_buy=[0, 10],
    limit_sell=[-10, 0],
    n_days_stop_loss=[2, 30],
    maximize='Return [%]',
    method='skopt',
    max_tries=500,
    random_state=0,
    return_heatmap=True,
    return_optimization=True)

CPU times: user 1min 26s, sys: 166 ms, total: 1min 26s
Wall time: 1min 26s


In [63]:
dff = heatmap.reset_index()

In [64]:
dff

Unnamed: 0,model,limit_buy,limit_sell,n_days_stop_loss,Return [%]
0,"DecisionTreeRegressor(max_depth=15, random_sta...",0,-8,5,-37.728050
1,"DecisionTreeRegressor(max_depth=15, random_sta...",0,-4,30,-37.728050
...,...,...,...,...,...
176,"DecisionTreeRegressor(max_depth=15, random_sta...",10,0,4,-22.306271
177,"DecisionTreeRegressor(max_depth=15, random_sta...",10,0,29,-22.306271


## Change the size of the investment

In [71]:
class Regression(Strategy):
    
    model = DecisionTreeRegressor(max_depth=15, random_state=42)

    limit_buy = 1
    limit_sell = -5

    N_TRAIN = 600
    price_delta = .004

    n_days_stop_loss = 2
    size_trades = .2
    
    def init(self):
        
        X_train = self.data.df.iloc[:self.N_TRAIN, :-1]
        y_train = self.data.df.iloc[:self.N_TRAIN, -1]

        self.model.fit(X_train, y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        # conditions to sell or buy
        if forecast_tomorrow > self.limit_buy and not self.position.is_long:
            self.buy(size=self.size_trades, tp=upper, sl=lower)
        elif forecast_tomorrow < self.limit_sell and not self.position.is_short:
            self.sell(size=self.size_trades, tp=lower, sl=upper)
        else:
            pass
        
        
        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta(f'{self.n_days_stop_loss} days'):
                if trade.is_long:
                    trade.sl = max(trade.sl, low)
                else:
                    trade.sl = min(trade.sl, high)

In [72]:
class WalkForward(Regression):
    def next(self):

        if len(self.data) < self.N_TRAIN:
            return # we don't take any action and move on to the following day
        
        if len(self.data) % 200 != 0:
            return super().next()
        
        X_train = self.data.df.iloc[-self.N_TRAIN:, :-1]
        y_train = self.data.df.iloc[-self.N_TRAIN:, -1]

        self.model.fit(X_train, y_train)

        super().next()

In [73]:
%%time

bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)

stats_skopt, heatmap, optimize_result = bt.optimize(
    model=[model_dt],
    limit_buy=[0, 10],
    limit_sell=[-10, 0],
    n_days_stop_loss=[2, 30],
    size_trades=[.1, .5],
    maximize='Return [%]',
    method='skopt',
    max_tries=500,
    random_state=0,
    return_heatmap=True,
    return_optimization=True)

CPU times: user 4min 15s, sys: 1.1 s, total: 4min 16s
Wall time: 4min 23s


In [74]:
dff = heatmap.reset_index()

In [75]:
dff

Unnamed: 0,model,limit_buy,limit_sell,n_days_stop_loss,size_trades,Return [%]
0,"DecisionTreeRegressor(max_depth=15, random_sta...",0,-9,22,0.108779,-21.602655
1,"DecisionTreeRegressor(max_depth=15, random_sta...",0,-8,13,0.116019,-23.216624
...,...,...,...,...,...,...
498,"DecisionTreeRegressor(max_depth=15, random_sta...",10,0,29,0.495043,-47.952323
499,"DecisionTreeRegressor(max_depth=15, random_sta...",10,0,30,0.226875,-25.037668


## Remove aggressive stop-loss

> How has the backtest performed without stop-loss and take-profit?

In [81]:
class Regression(Strategy):
    
    model = DecisionTreeRegressor(max_depth=15, random_state=42)

    limit_buy = 1
    limit_sell = -5

    N_TRAIN = 600
    price_delta = .004

    n_days_stop_loss = 2
    size_trades = .2
    
    def init(self):
        
        X_train = self.data.df.iloc[:self.N_TRAIN, :-1]
        y_train = self.data.df.iloc[:self.N_TRAIN, -1]

        self.model.fit(X_train, y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        # conditions to sell or buy
        if forecast_tomorrow > self.limit_buy and not self.position.is_long:
            self.buy(size=self.size_trades, tp=upper, sl=lower)
        elif forecast_tomorrow < self.limit_sell and not self.position.is_short:
            self.sell(size=self.size_trades, tp=lower, sl=upper)
        else:
            pass
        
        
        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        # for trade in self.trades:
        #     if current_time - trade.entry_time > pd.Timedelta(f'{self.n_days_stop_loss} days'):
        #         if trade.is_long:
        #             trade.sl = max(trade.sl, low)
        #         else:
        #             trade.sl = min(trade.sl, high)

In [82]:
class WalkForward(Regression):
    def next(self):

        if len(self.data) < self.N_TRAIN:
            return # we don't take any action and move on to the following day
        
        if len(self.data) % 200 != 0:
            return super().next()
        
        X_train = self.data.df.iloc[-self.N_TRAIN:, :-1]
        y_train = self.data.df.iloc[-self.N_TRAIN:, -1]

        self.model.fit(X_train, y_train)

        super().next()

In [86]:
%%time

bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)

stats_skopt, heatmap, optimize_result = bt.optimize(
    model=[model_dt],
    limit_buy=[0, 10],
    limit_sell=[-10, 0],
    # n_days_stop_loss=[2, 30],
    size_trades=[.1, 1],
    maximize='Return [%]',
    method='skopt',
    max_tries=500,
    random_state=0,
    return_heatmap=True,
    return_optimization=True)

CPU times: user 4min 6s, sys: 645 ms, total: 4min 7s
Wall time: 4min 10s


In [87]:
dff = heatmap.reset_index()

In [94]:
dff.sort_values(['limit_sell', 'limit_buy', 'size_trades']).head(20).style

Unnamed: 0,model,limit_buy,limit_sell,size_trades,Return [%]
0,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.102955,-20.361544
1,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.107304,-21.127504
2,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.117346,-23.485099
3,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.119286,-23.815851
4,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.140891,-27.704775
5,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.409104,-62.938627
6,"DecisionTreeRegressor(max_depth=15, random_state=42)",0,-10,0.723712,-82.921553
27,"DecisionTreeRegressor(max_depth=15, random_state=42)",1,-10,0.113514,-17.748895
28,"DecisionTreeRegressor(max_depth=15, random_state=42)",1,-10,0.142858,-22.188764
29,"DecisionTreeRegressor(max_depth=15, random_state=42)",1,-10,0.251286,-36.32646


In [76]:
class Regression(Strategy):
    
    model = DecisionTreeRegressor(max_depth=15, random_state=42)

    limit_buy = 1
    limit_sell = -5

    N_TRAIN = 600
    price_delta = .004

    n_days_stop_loss = 2
    
    def init(self):
        
        X_train = self.data.df.iloc[:self.N_TRAIN, :-1]
        y_train = self.data.df.iloc[:self.N_TRAIN, -1]

        self.model.fit(X_train, y_train)

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        # conditions to sell or buy
        if forecast_tomorrow > self.limit_buy and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast_tomorrow < self.limit_sell and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)
        else:
            pass
        
        
        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        # for trade in self.trades:
        #     if current_time - trade.entry_time > pd.Timedelta(f'{self.n_days_stop_loss} days'):
        #         if trade.is_long:
        #             trade.sl = max(trade.sl, low)
        #         else:
        #             trade.sl = min(trade.sl, high)

In [77]:
class WalkForward(Regression):
    def next(self):

        if len(self.data) < self.N_TRAIN:
            return # we don't take any action and move on to the following day
        
        if len(self.data) % 200 != 0:
            return super().next()
        
        X_train = self.data.df.iloc[-self.N_TRAIN:, :-1]
        y_train = self.data.df.iloc[-self.N_TRAIN:, -1]

        self.model.fit(X_train, y_train)

        super().next()

In [78]:
%%time

bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)

stats_skopt, heatmap, optimize_result = bt.optimize(
    model=[model_dt],
    limit_buy=[0, 10],
    limit_sell=[-10, 0],
    n_days_stop_loss=[2, 30],
    maximize='Return [%]',
    method='skopt',
    max_tries=500,
    random_state=0,
    return_heatmap=True,
    return_optimization=True)

KeyboardInterrupt: 

In [None]:
dff = heatmap.reset_index()

In [None]:
dff

Unnamed: 0,model,limit_buy,limit_sell,n_days_stop_loss,Return [%]
0,"DecisionTreeRegressor(max_depth=15, random_sta...",0,-4,10,-37.728050
1,"DecisionTreeRegressor(max_depth=15, random_sta...",0,-1,5,-42.402823
...,...,...,...,...,...
136,"DecisionTreeRegressor(max_depth=15, random_sta...",10,0,2,-22.306271
137,"DecisionTreeRegressor(max_depth=15, random_sta...",10,0,3,-22.306271


In [None]:
dff = dff.pivot(index='limit_buy', columns='limit_sell', values='Return [%]')
dff.sort_index(axis=1, ascending=False)\
    .style.format(precision=0)\
    .background_gradient(vmin=np.nanmin(dff), vmax=np.nanmax(dff))\
    .highlight_null(props='background-color: transparent; color: transparent')

In [None]:
bt = Backtest(df, WalkForward, cash=10000, commission=.002, exclusive_orders=True)
result = bt.run(N_TRAIN=200, model=model_dt, limit_buy=0, limit_sell=-4)
result.to_frame(name='Value').loc[:'Return [%]'].style

Unnamed: 0,Value
Start,2016-12-08 00:00:00
End,2023-03-15 00:00:00
Duration,2288 days 00:00:00
Exposure Time [%],47.144670
Equity Final [$],5745.325525
Equity Peak [$],10000.000000
Return [%],-42.546745


In [None]:
bt.plot(filename='reports_backtesting/regression_walk_forward_sl_tp.html')