# Forward Sample Backtesting

![](src/forward.webp)

![](https://cdn.shortpixel.ai/spai/q_lossy+w_730+to_webp+ret_img/https://algotrading101.com/learn/wp-content/uploads/2019/08/walk-forward-optimization-chart-2.png)

- [ ] Backtesting In-Sample
- [ ] Backtesting Out of Sample
- [X] Forward Performance

## Load the data

In [1]:
import pandas as pd

df = pd.read_excel('data/Microsoft_LinkedIn_Processed.xlsx', parse_dates=['Date'], index_col=0)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,change_tomorrow,change_tomorrow_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-12-08,56.325228,56.582507,55.902560,56.058762,21220800,-1.549143,DOWN
2016-12-09,56.214968,56.959234,56.169027,56.940857,27349400,-0.321692,DOWN
...,...,...,...,...,...,...,...
2023-03-14,256.750000,261.070007,255.860001,260.790009,33620300,-1.751806,DOWN
2023-03-15,259.980011,266.480011,259.209991,265.440002,46028000,-3.895731,DOWN


## Machine Learning Model

### Separate the data

1. Target: which variable do you want to predict?
2. Explanatory: which variables will you use to calculate the prediction?

In [2]:
target = df.change_tomorrow_direction
explanatory = df[['Open','High','Low','Close','Volume']]

### Compute the model

The following Python code will compute the numbers of the mathematical equation that we will use to calculate if the ticker goes UP or DOWN.

In [3]:
from sklearn.tree import DecisionTreeClassifier

model_dt = DecisionTreeClassifier(max_depth=15)
model_dt.fit(explanatory, target)

### Calculate the predictions

In [4]:
y_pred = model_dt.predict(X=explanatory)
y_pred

array(['DOWN', 'DOWN', 'DOWN', ..., 'UP', 'UP', 'UP'], dtype=object)

In [5]:
df_predictions = df[['change_tomorrow_direction']].copy()
df_predictions['prediction'] = y_pred
df_predictions

Unnamed: 0_level_0,change_tomorrow_direction,prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-08,DOWN,DOWN
2016-12-09,DOWN,DOWN
...,...,...
2023-03-14,DOWN,UP
2023-03-15,DOWN,UP


### Evaluate the model: compare predictions with the reality

In [6]:
model_dt.score(X=explanatory, y=target)

0.8477157360406091

## Train test split

### Split the dataset

- Imagine we are in 2020, we can only train the data up until 31st December 2020, how good would have the model been going foward?

In [7]:
y = df.change_tomorrow_direction

In [8]:
X = df.drop(columns=['change_tomorrow', 'change_tomorrow_direction'])

In [9]:
n_days = len(df.index)

In [10]:
n_days_split = int(n_days*0.80)

In [11]:
X_train, y_train = X.iloc[:n_days_split], y.iloc[:n_days_split]
X_test, y_test = X.iloc[n_days_split:], y.iloc[n_days_split:]

### Fit the model on train set

In [12]:
model_dt_split = DecisionTreeClassifier(max_depth=15)

In [13]:
model_dt_split.fit(X=X_train, y=y_train)

### Calculate predictions on test set

In [14]:
y_pred = model_dt_split.predict(X=X_test)

In [15]:
df_predictions = y_test.to_frame()
df_predictions['prediction'] = y_pred
df_predictions

Unnamed: 0_level_0,change_tomorrow_direction,prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-12-10,UP,UP
2021-12-13,UP,UP
...,...,...
2023-03-14,DOWN,UP
2023-03-15,DOWN,DOWN


### Evaluate model

#### On test set

In [16]:
model_dt_split.score(X_test, y_test)

0.48417721518987344

#### On train set

In [17]:
model_dt_split.score(X_train, y_train)

0.8198412698412698

## Backtesting

In [18]:
import strategies



In [19]:
N_TRAIN = 400

In [20]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,change_tomorrow,change_tomorrow_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-12-08,56.325228,56.582507,55.902560,56.058762,21220800,-1.549143,DOWN
2016-12-09,56.214968,56.959234,56.169027,56.940857,27349400,-0.321692,DOWN
...,...,...,...,...,...,...,...
2023-03-14,256.750000,261.070007,255.860001,260.790009,33620300,-1.751806,DOWN
2023-03-15,259.980011,266.480011,259.209991,265.440002,46028000,-3.895731,DOWN


In [21]:
from backtesting import Strategy

In [23]:
class SimpleClassificationUD(Strategy):
    N_TRAIN = 400
    
    def init(self):
        self.already_bought = False
        self.model = DecisionTreeClassifier(max_depth=15)
        
        df = self.data.df.iloc[:self.N_TRAIN]
        y = df['change_tomorrow_direction']
        X = df.drop(columns='change_tomorrow_direction')
        
        self.model.fit(X, y)
        
    def next(self):
        
        if len(self.data) < self.N_TRAIN:
            return
        
        X = self.data.df.drop(columns='change_tomorrow_direction')
        X_tomorrow = X.iloc[[-1], :]
        
        forecast_tomorrow = self.model.predict(X_tomorrow)[0]
        
        # conditions to sell or buy
        if forecast_tomorrow == 'UP' and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow == 'DOWN' and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

In [24]:
class ClassificationWalkForward(SimpleClassificationUD):
    def next(self):
        if len(self.data) < self.N_TRAIN:
            pass
        
        if len(self.data) % 200:
            return super().next()
        
        df = self.data.df.iloc[-self.N_TRAIN:, :]
        
        X = df.drop(columns='change_tomorrow_direction')
        y = df['change_tomorrow_direction']

        self.model.fit(X, y)
        
        super().next()

In [25]:
from backtesting import Backtest

In [26]:
df_model = df.drop(columns='change_tomorrow')

In [27]:
bt = Backtest(df_model, ClassificationWalkForward, commission=.0002, margin=.05)
result = bt.run(N_TRAIN = 1000)

In [29]:
result.reset_index().style

Unnamed: 0,index,0
0,Start,2016-12-08 00:00:00
1,End,2023-03-15 00:00:00
2,Duration,2288 days 00:00:00
3,Exposure Time [%],1.649746
4,Equity Final [$],0.000000
5,Equity Peak [$],21571.401072
6,Return [%],-100.000000
7,Buy & Hold Return [%],373.503151
8,Return (Ann.) [%],0.000000
9,Volatility (Ann.) [%],83.679515


In [28]:
%%time

stats_skopt, heatmap, optimize_result = bt.optimize(
    N_TRAIN = [100, 1200],
    maximize='Equity Final [$]',
    method='skopt',
    max_tries=200,
    random_state=0,
    return_heatmap=True,
    return_optimization=True)

ImportError: Need package 'scikit-optimize' for method='skopt'. pip install scikit-optimize

In [113]:
df_results_heatmap = heatmap.reset_index()

In [114]:
df_results_heatmap.sort_values('Equity Final [$]')

Unnamed: 0,N_TRAIN,Equity Final [$]
0,103,0.0
120,740,0.0
...,...,...
58,438,0.0
187,1200,0.0


In [84]:
bt.plot(filename='report.html')

In [39]:
from backtesting import Strategy

N_TRAIN = 400

class SimpleClassificationUD(Strategy):
    model = None
    
    def init(self):
        self.already_bought = False

    def next(self):
        if len(self.data) < N_TRAIN:
            return
        
        if len(self.data) % 20:
        
            explanatory_today = self.data.df.iloc[-1:, :]
            forecast_tomorrow = self.model.predict(explanatory_today)[0]

            # conditions to sell or buy
            if forecast_tomorrow == 'UP' and self.already_bought == False:
                self.buy()
                self.already_bought = True
            elif forecast_tomorrow == 'DOWN' and self.already_bought == True:
                self.sell()
                self.already_bought = False
            else:
                pass

        # Retrain on last N_TRAIN values
        df = self.data.df[-N_TRAIN:]
        X, y = get_clean_Xy(df)
        self.model.fit(X, y)
        
        explanatory_today = self.data.df.iloc[-1:, :]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]
        
        # conditions to sell or buy
        if forecast_tomorrow == 'UP' and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow == 'DOWN' and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

In [40]:
from backtesting import Backtest

In [41]:
model = DecisionTreeClassifier()

In [42]:
bt = Backtest(X, strategies.SimpleClassificationUD,
              cash=10000, commission=.002, exclusive_orders=True)

results = bt.run(model=model)

results_test = results.to_frame(name='Values').loc[:'Return [%]']\
    .rename({'Values':'Test set'}, axis=1)

NotFittedError: This DecisionTreeClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [33]:
results_test

Unnamed: 0,Test set
Start,2021-12-10 00:00:00
End,2023-03-15 00:00:00
Duration,460 days 00:00:00
Exposure Time [%],98.734177
Equity Final [$],7104.005872
Equity Peak [$],13666.338686
Return [%],-28.959941


In [23]:
bt = Backtest(X_train, strategies.SimpleClassificationUD,
              cash=10000, commission=.002, exclusive_orders=True)

results = bt.run(model=model_dt_split)

results_train = results.to_frame(name='Values').loc[:'Return [%]']\
    .rename({'Values':'Train set'}, axis=1)

In [24]:
df_results = pd.concat([results_test, results_train], axis=1)

In [25]:
df_results

Unnamed: 0,Test set,Train set
Start,2021-12-10 00:00:00,2016-12-08 00:00:00
End,2023-03-15 00:00:00,2021-12-09 00:00:00
Duration,460 days 00:00:00,1827 days 00:00:00
Exposure Time [%],98.734177,99.84127
Equity Final [$],7104.005872,3907244.699141
Equity Peak [$],13666.338686,3944942.797611
Return [%],-28.959941,38972.446991


The backtest is taking decisions within the class; in other words:

1. Retrains the data
2. Takes decisions for the next 20 points 
3. Retrains taking into account the past 20 points
4. And on and on and on...

In [28]:
import backtesting
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
from backtesting.test import SMA, GOOG

class MyStrategy(Strategy):
    def init(self):
        self.ma1 = self.I(SMA, self.data.Close, 10)
        self.ma2 = self.I(SMA, self.data.Close, 20)
        
    def next(self):
        if crossover(self.ma1, self.ma2):
            self.buy()
        elif crossover(self.ma2, self.ma1):
            self.sell()
            
# Define the time period for the backtest
start = '2010-01-01'
end = '2020-12-31'

# Define the walk forward period
period = 252 * 2 # Two years

# Define the number of periods to test
n_periods = 5

# Define the number of periods to train on
n_train_periods = 3

# Define the data to use for the backtest
data = GOOG[start:end]

# Perform the walk forward analysis
results = []
for i in range(n_periods):
    start_date = data.index[0] + pd.DateOffset(days=i * period)
    end_date = start_date + pd.DateOffset(days=period)
    
    train_start = start_date
    train_end = start_date + pd.DateOffset(days=n_train_periods * period)
    
    test_start = train_end
    test_end = end_date
    
    train_data = data.loc[train_start:train_end]
    test_data = data.loc[test_start:test_end]
    
    bt = Backtest(train_data, MyStrategy)
    bt.run()
    
    results.append(bt.run(test_data))
    
# Combine the results of each walk forward period
final_results = backtesting.combine(results)

# Print the final results
print(final_results)


TypeError: Backtest.run() takes 1 positional argument but 2 were given