# Challenge: Walk Forward on Other Datasets

## Download data from `yfinance`

In [2]:
import yfinance as yf

ticker = 'AAPL'
df = yf.download(ticker, multi_level_index=False, auto_adjust=False)
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-12-12,0.128348,0.128906,0.128348,0.128348,0.099584,469033600
1980-12-15,0.122210,0.122210,0.121652,0.121652,0.094388,175884800
...,...,...,...,...,...,...
2023-05-15,173.160004,173.210007,171.470001,172.070007,172.070007,37266700
2023-05-16,171.990005,173.139999,171.800003,172.070007,172.070007,42071100


## Preprocess the data

### Filter the date range

In [3]:
df = df.loc['2018-01-01':].copy()

### Create the target variable

#### Percentage change

- Percentage change on `Adj Close` for tomorrow

In [4]:
df['change_tomorrow'] = df['Adj Close'].pct_change(-1)
df.change_tomorrow = df.change_tomorrow * -1
df.change_tomorrow = df.change_tomorrow * 100

#### Remove rows with any missing data

In [5]:
df = df.dropna().copy()
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,change_tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-02,42.540001,43.075001,42.314999,43.064999,40.831585,102223600,-0.017417
2018-01-03,43.132500,43.637501,42.990002,43.057499,40.824474,118071600,0.462368
...,...,...,...,...,...,...,...
2023-05-12,173.619995,174.059998,171.000000,172.570007,172.570007,45497800,-0.290579
2023-05-15,173.160004,173.210007,171.470001,172.070007,172.070007,37266700,-0.000000


## Machine Learning modelling

### Separate the data

1. Target: which variable do you want to predict?
2. Explanatory: which variables will you use to calculate the prediction?

In [6]:
y = df.change_tomorrow
X = df[['Open','High','Low','Close','Volume']]

### Time Series Split

In [8]:
from sklearn.model_selection import TimeSeriesSplit

In [None]:
ts = TimeSeriesSplit

### Compute and evaluate model in a for loop

1. Separate the data in train and test
2. Compute the model on the train set
3. Evaluate the model (mse) on the test set
4. Append the errors (mse) in an empty list

In [9]:
from sklearn.ensemble import RandomForestRegressor

In [11]:
list_mse = []

for ...
  model = RandomForestRegressor()
  model.fit(X, y)
  mse = mean_squared_error()
  
  list_mse.append(mse)

In [12]:
from sklearn.metrics import mean_squared_error

In [None]:
for ...

## Anchored Walk Forward evaluation in backtesting

![](<src/10_Table_Validation Methods.png>)

### Create a new strategy

In [None]:
from backtesting import Strategy

In [None]:
class Regression(Strategy):
    ???
    
    def init(self):
        ???
            
        X_train = ???
        y_train = ???
        
        ???

    def next(self):
        explanatory_today = self.data.df.iloc[[-1], :-1]
        forecast_tomorrow = self.model.predict(explanatory_today)[0]
        
        if forecast_tomorrow > self.limit_buy and self.already_bought == False:
            self.buy()
            self.already_bought = True
        elif forecast_tomorrow < self.limit_sell and self.already_bought == True:
            self.sell()
            self.already_bought = False
        else:
            pass

In [None]:
class WalkForwardAnchored(Regression):
  
    def ???(self):
      
      # conditions to retrain the model
      
      super().next()

### Run the backtest with optimization

In [None]:
import multiprocessing as mp
mp.set_start_method('fork')

In [None]:
from backtesting import Backtest
bt = Backtest(???, ???, cash=10000, commission=.002, exclusive_orders=True)

In [None]:
bt.optimize(???)

## Unanchored Walk Forward

### Create a library of strategies

[strategies.py](strategies.py)

### Create the unanchored walk forward class

In the previously created library

![](<src/10_Table_Validation Methods.png>)

### Import the strategy and perform the backtest with optimization

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import ???

In [None]:
bt_unanchored = Backtest(df, ???, cash=10000, commission=.002, exclusive_orders=True)

bt_unanchored.optimize(???)

### Interpret the strategies' performance

In [None]:
bt.???

In [None]:
bt_unanchored.???

## Course Conclusion

Watch video → [Next steps]()