In [72]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
import os
import urllib
import json
import datetime as dt
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS


In [None]:
data_source = 'alphavantage' # alphavantage or kaggle

if data_source == 'alphavantage':
    # ====================== Loading Data from Alpha Vantage ==================================

    api_key = '5ZMKOTEJIMO3MPUX'

    # Apple stock market prices
    ticker = "AAPL"

    # JSON file with all the stock market data for AAL from the last 20 years
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s"%(ticker,api_key)

    # Save data to this file
    file_to_save = 'stock_market_data-%s.csv'%ticker

    # If you haven't already saved data,
    # Go ahead and grab the data from the url
    # And store date, low, high, volume, close, open values to a Pandas DataFrame
    if not os.path.exists(file_to_save):
        with urllib.request.urlopen(url_string) as url:
            data = json.loads(url.read().decode())
            # extract stock market data
            data = data['Time Series (Daily)']
            df = pd.DataFrame(columns=['Date','Low','High','Close','Open'])
            for k,v in data.items():
                date = dt.datetime.strptime(k, '%Y-%m-%d')
                data_row = [date.date(),float(v['3. low']),float(v['2. high']),
                            float(v['4. close']),float(v['1. open'])]
                df.loc[-1,:] = data_row
                df.index = df.index + 1
        print('Data saved to : %s'%file_to_save)        
        df.to_csv(file_to_save)

    # If the data is already there, just load it from the CSV
    else:
        print('File already exists. Loading data from CSV')
        df = pd.read_csv(file_to_save)

Data saved to : stock_market_data-AAPL.csv


In [108]:
data1 = pd.read_csv("../stock_market_data-AAPL.csv", index_col='Date', parse_dates=True)
data2 = pd.read_csv("../stock_market_data-MSFT.csv", index_col='Date', parse_dates=True)
data1.columns = [column.capitalize() for column in data1.columns]
data2.columns = [column.capitalize() for column in data2.columns]
data1 = data1[:1000]
data2 = data2[:1000]
pairs = pd.DataFrame(index=data1.index)
pairs['AAPL_close'] = data1['Close']
pairs['MSFT_close'] = data2['Close']
pairs.index = pd.to_datetime(pairs.index)
pairs = pairs.dropna()

In [109]:
pairs

Unnamed: 0_level_0,AAPL_close,MSFT_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-05-13,186.28,413.72
2024-05-10,183.05,414.74
2024-05-09,184.57,412.32
2024-05-08,182.74,410.54
2024-05-07,182.40,409.34
...,...,...
2020-05-29,317.94,183.25
2020-05-28,318.25,181.40
2020-05-27,318.11,181.81
2020-05-26,316.73,181.57


In [118]:
class PairsTradingStrategy(Strategy):
    def init(self):
        # Precompute the z-score of the spread
        close1 = self.data.Close1
        close2 = self.data.Close2
        spread = close1 - close2
        self.zscore = (spread - spread.rolling(20).mean()) / spread.rolling(20).std()

    def next(self):
        # If the z-score > 1, short the spread
        if self.zscore[-1] > 1:
            self.sell(data='Asset1')
            self.buy(data='Asset2')
        # If the z-score < -1, long the spread
        elif self.zscore[-1] < -1:
            self.buy(data='Asset1')
            self.sell(data='Asset2')
        # If the z-score approaches zero, close positions
        elif abs(self.zscore[-1]) < 0.1:
            self.position.close()


In [121]:
# Load data from CSV files
asset1 = pd.read_csv('../stock_market_data-AAPL.csv', index_col='Date', parse_dates=True)
asset2 = pd.read_csv('../stock_market_data-MSFT.csv', index_col='Date', parse_dates=True)

# Ensure the data is sorted by date
asset1 = asset1.sort_index()
asset2 = asset2.sort_index()

data = pd.DataFrame({
    'Close1': asset1['Close'],
    'Close2': asset2['Close']
})

In [123]:
data

Unnamed: 0_level_0,Close1,Close2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1999-11-01,77.62,92.37
1999-11-02,80.25,92.56
1999-11-03,81.50,92.00
1999-11-04,83.62,91.75
1999-11-05,88.31,91.56
...,...,...
2024-05-07,182.40,409.34
2024-05-08,182.74,410.54
2024-05-09,184.57,412.32
2024-05-10,183.05,414.74


In [None]:
bt = Backtest(data, PairsTradingStrategy, cash=10_000, commission=.002)
