In [70]:
from dotenv import load_dotenv
import os

import requests
from tabula import read_pdf
import pandas as pd 

import yfinance as yf
import numpy as np

In [68]:
# Load environment variables from the project-level .env file.
load_dotenv(dotenv_path="../.env")
DATAPATH = os.getenv("DATAPATH")
# os.getenv returns None for an unset variable, which would silently turn every
# f"{DATAPATH}/..." path used later in the notebook into "None/...".
# Fail here with a clear message instead.
if not DATAPATH:
    raise RuntimeError("DATAPATH is not set; define it in ../.env or in the environment.")

# Other settings
length_of_data = "60d"  # passed as `period` to the Yahoo Finance history request
n_lags = 10  # number of lagged growth columns to create per ticker

In [30]:
# Download the BEL 20 index composition PDF from the Euronext website.
url = "https://live.euronext.com/sites/default/files/documentation/index-composition/BEL_20_Index_Composition.pdf"
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Stop on an HTTP error instead of saving an error page under a .pdf name.
response.raise_for_status()

# Write the raw bytes to disk; the context manager closes the file even if
# the write fails part-way.
with open(f"{DATAPATH}/composition_BEL_20.pdf", 'wb') as pdf:
    pdf.write(response.content)

In [34]:
# Parse the downloaded composition PDF: all pages are read as one table
# without a header row, so columns are addressed by integer position.
table = read_pdf(
    f"{DATAPATH}/composition_BEL_20.pdf",
    pages="all",
    multiple_tables=False,
    pandas_options={'header': None},
)[0]

# Build the Yahoo Finance symbols from columns 1 and 2 of the table:
# rows whose column 1 equals "BE" get the ".BR" suffix, every other row ".AS".
# NOTE(review): presumably column 1 is a country code and column 2 the ticker
# symbol -- confirm against the PDF layout.
tickers = [
    f"{symbol}" + (".BR" if code == "BE" else ".AS")
    for code, symbol in zip(table[1], table[2])
]

In [77]:
# Download the price history of every ticker from Yahoo Finance.
# The per-ticker frames are collected in a list and concatenated once at the
# end: calling pd.concat on the growing frame inside the loop copies all
# previously gathered rows on every iteration.
histories = []
for ticker in tickers:
    hist = yf.Ticker(ticker).history(period=length_of_data)
    hist['ticker'] = ticker  # tag rows so the stacked frame can be grouped per ticker
    histories.append(hist)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when there are no tickers at all.
data = pd.concat(histories) if histories else pd.DataFrame()

In [81]:
# Normalise the price column name first. rename() without inplace ignores
# labels that are absent, so re-running this cell no longer fails with
# KeyError: 'Close' after the column has already been renamed.
data = data.rename(columns={'Close': 'close'})

# Close of the previous row within each ticker.
# NOTE(review): this is the previous trading day only if rows are in
# chronological order per ticker -- confirm for the downloaded history.
data["close_previous_day"] = data.groupby("ticker")["close"].shift(1)
# Log growth: log(close) - log(previous close).
data["close_growth"] = np.log(data["close"]) - np.log(data["close_previous_day"])

# Lagged copies of the growth rate (not of the price), shifted by 1..n_lags
# rows within each ticker.
for i in range(1, n_lags + 1):
    data[f"close_growth_lag_{i}"] = data.groupby("ticker")["close_growth"].shift(i)

In [82]:
# Keep only the columns whose label matches the regex (the ticker column and
# every close-derived column), then persist them as a pickle file.
# NOTE(review): "Date" is presumably the index name rather than a column; the
# index is written to the pickle either way -- confirm.
selected_columns = data.filter(regex='ticker|Date|close')
selected_columns.to_pickle(f"{DATAPATH}/BEL_20.pkl")