In [1]:
import os
import time
from selenium.webdriver import Firefox, FirefoxOptions, FirefoxService
import pandas as pd

In [2]:
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.wait import WebDriverWait
import selenium.webdriver.support.expected_conditions as EC

In [3]:
# Funds to scrape. Each entry is a (name, symbol, beginning_date) tuple:
#   name           - used as the output CSV file name ('data/{name}.csv')
#   symbol         - value of the 's' query parameter in the tearsheet URL
#   beginning_date - first date to request; converted with pd.Timestamp
symbols = [
    ('GMO Quality Investment Fund', 'IE00B3SBSR82:USD', '2010-11-10'),
    ]

In [4]:
def select_start_date(driver: Firefox, start_date: pd.Timestamp):
    """Choose ``start_date`` in the first date picker on the current page.

    The function clicks the first ``picker__input`` element, selects the
    target year in the picker's year drop-down, steps backwards month by
    month until the month label matches, and finally clicks the day cell
    whose ``aria-label`` corresponds to ``start_date``.

    Args:
        driver: Selenium Firefox driver with the historical-prices page loaded.
        start_date: Date to select.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            picker elements (or the day cell) cannot be located.
    """
    # Index 0 of the 'picker__input' elements is used as the start-date field.
    driver.find_elements(by='class name', value='picker__input')[0].click()

    # NOTE(review): absolute XPaths are tied to the page layout at the time of
    # writing and will break if the markup changes.
    start_date_picker_xpath = '/html/body/div[3]/div[2]/section[3]/div[1]/div/div/div[1]/div[1]/div/form/fieldset/span/div[1]'
    header_xpath = f'{start_date_picker_xpath}/div[2]/div/div/div/div/div/div'
    year_xpath = f'{header_xpath}/select'
    month_label_xpath = f'{header_xpath}/div[1]'
    prev_month_xpath = f'{header_xpath}/div[2]'

    # While the earliest year offered is still later than the target year,
    # pick that earliest option; presumably the picker then offers an earlier
    # window of years. The element is re-located on every pass.
    while True:
        start_year = driver.find_element(by='xpath', value=year_xpath)
        start_year_select = Select(start_year)
        if int(start_year_select.options[0].text) <= start_date.year:
            break
        start_year.click()
        start_year_select.select_by_index(0)

    # The target year is now among the options: select it.
    start_year = driver.find_element(by='xpath', value=year_xpath)
    start_year_select = Select(start_year)
    start_year.click()
    start_year_select.select_by_value(f'{start_date.year}')

    # Step backwards until the displayed month matches (at most 15 clicks).
    # '%B' yields the locale's full month name.
    for _ in range(15):
        start_month = driver.find_element(by='xpath', value=month_label_xpath)
        if start_month.text == start_date.strftime('%B'):
            break
        driver.find_element(by='xpath', value=prev_month_xpath).click()

    # Build the label (e.g. '5 Nov, 2010') from the integer day instead of the
    # Windows-only '%#d' directive, so the day is never zero-padded on any
    # platform.
    day_label = f'{start_date.day} {start_date.strftime("%b, %Y")}'
    picker = driver.find_element(by='xpath', value=start_date_picker_xpath)
    start_date_cell = picker.find_element(by='css selector', value=f'div[aria-label="{day_label}"]')
    start_date_cell.click()

In [5]:
def select_end_date(driver: Firefox, end_date: pd.Timestamp):
    """Choose ``end_date`` in the second date picker on the current page.

    The function clicks the second ``picker__input`` element, selects the
    target year in the picker's year drop-down, steps forwards month by month
    until the month label matches, and finally clicks the day cell whose
    ``aria-label`` corresponds to ``end_date``.

    Args:
        driver: Selenium Firefox driver with the historical-prices page loaded.
        end_date: Date to select.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            picker elements (or the day cell) cannot be located.
    """
    # Index 1 of the 'picker__input' elements is used as the end-date field.
    end_date_field = driver.find_elements(by='class name', value='picker__input')[1]
    end_date_field.click()

    # NOTE(review): absolute XPaths are tied to the page layout at the time of
    # writing and will break if the markup changes.
    end_date_picker_xpath = '/html/body/div[3]/div[2]/section[3]/div[1]/div/div/div[1]/div[1]/div/form/fieldset/span/div[2]'
    header_xpath = f'{end_date_picker_xpath}/div[2]/div/div/div/div/div/div'
    year_xpath = f'{header_xpath}/select'
    month_label_xpath = f'{header_xpath}/div[1]'
    next_month_xpath = f'{header_xpath}/div[3]'

    # While the earliest year offered is still later than the target year,
    # pick that earliest option; presumably the picker then offers an earlier
    # window of years. The element is re-located on every pass.
    while True:
        end_year = driver.find_element(by='xpath', value=year_xpath)
        end_year_select = Select(end_year)
        if int(end_year_select.options[0].text) <= end_date.year:
            break
        end_year.click()
        end_year_select.select_by_index(0)

    # The target year is now among the options: select it.
    end_year = driver.find_element(by='xpath', value=year_xpath)
    end_year_select = Select(end_year)
    end_year.click()
    end_year_select.select_by_value(f'{end_date.year}')

    # Step forwards until the displayed month matches (at most 15 clicks).
    # '%B' yields the locale's full month name.
    for _ in range(15):
        end_month = driver.find_element(by='xpath', value=month_label_xpath)
        if end_month.text == end_date.strftime('%B'):
            break
        driver.find_element(by='xpath', value=next_month_xpath).click()

    # Build the label (e.g. '31 Dec, 2010') from the integer day instead of the
    # Windows-only '%#d' directive, so the day is never zero-padded on any
    # platform.
    day_label = f'{end_date.day} {end_date.strftime("%b, %Y")}'
    picker = driver.find_element(by='xpath', value=end_date_picker_xpath)
    end_date_cell = picker.find_element(by='css selector', value=f'div[aria-label="{day_label}"]')
    end_date_cell.click()

In [6]:
def get_data(driver: Firefox, beginning_date: pd.Timestamp):
    """Collect the text rows of the results table, one date window at a time.

    The period from ``beginning_date`` to today is cut into windows: the
    window starts are ``beginning_date`` plus every year start after it, the
    window ends are every year end after it plus today's date. For each
    window both pickers are set and the text of the results element is read.

    Args:
        driver: Selenium Firefox driver with the historical-prices page loaded.
        beginning_date: First date to request.

    Returns:
        A list of text lines taken from the results element, accumulated over
        all windows.
    """
    def wait_until_loaded():
        # Block (up to 50 s) until the loading overlay is no longer visible.
        WebDriverWait(driver, 50).until(
            EC.invisibility_of_element(('class name', 'mod-ui-loading__overlay'))
        )

    year_starts = pd.date_range(start=beginning_date, end='today', freq='YS')
    window_starts = pd.DatetimeIndex([beginning_date]).union(year_starts)

    year_ends = pd.date_range(start=beginning_date, end=pd.to_datetime('today'), freq='YE')
    window_ends = year_ends.union([pd.Timestamp.today().normalize()])

    collected: list[str] = []

    for window_start, window_end in zip(window_starts, window_ends):
        if window_start != beginning_date:
            # Later windows: the end date is set before the start date.
            # NOTE(review): presumably the pickers do not allow start > end,
            # which is why the order differs from the first window - confirm.
            select_end_date(driver, window_end)
            wait_until_loaded()
            select_start_date(driver, window_start)
        else:
            # First window: the start date is set before the end date.
            select_start_date(driver, window_start)
            wait_until_loaded()
            select_end_date(driver, window_end)

        wait_until_loaded()

        results = driver.find_element(by='class name', value='mod-tearsheet-historical-prices__results')
        lines = results.text.split('\n')
        # Drop the first line (presumably the table header) and append the
        # remaining lines in reverse order.
        collected.extend(reversed(lines[1:]))

    return collected

In [7]:
def parse_rows(rows: list[str]):
    """Turn scraped text rows into a typed price table.

    Each row is split from the right on single spaces into six fields, so any
    spaces inside the leading date text stay in the ``Date`` field.

    Args:
        rows: Text lines, each ending with five space-separated values
            (open, high, low, close, volume) preceded by the date text.

    Returns:
        A DataFrame with columns ``Date`` (parsed with ``pd.to_datetime``) and
        ``Open``, ``High``, ``Low``, ``Close``, ``Volume`` as floats.
    """
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    records = []
    for line in rows:
        records.append(line.rsplit(' ', maxsplit=5))

    table = pd.DataFrame(records, columns=['Date', *numeric_columns])
    # Dates are parsed first, so the comma removal below only touches the
    # remaining string columns.
    table['Date'] = table['Date'].apply(pd.to_datetime)
    # Strip thousands separators before the float conversion.
    table = table.replace(',', '', regex=True)
    table[numeric_columns] = table[numeric_columns].astype(float)
    return table

In [8]:
def main():
    """Scrape historical prices for every entry in ``symbols`` into CSV files.

    A Firefox session is started with the profile given by the
    ``FIREFOX_PROFILE_PATH`` environment variable. For each fund the
    historical-prices tearsheet is opened, the rows are collected with
    ``get_data``, parsed with ``parse_rows`` and written to
    ``data/{name}.csv`` indexed by date.

    Raises:
        KeyError: if ``FIREFOX_PROFILE_PATH`` is not set.
    """
    options = FirefoxOptions()
    options.profile = os.environ['FIREFOX_PROFILE_PATH']
    driver = Firefox(options=options)
    try:
        driver.implicitly_wait(0.5)
        # Make sure the output directory exists before the first write.
        os.makedirs('data', exist_ok=True)
        for name, symbol, beginning_date in symbols:
            driver.get(f'https://markets.ft.com/data/funds/tearsheet/historical?s={symbol}')
            # Click the filter toggle before using the date pickers.
            driver.find_element(by='class name', value='mod-ui-filter-overlay__filter-toggle').click()
            rows = get_data(driver, pd.Timestamp(beginning_date))
            df = parse_rows(rows)
            df.set_index('Date', drop=True).to_csv(f'data/{name}.csv')
    finally:
        # Always close the browser, even when scraping or parsing fails.
        driver.quit()

In [9]:
# Start the scrape only when this code is executed as the main module,
# not when it is imported.
if __name__ == '__main__':
    main()