In [1]:
import pickle
from dataclasses import dataclass
from datetime import datetime
from urllib.parse import urlencode

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ChromeOptions

In [2]:
@dataclass
class InvestmentTrust:
    """One investment trust taken from a row of the search-results table.

    Instances are created in ``get_inv_trust_data`` from the tuple returned by
    ``get_symbol_row_data``; the field comments describe what that function supplies.
    """
    symbol: str  # text of the row's first cell; also the key used to skip duplicates
    name: str  # text of the row's second cell
    link: str  # href of the first anchor found inside the second cell
    tradeable: bool  # True when the fourth cell's text starts with a newline followed by "Deal"

In [4]:
def check_symbol_row(symbol_row):
    """Return True when ``symbol_row`` has the shape of an investment-trust row.

    A row is accepted when it contains exactly four ``<td>`` descendants and the
    first of them has no ``<td>`` nested inside it.

    Args:
        symbol_row: A BeautifulSoup ``<tr>`` tag (any object exposing ``find_all``).

    Returns:
        bool: True if the row should be parsed by ``get_symbol_row_data``.
    """
    # find_all is the supported bs4 spelling; findAll is a deprecated BS3 alias.
    cells = symbol_row.find_all('td')

    if len(cells) != 4:
        return False

    # Reject rows whose first cell itself contains cells — presumably a layout
    # row wrapping a nested table rather than a data row (TODO confirm on the site).
    return not cells[0].find_all('td')


def get_symbol_row_data(symbol_row):
    """Extract the fields of one investment-trust table row.

    Args:
        symbol_row: A BeautifulSoup ``<tr>`` tag with at least four ``<td>``
            cells; callers are expected to filter rows with
            ``check_symbol_row`` first.

    Returns:
        tuple: ``(symbol, name, link, dealable)`` where ``symbol`` and ``name``
        are the text of the first and second cells, ``link`` is the ``href`` of
        the first anchor in the second cell, and ``dealable`` is True when the
        fourth cell's text starts with a newline followed by "Deal".

    Raises:
        IndexError: If the row has fewer cells than are read here.
        TypeError: If the second cell has no ``<a href=...>`` (``find`` returns
            None, which cannot be subscripted).
    """
    # find_all is the supported bs4 spelling; findAll is a deprecated BS3 alias.
    cells = symbol_row.find_all('td')

    symbol = cells[0].text

    name_cell = cells[1]
    name = name_cell.text
    link = name_cell.find('a', href=True)['href']

    # The leading newline is part of the cell text as the page renders it.
    dealable = cells[3].text.startswith('\nDeal')

    return symbol, name, link, dealable

In [42]:
def get_page_url(offset, search_input):
    """Build the URL of one page of investment-trust search results.

    Args:
        offset: Result offset sent to the site as the ``offset`` query
            parameter (this notebook uses multiples of 50).
        search_input: Search text sent as the ``it_search_input`` parameter.

    Returns:
        str: The absolute URL, with both values encoded as query parameters.
    """
    base_url = 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts'

    # Encode the values instead of interpolating them raw, so a search term
    # containing a space, '&' or '#' cannot corrupt the query string.
    query = urlencode({'offset': offset, 'it_search_input': search_input})

    return f'{base_url}?{query}'

def get_page_urls(soup, search_input, step_size=50):
    """List the URLs of every results page for one search.

    The number of further pages is derived from the first page's pager: the
    anchors whose ``title`` starts with ``'View page '`` are counted and the
    count is halved.

    Args:
        soup: Parsed first results page (a BeautifulSoup object).
        search_input: The search text the page was requested with.
        step_size: Offset increment between consecutive pages. Defaults to 50.

    Returns:
        list[str]: The URL for offset 0 followed by one URL per further page,
        in increasing offset order. Always contains at least the first page.
    """
    # find_all is the supported bs4 spelling; findAll is a deprecated BS3 alias.
    pager_link_count = sum(
        1
        for link in soup.find_all('a', href=True)
        if (link.get('title') or '').startswith('View page ')
    )

    # NOTE(review): halved presumably because the pager appears twice on the
    # page (e.g. above and below the table) — confirm against the live markup.
    num_pages = pager_link_count // 2

    # Offsets 0, step_size, ..., num_pages * step_size (inclusive).
    return [
        get_page_url(offset, search_input)
        for offset in range(0, (num_pages + 1) * step_size, step_size)
    ]


def get_inv_trust_data(soup, investment_trusts):
    """Append the trusts listed on one results page to ``investment_trusts``.

    Every ``<tr>`` of the first ``<table>`` on the page that passes
    ``check_symbol_row`` is parsed with ``get_symbol_row_data``. A trust is
    appended only if no entry with the same symbol is already in the list, so
    calling this for overlapping searches does not create duplicates.

    Args:
        soup: Parsed results page (a BeautifulSoup object).
        investment_trusts: List of ``InvestmentTrust``; mutated in place.

    Returns:
        None.

    Raises:
        AttributeError: If the page contains no ``<table>`` (``find`` returns None).
    """
    table = soup.find('table')
    rows = table.find_all('tr')

    # Index the known symbols once per page; scanning the whole list for every
    # row made de-duplication quadratic in the number of trusts.
    known_symbols = {trust.symbol for trust in investment_trusts}

    for symbol_row in rows:
        if not check_symbol_row(symbol_row):
            continue

        symbol, name, link, dealable = get_symbol_row_data(symbol_row)

        if symbol in known_symbols:
            continue

        investment_trusts.append(InvestmentTrust(symbol, name, link, dealable))
        known_symbols.add(symbol)

In [37]:
# Start a Chrome session without a visible window; the "--headless=new" flag is
# passed straight through to the browser.
options = ChromeOptions()
options.add_argument("--headless=new")
# NOTE(review): no driver.quit() is visible in this part of the notebook — confirm the
# browser process is shut down once scraping has finished.
driver = webdriver.Chrome(options=options)

In [51]:
# Search terms: the most frequent letters. According to the recorded run, the
# running total of distinct trusts after each search was 351, 374 and then 375.
search_inputs = ["e", "a", "r"]
page_urls = []  # every results-page URL generated by the scrape loop
investment_trusts = []  # InvestmentTrust records, one per distinct symbol

In [52]:
for search_input in search_inputs:

    # Load page 1 so we can determine how many other pages this search has
    url = get_page_url(0, search_input)
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # URLs for this search only. Iterating the accumulated `page_urls` instead
    # would download every page of the earlier searches again on each pass.
    search_page_urls = get_page_urls(soup, search_input)
    page_urls += search_page_urls

    # Page 1 is already parsed (it is search_page_urls[0]); reuse it rather
    # than requesting it a second time.
    get_inv_trust_data(soup, investment_trusts)

    for page_url in search_page_urls[1:]:
        driver.get(page_url)
        page_soup = BeautifulSoup(driver.page_source, 'lxml')

        get_inv_trust_data(page_soup, investment_trusts)

    # Running total of distinct trusts collected so far
    print(f"num trusts: {len(investment_trusts)}")

page_urls

num trusts: 351
num trusts: 374
num trusts: 375


['https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=0&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=50&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=100&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=150&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=200&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=250&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=300&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=350&it_search_input=e',
 'https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=0&it_search_input=a',
 'https://www.hl.co.uk/shares/inv

In [43]:
# NOTE(review): this cell's execution count (43) is lower than that of the cells
# above it, and its recorded output lists "q" and "x" searches that are not in the
# current `search_inputs`, so it was last run against an older `page_urls`.
# Run top to bottom, it visits every URL in `page_urls` a second time; because
# get_inv_trust_data skips symbols already in the list, it only adds trusts that
# were not collected by the earlier loop. Consider removing the cell.
for page_url in page_urls:
    print(page_url)
    driver.get(page_url)
    page_soup = BeautifulSoup(driver.page_source, 'lxml')

    get_inv_trust_data(page_soup, investment_trusts)

https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=0&it_search_input=q
https://www.hl.co.uk/shares/investment-trusts/search-for-investment-trusts?offset=0&it_search_input=x


In [45]:
# Display the collected trusts. The whole list is rendered as the cell output;
# a slice such as investment_trusts[:10] would keep the saved notebook shorter.
investment_trusts

[InvestmentTrust(symbol='AEI', name='Abrdn Equity Income Trust plc', link='https://www.hl.co.uk/shares/shares-search-results/0603959', tradeable=True),
 InvestmentTrust(symbol='ALF', name='Alternative Liquidity Fund Ltd', link='https://www.hl.co.uk/shares/shares-search-results/BYRGPD6', tradeable=False),
 InvestmentTrust(symbol='AEET', name='Aquila Energy Efficiency Trust plc', link='https://www.hl.co.uk/shares/shares-search-results/BN6JYS7', tradeable=True),
 InvestmentTrust(symbol='AERS', name='Aquila European Renewables plc', link='https://www.hl.co.uk/shares/shares-search-results/BJMXQK1', tradeable=True),
 InvestmentTrust(symbol='AIE', name='Ashoka India Equity Inv Trust Plc', link='https://www.hl.co.uk/shares/shares-search-results/BF50VS4', tradeable=True),
 InvestmentTrust(symbol='CYN', name='CQS Natural Resources Growth and Income plc', link='https://www.hl.co.uk/shares/shares-search-results/0035392', tradeable=True),
 InvestmentTrust(symbol='NCYF', name='CQS New City High Yiel