In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import numpy as np
from ib_async import *
from time import sleep
import numpy as np
import pandas as pd
import pycountry
from tqdm import tqdm

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

---
### Scrape available IBKR ETFs
---

In [4]:
driver = webdriver.Chrome()
url = 'https://www.interactivebrokers.ie/en/trading/products-exchanges.php#/'
driver.get(url)

try:
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'modal-content')))
    reject_button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.ID, 'gdpr-reject-all')))
    reject_button.click()
except Exception:
    print('No GDPR modal found')

sleep(2) # because the client refreshes the page after rejecting the cookies
dropdown_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.accordion_btn[tabindex="1"]')))
dropdown_button.click()

etf_checkbox = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//span[contains(text(), 'ETF')]/preceding-sibling::input[@type='checkbox']")))
etf_checkbox.click()

apply_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".btn.btn-sm.btn-primary")))
driver.execute_script("arguments[0].click();", apply_button)

# rows_per_page_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.form-select')))
# select = Select(rows_per_page_select)
# select.select_by_value('500')


# Start scraping tables
def extract_table_data():
    table = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'tableContacts')))
    headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
    rows = table.find_elements(By.TAG_NAME, 'tr')
    data = []
    for row in rows[1:]:  # Skip the header row
        cells = row.find_elements(By.TAG_NAME, 'td')
        data.append([cell.text for cell in cells])
    return pd.DataFrame(data, columns=headers)


master_df = extract_table_data()
total_pages = int(driver.find_element(By.CSS_SELECTOR, '.form-pagination span').text.strip())
for i in range(1, total_pages):
    forward_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn.btn-xs.btn-default.btn-forward')))
    driver.execute_script("arguments[0].click();", forward_button)

    page_df = extract_table_data()
    master_df = pd.concat([master_df, page_df], ignore_index=True)

products_found_text = driver.find_element(By.CSS_SELECTOR, '.text-start.fs-9.text-primary.d-inline strong').text
products_found = int(products_found_text.replace(',', ''))
driver.quit()

if len(master_df) == products_found:
    try:
        existing_df = pd.read_csv('data/ib_products.csv')
        master_df = pd.concat([existing_df, master_df]).drop_duplicates()
        print('Updating previous scrape')
    except FileNotFoundError:
        print('Previous scrape file not found. Saving this scrape')
        pass
    master_df.to_csv('data/ib_products.csv', index=False)
else:
    print(f"Number listed in site({products_found}) doesn't match number extracted({len(master_df)}). Nothing will be saved")

No GDPR modal found
Updating previous scrape


---
### Load ETFs and start up IBKR API
---

In [14]:
# Load ETF csvs
df = pd.read_csv('data/ib_products.csv')
df.columns = df.columns.str.lower()
df = df.drop('product', axis=1)
df = df.rename(columns={'exchange  *primary exchange': 'exchange', 'ibkr symbol': 'ibkr_symbol'})

regions = df['region'].unique()
region_dict = {}
for region in regions:
    if region == 'XX':
        region_dict[region] = 'XX - Other'
    else:
        country = pycountry.countries.get(alpha_2=region)
        if country:
            region_dict[region] = f"{region} - {country.name}"
        else:
            region_dict[region] = f"{region} - Unknown"

df['region'] = df['region'].map(region_dict)

# Filter to EUR etfs
df['exchange'] = df['exchange'].str.replace('*', '')
# df = df[df['currency'] == 'EUR']

---
### Contract Details
---

In [None]:
# Connect to ibkr
util.startLoop()

ib = IB()
ib.connect('127.0.0.1', 7497, clientId=1)

Error 321, reqId -1: Error validating request.-'cs' : cause - The API interface is currently in Read-Only mode.
Error 321, reqId -1: Error validating request.-'b2' : cause - The API interface is currently in Read-Only mode.
open orders request timed out
completed orders request timed out


<IB connected to 127.0.0.1:7497 clientId=1>

Error 200, reqId 188: No security definition has been found for the request, contract: Stock(symbol='09K0', exchange='GETTEX', currency='EUR')
Error 200, reqId 774: No security definition has been found for the request, contract: Stock(symbol='2SBT', exchange='IBIS', currency='EUR')
Error 200, reqId 775: No security definition has been found for the request, contract: Stock(symbol='2SBT', exchange='SMART', currency='EUR')
Error 200, reqId 1126: No security definition has been found for the request, contract: Stock(symbol='3OIS.OLD', exchange='LSE', currency='OLD')
Error 200, reqId 1127: No security definition has been found for the request, contract: Stock(symbol='3OIS.OLD', exchange='SMART', currency='OLD')
Error 200, reqId 1193: No security definition has been found for the request, contract: Stock(symbol='3SOI.OLD', exchange='LSE', currency='OLD')
Error 200, reqId 1194: No security definition has been found for the request, contract: Stock(symbol='3SOI.OLD', exchange='SMART', curren

In [15]:
# Get contract details for each ETF
try:
    contracts_df = pd.read_csv('data/contract_details.csv') 
except Exception:
    pass

if 'contracts_df' in locals() and isinstance(contracts_df, pd.DataFrame):
    merged_df = df.merge(contracts_df[['symbol', 'exchange']], on=['symbol', 'exchange'], how='left', indicator=True)
    unchecked_df = merged_df[merged_df['_merge'] == 'left_only'].drop(columns=['_merge'])
    details_dfs = []
else:
    contract_df = pd.DataFrame()
    unchecked_df = df.copy()
    details_dfs = []

for _, row in tqdm(unchecked_df.iterrows(), total=len(unchecked_df)):
    symbol = row['symbol']
    exchange = row['exchange']
    currency = row['currency']

    details_list = ib.reqContractDetails(Stock(symbol, exchange, currency))
    if not details_list:
        # print(f'{row['symbol']}')
        details_list = ib.reqContractDetails(Stock(symbol, 'SMART', currency))

    if details_list:
        details_df = util.df(details_list)
        contract_dict = vars(details_df['contract'].iloc[0])
        contract_dict = {k: v for k, v in contract_dict.items() if v}
        contract_df = pd.DataFrame([contract_dict])

        details_df = pd.concat([contract_df, details_df], axis=1)
        # details_df.replace('', np.nan, inplace=True)
        # details_df.drop('contract', axis=1, inplace=True)

        details_dfs.append(details_df)

if details_dfs:
    details_dfs = pd.concat(details_dfs, ignore_index=True)
    details_dfs.replace('', np.nan, inplace=True)

    for index, row in details_dfs.iterrows():
        for tag_value in row['secIdList']:
            tag = tag_value.tag.lower().strip()
            details_dfs.at[index, tag] = tag_value.value
    details_dfs.drop(columns=['secIdList'], inplace=True)

    details_dfs = details_dfs.loc[:, details_dfs.isna().mean() != 1]
    contracts_df = pd.concat([contracts_df, details_dfs]).drop_duplicates().reset_index(drop=True)
    contracts_df.to_csv('data/contract_details.csv', index=False)
    display(details_dfs)
else:
    print('None found')

  4%|▎         | 47/1270 [00:03<01:28, 13.76it/s]


KeyboardInterrupt: 