In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd
import numpy as np
from ib_async import *
from time import sleep
import numpy as np
import pandas as pd
import pycountry
from tqdm.auto import tqdm

# Lift pandas' display limits so wide frames and long cell values are shown in full.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [None]:
# Scrape product list
# Interactive cell: opens Chrome on the IBKR products/exchanges page, lets the
# user pick filters by hand, then walks every result page and merges the scraped
# rows into data/ib_products.csv.
if input('Scrape new products? (y/n)').lower().strip() == 'y':

    def extract_table_data():
        """Return the product table currently shown in the browser as a DataFrame.

        Waits up to 20 seconds for the element with id ``tableContacts``, uses the
        text of its ``th`` elements as column names and the ``td`` text of every
        row after the first as data. Reads the enclosing ``driver`` variable.
        """
        table = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, 'tableContacts')))
        headers = [header.text for header in table.find_elements(By.TAG_NAME, 'th')]
        rows = table.find_elements(By.TAG_NAME, 'tr')
        data = [
            [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]
            for row in rows[1:]  # Skip the header row
        ]
        return pd.DataFrame(data, columns=headers)

    driver = webdriver.Chrome()
    try:
        url = 'https://www.interactivebrokers.ie/en/trading/products-exchanges.php'
        driver.get(url)
        driver.maximize_window()

        input('Navigate to PRODUCT TAB, and pick desired filters')

        # Collect one frame per page and concatenate once at the end instead of
        # re-copying the growing master frame on every iteration.
        page_dfs = [extract_table_data()]
        total_pages = int(driver.find_element(By.CSS_SELECTOR, '.form-pagination span').text.strip())
        for _page in range(1, total_pages):
            forward_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn.btn-xs.btn-default.btn-forward')))
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", forward_button)

            # NOTE(review): the wait inside extract_table_data only checks that the
            # table element is present; if the site refreshes rows asynchronously
            # after the click, this could re-read the previous page. The row-count
            # check below is the only safeguard - confirm against the live site.
            page_dfs.append(extract_table_data())
        master_df = pd.concat(page_dfs, ignore_index=True)

        products_found_text = driver.find_element(By.CSS_SELECTOR, '.text-start.fs-9.text-primary.d-inline strong').text
        products_found = int(products_found_text.replace(',', ''))
    finally:
        # Always close the browser, even when a wait times out or parsing fails.
        driver.quit()

    # Only persist the scrape when it matches the total advertised by the site.
    if len(master_df) == products_found:
        try:
            # Scraped cells are all strings ('' when empty). Read the previous file
            # the same way, otherwise NaN / numeric inference makes identical rows
            # look different and drop_duplicates() keeps both copies.
            existing_df = pd.read_csv('data/ib_products.csv', dtype=str, keep_default_na=False)
            master_df = pd.concat([existing_df, master_df], ignore_index=True).drop_duplicates()
            print('Updating previous scrape')
        except FileNotFoundError:
            print('Previous scrape file not found. Saving this scrape')
        master_df.to_csv('data/ib_products.csv', index=False)
    else:
        print(f"Number listed in site({products_found}) doesn't match number extracted({len(master_df)}). Nothing will be saved")

In [None]:
# Get missing symbols
# Loads the scraped product list, normalises its column names and works out which
# (symbol, currency, exchange) combinations are not yet present in the cached
# contract details.
df = pd.read_csv('data/ib_products.csv')
df.columns = df.columns.str.lower()
df = df.rename(columns={'exchange  *primary exchange': 'exchange', 'ibkr symbol': 'ibkr_symbol'})
# '*' is a literal character here, not a regex quantifier; be explicit so the
# result does not depend on the pandas version's default for `regex`.
df['exchange'] = df['exchange'].str.replace('*', '', regex=False)

# Columns that identify a product in both the scrape and the cache.
merge_cols = ['symbol', 'currency', 'exchange']
# Accumulator for per-product contract detail frames fetched from IBKR.
details_dfs = []

# Get contract details for each ETF
try:
    contracts_df = pd.read_csv('data/contract_details.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable cache yet: every product still has to be looked up.
    contracts_df = pd.DataFrame()
    unchecked_df = df[merge_cols].copy()
else:
    try:
        # Left-merge with an indicator column; rows marked 'left_only' exist in
        # the product list but not in the cache.
        merged_df = df.merge(contracts_df[merge_cols], on=merge_cols, how='left', indicator=True)
        unchecked_df = merged_df[merged_df['_merge'] == 'left_only'].drop(columns=['_merge'])
    except (KeyError, ValueError) as merge_error:
        # The cache was read but cannot be matched (missing key columns or
        # incompatible dtypes). Re-check every product, but keep the loaded cache
        # so a later save does not wipe the existing file contents.
        print(f'Could not match cached contract details ({merge_error!r}); re-checking all products')
        unchecked_df = df[merge_cols].copy()

In [None]:
# Connect to ibkr
# Create the IB client once per kernel session; re-running the cell reuses it.
if 'ib' not in globals():
    util.startLoop()

    ib = IB()

# Reconnect when the client exists but its session is down (for example after the
# gateway was restarted); previously a stale `ib` was silently left disconnected.
if not ib.isConnected():
    ib.connect('127.0.0.1', 7497, clientId=2)

In [None]:
# Request contract details from IBKR for every unchecked product, clean the
# result and merge it into the cached contract details CSV.

# Start from an empty accumulator so re-running this cell does not append the
# same contracts a second time.
details_dfs = []

for _, row in tqdm(unchecked_df.iterrows(), total=len(unchecked_df)):
    symbol = row['symbol']
    exchange = row['exchange']
    currency = row['currency']

    # Try the listed exchange first, then retry with 'SMART' as the exchange.
    details_list = ib.reqContractDetails(Stock(symbol, exchange, currency))
    if not details_list:
        details_list = ib.reqContractDetails(Stock(symbol, 'SMART', currency))

    if details_list:
        details_df = util.df(details_list)
        # Flatten the first result's contract object into columns, keeping only
        # attributes that hold a truthy value.
        contract_dict = vars(details_df['contract'].iloc[0])
        contract_dict = {k: v for k, v in contract_dict.items() if v}
        contract_df = pd.DataFrame([contract_dict])

        details_df = pd.concat([contract_df, details_df], axis=1)
        details_df = details_df.drop('contract', axis=1)

        details_dfs.append(details_df)

if details_dfs:
    details_df = pd.concat(details_dfs, ignore_index=True)
    pd.set_option('future.no_silent_downcasting', True)
    details_df = details_df.replace('', np.nan)
    details_df = details_df.dropna(axis=1, how='all')
    details_df = details_df.dropna(axis=0)

    # Expand each secIdList entry into its own column named after the
    # lower-cased tag.
    for index, row in details_df.iterrows():
        for tag_value in row['secIdList']:
            tag = tag_value.tag.lower().strip()
            details_df.at[index, tag] = tag_value.value
    details_df = details_df.drop(columns=['secIdList'])

    # Drop columns that carry a single value, but never the columns that the
    # cache merge and the sort below depend on - a batch in which every contract
    # shares e.g. one currency would otherwise lose that column.
    key_columns = {'symbol', 'currency', 'exchange', 'primaryExchange'}
    single_value_columns = [
        col for col in details_df.columns
        if col not in key_columns and details_df[col].nunique() == 1
    ]
    details_df = details_df.drop(columns=single_value_columns, errors='ignore')

    cols_to_drop = ['suggestedSizeIncrement', 'sizeIncrement', 'minSize', 'marketRuleIds', 'aggGroup', 'liquidHours', 'tradingHours', 'timeZoneId', 'priceMagnifier', 'orderTypes', 'minTick', 'localSymbol', 'tradingClass', 'marketName']
    # errors='ignore': some of these may already be gone (all-NaN or
    # single-valued), which previously raised KeyError here.
    details_df = details_df.drop(columns=cols_to_drop, errors='ignore').dropna(axis=0)
    contracts_df = pd.concat([contracts_df, details_df]).drop_duplicates().reset_index(drop=True)

    # Sort by currency and exchanges
    currency_trade_volume_order = ["EUR", "USD", "JPY", "GBP", "CNY", "AUD", "CAD", "CHF", "SGD", "HKD", "SEK", "NOK", "MXN", "INR", "RUB", "PLN", "TWD", "ZAR", "DKK", "ILS", "MYR", "SAR", "HUF", ]
    contracts_df['currency_ordered'] = pd.Categorical(contracts_df['currency'], categories=currency_trade_volume_order, ordered=True)
    # "European" exchanges are taken to be the primary exchanges of EUR contracts.
    eur_exchanges = contracts_df[contracts_df['currency'] == 'EUR'].primaryExchange.unique()
    contracts_df = (contracts_df
                    .assign(exchange_is_european=contracts_df['exchange'].isin(eur_exchanges))
                    .assign(primary_is_european=contracts_df['primaryExchange'].isin(eur_exchanges))
                    .sort_values(by=['currency_ordered', 'exchange_is_european', 'primary_is_european'], ascending=[True, False, False])
                    .drop(columns=['currency_ordered', 'exchange_is_european', 'primary_is_european'])
                    )

    contracts_df.to_csv('data/contract_details.csv', index=False)
else:
    print('None found')

In [None]:
# Sort by currency and exchanges
# Orders the cached contracts by currency (in the order listed below), then puts
# rows whose exchange / primary exchange is one used by EUR contracts first, and
# writes the result back to the cache file.
if contracts_df.empty:
    # No cache file and nothing fetched: there are no columns to sort by, and
    # indexing 'currency' would raise KeyError.
    print('No contract details to sort or save')
else:
    currency_trade_volume_order = ["EUR", "USD", "JPY", "GBP", "CNY", "AUD", "CAD", "CHF", "SGD", "HKD", "SEK", "NOK", "MXN", "INR", "RUB", "PLN", "TWD", "ZAR", "DKK", "ILS", "MYR", "SAR", "HUF"]
    # Temporary ordered categorical; currencies missing from the list become NaN.
    contracts_df['currency_ordered'] = pd.Categorical(contracts_df['currency'], categories=currency_trade_volume_order, ordered=True)
    eur_exchanges = contracts_df[contracts_df['currency'] == 'EUR'].primaryExchange.unique()
    contracts_df = (contracts_df
                    .assign(exchange_is_european=contracts_df['exchange'].isin(eur_exchanges))
                    .assign(primary_is_european=contracts_df['primaryExchange'].isin(eur_exchanges))
                    .sort_values(by=['currency_ordered', 'exchange_is_european', 'primary_is_european'], ascending=[True, False, False])
                    .drop(columns=['currency_ordered', 'exchange_is_european', 'primary_is_european'])
                    )

    contracts_df.to_csv('data/contract_details.csv', index=False)