In [None]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Scrape one supplier-profile page per business ID and collect the business
# name, trading name, individual's name, ABN and ACN into a CSV file.
# Fields that cannot be read are recorded as False.

# read in business IDs (the CSV must provide a "businessIds" column)
ids = pd.read_csv("./businesses.csv")

# use a headless browser (saves time)
chrome_options = Options()
chrome_options.add_argument("--headless")

# set up the browser and the results table (one row per business, indexed by ID)
driver = webdriver.Chrome(options=chrome_options)
business_details = pd.DataFrame(columns=["biz_name", "trading_name", "individual", "abn", "acn"])

OUTPUT_PATH = "./business_details.csv"
TRADING_PREFIX = "Trading as: "
CHECKPOINT_EVERY = 10  # write partial results to disk after this many businesses
total = len(ids)

try:
    # loop through businesses
    for position, business_id in enumerate(ids["businessIds"], start=1):
        print(f"Scraping {business_id} ... ({position} of {total})")
        driver.get(f"https://ibd.supplynation.org.au/public/s/supplierprofile?accid={business_id}")

        # A timeout here still propagates (as before); the `finally` below
        # makes sure everything scraped so far is saved first.
        header = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, ".container .slds-col"))
        )

        # Fixed pause kept from the original; presumably lets dynamically
        # rendered content settle before it is read -- TODO confirm.
        time.sleep(2)

        # Catch Selenium errors only, so Ctrl-C and genuine programming
        # errors are no longer swallowed the way a bare `except:` did.
        try:
            biz_name = driver.find_element(By.CLASS_NAME, "profile-title").get_attribute("innerText")
        except WebDriverException:
            biz_name = False

        # NOTE(review): the div -> parent -> div hop is kept exactly as in the
        # original; it looks like it may resolve to the same first <div> --
        # confirm before simplifying.
        try:
            heading_text = (
                header.find_element(By.TAG_NAME, "div")
                .find_element(By.XPATH, "..")
                .find_element(By.TAG_NAME, "div")
                .get_attribute("innerText")
            ) or ""
        except WebDriverException:
            heading_text = ""

        # Take whatever follows the prefix instead of slicing a fixed 12
        # characters, which was only correct when the prefix sat at index 0.
        _, prefix_found, after_prefix = heading_text.partition(TRADING_PREFIX)
        trading_name = after_prefix if prefix_found else False

        body = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, ".slds-media:nth-child(3) .slds-media__body"))
        )

        try:
            individual_name = body.find_element(By.TAG_NAME, "span").get_attribute("innerText")
        except WebDriverException:
            individual_name = False

        # Labels are the odd children and values the even ones; zip() pairs
        # them up and tolerates a trailing label that has no value.
        labels = body.find_elements(By.CSS_SELECTOR, ".slds-text-longform:nth-child(odd)")
        values = body.find_elements(By.CSS_SELECTOR, ".slds-text-longform:nth-child(even)")
        abn, acn = False, False
        for label, value in zip(labels, values):
            label_text = label.get_attribute("innerText") or ""
            if "ABN" in label_text:
                abn = value.get_attribute("innerText")
            elif "ACN" in label_text:
                acn = value.get_attribute("innerText")

        print(biz_name, trading_name, individual_name, abn, acn)
        business_details.loc[business_id] = [biz_name, trading_name, individual_name, abn, acn]

        # periodic checkpoint so a long run can be inspected while in progress
        if position % CHECKPOINT_EVERY == 0:
            business_details.to_csv(OUTPUT_PATH, index_label="id")
finally:
    # Always write the final state (the periodic checkpoint alone dropped the
    # last few rows) and always release the browser, even after an error.
    try:
        business_details.to_csv(OUTPUT_PATH, index_label="id")
    finally:
        driver.quit()

Scraping a1G7F000000SlZkUAK ... (1 of 2576)
Pattemore Consultants Pty Ltd Pattemore Consultants Peter Pattemore 86611502713 611502713
Scraping a1G7F000000SlYOUA0 ... (2 of 2576)
Skoor Pty Ltd Skoor Pty Ltd - Express Pest Control Peter Rooks 23611925725 611925725
Scraping a1G7F000000SlYJUA0 ... (3 of 2576)
Skoor Pty Ltd Express Car Cleaning NT Peter Rooks 23611925725# 611925725
Scraping a1G7F000000Nr11UAC ... (4 of 2576)
PINNACLE PLUMBING NT Pinnacle Plumbing NT Stephen Stavrinos 91363159020 False
Scraping a1G7F00000029uCUAQ ... (5 of 2576)
Mak Mak Constructions Pty Ltd False Rodney Illingworth 90627206660 627206660
Scraping a1G7F0000002KgVUAU ... (6 of 2576)
Winya Indigenous Office Furniture PTY LTD Winya Indigenous Office Furniture Greg Welsh 97604704065 604704065
Scraping a1G7F000001hJePUAU ... (7 of 2576)
Innovative Indigenous Building Solutions (NT) Pty Ltd Innovative Indigenous Building Solutions (NT) Mark Moodie 95635121176 635121176
Scraping a1G7F00000023wIUAQ ... (8 of 2576)
Al