In [6]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Scrape contract notices from tenders.gov.au for every ABN listed in
# ./new_business_data.csv and write them to contract_data.csv.

# read in business data (ABNs are kept as strings so they are used verbatim)
data = pd.read_csv("./new_business_data.csv", dtype={"abn": "str"})

# use a headless browser (saves time)
chrome_options = Options()
chrome_options.add_argument("--headless")

# Column order of the output table.
CONTRACT_COLUMNS = ["abn", "cn", "agency", "category", "period", "value"]

# Label text of a "list-desc" row -> (output column, CSS selector of its value).
# The CN ID value is read from the link inside the inner element; every other
# value is read from the inner element itself.
FIELD_SELECTORS = {
    "CN ID:": ("cn", ".list-desc-inner a"),
    "Agency:": ("agency", ".list-desc-inner"),
    "Category:": ("category", ".list-desc-inner"),
    "Contract Period:": ("period", ".list-desc-inner"),
    "Contract Value (AUD):": ("value", ".list-desc-inner"),
}

# Seconds to wait for a results page to appear / be replaced.
PAGE_TIMEOUT = 10

# set up the browser
driver = webdriver.Chrome(options=chrome_options)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; growing a DataFrame one row at a time gets slower with every row.
rows = []

try:
    # begin loop
    for i, abn in enumerate(data["abn"]):
        driver.get(f"https://www.tenders.gov.au/Search/CnAdvancedSearch?SupplierAbn={abn}")
        print(f"\nScraping results for business {abn} ... (business {i + 1} of {len(data)})")
        while True:
            # wait for results to load; give up on this business (not on the
            # whole run) if the results container never shows up
            try:
                results = WebDriverWait(driver, PAGE_TIMEOUT).until(
                    EC.visibility_of_element_located((By.CLASS_NAME, "search-results"))
                )
            except TimeoutException:
                print(f"results did not load within {PAGE_TIMEOUT} seconds, skipping")
                break

            # iterate through contracts
            contracts = driver.find_elements(By.TAG_NAME, "article")
            for contract in contracts:
                # Start every contract from a blank record so that a missing
                # field stays empty instead of inheriting the previous
                # contract's value.
                record = dict.fromkeys(CONTRACT_COLUMNS)
                record["abn"] = abn
                for detail in contract.find_elements(By.CLASS_NAME, "list-desc"):
                    label = detail.find_element(By.TAG_NAME, "span").get_attribute("innerText")
                    field = FIELD_SELECTORS.get(label)
                    if field is None:
                        continue  # a row we do not collect
                    column, selector = field
                    record[column] = detail.find_element(
                        By.CSS_SELECTOR, selector
                    ).get_attribute("innerText")
                rows.append(record)

            # is there a 'next' page?
            try:
                next_link = driver.find_element(By.CSS_SELECTOR, "a[aria-label='Next page']")
            except NoSuchElementException:
                print(len(contracts), "contracts scraped, no more")
                break

            print(len(contracts), "contracts scraped, more to come")
            # An element of the current page that must disappear once the next
            # page has replaced it. Without this wait the "search-results"
            # check above would pass immediately on the *old* page.
            old_content = contracts[0] if contracts else results
            try:
                next_link.click()
                WebDriverWait(driver, PAGE_TIMEOUT).until(EC.staleness_of(old_content))
            except WebDriverException as exc:
                # Best effort, as before: stop paging for this business rather
                # than aborting the whole run.
                print(f"could not advance to the next page ({type(exc).__name__}), moving on")
                break
finally:
    # Runs on success, on errors and on Ctrl-C alike: keep whatever has been
    # scraped so far and always shut the browser down.
    try:
        contract_data = pd.DataFrame(rows, columns=CONTRACT_COLUMNS)
        contract_data.to_csv("contract_data.csv", index=False)
    finally:
        driver.quit()


Scraping results for business 86611502713 ... (business 1 of 2522)
86611502713 CN3642373-A4 Department of Defence Building construction and support and maintenance and repair services 8-Nov-2019 to 26-May-2022 $11,309,053.66
86611502713 CN3651064-A1 Department of Defence Building construction and support and maintenance and repair services 19-Dec-2019 to 26-May-2022 $1,080,533.84
86611502713 CN3539285-A4 Department of Defence Building construction and support and maintenance and repair services 27-Aug-2018 to 26-May-2022 $3,820,222.62
86611502713 CN3565166-A2 Department of Defence Building construction and support and maintenance and repair services 10-Jan-2019 to 26-May-2022 $2,677,638.70
86611502713 CN3717367 Department of Defence Building construction and support and maintenance and repair services 14-Sep-2020 to 26-May-2022 $2,606,516.00
86611502713 CN3716407 Department of Defence Building construction and support and maintenance and repair services 9-Sep-2020 to 26-May-2022 $2,18

KeyboardInterrupt: 