In [2]:
import os
import time

import pandas as pd
from geopy.geocoders import GoogleV3
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Start Chrome without a visible window (headless runs save time).
print("preparing scraping tools ...")
chrome_options = Options()
for chrome_flag in ("--headless", "--start-maximized"):
    chrome_options.add_argument(chrome_flag)
driver = webdriver.Chrome(options=chrome_options)

# open exposure sites
print("accessing COVID-19 website ...")
driver.get("https://www.covid19.act.gov.au/act-status-and-response/act-covid-19-exposure-locations")

# Tick the "archived" checkbox (presumably this adds archived exposure
# locations to the table -- confirm against the page).
# Wait up to 10 seconds for the checkbox to be clickable (visible and enabled)
# rather than merely present in the DOM: clicking an element that exists but
# is not yet interactable raises instead of ticking the box.
print("selecting all exposure locations ...")
archived = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, "chkArchived1822887"))
)
archived.click()

# Collect every body row of the results table, waiting up to 10 seconds for
# the rows to be present in the DOM.
print("reading results ...")
row_locator = (By.CSS_SELECTOR, "#tableResults1822887 tbody tr")
tableRows = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(row_locator)
)

# build results
# Every table row is reduced to five of its cells (contact, location, address,
# suburb, date). The rows are gathered in a plain list and converted to a
# DataFrame once at the end, which avoids the repeated reallocation caused by
# appending to a DataFrame one row at a time.
print("recording results ...", end="\r")
start = time.time()
interval = 1
records = []
try:
    for i, row in enumerate(tableRows):
        # find_elements(By.TAG_NAME, ...) is the supported locator API; the
        # find_elements_by_* helpers were removed in Selenium 4.3.
        cells = [
            cell.get_attribute("innerText")
            for cell in row.find_elements(By.TAG_NAME, "td")
        ]
        records.append([cells[7], cells[1], cells[2], cells[3], cells[4]])
        # Refresh the progress line at most about once per elapsed second.
        if time.time() - interval > start:
            interval = interval + 1
            print(f"recording results ... {(i + 1) / len(tableRows):.1%}", end="\r")
finally:
    # Always end the browser session, even if reading a row fails, so no
    # headless Chrome process is left running. quit() closes every window,
    # so a separate close() call is not needed.
    driver.quit()

results = pd.DataFrame(
    records, columns=["contact", "location", "address", "suburb", "date"]
)
print("recording results ... complete")

# Convert the scraped date strings into timestamps; the strings are expected
# in the form "day/month/year - weekday name".
date_format = "%d/%m/%Y - %A"
results["date"] = pd.to_datetime(results["date"], format=date_format)

# geocode addresses
print("geocoding addresses ...", end="\r")
# The Google API key is read from the GOOGLE_API_KEY environment variable so
# the credential is not stored in the source. Without a key no geolocator is
# built; that is harmless while the lookup below stays disabled.
# NOTE(review): any key that was previously committed in this file should be
# revoked and replaced.
api_key = os.environ.get("GOOGLE_API_KEY")
geolocator = GoogleV3(api_key=api_key) if api_key else None
start = time.time()
interval = 1
for position, i in enumerate(results.index):
    # The geocoder lookup is currently disabled, so every row receives empty
    # coordinates. Re-enabling it requires GOOGLE_API_KEY to be set (otherwise
    # geolocator is None) and handling of a failed lookup (location is None).
    #     location = geolocator.geocode(f"{results.at[i, 'location']}, {results.at[i, 'address']}, {results.at[i, 'suburb']} ACT Australia")
    #     results.at[i, "lng"] = location.longitude
    #     results.at[i, "lat"] = location.latitude
    results.at[i, "lng"] = None
    results.at[i, "lat"] = None
    # Refresh the progress line at most about once per elapsed second; the
    # ratio uses the row position so it does not depend on the index labels.
    if time.time() - interval > start:
        interval = interval + 1
        print(f"geocoding addresses ... {(position + 1) / len(results):.1%}", end="\r")
print("geocoding addresses ... complete")

# Write the table to disk without the index column, then leave the frame as
# the final expression so the notebook displays it.
output_path = "results.csv"
results.to_csv(output_path, index=False)
results

preparing scraping tools ...
accessing COVID-19 website ...
selecting all exposure locations ...
reading results ...
recording results ... complete
geocoding addresses ... complete


Unnamed: 0,contact,location,address,suburb,date,lng,lat
0,Monitor,United Petroleum Amaroo,Katherine and Horse Park Drive,Amaroo,2021-09-05,,
1,Casual,Woolworths Gungahlin,30 Hibberson Street,Gungahlin,2021-09-05,,
2,Monitor,Metro Petroleum Mitchell,Lysaght Street,Mitchell,2021-09-04,,
3,Monitor,TSG Jamison,"Jamison Plaza, Bowman Street",Macquarie,2021-09-04,,
4,Casual,Australia War Memorial (Construction Site),Treloar Crescent,Campbell,2021-09-03,,
...,...,...,...,...,...,...,...
769,Close,"TK Maxx, Canberra Outlet Centre","X20, 337 Canberra Avenue",Fyshwick,2021-08-08,,
770,Monitor,U14 girls AFL Ainslie Red vs Belconnen Black,Aranda Playing Fields,Aranda,2021-08-08,,
771,Close,"Windsor Smith, Canberra Outlet Centre","T103, 337 Canberra Avenue",Fyshwick,2021-08-08,,
772,Investigation Location,Assembly The People's Pub,11 Lonsdale Street,Braddon,2021-08-07,,
