In [33]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select

# Load the suburb lookup table; the index is the suburb name and the
# "state" column is read by the scraping loop.
suburbs = pd.read_csv("./valid_suburbs.csv", index_col="suburb")

# Resume from rows scraped on a previous run. On a first run the checkpoint
# file does not exist yet, so start from an empty frame whose columns match
# the list returned by search_for_suburb().
try:
    data = pd.read_csv("./demographics.csv")
except FileNotFoundError:
    data = pd.DataFrame(columns=["suburb", "state", "income", "pop_before", "pop_now"])

# One "-" placeholder per suburb.
# NOTE(review): not referenced by the visible cells - confirm it is still needed.
holder_values = ["-"] * len(suburbs)

# Browser options. The window is opened maximized; headless mode is currently
# disabled - uncomment the line below to run without a visible window.
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument("--headless")

# set up the browser
driver = webdriver.Chrome(options=chrome_options)

# Search page of the ABS QuickStats tool (used for both census years).
_QUICKSTATS_URL = "https://www.abs.gov.au/websitedbs/D3310114.nsf/Home/2016%20QuickStats"
# CSS selectors of the values scraped from a QuickStats result page.
_POPULATION_SELECTOR = "#summaryTableAP .summaryData"
_INCOME_SELECTOR = ".qsDwelling tr:nth-child(3) .summaryData"
# Maximum number of seconds to wait for any element to appear.
_WAIT_SECONDS = 10


def _wait_for(locator, all_matches=False):
    ''' waits until the element(s) matching `locator` are present and returns them '''
    condition = (
        EC.presence_of_all_elements_located
        if all_matches
        else EC.presence_of_element_located
    )
    return WebDriverWait(driver, _WAIT_SECONDS).until(condition(locator))


def _enter_suburb_query(suburb, year=None):
    ''' opens the search page, optionally selects a census year, and types the suburb '''
    driver.get(_QUICKSTATS_URL)
    if year is not None:
        Select(_wait_for((By.CLASS_NAME, "gwt-ListBox"))).select_by_value(year)
    search_field = _wait_for((By.CLASS_NAME, "gwt-SearchWidget-SuggestBox-Fade"))
    search_field.clear()
    driver.execute_script("arguments[0].scrollIntoView();", search_field)
    # type one character at a time, as the original did
    for letter in suburb:
        search_field.send_keys(letter)
    # fixed pause before the suggestion list is read
    time.sleep(2)


def _submit_matching_suggestion(suburb, state):
    ''' clicks the first "SSC" suggestion naming suburb and state, then the search button '''
    suggestions = _wait_for((By.CLASS_NAME, "suggestion-state"), all_matches=True)
    suggestion_types = _wait_for((By.CLASS_NAME, "suggestion-type"), all_matches=True)
    print(len(suggestions), "options")
    # zip() keeps the two lists in step and cannot run past the shorter one,
    # unlike indexing both lists with one counter.
    for suggestion, suggestion_type in zip(suggestions, suggestion_types):
        label = suggestion.get_attribute("innerText")
        kind = suggestion_type.get_attribute("innerText")
        if (suburb in label) and (state in label) and ("SSC" in kind):
            suggestion.click()
            print("found it!")
            # find_element(By...) replaces find_element_by_class_name, which
            # was removed in Selenium 4.
            driver.find_element(By.CLASS_NAME, "gwt-SearchWidget-Button").click()
            return True
    print("run out of options")
    print("failed at", suburb)
    return False


def search_for_suburb(suburb, state):
    ''' scrapes QuickStats figures for one suburb

    Runs the search twice: once with the page's default year selection
    (read as "now") and once with the year list set to "2006" (read as
    "before").

    Parameters:
        suburb: suburb name typed into the search box and matched
            (as a substring) against the suggestion text.
        state: state abbreviation that must also appear in the suggestion.

    Returns:
        [suburb, state, income, pop_before, pop_now] where the scraped
        values are the raw innerText strings. pop_before is "-" when the
        2006 suggestion/result lookup fails.

    Raises:
        Any Selenium error raised during the first (default-year) lookup,
        or while loading the 2006 search form, is propagated to the caller.
    '''
    print("search:", suburb, state)

    # --- current figures (default year selection) ---
    _enter_suburb_query(suburb)
    # NOTE(review): when no suggestion matches, nothing is clicked, so the
    # waits below are presumably left to time out and raise, as before.
    _submit_matching_suggestion(suburb, state)
    pop_now_data = _wait_for((By.CSS_SELECTOR, _POPULATION_SELECTOR)).get_attribute("innerText")
    income_data = _wait_for((By.CSS_SELECTOR, _INCOME_SELECTOR)).get_attribute("innerText")

    # --- 2006 figures (best effort) ---
    _enter_suburb_query(suburb, year="2006")
    try:
        _submit_matching_suggestion(suburb, state)
        pop_before_data = _wait_for((By.CSS_SELECTOR, _POPULATION_SELECTOR)).get_attribute("innerText")
    except Exception as err:
        # Catch Exception rather than using a bare except, so Ctrl-C
        # (KeyboardInterrupt) still stops a long scraping run.
        print("no 2006 data for", suburb, "-", type(err).__name__)
        pop_before_data = "-"

    return [suburb, state, income_data, pop_before_data, pop_now_data]

# Suburb to resume scraping from; rows for earlier suburbs are expected to be
# in `data` already. Change this when restarting after an interruption.
START_SUBURB = "Monash"

# iterate through suburbs, beginning at the resume point
index = list(suburbs.index).index(START_SUBURB)
try:
    for suburb in suburbs.index[index:]:
        data.loc[len(data)] = search_for_suburb(suburb, suburbs.loc[suburb].state)
        print(len(data))
        # checkpoint after every suburb so an interrupted run loses nothing
        data.to_csv("demographics.csv", index=False)
finally:
    # always close the browser, even when a lookup raises part-way through
    driver.quit()
data

search: Monash ACT
6 options
found it!
9 options
found it!
71
search: Narrabundah ACT
3 options
found it!
2 options
found it!
72
search: Ngunnawal ACT
2 options
found it!
2 options
found it!
73
search: Nicholls ACT
3 options
found it!
3 options
found it!
74
search: Oaks Estate ACT
1 options
found it!
2 options
found it!
75
search: O'Connor ACT
5 options
found it!
5 options
found it!
76
search: O'Malley ACT
2 options
found it!
2 options
found it!
77
search: Oxley ACT
15 options
found it!
13 options
found it!
78
search: Page ACT
9 options
found it!
7 options
found it!
79
search: Palmerston ACT
11 options
found it!
8 options
found it!
80
search: Pearce ACT
9 options
found it!
8 options
found it!
81
search: Phillip ACT
8 options
found it!
9 options
found it!
82
search: Queanbeyan NSW
15 options
found it!
9 options
found it!
83
search: Queanbeyan East NSW
1 options
found it!
84
search: Queanbeyan West NSW
2 options
found it!
85
search: Red Hill ACT
13 options
found it!
8 options
found it!
8

Unnamed: 0,suburb,state,income,pop_before,pop_now
0,Ainslie,ACT,"$2,027",4815,5189
1,Amaroo,ACT,"$2,456",5502,5710
2,Aranda,ACT,"$2,414",2412,2393
3,Banks,ACT,"$2,148",4907,4967
4,Barton,ACT,"$2,356",940,1439
...,...,...,...,...,...
99,Waramanga,ACT,"$1,875",2535,2629
100,Watson,ACT,"$1,962",4188,5862
101,Weetangera,ACT,"$2,699",2544,2576
102,Weston,ACT,"$2,096",3176,3576


In [32]:
# preview the last 50 rows of the suburb lookup table
suburbs.iloc[-50:]

Unnamed: 0_level_0,state
suburb,Unnamed: 1_level_1
Isaacs,ACT
Isabella Plains,ACT
Jerrabomberra,NSW
Kaleen,ACT
Kambah,ACT
Karabar,NSW
Kingston,ACT
Latham,ACT
Lyneham,ACT
Lyons,ACT
