In [82]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# load in existing data
suburbs = pd.read_csv("./suburbs.csv")
suburbs = suburbs["name"]

# use a headless browser (saves time)
chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_options.add_argument("--start-maximized")

# set up the browser
driver = webdriver.Chrome(options=chrome_options)

# establish database
data = pd.read_csv("scrapedData.csv")

# cycle through suburbs
for suburb in suburbs[103:]:

    driver.get("https://www.allhomes.com.au/ah/research/property-and-past-sales")
    enter_suburb = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "ss"))
    )
    enter_suburb.clear()
    enter_suburb.send_keys(suburb)
    
    location_suggestions = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#ui-id-1 .ui-menu-item"))
    )

    for suggestion in location_suggestions:        
        if suggestion.find_element_by_tag_name("img").get_attribute("src") == "https://www.allhomes.com.au/ah/image/searchIcons/division-icon.png?v2":
            suggestion_details = suggestion.find_element_by_tag_name("div").get_attribute("innerText").split(", ")            
            if (suggestion_details[0].lower() == suburb.lower()) & (suggestion_details[1] == "ACT"):
                print(suggestion_details)
                suggestion.click()                
                break
    
    time.sleep(1)
    headers = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "research-header"))
    )
    
    for header in headers:
        if header.find_element_by_tag_name("span").get_attribute("innerText") == "Suburbs and Towns":            
            suburbLink = header.find_element_by_xpath("..").find_element_by_tag_name("a")
            suburbLink.click()
            break
    
    years = ["2019", "2020", "2021"]
    for year in years:
        print("Scraping sales in", suburb, "in", year, "...")
        time.sleep(1)
        try:
            saleYears = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.research-year-button"))
            )               
            for saleYear in saleYears:
                if saleYear.get_attribute("innerText") == year:        
                    saleYear.click()
                    break

            sales = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "research-table-row"))
            )
            for sale in sales:
                address = sale.find_element_by_css_selector(".research-address-link span:last-child").get_attribute("innerText").strip()
                bedrooms = sale.find_element_by_css_selector(".research-house-stats span:nth-child(3)").get_attribute("innerText").strip()
                date = sale.find_element_by_class_name("research-table-column").get_attribute("innerText").strip()            
                price = sale.find_element_by_css_selector(".research-table-column:nth-child(4)").get_attribute("innerText").strip()
                block = sale.find_element_by_css_selector(".research-table-column:nth-child(5)").get_attribute("innerText").strip()
                source = sale.find_element_by_css_selector(".expanded-details-col1 .research-details-value:last-child").get_attribute("innerText").strip()
                purpose = sale.find_element_by_css_selector(".expanded-details-col2 .research-details-value:nth-child(11)").get_attribute("innerText").strip()

                data.loc[len(data)] = [suburb, year, address, bedrooms, date, price, block, source, purpose]
            
            data.to_csv("scrapedData.csv", index=False)
            driver.back()
            
        except:
            print("No sale data for", suburb)

print("Scraping complete")

driver.close()
driver.quit()

['Strathnairn', 'ACT', '2615']
Scraping sales in Strathnairn in 2019 ...
Scraping sales in Strathnairn in 2020 ...
Scraping sales in Strathnairn in 2021 ...
['Symonston', 'ACT', '2609']
Scraping sales in Symonston in 2019 ...
Scraping sales in Symonston in 2020 ...
Scraping sales in Symonston in 2021 ...
['Taylor', 'ACT', '2913']
Scraping sales in Taylor in 2019 ...
Scraping sales in Taylor in 2020 ...
Scraping sales in Taylor in 2021 ...
['Tharwa', 'ACT', '2620']
Scraping sales in Tharwa in 2019 ...
Scraping sales in Tharwa in 2020 ...
No sale data for Tharwa
Scraping sales in Tharwa in 2021 ...
['Theodore', 'ACT', '2905']
Scraping sales in Theodore in 2019 ...
Scraping sales in Theodore in 2020 ...
Scraping sales in Theodore in 2021 ...
['Throsby', 'ACT', '2914']
Scraping sales in Throsby in 2019 ...
Scraping sales in Throsby in 2020 ...
Scraping sales in Throsby in 2021 ...
['Torrens', 'ACT', '2607']
Scraping sales in Torrens in 2019 ...
Scraping sales in Torrens in 2020 ...
Scrapin

In [80]:
# Zero-based position of "Strathnairn" in the suburb list; the scraping cell
# uses this number as the start of its suburb slice.
list(suburbs).index("Strathnairn")

103

In [81]:
# Drop every row recorded for Strathnairn, overwrite the CSV with what is
# left, and display the result.
# NOTE(review): presumably done so the suburb can be re-scraped from scratch - confirm.
data = data.loc[data["suburb"] != "Strathnairn"]
data.to_csv("scrapedData.csv", index=False)
data

Unnamed: 0,suburb,year,address,bedrooms,contract date,price,block size,source,purpose
0,Acton,2019,28/1 Gordon Street *,2,11/07/2019,"$630,000",0m2,Allhomes Record,–
1,Acton,2020,1204/19 Marcus Clarke Street *,2,30/06/2020,"$880,000",0m2,Allhomes Record,–
2,Acton,2020,G04/19 Marcus Clarke Street *,1,02/12/2020,"$557,000",0m2,Allhomes Record,–
3,Acton,2021,1311/25 Edinburgh Avenue *,3,08/05/2021,"$1,240,000",0m2,Allhomes Record,–
4,Acton,2021,906/19 Marcus Clarke Street *,2,14/04/2021,"$795,000",0m2,Allhomes Record,–
...,...,...,...,...,...,...,...,...,...
32027,Stirling,2021,89 Fremantle Drive *,2,09/07/2021,"$385,000",–,Allhomes Record,–
32028,Stirling,2021,88 Fremantle Drive,–,15/01/2021,$0,807m2,Government Record,Single residential dwelling
32029,Stirling,2021,41/121 Streeton Drive,–,15/01/2021,$0,28761m2,Government Record,Multiple unit dwelling
32030,Stirling,2021,10 Frayne Place,–,03/05/2021,$0,669m2,Government Record,Single residential dwelling
