In [7]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time

In [8]:
# Max 1000 property links per suburb

def get_property_links(suburb, postcode, max_pages=50, timeout=30):
    """Collect rental listing URLs for one suburb from Domain's search results.

    Result pages are requested one after another (the URL asks for
    ``sort=price-desc``) until ``max_pages`` has been reached, a page answers
    with a non-200 status, or a page contains no ``<a class="address">``
    anchors.

    Args:
        suburb: Suburb part of the URL slug, e.g. ``'essendon'``.
        postcode: Postcode part of the URL slug, e.g. ``'3040'``.
        max_pages: Highest page number that will be requested.
        timeout: Seconds to wait for each HTTP response. ``requests`` applies
            no timeout unless one is given, so without this a single stalled
            connection would block the whole crawl indefinitely.

    Returns:
        list: ``href`` values of the listing anchors, in page order. Anchors
        without an ``href`` are skipped.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
    """
    base_url = f'https://www.domain.com.au/rent/{suburb}-vic-{postcode}/?sort=price-desc&page='
    # Loop-invariant, so build it once instead of on every request.
    headers = {'User-Agent': 'PostmanRuntime/7.6.0'}
    property_links = []

    for page in range(1, max_pages + 1):
        print(f"Getting links from page {page}...")

        response = requests.get(base_url + str(page), headers=headers, timeout=timeout)

        # Any non-200 answer ends the crawl; links gathered so far are kept.
        if response.status_code != 200:
            print(f"Failed to retrieve page {page}. Status code: {response.status_code}. Exiting loop.")
            break

        soup = BeautifulSoup(response.content, 'html.parser')
        listings = soup.find_all('a', class_='address')

        # A page without listing anchors is treated as the end of the results.
        if not listings:
            print(f"No listings found on page {page}. Exiting loop.")
            break

        hrefs = (listing.get('href') for listing in listings)
        property_links.extend(href for href in hrefs if href)

    return property_links

In [9]:
# Search target: the suburb and postcode parts of the Domain URL slug.
suburb, postcode = 'essendon', '3040'

# Walk the paginated search results and gather every listing URL found.
property_links = get_property_links(suburb=suburb, postcode=postcode)

Getting links from page 1...
Getting links from page 2...
Getting links from page 3...
Getting links from page 4...
Getting links from page 5...
Getting links from page 6...
Getting links from page 7...
Getting links from page 8...
Getting links from page 9...
Getting links from page 10...
Getting links from page 11...
Getting links from page 12...
Getting links from page 13...
Getting links from page 14...
Getting links from page 15...
Getting links from page 16...
Getting links from page 17...
No listings found on page 17. Exiting loop.


In [10]:
# Wrap the collected links in a one-column DataFrame.
# NOTE: nothing is written to CSV in this cell; it only builds the frame.
df = pd.DataFrame(property_links, columns=['Property Link'])

In [11]:
import json

def extract_json_data(soup):
    """Return the first decodable JSON payload embedded in a ``<script>`` tag.

    Only script tags that have text content and whose ``type`` attribute
    contains ``'application/json'`` are considered. Candidates that fail to
    decode are skipped and the search continues with the next tag.

    Args:
        soup: Parsed document exposing ``find_all('script')``.

    Returns:
        The decoded JSON value of the first matching tag, or ``None`` when no
        tag yields decodable JSON.
    """
    for tag in soup.find_all('script'):
        payload = tag.string
        if not payload:
            continue
        if 'application/json' not in tag.attrs.get('type', ''):
            continue
        try:
            return json.loads(payload)
        except json.JSONDecodeError:
            # Malformed candidate: fall through to the next script tag.
            pass
    return None

In [12]:
def extract_property_details(soup):
    """Flatten the listing data embedded in a property page into one record.

    The page data is read from the JSON text of the
    ``<script id="__NEXT_DATA__">`` tag; the fields of interest are copied
    into a flat dict. Fields absent from the payload come back as ``None``
    (or ``[]`` for ``structured_features`` and ``nearby_schools``).

    Args:
        soup: Parsed listing page exposing ``find('script', id=...)``.

    Returns:
        dict: One record with the keys ``title`` ... ``nearby_schools``, in
        the order they are written below.

    Raises:
        ValueError: If the page has no ``__NEXT_DATA__`` script, the script
            has no text, or the text is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: If the payload lacks one of the required nested keys
            (``props``, ``pageProps``, ``layoutProps``, ``digitalData`` ...).
    """
    # Fail with an explicit message instead of an AttributeError on None when
    # the page is not a listing page (e.g. an error or block page).
    script_tag = soup.find('script', id='__NEXT_DATA__')
    script_data = script_tag.string if script_tag is not None else None
    if not script_data:
        raise ValueError("No '__NEXT_DATA__' script with JSON content found in the page")
    json_data = json.loads(script_data)

    # Extract necessary details
    page_props = json_data['props']['pageProps']
    layout_props = page_props['layoutProps']
    property_details = layout_props["digitalData"]["page"]["pageInfo"]["property"]
    component_props = page_props['componentProps']

    # 'schoolCatchment' may be missing or explicitly null; treat both as empty.
    school_catchment = component_props.get('schoolCatchment') or {}

    data = {
        'title': layout_props.get('title'),
        'description': layout_props.get('description'),
        # Key spelling kept as-is: it is the column name of the produced table.
        'street_adress': property_details.get('address'),
        'suburb': property_details.get('suburb'),
        'postcode': property_details.get('postcode'),
        'price': property_details.get('price'),
        'bedrooms': property_details.get('bedrooms'),
        'bathrooms': property_details.get('bathrooms'),
        'parking': property_details.get('parking'),
        'primary_property_type': property_details.get('primaryPropertyType'),
        'property_features': property_details.get('propertyFeatures'),
        'structured_features': property_details.get('structuredFeatures', []),
        'video_count': property_details.get('videoCount'),
        'photo_count': property_details.get('photoCount'),
        'date_listed': property_details.get('dateListed'),
        'days_listed': property_details.get('daysListed'),
        'floor_plans_count': property_details.get('floorPlansCount'),
        'virtual_tour': property_details.get('virtualTour'),
        'nbn_details': layout_props.get('nbnDetails'),
        'nearby_schools': school_catchment.get('schools', [])
    }

    return data

In [13]:
def scrape_properties(property_links, timeout=30):
    """Download every listing page and tabulate the extracted details.

    Args:
        property_links: Iterable of listing page URLs.
        timeout: Seconds to wait for each HTTP response. ``requests`` applies
            no timeout unless one is given, so without this a single stalled
            connection would block the whole run indefinitely.

    Returns:
        pandas.DataFrame: One row per URL, in input order, built from the
        dicts returned by ``extract_property_details``.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
        Exception: Anything ``extract_property_details`` raises for a page it
            cannot parse propagates unchanged and aborts the run.
    """
    headers = {'User-Agent': "PostmanRuntime/7.6.0"}
    all_properties = []

    for url in property_links:
        # Fetch the page content
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract the property details
        all_properties.append(extract_property_details(soup))

    # Build the frame once from the collected records.
    return pd.DataFrame(all_properties)

In [14]:
# Fetch and parse every collected listing page (one HTTP request per link).
property_df = scrape_properties(property_links=property_links)

In [15]:
# Write the scraped table to disk; index=False leaves the row index out of the file.
property_df.to_csv('property_details.csv', index=False)

In [16]:
# Display the scraped table as the cell result (the recorded output shows only
# the first and last rows, with the middle elided).
property_df

Unnamed: 0,title,description,street_adress,suburb,postcode,price,bedrooms,bathrooms,parking,primary_property_type,property_features,structured_features,video_count,photo_count,date_listed,days_listed,floor_plans_count,virtual_tour,nbn_details,nearby_schools
0,"12 Riverview Road, Essendon VIC 3040 - House F...","View this 5 bedroom, 2 bathroom rental house a...","12 Riverview Road, Essendon VIC 3040",Essendon,3040,$2280pw / $9907pcm,5,2,4,House,Air conditioning,"[{'name': 'Gas', 'category': 'Indoor', 'source...",0,13,2024-08-05T10:21:05.000,29,0,False,,"[{'id': '', 'educationLevel': 'combined', 'nam..."
1,"28 Violet Street, Essendon VIC 3040 - Apartmen...","View this 3 bedroom, 2 bathroom rental apartme...","28 Violet Street, Essendon VIC 3040",Essendon,3040,$950 per week,3,2,3,Apartment,,"[{'name': 'Secure Parking', 'category': 'Outdo...",0,5,2024-09-02T16:36:08.000,1,0,False,,"[{'id': '', 'educationLevel': 'secondary', 'na..."
2,"81 Deakin Street, Essendon VIC 3040 - Townhous...","View this $950/week 4 bedroom, 3 bathroom rent...","81 Deakin Street, Essendon VIC 3040",Essendon,3040,$950.00,4,3,2,Townhouse/Villa,"Secure Parking, Air conditioning, Alarm System...","[{'name': 'Internal Laundry', 'category': 'Ind...",0,5,2024-08-30T16:01:05.000,4,0,False,,"[{'id': '2139', 'educationLevel': 'primary', '..."
3,"1/20 Fletcher St, Essendon VIC 3040 - Townhous...","View this $920/week 4 bedroom, 3 bathroom rent...","1/20 Fletcher St, Essendon VIC 3040",Essendon,3040,$920.00,4,3,2,Townhouse/Villa,"Secure Parking, Air conditioning, Balcony / De...","[{'name': 'Internal Laundry', 'category': 'Ind...",0,14,2024-07-23T11:59:13.000,42,1,False,,"[{'id': '1638', 'educationLevel': 'primary', '..."
4,"10 Butler Street, Essendon VIC 3040 - House Fo...","View this 4 bedroom, 2 bathroom rental house a...","10 Butler Street, Essendon VIC 3040",Essendon,3040,$900pw / $3910pcm,4,2,2,House,"Air conditioning, Ducted Cooling, Ducted Heati...","[{'name': 'Internal Laundry', 'category': 'Ind...",0,16,2024-07-23T15:05:33.000,42,0,False,,"[{'id': '2132', 'educationLevel': 'primary', '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,"15/3 Royal Ave, Essendon North VIC 3041 - Stud...","View this 1 bedroom, 1 bathroom rental studio ...","15/3 Royal Ave, Essendon North VIC 3041",Essendon North,3041,$310 weekly,1,1,1,Apartment,"Secure Parking, Built in wardrobes, Floorboard...","[{'name': 'Heating', 'category': 'Indoor', 'so...",0,7,2024-08-08T10:22:39.000,26,0,False,,"[{'id': '1637', 'educationLevel': 'primary', '..."
306,"2/49 Bent Street, Moonee Ponds VIC 3039 - Apar...","View this 1 bedroom, 1 bathroom rental apartme...","2/49 Bent Street, Moonee Ponds VIC 3039",Moonee Ponds,3039,$310 Per Week,1,1,2,Apartment,,[],0,5,2024-03-15T10:41:03.000,172,0,False,,"[{'id': '2821', 'educationLevel': 'primary', '..."
307,"5/140 Maribyrnong Road, Moonee Ponds VIC 3039 ...","View this $310/week 1 bedroom, 1 bathroom rent...","5/140 Maribyrnong Road, Moonee Ponds VIC 3039",Moonee Ponds,3039,$310.00,1,1,1,Apartment,,"[{'name': 'Heating', 'category': 'Indoor', 'so...",0,5,2024-08-29T10:26:32.000,5,0,False,,"[{'id': '10491', 'educationLevel': 'primary', ..."
308,"5/26 Collier Crescent, Brunswick West VIC 3055...","View this 1 bedroom, 1 bathroom rental studio ...","5/26 Collier Crescent, Brunswick West VIC 3055",Brunswick West,3055,$320 Weekly,1,1,1,Apartment,Built in wardrobes,"[{'name': 'Internal Laundry', 'category': 'Ind...",1,4,2024-08-12T13:20:55.000,22,0,False,,"[{'id': '1183', 'educationLevel': 'primary', '..."
