In [1]:
import re
import json
import asyncio
import jmespath
from httpx import AsyncClient, Response
from parsel import Selector
from typing import List, Dict

In [2]:
# Shared HTTP client reused by every request in this notebook.
client = AsyncClient(
    # add basic browser headers to minimize blocking chances
    headers={
        # a dict literal keeps only the last value of a repeated key, so this
        # header is declared exactly once, with a comma-separated language list
        "accept-language": "en-US,en;q=0.9",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        # NOTE(review): advertising "br" assumes a brotli decoder is installed
        # alongside httpx - confirm, or drop "br" from this list
        "accept-encoding": "gzip, deflate, br",
    },
    # NOTE(review): hardcoded cookie value; presumably a short-lived anti-bot or
    # session token - confirm, and consider loading it from configuration instead
    cookies={
        "KP_UIDz-ssn": "0ba9fpglbev2UIAHxkzT1MWt3G1EDEiiiKCvfPfP3bTuXFXHjKAlJoFMjPCykPtES0dZHXZz3taVEBzQNjdXaIfe0G8Vxh2fQDxfsZjpC3W5Ls7jXLnsWGOHffPCSBChRdlXvlgVzu8wNIJXnTje6rsJYtHeChFly1DJNad",
    },
)

    
def parse_property_data(data: Dict) -> Dict:
    """Reduce a raw listing JSON object to the subset of fields we keep.

    Falsy input (``None``, empty dict) is passed over and ``None`` is returned.
    """
    if not data:
        return None
    # JMESPath projection that selects and renames the listing fields
    listing_query = """{
        id: id,
        propertyType: propertyType.display,
        description: description,            
        propertyLink: _links.canonical.href,
        address: address,
        propertySizes: propertySizes,
        generalFeatures: generalFeatures,
        propertyFeatures: propertyFeatures[].{featureName: displayLabel, value: value},
        images: media.images[].templatedUrl,
        videos: videos,
        floorplans: floorplans,        
        listingCompany: listingCompany.{name: name, id: id, companyLink: _links.canonical.href, phoneNumber: businessPhone, address: address.display.fullAddress, ratingsReviews: ratingsReviews, description: description},
        listers: listers,
        auction: auction
        }
        """
    return jmespath.search(listing_query, data)

    
def parse_hidden_data(response: Response) -> Dict:
    """Extract the JSON payload embedded in the page's ``window.ArgonautExchange`` script.

    Args:
        response: HTTP response whose ``text`` holds the property page HTML.

    Returns:
        The decoded ``data`` object of the first entry in the urql client cache.

    Raises:
        ValueError: if the script tag, or the ``window.ArgonautExchange=``
            assignment inside it, cannot be found in the page.
    """
    selector = Selector(response.text)
    script = selector.xpath(
        "//script[contains(text(),'window.ArgonautExchange')]/text()"
    ).get()
    # .get() yields None when nothing matches; fail with a clear message
    # instead of letting the regex call raise an opaque TypeError
    if script is None:
        raise ValueError("window.ArgonautExchange script tag not found in response")
    match = re.search(r"window\.ArgonautExchange=(\{.+\});", script)
    if match is None:
        raise ValueError("window.ArgonautExchange assignment not found in script tag")
    # the payload is JSON nested inside JSON strings, so it is decoded in three passes
    exchange = json.loads(match.group(1))
    cache = json.loads(exchange["resi-property_listing-experience-web"]["urqlClientCache"])
    first_entry = list(cache.values())[0]
    return json.loads(first_entry["data"])

    
async def scrape_properties(urls: List[str]) -> List[Dict]:
    """Fetch the given property pages concurrently and return their refined listing data.

    Results are collected as each request finishes, so their order may
    differ from the order of ``urls``.
    """
    # queue one GET request per property page
    pending_requests = []
    for url in urls:
        pending_requests.append(client.get(url))

    properties = []
    # handle each page as soon as its request completes
    for finished in asyncio.as_completed(pending_requests):
        response = await finished
        assert response.status_code == 200, "request has been blocked"
        listing = parse_hidden_data(response)["details"]["listing"]
        properties.append(parse_property_data(listing))

    print(f"scraped {len(properties)} property listings")
    return properties

In [3]:
async def run():
    """Scrape the sample property page and print the result as indented JSON."""
    target_urls = [
        "https://www.realestate.com.au/property-apartment-vic-west+melbourne-439855148",
    ]
    listings = await scrape_properties(urls=target_urls)
    # dump the scraped listings in JSON format
    print(json.dumps(listings, indent=2))

In [4]:
# Top-level await relies on IPython/Jupyter's autoawait support;
# in a plain Python script use asyncio.run(run()) instead.
await run()

scraped 1 property listings
[
  {
    "id": "439855148",
    "propertyType": "Apartment",
    "description": "Stylish Urban Living in the Heart of West Melbourne<br/><br/>Step into a world of contemporary luxury with this immaculate 2-bedroom, 2-bathroom apartment at 506/105 Batman Street, West Melbourne. Perfectly positioned in a thriving urban enclave, this residence offers a blend of modern elegance and unbeatable convenience.<br/><br/>Property Features:<br/>\u2022\tOpen-Plan Living: The expansive living and dining area is designed for comfort and style, featuring large windows that flood the space with natural light and offer breathtaking views of the city skyline<br/>\u2022\tModern Kitchen: high-end stainless steel appliances, stylish stone benchtops, and ample cabinetry for all your storage needs.<br/>\u2022\tMaster Suite: with a built-in wardrobe and a chic ensuite bathroom<br/>\u2022\tRemaining bedrooms: with built-in wardrobes<br/>\u2022\tStylish Bathrooms: Enjoy the elegance 