In [3]:
from typing import Union
from datetime import datetime
import json
import pickle
import time

import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import random

user_agents = [
    "Mozilla/5.0 (X11; CrOS x86_64 12871.102.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.141 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
    "Opera/9.25 (Windows NT 5.1; U; en)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)",
    "Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9",
]

home_url = {
    "domain": "https://www.domain.com.au",
    "realestate": "https://www.realestate.com.au",
}

In [4]:
def _domain_get_response(url: str) -> Union[dict, None]:
    """
    Return data object from target url __NEXT_DATA__ section

    Args:
        postcode (int): victoria postcode
        page (int): page number

    Returns:
        Union[dict, None]: data object
    """
    
    try:
        response = requests.get(
            url, headers={"User-Agent": random.choice(user_agents)}, timeout=5
        )
    except response.exceptions.Timeout:
        return None

    # parse http requests
    bs_object = BeautifulSoup(response.text, "html.parser")

    try:
        data = json.loads(bs_object.find("script", {"id": "__NEXT_DATA__"}).text)[
            "props"
        ]["pageProps"]
        return data
    except Exception:
        return None

In [5]:
page_props = _domain_get_response(url)
component_props = page_props["componentProps"]

TypeError: 'NoneType' object is not subscriptable

In [None]:
def _domain_nearby_schools(component_props: dict) -> list:
    """
    Obtain school names based on given target property info page

    Args:
        componentProps (dict): webpage component description information

    Returns:
        list: list of nearby school names of a given property
    """
    if "schoolCatchment" in component_props:
        if "schools" in component_props["schoolCatchment"]:
            return list(
                s["name"] for s in component_props["schoolCatchment"]["schools"]
            )
        else:
            return list()