In [None]:
from urllib.parse import urlencode
import json
import httpx

# We should use browser-like request headers to prevent being instantly blocked.
# Each header name must appear only once: a repeated key in a dict literal
# silently replaces the earlier value.
BASE_HEADERS = {
    # Accept-Language is a comma-separated list; ";q=" weights a single entry.
    "accept-language": "en-US,en;q=0.9",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "accept-encoding": "gzip, deflate, br",
}


# Zillow's search-state endpoint; the query string is appended after the "?".
url = "https://www.zillow.com/search/GetSearchPageState.htm?"
parameters = {
    "searchQueryState": {
        "pagination": {},
        "usersSearchTerm": "New Haven, CT",
        # map coordinates that indicate New Haven city's area
        "mapBounds": {
            "west": -73.03037621240235,
            "east": -72.82781578759766,
            "south": 41.23043771298298,
            "north": 41.36611033618769,
        },
    },
    "wants": {
        # cat1 stands for agent listings
        "cat1": ["mapResults"]
        # and cat2 for non-agent listings
        # "cat2":["mapResults"]
    },
    "requestId": 2,
}
# urlencode() calls str() on every value, which would send the nested dicts as
# Python reprs (single-quoted) rather than JSON -- serialize each value first.
query_string = urlencode({key: json.dumps(value) for key, value in parameters.items()})
response = httpx.get(url + query_string, headers=BASE_HEADERS)
# Surface a blocked/failed request as an HTTP error instead of an opaque
# JSON decoding failure further down.
response.raise_for_status()
# Parse the body once and reuse it.
data = response.json()
results = data["cat1"]["searchResults"]["mapResults"]
print(json.dumps(results, indent=2))
print(f"found {len(results)} property results")


In [3]:
# Second search: same endpoint (`url`) and headers (`BASE_HEADERS`) as the
# earlier cell, with a different bounding box and no search term.
parameters = {
    "searchQueryState": {
        # bounding box of the map area to search
        "mapBounds": {
            "west": -158.43543544809847,
            "east": -157.48786464731722,
            "south": 21.020622641199793,
            "north": 21.854054046894433
        },
        "pagination": {},
    },
    "wants": {
        # cat1 stands for agent listings
        "cat1": ["mapResults"]
        # and cat2 for non-agent listings
        # "cat2":["mapResults"]
    },
    "requestId": 2,
}
# urlencode() calls str() on every value, which would send the nested dicts as
# Python reprs (single-quoted) rather than JSON -- serialize each value first.
query_string = urlencode({key: json.dumps(value) for key, value in parameters.items()})
response = httpx.get(url + query_string, headers=BASE_HEADERS)
# Surface a blocked/failed request as an HTTP error instead of an opaque
# JSON decoding failure further down.
response.raise_for_status()
# Parse the body once and reuse it.
data = response.json()
results = data["cat1"]["searchResults"]["mapResults"]
print(json.dumps(results, indent=2))
print(f"found {len(results)} property results")


[
  {
    "buildingId": "21.286045--157.8388",
    "lotId": 1002234214,
    "price": "From $118,000",
    "latLong": {
      "latitude": 21.286045,
      "longitude": -157.8388
    },
    "minBeds": 1,
    "minBaths": 1.0,
    "minArea": 513,
    "imgSrc": "https://photos.zillowstatic.com/fp/9240709c14b00da8d33a739d385293c1-p_e.jpg",
    "hasImage": true,
    "isFeaturedListing": false,
    "unitCount": 3,
    "isBuilding": true,
    "address": "1720 Ala Moana Blvd, Honolulu, HI",
    "variableData": {},
    "badgeInfo": null,
    "statusType": "FOR_SALE",
    "statusText": "For Rent",
    "listingType": "",
    "isFavorite": false,
    "detailUrl": "/b/moana-vista-apartments-honolulu-hi-5XrrfT/",
    "has3DModel": false,
    "hasAdditionalAttributions": false,
    "canSaveBuilding": false
  },
  {
    "zpid": "2087652362",
    "price": "$230,000",
    "priceLabel": "$230K",
    "beds": 0,
    "baths": 1.0,
    "area": 331,
    "latLong": {
      "latitude": 21.4411,
      "longitude":

In [16]:
def parse_property(data):
    """Reduce one raw map result to the few fields kept for analysis.

    Args:
        data: a single entry of the search results (a dict).

    Returns:
        A dict with the keys latitude, longitude, price, bedrooms, bathrooms,
        homeType and homeStatus (each None when absent from the source), or
        None when the entry has no usable ``hdpData`` / ``homeInfo`` section.
    """
    details = data.get("hdpData")
    home = details.get("homeInfo") if details else None
    if not home:
        return None

    # (output key, key inside homeInfo) -- order defines the output key order
    wanted = (
        ("latitude", "latitude"),
        ("longitude", "longitude"),
        ("price", "price"),
        ("bedrooms", "bedrooms"),
        ("bathrooms", "bathrooms"),
        ("homeType", "homeType"),
        ("homeStatus", "homeStatusForHDP"),
    )
    return {target: home.get(source) for target, source in wanted}

# Parse every raw result and keep only the truthy outcomes, i.e. drop the
# entries for which parse_property returned None.
parsed_results = list(filter(None, map(parse_property, results)))
print(json.dumps(parsed_results, indent=2))

[
  {
    "latitude": 21.4411,
    "longitude": -158.1885,
    "price": 230000.0,
    "bedrooms": 0.0,
    "bathrooms": 1.0,
    "homeType": "CONDO",
    "homeStatus": "FOR_SALE"
  },
  {
    "latitude": 21.320112,
    "longitude": -157.79565,
    "price": 1925000.0,
    "bedrooms": 4.0,
    "bathrooms": 3.0,
    "homeType": "SINGLE_FAMILY",
    "homeStatus": "FOR_SALE"
  },
  {
    "latitude": 21.321608,
    "longitude": -158.0127,
    "price": 829900.0,
    "bedrooms": 3.0,
    "bathrooms": 2.0,
    "homeType": "SINGLE_FAMILY",
    "homeStatus": "FOR_SALE"
  },
  {
    "latitude": 21.440254,
    "longitude": -158.18042,
    "price": 725000.0,
    "bedrooms": 3.0,
    "bathrooms": 2.0,
    "homeType": "SINGLE_FAMILY",
    "homeStatus": "FOR_SALE"
  },
  {
    "latitude": 21.369734,
    "longitude": -157.92274,
    "price": 480000.0,
    "bedrooms": 1.0,
    "bathrooms": 1.0,
    "homeType": "TOWNHOUSE",
    "homeStatus": "FOR_SALE"
  },
  {
    "latitude": 21.673733,
    "longitude": 

In [17]:
import pandas as pd
# Build a DataFrame from the list of parsed listing dicts (one row per listing).
df = pd.DataFrame(parsed_results)

In [19]:
# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,latitude,longitude,price,bedrooms,bathrooms,homeType,homeStatus
0,21.441100,-158.18850,230000.0,0.0,1.0,CONDO,FOR_SALE
1,21.320112,-157.79565,1925000.0,4.0,3.0,SINGLE_FAMILY,FOR_SALE
2,21.321608,-158.01270,829900.0,3.0,2.0,SINGLE_FAMILY,FOR_SALE
3,21.440254,-158.18042,725000.0,3.0,2.0,SINGLE_FAMILY,FOR_SALE
4,21.369734,-157.92274,480000.0,1.0,1.0,TOWNHOUSE,FOR_SALE
...,...,...,...,...,...,...,...
368,21.303436,-157.83414,895000.0,2.0,2.0,CONDO,FOR_SALE
369,21.425900,-157.73927,2300000.0,3.0,2.0,SINGLE_FAMILY,FOR_SALE
370,21.287191,-157.83963,625000.0,2.0,2.0,CONDO,FOR_SALE
371,21.297043,-157.67923,1399000.0,,,LOT,FOR_SALE


In [20]:
# Save the parsed listings to a CSV file (path is relative to the working directory).
# NOTE(review): to_csv defaults to index=True, so the row index is written as an
# extra unnamed first column -- consider index=False if that column is not wanted.
df.to_csv("HawaiiProperties.csv")