In [1]:
import json
import httpx
from urllib.parse import urlencode
from webdriver_manager.chrome import ChromeDriverManager
from parsel import Selector
from typing import List
from zillow_scrape_functions import find_listings, find_properties


### Replicate the Zillow request

In [3]:
# Browser-like headers needed to get past bot checks; the values were copied
# from the search request's headers in the browser devtools.
# Each header name must appear only once: in a dict literal a repeated key
# silently overrides the earlier value.
base_headers = {
    "accept-language": "en-US,en;q=0.9",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5414.74 Safari/537.36",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "accept-encoding": "gzip, deflate, br",
}

# Search endpoint; the trailing "?" is kept so an encoded query string can be
# appended directly.
url = "https://www.zillow.com/search/GetSearchPageState.htm?"

# Query parameters of the search request, copied from the devtools search query.
parameters = {
    "searchQueryState": {
        "pagination": {},
        "usersSearchTerm": "Honolulu, HI",
        # map coordinates that indicate Honolulu's area
        "mapBounds": {
            "west": -158.05336190136717,
            "east": -157.5493640986328,
            "south": 20.809352615926436,
            "north": 21.839148440373638,
        },
    },
    "wants": {
        # cat1 stands for agent listings
        "cat1": ["mapResults"]
        # and cat2 for non-agent listings
        # "cat2":["mapResults"]
    },
    "requestId": 2,
}

In [3]:
# Test request: fetch the search page state and inspect the map results.
response = httpx.get(url + urlencode(parameters), headers=base_headers)
# Fail loudly on an HTTP error (e.g. a blocked request) instead of hitting a
# confusing JSON decode error on a non-JSON error page.
response.raise_for_status()
# Parse the body once and reuse the parsed payload.
data = response.json()
results = data["cat1"]["searchResults"]["mapResults"]
print(json.dumps(results, indent=2))
print(f"found {len(results)} property results")

[
  {
    "zpid": "620653",
    "price": "$1,975,000",
    "priceLabel": "$1.98M",
    "beds": 4,
    "baths": 2.0,
    "area": 2807,
    "latLong": {
      "latitude": 21.29127,
      "longitude": -157.70784
    },
    "statusType": "FOR_SALE",
    "statusText": "House for sale",
    "isFavorite": false,
    "isUserClaimingOwner": false,
    "isUserConfirmedClaim": false,
    "imgSrc": "https://photos.zillowstatic.com/fp/7098fe3e7344b7fbbb00c4364bb78339-p_e.jpg",
    "hasImage": true,
    "visited": false,
    "listingType": "",
    "variableData": {
      "type": "TIME_ON_INFO",
      "text": "5 minutes ago",
      "data": {
        "isFresh": false
      }
    },
    "hdpData": {
      "homeInfo": {
        "zpid": 620653,
        "zipcode": "96825",
        "city": "Honolulu",
        "state": "HI",
        "latitude": 21.29127,
        "longitude": -157.70784,
        "price": 1975000.0,
        "bathrooms": 2.0,
        "bedrooms": 4.0,
        "livingArea": 2807.0,
        "home

In [4]:
# The script-style entry point (`import asyncio; asyncio.run(main())`) doesn't
# work in Jupyter — presumably because the kernel already runs an event loop —
# so the coroutine is awaited directly at the top level of the cell instead.
listings = await find_listings(headers=base_headers)

2023-02-03 14:15:16.770 | INFO     | zillow_scrape_functions:search_rent:55 - scraping rent search for: Honolulu, HI
2023-02-03 14:15:17.681 | INFO     | zillow_scrape_functions:zillow_request:40 - found 396 results for query: Honolulu, HI


In [5]:
# Sanity check: number of entries returned by find_listings.
len(listings)

396

In [10]:
# Peek at the first entry's 'detailUrl' field (the value shown is a site-relative path).
listings[0]['detailUrl']

'/b/hickam-communities-honolulu-hi-65dDMr/'

In [None]:
# NOTE(review): this rebinds `listings`, replacing the find_listings result
# assigned in an earlier cell; re-running the earlier inspection cells after
# this one would show find_properties output instead. Consider a distinct
# name (e.g. `properties`) — confirm no later cell relies on `listings`.
listings = await find_properties(headers=base_headers)