# zillow-gql
Exploring Zillow's GraphQL API

Zillow is moving more and more of its API to GraphQL. It relies on named queries, which limits the kinds of data that can be fetched, but the `/graphql` endpoint still supports general GraphQL queries (for now).

In [1]:
from lxml import html
import requests
import json
from datetime import datetime
from random import randint
from time import sleep
import pandas as pd

In [2]:
def printErrors(r):
    """
    Print the error messages carried by a GraphQL response payload

    Parameters:
    r a decoded response (dict) or any other object

    When r is a dict with an 'errors' key, the 'message' of each entry is
    printed (the entry itself is printed when it has no 'message').
    Anything else is printed as-is.
    """
    if isinstance(r, dict) and 'errors' in r:
        # dict contents are reached by subscript; attribute access
        # (r.errors / error.message) raises AttributeError on a dict
        for error in r['errors'] or []:
            if isinstance(error, dict):
                print(error.get('message', error))
            else:
                print(error)
    else:
        print(r)

In [3]:
# Sample property: its Zillow property ID and the URL of its page
zpid = 15574599
propertyURL = (
    'https://www.zillow.com/homes/'
    '767-Upland-Rd-Redwood-City,-CA,-94062_rb/15574599_zpid/'
)

One can reverse engineer much of the Zillow schema by looking at the payloads that come back from calls to the `/graphql` endpoint. For example, a query like https://www.zillow.com/graphql/?zpid=15574599&contactFormRenderParameter=&queryId=5413b5a24e9812cba14b76bd3bc30e6a&operationName=NotForSaleShopperPlatformFullRenderQuery gets run when you click on a property.

In [55]:
def PriceTaxQuery(
    zpid,
    clientVersion="home-details/6.0.11.1315.master.2fc8ca5",
    timePeriod="FIVE_YEARS",
    metricType="LOCAL_HOME_VALUES",
    forecast=True,
):
    """
    Build the request body for the PriceTaxQuery GraphQL operation

    Parameters:
    zpid the Zillow property ID, bound to the query's $zpid variable
    clientVersion value placed under the body's "clientVersion" key
    timePeriod bound to $timePeriod (an argument of homeValueChartData)
    metricType bound to $metricType (an argument of homeValueChartData)
    forecast sent under "variables"; the query text declares no $forecast

    Returns: a dict with "query", "operationName", "variables" and
             "clientVersion" keys
    """
    # the query text is kept verbatim, whitespace included
    gql_document = """
             query PriceTaxQuery($zpid: ID!, $metricType: HomeValueChartMetricType, $timePeriod: HomeValueChartTimePeriod) {
                 property(zpid: $zpid) {
                    address {
                        city
                        community
                        neighborhood
                        state
                        streetAddress
                        subdivision
                        zipcode
                    }
                    bathrooms
                    bedrooms
                    countyFIPS
                    dateSold
                    hdpUrl
                    homeStatus
                    homeValueChartData(metricType: $metricType, timePeriod: $timePeriod) {
                        points {
                            x
                            y
                        }
                        name
                    }
                    lastSoldPrice
                    latitude
                    livingArea
                    livingAreaUnits
                    longitude
                    lotSize
                    parcelId
                    price
                    priceHistory {
                        time
                        price
                        event
                        buyerAgent {
                            profileUrl
                            name
                        }
                        sellerAgent {
                            profileUrl
                            name
                        }
                    }
                    yearBuilt
                    zestimate
                    zpid
                }
            }
        """
    gql_variables = dict(
        zpid=zpid,
        timePeriod=timePeriod,
        metricType=metricType,
        forecast=forecast,
    )
    return dict(
        query=gql_document,
        operationName="PriceTaxQuery",
        variables=gql_variables,
        clientVersion=clientVersion,
    )

In [14]:
def get_gql_headers(zpid):
    """
    Build the header dict sent with requests to Zillow's GraphQL server

    Parameters:
    zpid the Zillow property ID; interpolated into the 'path' entry

    Returns: a dict mapping header names to string values
    """
    request_path = f'/graphql/?zpid={zpid}'
    chrome_user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/90.0.4430.93 Safari/537.36'
    )
    # values mirror an actual GraphQL request made to Zillow from Chrome
    headers = {
        'authority': 'www.zillow.com',
        'method': 'POST',
        'path': request_path,
        'scheme': 'https',
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en',
        'content-type': 'text/plain',
        'dnt': '1',
        'origin': 'https://www.zillow.com',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': chrome_user_agent,
    }
    return headers

In [60]:
def post_qql_query(zpid, query, gql_server='https://www.zillow.com/graphql/', timeout=30):
    """
    Run a GraphQL query for one property against Zillow's GraphQL server

    Parameters:
    zpid Zillow property ID
    query a callable taking zpid and returning the JSON request body
          (e.g. PriceTaxQuery)
    gql_server URL of the GraphQL endpoint
    timeout seconds to wait for the server before requests gives up;
            None waits indefinitely

    Returns: json object including all the data requested by the query
             while omitting the top two levels of hierarchy (data.property),
             or None when the request fails or the response has no data
    """
    sleep(randint(1, 4))  # act human
    r = requests.post(
        gql_server,
        json=query(zpid),
        headers=get_gql_headers(zpid),
        params={'zpid': zpid},
        timeout=timeout,  # without one a stalled connection blocks forever
    )

    print(r.status_code, 'POST ', gql_server, zpid)

    if not r.ok:
        printErrors(r)
        return None

    try:
        payload = r.json()
    except ValueError:
        # the JSON decode errors raised by Response.json() derive from ValueError
        print('Error getting json from graphql query results')
        return None

    if not isinstance(payload, dict):
        print('Error getting json from graphql query results')
        return None

    # a GraphQL response can report failures in an "errors" list even when
    # the HTTP status is OK, so surface those messages instead of hiding them
    for error in payload.get('errors') or []:
        print(error.get('message', error) if isinstance(error, dict) else error)

    data = payload.get('data')
    if not isinstance(data, dict):
        print('Error getting json from graphql query results')
        return None
    return data.get('property')

In [61]:
# fetch the sample property, using PriceTaxQuery to build the request body
prop = post_qql_query(zpid, query=PriceTaxQuery)

200 POST  https://www.zillow.com/graphql/ 15574599


In [64]:
prop

{'address': {'city': 'Redwood City',
  'community': None,
  'neighborhood': None,
  'state': 'CA',
  'streetAddress': '767 Upland Rd',
  'subdivision': None,
  'zipcode': '94062'},
 'bathrooms': 4,
 'bedrooms': 4,
 'countyFIPS': '06081',
 'dateSold': 1604880000000,
 'hdpUrl': '/homedetails/767-Upland-Rd-Redwood-City-CA-94062/15574599_zpid/',
 'homeStatus': 'RECENTLY_SOLD',
 'homeValueChartData': [{'points': [{'x': 1464678000000, 'y': 2170436},
    {'x': 1467270000000, 'y': 2205292},
    {'x': 1469948400000, 'y': 2149919},
    {'x': 1472626800000, 'y': 2167531},
    {'x': 1475218800000, 'y': 2212150},
    {'x': 1477897200000, 'y': 2232124},
    {'x': 1480492800000, 'y': 2142636},
    {'x': 1483171200000, 'y': 2163062},
    {'x': 1485849600000, 'y': 2193079},
    {'x': 1488268800000, 'y': 2274400},
    {'x': 1490943600000, 'y': 2292923},
    {'x': 1493535600000, 'y': 2247033},
    {'x': 1496214000000, 'y': 2298335},
    {'x': 1498806000000, 'y': 2422478},
    {'x': 1501484400000, 'y': 24

In [62]:
# copy only the top-level str, int, and float values; everything else
# (None, nested dicts, lists) is left out
data = {}
for key, value in prop.items():
    if not isinstance(value, (str, int, float)):
        continue
    data[key] = value

In [63]:
data

{'bathrooms': 4,
 'bedrooms': 4,
 'countyFIPS': '06081',
 'dateSold': 1604880000000,
 'hdpUrl': '/homedetails/767-Upland-Rd-Redwood-City-CA-94062/15574599_zpid/',
 'homeStatus': 'RECENTLY_SOLD',
 'lastSoldPrice': 2500000,
 'latitude': 37.471012115478516,
 'livingArea': 2963,
 'livingAreaUnits': 'Square Feet',
 'longitude': -122.25418090820312,
 'lotSize': 9900,
 'parcelId': '058282360',
 'price': 2500000,
 'yearBuilt': 1979,
 'zestimate': 2634117,
 'zpid': 15574599}