# REAL ESTATE DATA API

## Imports

In [107]:
import subprocess
import warnings
from datetime import datetime
import pickle

import pandas as pd
import requests

warnings.filterwarnings('ignore')


## Generating Dataset

### API Props

In [2]:
# NOTE(review): these credentials are committed in plain text. They are not
# read by any Python code visible in this notebook (presumably getToken.js
# uses its own copy -- confirm). Rotate them and load them from the
# environment or a secrets store instead of keeping them in source.
client_secret = "secret_9d2aeb1d94d5f400c8128c5199a8d8de"
client_id = "client_2bf50cfd2349026b489480f4c464f395"

# Run the Node helper and use whatever it prints on stdout as the bearer
# token. An argument list (no shell) avoids shell parsing altogether, and
# text=True returns str so no manual decode step is needed.
raw = subprocess.check_output(["node", "getToken.js"], text=True)
bearer = raw.strip()

In [51]:
# Headers sent with every API request: JSON in, JSON out, plus the bearer
# token obtained from getToken.js.
headers = {"accept": "application/json", "Content-Type": "application/json"}
headers["Authorization"] = f"Bearer {bearer}"

In [4]:
# Endpoint that the residential listings search requests are POSTed to.
listings = "https://api.domain.com.au/v1/listings/residential/_search"

In [5]:
def req_body(pg_no):
    """Build the JSON body for one page of the residential listings search.

    The search filters are fixed: houses for sale in the Melbourne Region
    (VIC) with 2-5 bedrooms, priced 350,000-800,000, with photos, excluding
    listings with a withheld price or a deposit taken.

    Args:
        pg_no: Page number to request.

    Returns:
        A new dict on every call, ready to pass as ``json=`` to
        ``requests.post``.
    """
    return {
        "listingType": "Sale",
        "propertyTypes": [
            "House",
        ],
        "minBedrooms": 2,
        "maxBedrooms": 5,
        "minPrice": 350000,
        "maxPrice": 800000,
        "listingAttributes": [
            "HasPhotos",
        ],
        "locations": [
            {
                "state": "VIC",
                "region": "Melbourne Region",
                "area": "",
                "suburb": "",
                "postCode": "",
            },
        ],
        "excludePriceWithheld": True,
        "excludeDepositTaken": True,
        "pageSize": 200,
        "pageNumber": pg_no,
    }

### Health Check

In [13]:
# Request page 1 of the search once and print the response object, so the
# HTTP status is visible before the full scrape is started.
resp = requests.post(listings, headers=headers, json=req_body(1))
resp_json = resp.json()
print(resp)

<Response [200]>


### Dataframe Writer

In [78]:
# Date stamp shared by the dated cache file and the dated CSV export.
now = datetime.now().strftime("%Y-%m-%d")
sp_file = f"data/suburb_profiles_{now}.pkl"

# Load the suburb profiles cached for this date, or start with an empty
# cache when no file has been written yet (e.g. the first run of the day).
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load cache files that this notebook wrote itself.
try:
    with open(sp_file, "rb") as a_file:
        SUBURB_PROFILES = pickle.load(a_file)
except FileNotFoundError:
    SUBURB_PROFILES = {}

In [103]:
_API_ROOT = "https://api.domain.com.au/v1"

# Fields copied verbatim from each part of a listing into its output row.
_LISTING_FIELDS = ("listingType", "id", "dateListed")
_PROPERTY_FIELDS = (
    "propertyType", "bathrooms", "bedrooms", "carspaces", "unitNumber",
    "streetNumber", "street", "area", "region", "suburb", "postcode",
    "displayableAddress", "landArea",
)
_CATEGORY_FIELDS = (
    "numberSold", "estimatedRepayments", "forSale", "medianSoldPrice",
    "medianRentPrice", "daysOnMarket", "forRent", "entryLevelPrice",
)
_GROWTH_FIELDS = ("medianSoldPrice", "annualGrowth", "numberSold")

# Errors that mean "the response body was not JSON of the expected shape".
# JSON decode errors raised by Response.json() are ValueError subclasses.
_BAD_PAYLOAD = (ValueError, KeyError, IndexError, TypeError, AttributeError)


def _pick(mapping, keys):
    """Return the items of *mapping* whose key is listed in *keys*."""
    return {k: v for k, v in mapping.items() if k in keys}


def _fetch_suburb_profile(suburb, property_type, bedrooms):
    """Download the profile and performance statistics for one suburb.

    Returns the profile dict (with a ``suburb_performance`` entry), or an
    empty dict when the suburb cannot be resolved to a location id.
    """
    resp = requests.get(
        url=f"{_API_ROOT}/addressLocators",
        params={"searchLevel": "Suburb", "suburb": suburb, "state": "VIC"},
        headers=headers,
    )
    try:
        loc_id = resp.json()[0]["ids"][0]["id"]
    except _BAD_PAYLOAD:
        # Without a location id neither follow-up endpoint can be queried.
        return {}

    resp = requests.get(
        url=f"{_API_ROOT}/locations/profiles/{loc_id}", headers=headers
    )
    try:
        profile = resp.json()["data"]
    except _BAD_PAYLOAD:
        profile = {}
    if not isinstance(profile, dict):
        profile = {}

    resp = requests.get(
        url=f"{_API_ROOT}/suburbPerformanceStatistics",
        params={
            "state": "VIC",
            "suburbId": loc_id,
            "propertyCategory": property_type,
            "chronologicalSpan": 12,
            "tPlusFrom": 1,
            "tPlusTo": 3,
            "bedrooms": bedrooms,
        },
        headers=headers,
    )
    performance = {}
    try:
        # Keys carry the year, so every returned period is kept side by
        # side instead of each one replacing the previous.
        for period in resp.json()["series"]["seriesInfo"]:
            year = period["year"]
            for key, value in period["values"].items():
                performance[f"sp_{year}_{key}"] = value
    except _BAD_PAYLOAD:
        performance = {}
    profile["suburb_performance"] = performance
    return profile


def _suburb_features(profile, bedrooms, property_type):
    """Flatten the parts of a suburb profile relevant to one listing."""
    features = {}
    if "renterPercentage" in profile:
        features["renterPercentage"] = profile["renterPercentage"]
        for category in profile.get("propertyCategories") or []:
            if (category.get("bedrooms") != bedrooms
                    or category.get("propertyCategory") != property_type):
                continue
            features.update(
                {f"sp_{k}": v for k, v in _pick(category, _CATEGORY_FIELDS).items()}
            )
            for growth in category.get("salesGrowthList") or []:
                year = growth.get("year")
                if year is not None and year >= 2019:
                    # Read the figures from the growth record itself.
                    features.update(
                        {f"{year}_{k}": v
                         for k, v in _pick(growth, _GROWTH_FIELDS).items()}
                    )
    features.update(profile.get("suburb_performance") or {})
    return features


def _fetch_listing_url(listing_id):
    """Return the listing's ``seoUrl``, or None when it is unavailable."""
    resp = requests.get(url=f"{_API_ROOT}/listings/{listing_id}", headers=headers)
    try:
        return resp.json()["seoUrl"]
    except _BAD_PAYLOAD:
        return None


def update_df(df, resp_json):
    """Append one row per listing in a search response to *df*.

    Each row combines selected listing, price and property fields with
    suburb-level data and the listing's public URL. Suburb data is looked
    up in the module-level ``SUBURB_PROFILES`` cache and fetched from the
    API (then stored in the cache) on a miss.

    Args:
        df: DataFrame holding the rows collected so far.
        resp_json: Iterable of search results; entries without a
            ``"listing"`` key are skipped.

    Returns:
        A DataFrame with the new rows appended, or *df* itself when the
        response contained no listings.

    Side effects:
        Issues HTTP requests and adds entries to ``SUBURB_PROFILES``.
    """
    rows = []
    for entry in resp_json:
        if "listing" not in entry:
            continue
        house = entry["listing"]

        row = _pick(house, _LISTING_FIELDS)
        row.update(house.get("priceDetails") or {})
        row.update(_pick(house.get("propertyDetails") or {}, _PROPERTY_FIELDS))

        suburb = row.get("suburb")
        bedrooms = row.get("bedrooms")
        property_type = row.get("propertyType")

        if suburb is None:
            profile = {}
        else:
            if suburb not in SUBURB_PROFILES:
                # NOTE(review): the cache is keyed by suburb only, so the
                # performance statistics fetched for the first listing's
                # property type and bedroom count are reused for every
                # later listing in that suburb.
                SUBURB_PROFILES[suburb] = _fetch_suburb_profile(
                    suburb, property_type, bedrooms
                )
            profile = SUBURB_PROFILES[suburb]
        row.update(_suburb_features(profile, bedrooms, property_type))

        if "id" in row:
            seo_url = _fetch_listing_url(row["id"])
            if seo_url is not None:
                row["url"] = seo_url

        rows.append(row)

    if not rows:
        return df
    # One concat per page: DataFrame.append was removed in pandas 2.0, and
    # growing the frame row by row copies it on every iteration.
    return pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

### Main

In [104]:
if __name__ == "__main__":
    # Accumulates one row per listing across all fetched pages.
    FULL_DF = pd.DataFrame()

    # Fetch pages 1-9 at most, stopping early on a short page.
    for i in range(1, 10):
        resp = requests.post(url=listings, json=req_body(i), headers=headers)
        if not resp.ok:
            # Without this check a failed request is indistinguishable
            # from a short final page and the dataset is silently cut off.
            # print is used because this file sets
            # warnings.filterwarnings('ignore'), which would hide a warning.
            print(f"Listings search failed on page {i}: HTTP {resp.status_code}")
            break
        resp_json = resp.json()

        # process data and update df
        FULL_DF = update_df(FULL_DF, resp_json)
        # Fewer results than the requested pageSize (200) means last page.
        if len(resp_json) < 200:
            break

In [105]:
# Bare expression: shows the assembled DataFrame as the notebook cell output.
FULL_DF

Unnamed: 0,listingType,id,dateListed,price,priceFrom,priceTo,displayPrice,propertyType,bathrooms,bedrooms,...,sp_2022_auctionNumberSold,sp_2022_auctionNumberWithdrawn,sp_2022_daysOnMarket,sp_2022_discountPercentage,sp_2022_medianRentListingPrice,sp_2022_numberRentListing,sp_2022_highestRentListingPrice,sp_2022_lowestRentListingPrice,url,landArea
0,Sale,2017960201,2022-07-27T12:44:27,735612,735612,735612,"$735,612",House,2.0,3.0,...,19,,31,3.2,430,271,650,277,https://www.domain.com.au/lot-105-17-18-pagett...,
1,Sale,2017934574,2022-07-14T12:24:02,628000,628000,628000,"$628,000",House,1.0,3.0,...,37,,30,,375,75,480,300,https://www.domain.com.au/11-shale-court-delah...,
2,Sale,2017920977,2022-07-07T15:24:49,600000,600000,600000,"$600,000",House,1.0,3.0,...,203,,52,4.76,350,1641,600,87,https://www.domain.com.au/3-willow-street-werr...,595.0
3,Sale,2017900632,2022-06-28T21:28:16,710000,710000,710000,"$710,000",House,1.0,3.0,...,184,,47,4.3,350,859,750,4,https://www.domain.com.au/113-powell-drive-hop...,884.0
4,Sale,2017872934,2022-06-15T16:25:29,700000,700000,700000,"$700,000",House,2.0,4.0,...,203,,52,4.76,350,1641,600,87,https://www.domain.com.au/2-koombahla-court-we...,393.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,Sale,2017700881,2022-03-30T15:24:51,495000,495000,495000,"$495,000",House,2.0,3.0,...,50,,55,4.19,350,834,550,87,,
195,Sale,2017699190,2022-03-30T10:28:08,798000,798000,798000,"$798,000",House,1.0,3.0,...,214,,54,6.97,430,524,950,140,,544.0
196,Sale,2017601184,2022-03-29T15:14:18,463580,463580,463580,"$463,580",House,1.0,3.0,...,1,,75,,410,62,490,370,,
197,Sale,2017696563,2022-03-29T11:48:47,620000,620000,680000,"$620,000 to $680,000",House,1.0,2.0,...,74,,38,3.01,450,328,1000,180,,


### Save to CSV

In [106]:
# Write the collected listings to a CSV named after the run date.
csv_path = f"data/houses-{now}.csv"
FULL_DF.to_csv(csv_path)

### Save Suburb Profiles

In [None]:
# Serialise the suburb profile cache and write it back to the dated file,
# so profiles fetched during this run are available to the next load.
with open(sp_file, "wb") as a_file:
    a_file.write(pickle.dumps(SUBURB_PROFILES))

##### Other Fields 
- Distance to city
- Distance to schools

## Data Visualisation #TODO

In [None]:
# Choose CSV file (default latest)
# Plotly graphs and scatter plots - ensure the hover tooltips include the address
# Geo plot?