Housing data - Zillow

In [75]:
#import the necessary libraries

import os
import warnings

import folium
import numpy as np
import pandas as pd
import plotly.express as px
import requests
from geopy.distance import distance

# Notebook-wide settings.
# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")
# Never truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

Next, we pull the listing data from Zillow through the Scrapeak scraper API. The API requires a key, which can be obtained for free from the page below.

https://app.scrapeak.com/dashboard/scrapers/zillow-scraper-api

In [76]:
def get_listings(api_key, listing_url, timeout=60):
    """Request the listings behind a Zillow search URL from the Scrapeak API.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        listing_url: Zillow search-results URL, sent as the ``url`` query
            parameter.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests``. Pass ``None`` to wait indefinitely (the
            behaviour before this parameter existed).

    Returns:
        The ``requests`` response object of the GET call; decoding and
        status checks are left to the caller.
    """
    endpoint = "https://app.scrapeak.com/v1/scrapers/zillow/listing"
    params = {"api_key": api_key, "url": listing_url}

    # Without a timeout a stalled connection would block the notebook forever.
    return requests.request("GET", endpoint, params=params, timeout=timeout)

def get_property_detail(api_key, zpid, timeout=60):
    """Request the detail record of one property from the Scrapeak API.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        zpid: Zillow property id, sent as the ``zpid`` query parameter.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests``. Pass ``None`` to wait indefinitely (the
            behaviour before this parameter existed).

    Returns:
        The ``requests`` response object of the GET call; decoding and
        status checks are left to the caller.
    """
    endpoint = "https://app.scrapeak.com/v1/scrapers/zillow/property"
    params = {"api_key": api_key, "zpid": zpid}

    # Without a timeout a stalled connection would block the notebook forever.
    return requests.request("GET", endpoint, params=params, timeout=timeout)

def get_zpid(api_key, street, city, state, zip_code=None, timeout=60):
    """Look up a property's Zillow id (zpid) by address via the Scrapeak API.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        street: Street part of the address.
        city: City part of the address.
        state: State part of the address.
        zip_code: Optional ZIP code. When left as ``None`` it is passed to
            ``requests`` as ``None``, which leaves such parameters out of
            the query string.
        timeout: Seconds to wait on the server before giving up; forwarded
            to ``requests``. Pass ``None`` to wait indefinitely (the
            behaviour before this parameter existed).

    Returns:
        The ``requests`` response object of the GET call; decoding and
        status checks are left to the caller.
    """
    endpoint = "https://app.scrapeak.com/v1/scrapers/zillow/zpidByAddress"
    params = {
        "api_key": api_key,
        "street": street,
        "city": city,
        "state": state,
        "zip_code": zip_code,
    }

    # Without a timeout a stalled connection would block the notebook forever.
    return requests.request("GET", endpoint, params=params, timeout=timeout)

In [77]:
# Read the Scrapeak API key from the environment rather than committing it to
# the notebook. Set it before starting Jupyter, e.g.
#     export SCRAPEAK_API_KEY="<your key>"
# NOTE: a key was previously hardcoded in this cell; treat it as leaked and
# revoke/rotate it.
api_key = os.environ.get("SCRAPEAK_API_KEY", "")
if not api_key:
    # print() rather than warnings.warn(): this notebook silences all warnings.
    print("SCRAPEAK_API_KEY is not set - the API requests below will be sent without a valid key.")

In [78]:
# Zillow rental-search URL for Los Angeles, CA. The `searchQueryState` query
# parameter is URL-encoded JSON holding the search term, the map bounds,
# the region selection, a sort on "days" and a set of filter flags
# (e.g. `fr` = true, presumably "for rent" - confirm against Zillow).
rent_listing_url = "https://www.zillow.com/los-angeles-ca/rentals/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22Los%20Angeles%2C%20CA%22%2C%22mapBounds%22%3A%7B%22west%22%3A-119.21510774414062%2C%22east%22%3A-117.60835725585937%2C%22south%22%3A33.56570519655188%2C%22north%22%3A34.47400545884537%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A12447%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22days%22%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%7D"

# Send the search URL to the Scrapeak listing endpoint; the raw HTTP response
# is kept so that later cells can decode its JSON body.
rent_listing_response = get_listings(api_key, rent_listing_url)

In [79]:
# Decode the JSON body once and reuse it for every check below.
rent_listing_payload = rent_listing_response.json()

# top-level keys of the payload
print(rent_listing_payload.keys())

# success flag reported by the API
print("Request success:", rent_listing_payload["is_success"])

# total result count reported for category "cat1"
num_of_properties = rent_listing_payload["data"]["categoryTotals"]["cat1"]["totalResultCount"]
print("Count of properties:", num_of_properties)

dict_keys(['is_success', 'data', 'message'])
Request success: True
Count of properties: 8193


In [80]:
# Flatten the nested "mapResults" records into one table row per listing.
map_results = rent_listing_response.json()["data"]["cat1"]["searchResults"]["mapResults"]
df_rent_listings = pd.json_normalize(map_results)

# Report the table dimensions, then display the table itself.
n_rows, n_columns = df_rent_listings.shape
print("Number of rows:", n_rows)
print("Number of columns:", n_columns)
df_rent_listings

Number of rows: 500
Number of columns: 81


Unnamed: 0,buildingId,lotId,price,minBeds,minBaths,minArea,streetViewMetadataURL,streetViewURL,imgSrc,plid,isFeaturedListing,isShowcaseListing,unitCount,isHomeRec,isBuilding,address,badgeInfo,statusType,statusText,listingType,isFavorite,detailUrl,has3DModel,hasAdditionalAttributions,canSaveBuilding,timeOnZillow,latLong.latitude,latLong.longitude,zpid,priceLabel,beds,baths,area,isUserClaimingOwner,isUserConfirmedClaim,visited,shouldShowZestimateAsPrice,pgapt,sgapt,hasVideo,availabilityDate,variableData.type,variableData.text,variableData.data.isFresh,hdpData.homeInfo.zpid,hdpData.homeInfo.zipcode,hdpData.homeInfo.city,hdpData.homeInfo.state,hdpData.homeInfo.latitude,hdpData.homeInfo.longitude,hdpData.homeInfo.price,hdpData.homeInfo.bathrooms,hdpData.homeInfo.bedrooms,hdpData.homeInfo.livingArea,hdpData.homeInfo.homeType,hdpData.homeInfo.homeStatus,hdpData.homeInfo.daysOnZillow,hdpData.homeInfo.isFeatured,hdpData.homeInfo.shouldHighlight,hdpData.homeInfo.isRentalWithBasePrice,hdpData.homeInfo.rentZestimate,hdpData.homeInfo.isUnmappable,hdpData.homeInfo.isPreforeclosureAuction,hdpData.homeInfo.homeStatusForHDP,hdpData.homeInfo.priceForHDP,hdpData.homeInfo.isNonOwnerOccupied,hdpData.homeInfo.isPremierBuilder,hdpData.homeInfo.isZillowOwned,hdpData.homeInfo.currency,hdpData.homeInfo.country,hdpData.homeInfo.unit,hasImage,hdpData.homeInfo.zestimate,hdpData.homeInfo.datePriceChanged,hdpData.homeInfo.priceChange,hdpData.homeInfo.taxAssessedValue,buildingName,hdpData.homeInfo.priceReduction,badgeInfo.type,badgeInfo.text,hdpData.homeInfo.videoCount
0,34.087723--118.28199,2.196491e+09,"$4,495+/mo",1.0,1.0,900.0,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/staticmap...,4gga97443qzxj,True,False,2.0,False,True,"906 Hyperion Ave, Los Angeles, CA",,FOR_RENT,For Rent,,False,/b/906-hyperion-ave-los-angeles-ca-9n2sWT/,False,False,True,129722,34.087723,-118.281990,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,,,"$1,795/mo",,,,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/staticmap...,,True,False,,False,,--,,FOR_RENT,Apartment for rent,,False,/homedetails/523-S-Mariposa-Ave-APT-15-Los-Ang...,False,False,,135035,34.064630,-118.299560,2084350992,$1.8K,1.0,1.0,700.0,False,False,False,False,ForRent,For Rent,False,,TIME_ON_INFO,2 minutes ago,True,2.084351e+09,90020,Los Angeles,CA,34.064630,-118.299560,1795.0,1.0,1.0,700.0,APARTMENT,FOR_RENT,-1.0,True,False,False,1984.0,False,False,FOR_RENT,1795.0,True,False,False,USD,USA,Apt 15,,,,,,,,,,
2,,,"$1,850/mo",,,,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/staticmap...,,False,False,,False,,--,,FOR_RENT,Apartment for rent,,False,/homedetails/125-S-Avenue-53-30A-Los-Angeles-C...,False,False,,139514,34.106285,-118.197556,2057843920,$1.9K,1.0,1.0,650.0,False,False,False,False,ForRent,For Rent,False,,TIME_ON_INFO,2 minutes ago,True,2.057844e+09,90042,Los Angeles,CA,34.106285,-118.197556,1850.0,1.0,1.0,650.0,APARTMENT,FOR_RENT,-1.0,False,False,False,,False,False,FOR_RENT,1850.0,True,False,False,USD,USA,# 30A,,,,,,,,,,
3,,,"$7,300/mo",,,,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/streetvie...,https://maps.googleapis.com/maps/api/staticmap...,,False,False,,False,,--,,FOR_RENT,Apartment for rent,,False,/homedetails/1153-W-37th-Dr-1-Los-Angeles-CA-9...,False,False,,149600,34.019302,-118.293260,2057843952,$7.3K,4.0,4.0,1372.0,False,False,False,False,ForRent,For Rent,False,2023-08-10 00:00:00,TIME_ON_INFO,2 minutes ago,True,2.057844e+09,90007,Los Angeles,CA,34.019302,-118.293260,7300.0,4.0,4.0,1372.0,APARTMENT,FOR_RENT,-1.0,False,False,False,,False,False,FOR_RENT,7300.0,True,False,False,USD,USA,# 1,,,,,,,,,,
4,,,"$1,450/mo",,,,,,https://photos.zillowstatic.com/fp/c4209322852...,,False,False,,False,,--,,FOR_RENT,Apartment for rent,,False,/homedetails/648-N-Hobart-Blvd-APT-17-Los-Ange...,False,False,,377446,34.083107,-118.305440,2057843961,$1.5K,0.0,1.0,450.0,False,False,False,False,ForRent,For Rent,False,,TIME_ON_INFO,6 minutes ago,True,2.057844e+09,90004,Los Angeles,CA,34.083107,-118.305440,1450.0,1.0,0.0,450.0,APARTMENT,FOR_RENT,-1.0,False,False,False,,False,False,FOR_RENT,1450.0,True,False,False,USD,USA,Apt 17,True,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,,,"$1,650/mo",,,,,,https://photos.zillowstatic.com/fp/0248aebe231...,,True,False,,False,,--,,FOR_RENT,Apartment for rent,,False,/b/lan8123-van-nuys-ca-9MZzk5/,False,False,,54816112,34.218224,-118.469960,2084080590,$1.7K,1.0,1.0,,False,False,False,False,ForRent,For Rent,False,2023-05-12 00:00:00,TIME_ON_INFO,15 hours ago,True,2.084081e+09,91406,Van Nuys,CA,34.218224,-118.469960,1650.0,1.0,1.0,,APARTMENT,FOR_RENT,-1.0,True,False,False,2094.0,False,False,FOR_RENT,1650.0,True,False,False,USD,USA,Apt 8,True,,,,,,,,,
496,34.055275--118.28785,1.004491e+09,"$1,350+/mo",0.0,1.0,,,,https://photos.zillowstatic.com/fp/ed02d8c65db...,15vq5pfy61uyv,True,False,2.0,False,True,"2743 San Marino St, Los Angeles, CA",,FOR_RENT,san2743,,False,/b/san2743-los-angeles-ca-5YG3jj/,False,False,True,54816665,34.055275,-118.287850,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,,,,,san2743,,,,
497,33.988613--118.47448,1.049494e+09,"$1,945+/mo",0.0,1.0,350.0,,,https://photos.zillowstatic.com/fp/061c4a11304...,mukkh5c0ey2z,False,False,2.0,False,True,"14 Westminster Ave, Venice, CA",,FOR_RENT,For Rent,,False,/b/14-westminster-ave-venice-ca-5j4Y2P/,False,False,False,54902329,33.988613,-118.474480,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,True,,,,,,,,,
498,,,"$2,650/mo",,,,,,https://photos.zillowstatic.com/fp/d7676939287...,,True,False,,False,,--,,FOR_RENT,Apartment for rent,,False,/homedetails/5866-Bowcroft-St-UNIT-2-Los-Angel...,False,False,,54914417,34.018890,-118.374950,20430784,$2.7K,2.0,1.5,,False,False,False,False,ForRent,For Rent,False,,TIME_ON_INFO,15 hours ago,True,2.043078e+07,90016,Los Angeles,CA,34.018890,-118.374950,2650.0,1.5,2.0,,APARTMENT,FOR_RENT,-1.0,True,False,False,2695.0,False,False,FOR_RENT,2650.0,True,False,False,USD,USA,Unit 2,True,565200.0,1.681801e+12,-145.0,439341.0,,$145 (Apr 18),,,


In [81]:
def _price_label_to_dollars(labels):
    """Convert short price labels such as "$1.8K" into dollar amounts.

    Stripping every non-digit character turns "$1.8K" into 18 because the
    decimal point and the "K" suffix are lost. On pandas >= 2.0
    ``str.replace`` is also literal by default, so the pattern would not be
    applied at all. Here the number and its optional K/M suffix are parsed
    explicitly instead.

    Args:
        labels: Series of label strings; missing values are allowed.

    Returns:
        A float Series of dollar amounts, NaN where no number was found. A
        Series that is already numeric is returned unchanged, so the cell can
        be re-run safely.
    """
    if pd.api.types.is_numeric_dtype(labels):
        return labels

    parts = labels.str.extract(r'(?P<amount>[\d,]*\.?\d+)\s*(?P<suffix>[KkMm])?')
    amount = pd.to_numeric(parts['amount'].str.replace(',', '', regex=False), errors='coerce')
    scale = (
        parts['suffix']
        .str.upper()
        .map({'K': 1_000.0, 'M': 1_000_000.0})
        .fillna(1.0)  # no suffix: the number is already in dollars
        .astype(float)
    )
    # round away float noise such as 1100.0000000000002
    return (amount * scale).round(2)


# convert 'priceLabel' (e.g. "$1.8K") to a numeric dollar amount (1800.0)
# NOTE: the label is a rounded display value; the 'price' column carries the
# full listing price text (e.g. "$1,795/mo") if exact figures are needed.
df_rent_listings['priceLabel'] = _price_label_to_dollars(df_rent_listings['priceLabel'])

# mean price label for every distinct (latitude, longitude) pair
df_rent_by_location = (
    df_rent_listings
    .groupby(['latLong.latitude', 'latLong.longitude'])
    .agg({'priceLabel': 'mean'})
    .reset_index()
)

In [92]:
# Reference point: coordinates in the first row of the per-location table.
first_location = df_rent_by_location.iloc[0]
center = (first_location['latLong.latitude'], first_location['latLong.longitude'])

# Keep every listing row whose distance to the reference point is at most 5 km.
filtered_rentals = [
    listing
    for _, listing in df_rent_listings.iterrows()
    if distance((listing['latLong.latitude'], listing['latLong.longitude']), center).km <= 5
]

In [93]:
# Assemble the selected listing rows (a list of Series) into one DataFrame.
df_filtered_rentals = pd.DataFrame(data=filtered_rentals)

In [94]:
# Centre the view and the circle on `center`, the same point the 5 km distance
# filter was measured from. Previously `center` was overwritten here with the
# second filtered listing, so the circle did not show the filter area, and
# `.iloc[1]` failed when only one listing passed the filter.
# NOTE: `map` shadows the Python builtin; the name is kept because the
# following cells refer to it.
map = folium.Map(location=list(center), zoom_start=10)

# radius is given in metres: 5000 m matches the 5 km filter distance
folium.Circle(location=center, radius=5000, color='red', fill=True, fill_color='red', opacity=0.2).add_to(map)

<folium.vector_layers.Circle at 0x7faebdfbf3a0>

In [95]:
# One marker per filtered listing, with its price in the popup.
for _, rental in df_filtered_rentals.iterrows():
    rent = rental['priceLabel']
    # Some rows carry no price label; show "N/A" instead of "$nan".
    if pd.isna(rent):
        popup_text = "Average Rent: N/A"
    else:
        popup_text = "Average Rent: $" + str(round(rent, 2))
    folium.Marker(
        location=[rental['latLong.latitude'], rental['latLong.longitude']],
        popup=popup_text,
    ).add_to(map)

In [96]:
# Bare last expression: shows the map object as this cell's output.
map

In [69]:
# Save the map to 'Final_map.html' (a relative path).
map.save('Final_map.html')