In [1]:
import json
import os
import time

import pandas as pd
import requests

# Display every column of a DataFrame: passing None removes the display.max_columns limit.
pd.set_option('display.max_columns', None)

In [2]:
##Home Types: Houses, Townhomes, Multi_family, Condos/Co-ops, Lots/Land, Apartments, Manufactured.

##We will look at Zillow real estate listings in these cities:
##Fort Worth
##Arlington
##Irving
##Southlake
##Grapevine
##Grand Prairie
##Dallas
##Plano
##Garland
##Lewisville
##Keller

# City names to search, all lowercase; every one is paired with the same state code below.
cities = [
    'fort worth', 'arlington', 'irving', 'southlake', 'grapevine', 'grand prairie',
    'dallas', 'plano', 'garland', 'lewisville', 'keller',
]
state = 'tx'

# One "<city>, <state>" location string per city, in the same order as `cities`.
search_str = [f"{name}, {state}" for name in cities]
print('Search string for all cities:', search_str)

Search string for all cities: ['fort worth, tx', 'arlington, tx', 'irving, tx', 'southlake, tx', 'grapevine, tx', 'grand prairie, tx', 'dallas, tx', 'plano, tx', 'garland, tx', 'lewisville, tx', 'keller, tx']


### Data pulling (Using Zillow.com on rapidApi by apimaker)

In [3]:
##Keep the search below 25 pages, api only uses agent listings
##Keep them in a range price to pull out better data
#Like 0-100000, 100001-200000, 200001-300000, and so forth
#We can get the page number right at the end

#There are houses listed at a price of 0 dollars, so we have to remove those instances

In [2]:
# Read the RapidAPI key from the RAPIDAPI_KEY environment variable so the real key never
# has to be saved inside the notebook. When the variable is unset, the original
# placeholder string is used, so the old "paste your key here" workflow still works.
api_key = os.environ.get('RAPIDAPI_KEY', 'put your key here')

In [3]:
# (min_price, max_price) bands used to split each search into smaller requests.
# A max_price of 0 is the "no upper bound" marker understood by properties_getter.
price_range_list = [(0, 100000), (100001, 200000), (200001, 300000), (300001, 400000),
                    (400001, 500000), (500001, 600000), (600001, 700000), (700001, 800000), (800001, 0)]

##Price range is a tuple of 2 numbers
def properties_getter(home_type, city, price_range, pause=2.5, timeout=30):
    """Fetch every result page of a property search for one location and price band.

    Parameters
    ----------
    home_type : str
        Sent as the ``home_type`` query parameter (e.g. ``'Houses'``).
    city : str
        Sent as the ``location`` query parameter (e.g. ``'fort worth, tx'``).
    price_range : tuple
        ``(min_price, max_price)``. A ``max_price`` of 0 means "no upper bound":
        the ``maxPrice`` query parameter is then left out of the request.
    pause : float, optional
        Seconds to sleep after each successful request (default 2.5, the value the
        notebook always used; presumably there to stay under the API rate limit).
    timeout : float, optional
        Per-request timeout in seconds, so a stalled connection cannot hang the
        whole scrape.

    Returns
    -------
    list of dict, or the string ``"None"``
        One decoded JSON payload per result page, first page first. When the first
        page reports ``totalResultCount == 0`` the literal string ``"None"`` is
        returned instead; existing callers compare against that string, so the
        sentinel is kept as is.

    Raises
    ------
    requests.HTTPError
        If any request comes back with a 4xx/5xx status. Failing here gives a clear
        error instead of a later ``KeyError`` on a payload without ``totalPages``.
    """
    url = "https://zillow-com1.p.rapidapi.com/propertyExtendedSearch"
    headers = {
        "X-RapidAPI-Host": "zillow-com1.p.rapidapi.com",
        "X-RapidAPI-Key": api_key}

    min_price = price_range[0]
    max_price = price_range[1]

    # Parameters shared by every page of this search.
    base_query = {"location": city, "home_type": home_type, "minPrice": min_price}
    if max_price != 0:
        base_query["maxPrice"] = max_price

    def _fetch_page(page=None):
        """Request a single page (the first one when ``page`` is None) and decode it."""
        query = dict(base_query)
        if page is not None:
            query["page"] = page
        response = requests.get(url, headers=headers, params=query, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
        # Sleep after every request, including the last one of a search, so that
        # back-to-back calls from the scraping loops stay spaced out as before.
        time.sleep(pause)
        return payload

    ##The first page tells us how many results and pages exist for this search
    first_page = _fetch_page()
    if first_page['totalResultCount'] == 0:
        return "None"

    all_responses = [first_page]

    ##Pull the remaining pages (2 .. totalPages), if there are any
    total_pages = first_page.get('totalPages') or 1
    for page_number in range(2, total_pages + 1):
        all_responses.append(_fetch_page(page_number))

    return all_responses

In [17]:
# Spot check: fetch one city / price band and display what properties_getter returns
# (a list with one response dictionary per result page).
fw2_3 = properties_getter('Houses', 'fort worth, tx', (300001, 400000))
fw2_3

[{'props': [{'dateSold': None,
    'propertyType': 'SINGLE_FAMILY',
    'lotAreaValue': 0.258,
    'address': '3416 Lawndale Ave, Fort Worth, TX 76133',
    'daysOnZillow': -1,
    'price': 365000,
    'listingDateTime': None,
    'longitude': -97.366295,
    'latitude': 32.65684,
    'contingentListingType': None,
    'listingStatus': 'FOR_SALE',
    'zpid': '29109397',
    'listingSubType': {'is_FSBA': True},
    'imgSrc': 'https://photos.zillowstatic.com/fp/b2c7e15411c44b3d9b1e4ab69c5e921b-p_e.jpg',
    'livingArea': 2215,
    'bathrooms': 3,
    'lotAreaUnit': 'acres',
    'country': 'USA',
    'currency': 'USD',
    'bedrooms': 4,
    'hasImage': True},
   {'dateSold': None,
    'propertyType': 'SINGLE_FAMILY',
    'lotAreaValue': 7187.4,
    'address': '8033 Moss Rock Dr, Fort Worth, TX 76123',
    'daysOnZillow': -1,
    'price': 325000,
    'listingDateTime': None,
    'longitude': -97.38204,
    'latitude': 32.624577,
    'contingentListingType': None,
    'listingStatus': 'FO

#### Do this for all cities at different price ranges (Houses)

In [13]:
# Build one DataFrame per result page for every (city, price band) combination.
home_type = "Houses"
all_dfs = []
for city in search_str:
    for price_range in price_range_list:
        response_city = properties_getter(home_type, city, price_range)
        if response_city == 'None':
            # properties_getter returns the string 'None' when the band has no listings.
            continue
        # Each page dictionary keeps its listings under the 'props' key.
        all_dfs.extend(pd.json_normalize(data=page['props']) for page in response_city)

In [14]:
# Stack the per-page frames row-wise and renumber the rows 0..n-1 in one step.
df = pd.concat(all_dfs, axis=0, ignore_index=True)
df

Unnamed: 0,dateSold,propertyType,lotAreaValue,address,daysOnZillow,price,listingDateTime,longitude,latitude,contingentListingType,listingStatus,zpid,imgSrc,livingArea,bathrooms,lotAreaUnit,country,currency,bedrooms,hasImage,listingSubType.is_FSBA,listingSubType.is_newHome,listingSubType.is_openHouse
0,,SINGLE_FAMILY,7187.4,"2510 Dundee Ave, Fort Worth, TX 76106",-1,89900,,-97.324875,32.79195,,FOR_SALE,28965889,https://photos.zillowstatic.com/fp/38411e3b1b6...,572.0,1,sqft,USA,USD,1.0,True,True,,
1,,SINGLE_FAMILY,,"Available Soon Plan, Bailey Park",-1,0,,-97.406490,32.88091,,FOR_SALE,2063150394,https://photos.zillowstatic.com/fp/d45a35d4c93...,,0,,USA,USD,,True,,True,
2,,SINGLE_FAMILY,,"Available Soon Plan, Chisholm Trail Ranch",-1,0,,-97.406800,32.60520,,FOR_SALE,2066907485,https://photos.zillowstatic.com/fp/89d93853dde...,,0,,USA,USD,,True,,True,
3,,SINGLE_FAMILY,,"Available Soon Plan, Risinger Court",-1,0,,-97.374300,32.61520,,FOR_SALE,2067027570,https://photos.zillowstatic.com/fp/c8c6240ccc4...,,0,,USA,USD,,True,,True,
4,,SINGLE_FAMILY,,"Available Soon Plan, Keller Crossing",-1,0,,-97.255970,32.93925,,FOR_SALE,2066657560,https://photos.zillowstatic.com/fp/d45a35d4c93...,,0,,USA,USD,,True,,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3274,,SINGLE_FAMILY,,"Glenmere Plan, Gean Estates",-1,958999,,-97.236800,32.94240,,FOR_SALE,2093819676,https://photos.zillowstatic.com/fp/1836b802b65...,3603,4,,USA,USD,3,True,,True,
3275,,SINGLE_FAMILY,,"Westbrook Plan, Gean Estates",-1,938999,,-97.236800,32.94240,,FOR_SALE,2093819679,https://photos.zillowstatic.com/fp/a002b7a0d38...,3168,3,,USA,USD,3,True,,True,
3276,,SINGLE_FAMILY,,"Legacy Plan, Gean Estates",-1,992999,,-97.236800,32.94240,,FOR_SALE,2093819678,https://photos.zillowstatic.com/fp/fde336a1348...,3818,4,,USA,USD,4,True,,True,
3277,,SINGLE_FAMILY,0.689,"2505 Bella Rdg, Keller, TX 76262",-1,2979000,,-97.187920,32.95122,,FOR_SALE,337104319,https://maps.googleapis.com/maps/api/staticmap...,6683,6,acres,USA,USD,5,,,True,


# Using counties instead of cities to get more data

In [18]:
##These are all for sale houses
##We should look at counties instead of cities to pull out more data!
##North Texas Counties:
##Wise
##Denton
##Collin
##Parker
##Tarrant
##Dallas
##Rockwall
##Kaufman
##Johnson
##Ellis

##We can reuse the same function above: properties_getter

In [4]:
# County names to search, all lowercase; ' county' and the state code are appended below.
counties = [
    'wise', 'denton', 'collin', 'parker', 'tarrant',
    'dallas', 'rockwall', 'kaufman', 'johnson', 'ellis',
]

state = 'tx'

# One "<name> county, <state>" location string per county, in the same order as `counties`.
search_str2 = [f"{name} county, {state}" for name in counties]
print('Search string for all counties:', search_str2)

Search string for all counties: ['wise county, tx', 'denton county, tx', 'collin county, tx', 'parker county, tx', 'tarrant county, tx', 'dallas county, tx', 'rockwall county, tx', 'kaufman county, tx', 'johnson county, tx', 'ellis county, tx']


In [5]:
##The outer loop goes through each of the counties,
#then the inner loop goes through each price range,
#and for every (county, price range) pair we pull the data from all result pages.
##properties_getter returns a list of dictionaries (one per page); each dictionary
##has a key named 'props' that holds the list of houses on that page.
##Each of those lists is then normalized into a DataFrame.

# Build one DataFrame per result page for every (county, price band) combination.
home_type = "Houses"
all_dfs2 = []
for county in search_str2:
    for price_range in price_range_list:
        response_county = properties_getter(home_type, county, price_range)
        if response_county == 'None':
            # properties_getter returns the string 'None' when the band has no listings.
            continue
        # Each page dictionary keeps its listings under the 'props' key.
        all_dfs2.extend(pd.json_normalize(data=page['props']) for page in response_county)

In [6]:
# Stack the per-page county frames row-wise and renumber the rows 0..n-1 in one step.
df2 = pd.concat(all_dfs2, axis=0, ignore_index=True)
df2

Unnamed: 0,dateSold,propertyType,lotAreaValue,address,priceChange,zestimate,imgSrc,price,bedrooms,contingentListingType,longitude,latitude,listingStatus,zpid,rentZestimate,daysOnZillow,bathrooms,livingArea,country,currency,lotAreaUnit,hasImage,variableData.text,variableData.type,listingSubType.is_FSBA,variableData,newConstructionType,unit,listingSubType.is_newHome,listingSubType.is_openHouse,listingSubType.is_bankOwned
0,,SINGLE_FAMILY,0.287,"210 Boling St, Alvord, TX 76225",-10000.0,,https://photos.zillowstatic.com/fp/3c58760b725...,99000,2.0,,-97.691216,33.360535,FOR_SALE,78947318,1350.0,-1,2,1080.0,USA,USD,acres,True,"$10,000 (Mar 11)",PRICE_REDUCTION,True,,,,,,
1,,SINGLE_FAMILY,0.5,"307 S Buffalo St, Chico, TX 76431",,,https://maps.googleapis.com/maps/api/staticmap...,75000,3.0,,-97.798035,33.291977,FOR_SALE,220098026,,11,2,1152.0,USA,USD,acres,,,,True,,,,,,
2,,SINGLE_FAMILY,,"Homes Available Soon Plan, Runaway Bay",,,https://maps.googleapis.com/maps/api/staticmap...,0,,,-97.87063,33.17075,FOR_SALE,2062186179,1350.0,7,0,,USA,USD,,,,,,,BUILDER_PLAN,# G00X42,True,,
3,,SINGLE_FAMILY,1.601,"4654 Fm 2264, Decatur, TX 76234",-10100.0,162501.0,https://photos.zillowstatic.com/fp/fa2eac4d679...,164900,5,,-97.439285,33.164368,FOR_SALE,239956572,2213.0,-1,2,1620,USA,USD,acres,True,"$10,100 (Mar 14)",PRICE_REDUCTION,True,,,,,,
4,,SINGLE_FAMILY,7013.16,"212 S Allen St, Boyd, TX 76023",-18000.0,176351.0,https://photos.zillowstatic.com/fp/677bc38643b...,180000,3,,-97.5655,33.076668,FOR_SALE,78938508,1863.0,-1,2,1197,USA,USD,sqft,True,"$18,000 (Mar 17)",PRICE_REDUCTION,True,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17964,,SINGLE_FAMILY,9880.0,"4750 Brinker St, Prosper, TX 75078",,,https://photos.zillowstatic.com/fp/21306298ff5...,1119990,5,,-96.87871,32.24284,FOR_SALE,2058833127,,-1,6,4212,USA,USD,sqft,True,Open: Monday - Saturday...,OPEN_HOUSE,,,BUILDER_SPEC,,True,,
17965,,SINGLE_FAMILY,93.28,"2955 Fm 667, Italy, TX 76651",1.0,,https://photos.zillowstatic.com/fp/eade0b23495...,1000000,3,,-96.83913,32.12741,FOR_SALE,98910632,1450.0,-1,1,1422,USA,USD,acres,True,Video Walkthrough,VIDEO_WALKTHROUGH,True,,,,,,
17966,,SINGLE_FAMILY,1.003,"2206 Alyssum Dr, Cedar Hill, TX 75104",-20000.0,924235.0,https://photos.zillowstatic.com/fp/4f5b3128d76...,955000,4,,-97.011894,32.5438,FOR_SALE,98898464,1424.0,116,4,4398,USA,USD,acres,True,,,,,BUILDER_SPEC,,True,,
17967,,SINGLE_FAMILY,2.985,"1108 W Lampasas St, Ennis, TX 75119",,,https://photos.zillowstatic.com/fp/2957a4c0688...,899900,2,,-96.635216,32.31849,FOR_SALE,27361004,1801.0,270,1,1364,USA,USD,acres,True,,,True,,,,,,


In [7]:
# Save the raw county pull. DataFrame.to_csv does not create missing parent folders,
# so make sure the output directory exists first; otherwise the write fails after the
# long scrape has already finished.
raw_output_path = "Raw_API_propertyExtendedSearch/Raw_data_for_sales_houses_counties_March_2023.csv"
os.makedirs(os.path.dirname(raw_output_path), exist_ok=True)
df2.to_csv(raw_output_path, index=False)