In [26]:
import requests
import json
import pandas as pd
import time

In [27]:
def scrape_properties(city_slug, city_name, status=["for_sale", "ready_to_build"], min_date=None):
    url = 'https://www.realtor.com/api/v1/rdc_search_srp?client_id=rdc-search-for-sale-search&schema=vesta'
    headers = {"content-type": "application/json"}
    limit = 200
    offset = 0
    all_properties = []
    api_total = None

    while api_total is None or offset < api_total:
        query = {
            "status": status,
            "search_location": {
                "location": city_name},
            "type": ["single_family"]
        }

        if "sold" in status:
            query["sold_date"] = {
                "min": min_date
                }
            sort_key = "sort"
            sort = [{
                "field": "sold_date",
                "direction": "desc"
            }, {
                "field": "photo_count", "direction": "desc"
            }]
        else:
            sort_key = "sort_type"
            sort = "relevant"

        graphql_query = """
            query ConsumerSearchQuery(
                $query: HomeSearchCriteria!
                $limit: Int
                $offset: Int
                $search_promotion: SearchPromotionInput
                $sort: [SearchAPISort]
                $sort_type: SearchSortType
                $client_data: JSON
                $bucket: SearchAPIBucket
                ) {
                home_search: home_search(
                    query: $query
                    sort: $sort
                    limit: $limit
                    offset: $offset
                    sort_type: $sort_type
                    client_data: $client_data
                    bucket: $bucket
                    search_promotion: $search_promotion
                ) {
                    count
                    total
                    search_promotion {
                    names
                    slots
                    promoted_properties {
                        id
                        from_other_page
                    }
                    }
                    mortgage_params {
                    interest_rate
                    }
                    properties: results {
                    property_id
                    list_price
                    search_promotions {
                        name
                        asset_id
                    }
                    primary_photo(https: true) {
                        href
                    }
                    rent_to_own {
                        right_to_purchase
                        rent
                    }
                    listing_id
                    matterport
                    virtual_tours {
                        href
                        type
                    }
                    status
                    products {
                        products
                        brand_name
                    }
                    source {
                        id
                        type
                        spec_id
                        plan_id
                        agents {
                        office_name
                        }
                    }
                    lead_attributes {
                        show_contact_an_agent
                        opcity_lead_attributes {
                        cashback_enabled
                        flip_the_market_enabled
                        }
                        lead_type
                        ready_connect_mortgage {
                        show_contact_a_lender
                        show_veterans_united
                        }
                    }
                    community {
                        description {
                        name
                        }
                        property_id
                        permalink
                        advertisers {
                        office {
                            hours
                            phones {
                            type
                            number
                            primary
                            trackable
                            }
                        }
                        }
                        promotions {
                        description
                        href
                        headline
                        }
                    }
                    permalink
                    price_reduced_amount
                    description {
                        name
                        beds
                        baths_consolidated
                        sqft
                        lot_sqft
                        baths_max
                        baths_min
                        beds_min
                        beds_max
                        sqft_min
                        sqft_max
                        type
                        sub_type
                        sold_price
                        sold_date
                    }
                    location {
                        street_view_url
                        address {
                        line
                        postal_code
                        state
                        state_code
                        city
                        coordinate {
                            lat
                            lon
                        }
                        }
                        county {
                        name
                        fips_code
                        }
                    }
                    open_houses {
                        start_date
                        end_date
                    }
                    branding {
                        type
                        name
                        photo
                    }
                    flags {
                        is_coming_soon
                        is_new_listing(days: 14)
                        is_price_reduced(days: 30)
                        is_foreclosure
                        is_new_construction
                        is_pending
                        is_contingent
                    }
                    list_date
                    photos(limit: 2, https: true) {
                        href
                    }
                    advertisers {
                        type
                        builder {
                        name
                        href
                        logo
                        }
                    }
                    }
                }
                commute_polygon: get_commute_polygon(query: $query) {
                    areas {
                    id
                    breakpoints {
                        width
                        height
                        zoom
                    }
                    radius
                    center {
                        lat
                        lng
                    }
                    }
                    boundary
                }
                }
        """

        body = {
            "query": graphql_query,
            "variables": {
                "geoSupportedSlug": city_slug,
                "query": query,
                "client_data": {
                    "device_data": {
                        "device_type": "desktop"
                    }
                },
                "limit": limit,
                "offset": offset,
                sort_key: sort,
                "search_promotion": {
                    "names": ["CITY"],
                    "slots": [],
                    "promoted_properties": []
                }
            },
            "isClient": True,
            "visitor_id": "7ffa9c49-550f-4c23-aa1b-e93786671450"
            }

        response = requests.post(url, headers=headers, json=body)
        data = response.json()

        if 'data' not in data or 'home_search' not in data['data'] or data['data']['home_search'] is None:
            print("Error: Invalid response structure or missing data.")
            print(data)
            break
        
        if api_total is None:
            api_total = data['data']['home_search']['total']
            print(f"Total properties available for {city_name.split(',')[0]}: {api_total}")
        
        try:
            current_batch = data['data']['home_search']['properties']
            all_properties.extend(current_batch)
            print(f"Fetched {len(current_batch)} properties this batch. Total fetched: {len(all_properties)}.")
        except TypeError as e:
            print("Error processing properties:", str(e))
            print(data['data']['home_search'])
            break

        offset += limit

        if len(all_properties) >= api_total:
            break

    return {
        "count": len(all_properties),
        "total": api_total,
        "properties": all_properties
    }

min_date = "2023-06-01"

chicago_data = scrape_properties("Chicato_IL", "Chicago, IL")
print(f"Done fetching {chicago_data['count']} properties selling in Chicago.")
time.sleep(60)

chicago_data_sold = scrape_properties("Chicago_IL", "Chicago, IL", ["sold"], min_date=min_date)
print(f"Done fetching {chicago_data_sold['count']} sold properties in Chicago since {min_date.split('-')[0]}-{min_date.split('-')[1]}.")
time.sleep(60)

new_york_data = scrape_properties("New-York_NY", "New York, NY")
print(f"Done fetching {new_york_data['count']} properties selling in New York.")
time.sleep(60)

new_york_data_sold = scrape_properties("New-York_NY", "New York, NY", ["sold"], min_date=min_date)
print(f"Done fetching {new_york_data_sold['count']} sold properties in New York since {min_date.split('-')[0]}-{min_date.split('-')[1]}.")

Total properties available for Chicago: 3139
Fetched 200 properties this batch. Total fetched: 200.
Fetched 200 properties this batch. Total fetched: 400.
Fetched 200 properties this batch. Total fetched: 600.
Fetched 200 properties this batch. Total fetched: 800.
Fetched 200 properties this batch. Total fetched: 1000.
Fetched 200 properties this batch. Total fetched: 1200.
Fetched 200 properties this batch. Total fetched: 1400.
Fetched 200 properties this batch. Total fetched: 1600.
Fetched 200 properties this batch. Total fetched: 1800.
Fetched 200 properties this batch. Total fetched: 2000.
Fetched 200 properties this batch. Total fetched: 2200.
Fetched 200 properties this batch. Total fetched: 2400.
Fetched 200 properties this batch. Total fetched: 2600.
Fetched 200 properties this batch. Total fetched: 2800.
Fetched 200 properties this batch. Total fetched: 3000.
Fetched 139 properties this batch. Total fetched: 3139.
Done fetching 3139 properties selling in Chicago.
Total propert

In [85]:
chicago_json = chicago_data['properties']
chicago_selling = pd.DataFrame(chicago_json)
chicago_selling.head()

Unnamed: 0,property_id,list_price,search_promotions,primary_photo,rent_to_own,listing_id,matterport,virtual_tours,status,products,...,permalink,price_reduced_amount,description,location,open_houses,branding,flags,list_date,photos,advertisers
0,7109976785,215000.0,,{'href': 'https://ap.rdcpix.com/696e87b0527eda...,,2966364640,False,,for_sale,"{'products': ['core.agent', 'co_broke'], 'bran...",...,9906-S-Seeley-Ave_Chicago_IL_60643_M71099-76785,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'P.R.S. Associates...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-04-18T17:33:51.000000Z,[{'href': 'https://ap.rdcpix.com/696e87b0527ed...,"[{'type': 'seller', 'builder': None}]"
1,9462841010,340000.0,,{'href': 'https://ap.rdcpix.com/82ea120aaba299...,,2968097446,False,[{'href': 'https://kuula.co/share/5vfHt/collec...,for_sale,"{'products': ['core.agent', 'co_broke'], 'bran...",...,650-N-Ridgeway-Ave_Chicago_IL_60624_M94628-41010,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Inherent Homes LL...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-06-03T19:03:15.000000Z,[{'href': 'https://ap.rdcpix.com/82ea120aaba29...,"[{'type': 'seller', 'builder': None}]"
2,8121218584,130000.0,,{'href': 'https://ap.rdcpix.com/fbf76220705e52...,,2969183028,False,,for_sale,"{'products': ['core.agent', 'core.broker', 'co...",...,Chicago_IL_60644_M81212-18584,,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': '@properties East ...","{'is_coming_soon': None, 'is_new_listing': Tru...",2024-07-03T17:05:05.000000Z,[{'href': 'https://ap.rdcpix.com/fbf76220705e5...,"[{'type': 'seller', 'builder': None}]"
3,8027933003,79000.0,,{'href': 'https://ap.rdcpix.com/0bc6e301b88c58...,,2969198384,False,,for_sale,"{'products': ['core.agent', 'core.broker', 'co...",...,11549-S-Church-St_Chicago_IL_60643_M80279-33003,,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': '@properties East ...","{'is_coming_soon': None, 'is_new_listing': Tru...",2024-07-03T22:27:08.000000Z,[{'href': 'https://ap.rdcpix.com/0bc6e301b88c5...,"[{'type': 'seller', 'builder': None}]"
4,8854936353,1200000.0,,{'href': 'https://ap.rdcpix.com/d8974be7019906...,,2968439242,False,,for_sale,"{'products': ['core.agent', 'core.broker', 'co...",...,3015-N-Racine-Ave_Chicago_IL_60657_M88549-36353,,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Dream Town Realty...","{'is_coming_soon': None, 'is_new_listing': Tru...",2024-07-04T01:36:50.000000Z,[{'href': 'https://ap.rdcpix.com/d8974be701990...,"[{'type': 'seller', 'builder': None}]"


In [86]:
chicago_sold_json = chicago_data_sold['properties']
chicago_sold = pd.DataFrame(chicago_sold_json)
chicago_sold.head()

Unnamed: 0,property_id,list_price,search_promotions,primary_photo,rent_to_own,listing_id,matterport,virtual_tours,status,products,...,permalink,price_reduced_amount,description,location,open_houses,branding,flags,list_date,photos,advertisers
0,8692627061,1450000.0,,{'href': 'https://ap.rdcpix.com/9c9d6fb2138ad7...,,2966742347,False,,sold,"{'products': ['core.agent', 'core.broker', 'co...",...,1457-W-Byron-St_Chicago_IL_60613_M86926-27061,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Dream Town Realty...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-04-30T15:34:00.000000Z,[{'href': 'https://ap.rdcpix.com/9c9d6fb2138ad...,"[{'type': 'seller', 'builder': None}]"
1,8327979815,350000.0,,{'href': 'https://ap.rdcpix.com/f262b25b221d0c...,,2966650190,False,,sold,"{'products': ['core.agent', 'listing_owner_bra...",...,2735-W-Saint-Georges-Ct_Chicago_IL_60647_M8327...,,"{'name': None, 'beds': 2, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'RE MAX Loyalty', ...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-05-14T02:34:02.000000Z,[{'href': 'https://ap.rdcpix.com/f262b25b221d0...,"[{'type': 'seller', 'builder': None}]"
2,7515398232,715000.0,,{'href': 'https://ap.rdcpix.com/9965b1bf16e72e...,,2967803327,False,,sold,"{'products': ['core.agent', 'co_broke'], 'bran...",...,5405-W-Ardmore-Ave_Chicago_IL_60646_M75153-98232,,"{'name': None, 'beds': 6, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Northwest Real Es...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-05-28T13:39:42.000000Z,[{'href': 'https://ap.rdcpix.com/9965b1bf16e72...,"[{'type': 'seller', 'builder': None}]"
3,9484646484,950000.0,,{'href': 'https://ap.rdcpix.com/bc25dbc2800687...,,2967971895,False,,sold,"{'products': ['core.agent', 'core.broker', 'co...",...,3915-N-Monticello-Ave_Chicago_IL_60618_M94846-...,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Redfin Corporatio...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-05-30T20:38:15.000000Z,[{'href': 'https://ap.rdcpix.com/bc25dbc280068...,"[{'type': 'seller', 'builder': None}]"
4,8499198111,295000.0,,{'href': 'https://ap.rdcpix.com/6dae60996b3af2...,,2966200044,False,,sold,"{'products': ['core.agent', 'core.broker', 'co...",...,8205-S-Troy-St_Chicago_IL_60652_M84991-98111,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Coldwell Banker R...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-04-16T11:47:29.000000Z,[{'href': 'https://ap.rdcpix.com/6dae60996b3af...,"[{'type': 'seller', 'builder': None}]"


In [87]:
new_york_json = new_york_data['properties']
new_york_selling = pd.DataFrame(new_york_json)
new_york_selling.head()

Unnamed: 0,property_id,list_price,search_promotions,primary_photo,rent_to_own,listing_id,matterport,virtual_tours,status,products,...,permalink,price_reduced_amount,description,location,open_houses,branding,flags,list_date,photos,advertisers
0,3243883955,260000,,{'href': 'https://ap.rdcpix.com/777a0a838fb235...,,2962223385,False,,for_sale,"{'products': ['core.agent', 'co_broke'], 'bran...",...,620-Sinclair-Ave_Staten-Island_NY_10312_M32438...,,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Sowae Corp', 'pho...","{'is_coming_soon': None, 'is_new_listing': Fal...",2023-12-04T23:24:59.000000Z,[{'href': 'https://ap.rdcpix.com/777a0a838fb23...,"[{'type': 'seller', 'builder': None}]"
1,3974591407,99000,,{'href': 'https://ap.rdcpix.com/625c0dbb654a69...,,2965322798,False,,for_sale,"{'products': ['core.agent', 'co_broke'], 'bran...",...,77-City-Blvd_Staten-Island_NY_10301_M39745-91407,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'NYC SHORT SALES B...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-03-20T18:26:05.000000Z,[{'href': 'https://ap.rdcpix.com/625c0dbb654a6...,"[{'type': 'seller', 'builder': None}]"
2,3536803586,250000,,{'href': 'https://ap.rdcpix.com/9febd1c13274aa...,,2961898884,False,,for_sale,"{'products': ['core.agent', 'co_broke'], 'bran...",...,24005-147th-Ave_Rosedale_NY_11422_M35368-03586,,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Sowae Corp', 'pho...","{'is_coming_soon': None, 'is_new_listing': Fal...",2023-11-21T20:27:17.000000Z,[{'href': 'https://ap.rdcpix.com/9febd1c13274a...,"[{'type': 'seller', 'builder': None}]"
3,3204384532,275000,,{'href': 'https://ap.rdcpix.com/bdbd7e0b94a143...,,2926324318,False,,for_sale,"{'products': ['co_broke'], 'brand_name': 'basi...",...,579-E-29th-St_Brooklyn_NY_11210_M32043-84532,,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'ONLY NINE REALTY ...","{'is_coming_soon': None, 'is_new_listing': Fal...",2021-02-24T19:23:01.000000Z,[{'href': 'https://ap.rdcpix.com/bdbd7e0b94a14...,"[{'type': 'seller', 'builder': None}]"
4,3570164952,789000,,{'href': 'https://ap.rdcpix.com/af829816bf4336...,,2969181829,False,[{'href': 'https://app.doaudiotours.com/unbran...,for_sale,"{'products': ['core.agent', 'co_broke'], 'bran...",...,179-Barclay-Ave_Staten-Island_NY_10312_M35701-...,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'JM Properties', '...","{'is_coming_soon': True, 'is_new_listing': Tru...",2024-07-03T16:48:23.000000Z,[{'href': 'https://ap.rdcpix.com/af829816bf433...,"[{'type': 'seller', 'builder': None}]"


In [88]:
new_york_sold_json = new_york_data_sold['properties']
new_york_sold = pd.DataFrame(new_york_sold_json)
new_york_sold.head()

Unnamed: 0,property_id,list_price,search_promotions,primary_photo,rent_to_own,listing_id,matterport,virtual_tours,status,products,...,permalink,price_reduced_amount,description,location,open_houses,branding,flags,list_date,photos,advertisers
0,4621132764,669888.0,,{'href': 'https://ap.rdcpix.com/c76f86607e4658...,,2964473616,False,[{'href': 'https://www.youtube.com/embed/sqUGu...,sold,"{'products': ['core.agent', 'co_broke'], 'bran...",...,319-Green-Valley-Rd_Staten-Island_NY_10312_M46...,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Mark Internationa...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-02-27T14:19:28.000000Z,[{'href': 'https://ap.rdcpix.com/c76f86607e465...,"[{'type': 'seller', 'builder': None}]"
1,3436380921,680000.0,,{'href': 'https://ap.rdcpix.com/12266bf82ade55...,,2965169452,False,,sold,"{'products': ['core.agent', 'co_broke'], 'bran...",...,491-Doane-Ave_Staten-Island_NY_10308_M34363-80921,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Ozana Realty Grou...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-03-15T13:51:28.000000Z,[{'href': 'https://ap.rdcpix.com/12266bf82ade5...,"[{'type': 'seller', 'builder': None}]"
2,3476436735,549999.0,,{'href': 'https://ap.rdcpix.com/046e20b2fc7528...,,2965831609,False,"[{'href': 'https://youtu.be/yyKSXSQyIFc', 'typ...",sold,"{'products': ['core.agent', 'co_broke'], 'bran...",...,274-Dixon-Ave_Staten-Island_NY_10303_M34764-36735,,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Martino Realty Gr...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-04-05T15:12:40.000000Z,[{'href': 'https://ap.rdcpix.com/046e20b2fc752...,"[{'type': 'seller', 'builder': None}]"
3,4915844983,1369000.0,,{'href': 'https://ap.rdcpix.com/cf6a702ab6e617...,,2963798007,False,,sold,"{'products': ['core.agent', 'co_broke'], 'bran...",...,1327-84th-St_Brooklyn_NY_11228_M49158-44983,60000.0,"{'name': None, 'beds': 3, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Brooklyn4U Rltyof...","{'is_coming_soon': None, 'is_new_listing': Fal...",2024-02-05T20:30:26.000000Z,[{'href': 'https://ap.rdcpix.com/cf6a702ab6e61...,"[{'type': 'seller', 'builder': None}]"
4,4877937449,559000.0,,{'href': 'https://ap.rdcpix.com/f069417e235ee9...,,2959829830,False,,sold,"{'products': ['core.agent', 'co_broke'], 'bran...",...,132-Elm-St_Staten-Island_NY_10310_M48779-37449,10000.0,"{'name': None, 'beds': 4, 'baths_consolidated'...",{'street_view_url': 'https://maps.googleapis.c...,,"[{'type': 'Office', 'name': 'Coldwell Banker A...","{'is_coming_soon': None, 'is_new_listing': Fal...",2023-09-20T15:50:10.000000Z,[{'href': 'https://ap.rdcpix.com/f069417e235ee...,"[{'type': 'seller', 'builder': None}]"


In [101]:
def extract_data(properties):
    extracted_data = []

    for property in properties:
        id = property.get('property_id', None)
        permalink = property.get('permalink', None)
        post_link = "https://www.realtor.com/realestateandhomes-detail/" + permalink if permalink else None
        price = property.get('list_price', None)
        
        location = property.get('location', {})
        address_line = location.get('address', {}).get('line', None)
        city = location.get('address', {}).get('city', None)
        state_code = location.get('address', {}).get('state_code', None)
        postal_code = location.get('address', {}).get('postal_code', None)
        address = f"{address_line}, {city}, {state_code} {postal_code}" if all([address_line, city, state_code, postal_code]) else None

        status = property.get('status', None)
        status = status.upper() if status else None

        description = property.get('description', {})
        area = description.get('sqft', None)
        bedrooms = description.get('beds', None)
        bathrooms = description.get('baths_consolidated', None)


        coordinate = location.get('address', {}).get('coordinate', None)
        latitude = coordinate['lat'] if coordinate else None
        longitude = coordinate['lon'] if coordinate else None

        extracted_data.append({
            'Data Source': 'https://www.realtor.com/',
            'ID': id,
            'Post link': post_link,
            'Price': price,
            'Address': address,
            'Status': status,
            'Area': area,
            'Bedrooms': bedrooms,
            'Bathrooms': bathrooms,
            'Latitude': latitude,
            'Longitude': longitude
        })

    return extracted_data

batch = [chicago_json, chicago_sold_json, new_york_json, new_york_sold_json]
extracted_batches = [[], [], [], []]

for i, properties in enumerate(batch):
    extracted_batches[i] = extract_data(properties)

In [102]:
chicago_selling_extracted = pd.DataFrame(extracted_batches[2])
chicago_selling_extracted.head()

Unnamed: 0,Data Source,ID,Post link,Price,Address,Status,Area,Bedrooms,Bathrooms,Latitude,Longitude
0,https://www.realtor.com/,3243883955,https://www.realtor.com/realestateandhomes-det...,260000,"620 Sinclair Ave, Staten Island, NY 10312",FOR_SALE,2015.0,4,2.0,40.541781,-74.196109
1,https://www.realtor.com/,3974591407,https://www.realtor.com/realestateandhomes-det...,99000,"77 City Blvd, Staten Island, NY 10301",FOR_SALE,1176.0,3,1.0,40.628757,-74.104166
2,https://www.realtor.com/,3536803586,https://www.realtor.com/realestateandhomes-det...,250000,"240-05 147 Ave, Rosedale, NY 11422",FOR_SALE,2304.0,4,2.0,40.657539,-73.743602
3,https://www.realtor.com/,3204384532,https://www.realtor.com/realestateandhomes-det...,275000,"579 E 29th St, Brooklyn, NY 11210",FOR_SALE,,4,1.0,40.636665,-73.948744
4,https://www.realtor.com/,3570164952,https://www.realtor.com/realestateandhomes-det...,789000,"179 Barclay Ave, Staten Island, NY 10312",FOR_SALE,1150.0,3,2.5,40.538502,-74.174199


In [103]:
chicago_sold_extracted = pd.DataFrame(extracted_batches[3])
chicago_sold_extracted.head()

Unnamed: 0,Data Source,ID,Post link,Price,Address,Status,Area,Bedrooms,Bathrooms,Latitude,Longitude
0,https://www.realtor.com/,4621132764,https://www.realtor.com/realestateandhomes-det...,669888.0,"319 Green Valley Rd, Staten Island, NY 10312",SOLD,1614.0,3.0,2.0,40.549982,-74.193644
1,https://www.realtor.com/,3436380921,https://www.realtor.com/realestateandhomes-det...,680000.0,"491 Doane Ave, Staten Island, NY 10308",SOLD,1439.0,3.0,3.5,40.562004,-74.157873
2,https://www.realtor.com/,3476436735,https://www.realtor.com/realestateandhomes-det...,549999.0,"274 Dixon Ave, Staten Island, NY 10303",SOLD,1205.0,3.0,1.5,40.628432,-74.151338
3,https://www.realtor.com/,4915844983,https://www.realtor.com/realestateandhomes-det...,1369000.0,"1327 84th St, Brooklyn, NY 11228",SOLD,1920.0,3.0,2.5,40.614938,-74.011767
4,https://www.realtor.com/,4877937449,https://www.realtor.com/realestateandhomes-det...,559000.0,"132 Elm St, Staten Island, NY 10310",SOLD,1292.0,4.0,2.0,40.63851,-74.114339


In [104]:
new_york_selling_extracted = pd.DataFrame(extracted_batches[0])
new_york_selling_extracted.head()

Unnamed: 0,Data Source,ID,Post link,Price,Address,Status,Area,Bedrooms,Bathrooms,Latitude,Longitude
0,https://www.realtor.com/,7109976785,https://www.realtor.com/realestateandhomes-det...,215000.0,"9906 S Seeley Ave, Chicago, IL 60643",FOR_SALE,1567.0,3,2,41.713489,-87.673732
1,https://www.realtor.com/,9462841010,https://www.realtor.com/realestateandhomes-det...,340000.0,"650 N Ridgeway Ave, Chicago, IL 60624",FOR_SALE,1445.0,3,2,,
2,https://www.realtor.com/,8121218584,https://www.realtor.com/realestateandhomes-det...,130000.0,,FOR_SALE,888.0,4,2,41.884098,-87.754854
3,https://www.realtor.com/,8027933003,https://www.realtor.com/realestateandhomes-det...,79000.0,"11549 S Church St, Chicago, IL 60643",FOR_SALE,1386.0,4,2,41.683247,-87.669495
4,https://www.realtor.com/,8854936353,https://www.realtor.com/realestateandhomes-det...,1200000.0,"3015 N Racine Ave, Chicago, IL 60657",FOR_SALE,3100.0,4,4,41.93674,-87.658509


In [105]:
new_york_sold_extracted = pd.DataFrame(extracted_batches[1])
new_york_sold_extracted.head()

Unnamed: 0,Data Source,ID,Post link,Price,Address,Status,Area,Bedrooms,Bathrooms,Latitude,Longitude
0,https://www.realtor.com/,8692627061,https://www.realtor.com/realestateandhomes-det...,1450000.0,"1457 W Byron St, Chicago, IL 60613",SOLD,3300.0,3.0,3.5,41.952312,-87.666381
1,https://www.realtor.com/,8327979815,https://www.realtor.com/realestateandhomes-det...,350000.0,"2735 W Saint Georges Ct, Chicago, IL 60647",SOLD,900.0,2.0,1.0,41.921132,-87.696639
2,https://www.realtor.com/,7515398232,https://www.realtor.com/realestateandhomes-det...,715000.0,"5405 W Ardmore Ave, Chicago, IL 60646",SOLD,2200.0,6.0,3.0,41.985516,-87.763739
3,https://www.realtor.com/,9484646484,https://www.realtor.com/realestateandhomes-det...,950000.0,"3915 N Monticello Ave, Chicago, IL 60618",SOLD,3200.0,3.0,3.5,41.952424,-87.718648
4,https://www.realtor.com/,8499198111,https://www.realtor.com/realestateandhomes-det...,295000.0,"8205 S Troy St, Chicago, IL 60652",SOLD,1200.0,3.0,2.0,41.744117,-87.700664


In [106]:
# for carefree data processing
chicago_selling_extracted = chicago_selling_extracted.dropna().drop_duplicates()
chicago_sold_extracted = chicago_sold_extracted.dropna().drop_duplicates()
new_york_selling_extracted = new_york_selling_extracted.dropna().drop_duplicates()
new_york_sold_extracted = new_york_sold_extracted.dropna().drop_duplicates()


In [109]:
chicago_selling_extracted.to_csv('chicago_realtor_selling.csv', index=False)
chicago_sold_extracted.to_csv('chicago_realtor_sold.csv', index=False)
new_york_selling_extracted.to_csv('new_york_realtor_selling.csv', index=False)
new_york_sold_extracted.to_csv('new_york_realtor_sold.csv', index=False)