## Data aquiring date: 10/16/2020

# Import modules

In [1]:
import json
from pprint import pprint
import glob
from datetime import datetime
import requests
import pandas as pd

# Self-define functions

In [2]:
## function for realtor api requests

def realtor_api_request(sort = "relevance", city = "Philadelphia", limit = "200", offset = "0", state_code = "PA"):
    url = "https://realtor.p.rapidapi.com/properties/v2/list-for-sale"

    querystring = {
        "sort": sort,
        "city": city,
        "limit": limit,
        "offset": offset,
        "state_code": state_code
    }

    headers = {
        'x-rapidapi-host': "realtor.p.rapidapi.com",
        'x-rapidapi-key': "e2aef74c96msh10b708b155c5b67p1e6d19jsn4cb3a156a37d"
        }

    response = requests.request("GET", url, headers=headers, params=querystring)
    
    return response.json()

In [3]:
## convert response to pandas df

def process_response(response_json):
    '''
    This function is to convert each request result to a dataframe.
    
    1. create an empty list
    2. loop for each response and get details from key 'properties' 
    3. convert details to df
    4. append single df to list
    5. concat the list to one df
    
    '''
    
    # empty list
    convert_list=[]
#     if response_json['properties']: return pd.DataFrame([])
    # loop through each 'properties'
    for col in response_json['properties']:
        
        # convert details to dataframe
        single_df = pd.DataFrame.from_dict(col, orient='index').T
        
        # append to list
        convert_list.append(single_df)
        
    # concat to a whole df, null for missing vals
    return pd.concat(convert_list, axis = 0, ignore_index=True, sort=False)

# Get data

In [4]:
## run for each offset
'''
There are 200 instances in each request. Restricted by Rapidapi
'''
def response_to_df(sort = "relevance", city = "Philadelphia", limit = "200", offset = "0", state_code = "PA"):
    property_for_sale = realtor_api_request(sort = sort, city = city, limit = limit, offset = offset, state_code = state_code)
    df_properties_for_sale_raw = process_response(response_json = property_for_sale)
    pd.set_option('display.max_columns', None)
    return df_properties_for_sale_raw


# Export response to csv file

In [5]:
## export each response to csv file
'''
Since each request could get 200 instances, after the each request, we increase the offset 200.
So on the second run, we can get data from 201 - 400, theoratically.
However, the system will keep updating the data so we may either get the data overlapped or miss some data.
We will deal with duplicated data later.

Currently, the loop will crash when the instances in the last run is less than 200. 
'''

start = datetime.now() ## to see how long will it finish the run
i=1
for offset in range(0, 9562, 200):
    response_to_df(offset = offset).to_csv(f'test{i}.csv',encoding='utf-8', index=False)
    if i % 10 == 0:
        print(f'The {i}th Time')
    i += 1    
end = datetime.now() -start

print(f'Total time is: {end}')

The 10th Time
The 20th Time
The 30th Time
The 40th Time
Total time is: 0:02:54.604970


# Data preprocessing

In [6]:
## import all csv to one df

path = r'E:\Projects\Drexel\DSCI 591 Capstone I\Projects\csv'
all_files = glob.glob(path + "/*.csv")

## empty list
file_list = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    file_list.append(df)
    
combined_df = pd.concat(file_list, axis = 0, ignore_index=True)

combined_df

Unnamed: 0,property_id,listing_id,rdc_web_url,prop_type,prop_sub_type,address,branding,prop_status,price,baths_full,baths,beds,building_size,agents,office,last_update,client_display_flags,lead_forms,photo_count,thumbnail,page_no,rank,list_tracking,lot_size,mls,baths_half,virtual_tour,open_houses,plan_id,new_plan,quick_to_sell_days
0,M4046594895,2.922386e+09,https://www.realtor.com/realestateandhomes-det...,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '1516 N 62nd ...",{'listing_office': {'list_item': {'name': 'Arc...,for_sale,249900,3.0,3,6.0,"{'size': 1632, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '1291281',...","{'id': '3863c258f6142e2a2340175af3f64df0', 'na...",2020-10-13T17:54:05Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",9,https://ap.rdcpix.com/509b1ca1768ae6574e170ca7...,1,1,type|property|data|prop_id|4046594895|list_id|...,"{'size': 1842, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH944934', 'pla...",,,,,,
1,M3939384476,2.918257e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '6102 Reedlan...",{'listing_office': {'list_item': {'name': 'Vih...,for_sale,116800,1.0,1,3.0,"{'size': 1092, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '347285', ...","{'id': 'a73ac4220ff5775a3f069bf0acab8e50', 'na...",2020-10-13T18:18:18Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",7,https://ap.rdcpix.com/f6ca6372b4cb434fd09d14b6...,1,2,type|property|data|prop_id|3939384476|list_id|...,"{'size': 992, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH911464', 'pla...",,,,,,
2,M4036371277,2.922385e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '5703 N 13th ...",{'listing_office': {'list_item': {'name': 'Pre...,for_sale,215000,1.0,2,3.0,"{'size': 1360, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': 'Kev...","{'id': '37e66a23ad2c2f122fe75b7df00955a5', 'na...",2020-10-13T17:24:20Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",35,https://ap.rdcpix.com/28d9bd1f5b0fdcb2de5b7aca...,1,3,type|property|data|prop_id|4036371277|list_id|...,"{'size': 1501, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH945010', 'pla...",1.0,,,,,
3,M3553029343,2.922384e+09,https://www.realtor.com/realestateandhomes-det...,single_family,,"{'city': 'Philadelphia', 'line': '1009 Rhawn S...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,394800,1.0,2,3.0,"{'size': 1856, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '4759', 'i...","{'id': 'd4965070728907fc00e98d31065c1b62', 'na...",2020-10-13T17:11:54Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",123,https://ap.rdcpix.com/792a419aa85110ba7780b21b...,1,4,type|property|data|prop_id|3553029343|list_id|...,"{'size': 15913, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH943096', 'pla...",1.0,,,,,
4,M3649199107,2.922384e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '3850 N Bouvi...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,130000,1.0,2,3.0,"{'size': 1180, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '391546', ...","{'id': 'a90ca2a01b749ef58fe768c91859baae', 'na...",2020-10-13T17:02:13Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",33,https://ap.rdcpix.com/bcbe5fa622b831b251d80654...,1,5,type|property|data|prop_id|3649199107|list_id|...,"{'size': 1240, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH918016', 'pla...",1.0,{'href': 'https://view.ricohtours.com/0701bde8...,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9757,M3400474681,2.869904e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '2077 Bridge ...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,94900,1.0,1,4.0,"{'size': 1296, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': ''}]","{'id': '334bcccadbd3750e0a22a4d8f540d8cf', 'na...",2020-10-05T12:27:53Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",18,https://ap.rdcpix.com/51a31645187a5499fb26d5c8...,42,33,type|property|data|prop_id|3400474681|list_id|...,"{'size': 1064, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858856', 'pla...",,,,,,
9758,M3654238233,2.890369e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '1452 N 57th ...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,149900,1.0,2,3.0,"{'size': 1026, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '950515', ...","{'id': 'ff5c6c7cc6cb2afb6004190e153cd8eb', 'na...",2020-10-01T06:57:05Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",14,https://ap.rdcpix.com/b50a71041cca0f93050c6339...,42,34,type|property|data|prop_id|3654238233|list_id|...,"{'size': 1231, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858898', 'pla...",1.0,,,,,
9759,M9041602444,2.704677e+09,https://www.realtor.com/realestateandhomes-det...,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '7354 Ridge A...",{'listing_office': {'list_item': {'name': 'BHH...,for_sale,432990,3.0,4,3.0,"{'size': 1614, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '948779', ...","{'id': 'b0eb9dcb0e39719cd69a04a2b34a18d0', 'na...",2020-10-06T09:19:02Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",2,https://ap.rdcpix.com/a60365ae378f77c264810299...,42,35,type|property|data|prop_id|9041602444|list_id|...,,"{'name': 'BrightMLS', 'id': 'PAPH838626', 'pla...",1.0,,,,,
9760,M3276555576,2.915162e+09,https://www.realtor.com/realestateandhomes-det...,condo,condos,"{'city': 'Philadelphia', 'line': '1003 N Bodin...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,405000,2.0,2,2.0,"{'size': 1020, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '2229603',...","{'id': 'ff5c6c7cc6cb2afb6004190e153cd8eb', 'na...",2020-10-05T10:35:58Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",25,https://ap.rdcpix.com/cfa25d32ea17b0bd22957d23...,42,36,type|property|data|prop_id|3276555576|list_id|...,,"{'name': 'BrightMLS', 'id': 'PAPH888146', 'pla...",,{'href': 'https://my.matterport.com/show/?m=eR...,,,,


There are 9762 instances we achieved. However, there should be some duplicated instances. 

In [7]:
## remove duplicated instances
raw_data = combined_df.drop_duplicates(subset=['property_id'])
raw_data

Unnamed: 0,property_id,listing_id,rdc_web_url,prop_type,prop_sub_type,address,branding,prop_status,price,baths_full,baths,beds,building_size,agents,office,last_update,client_display_flags,lead_forms,photo_count,thumbnail,page_no,rank,list_tracking,lot_size,mls,baths_half,virtual_tour,open_houses,plan_id,new_plan,quick_to_sell_days
0,M4046594895,2.922386e+09,https://www.realtor.com/realestateandhomes-det...,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '1516 N 62nd ...",{'listing_office': {'list_item': {'name': 'Arc...,for_sale,249900,3.0,3,6.0,"{'size': 1632, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '1291281',...","{'id': '3863c258f6142e2a2340175af3f64df0', 'na...",2020-10-13T17:54:05Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",9,https://ap.rdcpix.com/509b1ca1768ae6574e170ca7...,1,1,type|property|data|prop_id|4046594895|list_id|...,"{'size': 1842, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH944934', 'pla...",,,,,,
1,M3939384476,2.918257e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '6102 Reedlan...",{'listing_office': {'list_item': {'name': 'Vih...,for_sale,116800,1.0,1,3.0,"{'size': 1092, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '347285', ...","{'id': 'a73ac4220ff5775a3f069bf0acab8e50', 'na...",2020-10-13T18:18:18Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",7,https://ap.rdcpix.com/f6ca6372b4cb434fd09d14b6...,1,2,type|property|data|prop_id|3939384476|list_id|...,"{'size': 992, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH911464', 'pla...",,,,,,
2,M4036371277,2.922385e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '5703 N 13th ...",{'listing_office': {'list_item': {'name': 'Pre...,for_sale,215000,1.0,2,3.0,"{'size': 1360, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': 'Kev...","{'id': '37e66a23ad2c2f122fe75b7df00955a5', 'na...",2020-10-13T17:24:20Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",35,https://ap.rdcpix.com/28d9bd1f5b0fdcb2de5b7aca...,1,3,type|property|data|prop_id|4036371277|list_id|...,"{'size': 1501, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH945010', 'pla...",1.0,,,,,
3,M3553029343,2.922384e+09,https://www.realtor.com/realestateandhomes-det...,single_family,,"{'city': 'Philadelphia', 'line': '1009 Rhawn S...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,394800,1.0,2,3.0,"{'size': 1856, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '4759', 'i...","{'id': 'd4965070728907fc00e98d31065c1b62', 'na...",2020-10-13T17:11:54Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",123,https://ap.rdcpix.com/792a419aa85110ba7780b21b...,1,4,type|property|data|prop_id|3553029343|list_id|...,"{'size': 15913, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH943096', 'pla...",1.0,,,,,
4,M3649199107,2.922384e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '3850 N Bouvi...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,130000,1.0,2,3.0,"{'size': 1180, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '391546', ...","{'id': 'a90ca2a01b749ef58fe768c91859baae', 'na...",2020-10-13T17:02:13Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",33,https://ap.rdcpix.com/bcbe5fa622b831b251d80654...,1,5,type|property|data|prop_id|3649199107|list_id|...,"{'size': 1240, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH918016', 'pla...",1.0,{'href': 'https://view.ricohtours.com/0701bde8...,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9757,M3400474681,2.869904e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '2077 Bridge ...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,94900,1.0,1,4.0,"{'size': 1296, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': ''}]","{'id': '334bcccadbd3750e0a22a4d8f540d8cf', 'na...",2020-10-05T12:27:53Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",18,https://ap.rdcpix.com/51a31645187a5499fb26d5c8...,42,33,type|property|data|prop_id|3400474681|list_id|...,"{'size': 1064, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858856', 'pla...",,,,,,
9758,M3654238233,2.890369e+09,https://www.realtor.com/realestateandhomes-det...,condo,townhomes,"{'city': 'Philadelphia', 'line': '1452 N 57th ...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,149900,1.0,2,3.0,"{'size': 1026, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '950515', ...","{'id': 'ff5c6c7cc6cb2afb6004190e153cd8eb', 'na...",2020-10-01T06:57:05Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",14,https://ap.rdcpix.com/b50a71041cca0f93050c6339...,42,34,type|property|data|prop_id|3654238233|list_id|...,"{'size': 1231, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858898', 'pla...",1.0,,,,,
9759,M9041602444,2.704677e+09,https://www.realtor.com/realestateandhomes-det...,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '7354 Ridge A...",{'listing_office': {'list_item': {'name': 'BHH...,for_sale,432990,3.0,4,3.0,"{'size': 1614, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '948779', ...","{'id': 'b0eb9dcb0e39719cd69a04a2b34a18d0', 'na...",2020-10-06T09:19:02Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",2,https://ap.rdcpix.com/a60365ae378f77c264810299...,42,35,type|property|data|prop_id|9041602444|list_id|...,,"{'name': 'BrightMLS', 'id': 'PAPH838626', 'pla...",1.0,,,,,
9760,M3276555576,2.915162e+09,https://www.realtor.com/realestateandhomes-det...,condo,condos,"{'city': 'Philadelphia', 'line': '1003 N Bodin...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,405000,2.0,2,2.0,"{'size': 1020, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '2229603',...","{'id': 'ff5c6c7cc6cb2afb6004190e153cd8eb', 'na...",2020-10-05T10:35:58Z,"{'presentation_status': 'for_sale', 'is_showca...","{'form': {'name': {'required': True, 'minimum_...",25,https://ap.rdcpix.com/cfa25d32ea17b0bd22957d23...,42,36,type|property|data|prop_id|3276555576|list_id|...,,"{'name': 'BrightMLS', 'id': 'PAPH888146', 'pla...",,{'href': 'https://my.matterport.com/show/?m=eR...,,,,


In [8]:
## export to csv file
raw_data.to_csv('raw_data.csv', encoding='utf-8', index=False)

And now, we have all 9562 instances.

In [99]:
## Remove unuseful cols
processed_data = raw_data.drop(columns=[
    'rdc_web_url',
    'office',
    'thumbnail',
    'list_tracking',
    'open_houses',
    'plan_id',
    'new_plan',
    'quick_to_sell_days',
    'virtual_tour',
    'client_display_flags',
    'lead_forms'
    
])
processed_data

Unnamed: 0,property_id,listing_id,prop_type,prop_sub_type,address,branding,prop_status,price,baths_full,baths,beds,building_size,agents,last_update,photo_count,page_no,rank,lot_size,mls,baths_half
0,M4046594895,2.922386e+09,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '1516 N 62nd ...",{'listing_office': {'list_item': {'name': 'Arc...,for_sale,249900,3.0,3,6.0,"{'size': 1632, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '1291281',...",2020-10-13T17:54:05Z,9,1,1,"{'size': 1842, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH944934', 'pla...",
1,M3939384476,2.918257e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '6102 Reedlan...",{'listing_office': {'list_item': {'name': 'Vih...,for_sale,116800,1.0,1,3.0,"{'size': 1092, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '347285', ...",2020-10-13T18:18:18Z,7,1,2,"{'size': 992, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH911464', 'pla...",
2,M4036371277,2.922385e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '5703 N 13th ...",{'listing_office': {'list_item': {'name': 'Pre...,for_sale,215000,1.0,2,3.0,"{'size': 1360, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': 'Kev...",2020-10-13T17:24:20Z,35,1,3,"{'size': 1501, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH945010', 'pla...",1.0
3,M3553029343,2.922384e+09,single_family,,"{'city': 'Philadelphia', 'line': '1009 Rhawn S...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,394800,1.0,2,3.0,"{'size': 1856, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '4759', 'i...",2020-10-13T17:11:54Z,123,1,4,"{'size': 15913, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH943096', 'pla...",1.0
4,M3649199107,2.922384e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '3850 N Bouvi...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,130000,1.0,2,3.0,"{'size': 1180, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '391546', ...",2020-10-13T17:02:13Z,33,1,5,"{'size': 1240, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH918016', 'pla...",1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9757,M3400474681,2.869904e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '2077 Bridge ...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,94900,1.0,1,4.0,"{'size': 1296, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': ''}]",2020-10-05T12:27:53Z,18,42,33,"{'size': 1064, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858856', 'pla...",
9758,M3654238233,2.890369e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '1452 N 57th ...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,149900,1.0,2,3.0,"{'size': 1026, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '950515', ...",2020-10-01T06:57:05Z,14,42,34,"{'size': 1231, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858898', 'pla...",1.0
9759,M9041602444,2.704677e+09,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '7354 Ridge A...",{'listing_office': {'list_item': {'name': 'BHH...,for_sale,432990,3.0,4,3.0,"{'size': 1614, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '948779', ...",2020-10-06T09:19:02Z,2,42,35,,"{'name': 'BrightMLS', 'id': 'PAPH838626', 'pla...",1.0
9760,M3276555576,2.915162e+09,condo,condos,"{'city': 'Philadelphia', 'line': '1003 N Bodin...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,405000,2.0,2,2.0,"{'size': 1020, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '2229603',...",2020-10-05T10:35:58Z,25,42,36,,"{'name': 'BrightMLS', 'id': 'PAPH888146', 'pla...",


## Reset index

In [100]:
processed_data = processed_data.reset_index(drop= True)
processed_data

Unnamed: 0,property_id,listing_id,prop_type,prop_sub_type,address,branding,prop_status,price,baths_full,baths,beds,building_size,agents,last_update,photo_count,page_no,rank,lot_size,mls,baths_half
0,M4046594895,2.922386e+09,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '1516 N 62nd ...",{'listing_office': {'list_item': {'name': 'Arc...,for_sale,249900,3.0,3,6.0,"{'size': 1632, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '1291281',...",2020-10-13T17:54:05Z,9,1,1,"{'size': 1842, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH944934', 'pla...",
1,M3939384476,2.918257e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '6102 Reedlan...",{'listing_office': {'list_item': {'name': 'Vih...,for_sale,116800,1.0,1,3.0,"{'size': 1092, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '347285', ...",2020-10-13T18:18:18Z,7,1,2,"{'size': 992, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH911464', 'pla...",
2,M4036371277,2.922385e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '5703 N 13th ...",{'listing_office': {'list_item': {'name': 'Pre...,for_sale,215000,1.0,2,3.0,"{'size': 1360, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': 'Kev...",2020-10-13T17:24:20Z,35,1,3,"{'size': 1501, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH945010', 'pla...",1.0
3,M3553029343,2.922384e+09,single_family,,"{'city': 'Philadelphia', 'line': '1009 Rhawn S...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,394800,1.0,2,3.0,"{'size': 1856, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '4759', 'i...",2020-10-13T17:11:54Z,123,1,4,"{'size': 15913, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH943096', 'pla...",1.0
4,M3649199107,2.922384e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '3850 N Bouvi...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,130000,1.0,2,3.0,"{'size': 1180, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '391546', ...",2020-10-13T17:02:13Z,33,1,5,"{'size': 1240, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH918016', 'pla...",1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9557,M3400474681,2.869904e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '2077 Bridge ...",{'listing_office': {'list_item': {'name': 'Re/...,for_sale,94900,1.0,1,4.0,"{'size': 1296, 'units': 'sqft'}","[{'primary': True, 'photo': None, 'name': ''}]",2020-10-05T12:27:53Z,18,42,33,"{'size': 1064, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858856', 'pla...",
9558,M3654238233,2.890369e+09,condo,townhomes,"{'city': 'Philadelphia', 'line': '1452 N 57th ...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,149900,1.0,2,3.0,"{'size': 1026, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '950515', ...",2020-10-01T06:57:05Z,14,42,34,"{'size': 1231, 'units': 'sqft'}","{'name': 'BrightMLS', 'id': 'PAPH858898', 'pla...",1.0
9559,M9041602444,2.704677e+09,condo,duplex_triplex,"{'city': 'Philadelphia', 'line': '7354 Ridge A...",{'listing_office': {'list_item': {'name': 'BHH...,for_sale,432990,3.0,4,3.0,"{'size': 1614, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '948779', ...",2020-10-06T09:19:02Z,2,42,35,,"{'name': 'BrightMLS', 'id': 'PAPH838626', 'pla...",1.0
9560,M3276555576,2.915162e+09,condo,condos,"{'city': 'Philadelphia', 'line': '1003 N Bodin...",{'listing_office': {'list_item': {'name': 'Kel...,for_sale,405000,2.0,2,2.0,"{'size': 1020, 'units': 'sqft'}","[{'primary': True, 'advertiser_id': '2229603',...",2020-10-05T10:35:58Z,25,42,36,,"{'name': 'BrightMLS', 'id': 'PAPH888146', 'pla...",


In [101]:
## Export to csv file

processed_data.to_csv('processed_data.csv',encoding='utf-8', index=False)

# Extra more info within cols

In [102]:
## Check NaN in each col

processed_data.isna().sum()

property_id         0
listing_id         94
prop_type           0
prop_sub_type    2512
address             0
branding            0
prop_status         0
price               0
baths_full       1760
baths               0
beds             1182
building_size    1381
agents             94
last_update         0
photo_count         0
page_no             0
rank                0
lot_size         1593
mls                94
baths_half       6168
dtype: int64

In [81]:
import numpy as np

In [None]:
def xtr_info():
    for i in 

In [64]:
## extra dic type strings from col
import ast

def str_to_dic(string):
    if string == None: pass
    else:
        return ast.literal_eval(string)
    

In [103]:
processed_data['agents'][154]

"[{'primary': True, 'advertiser_id': '1882837', 'id': '1882837', 'photo': {'href': 'https://ap.rdcpix.com/1862199854/0fb12a21b18e9527e37f8eabacc14f5fa-w0od-r7_w110.jpg'}, 'name': 'Ronald L Wynn Sr'}]"

In [104]:
processed_data['agents'][155]

nan

In [109]:
processed_data['agents'][155] == nan

NameError: name 'nan' is not defined

In [84]:
str_to_dic(processed_data['agents'][155])[0]


ValueError: malformed node or string: nan

In [57]:
def xtr_info_from_dict(col, level = 0, keys = []):
    if 
    for i in keys:
        if i not in col[level].keys(): return None
        else:
            col = col[level][i]
    return col

In [56]:
xtr_info_from_dict(str_to_dic(processed_data['agents'][2]),keys = ['id'])

In [74]:
for i in range(len(processed_data['agents'])):
    if processed_data['agents'][i] == None:
        print(i, None)
    else:
        print(i, xtr_info_from_dict(str_to_dic(processed_data['agents'][i]), level=0, keys = ['id']))

0 1291281
1 347285
2 None
3 4759
4 391546
5 2024102
6 3239504
7 3125177
8 152138
9 2757730
10 1321989
11 2024102
12 3725707
13 949078
14 2811932
15 1304523
16 2024102
17 2811932
18 3059910
19 1433132
20 28367
21 2012338
22 3068413
23 102362
24 431045
25 2243381
26 3198133
27 None
28 2229603
29 102362
30 2229603
31 456474
32 None
33 None
34 None
35 None
36 176252
37 None
38 None
39 None
40 863816
41 1076103
42 3354710
43 1629732
44 1133961
45 603602
46 2229603
47 949485
48 2229603
49 949425
50 3823329
51 1244260
52 1341145
53 1502759
54 3239504
55 2229603
56 3245594
57 None
58 1183257
59 None
60 3339739
61 None
62 2216397
63 3813696
64 624397
65 None
66 2024102
67 1016663
68 256603
69 25467
70 2187288
71 468460
72 3843382
73 3611804
74 3987163
75 3813696
76 706768
77 1968669
78 944316
79 949327
80 1996796
81 1496849
82 786102
83 None
84 1992601
85 843130
86 2007120
87 169442
88 2229603
89 3348846
90 1185906
91 2818330
92 140760
93 948783
94 297748
95 2908752
96 1028194
97 None
98 947139

ValueError: malformed node or string: nan

There are some cols have details such `address`, `branding`, `agents`, `lot_size`, and `mls`. We need to discuss what features should we extract from them.