In [5]:
import requests
from retrying import retry
from ediblepickle import checkpoint
from urllib.parse import quote
from typing import Union
from datetime import datetime
from keys import *

In [6]:
# Load the API credentials once for the whole notebook. The request helpers below
# read the RapidAPI key from keys['USRealEstate'].
# NOTE(review): `getKeys` presumably comes from the star-import of `keys` — confirm.
keys = getKeys()

In [7]:
@retry(stop_max_attempt_number=5)
@checkpoint(
    # The cache file name is built from the stringified id so that non-str ids and
    # keyword calls (property_id=...) work; str ids map to the same file as before.
    key=lambda args, kwargs: quote(str(args[0] if args else kwargs['property_id'])) + '.pkl',
    work_dir='Saved Results/PropertyDetail/'
)
def get_PropertyDetail(property_id : str) -> dict:
    '''
    Fetch the detail record for one property from the US Real Estate API.

    The call is wrapped by `checkpoint` (result stored under
    'Saved Results/PropertyDetail/<quoted id>.pkl') and by `retry` (up to 5 attempts).

    Parameters:
        property_id : identifier sent as the `property_id` query parameter. Values
            that are not already `str` are converted with `str()`.

    Returns:
        The decoded JSON body of the response.

    Raises:
        TypeError : if `property_id` cannot be converted to a string.
        requests.HTTPError : if the API answers with an error status on every attempt.
    '''
    if not isinstance(property_id, str):
        try:
            property_id = str(property_id)
        except Exception as exc:
            raise TypeError('Could not convert input to string.') from exc

    url = "https://us-real-estate.p.rapidapi.com/v2/property-detail"

    querystring = {
        "property_id": property_id
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
    # Raise on 4xx/5xx so `retry` actually retries and an error payload is never
    # returned (and checkpointed) as if it were property data.
    response.raise_for_status()
    return response.json()

@retry(stop_max_attempt_number=5)
def get_PropertyForSaleByZipcode(zipcode : str, 
        property_type : str = 'single_family',
        n_results : int = 100
    ) -> dict:
    '''
    Fetch one page of for-sale listings for a zipcode from the US Real Estate API.

    Parameters:
        zipcode : sent as the `zipcode` query parameter.
        property_type : sent as the `property_type` query parameter. Values noted so
            far: multi_family|single_family|mobile|land|farm.
        n_results : number of listings wanted. Only a single request is made, so at
            most 42 listings are requested regardless of this value.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError : if the API answers with an error status on every attempt.
    '''
    url = "https://us-real-estate.p.rapidapi.com/v2/for-sale-by-zipcode"

    # Page size. This can be increased to 200 once we get the paid plan.
    limit = min(42, n_results)

    # Other query string parameters:
    #   sort = (default: relevant)|newest|lowest_price|highest_price|open_house_date|
    #          price_reduced_date|largest_sqft|lot_size|sold_date
    #   price_min/max = $ USD
    #   beds_min/max = #
    #   bath_min/max = #
    querystring = {
        "zipcode":zipcode,
        "offset":"0",
        "limit":str(limit),
        "property_type":property_type
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # TODO: loop over `offset` until the API runs out of results or n_results is
    # reached. This is going to take a lot of API calls.

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
    # Raise on 4xx/5xx so `retry` actually retries instead of returning an error payload.
    response.raise_for_status()
    return response.json()

@retry(stop_max_attempt_number=5)
def get_PropertySoldByZipcode(zipcode : str, 
        n_results : int,
        property_type : str = 'single_family'
    ) -> dict:
    '''
    Fetch sold-home listings for a zipcode from the US Real Estate API.

    NOTE: This endpoint does not seem to have a limit argument. It is not yet known
    how that interacts with offset.

    Parameters:
        zipcode : sent as the `zipcode` query parameter.
        n_results : number of listings wanted. Currently not used: a single request
            is made and whatever the API returns for it is handed back.
        property_type : sent as the `property_type` query parameter. Values noted so
            far: multi_family|single_family|mobile|land|farm.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError : if the API answers with an error status on every attempt.
    '''
    url = "https://us-real-estate.p.rapidapi.com/v2/sold-homes-by-zipcode"

    # Start from the first result, like the other listing helpers. The previous
    # value of 42 was the page-size cap used elsewhere, which as an offset skipped
    # the first 42 results.
    offset = "0"

    # Other query string parameters:
    #   sort = (default: relevant)|newest|lowest_price|highest_price|open_house_date|
    #          price_reduced_date|largest_sqft|lot_size|sold_date
    #   price_min/max = $ USD
    #   beds_min/max = #
    #   bath_min/max = #
    querystring = {
        "zipcode":zipcode,
        "offset":offset,
        "property_type":property_type
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # TODO: loop over `offset` until the API runs out of results or n_results is reached.

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
    # Raise on 4xx/5xx so `retry` actually retries instead of returning an error payload.
    response.raise_for_status()
    return response.json()

def get_LocationSuggest(search_keyword : str, 
        return_all : bool = False
    ) -> dict:
    '''
    Query the API's location/suggest endpoint for `search_keyword`.

    Parameters:
        search_keyword : sent as the `input` query parameter.
        return_all : when true, return the whole decoded JSON payload; otherwise
            return only the first element of the payload's 'data' entry.

    Returns:
        The decoded payload, or its first 'data' element (see `return_all`).
    '''
    endpoint = "https://us-real-estate.p.rapidapi.com/location/suggest"

    request_headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    payload = requests.request(
        "GET",
        endpoint,
        headers=request_headers,
        params={"input":search_keyword}
    ).json()

    if return_all:
        return payload

    # Only the top suggestion was asked for.
    return payload['data'][0]

def get_PropertyForSaleByArea(
        city : str = '',
        state : str = '',
        n_results : int = 100 # How many houses do you want to get back.
    ) -> dict:
    '''
    Fetch one page of the newest for-sale listings for a city/state.

    Parameters:
        city : sent as the `city` query parameter.
        state : sent as the `state_code` query parameter.
        n_results : number of houses wanted; a single request is made, asking for
            at most 42 of them.

    Returns:
        The decoded JSON body of the response, unchanged.
    '''
    endpoint = "https://us-real-estate.p.rapidapi.com/v2/for-sale"

    # Page size cap. This can be increased to 200 once we move to the paid version.
    page_size = n_results if n_results < 42 else 42

    request_headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    request_params = {
        "state_code":state,
        "city":city,
        "offset":"0",
        "limit":str(page_size),
        "sort":"newest"
    }

    # TODO: page through the results (advance the offset) until n_results houses
    # have been collected or the API has nothing left to reply with.

    payload = requests.request(
        "GET", endpoint, headers=request_headers, params=request_params
    ).json()

    # These two values are not used further yet; the lookups raise if the payload
    # does not contain them.
    home_search = payload['data']['home_search']
    total_houses_available = int(home_search['total'])
    home_search = payload['data']['home_search']
    total_houses_in_request = int(home_search['count'])

    return payload

In [86]:
tt = get_LocationSuggest('Seattle', return_all=False)

In [8]:
tt = get_PropertyForSaleByArea(city='seattle', state='WA')

In [60]:
#tt['data']['home_search']['results'] # This is the juicy housing details.
#tt['data']['geo'] # Pulls in median listing price data for neighboring zip codes and major cities
tt['data']['geo'].keys()# has some solid month to month analysis.

dict_keys(['parents', 'recommended_zips', 'recommended_cities', 'recommended_counties', 'geo_statistics', 'recommended_neighborhoods'])

In [109]:
tt['data']['geo']['geo_statistics']

{'housing_market': {'month_to_month': {'median_days_on_market_percent_change': 10,
   'median_listing_price_percent_change': -2.93,
   'active_listing_count_percent_change': -13.49,
   'median_listing_price_sqft_percent_change': -0.48},
  'median_days_on_market': 44,
  'median_sold_price': 820000,
  'median_price_per_sqft': 574,
  'median_listing_price': 825000,
  'listing_count': 1589,
  'by_prop_type': [{'attributes': {'median_sold_price': 811250,
     'median_listing_price': 814000,
     'median_days_on_market': 43,
     'median_lot_size': 4650,
     'median_price_per_sqft': 576},
    'type': 'home'}],
  'median_rent_price': 2795}}

In [111]:
# This will be taking in the following: tt['data']['geo']
class geo_data():
    '''
    Organizes the meta information that comes back with a query.

    Built from the 'geo' part of a response (e.g. tt['data']['geo']). Every section
    is optional: a missing (or None) section yields None for the matching attribute.
    Where this parsing should ultimately live is still to be decided.

    Attributes:
        zip_info, city_info, county_info, neighborhood_info : dict or None, the
            parsed 'geos' of the matching 'recommended_*' section.
        market_stats : dict or None, selected values of
            stats['geo_statistics']['housing_market'].
    '''
    def __init__(self, stats : dict):
        self.zip_info = self._parse_areas(self._section_geos(stats, 'recommended_zips'))
        self.city_info = self._parse_areas(self._section_geos(stats, 'recommended_cities'))
        self.county_info = self._parse_areas(self._section_geos(stats, 'recommended_counties'))
        self.neighborhood_info = self._parse_areas(self._section_geos(stats, 'recommended_neighborhoods'))
        self.market_stats = self._parse_statistics((stats.get('geo_statistics') or {}).get('housing_market'))

    @staticmethod
    def _section_geos(stats : dict, section : str):
        '''Return stats[section]['geos'], or None when the section is missing or None.'''
        return (stats.get(section) or {}).get('geos')

    def _parse_areas(self, geos : Union[list, None]) -> Union[dict, None]:
        '''
        Index a list of area records by their identifying value.

        The key of each record is the value stored under the field named by its
        'geo_type' (falling back to the 'slug_id' field when 'geo_type' is absent);
        records without that field are stored under '_parse_areas_FAILED'.
        Returns None when `geos` is None.
        '''
        if geos is None:
            return None

        parsed = {}
        for v in geos:
            key = v.get(v.get('geo_type', 'slug_id'), '_parse_areas_FAILED')
            # `or {}` also covers keys that are present but hold None.
            housing_market = (v.get('geo_statistics') or {}).get('housing_market') or {}
            parsed[key] = {
                'slug_id' : v.get('slug_id'),
                'median_listing_price' : housing_market.get('median_listing_price'),
                'state_code' : v.get('state_code'),
                'city_code' : v.get('city'),
                'geo_type' : v.get('geo_type')
            }
        return parsed

    def _parse_statistics(self, geo_stats : Union[dict, None]) -> Union[dict, None]:
        '''Pick the market statistics of interest; returns None when `geo_stats` is None.'''
        if geo_stats is None:
            return None
        return {
            'median_sold_price' : geo_stats.get('median_sold_price'),
            'median_days_on_market' : geo_stats.get('median_days_on_market'),
            'median_price_per_sqft' : geo_stats.get('median_price_per_sqft')
        }

In [45]:
# This will be taking in the following: tt['data']['home_search']['results'][n]
class house():
    '''
    Holds the data of a single house; each listing gets its own instance.

    The API returns a lot of data nested in a number of dictionaries. This class
    takes one listing (e.g. tt['data']['home_search']['results'][n]) and keeps the
    'juicy' bit, i.e. what is needed for:
         1) the GUI: address, google street view, other photos. This will probably be
            a flask application to start, but that is still far off.
         2) the MODEL: tags, list_prices, other flags. Idea: build a word cloud and let
            the user select key words for their house (how many is TBD). Dates would be
            used as days old (or similar) for model training, while the actual tool would
            use zero, as the user is entering current info. This may or may not be a good
            idea, as it might have unintended implications within the model.

    Interior functions:
        Date Cleaning
        Location Cleaning
        Description Cleaning

    Nested sections of the listing that are present but None are treated as empty.
    '''
    def __init__(self, listing : dict):
        primary_photo = listing['primary_photo'] or {}
        photo_urls = [primary_photo.get('href')] + [p.get('href') for p in (listing['photos'] or [])]

        self.reference_info = { # This is stuff not going into the model
            'id' : listing['property_id'],
            # De-duplicated, primary photo first, original order kept, missing urls dropped.
            'photos' : list(dict.fromkeys(url for url in photo_urls if url))
        }

        self.raw_last_update = listing['last_update_date']
        self.raw_list_date = listing['list_date']
        self.tags = listing['tags'] # This one will be weird, might need to wrap in nlp somehow with this, maybe not, just dummy it
        self.list_price = listing['list_price']
        # The flag can be None, False or True; only a truthy flag means new construction.
        self.new_construction = bool((listing['flags'] or {}).get('is_new_construction'))

        self.raw_location = listing['location']
        self.raw_description = listing['description']

        self._clean_dates()
        self._clean_location()
        self._clean_description()

    def _convert_date(self, date : str):
        '''Parse a 'YYYY-MM-DD' string into a datetime.'''
        return datetime.strptime(date, '%Y-%m-%d')

    def _parse_raw_date(self, raw):
        '''Return the date part of a raw 'YYYY-MM-DD[T...]' value as a datetime, else None.'''
        if not isinstance(raw, str) or not raw:
            return None
        try:
            return self._convert_date(raw.split('T')[0])
        except ValueError:
            return None

    def _clean_dates(self):
        '''Set `last_update` and `list_date` from the raw values (None when unusable).'''
        self.last_update = self._parse_raw_date(self.raw_last_update)
        self.list_date = self._parse_raw_date(self.raw_list_date)

    def _clean_location(self):
        '''Copy the location fields into `reference_info` and set `lat_long`.'''
        # `or {}` covers sections that are missing as well as sections set to None.
        location = self.raw_location or {}
        address = location.get('address') or {}
        county = location.get('county') or {}
        coordinate = address.get('coordinate') or {}

        self.reference_info.update({
            'zip_code' : address.get('postal_code'),
            'state' : address.get('state'),
            'google_map_street_view' : location.get('street_view_url'),
            'fips_code' : county.get('fips_code'),
            'county' : county.get('county')
        })
        self.lat_long = (coordinate.get('lat'), coordinate.get('lon'))

    def _clean_description(self):
        '''Copy the description fields onto the instance, defaulting the countable ones.'''
        description = self.raw_description or {}

        self.baths_full = description.get('baths_full') or 0
        self.baths_3qtr = description.get('baths_3qtr') or 0
        self.baths_half = description.get('baths_half') or 0
        self.baths_1qtr = description.get('baths_1qtr') or 0
        self.year_built = description.get('year_built')
        self.lot_sqft = description.get('lot_sqft')
        self.sqft = description.get('sqft')
        self.garage = description.get('garage') or 0
        self.stories = description.get('stories') or 1
        self.beds = description.get('beds')
        self.type = description.get('type') # this one will need to be dummied with other values for other houses.

    def _validate(self):
        '''
        This will be used to flag anything that looks strange (missing values, etc).
        Not implemented yet.
        '''
In [46]:
# Pull the list of listings out of the response held in `tt`.
# NOTE(review): assumes `tt` currently holds a get_PropertyForSaleByArea response;
# `tt` is reassigned by several cells, so this depends on execution order.
hh = tt['data']['home_search']['results']
# Parse the first listing to exercise the `house` class.
hh_f = house(hh[0])


In [53]:
print(hh_f.garage)

1


In [None]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="geoapiExercises")

In [1]:
tt = geolocator.geocode('Chicago')

NameError: name 'geolocator' is not defined

In [96]:
a = {'bb':5,
'cc' : {
    'dd' : 4,
    'ee' : 7
}}

In [102]:
# Scratch check of the chained-.get pattern: 'xx' is not a key of `a`, so the first
# .get falls back to {} and the expression evaluates to None instead of raising.
a.get('xx', {}).get('ff')