In [43]:
import requests
from retrying import retry
from ediblepickle import checkpoint
from urllib.parse import quote
from typing import Union
from datetime import datetime
from keys import *

In [3]:
# Load the API credentials; the request helpers below read keys['USRealEstate'].
# NOTE(review): getKeys is not defined in this notebook -- presumably it is
# provided by `from keys import *` in the imports cell; confirm.
keys = getKeys()

In [11]:
@retry(stop_max_attempt_number=5)
@checkpoint(
    # Build the cache file name from the id however it was passed (positionally
    # or by keyword) and coerce it to str first: quote() rejects non-string
    # input, and this key is computed before the function body runs.
    key=lambda args, kwargs: quote(str(args[0] if args else kwargs['property_id'])) + '.pkl',
    work_dir='Saved Results/PropertyDetail/'
)
def get_PropertyDetail(property_id : str) -> dict:
    '''
    Fetch the detail record for one property from the US Real Estate API.

    The call is wrapped in @checkpoint (result cached on disk under
    'Saved Results/PropertyDetail/', one file per property id) and @retry
    (up to 5 attempts).

    Args:
        property_id: Identifier of the property. Non-string values are
            converted with str().

    Returns:
        The decoded JSON body of the response.

    Raises:
        TypeError: If property_id cannot be converted to a string.
        requests.exceptions.RequestException: If the request times out or the
            API answers with an HTTP error status on every attempt.
    '''
    if not isinstance(property_id, str):
        try:
            property_id = str(property_id)
        except Exception as exc:
            raise TypeError('Could not convert input to string.') from exc

    url = "https://us-real-estate.p.rapidapi.com/v2/property-detail"

    querystring = {
        "property_id": property_id
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Raise on HTTP errors (rate limit, bad key, ...) so @retry can try again
    # and an error payload is never returned as if it were the property detail.
    response.raise_for_status()
    return response.json()

@retry(stop_max_attempt_number=5)
def get_PropertyForSaleByZipcode(zipcode : str,
        property_type : str = 'single_family',
        n_results : int = 100
    ) -> dict:
    '''
    Fetch one page of for-sale listings for a zipcode from the US Real Estate API.

    Only a single request (offset 0) is made, so at most 42 listings are
    requested regardless of n_results.

    Args:
        zipcode: Zipcode to search.
        property_type: multi_family|single_family|mobile|land|farm
            (default 'single_family').
        n_results: Desired number of listings; capped at the page size.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request times out or the
            API answers with an HTTP error status on every attempt.

    Other query string parameters noted for the endpoint (not sent yet):
        sort = (default: relevant)|newest|lowest_price|highest_price|open_house_date|price_reduced_date|largest_sqft|lot_size|sold_date
        price_min/max = $ USD
        beds_min/max = #
        bath_min/max = #
    '''
    url = "https://us-real-estate.p.rapidapi.com/v2/for-sale-by-zipcode"

    # This can be increased to 200 once we get the paid plan.
    page_size = 42
    limit = min(page_size, n_results)

    # TODO: loop here, advancing "offset" until we hit the end of the results
    # or n_results. This is going to take a lot of API calls.
    querystring = {
        "zipcode":zipcode,
        "offset":"0",
        "limit":str(limit),
        "property_type":property_type
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Raise on HTTP errors so @retry actually retries failed calls instead of
    # returning the error payload as if it were search results.
    response.raise_for_status()
    return response.json()

@retry(stop_max_attempt_number=5)
def get_PropertySoldByZipcode(zipcode : str,
        n_results : int,
        property_type : str = 'single_family'
    ) -> dict:
    '''
    Fetch sold-home listings for a zipcode from the US Real Estate API.

    NOTE: This endpoint does not seem to have a limit argument, so it is not
    yet clear how paging with offset works. A single request starting at the
    first result is made.

    Args:
        zipcode: Zipcode to search.
        n_results: Desired number of listings. Currently unused; kept so the
            signature is ready for pagination.
        property_type: multi_family|single_family|mobile|land|farm
            (default 'single_family').

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request times out or the
            API answers with an HTTP error status on every attempt.

    Other query string parameters noted for the endpoint (not sent yet):
        sort = (default: relevant)|newest|lowest_price|highest_price|open_house_date|price_reduced_date|largest_sqft|lot_size|sold_date
        price_min/max = $ USD
        beds_min/max = #
        bath_min/max = #
    '''
    url = "https://us-real-estate.p.rapidapi.com/v2/sold-homes-by-zipcode"

    # Start at the first result, like the other search helpers. The previous
    # value (42) was the free-plan page size copied into the offset.
    # NOTE(review): assumes offset means "results to skip" -- confirm against the API docs.
    offset = "0"

    # TODO: page through the results by advancing "offset" until n_results
    # listings have been collected or the API runs out.
    querystring = {
        "zipcode":zipcode,
        "offset":offset,
        "property_type":property_type
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Raise on HTTP errors so @retry actually retries failed calls instead of
    # returning the error payload as if it were search results.
    response.raise_for_status()
    return response.json()

def get_LocationSuggest(search_keyword : str,
        return_all : bool = False
    ) -> dict:
    '''
    Ask the US Real Estate API for location suggestions matching a keyword.

    Args:
        search_keyword: Free-text search term sent as the "input" parameter.
        return_all: When True, return the whole decoded response; otherwise
            return only the first entry of its 'data' list.

    Returns:
        The full JSON response, or the first suggestion.

    Raises:
        requests.exceptions.RequestException: If the request times out or the
            API answers with an HTTP error status.
        KeyError: If the response has no 'data' entry.
        IndexError: If return_all is False and no suggestion was returned.
    '''
    url = "https://us-real-estate.p.rapidapi.com/location/suggest"

    querystring = {"input":search_keyword}

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Surface HTTP errors directly rather than as a confusing KeyError on 'data'.
    response.raise_for_status()
    response_json = response.json()

    if return_all:
        return response_json

    suggestions = response_json['data']
    if not suggestions:
        # Same exception type as indexing an empty list, with a usable message.
        raise IndexError(f'No location suggestions returned for {search_keyword!r}.')
    return suggestions[0]

def get_PropertyForSaleByArea(
        city : str = '',
        state : str = '',
        n_results : int = 100
    ) -> dict:
    '''
    Fetch one page of the newest for-sale listings for a city/state.

    Only a single request (offset 0) is made, so at most 42 listings are
    requested regardless of n_results.

    Args:
        city: City name, sent as the "city" parameter.
        state: State code, sent as the "state_code" parameter.
        n_results: Desired number of listings; capped at the page size.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request times out or the
            API answers with an HTTP error status.
    '''
    # This can be increased to 200 once we move to the paid version.
    page_size = 42
    limit = min(page_size, n_results)

    url = "https://us-real-estate.p.rapidapi.com/v2/for-sale"

    # TODO: page through the results by advancing "offset" until n_results
    # listings have been collected or a page comes back short.
    querystring = {
        "state_code":state,
        "city":city,
        "offset":"0",
        "limit":str(limit),
        "sort":"newest"
    }

    headers = {
        "X-RapidAPI-Key": keys['USRealEstate'],
        "X-RapidAPI-Host": "us-real-estate.p.rapidapi.com"
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Surface HTTP errors here instead of returning an error payload that only
    # fails later when the caller indexes into it.
    response.raise_for_status()
    return response.json()

In [86]:
# Look up location suggestions for a keyword; return_all=False keeps only the
# first entry of the response's 'data' list.
tt = get_LocationSuggest('Seattle', return_all=False)

In [12]:
# Fetch the newest for-sale listings for Seattle, WA (n_results left at its default).
tt = get_PropertyForSaleByArea(city='seattle', state='WA')

In [33]:
# Show the first listing in the for-sale search response.
# tt['data']['geo']['geo_statistics'] # has some solid month-to-month analysis.
tt['data']['home_search']['results'][0]

{'primary_photo': {'href': 'https://ap.rdcpix.com/5a07a527b5821000030c9ca212d698dfl-m1706552722s-w1024_h768.jpg'},
 'last_update_date': '2022-12-15T00:13:16Z',
 'source': {'agents': [{'office_name': None}],
  'id': 'SEWA',
  'plan_id': None,
  'spec_id': None,
  'type': 'mls'},
 'tags': ['community_outdoor_space',
  'community_security_features',
  'den_or_office',
  'dining_room',
  'dishwasher',
  'family_room',
  'fireplace',
  'hardwood_floors',
  'hill_or_mountain_view',
  'lake_view',
  'ocean_view',
  'view',
  'washer_dryer',
  'water_view',
  'basement',
  'garage_1_or_more',
  'garage_2_or_more',
  'two_or_more_stories',
  'floor_plan',
  'trails',
  'lake',
  'beach',
  'medicalcare'],
 'permalink': '3620-42nd-Ave-NE_Seattle_WA_98105_M26724-10792',
 'status': 'for_sale',
 'list_date': '2022-12-15T00:13:16Z',
 'open_houses': None,
 'tax_record': {'public_record_id': 'D9B701F835A00613A60FF98845F3FB66'},
 'branding': [{'name': 'Windermere RE Greenwood',
   'photo': None,
   'ty

In [55]:
# Notes on the layout of the for-sale response held in `tt`:
#tt['data']['home_search']['results'] # This is the juicy housing details.
#tt['data']['geo'] # Pulls in median listing price data for neighboring zip codes and major cities
#tt['data']['home_search']['total'] # How many homes were returned, total number?
#tt['data']['home_search']['count'] # Not sure how this works, how many are there?
# Show the first listing.
tt['data']['home_search']['results'][0]

{'primary_photo': {'href': 'https://ap.rdcpix.com/5a07a527b5821000030c9ca212d698dfl-m1706552722s-w1024_h768.jpg'},
 'last_update_date': '2022-12-15T00:13:16Z',
 'source': {'agents': [{'office_name': None}],
  'id': 'SEWA',
  'plan_id': None,
  'spec_id': None,
  'type': 'mls'},
 'tags': ['community_outdoor_space',
  'community_security_features',
  'den_or_office',
  'dining_room',
  'dishwasher',
  'family_room',
  'fireplace',
  'hardwood_floors',
  'hill_or_mountain_view',
  'lake_view',
  'ocean_view',
  'view',
  'washer_dryer',
  'water_view',
  'basement',
  'garage_1_or_more',
  'garage_2_or_more',
  'two_or_more_stories',
  'floor_plan',
  'trails',
  'lake',
  'beach',
  'medicalcare'],
 'permalink': '3620-42nd-Ave-NE_Seattle_WA_98105_M26724-10792',
 'status': 'for_sale',
 'list_date': '2022-12-15T00:13:16Z',
 'open_houses': None,
 'tax_record': {'public_record_id': 'D9B701F835A00613A60FF98845F3FB66'},
 'branding': [{'name': 'Windermere RE Greenwood',
   'photo': None,
   'ty

In [62]:
# This will be taking in the following: tt['data']['home_search']['results'][n]

class house():
    '''
    Holds (hehe, houses) the data for one listing. Each house gets its own instance.

    The API returns a lot of data nested in a number of dictionaries; this class
    takes the 'juicy' bit, i.e. one element of
    tt['data']['home_search']['results'].

    The idea for this class is that it will hold all the needed info for:
         1) the GUI: address, google street view, other photos. This will
            probably be a flask application to start, but we are far from even
            thinking about that.
         2) the MODEL: tags, list_prices, other flags. What if we created a word
            cloud and have the user select key words for their house until they
            have selected some flat number or % contribution to model from the
            tags (TBD). There will be dates there; we will use days old (or
            something similar) for the model training, while the actual tool
            will use zero, as the user is entering 100% correct info. This may
            or may not be a good idea, as it might have unintended implications
            within the model.

    Attributes set by __init__:
        reference_info: dict with 'id' and de-duplicated 'photos' URLs (not model input).
        raw_last_update, raw_list_date: timestamp strings as received.
        last_update, list_date: datetime at midnight of that day, or None.
        tags, list_price, new_construction: model inputs.
        raw_location, raw_description: untouched 'address' / 'description' dicts.

    Call _clean_description() to unpack raw_description into flat attributes.
    '''
    def __init__(self, listing : dict):
        # Collect the primary photo first, then the gallery. list.extend()
        # mutates in place and returns None, so it cannot be chained inline.
        # NOTE(review): missing/None 'primary_photo' or 'photos' is tolerated
        # here on the assumption that some listings have no photos -- confirm.
        primary_photo = listing.get('primary_photo') or {}
        photo_urls = [primary_photo.get('href')]
        photo_urls.extend(photo.get('href') for photo in (listing.get('photos') or []))

        self.reference_info = { # This is stuff not going into the model
            'id' : listing['property_id'],
            # dict.fromkeys de-duplicates while keeping the primary photo first.
            'photos' : list(dict.fromkeys(url for url in photo_urls if url))
        }

        self.raw_last_update = listing['last_update_date']
        self.raw_list_date = listing['list_date']
        self.tags = listing['tags'] # This one will be weird, might need to wrap in nlp somehow with this, maybe not, just dummy it
        self.list_price = listing['list_price']
        # bool() so that both None and an explicit False mean "not new construction".
        self.new_construction = bool(listing['flags']['is_new_construction'])

        self.raw_location = listing['address']
        self.raw_description = listing['description']

        self._clean_dates()

    @staticmethod
    def _convert_date(date : str):
        '''Parse a 'YYYY-MM-DD' string into a datetime.'''
        return datetime.strptime(date, '%Y-%m-%d')

    @classmethod
    def _date_from_timestamp(cls, raw_timestamp):
        '''Return the date part of a '<date>T<time>' string as a datetime, else None.'''
        if not raw_timestamp:
            return None
        parts = raw_timestamp.split('T')
        return cls._convert_date(parts[0]) if len(parts) == 2 else None

    def _clean_dates(self):
        '''Derive last_update and list_date from the raw timestamp strings.'''
        self.last_update = self._date_from_timestamp(self.raw_last_update)
        self.list_date = self._date_from_timestamp(self.raw_list_date)

    def _clean_location(self):
        '''Placeholder: location cleaning is not implemented yet.'''
        return None

    def _clean_description(self):
        '''
        Unpack raw_description into flat attributes and return self.

        Bath counts default to 0 when the key is absent or None; the other
        fields are None when absent.
        '''
        description = self.raw_description

        # dict.get() takes its default positionally; `or 0` also covers a key
        # that is present with a None value.
        self.baths_full = description.get('baths_full') or 0
        self.baths_3qtr = description.get('baths_3qtr') or 0
        self.baths_half = description.get('baths_half') or 0
        self.baths_1qtr = description.get('baths_1qtr') or 0
        self.year_built = description.get('year_built')
        self.lot_sqft = description.get('lot_sqft')
        self.sqft = description.get('sqft')
        self.garage = description.get('garage')
        self.stories = description.get('stories')
        self.beds = description.get('beds')
        self.type = description.get('type') # this one will need to be dummied with other values.
        return self

In [71]:
# Create a geopy Nominatim geocoder.
# NOTE(review): this import sits outside the notebook's imports cell; consider
# moving it there so a fresh Restart & Run All surfaces the dependency early.
# NOTE(review): "geoapiExercises" looks like a copied sample user agent;
# presumably the service expects an application-specific one -- confirm and replace.
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="geoapiExercises")

In [1]:
# Geocode a sample place name. Requires the cell that defines `geolocator` to
# have been run first; the recorded NameError came from running this cell
# without it.
# NOTE: this rebinds `tt`, which earlier cells use for the for-sale API response.
tt = geolocator.geocode('Chicago')

NameError: name 'geolocator' is not defined

In [56]:
# Scratch check: extending a list happens in place on the same object.
a = [1, 2, 3]
a += [5, 6, 7]
print(a)

[1, 2, 3, 5, 6, 7]


In [35]:
# Scratch check of a "value or default" lookup.
# NOTE(review): `vv` is not defined in any visible cell (hidden kernel state);
# if it is a dict, vv.get('b', 0) is the equivalent idiom.
vv['b'] if 'b' in vv else 0

0