In [1]:
import re
import requests
import pandas as pd
from env import hud_token

In [2]:
# getting root url
url = 'https://www.huduser.gov/hudapi/public/fmr/listMetroAreas'

header = {'Authorization': f'Bearer {hud_token}'}

response = requests.get(url, headers=header)

data = response.json()

In [3]:
df = pd.DataFrame(data)

In [4]:
df[df['area_name'].str.startswith('San Antonio') == True]

Unnamed: 0,cbsa_code,area_name,category
488,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area",MetroArea


In [5]:
# getting entity_id
entity_id = 'METRO41700M41700'

In [6]:
url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}'
response = requests.get(url, headers=header)
data = response.json()

In [7]:
data['data']['year']

'2023'

In [8]:
df = pd.DataFrame(data['data']['basicdata'])

In [9]:
# adding year
df['year'] = data['data']['year']

In [10]:
df

Unnamed: 0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom,year
0,78002,760,870,1060,1350,1640,2023
1,78003,790,910,1110,1410,1710,2023
2,78006,1030,1240,1530,1870,2340,2023
3,78009,840,890,1170,1530,1570,2023
4,78010,790,910,1100,1400,1710,2023
...,...,...,...,...,...,...,...
131,78670,910,1030,1220,1560,1850,2023
132,78676,1200,1330,1560,2000,2320,2023
133,78883,670,760,940,1200,1430,2023
134,78884,790,910,1100,1400,1710,2023


In [11]:
years = ['2022', '2021', '2020', '2019', '2018', '2017', '2016', '2015', '2014', '2013', '2012', '2011', '2010' ]

for year in years:
    url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}?year={year}'
    response = requests.get(url, headers=header)
    print(year, response)

# no data available for the metro area

2022 <Response [200]>
2021 <Response [200]>
2020 <Response [200]>
2019 <Response [200]>
2018 <Response [200]>
2017 <Response [200]>
2016 <Response [400]>
2015 <Response [400]>
2014 <Response [400]>
2013 <Response [400]>
2012 <Response [400]>
2011 <Response [400]>
2010 <Response [400]>


#### Takeaways:
* The data is not avaiable until 2017

#### Actions:
* Make a for loop that creates a dummy df and adds the information from the newly acquired data to it 

#### Later inspection
* What is the reason for this? Was the zipcode data not available? Why is this the case? Will this impact our results?


In [12]:
years = ['2022', '2021', '2020', '2019', '2018']

for year in years:
    url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}?year={year}'
    response = requests.get(url, headers=header)
    data = response.json()
    dum = pd.DataFrame(data['data']['basicdata'])
    dum['year'] = data['data']['year']
    df = pd.concat([df, dum])
    
df

Unnamed: 0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom,year
0,78002,760,870,1060,1350,1640,2023
1,78003,790,910,1110,1410,1710,2023
2,78006,1030,1240,1530,1870,2340,2023
3,78009,840,890,1170,1530,1570,2023
4,78010,790,910,1100,1400,1710,2023
...,...,...,...,...,...,...,...
130,78676,800,950,1160,1560,1880,2018
131,78850,560,690,870,1150,1380,2018
132,78883,610,760,950,1250,1520,2018
133,78884,560,690,870,1150,1380,2018


##### Takeaways:
* We wonly get data from 2018 and onward due to data challenges with zipcodes not being present
* Once again, this needs to investigated but in the future

In [13]:
# basic requirements
entity_id = 'METRO41700M41700'
header = {'Authorization': f'Bearer {hud_token}'}

# accessing api
url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}'
response = requests.get(url, headers=header)
data = response.json()

# getting initial df
df = pd.DataFrame()

# establishing years to pull
years = ['2023', '2022', '2021', '2020', '2019', '2018']

# For loop to access data from each year and concat onto the initial df
for year in years:
    url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}?year={year}'
    response = requests.get(url, headers=header)
    data = response.json()
    dum = pd.DataFrame(data['data']['basicdata'])
    dum['year'] = data['data']['year']
    df = pd.concat([df, dum])
    
df

Unnamed: 0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom,year
0,78002,760,870,1060,1350,1640,2023
1,78003,790,910,1110,1410,1710,2023
2,78006,1030,1240,1530,1870,2340,2023
3,78009,840,890,1170,1530,1570,2023
4,78010,790,910,1100,1400,1710,2023
...,...,...,...,...,...,...,...
130,78676,800,950,1160,1560,1880,2018
131,78850,560,690,870,1150,1380,2018
132,78883,610,760,950,1250,1520,2018
133,78884,560,690,870,1150,1380,2018


In [14]:
zip_codes = df['zip_code'].unique()

# Zillow API

In [15]:
# url = "https://zillow56.p.rapidapi.com/search"

# querystring = {"location":"78002"}

# headers = {
# 	"X-RapidAPI-Key": "114e895469msh94a2c2aa405c447p161c7ejsn7b09da3c1151",
# 	"X-RapidAPI-Host": "zillow56.p.rapidapi.com"
# }

In [16]:
# response = requests.get(url, headers=headers, params=querystring)
# data = response.json()

In [17]:
# data

In [18]:
# pd.DataFrame(data['results'])

In [19]:


# for zip_code in zip_codes:
#     querystring = {"location":f"{str(zip_code)}"}
#     response = requests.get(url, headers=headers, params=querystring)
#     data = response.json()
#     print(zip_code, data[totalPages])

In [20]:
# querystring = {"location":f"{}"}
# response = requests.get(url, headers=headers, params=querystring)
# data = response.json()

Split Zipcodes

In [21]:
corey = zip_codes[:49]
mack = zip_codes[49:99]
aaron = zip_codes[99:]

In [22]:
aaron

array(['78258', '78259', '78260', '78261', '78263', '78264', '78265',
       '78266', '78268', '78269', '78270', '78278', '78279', '78280',
       '78283', '78285', '78288', '78291', '78292', '78293', '78294',
       '78295', '78296', '78297', '78298', '78299', '78606', '78623',
       '78638', '78648', '78655', '78666', '78670', '78676', '78883',
       '78884', '78885', '78117', '78850'], dtype=object)

In [31]:
rapid_api_key = "114e895469msh94a2c2aa405c447p161c7ejsn7b09da3c1151"

headers = {
	"X-RapidAPI-Key": rapid_api_key,
	"X-RapidAPI-Host": "realty-mole-property-api.p.rapidapi.com"
}
zipcode = '78258'
url = f"https://realty-mole-property-api.p.rapidapi.com/zipCodes/{zipcode}"
response = requests.get(url, headers=headers)
data = response.json()

In [33]:
data2 = data

In [37]:
market_rent_dict = {}

In [38]:
market_rent_dict['78258'] = data2
market_rent_dict['78256'] = data2

Unnamed: 0,id,zipCode,rentalData
78258,78258,78258,"{'averageRent': 2761, 'minRent': 956, 'maxRent..."
78256,78258,78258,"{'averageRent': 2761, 'minRent': 956, 'maxRent..."


dict_keys(['78258', '78256'])

In [45]:
market_rent_dict = {}

In [69]:
aaron_removed = list(aaron)
for key in market_rent_dict.keys():
    aaron_removed.remove(key)

In [63]:
# aaron_removed

In [96]:
# need to import sleep time
import time

In [97]:
time.sleep(2)

In [73]:
import time

for zipcode in aaron_removed:
    url = f"https://realty-mole-property-api.p.rapidapi.com/zipCodes/{zipcode}"
    response = requests.get(url, headers=headers)
    
    if (
    response.status_code != 204 and
    response.headers["content-type"].strip().startswith("application/json")
    ):
        try:
            
            data = response.json()
            
            market_rent_dict[zipcode] = data
        
        except ValueError as e:
            
            print('ValueError', e)
    time.sleep(2)

In [102]:
import time

def get_rapid_api(zipcode_list, rapid_api_key, filename):
    '''
    Arguments: 
        1. a list of zipcodes, with each zipcode in the list a string
        2. the rapid_api_key as a string that can be copied from the website after subscribing to the realty mole api using this website
            https://rapidapi.com/realtymole/api/realty-mole-property-api
        3. a string literal with .csv at the end for the filename of the saved df
    Actions: This fucntion pulls data from the REalty Mole rapid api and saves the messy data as a csv 
    Return: Messy market rent history data
    Modules:
    import time
    import requests
    import pandas
    '''
    # intialize dicitonary
    market_rent_dict = {}
    
    # the headers necessary for the data, the raopid api key entered in the 
    headers = {
        "X-RapidAPI-Key": rapid_api_key,
        "X-RapidAPI-Host": "realty-mole-property-api.p.rapidapi.com"
    }

    # For each zipcode in the list of sipcodes entered
    for zipcode in zipcode_list:
        
        # place the zipcode in the endpoint
        url = f"https://realty-mole-property-api.p.rapidapi.com/zipCodes/{zipcode}"
        
        # get the repsonse from the end point using the headers specififed
        response = requests.get(url, headers=headers)

        # if the reponse acts accordingly
        if (
        response.status_code != 204 and
        response.headers["content-type"].strip().startswith("application/json")
        ):
            try:

                # the repsonse is stored in data
                data = response.json()

                # tdata is stored in the dictionary with the zipcode as a key
                market_rent_dict[zipcode] = data

            except ValueError as e:

                print('ValueError', e)
        
        # the pull pauses for 2 seconds in orderreduce the load on the api and prevent error messages
        time.sleep(2)
    
    # the dictionary is turned into a df
    df = pd.DataFrame(market_rent_dict)
    
    # the df is saved as csv using the filename specified above
    df.to_csv(filename)
    
    # the function is exited and the df is returned
    return df

array(['78002', '78003', '78006', '78009', '78010', '78015', '78023',
       '78039', '78052', '78054', '78055', '78063', '78064', '78069',
       '78070', '78073', '78101', '78108', '78109', '78112', '78113',
       '78114', '78115', '78121', '78123', '78124', '78130', '78131',
       '78132', '78133', '78140', '78143', '78147', '78148', '78150',
       '78152', '78154', '78155', '78156', '78160', '78161', '78163',
       '78201', '78202', '78203', '78204', '78205', '78206', '78207'],
      dtype=object)

In [None]:
df = get_rapid_api(corey, "10a41d4c5fmsh2586ce89e03226cp1230b4jsn7ec85dfc4b6f", 'corey.csv')

In [106]:
df.T

Unnamed: 0,id,zipCode,rentalData
78002,78002,78002,"{'averageRent': 900, 'minRent': 800, 'maxRent'..."
78003,78003,78003,"{'averageRent': 1387, 'minRent': 350, 'maxRent..."
78006,78006,78006,"{'averageRent': 1927, 'minRent': 844, 'maxRent..."
78009,78009,78009,"{'averageRent': 2531, 'minRent': 719, 'maxRent..."
78010,78010,78010,"{'averageRent': 1500, 'minRent': 800, 'maxRent..."
78015,78015,78015,"{'averageRent': 2778, 'minRent': 820, 'maxRent..."
78023,78023,78023,"{'averageRent': 2316, 'minRent': 1127, 'maxRen..."
78039,78039,78039,"{'averageRent': 1050, 'minRent': 1050, 'maxRen..."
78052,78052,78052,"{'averageRent': 2154, 'minRent': 885, 'maxRent..."
78054,78054,78054,"{'averageRent': 2000, 'minRent': 2000, 'maxRen..."


In [99]:
df = pd.DataFrame(market_rent_dict)

In [100]:
df.to_csv('csv')

In [76]:
len(aaron)

39

In [75]:
len(market_rent_dict.keys())

26

In [79]:
aaron_df = pd.DataFrame(market_rent_dict).T

In [86]:
aaron_df['message'].value_counts()

You have exceeded the rate limit per second for your plan, BASIC, by the API provider    10
Name: message, dtype: int64

In [90]:
rental = aaron_df.loc[:,'rentalData']

In [93]:
rental.dropna(inplace = True)

In [95]:
rental.loc['78258']

{'averageRent': 2761,
 'minRent': 956,
 'maxRent': 8750,
 'totalRentals': 182,
 'detailed': [{'bedrooms': 1,
   'averageRent': 1366,
   'minRent': 956,
   'maxRent': 1920,
   'totalRentals': 13},
  {'bedrooms': 2,
   'averageRent': 1391,
   'minRent': 1135,
   'maxRent': 1550,
   'totalRentals': 3},
  {'bedrooms': 3,
   'averageRent': 2488,
   'minRent': 1707,
   'maxRent': 4750,
   'totalRentals': 63},
  {'bedrooms': 4,
   'averageRent': 2984,
   'minRent': 2000,
   'maxRent': 6000,
   'totalRentals': 83},
  {'bedrooms': 5,
   'averageRent': 3466,
   'minRent': 2850,
   'maxRent': 5000,
   'totalRentals': 18},
  {'bedrooms': 6,
   'averageRent': 6875,
   'minRent': 5000,
   'maxRent': 8750,
   'totalRentals': 2}],
 'history': {'2020-04': {'averageRent': 1788.12,
   'minRent': 775,
   'maxRent': 7500,
   'totalRentals': 258,
   'detailed': [{'bedrooms': 0,
     'averageRent': 937.28,
     'minRent': 780,
     'maxRent': 1260,
     'totalRentals': 25},
    {'bedrooms': 1,
     'averageR

In [65]:
# The detailed section pulled out
pd.DataFrame(dum.loc['detailed', 'rentalData'])

Unnamed: 0,bedrooms,averageRent,minRent,maxRent,totalRentals
0,1,1366,956,1920,13
1,2,1391,1135,1550,3
2,3,2475,1707,4750,62
3,4,2998,2000,6000,83
4,5,3470,2850,5000,17
5,6,6875,5000,8750,2


In [70]:
history = pd.DataFrame(dum.loc['history', 'rentalData']).T

In [79]:
history.loc[:,'detailed']

2020-04    [{'bedrooms': 0, 'averageRent': 937.28, 'minRe...
2020-05    [{'bedrooms': 0, 'averageRent': 973.12, 'minRe...
2020-06    [{'bedrooms': 0, 'averageRent': 980.08, 'minRe...
2020-07    [{'bedrooms': 0, 'averageRent': 982.12, 'minRe...
2020-08    [{'bedrooms': 0, 'averageRent': 987.68, 'minRe...
2020-09    [{'bedrooms': 0, 'averageRent': 1004.74, 'minR...
2020-10    [{'bedrooms': 0, 'averageRent': 1004.3, 'minRe...
2020-11    [{'bedrooms': 0, 'averageRent': 1007.75, 'minR...
2020-12    [{'bedrooms': 0, 'averageRent': 996.48, 'minRe...
2021-01    [{'bedrooms': 0, 'averageRent': 996.48, 'minRe...
2021-02    [{'bedrooms': 0, 'averageRent': 1002.32, 'minR...
2021-03    [{'bedrooms': 0, 'averageRent': 1002.32, 'minR...
2021-04    [{'bedrooms': 0, 'averageRent': 1002.32, 'minR...
2021-05    [{'bedrooms': 0, 'averageRent': 1012.84, 'minR...
2021-06    [{'bedrooms': 0, 'averageRent': 1012.84, 'minR...
2021-11    [{'bedrooms': 0, 'averageRent': 2113.16, 'minR...
2021-12    [{'bedrooms':

SyntaxError: invalid syntax (1200186067.py, line 1)

Completed Query

In [None]:
# basic requirements
entity_id = 'METRO41700M41700'
headers = {'Authorization': f'Bearer {hud_token}'}

# accessing api
url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}'
response = requests.get(url, headers=header)
data = response.json()

# getting initial df
df = pd.DataFrame()

# establishing years to pull
years = ['2023', '2022', '2021', '2020', '2019', '2018']

# For loop to access data from each year and concat onto the initial df
for year in years:
    url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}?year={year}'
    response = requests.get(url, headers=header)
    data = response.json()
    dum = pd.DataFrame(data['data']['basicdata'])
    dum['year'] = data['data']['year']
    df = pd.concat([df, dum])
    
df

https://rapidapi.com/realtymole/api/realty-mole-property-api