## HUD API Testing Notebook

In [1]:
import numpy as np
import pandas as pd
import os
import requests
from env import hud_token

In [2]:
# Smoke test of the HUD Fair Market Rent API: ask for the list of Texas
# counties, authenticating with the bearer token imported from env.
headers = {'Authorization': f'Bearer {hud_token}'}
url = 'https://www.huduser.gov/hudapi/public/fmr/listCounties/TX'

response = requests.get(url=url, headers=headers)

# Process the response...


In [3]:
response.status_code

200

In [4]:
response

<Response [200]>

In [5]:
response.text

'[{"state_code":"TX","fips_code":"4800199999","county_name":"Anderson County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800399999","county_name":"Andrews County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800599999","county_name":"Angelina County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800799999","county_name":"Aransas County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800999999","county_name":"Archer County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801199999","county_name":"Armstrong County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801399999","county_name":"Atascosa County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801599999","county_name":"Austin County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801799999","county_name":"Bailey County","town_name":"","category":"County

In [6]:
# OK, the API is up and returning data. Every fips_code above starts with 48, the Texas state code, which is the same state code used in Census data.

In [7]:
# Next, check whether the API can give us the data we are interested in,
# beginning with the list of counties in Texas.
headers = {'Authorization': f'Bearer {hud_token}'}
url = 'https://www.huduser.gov/hudapi/public/fmr/listCounties/TX'
response = requests.get(url, headers=headers)

In [8]:
response.text

'[{"state_code":"TX","fips_code":"4800199999","county_name":"Anderson County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800399999","county_name":"Andrews County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800599999","county_name":"Angelina County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800799999","county_name":"Aransas County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4800999999","county_name":"Archer County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801199999","county_name":"Armstrong County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801399999","county_name":"Atascosa County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801599999","county_name":"Austin County","town_name":"","category":"County"},{"state_code":"TX","fips_code":"4801799999","county_name":"Bailey County","town_name":"","category":"County

In [9]:
# The fips_code for Bexar County is 4802999999; its full record is:
# {"state_code":"TX","fips_code":"4802999999","county_name":"Bexar County","town_name":"","category":"County"}

In [10]:
response

<Response [200]>

In [11]:
# List the HUD metro areas so that the cbsa_code of the San Antonio
# Metropolitan Statistical Area can be looked up in the response.
url = 'https://www.huduser.gov/hudapi/public/fmr/listMetroAreas'
headers = {
    'Authorization': f'Bearer {hud_token}'
}

response = requests.get(url, headers=headers)

In [12]:
response.text

'[{"cbsa_code":"METRO10180M10180","area_name":"Abilene, TX MSA","category":"MetroArea"},{"cbsa_code":"METRO29180N22001","area_name":"Acadia Parish, LA HUD Metro FMR Area","category":"MetroArea"},{"cbsa_code":"METRO10380M10380","area_name":"Aguadilla-Isabela, PR HUD Metro FMR Area","category":"MetroArea"},{"cbsa_code":"METRO10420M10420","area_name":"Akron, OH MSA","category":"MetroArea"},{"cbsa_code":"METRO10500M10500","area_name":"Albany, GA MSA","category":"MetroArea"},{"cbsa_code":"METRO10540M10540","area_name":"Albany, OR MSA","category":"MetroArea"},{"cbsa_code":"METRO10580M10580","area_name":"Albany-Schenectady-Troy, NY MSA","category":"MetroArea"},{"cbsa_code":"METRO10740M10740","area_name":"Albuquerque, NM MSA","category":"MetroArea"},{"cbsa_code":"METRO10780M10780","area_name":"Alexandria, LA MSA","category":"MetroArea"},{"cbsa_code":"METRO14540N21003","area_name":"Allen County, KY HUD Metro FMR Area","category":"MetroArea"},{"cbsa_code":"METRO10900M10900","area_name":"Allentow

In [13]:
# The cbsa_code for the San Antonio MSA is METRO41700M41700; its full record is:
#{"cbsa_code":"METRO41700M41700","area_name":"San Antonio-New Braunfels, TX HUD Metro FMR Area","category":"MetroArea"}

In [14]:
# Request the Fair Market Rent data for the San Antonio MSA by its cbsa_code.
# No year is given in the URL; the recorded reply below is for year 2023.
url = 'https://www.huduser.gov/hudapi/public/fmr/data/METRO41700M41700'
headers = {
    'Authorization': f'Bearer {hud_token}'
}

response = requests.get(url, headers=headers)



In [15]:
response.text

'{"data":{"county_name":"","counties_msa":"Bandera County, TX; Bexar County, TX; Comal County, TX; Guadalupe County, TX; and Wilson County, TX","town_name":"","metro_status":"1","metro_name":"San Antonio-New Braunfels, TX","area_name":"San Antonio-New Braunfels, TX HUD Metro FMR Area","smallarea_status":"1","year":"2023","basicdata":[{"zip_code":"78002","Efficiency":760,"One-Bedroom":870,"Two-Bedroom":1060,"Three-Bedroom":1350,"Four-Bedroom":1640},{"zip_code":"78003","Efficiency":790,"One-Bedroom":910,"Two-Bedroom":1110,"Three-Bedroom":1410,"Four-Bedroom":1710},{"zip_code":"78006","Efficiency":1030,"One-Bedroom":1240,"Two-Bedroom":1530,"Three-Bedroom":1870,"Four-Bedroom":2340},{"zip_code":"78009","Efficiency":840,"One-Bedroom":890,"Two-Bedroom":1170,"Three-Bedroom":1530,"Four-Bedroom":1570},{"zip_code":"78010","Efficiency":790,"One-Bedroom":910,"Two-Bedroom":1100,"Three-Bedroom":1400,"Four-Bedroom":1710},{"zip_code":"78015","Efficiency":1380,"One-Bedroom":1610,"Two-Bedroom":1960,"Three

In [16]:
# ok, got the first year of data for San Antonio/NewBraunfels MSA,
# now try to get other years

In [62]:
# Same metro area as before, this time asking for an explicit earlier year
# through the `year` query parameter.
headers = {'Authorization': f'Bearer {hud_token}'}
url = 'https://www.huduser.gov/hudapi/public/fmr/data/METRO41700M41700?year=2018'

response = requests.get(url=url, headers=headers)


In [63]:
response.text

'{"data":{"county_name":"","counties_msa":"Bandera County, TX; Bexar County, TX; Comal County, TX; Guadalupe County, TX; and Wilson County, TX","town_name":"","metro_status":"1","metro_name":"San Antonio-New Braunfels, TX MSA","area_name":"San Antonio-New Braunfels, TX HUD Metro FMR Area","smallarea_status":"1","year":"2018","basicdata":[{"zip_code":"78002","Efficiency":570,"One-Bedroom":710,"Two-Bedroom":880,"Three-Bedroom":1170,"Four-Bedroom":1420},{"zip_code":"78003","Efficiency":640,"One-Bedroom":790,"Two-Bedroom":980,"Three-Bedroom":1300,"Four-Bedroom":1580},{"zip_code":"78006","Efficiency":760,"One-Bedroom":930,"Two-Bedroom":1170,"Three-Bedroom":1480,"Four-Bedroom":2050},{"zip_code":"78009","Efficiency":560,"One-Bedroom":690,"Two-Bedroom":870,"Three-Bedroom":1240,"Four-Bedroom":1500},{"zip_code":"78010","Efficiency":610,"One-Bedroom":750,"Two-Bedroom":940,"Three-Bedroom":1240,"Four-Bedroom":1500},{"zip_code":"78015","Efficiency":1010,"One-Bedroom":1250,"Two-Bedroom":1560,"Three-B

In [19]:
# Decode the JSON body of the most recent response into Python objects.
data = response.json()

In [20]:
# Build a DataFrame from the list of per-ZIP records under data -> basicdata.
# NOTE(review): read top to bottom, `response` at this point is the
# ?year=2018 reply, yet this frame is named for 2023 and the recorded output
# below shows the 2023 rates. The execution counts (In [19]/[20] vs In [62])
# suggest this cell was run before the 2018 request -- confirm before relying
# on Restart & Run All.
HUD_SA_2023 = pd.DataFrame(data['data']['basicdata'])

In [21]:
HUD_SA_2023

Unnamed: 0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom
0,78002,760,870,1060,1350,1640
1,78003,790,910,1110,1410,1710
2,78006,1030,1240,1530,1870,2340
3,78009,840,890,1170,1530,1570
4,78010,790,910,1100,1400,1710
...,...,...,...,...,...,...
131,78670,910,1030,1220,1560,1850
132,78676,1200,1330,1560,2000,2320
133,78883,670,760,940,1200,1430
134,78884,790,910,1100,1400,1710


In [23]:
from datetime import datetime

def get_year_list(start_year=2018, end_year=None):
    '''
    Build the list of years to request from the HUD Fair Market Rent API.

    The list runs from start_year through end_year, both inclusive. Called
    with no arguments it covers 2018 through the current calendar year.
    2018 is the default start because, per the original author's note, it is
    the first year for which HUD MSA Small Area data was available.

    Also per the original note: HUD generally publishes new rates in September
    for the Federal Fiscal Year beginning October 1st, so the rates for a
    calendar year are expected to be in the database before that year starts
    and before the year is added to this list.

    Arguments:
        start_year: first year in the list (default 2018).
        end_year: last year in the list. None (the default) means the current
            calendar year according to the system clock.

    Returns: A list of ints from start_year to end_year inclusive. The list
        is empty when end_year is earlier than start_year.
    '''
    if end_year is None:
        # Resolved at call time so a long-running kernel rolls over with the clock.
        end_year = datetime.now().year

    # range() excludes its stop value, hence the + 1 to keep end_year itself.
    return list(range(start_year, end_year + 1))


In [24]:
get_year_list()

[2018, 2019, 2020, 2021, 2022, 2023]

In [85]:
def get_entity_data(entity_id):
    '''
    Download the HUD Fair Market Rent Small Area data for a single HUD entity
    (in this case a Metropolitan Statistical Area).

    One request is made for every year returned by get_year_list(). Each
    reply is converted to a DataFrame, tagged with its year, the entity id and
    the area name, and the yearly frames are stacked into one DataFrame.

    Requires a HUD API User Token (imported as hud_token), which can be
    acquired here: https://www.huduser.gov/portal/dataset/fmr-api.html
    Entity_ids can be found here: https://www.huduser.gov/portal/datasets/geotools.html

    Arguments: a HUD entity id

    Returns: a DataFrame with one row per record per year, holding the
        columns supplied by the API plus year, entity_id and area_name.
        Each year's rows keep their own 0-based index. An empty DataFrame is
        returned when there are no years to query.

    Raises: requests.HTTPError when the API answers with an error status.
    '''
    header = {'Authorization': f'Bearer {hud_token}'}

    yearly_frames = []
    for year in get_year_list():
        url = f'https://www.huduser.gov/hudapi/public/fmr/data/{entity_id}?year={year}'
        response = requests.get(url, headers=header)
        # Surface an HTTP error here instead of failing later on a missing key.
        response.raise_for_status()
        payload = response.json()['data']

        # The records are stored under "basicdata" (no underscore), as seen in
        # the raw responses earlier in this notebook.
        year_df = pd.DataFrame(payload['basicdata'])
        year_df['year'] = payload['year']
        year_df['entity_id'] = entity_id
        year_df['area_name'] = payload['area_name']
        yearly_frames.append(year_df)

    # pd.concat raises on an empty list, so keep the empty-frame result.
    if not yearly_frames:
        return pd.DataFrame()

    # Concatenate once rather than re-copying the growing frame on every pass.
    return pd.concat(yearly_frames)

In [44]:
get_entity_data('METRO41700M41700')

Unnamed: 0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom,year,entity_id,area_name
0,78002,570,710,880,1170,1420,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
1,78003,640,790,980,1300,1580,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2,78006,760,930,1170,1480,2050,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
3,78009,560,690,870,1240,1500,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
4,78010,610,750,940,1240,1500,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
...,...,...,...,...,...,...,...,...,...
131,78670,910,1030,1220,1560,1850,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
132,78676,1200,1330,1560,2000,2320,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
133,78883,670,760,940,1200,1430,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
134,78884,790,910,1100,1400,1710,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"


In [41]:
# ok, we have data from 2018 to 2023. Now, to put all of these years into one DF

# need to add a column year and concat on axis = 0

def get_hud_data(entity_ids):
    '''
    Download the Fair Market Rent Small Area data for several HUD entities
    (in this case Metropolitan Statistical Areas) and stack them.

    get_entity_data() is called once per entity; it returns one DataFrame per
    entity, already tagged with an "entity_id" column. The per-entity frames
    are concatenated into a single DataFrame with a fresh 0..n-1 index, so row
    labels are not repeated across entities and years.
    Entity_ids can be found here: https://www.huduser.gov/portal/datasets/geotools.html

    Arguments: a list of HUD entity_ids

    Returns: a DataFrame of HUD FMR for each ZIP code and year from 2018-present for all HUD
            entities in entity_id list. An empty DataFrame is returned for an
            empty list.
    '''
    entity_frames = [get_entity_data(entity_id) for entity_id in entity_ids]

    # pd.concat raises on an empty list, so keep the empty-frame result.
    if not entity_frames:
        return pd.DataFrame()

    # One concat for all entities; ignore_index gives the combined frame a
    # single RangeIndex, as in the recorded df.info() output.
    return pd.concat(entity_frames, ignore_index=True)
    

In [45]:
# Entities to download: for now only the San Antonio-New Braunfels metro
# area, identified by the cbsa_code found in the listMetroAreas response.
entity_ids = ['METRO41700M41700']

In [46]:
# Download every year for every listed entity into one DataFrame and preview it.
df = get_hud_data(entity_ids)
df

Unnamed: 0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom,year,entity_id,area_name
0,78002,570,710,880,1170,1420,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
1,78003,640,790,980,1300,1580,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2,78006,760,930,1170,1480,2050,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
3,78009,560,690,870,1240,1500,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
4,78010,610,750,940,1240,1500,2018,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
...,...,...,...,...,...,...,...,...,...
807,78670,910,1030,1220,1560,1850,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
808,78676,1200,1330,1560,2000,2320,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
809,78883,670,760,940,1200,1430,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
810,78884,790,910,1100,1400,1710,2023,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"


In [47]:
# check out basic info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 812 entries, 0 to 811
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   zip_code       812 non-null    object
 1   Efficiency     812 non-null    int64 
 2   One-Bedroom    812 non-null    int64 
 3   Two-Bedroom    812 non-null    int64 
 4   Three-Bedroom  812 non-null    int64 
 5   Four-Bedroom   812 non-null    int64 
 6   year           812 non-null    object
 7   entity_id      812 non-null    object
 8   area_name      812 non-null    object
dtypes: int64(5), object(4)
memory usage: 57.2+ KB


In [50]:
# Cast the year labels (stored as strings, see the object dtype above) to
# timestamps. Giving the format explicitly means pandas does not have to guess
# how to read the values; each year becomes January 1st of that year.
df['year'] = pd.to_datetime(df['year'], format='%Y')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 812 entries, 0 to 811
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   zip_code       812 non-null    object        
 1   Efficiency     812 non-null    int64         
 2   One-Bedroom    812 non-null    int64         
 3   Two-Bedroom    812 non-null    int64         
 4   Three-Bedroom  812 non-null    int64         
 5   Four-Bedroom   812 non-null    int64         
 6   year           812 non-null    datetime64[ns]
 7   entity_id      812 non-null    object        
 8   area_name      812 non-null    object        
dtypes: datetime64[ns](1), int64(5), object(3)
memory usage: 57.2+ KB


In [52]:
# Make the year timestamps the index. The result is rebound to `df` rather
# than mutating with inplace=True, so the frame object produced by the
# earlier cells is left untouched.
df = df.set_index('year')

In [53]:
df.head()

Unnamed: 0_level_0,zip_code,Efficiency,One-Bedroom,Two-Bedroom,Three-Bedroom,Four-Bedroom,entity_id,area_name
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-01,78670,910,1030,1220,1560,1850,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,78676,1200,1330,1560,2000,2320,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,78883,670,760,940,1200,1430,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,78884,790,910,1100,1400,1710,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,78885,790,910,1100,1400,1710,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"


In [64]:
# Copy the current column labels into a plain list so they can be cleaned up.
cols = df.columns.tolist()

In [65]:
cols

['zip_code',
 'Efficiency',
 'One-Bedroom',
 'Two-Bedroom',
 'Three-Bedroom',
 'Four-Bedroom',
 'entity_id',
 'area_name']

In [68]:
# Normalise the labels: hyphens become underscores and everything is lower-cased.
cols = [label.replace('-', '_').lower() for label in cols]

In [69]:
cols

['zip_code',
 'efficiency',
 'one_bedroom',
 'two_bedroom',
 'three_bedroom',
 'four_bedroom',
 'entity_id',
 'area_name']

In [76]:
# Pair every existing column label with its normalised replacement.
new_cols = {old_label: new_label for old_label, new_label in zip(df.columns, cols)}
new_cols

{'zip_code': 'zip_code',
 'Efficiency': 'efficiency',
 'One-Bedroom': 'one_bedroom',
 'Two-Bedroom': 'two_bedroom',
 'Three-Bedroom': 'three_bedroom',
 'Four-Bedroom': 'four_bedroom',
 'entity_id': 'entity_id',
 'area_name': 'area_name'}

In [78]:
# Apply the old -> new label mapping and preview the renamed frame.
df = df.rename(columns=new_cols)
df.head()

Unnamed: 0_level_0,zip_code,efficiency,one_bedroom,two_bedroom,three_bedroom,four_bedroom,entity_id,area_name
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01,78002,570,710,880,1170,1420,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,78003,640,790,980,1300,1580,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,78006,760,930,1170,1480,2050,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,78009,560,690,870,1240,1500,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,78010,610,750,940,1240,1500,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"


In [79]:
# Preview only: the result is displayed but not assigned, so `df` itself keeps
# its zip_code column (the aggregation further down counts it).
df.drop(columns='zip_code')

Unnamed: 0_level_0,efficiency,one_bedroom,two_bedroom,three_bedroom,four_bedroom,entity_id,area_name
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-01,570,710,880,1170,1420,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,640,790,980,1300,1580,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,760,930,1170,1480,2050,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,560,690,870,1240,1500,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2018-01-01,610,750,940,1240,1500,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
...,...,...,...,...,...,...,...
2023-01-01,910,1030,1220,1560,1850,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,1200,1330,1560,2000,2320,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,670,760,940,1200,1430,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
2023-01-01,790,910,1100,1400,1710,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"


In [84]:
# Summarise per entity and year: number of ZIP-code rows, the mean rent for
# each unit size, and the first entity id / area name seen in each group.
df_grouped = df.groupby(['entity_id', 'year']).agg({
    'zip_code': 'count',
    **dict.fromkeys(
        ['efficiency', 'one_bedroom', 'two_bedroom', 'three_bedroom', 'four_bedroom'],
        'mean',
    ),
    'entity_id': 'first',
    'area_name': 'first',
})

In [83]:
df_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,zip_code,efficiency,one_bedroom,two_bedroom,three_bedroom,four_bedroom,entity_id,area_name
entity_id,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
METRO41700M41700,2018-01-01,135,678.592593,835.925926,1000.0,1380.222222,1677.037037,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
METRO41700M41700,2019-01-01,135,708.074074,867.925926,1040.0,1418.444444,1735.851852,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
METRO41700M41700,2020-01-01,134,720.671642,874.179104,1040.0,1407.835821,1734.701493,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
METRO41700M41700,2021-01-01,136,782.5,936.617647,1110.0,1480.735294,1834.411765,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
METRO41700M41700,2022-01-01,136,833.970588,982.867647,1160.0,1530.367647,1884.632353,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
METRO41700M41700,2023-01-01,136,934.779412,1077.426471,1270.0,1658.676471,2017.647059,METRO41700M41700,"San Antonio-New Braunfels, TX HUD Metro FMR Area"
