###### Imports and Settings

In [57]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import sys
sys.path.append("../../../Functions and Dictionaries") # Adds higher directory to python modules path
import geodict
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces
tofullcensus = geodict.tofullcensus
geotogeoid = geodict.geotogeoid
shorttnplaces = geodict.shorttnplaces
shortkyplaces = geodict.shortkyplaces
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces
import sqlite3 as sq
#functions
def percent(x, y):
        try:
            return ((x/y)*100)
        except ZeroDivisionError:
            return 0
def percentchange(x, y):
    try:
        return ((x - y)*100/y)
    except ZeroDivisionError:
        return 0
def realchange(x, y):
    return x-y
#calculate real and percent change between all columns for all possible time frames
def calculate_changes(df, columns, time_frames, years):
    for column in columns:
        for time_frame in time_frames:
            start_year, end_year = time_frame.split('-')
            df[f'{column} % Change', 'None', f'{time_frame}'] = percentchange(df[(column, int(end_year), 'None')], df[(column, int(start_year), 'None')])
            df[f'{column} Change', 'None', f'{time_frame}'] = (df[(column, int(end_year), 'None')] - df[(column, int(start_year), 'None')])

    return df
#generate all possible time frames from a list of years
def generate_time_frames(years):
    time_frames = []
    for i in range(len(years)-1):
        for j in range(i+1, len(years)):
            time_frames.append(f"{years[i]}-{years[j]}")
    return time_frames

# This notebook outlines the download and formatting process for the Center for Neighborhood Technology's Housing and Transportation Cost Index as well as the data combined from our end for counties and places in the GNRC operating region.  

Go to this page: https://htaindex.cnt.org/download/  
Upon registering for access, download the following documents:  
+ HTA Index for Counties in Tennessee and Kentucky  
+ HTA Index for MPOs  
+ HTA Index for Block Groups in Tennessee  

Save these csvs as they come in the Data Downloads folder of Parent Data Gathering  

### Calculations are made both for Comphrehensive plans at higher geography levels (counties, MPO), but also by block group to identify distressed areas at a granular level.

In [58]:
#Load API Key
with open('api_keys.pkl', 'rb') as keys_file:
        keys_dict_2 = pickle.load(keys_file)
#create a variable that contains your api key
census_key = keys_dict_2['CENSUS']
bea_key = keys_dict_2['BEA']

In [59]:
#2022 ACS 5 Year Median Household Income
#counties
url_str= 'https://api.census.gov/data/2022/acs/acs5?key='+census_key
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "county:*"
predicates["in"]= "state:47" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'StateFIPS', 'GeoFIPS']
df = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
df = df.loc[df['GeoFIPS'].isin(GNRC)]
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "county:*"
predicates["in"]= "state:21" 
data = requests.get(url_str, params= predicates)                                                              
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'StateFIPS', 'GeoFIPS']
kycos = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
kycos = kycos.loc[kycos['GeoFIPS'].isin(KY)]
df = pd.concat([df, kycos], axis = 0)
#ky places call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "place:*"
predicates["in"]= "state:21" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'StateFIPS', 'GeoFIPS']
places=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
places=places.loc[places['GeoFIPS'].isin(shortkyplaces)]
df = pd.concat([df, places], axis = 0)
#places
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "place:*"
predicates["in"]= "state:47" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'StateFIPS', 'GeoFIPS']
places=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
places=places.loc[places['GeoFIPS'].isin(shorttnplaces)]
df = pd.concat([df, places], axis = 0)
#state call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "state:47"
data= requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'StateFIPS']
state=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
state['GeoFIPS'] = '0'
df = pd.concat([df, state], axis = 0)
#national call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "us:*"
data= requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'StateFIPS']
national=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
national['GeoFIPS'] = '0'
df = pd.concat([df, national], axis = 0)
# predicates= {} #block groups GNRC Region
# get_vars= ["NAME", 'B19013_001E']
# predicates["get"]= ",". join(get_vars)
# predicates["for"]= "block group:*"
# predicates["in"]= "state:47, county:*, tract:*"
# data= requests.get(url_str, params = predicates)
# col_names = ['NAME', 'Median Household Income', 'StateFIPS', 'CountyFIPS', 'Census Tract', 'Block Group']
# bg=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
# bg['GEO_ID'] = bg['StateFIPS'] + bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
# bg['GeoFIPS'] = bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
# #bg = bg.loc[bg['CountyFIPS'].isin(GNRC)]
# bg = bg.drop(columns = ['CountyFIPS', 'Census Tract', 'Block Group']).reset_index(drop = True)
# df = bg
#pd.concat([df, bg], axis = 0)
savename = df
print('Okay Finished')

Okay Finished


In [60]:
savename.head()

Unnamed: 0,NAME,GEO_ID,Median Household Income,StateFIPS,GeoFIPS
10,"Cheatham County, Tennessee",0500000US47021,77014,47,21
18,"Davidson County, Tennessee",0500000US47037,71863,47,37
21,"Dickson County, Tennessee",0500000US47043,68492,47,43
41,"Houston County, Tennessee",0500000US47083,51544,47,83
42,"Humphreys County, Tennessee",0500000US47085,54185,47,85


In [61]:
#drop unneeded columns and change columns that need to be to float
hhincome = savename.reset_index(drop = True)
hhincome = hhincome.drop(columns = ['StateFIPS', 'GeoFIPS'])
cols = ['Median Household Income']
hhincome[cols] = hhincome[cols].astype(float)

In [62]:
#clarify the annual MHI and create a column for monthly MHI then drop the original column
hhincome['Annual Median Household Income'] = hhincome['Median Household Income']
#hhincome['Monthly Median Household Income'] = hhincome['Median Household Income']/12
hhincome = hhincome.drop(columns = 'Median Household Income')

In [63]:
#check before moving on to H&T
hhincome.tail()

Unnamed: 0,NAME,GEO_ID,Annual Median Household Income
71,"Westmoreland town, Tennessee",1600000US4779420,58194.0
72,"White Bluff town, Tennessee",1600000US4779980,66000.0
73,"White House city, Tennessee",1600000US4780200,85404.0
74,Tennessee,0400000US47,64035.0
75,United States,0100000US,75149.0


In [64]:
kycos = pd.read_csv('../../Data Downloads/CNT_KYCounties_2020_HT.csv')
tncos = pd.read_csv('../../Data Downloads/CNT_TNCounties_2020_HT.csv')
mpos = pd.read_csv('../../Data Downloads/CNT_MPOs_2020_HT.csv')
tnplaces = pd.read_csv('../../Data Downloads/CNT_TNPlaces_2020_HT.csv')
kyplaces = pd.read_csv('../../Data Downloads/CNT_KYPlaces_2020_HT.csv')
#tnbg = pd.read_csv('../../Data Downloads/CNT_TNBlockGroups_2020_HT.csv')
#states = pd.read_csv('../../DataDownloads/CNT_States_2020_HT.csv')
#cbsas = pd.read_csv('../../DataDownloads/CNT_CBSAs_2020_HT.csv')

In [65]:
# mpos['name'] = mpos['name'].str.strip('\"')
# mpos['GEO_ID'] = mpos['mpo'].str.strip('\"')
# mpos = mpos.loc[mpos['name'] == 'Nashville Area MPO']

In [66]:
# #strip the extra characters from the GEOID imports
tncos['GEO_ID'] = tncos['county'].str.replace('"', '')
tncos['NAME'] = tncos['name'].str.replace('"', '')
kycos['GEO_ID'] = kycos['county'].str.strip('\"')
tnplaces['GEO_ID'] = tnplaces['place'].str.strip('\"')
kyplaces['GEO_ID'] = kyplaces['place'].str.strip('\"')
#tnbg['GEO_ID'] = tnbg['blkgrp'].str.strip('\"')

In [67]:
# #select which columns to keep
tncos = tncos[['GEO_ID', 'h_cost', 't_cost_ami', 'households', 'population']]
kycos = kycos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
tnplaces = tnplaces[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
kyplaces = kyplaces[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
#tnbg = tnbg[['GEO_ID', 'h_cost', 't_cost_ami', 'households']]
#mpos = mpos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]

In [68]:
# #get annual numbers
tncos['h_cost_ami'] = tncos['h_cost']*12
kycos['h_cost_ami'] = kycos['h_cost']*12
tnplaces['h_cost_ami'] = tnplaces['h_cost']*12
kyplaces['h_cost_ami'] = kyplaces['h_cost']*12
#tnbg['h_cost_ami'] = tnbg['h_cost']*12
#mpos['h_cost_ami'] = mpos['h_cost']*12

In [69]:
# #drop original numbers
tncos= tncos.drop(columns = 'h_cost')
kycos= kycos.drop(columns = 'h_cost')
tnplaces= tnplaces.drop(columns = 'h_cost')
kyplaces= kyplaces.drop(columns = 'h_cost')
#tnbg= tnbg.drop(columns = 'h_cost')
#mpos= mpos.drop(columns = 'h_cost')

In [70]:
# #remove full GEOIDs for filtering from custom module lists
tncos['GEO_ID'] = tncos['GEO_ID'].str[2:]
kycos['GEO_ID'] = kycos['GEO_ID'].str[2:]
tnplaces['GEO_ID'] = '1600000US' + tnplaces['GEO_ID']
kyplaces['GEO_ID'] = '1600000US' + kyplaces['GEO_ID']
#tnbg['CountyFIPS'] = tnbg['GEO_ID'].str[2:5]

In [71]:
# #filter geographies
tncos = tncos.loc[tncos['GEO_ID'].isin(GNRC)].reset_index(drop = True)
kycos = kycos.loc[kycos['GEO_ID'].isin(KY)].reset_index(drop = True)
tnplaces = tnplaces.loc[tnplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
kyplaces = kyplaces.loc[kyplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
#tnbg = tnbg.loc[tnbg['CountyFIPS'].isin(GNRC)]
# tnbg = tnbg.drop(columns = ['CountyFIPS']).reset_index(drop = True)

In [72]:
# #create full geoid for joining
tncos['GEO_ID'] = '0500000US47' + tncos['GEO_ID']
kycos['GEO_ID'] = '0500000US21' + kycos['GEO_ID']

In [73]:
# #merge and set index
df = pd.concat([tncos, kycos, tnplaces, kyplaces, tnbg]).reset_index(drop = True)
data = df.merge(hhincome, on = 'GEO_ID')
#data = data.drop(columns = 'name')

In [74]:
data.head()

Unnamed: 0,GEO_ID,t_cost_ami,households,population,h_cost_ami,name,CountyFIPS,NAME,Annual Median Household Income
0,0500000US47021,15735.0,15317.0,40539.0,15480.0,,,"Cheatham County, Tennessee",77014.0
1,0500000US47037,12668.0,289191.0,690540.0,17028.0,,,"Davidson County, Tennessee",71863.0
2,0500000US47043,15635.0,19559.0,53289.0,13620.0,,,"Dickson County, Tennessee",68492.0
3,0500000US47083,14494.0,2919.0,8201.0,11292.0,,,"Houston County, Tennessee",51544.0
4,0500000US47085,15328.0,6869.0,18528.0,10584.0,,,"Humphreys County, Tennessee",54185.0


In [75]:
data.set_index('NAME', inplace = True)

In [78]:
data = data.drop(columns = ['name', 'CountyFIPS'])

In [79]:
#create a list of columns not-indexed and perform mathematical operations for desired outputs
cols = list(data.columns)
cols.remove('GEO_ID')
data[cols] = data[cols].astype(float)
data['Annual Median Household Income'] = data['Annual Median Household Income']
#data['Monthly Median Household Income'] = data['Annual Median Household Income']/12
data['Annual Transportation Cost'] = data['t_cost_ami']
#data['Monthly Transportation Cost'] = data['t_cost_ami']/12
data['Annual Housing Cost'] = data['h_cost_ami']
#data['Monthly Housing Cost'] = data['h_cost_ami']/12
data['Annual Housing and Transportation Cost'] = data['Annual Housing Cost'] + data['Annual Transportation Cost']
#data['Monthly Housing and Transportation Cost'] = (data['Annual Housing Cost'] + data['Annual Transportation Cost'])/12
#data['Transportation Cost as % of Monthly Median Household Income'] = percent(data['Monthly Transportation Cost'], data['Monthly Median Household Income'])
data['Transportation Cost as % of Annual Median Household Income'] = percent(data['Annual Transportation Cost'], data['Annual Median Household Income'])
#data['Housing Cost as % of Monthly Median Household Income'] = percent(data['Monthly Housing Cost'], data['Monthly Median Household Income'])
data['Housing Cost as % of Annual Median Household Income'] = percent(data['Annual Housing Cost'], data['Annual Median Household Income'])
#data['Housing and Transportation Cost as % of Monthly Median Household Income'] = percent(data['Monthly Housing and Transportation Cost'], data['Monthly Median Household Income'])
data['Housing and Transportation Cost as % of Annual Median Household Income'] = percent(data['Annual Housing and Transportation Cost'], data['Annual Median Household Income'])
data["'Discretionary Spending': Median Annual Household Income Minus Housing and Transportation Costs"] = data['Annual Median Household Income'] -data['Annual Housing and Transportation Cost']

In [80]:
#drop the input columms no longer needed
data = data.drop(columns = ['t_cost_ami', 'h_cost_ami']).reset_index()

In [81]:
data.head()

Unnamed: 0,NAME,GEO_ID,households,population,Annual Median Household Income,Annual Transportation Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,'Discretionary Spending': Median Annual Household Income Minus Housing and Transportation Costs
0,"Cheatham County, Tennessee",0500000US47021,15317.0,40539.0,77014.0,15735.0,15480.0,31215.0,20.43135,20.100242,40.531592,45799.0
1,"Davidson County, Tennessee",0500000US47037,289191.0,690540.0,71863.0,12668.0,17028.0,29696.0,17.627987,23.695086,41.323073,42167.0
2,"Dickson County, Tennessee",0500000US47043,19559.0,53289.0,68492.0,15635.0,13620.0,29255.0,22.827484,19.885534,42.713018,39237.0
3,"Houston County, Tennessee",0500000US47083,2919.0,8201.0,51544.0,14494.0,11292.0,25786.0,28.119665,21.907497,50.027161,25758.0
4,"Humphreys County, Tennessee",0500000US47085,6869.0,18528.0,54185.0,15328.0,10584.0,25912.0,28.288272,19.533081,47.821353,28273.0


In [82]:
#set source
data['Year'] = '2020'

In [83]:
data.head()

Unnamed: 0,NAME,GEO_ID,households,population,Annual Median Household Income,Annual Transportation Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,'Discretionary Spending': Median Annual Household Income Minus Housing and Transportation Costs,Year
0,"Cheatham County, Tennessee",0500000US47021,15317.0,40539.0,77014.0,15735.0,15480.0,31215.0,20.43135,20.100242,40.531592,45799.0,2020
1,"Davidson County, Tennessee",0500000US47037,289191.0,690540.0,71863.0,12668.0,17028.0,29696.0,17.627987,23.695086,41.323073,42167.0,2020
2,"Dickson County, Tennessee",0500000US47043,19559.0,53289.0,68492.0,15635.0,13620.0,29255.0,22.827484,19.885534,42.713018,39237.0,2020
3,"Houston County, Tennessee",0500000US47083,2919.0,8201.0,51544.0,14494.0,11292.0,25786.0,28.119665,21.907497,50.027161,25758.0,2020
4,"Humphreys County, Tennessee",0500000US47085,6869.0,18528.0,54185.0,15328.0,10584.0,25912.0,28.288272,19.533081,47.821353,28273.0,2020


In [84]:
#data.to_csv('CNT2020BGs.csv', index = False)

In [85]:
conn = sq.connect('../../Outputs/Dem_Transpo_Housing_Collection.db')
data.to_sql('CNT_HT_2020', conn, if_exists = 'replace', index = False)

74