###### Imports and Settings

In [1]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import sys
sys.path.append("..") # Adds higher directory to python modules path
import geodict
import geodict
# Short aliases for the lookup objects defined in the local geodict module.
tofullcensus = geodict.tofullcensus  # passed to DataFrame.rename(columns=...) below to relabel NAME values
geotogeoid = geodict.geotogeoid  # passed to Series.map() below to derive GEO_ID from NAME
shorttnplaces = geodict.shorttnplaces  # used with .isin() to keep Tennessee place rows from the Census API
shortkyplaces = geodict.shortkyplaces  # used with .isin() to keep Kentucky place rows from the Census API
GNRC = geodict.GNRC  # used with .isin() against Tennessee county FIPS codes
KY = geodict.KY  # used with .isin() against Kentucky county FIPS codes
censusplaces = geodict.censusplaces  # used with .isin() against '1600000US...' place GEO_IDs
import sqlite3 as sq
#functions
def percent(x, y):
    """Return x expressed as a percentage of y.

    Falls back to 0 when the division raises ZeroDivisionError.
    """
    try:
        ratio = x/y
    except ZeroDivisionError:
        return 0
    return ratio*100
def percentchange(x, y):
    """Return the percent change going from baseline y to x.

    Falls back to 0 when the division raises ZeroDivisionError.
    """
    try:
        change = (x - y)*100/y
    except ZeroDivisionError:
        change = 0
    return change
def realchange(x, y):
    """Return the absolute change x minus y."""
    difference = x - y
    return difference
#calculate real and percent change between all columns for all possible time frames
def calculate_changes(df, columns, time_frames, years):
    """Add percent-change and real-change columns to df for every column/time-frame pair.

    df          -- frame whose columns are keyed by (column, year as int, 'None') tuples
    columns     -- the first-level names to compute changes for
    time_frames -- strings of the form 'start-end' (see generate_time_frames)
    years       -- accepted for call compatibility; not read by this function

    New columns are keyed ('<column> % Change', 'None', time_frame) and
    ('<column> Change', 'None', time_frame). df is modified in place and returned.
    """
    for measure in columns:
        for span in time_frames:
            first, last = span.split('-')
            earlier = df[(measure, int(first), 'None')]
            later = df[(measure, int(last), 'None')]
            df[f'{measure} % Change', 'None', f'{span}'] = percentchange(later, earlier)
            df[f'{measure} Change', 'None', f'{span}'] = (later - earlier)

    return df
#generate all possible time frames from a list of years
def generate_time_frames(years):
    """Return every 'earlier-later' pairing of the entries in years, in list order."""
    count = len(years)
    return [
        f"{years[start]}-{years[end]}"
        for start in range(count-1)
        for end in range(start+1, count)
    ]

# This notebook outlines the download and formatting process for the Center for Neighborhood Technology's Housing and Transportation (H+T) Cost Index, and how that data is combined with Census ACS household and median household income figures for counties and places in the GNRC operating region.  

Go to this page: https://htaindex.cnt.org/download/  
Upon registering for access, download the following documents:  
+ HTA Index for Counties in Tennessee and Kentucky  
+ HTA Index for Places in Tennessee and Kentucky  
+ HTA Index for MPOs  
+ HTA Index for Block Groups in Tennessee  

Save these csvs as they come in the Data Downloads folder of Parent Data Gathering  

### Calculations are made both for Comprehensive plans at higher geography levels (counties, MPO) and by block group, to identify distressed areas at a granular level.

In [2]:
#Load API Key
# NOTE: unpickling runs whatever the file contains; only load an api_keys.pkl you created yourself.
with open('api_keys.pkl', 'rb') as keys_file:
    keys_dict_2 = pickle.load(keys_file)
#pull the individual keys out of the loaded dictionary
census_key = keys_dict_2['CENSUS']
bea_key = keys_dict_2['BEA']

In [3]:
#2015 ACS 5 Year Median Household Income, total occupied housing units for households
url_str= 'https://api.census.gov/data/2015/acs/acs5?key='+census_key
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
acs_cols = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']

def fetch_acs_table(url, geography, within=None, keep=None, variables=get_vars, col_names=acs_cols):
    """Run one Census API query and return its rows as a string-typed DataFrame.

    url       -- dataset endpoint, including the API key
    geography -- value for the API's "for" predicate (e.g. "county:*")
    within    -- optional value for the API's "in" predicate (e.g. "state:47")
    keep      -- optional collection of GeoFIPS codes; when given, only matching rows are returned
    variables -- variable names sent in the "get" predicate
    col_names -- labels for the returned columns; trimmed to the width of the response

    Responses with one geography column fewer than col_names (the state and
    national queries) get a GeoFIPS column filled with '0'.
    Raises requests.HTTPError when the API answers with an error status, so a
    bad key or a malformed query fails here instead of being parsed as data.
    """
    predicates = {"get": ",".join(variables), "for": geography}
    if within is not None:
        predicates["in"] = within
    response = requests.get(url, params=predicates)
    response.raise_for_status()
    rows = response.json()
    # first row of the payload is the header; everything after it is data
    width = len(rows[0]) if rows else len(col_names)
    table = pd.DataFrame(columns=col_names[:width], data=rows[1:], dtype=str)
    if 'GeoFIPS' not in table.columns:
        table['GeoFIPS'] = '0'
    if keep is not None:
        table = table.loc[table['GeoFIPS'].isin(keep)]
    return table

#counties
tn_counties = fetch_acs_table(url_str, "county:*", "state:47", keep=GNRC)
kycos = fetch_acs_table(url_str, "county:*", "state:21", keep=KY)
#ky places call
ky_places = fetch_acs_table(url_str, "place:*", "state:21", keep=shortkyplaces)
#places
places = fetch_acs_table(url_str, "place:*", "state:47", keep=shorttnplaces)
#state call
state = fetch_acs_table(url_str, "state:47")
#national call
national = fetch_acs_table(url_str, "us:*")
# stack in the same order as before; each piece keeps its own row index
# (the block-group query is issued separately in the next cell)
df = pd.concat([tn_counties, kycos, ky_places, places, state, national], axis = 0)
savename = df
print('Okay Finished')

Okay Finished


In [4]:
#block groups GNRC Region
get_vars= ["NAME", 'B19013_001E', 'B25002_002E']
predicates= {
    "get": ",".join(get_vars),
    "for": "block group:*",
    "in": "state:47, county:*, tract:*",
}
data= requests.get(url_str, params = predicates)
# stop on an HTTP error instead of trying to parse the error payload as rows
data.raise_for_status()
col_names = ['NAME', 'Median Household Income', 'Households', 'StateFIPS', 'CountyFIPS', 'Census Tract', 'Block Group']
bg=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
# GEO_ID is state+county+tract+block group; GeoFIPS is the same without the state prefix
bg['GEO_ID'] = bg['StateFIPS'] + bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
bg['GeoFIPS'] = bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
#keep only block groups whose county is in the GNRC list, then drop the helper columns
bg = bg.loc[bg['CountyFIPS'].isin(GNRC)]
bg = bg.drop(columns = ['CountyFIPS', 'Census Tract', 'Block Group']).reset_index(drop = True)

In [5]:
#append the block groups to the frame saved as `savename` (the ACS rows without block groups),
#so re-running this cell does not stack a second copy of bg onto df
df = pd.concat([savename, bg], axis = 0)

In [7]:
#discard the FIPS helper columns, then cast the two measures from str to float
cols = ['Median Household Income', 'Households']
hhincome = df.reset_index(drop = True).drop(columns = ['StateFIPS', 'GeoFIPS'])
hhincome = hhincome.astype({col: float for col in cols})

In [8]:
#replace the generic MHI column with explicit annual and monthly columns
annual_income = hhincome.pop('Median Household Income')
hhincome['Annual Median Household Income'] = annual_income
hhincome['Monthly Median Household Income'] = annual_income/12

## H&T

In [34]:
#read the 2015 H+T downloads, one frame per geography file
kycos, tncos, tnplaces, kyplaces, tnbg = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../Data Downloads/CNT_KYCounties_2015_HT.csv',
        '../../Data Downloads/CNT_TNCounties_2015_HT.csv',
        '../../Data Downloads/CNT_TNPlaces_2015_HT.csv',
        '../../Data Downloads/CNT_KYPlaces_2015_HT.csv',
        '../../Data Downloads/CNT_TNBlockGroups_2015_HT.csv',
    )
)
#mpos = pd.read_csv('../../Data Downloads/CNT_MPOs_2015_HT.csv')

In [35]:
tnplaces.head()

Unnamed: 0,place,name,cbsa,blkgrps,population,households,land_acres,ht_ami,ht_80ami,ht_nmi,h_ami,h_80ami,h_nmi,t_ami,t_80ami,t_nmi,co2_per_hh_local,co2_per_acre_local,autos_per_hh_ami,autos_per_hh_80ami,autos_per_hh_nmi,vmt_per_hh_ami,vmt_per_hh_80ami,vmt_per_hh_nmi,pct_transit_commuters_ami,pct_transit_commuters_80ami,pct_transit_commuters_nmi,t_cost_ami,t_cost_80ami,t_cost_nmi,auto_ownership_cost_ami,auto_ownership_cost_80ami,auto_ownership_cost_nmi,vmt_cost_ami,vmt_cost_80ami,vmt_cost_nmi,transit_cost_ami,transit_cost_80ami,transit_cost_nmi,transit_trips_ami,transit_trips_80ami,transit_trips_nmi,compact_ndx,emp_ovrll_ndx,res_density,gross_hh_density,hh_gravity,frac_sfd,emp_gravity,emp_ndx,block_size,intersection_density,avg_block_perimeter_meters,h_cost,median_smoc,median_gross_rent,pct_owner_occupied_hu,pct_renter_occupied_hu
0,"""4700200""","""Adams""","""Nashville-Davidson--Murfreesboro--Franklin, TN""",0.07,147.45,51.42,1713.47,55,66,55,26,33,26,29,33,29,11.27,0.35,2.2,2,2,25966,23286,25832,0,0,0,15654,14384,15570,11978,11088,11912,3675,3296,3656,1,0,1,0,0,0,1.2,0.8,0.23,0.03,2321,83,1797,82,177,7,3207,1172,1251,846.0,80,20
1,"""4700240""","""Adamsville""","""""",0.52,665.2,270.34,4391.39,63,74,42,24,30,15,39,44,27,8.97,0.8,1.81,2,2,23154,20842,25034,1,1,1,12636,11563,14596,9340,8591,11035,3278,2951,3543,18,21,17,15,18,15,2.1,2.4,0.67,0.06,1289,81,1124,79,73,13,2450,661,738,445.0,74,26
2,"""4700440""","""Alamo""","""Jackson, TN""",0.59,532.78,182.99,1439.6,54,62,42,22,27,17,32,35,26,9.98,2.36,1.83,2,2,23655,21398,24411,0,0,0,13317,11706,13967,9968,8675,10511,3348,3030,3455,0,1,0,0,1,0,3.0,1.2,0.92,0.13,2074,72,2310,83,55,25,2171,743,826,628.0,56,44
3,"""4700540""","""Alcoa""","""Knoxville, TN""",8.07,11228.86,4834.71,9429.01,50,58,43,23,29,19,27,29,24,8.16,6.1,1.69,2,2,21555,19174,22722,0,0,0,12254,10631,12928,9203,7916,9711,3051,2715,3216,1,1,1,1,1,1,5.2,5.1,1.83,0.51,7567,66,11904,87,15,80,1123,868,813,733.0,60,40
4,"""4700620""","""Alexandria""","""""",0.18,260.07,102.15,1291.49,61,72,46,25,31,18,36,41,28,10.18,0.81,1.96,2,2,24713,22313,25497,0,0,0,13641,12528,14967,10142,9368,11358,3499,3159,3609,0,0,0,0,0,0,2.4,0.4,0.61,0.08,1984,82,1768,83,108,14,2338,803,842,722.0,68,32


In [36]:
# mpos['name'] = mpos['name'].str.strip('\"')
# mpos['GEO_ID'] = mpos['mpo'].str.strip('\"')
# mpos = mpos.loc[mpos['name'] == 'Nashville Area MPO']

In [37]:
#the id columns arrive wrapped in double quotes; store the bare value as GEO_ID
for frame, id_column in (
    (tncos, 'county'),
    (kycos, 'county'),
    (tnplaces, 'place'),
    (kyplaces, 'place'),
    (tnbg, 'blkgrp'),
):
    frame['GEO_ID'] = frame[id_column].str.strip('\"')

In [38]:
#select which columns to keep
# .copy() makes each result an independent frame, so the column assignments in the
# following cells do not raise SettingWithCopyWarning
keep_cols = ['name', 'GEO_ID', 'h_cost', 't_cost_ami']
tncos = tncos[keep_cols].copy()
kycos = kycos[keep_cols].copy()
tnplaces = tnplaces[keep_cols].copy()
kyplaces = kyplaces[keep_cols].copy()
# the block-group frame is selected without a 'name' column
tnbg = tnbg[['GEO_ID', 'h_cost', 't_cost_ami']].copy()
#mpos = mpos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]

In [39]:
#h_cost times 12 gives the annual figure, stored as h_cost_ami
for frame in (tncos, kycos, tnplaces, kyplaces, tnbg):
    frame['h_cost_ami'] = frame['h_cost']*12
#mpos['h_cost_ami'] = mpos['h_cost']*12

In [40]:
#h_cost is no longer needed once h_cost_ami exists
tncos, kycos, tnplaces, kyplaces, tnbg = (
    frame.drop(columns = 'h_cost')
    for frame in (tncos, kycos, tnplaces, kyplaces, tnbg)
)
#mpos= mpos.drop(columns = 'h_cost')

In [41]:
#reshape the ids so they can be matched against the lists from the custom module
#counties: drop the first two characters of the id
for county_frame in (tncos, kycos):
    county_frame['GEO_ID'] = county_frame['GEO_ID'].str[2:]
#places: prepend the '1600000US' prefix
for place_frame in (tnplaces, kyplaces):
    place_frame['GEO_ID'] = '1600000US' + place_frame['GEO_ID']
#block groups: characters 2-4 of the id go into a temporary CountyFIPS column
tnbg['CountyFIPS'] = tnbg['GEO_ID'].str[2:5]

In [42]:
#keep only the rows whose id appears in the matching geodict list
tncos = tncos[tncos['GEO_ID'].isin(GNRC)].reset_index(drop = True)
kycos = kycos[kycos['GEO_ID'].isin(KY)].reset_index(drop = True)
tnplaces = tnplaces[tnplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
kyplaces = kyplaces[kyplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
#block groups are filtered on the temporary CountyFIPS column, which is then removed
tnbg = (
    tnbg[tnbg['CountyFIPS'].isin(GNRC)]
    .drop(columns = ['CountyFIPS'])
    .reset_index(drop = True)
)

In [43]:
#rebuild the full county geoid so it can be joined to the Census GEO_ID
for county_frame, prefix in ((tncos, '0500000US47'), (kycos, '0500000US21')):
    county_frame['GEO_ID'] = prefix + county_frame['GEO_ID']

In [54]:
#stack the H+T frames, join the ACS income/household columns on GEO_ID, and drop the H+T 'name' column
ht_frames = [tncos, kycos, tnplaces, kyplaces, tnbg]
df = pd.concat(ht_frames).reset_index(drop = True)
data = df.merge(hhincome, on = 'GEO_ID').drop(columns = 'name')

In [55]:
#put NAME on the column axis so tofullcensus can relabel it, then restore the row layout
names_as_columns = data.set_index('NAME').T
names_as_columns = names_as_columns.rename(columns = tofullcensus)
data = names_as_columns.T.reset_index(drop = False)

In [58]:
#one column per geography, keyed by (NAME, GEO_ID), so geographies can be added and subtracted column-wise
data = data.set_index(['NAME', 'GEO_ID']).T

#county keys, written once and reused by the groupings below
stewart = ('Stewart County, Tennessee', '0500000US47161')
montgomery = ('Montgomery County, Tennessee', '0500000US47125')
houston = ('Houston County, Tennessee', '0500000US47083')
humphreys = ('Humphreys County, Tennessee', '0500000US47085')
dickson = ('Dickson County, Tennessee', '0500000US47043')
cheatham = ('Cheatham County, Tennessee', '0500000US47021')
robertson = ('Robertson County, Tennessee', '0500000US47147')
sumner = ('Sumner County, Tennessee', '0500000US47165')
davidson = ('Davidson County, Tennessee', '0500000US47037')
wilson = ('Wilson County, Tennessee', '0500000US47189')
trousdale = ('Trousdale County, Tennessee', '0500000US47169')
williamson = ('Williamson County, Tennessee', '0500000US47187')
rutherford = ('Rutherford County, Tennessee', '0500000US47149')
maury = ('Maury County, Tennessee', '0500000US47119')

# NOTE(review): every row (households, incomes, costs) is summed across the member
# geographies -- confirm that a plain sum is what is wanted for the median-income and cost rows.
gnrc_keys = [stewart, montgomery, houston, humphreys, dickson, cheatham, robertson,
             sumner, davidson, wilson, trousdale, williamson, rutherford]
GNRCCounties = [data[key] for key in gnrc_keys]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data[key] for key in gnrc_keys + [maury]]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data[key] for key in (robertson, sumner, davidson, wilson, williamson, rutherford, maury)]
data['MPO'] = sum(MPOCounties)

#incorporated = sum of the listed places; unincorporated = county minus incorporated
RuthInc = [data[key] for key in (
    ('Eagleville city, Tennessee', '1600000US4722360'),
    ('La Vergne city, Tennessee', '1600000US4741200'),
    ('Murfreesboro city, Tennessee', '1600000US4751560'),
    ('Smyrna town, Tennessee', '1600000US4769420'),
)]
data[('Rutherford Incorporated', 'None')] = sum(RuthInc)
data[('Rutherford Unincorporated', 'None')] = data[rutherford] - data[('Rutherford Incorporated', 'None')]

WilsonInc = [data[key] for key in (
    ('Lebanon city, Tennessee', '1600000US4741520'),
    ('Mount Juliet city, Tennessee', '1600000US4750780'),
    ('Watertown city, Tennessee', '1600000US4778320'),
)]
data[('Wilson Incorporated', 'None')] = sum(WilsonInc)
data[('Wilson Unincorporated', 'None')] = data[wilson] - data[('Wilson Incorporated', 'None')]

CheathInc = [data[key] for key in (
    ('Ashland City town, Tennessee', '1600000US4702180'),
    ('Kingston Springs town, Tennessee', '1600000US4739660'),
    ('Pegram town, Tennessee', '1600000US4757480'),
    ('Pleasant View city, Tennessee', '1600000US4759560'),
)]
data[('Cheatham Incorporated', 'None')] = sum(CheathInc)
data[('Cheatham Unincorporated', 'None')] = data[cheatham] - data[('Cheatham Incorporated', 'None')]

DicksInc = [data[key] for key in (
    ('Burns town, Tennessee', '1600000US4709880'),
    ('Charlotte town, Tennessee', '1600000US4713080'),
    ('Dickson city, Tennessee', '1600000US4720620'),
    ('Slayden town, Tennessee', '1600000US4769080'),
    ('Vanleer town, Tennessee', '1600000US4776860'),
    ('White Bluff town, Tennessee', '1600000US4779980'),
)]
data[('Dickson Incorporated', 'None')] = sum(DicksInc)
data[('Dickson Unincorporated', 'None')] = data[dickson] - data[('Dickson Incorporated', 'None')]

HumphInc = [data[key] for key in (
    ('McEwen city, Tennessee', '1600000US4744840'),
    ('New Johnsonville city, Tennessee', '1600000US4752820'),
    ('Waverly city, Tennessee', '1600000US4778560'),
)]
data[('Humphreys Incorporated', 'None')] = sum(HumphInc)
data[('Humphreys Unincorporated', 'None')] = data[humphreys] - data[('Humphreys Incorporated', 'None')]

#Montgomery's incorporated figure is taken from Clarksville alone
data[('Montgomery Incorporated', 'None')] = data[('Clarksville city, Tennessee', '1600000US4715160')]
data[('Montgomery Unincorporated', 'None')] = data[montgomery] - data[('Montgomery Incorporated', 'None')]

#back to one row per geography
data = data.T

In [59]:
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,t_cost_ami,h_cost_ami,Households,Annual Median Household Income,Monthly Median Household Income
NAME,GEO_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Cheatham County, Tennessee",0500000US47021,14845.0,13980.0,14499.0,51857.0,4321.416667
"Davidson County, Tennessee",0500000US47037,12082.0,14508.0,264211.0,48368.0,4030.666667
"Dickson County, Tennessee",0500000US47043,14503.0,12288.0,18556.0,44680.0,3723.333333
"Houston County, Tennessee",0500000US47083,13114.0,9564.0,3247.0,39401.0,3283.416667
"Humphreys County, Tennessee",0500000US47085,13919.0,11088.0,7124.0,41949.0,3495.75


In [60]:
#cast every non-index column to float, then derive the annual/monthly cost and share-of-income columns
cols = list(data.columns)
data[cols] = data[cols].astype(float)

annual_income = data['Annual Median Household Income']
monthly_income = annual_income/12
annual_transport = data['t_cost_ami']
annual_housing = data['h_cost_ami']
annual_total = annual_housing + annual_transport

data['Monthly Median Household Income'] = monthly_income
data['Annual Transportation Cost'] = annual_transport
data['Monthly Transportation Cost'] = annual_transport/12
data['Annual Housing Cost'] = annual_housing
data['Monthly Housing Cost'] = annual_housing/12
data['Annual Housing and Transportation Cost'] = annual_total
data['Monthly Housing and Transportation Cost'] = annual_total/12

#each share is computed on both the monthly and the annual basis
data['Transportation Cost as % of Monthly Median Household Income'] = percent(annual_transport/12, monthly_income)
data['Transportation Cost as % of Annual Median Household Income'] = percent(annual_transport, annual_income)
data['Housing Cost as % of Monthly Median Household Income'] = percent(annual_housing/12, monthly_income)
data['Housing Cost as % of Annual Median Household Income'] = percent(annual_housing, annual_income)
data['Housing and Transportation Cost as % of Monthly Median Household Income'] = percent(annual_total/12, monthly_income)
data['Housing and Transportation Cost as % of Annual Median Household Income'] = percent(annual_total, annual_income)

In [61]:
#the raw cost inputs have been copied into named columns, so remove them
data = data.drop(columns = ['t_cost_ami', 'h_cost_ami'])
#move NAME and GEO_ID out of the index and back into ordinary columns
data = data.reset_index()

In [62]:
#income left over after annual housing and transportation costs
data['Difference Annual Median Household Income and Total Annual Costs'] = (
    data['Annual Median Household Income'].sub(data['Annual Housing and Transportation Cost'])
)

In [64]:
#remove CDPs
# .copy() gives an independent frame, so the column assignments in the following
# cells do not raise SettingWithCopyWarning
data = data.loc[~data['NAME'].str.contains('CDP')].copy()

In [65]:
#overwrite GEO_ID with the value geotogeoid gives for each NAME
# NOTE(review): if geotogeoid is a dict, any NAME missing from it ends up with GEO_ID NaN -- confirm coverage
data = data.assign(GEO_ID = data['NAME'].map(geotogeoid))
data.head(2)

Unnamed: 0,NAME,GEO_ID,Households,Annual Median Household Income,Monthly Median Household Income,Annual Transportation Cost,Monthly Transportation Cost,Annual Housing Cost,Monthly Housing Cost,Annual Housing and Transportation Cost,Monthly Housing and Transportation Cost,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Difference Annual Median Household Income and Total Annual Costs
0,"Cheatham County, Tennessee",0500000US47021,14499.0,51857.0,4321.416667,14845.0,1237.083333,13980.0,1165.0,28825.0,2402.083333,28.626801,28.626801,26.958752,26.958752,55.585553,55.585553,23032.0
1,"Davidson County, Tennessee",0500000US47037,264211.0,48368.0,4030.666667,12082.0,1006.833333,14508.0,1209.0,26590.0,2215.833333,24.979325,24.979325,29.995038,29.995038,54.974363,54.974363,21778.0


In [66]:
#tag every row with the data year (stored as the string '2015')
data['Year'] = '2015'

In [67]:
#keep the finished 2015 table under its own name; this binds the same object (no copy is made),
#and `data` is reassigned by the 2019 section that follows
fifteen = data

## 2019

In [68]:
#2019 ACS 5 Year Median Household Income, total occupied housing units for households
url_str= 'https://api.census.gov/data/2019/acs/acs5?key='+census_key
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
acs_cols = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']

def fetch_acs_table(url, geography, within=None, keep=None, variables=get_vars, col_names=acs_cols):
    """Run one Census API query and return its rows as a string-typed DataFrame.

    url       -- dataset endpoint, including the API key
    geography -- value for the API's "for" predicate (e.g. "county:*")
    within    -- optional value for the API's "in" predicate (e.g. "state:47")
    keep      -- optional collection of GeoFIPS codes; when given, only matching rows are returned
    variables -- variable names sent in the "get" predicate
    col_names -- labels for the returned columns; trimmed to the width of the response

    Responses with one geography column fewer than col_names (the state and
    national queries) get a GeoFIPS column filled with '0'.
    Raises requests.HTTPError when the API answers with an error status, so a
    bad key or a malformed query fails here instead of being parsed as data.
    """
    predicates = {"get": ",".join(variables), "for": geography}
    if within is not None:
        predicates["in"] = within
    response = requests.get(url, params=predicates)
    response.raise_for_status()
    rows = response.json()
    # first row of the payload is the header; everything after it is data
    width = len(rows[0]) if rows else len(col_names)
    table = pd.DataFrame(columns=col_names[:width], data=rows[1:], dtype=str)
    if 'GeoFIPS' not in table.columns:
        table['GeoFIPS'] = '0'
    if keep is not None:
        table = table.loc[table['GeoFIPS'].isin(keep)]
    return table

#counties
tn_counties = fetch_acs_table(url_str, "county:*", "state:47", keep=GNRC)
kycos = fetch_acs_table(url_str, "county:*", "state:21", keep=KY)
#ky places call
ky_places = fetch_acs_table(url_str, "place:*", "state:21", keep=shortkyplaces)
#places
places = fetch_acs_table(url_str, "place:*", "state:47", keep=shorttnplaces)
#state call
state = fetch_acs_table(url_str, "state:47")
#national call
national = fetch_acs_table(url_str, "us:*")

#block groups GNRC Region (no GEO_ID is requested here; it is assembled from the FIPS parts)
get_vars= ["NAME", 'B19013_001E', 'B25002_002E']
predicates= {
    "get": ",".join(get_vars),
    "for": "block group:*",
    "in": "state:47, county:*, tract:*",
}
data= requests.get(url_str, params = predicates)
# stop on an HTTP error instead of trying to parse the error payload as rows
data.raise_for_status()
col_names = ['NAME', 'Median Household Income', 'Households', 'StateFIPS', 'CountyFIPS', 'Census Tract', 'Block Group']
bg=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
bg['GEO_ID'] = bg['StateFIPS'] + bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
bg['GeoFIPS'] = bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
bg = bg.loc[bg['CountyFIPS'].isin(GNRC)]
bg = bg.drop(columns = ['CountyFIPS', 'Census Tract', 'Block Group']).reset_index(drop = True)

# stack in the same order as before; each piece keeps its own row index
df = pd.concat([tn_counties, kycos, ky_places, places, state, national, bg], axis = 0)
savename = df
print('Okay Finished')

Okay Finished


In [69]:
#discard the FIPS helper columns, then cast the two measures from str to float
cols = ['Median Household Income', 'Households']
hhincome = savename.reset_index(drop = True).drop(columns = ['StateFIPS', 'GeoFIPS'])
hhincome = hhincome.astype({col: float for col in cols})

In [70]:
#replace the generic MHI column with explicit annual and monthly columns
annual_income = hhincome.pop('Median Household Income')
hhincome['Annual Median Household Income'] = annual_income
hhincome['Monthly Median Household Income'] = annual_income/12

In [73]:
#check before moving on to H&T
hhincome.head()

Unnamed: 0,NAME,GEO_ID,Households,Annual Median Household Income,Monthly Median Household Income
0,"Sumner County, Tennessee",0500000US47165,67089.0,67204.0,5600.333333
1,"Trousdale County, Tennessee",0500000US47169,3189.0,56321.0,4693.416667
2,"Davidson County, Tennessee",0500000US47037,282366.0,60388.0,5032.333333
3,"Montgomery County, Tennessee",0500000US47125,72617.0,57541.0,4795.083333
4,"Rutherford County, Tennessee",0500000US47149,111676.0,67429.0,5619.083333


In [74]:
#read the 2019 H+T downloads, one frame per geography file
kycos, tncos, mpos, tnplaces, kyplaces, tnbg = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../Data Downloads/CNT_KYCounties_2019_HT.csv',
        '../../Data Downloads/CNT_TNCounties_2019_HT.csv',
        '../../Data Downloads/CNT_MPOs_2019_HT.csv',
        '../../Data Downloads/CNT_TNPlaces_2019_HT.csv',
        '../../Data Downloads/CNT_KYPlaces_2019_HT.csv',
        '../../Data Downloads/CNT_TNBlockGroups_2019_HT.csv',
    )
)

In [75]:
# mpos['name'] = mpos['name'].str.strip('\"')
# mpos['GEO_ID'] = mpos['mpo'].str.strip('\"')
# mpos = mpos.loc[mpos['name'] == 'Nashville Area MPO']

In [76]:
#the id columns arrive wrapped in double quotes; store the bare value as GEO_ID
for frame, id_column in (
    (tncos, 'county'),
    (kycos, 'county'),
    (tnplaces, 'place'),
    (kyplaces, 'place'),
    (tnbg, 'blkgrp'),
):
    frame['GEO_ID'] = frame[id_column].str.strip('\"')

In [77]:
#select which columns to keep
# .copy() makes each result an independent frame, so the column assignments in the
# following cells do not raise SettingWithCopyWarning
keep_cols = ['name', 'GEO_ID', 'h_cost', 't_cost_ami']
tncos = tncos[keep_cols].copy()
kycos = kycos[keep_cols].copy()
tnplaces = tnplaces[keep_cols].copy()
kyplaces = kyplaces[keep_cols].copy()
# the block-group frame is selected without a 'name' column
tnbg = tnbg[['GEO_ID', 'h_cost', 't_cost_ami']].copy()
#mpos = mpos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]

In [78]:
#h_cost times 12 gives the annual figure, stored as h_cost_ami
for frame in (tncos, kycos, tnplaces, kyplaces, tnbg):
    frame['h_cost_ami'] = frame['h_cost']*12
#mpos['h_cost_ami'] = mpos['h_cost']*12

In [79]:
#h_cost is no longer needed once h_cost_ami exists
tncos, kycos, tnplaces, kyplaces, tnbg = (
    frame.drop(columns = 'h_cost')
    for frame in (tncos, kycos, tnplaces, kyplaces, tnbg)
)
#mpos= mpos.drop(columns = 'h_cost')

In [80]:
#reshape the ids so they can be matched against the lists from the custom module
#counties: drop the first two characters of the id
for county_frame in (tncos, kycos):
    county_frame['GEO_ID'] = county_frame['GEO_ID'].str[2:]
#places: prepend the '1600000US' prefix
for place_frame in (tnplaces, kyplaces):
    place_frame['GEO_ID'] = '1600000US' + place_frame['GEO_ID']
#block groups: characters 2-4 of the id go into a temporary CountyFIPS column
tnbg['CountyFIPS'] = tnbg['GEO_ID'].str[2:5]

In [81]:
#keep only the rows whose id appears in the matching geodict list
tncos = tncos[tncos['GEO_ID'].isin(GNRC)].reset_index(drop = True)
kycos = kycos[kycos['GEO_ID'].isin(KY)].reset_index(drop = True)
tnplaces = tnplaces[tnplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
kyplaces = kyplaces[kyplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
#block groups are filtered on the temporary CountyFIPS column, which is then removed
tnbg = (
    tnbg[tnbg['CountyFIPS'].isin(GNRC)]
    .drop(columns = ['CountyFIPS'])
    .reset_index(drop = True)
)

In [82]:
#rebuild the full county geoid so it can be joined to the Census GEO_ID
for county_frame, prefix in ((tncos, '0500000US47'), (kycos, '0500000US21')):
    county_frame['GEO_ID'] = prefix + county_frame['GEO_ID']

In [90]:
#stack every geography table into one frame with a fresh 0..n-1 index
df = pd.concat([tncos, kycos, tnplaces, kyplaces, tnbg], ignore_index = True)
#attach the household income table on GEO_ID (merge defaults to an inner join, so rows without a
#match in hhincome are left out), then remove the lowercase 'name' column
data = df.merge(hhincome, on = 'GEO_ID').drop(columns = 'name')

In [91]:
#translate the NAME values through tofullcensus: NAME is moved to the column axis, renamed there,
#and moved back into an ordinary column
data = (
    data.set_index('NAME')
        .T
        .rename(columns = tofullcensus)
        .T
        .reset_index(drop = False)
)

In [92]:
#turn every geography into a column keyed by (NAME, GEO_ID) so aggregates can be built by adding columns
data = data.set_index(['NAME', 'GEO_ID']).transpose()

# NOTE(review): the sums below add every row of the selected columns, including the median
# household income row - a sum of medians is not a regional median; confirm this is intended

#column keys of the counties used in the aggregates
county_key = {
    'Stewart': ('Stewart County, Tennessee', '0500000US47161'),
    'Montgomery': ('Montgomery County, Tennessee', '0500000US47125'),
    'Houston': ('Houston County, Tennessee', '0500000US47083'),
    'Humphreys': ('Humphreys County, Tennessee', '0500000US47085'),
    'Dickson': ('Dickson County, Tennessee', '0500000US47043'),
    'Cheatham': ('Cheatham County, Tennessee', '0500000US47021'),
    'Robertson': ('Robertson County, Tennessee', '0500000US47147'),
    'Sumner': ('Sumner County, Tennessee', '0500000US47165'),
    'Davidson': ('Davidson County, Tennessee', '0500000US47037'),
    'Wilson': ('Wilson County, Tennessee', '0500000US47189'),
    'Trousdale': ('Trousdale County, Tennessee', '0500000US47169'),
    'Williamson': ('Williamson County, Tennessee', '0500000US47187'),
    'Rutherford': ('Rutherford County, Tennessee', '0500000US47149'),
    'Maury': ('Maury County, Tennessee', '0500000US47119'),
}
gnrc_members = ['Stewart', 'Montgomery', 'Houston', 'Humphreys', 'Dickson', 'Cheatham', 'Robertson',
                'Sumner', 'Davidson', 'Wilson', 'Trousdale', 'Williamson', 'Rutherford']
mpo_members = ['Robertson', 'Sumner', 'Davidson', 'Wilson', 'Williamson', 'Rutherford', 'Maury']

#'GNRC' is the sum of the thirteen member counties; 'GNRC Region' adds Maury County to the same list
GNRCCounties = [data[county_key[county]] for county in gnrc_members]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data[county_key[county]] for county in gnrc_members + ['Maury']]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data[county_key[county]] for county in mpo_members]
data['MPO'] = sum(MPOCounties)

#for each county below: "Incorporated" is the sum of the listed places and
#"Unincorporated" is the county column minus that sum
RuthInc = [data[place] for place in [
    ('Eagleville city, Tennessee', '1600000US4722360'),
    ('La Vergne city, Tennessee', '1600000US4741200'),
    ('Murfreesboro city, Tennessee', '1600000US4751560'),
    ('Smyrna town, Tennessee', '1600000US4769420'),
]]
data[('Rutherford Incorporated', 'None')] = sum(RuthInc)
data[('Rutherford Unincorporated', 'None')] = data[county_key['Rutherford']] - data[('Rutherford Incorporated', 'None')]

WilsonInc = [data[place] for place in [
    ('Lebanon city, Tennessee', '1600000US4741520'),
    ('Mount Juliet city, Tennessee', '1600000US4750780'),
    ('Watertown city, Tennessee', '1600000US4778320'),
]]
data[('Wilson Incorporated', 'None')] = sum(WilsonInc)
data[('Wilson Unincorporated', 'None')] = data[county_key['Wilson']] - data[('Wilson Incorporated', 'None')]

CheathInc = [data[place] for place in [
    ('Ashland City town, Tennessee', '1600000US4702180'),
    ('Kingston Springs town, Tennessee', '1600000US4739660'),
    ('Pegram town, Tennessee', '1600000US4757480'),
    ('Pleasant View city, Tennessee', '1600000US4759560'),
]]
data[('Cheatham Incorporated', 'None')] = sum(CheathInc)
data[('Cheatham Unincorporated', 'None')] = data[county_key['Cheatham']] - data[('Cheatham Incorporated', 'None')]

DicksInc = [data[place] for place in [
    ('Burns town, Tennessee', '1600000US4709880'),
    ('Charlotte town, Tennessee', '1600000US4713080'),
    ('Dickson city, Tennessee', '1600000US4720620'),
    ('Slayden town, Tennessee', '1600000US4769080'),
    ('Vanleer town, Tennessee', '1600000US4776860'),
    ('White Bluff town, Tennessee', '1600000US4779980'),
]]
data[('Dickson Incorporated', 'None')] = sum(DicksInc)
data[('Dickson Unincorporated', 'None')] = data[county_key['Dickson']] - data[('Dickson Incorporated', 'None')]

HumphInc = [data[place] for place in [
    ('McEwen city, Tennessee', '1600000US4744840'),
    ('New Johnsonville city, Tennessee', '1600000US4752820'),
    ('Waverly city, Tennessee', '1600000US4778560'),
]]
data[('Humphreys Incorporated', 'None')] = sum(HumphInc)
data[('Humphreys Unincorporated', 'None')] = data[county_key['Humphreys']] - data[('Humphreys Incorporated', 'None')]

#Montgomery County has a single listed place, so its column is used directly
data[('Montgomery Incorporated', 'None')] = data[('Clarksville city, Tennessee', '1600000US4715160')]
data[('Montgomery Unincorporated', 'None')] = data[county_key['Montgomery']] - data[('Montgomery Incorporated', 'None')]

#back to one row per geography
data = data.transpose()

In [93]:
#create a list of columns not-indexed and cast them all to float before doing any arithmetic
cols = list(data.columns)
data[cols] = data[cols].astype(float)

#income: the annual column is used as it stands, the monthly value is a twelfth of it
data['Monthly Median Household Income'] = data['Annual Median Household Income']/12

#costs: the annual value is a copy of the input column, the monthly value is a twelfth of it
for cost_label, input_col in [('Transportation Cost', 't_cost_ami'), ('Housing Cost', 'h_cost_ami')]:
    data[f'Annual {cost_label}'] = data[input_col]
    data[f'Monthly {cost_label}'] = data[input_col]/12

#combined housing + transportation cost
combined_annual = data['Annual Housing Cost'] + data['Annual Transportation Cost']
data['Annual Housing and Transportation Cost'] = combined_annual
data['Monthly Housing and Transportation Cost'] = combined_annual/12

#each cost as a share of median household income, computed for the monthly and the annual figures
for cost_label in ['Transportation Cost', 'Housing Cost', 'Housing and Transportation Cost']:
    for period_label in ['Monthly', 'Annual']:
        data[f'{cost_label} as % of {period_label} Median Household Income'] = percent(
            data[f'{period_label} {cost_label}'], data[f'{period_label} Median Household Income'])

In [94]:
#drop the input columns no longer needed
data = data.drop(columns = ['t_cost_ami', 'h_cost_ami'])
#move the index levels back into ordinary columns
data = data.reset_index()

In [95]:
#preview the first five rows of the table after the cost and percentage columns were added
data.head()

Unnamed: 0,NAME,GEO_ID,Households,Annual Median Household Income,Monthly Median Household Income,Annual Transportation Cost,Monthly Transportation Cost,Annual Housing Cost,Monthly Housing Cost,Annual Housing and Transportation Cost,Monthly Housing and Transportation Cost,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income
0,"Cheatham County, Tennessee",0500000US47021,15089.0,61913.0,5159.416667,16511.0,1375.916667,14748.0,1229.0,31259.0,2604.916667,26.668066,26.668066,23.820522,23.820522,50.488589,50.488589
1,"Davidson County, Tennessee",0500000US47037,282366.0,60388.0,5032.333333,13136.0,1094.666667,16308.0,1359.0,29444.0,2453.666667,21.752666,21.752666,27.005365,27.005365,48.758031,48.758031
2,"Dickson County, Tennessee",0500000US47043,19198.0,53076.0,4423.0,16270.0,1355.833333,13092.0,1091.0,29362.0,2446.833333,30.654156,30.654156,24.666516,24.666516,55.320672,55.320672
3,"Houston County, Tennessee",0500000US47083,2878.0,42711.0,3559.25,15210.0,1267.5,11364.0,947.0,26574.0,2214.5,35.611435,35.611435,26.606729,26.606729,62.218164,62.218164
4,"Humphreys County, Tennessee",0500000US47085,6763.0,45667.0,3805.583333,15296.0,1274.666667,10764.0,897.0,26060.0,2171.666667,33.494646,33.494646,23.570631,23.570631,57.065277,57.065277


In [96]:
#find the difference in cost and income overall: annual median household income minus the
#annual housing and transportation cost
data['Difference Annual Median Household Income and Total Annual Costs'] = data['Annual Median Household Income'].sub(data['Annual Housing and Transportation Cost'])

In [97]:
#remove CDPs: drop every row whose NAME contains the text 'CDP'
#.copy() gives the filtered table its own data, so the column assignments in the following cells
#do not raise SettingWithCopyWarning
data = data.loc[~data['NAME'].str.contains('CDP')].copy()

In [98]:
#just make sure it's the full census geoid: GEO_ID is overwritten with whatever geotogeoid gives for each NAME
data = data.assign(GEO_ID = data['NAME'].map(geotogeoid))
data.head(2)

Unnamed: 0,NAME,GEO_ID,Households,Annual Median Household Income,Monthly Median Household Income,Annual Transportation Cost,Monthly Transportation Cost,Annual Housing Cost,Monthly Housing Cost,Annual Housing and Transportation Cost,Monthly Housing and Transportation Cost,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Difference Annual Median Household Income and Total Annual Costs
0,"Cheatham County, Tennessee",0500000US47021,15089.0,61913.0,5159.416667,16511.0,1375.916667,14748.0,1229.0,31259.0,2604.916667,26.668066,26.668066,23.820522,23.820522,50.488589,50.488589,30654.0
1,"Davidson County, Tennessee",0500000US47037,282366.0,60388.0,5032.333333,13136.0,1094.666667,16308.0,1359.0,29444.0,2453.666667,21.752666,21.752666,27.005365,27.005365,48.758031,48.758031,30944.0


In [99]:
#tag every row with the data year (stored as a string here; cast to int once the years are combined)
data = data.assign(Year = '2019')

In [100]:
#keep the 2019 table under its own name so it can be combined with the other year
#this binds a second name to the same DataFrame; no copy is made
nineteen = data

In [101]:
#combine the yearly tables row-wise (fifteen is defined outside this section)
#each table keeps its own row labels, so index values can repeat in the result
dfs = [fifteen, nineteen]
data = pd.concat(dfs)

In [102]:
#make sure year is formatted as an integer
data['Year'] = data['Year'].astype(int)
#create a list of years from the dataframe to pass through our "generate time frames" function to create a list of all possible time frames - need this here for later
#the years are sorted because generate_time_frames pairs each year with the ones that follow it in
#the list: chronological order keeps every "start-end" label running forward in time no matter
#what order the yearly tables were concatenated in
years = sorted(int(year) for year in data['Year'].unique())
time_frames = generate_time_frames(years)

In [103]:
#create a multilevel column header with year and placeholder for time frames
#pivot the table and create a multiindex of year and column header
#NAME and Year define the pivot, so they are not values; GEO_ID is text and cannot be averaged by
#pivot_table's default mean aggregation (older pandas dropped it silently, newer versions raise),
#so it is left out here and mapped back from NAME after the reshape
cols = [col for col in data.columns if col not in ('NAME', 'Year', 'GEO_ID')]
#pivot_table averages rows that share the same NAME and Year
df_pivot = data.pivot_table(index = 'NAME', columns = ['Year'], values = cols)
df_pivot.head(2)

Unnamed: 0_level_0,Annual Housing Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Annual Housing and Transportation Cost,Annual Median Household Income,Annual Median Household Income,Annual Transportation Cost,Annual Transportation Cost,Difference Annual Median Household Income and Total Annual Costs,Difference Annual Median Household Income and Total Annual Costs,Households,Households,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Monthly Housing Cost,Monthly Housing Cost,Monthly Housing and Transportation Cost,Monthly Housing and Transportation Cost,Monthly Median Household Income,Monthly Median Household Income,Monthly Transportation Cost,Monthly Transportation Cost,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Monthly Median Household Income
Year,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019
NAME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2
"Adams city, Tennessee",14064.0,17520.0,29718.0,35557.0,56667.0,55357.0,15654.0,18037.0,26949.0,19800.0,274.0,210.0,24.818678,31.649114,24.818678,31.649114,52.443221,64.232166,52.443221,64.232166,1172.0,1460.0,2476.5,2963.083333,4722.25,4613.083333,1304.5,1503.083333,27.624543,32.583052,27.624543,32.583052
"Allen County, Kentucky",10116.0,10116.0,24351.0,26210.0,41326.0,44036.0,14235.0,16094.0,16975.0,17826.0,7774.0,7605.0,24.478537,22.972114,24.478537,22.972114,58.924164,59.519484,58.924164,59.519484,843.0,843.0,2029.25,2184.166667,3443.833333,3669.666667,1186.25,1341.166667,34.445627,36.54737,34.445627,36.54737


In [104]:
#add a level to the multiindex to accommodate the time period metrics
#every existing (variable, year) header gets the placeholder string 'None' as its third level
widened_headers = [(variable, year, 'None') for variable, year in df_pivot.columns]
df_pivot.columns = pd.MultiIndex.from_tuples(widened_headers)
df_pivot.head(3)

Unnamed: 0_level_0,Annual Housing Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Annual Housing and Transportation Cost,Annual Median Household Income,Annual Median Household Income,Annual Transportation Cost,Annual Transportation Cost,Difference Annual Median Household Income and Total Annual Costs,Difference Annual Median Household Income and Total Annual Costs,Households,Households,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Monthly Housing Cost,Monthly Housing Cost,Monthly Housing and Transportation Cost,Monthly Housing and Transportation Cost,Monthly Median Household Income,Monthly Median Household Income,Monthly Transportation Cost,Monthly Transportation Cost,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Monthly Median Household Income
Unnamed: 0_level_1,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019
Unnamed: 0_level_2,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None
NAME,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3
"Adams city, Tennessee",14064.0,17520.0,29718.0,35557.0,56667.0,55357.0,15654.0,18037.0,26949.0,19800.0,274.0,210.0,24.818678,31.649114,24.818678,31.649114,52.443221,64.232166,52.443221,64.232166,1172.0,1460.0,2476.5,2963.083333,4722.25,4613.083333,1304.5,1503.083333,27.624543,32.583052,27.624543,32.583052
"Allen County, Kentucky",10116.0,10116.0,24351.0,26210.0,41326.0,44036.0,14235.0,16094.0,16975.0,17826.0,7774.0,7605.0,24.478537,22.972114,24.478537,22.972114,58.924164,59.519484,58.924164,59.519484,843.0,843.0,2029.25,2184.166667,3443.833333,3669.666667,1186.25,1341.166667,34.445627,36.54737,34.445627,36.54737
"Ashland City town, Tennessee",11424.0,12624.0,25115.0,27110.0,39777.0,48654.0,13691.0,14486.0,14662.0,21544.0,1974.0,1986.0,28.720115,25.946479,28.720115,25.946479,63.139503,55.719982,63.139503,55.719982,952.0,1052.0,2092.916667,2259.166667,3314.75,4054.5,1140.916667,1207.166667,34.419388,29.773503,34.419388,29.773503


In [105]:
#get a list of the variables to loop through by indexing into the first level only of the column headers
#variables with '%' in their name are skipped - don't want change metrics on percentages
first_level = [
    variable
    for variable in df_pivot.columns.get_level_values(0).unique().tolist()
    if '%' not in variable
]

In [106]:
#pass the dataframe, the list of variables, time frames, and years through the "calculate change" function
#calculate_changes writes the new columns onto df_pivot itself and returns that same frame
data = calculate_changes(df_pivot, first_level, time_frames, years)

In [107]:
#reformat and rename columns: move two header levels into the row index, turn the index back
#into columns, and give the two unnamed levels their names
data = (
    data.stack([1, 1])
        .reset_index(drop = False)
        .rename(columns = {'level_1':'Year', 'level_2':'Time Frame'})
)

In [113]:
#look up GEO_ID from NAME through geotogeoid and label every row with the data source
data = data.assign(
    GEO_ID = data['NAME'].map(geotogeoid),
    Source = 'Center for Neighborhood Technology',
)

In [114]:
#final check: preview the first five rows of the finished table
data.head()

Unnamed: 0,NAME,Year,Time Frame,Annual Housing Cost,Annual Housing Cost % Change,Annual Housing Cost Change,Annual Housing and Transportation Cost,Annual Housing and Transportation Cost % Change,Annual Housing and Transportation Cost Change,Annual Median Household Income,Annual Median Household Income % Change,Annual Median Household Income Change,Annual Transportation Cost,Annual Transportation Cost % Change,Annual Transportation Cost Change,Difference Annual Median Household Income and Total Annual Costs,Difference Annual Median Household Income and Total Annual Costs % Change,Difference Annual Median Household Income and Total Annual Costs Change,Households,Households % Change,Households Change,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Monthly Housing Cost,Monthly Housing Cost % Change,Monthly Housing Cost Change,Monthly Housing and Transportation Cost,Monthly Housing and Transportation Cost % Change,Monthly Housing and Transportation Cost Change,Monthly Median Household Income,Monthly Median Household Income % Change,Monthly Median Household Income Change,Monthly Transportation Cost,Monthly Transportation Cost % Change,Monthly Transportation Cost Change,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Monthly Median Household Income,Source,GEO_ID
0,"Adams city, Tennessee",2015.0,,14064.0,,,29718.0,,,56667.0,,,15654.0,,,26949.0,,,274.0,,,24.818678,24.818678,52.443221,52.443221,1172.0,,,2476.5,,,4722.25,,,1304.5,,,27.624543,27.624543,Center for Neighborhood Technology,1600000US4700200
1,"Adams city, Tennessee",2019.0,,17520.0,,,35557.0,,,55357.0,,,18037.0,,,19800.0,,,210.0,,,31.649114,31.649114,64.232166,64.232166,1460.0,,,2963.083333,,,4613.083333,,,1503.083333,,,32.583052,32.583052,Center for Neighborhood Technology,1600000US4700200
2,"Adams city, Tennessee",,2015-2019,,24.573379,3456.0,,19.648025,5839.0,,-2.311751,-1310.0,,15.222946,2383.0,,-26.527886,-7149.0,,-23.357664,-64.0,,,,,,24.573379,288.0,,19.648025,486.583333,,-2.311751,-109.166667,,15.222946,198.583333,,,Center for Neighborhood Technology,1600000US4700200
3,"Allen County, Kentucky",2015.0,,10116.0,,,24351.0,,,41326.0,,,14235.0,,,16975.0,,,7774.0,,,24.478537,24.478537,58.924164,58.924164,843.0,,,2029.25,,,3443.833333,,,1186.25,,,34.445627,34.445627,Center for Neighborhood Technology,0500000US21003
4,"Allen County, Kentucky",2019.0,,10116.0,,,26210.0,,,44036.0,,,16094.0,,,17826.0,,,7605.0,,,22.972114,22.972114,59.519484,59.519484,843.0,,,2184.166667,,,3669.666667,,,1341.166667,,,36.54737,36.54737,Center for Neighborhood Technology,0500000US21003


In [115]:
#list each column with its non-null count and dtype
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3532 entries, 0 to 3531
Data columns (total 41 columns):
 #   Column                                                                     Non-Null Count  Dtype  
---  ------                                                                     --------------  -----  
 0   NAME                                                                       3532 non-null   object 
 1   Year                                                                       3532 non-null   object 
 2   Time Frame                                                                 3532 non-null   object 
 3   Annual Housing Cost                                                        2321 non-null   float64
 4   Annual Housing Cost % Change                                               1153 non-null   float64
 5   Annual Housing Cost Change                                                 1153 non-null   float64
 6   Annual Housing and Transportation Cost                  

In [116]:
#export to the SQLite database
conn = sq.connect('../../Outputs/Dem_Transpo_Housing_Collection.db')
try:
    #if_exists = 'replace' recreates the CNT_HT_Annual_Change table on every run
    rows_written = data.to_sql('CNT_HT_Annual_Change', conn, if_exists = 'replace', index = False)
finally:
    #release the database file even when the export fails
    conn.close()
#show the value returned by to_sql as the cell output
rows_written

3532

In [117]:
# NOTE(review): writes the same table to 'test.csv' in the working directory, row index included
# (to_csv default) - looks like a leftover spot-check export; confirm it is still wanted
data.to_csv('test.csv')