###### Imports and Settings

In [1]:
import pandas as pd
import numpy as np
import requests
from functools import reduce
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
import sys
sys.path.append("../../../Functions and Dictionaries") # Adds higher directory to python modules path
import geodict
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces
tofullcensus = geodict.tofullcensus
geotogeoid = geodict.geotogeoid
shorttnplaces = geodict.shorttnplaces
shortkyplaces = geodict.shortkyplaces
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces
import sqlite3 as sq
#functions
def percent(x, y):
        try:
            return ((x/y)*100)
        except ZeroDivisionError:
            return 0
def percentchange(x, y):
    try:
        return ((x - y)*100/y)
    except ZeroDivisionError:
        return 0
def realchange(x, y):
    return x-y
#calculate real and percent change between all columns for all possible time frames
def calculate_changes(df, columns, time_frames, years):
    for column in columns:
        for time_frame in time_frames:
            start_year, end_year = time_frame.split('-')
            df[f'{column} % Change', 'None', f'{time_frame}'] = percentchange(df[(column, int(end_year), 'None')], df[(column, int(start_year), 'None')])
            df[f'{column} Change', 'None', f'{time_frame}'] = (df[(column, int(end_year), 'None')] - df[(column, int(start_year), 'None')])

    return df
#generate all possible time frames from a list of years
def generate_time_frames(years):
    time_frames = []
    for i in range(len(years)-1):
        for j in range(i+1, len(years)):
            time_frames.append(f"{years[i]}-{years[j]}")
    return time_frames

# This notebook outlines the download and formatting process for the Center for Neighborhood Technology's Housing and Transportation Cost Index as well as the data combined from our end for counties and places in the GNRC operating region.  

Go to this page: https://htaindex.cnt.org/download/  
Upon registering for access, download the following documents:  
+ HTA Index for Counties in Tennessee and Kentucky  
+ HTA Index for MPOs  
+ HTA Index for Block Groups in Tennessee  

Save these csvs as they come in the Data Downloads folder of Parent Data Gathering  

### Calculations are made both for Comphrehensive plans at higher geography levels (counties, MPO), but also by block group to identify distressed areas at a granular level.

In [2]:
#Load API Key
with open('api_keys.pkl', 'rb') as keys_file:
        keys_dict_2 = pickle.load(keys_file)
#create a variable that contains your api key
census_key = keys_dict_2['CENSUS']
bea_key = keys_dict_2['BEA']

In [3]:
#2015 ACS 5 Year Median Household Income, total occupied housing units for households
#counties
url_str= 'https://api.census.gov/data/2015/acs/acs5?key='+census_key
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "county:*"
predicates["in"]= "state:47" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
df = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
df = df.loc[df['GeoFIPS'].isin(GNRC)]
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "county:*"
predicates["in"]= "state:21" 
data = requests.get(url_str, params= predicates)                                                              
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
kycos = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
kycos = kycos.loc[kycos['GeoFIPS'].isin(KY)]
df = pd.concat([df, kycos], axis = 0)
#ky places call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "place:*"
predicates["in"]= "state:21" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
places=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
places=places.loc[places['GeoFIPS'].isin(shortkyplaces)]
df = pd.concat([df, places], axis = 0)
#places
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "place:*"
predicates["in"]= "state:47" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
places=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
places=places.loc[places['GeoFIPS'].isin(shorttnplaces)]
df = pd.concat([df, places], axis = 0)
#state call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "state:47"
data= requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS']
state=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
state['GeoFIPS'] = '0'
df = pd.concat([df, state], axis = 0)
#national call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "us:*"
data= requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS']
national=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
national['GeoFIPS'] = '0'
df = pd.concat([df, national], axis = 0)
# predicates= {} #block groups GNRC Region
# get_vars= ["NAME", 'B19013_001E', 'B25002_002E']
# predicates["get"]= ",". join(get_vars)
# predicates["for"]= "block group:*"
# predicates["in"]= "state:47, county:*, tract:*"
# data= requests.get(url_str, params = predicates)
# col_names = ['NAME', 'Median Household Income', 'Households', 'StateFIPS', 'CountyFIPS', 'Census Tract', 'Block Group']
# bg=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
# bg['GEO_ID'] = bg['StateFIPS'] + bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
# bg['GeoFIPS'] = bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
# bg = bg.loc[bg['CountyFIPS'].isin(GNRC)]
# bg = bg.drop(columns = ['CountyFIPS', 'Census Tract', 'Block Group']).reset_index(drop = True)
#df = pd.concat([df, bg], axis = 0)
savename = df
print('Okay Finished')

Okay Finished


In [155]:
predicates= {} #block groups GNRC Region
get_vars= ["NAME", 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "block group:*"
predicates["in"]= "state:47, county:*, tract:*"
data= requests.get(url_str, params = predicates)
col_names = ['NAME', 'Median Household Income', 'Households', 'StateFIPS', 'CountyFIPS', 'Census Tract', 'Block Group']
bg=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
bg['GEO_ID'] = bg['StateFIPS'] + bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
bg['GeoFIPS'] = bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
bg = bg.loc[bg['CountyFIPS'].isin(GNRC)]
bg = bg.drop(columns = ['CountyFIPS', 'Census Tract', 'Block Group']).reset_index(drop = True)

In [156]:
df = pd.concat([df, bg], axis = 0)

In [157]:
#drop unneeded columns and change columns that need to be to float
hhincome = df.reset_index(drop = True)
hhincome = hhincome.drop(columns = ['StateFIPS', 'GeoFIPS'])
cols = ['Median Household Income', 'Households']
hhincome[cols] = hhincome[cols].astype(float)

In [158]:
#clarify the annual MHI and create a column for monthly MHI then drop the original column
hhincome['Annual Median Household Income'] = hhincome['Median Household Income']
hhincome['Monthly Median Household Income'] = hhincome['Median Household Income']/12
hhincome = hhincome.drop(columns = 'Median Household Income')

## H&T

In [159]:
kycos = pd.read_csv('../../Data Downloads/CNT_KYCounties_2015_HT.csv')
tncos = pd.read_csv('../../Data Downloads/CNT_TNCounties_2015_HT.csv')
#mpos = pd.read_csv('../../Data Downloads/CNT_MPOs_2015_HT.csv')
tnplaces = pd.read_csv('../../Data Downloads/CNT_TNPlaces_2015_HT.csv')
kyplaces = pd.read_csv('../../Data Downloads/CNT_KYPlaces_2015_HT.csv')
tnbg = pd.read_csv('../../Data Downloads/CNT_TNBlockGroups_2015_HT.csv')

In [160]:
tnbg.head()

Unnamed: 0,blkgrp,cbsa,blkgrps,population,households,land_acres,ht_ami,ht_80ami,ht_nmi,h_ami,h_80ami,h_nmi,t_ami,t_80ami,t_nmi,co2_per_hh_local,co2_per_acre_local,autos_per_hh_ami,autos_per_hh_80ami,autos_per_hh_nmi,vmt_per_hh_ami,vmt_per_hh_80ami,vmt_per_hh_nmi,pct_transit_commuters_ami,pct_transit_commuters_80ami,pct_transit_commuters_nmi,t_cost_ami,t_cost_80ami,t_cost_nmi,auto_ownership_cost_ami,auto_ownership_cost_80ami,auto_ownership_cost_nmi,vmt_cost_ami,vmt_cost_80ami,vmt_cost_nmi,transit_cost_ami,transit_cost_80ami,transit_cost_nmi,transit_trips_ami,transit_trips_80ami,transit_trips_nmi,compact_ndx,emp_ovrll_ndx,res_density,gross_hh_density,hh_gravity,frac_sfd,emp_gravity,emp_ndx,block_size,intersection_density,avg_block_perimeter_meters,h_cost,median_smoc,median_gross_rent,pct_owner_occupied_hu,pct_renter_occupied_hu
0,"""471439754011""","""Dayton, TN""",1.0,1285.0,548.0,1412.5,55.0,65.0,40.0,24.0,30.0,16.0,30.0,34.0,23.0,8.27,3.21,1.55,1.0,2.0,20994.0,18811.0,22727.0,0.0,0.0,0.0,10974.0,9961.0,12652.0,8002.0,7298.0,9435.0,2972.0,2663.0,3217.0,0.0,0.0,0.0,0.0,0.0,0.0,6.7,3.3,2.28,0.39,3053.0,47.0,5510.0,85.0,19.0,58.0,1172,732.0,936.0,579.0,43.0,57.0
1,"""471459801001""","""Knoxville, TN""",1.0,0.0,0.0,20650.78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.5,,0.0,0.0,,,,,,,1969,,,,,
2,"""471579802001""","""Memphis, TN-MS-AR""",1.0,0.0,0.0,10197.18,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.1,,0.0,0.0,,,,,,5.0,3403,,,,,
3,"""471579803001""","""Memphis, TN-MS-AR""",1.0,0.0,0.0,24033.27,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.3,,0.0,0.0,,,,,,,4530,,,,,
4,"""471619802001""",,1.0,0.0,0.0,25021.37,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.9,,0.0,0.0,,,,,,,2345,,,,,


In [161]:
tnplaces.head()

Unnamed: 0,place,name,cbsa,blkgrps,population,households,land_acres,ht_ami,ht_80ami,ht_nmi,h_ami,h_80ami,h_nmi,t_ami,t_80ami,t_nmi,co2_per_hh_local,co2_per_acre_local,autos_per_hh_ami,autos_per_hh_80ami,autos_per_hh_nmi,vmt_per_hh_ami,vmt_per_hh_80ami,vmt_per_hh_nmi,pct_transit_commuters_ami,pct_transit_commuters_80ami,pct_transit_commuters_nmi,t_cost_ami,t_cost_80ami,t_cost_nmi,auto_ownership_cost_ami,auto_ownership_cost_80ami,auto_ownership_cost_nmi,vmt_cost_ami,vmt_cost_80ami,vmt_cost_nmi,transit_cost_ami,transit_cost_80ami,transit_cost_nmi,transit_trips_ami,transit_trips_80ami,transit_trips_nmi,compact_ndx,emp_ovrll_ndx,res_density,gross_hh_density,hh_gravity,frac_sfd,emp_gravity,emp_ndx,block_size,intersection_density,avg_block_perimeter_meters,h_cost,median_smoc,median_gross_rent,pct_owner_occupied_hu,pct_renter_occupied_hu
0,"""4700200""","""Adams""","""Nashville-Davidson--Murfreesboro--Franklin, TN""",0.07,147.45,51.42,1713.47,55,66,55,26,33,26,29,33,29,11.27,0.35,2.2,2,2,25966,23286,25832,0,0,0,15654,14384,15570,11978,11088,11912,3675,3296,3656,1,0,1,0,0,0,1.2,0.8,0.23,0.03,2321,83,1797,82,177,7,3207,1172,1251,846.0,80,20
1,"""4700240""","""Adamsville""","""""",0.52,665.2,270.34,4391.39,63,74,42,24,30,15,39,44,27,8.97,0.8,1.81,2,2,23154,20842,25034,1,1,1,12636,11563,14596,9340,8591,11035,3278,2951,3543,18,21,17,15,18,15,2.1,2.4,0.67,0.06,1289,81,1124,79,73,13,2450,661,738,445.0,74,26
2,"""4700440""","""Alamo""","""Jackson, TN""",0.59,532.78,182.99,1439.6,54,62,42,22,27,17,32,35,26,9.98,2.36,1.83,2,2,23655,21398,24411,0,0,0,13317,11706,13967,9968,8675,10511,3348,3030,3455,0,1,0,0,1,0,3.0,1.2,0.92,0.13,2074,72,2310,83,55,25,2171,743,826,628.0,56,44
3,"""4700540""","""Alcoa""","""Knoxville, TN""",8.07,11228.86,4834.71,9429.01,50,58,43,23,29,19,27,29,24,8.16,6.1,1.69,2,2,21555,19174,22722,0,0,0,12254,10631,12928,9203,7916,9711,3051,2715,3216,1,1,1,1,1,1,5.2,5.1,1.83,0.51,7567,66,11904,87,15,80,1123,868,813,733.0,60,40
4,"""4700620""","""Alexandria""","""""",0.18,260.07,102.15,1291.49,61,72,46,25,31,18,36,41,28,10.18,0.81,1.96,2,2,24713,22313,25497,0,0,0,13641,12528,14967,10142,9368,11358,3499,3159,3609,0,0,0,0,0,0,2.4,0.4,0.61,0.08,1984,82,1768,83,108,14,2338,803,842,722.0,68,32


In [162]:
# mpos['name'] = mpos['name'].str.strip('\"')
# mpos['GEO_ID'] = mpos['mpo'].str.strip('\"')
# mpos = mpos.loc[mpos['name'] == 'Nashville Area MPO']

In [163]:
#strip the extra characters from the GEOID imports
tncos['GEO_ID'] = tncos['county'].str.strip('\"')
kycos['GEO_ID'] = kycos['county'].str.strip('\"')
tnplaces['GEO_ID'] = tnplaces['place'].str.strip('\"')
kyplaces['GEO_ID'] = kyplaces['place'].str.strip('\"')
tnbg['GEO_ID'] = tnbg['blkgrp'].str.strip('\"')

In [164]:
#select which columns to keep
tncos = tncos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
kycos = kycos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
tnplaces = tnplaces[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
kyplaces = kyplaces[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
tnbg = tnbg[['GEO_ID', 'h_cost', 't_cost_ami']]
#mpos = mpos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]

In [165]:
#get annual numbers
tncos['h_cost_ami'] = tncos['h_cost']*12
kycos['h_cost_ami'] = kycos['h_cost']*12
tnplaces['h_cost_ami'] = tnplaces['h_cost']*12
kyplaces['h_cost_ami'] = kyplaces['h_cost']*12
tnbg['h_cost_ami'] = tnbg['h_cost']*12
#mpos['h_cost_ami'] = mpos['h_cost']*12

In [166]:
#drop the original numbers
tncos= tncos.drop(columns = 'h_cost')
kycos= kycos.drop(columns = 'h_cost')
tnplaces= tnplaces.drop(columns = 'h_cost')
kyplaces= kyplaces.drop(columns = 'h_cost')
tnbg= tnbg.drop(columns = 'h_cost')
#mpos= mpos.drop(columns = 'h_cost')

In [167]:
#remove full GEOIDs for filtering from custom module lists
tncos['GEO_ID'] = tncos['GEO_ID'].str[2:]
kycos['GEO_ID'] = kycos['GEO_ID'].str[2:]
tnplaces['GEO_ID'] = '1600000US' + tnplaces['GEO_ID']
kyplaces['GEO_ID'] = '1600000US' + kyplaces['GEO_ID']
tnbg['CountyFIPS'] = tnbg['GEO_ID'].str[2:5]

In [168]:
#filter geographies
tncos = tncos.loc[tncos['GEO_ID'].isin(GNRC)].reset_index(drop = True)
kycos = kycos.loc[kycos['GEO_ID'].isin(KY)].reset_index(drop = True)
tnplaces = tnplaces.loc[tnplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
kyplaces = kyplaces.loc[kyplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
tnbg = tnbg.loc[tnbg['CountyFIPS'].isin(GNRC)]
tnbg = tnbg.drop(columns = ['CountyFIPS']).reset_index(drop = True)

In [169]:
#create full geoid for joining
tncos['GEO_ID'] = '0500000US47' + tncos['GEO_ID']
kycos['GEO_ID'] = '0500000US21' + kycos['GEO_ID']

In [170]:
#merge and set index
df = pd.concat([tncos, kycos, tnplaces, kyplaces, tnbg]).reset_index(drop = True)
data = df.merge(hhincome, on = 'GEO_ID')
data = data.drop(columns = 'name')

In [171]:
data = data.set_index('NAME').transpose()
data = data.rename(columns = tofullcensus)
data = data.transpose().reset_index(drop = False)

In [172]:
data = data.set_index(['NAME', 'GEO_ID']).transpose()
GNRCCounties = [data[('Stewart County, Tennessee', '0500000US47161')],data[('Montgomery County, Tennessee', '0500000US47125')],
                data[('Houston County, Tennessee', '0500000US47083')],data[('Humphreys County, Tennessee', '0500000US47085')],
                data[('Dickson County, Tennessee', '0500000US47043')],data[('Cheatham County, Tennessee', '0500000US47021')],
                data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
                data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
                data[('Trousdale County, Tennessee', '0500000US47169')],data[('Williamson County, Tennessee', '0500000US47187')],
                data[('Rutherford County, Tennessee', '0500000US47149')]]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data[('Stewart County, Tennessee', '0500000US47161')],data[('Montgomery County, Tennessee', '0500000US47125')],
                   data[('Houston County, Tennessee', '0500000US47083')],data[('Humphreys County, Tennessee', '0500000US47085')],
                   data[('Dickson County, Tennessee', '0500000US47043')],data[('Cheatham County, Tennessee', '0500000US47021')],
                   data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
                   data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
                   data[('Trousdale County, Tennessee', '0500000US47169')],data[('Williamson County, Tennessee', '0500000US47187')],
                   data[('Rutherford County, Tennessee', '0500000US47149')],data[('Maury County, Tennessee', '0500000US47119')]]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
               data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
               data[('Williamson County, Tennessee', '0500000US47187')],data[('Rutherford County, Tennessee', '0500000US47149')],
               data[('Maury County, Tennessee', '0500000US47119')]]
data['MPO'] = sum(MPOCounties)
RuthInc = [data[('Eagleville city, Tennessee', '1600000US4722360')],data[('La Vergne city, Tennessee', '1600000US4741200')],
           data[('Murfreesboro city, Tennessee', '1600000US4751560')],data[('Smyrna town, Tennessee', '1600000US4769420')]]
data[('Rutherford Incorporated', 'None')] = sum(RuthInc)
data[('Rutherford Unincorporated', 'None')] = data[('Rutherford County, Tennessee', '0500000US47149')] - data[('Rutherford Incorporated', 'None')]
WilsonInc = [data[('Lebanon city, Tennessee', '1600000US4741520')],data[('Mount Juliet city, Tennessee', '1600000US4750780')],
             data[('Watertown city, Tennessee', '1600000US4778320')]]
data[('Wilson Incorporated', 'None')] = sum(WilsonInc)
data[('Wilson Unincorporated', 'None')] = data[('Wilson County, Tennessee', '0500000US47189')] - data[('Wilson Incorporated', 'None')]
CheathInc = [data[('Ashland City town, Tennessee', '1600000US4702180')],data[('Kingston Springs town, Tennessee', '1600000US4739660')],
             data[('Pegram town, Tennessee', '1600000US4757480')],data[('Pleasant View city, Tennessee', '1600000US4759560')]]
data[('Cheatham Incorporated', 'None')] = sum(CheathInc)
data[('Cheatham Unincorporated', 'None')] = data[('Cheatham County, Tennessee', '0500000US47021')] - data[('Cheatham Incorporated', 'None')]
DicksInc = [data[('Burns town, Tennessee', '1600000US4709880')],data[('Charlotte town, Tennessee', '1600000US4713080')],
            data[('Dickson city, Tennessee', '1600000US4720620')],data[('Slayden town, Tennessee', '1600000US4769080')],
            data[('Vanleer town, Tennessee', '1600000US4776860')],data[('White Bluff town, Tennessee', '1600000US4779980')]]
data[('Dickson Incorporated', 'None')] = sum(DicksInc)
data[('Dickson Unincorporated', 'None')] = data[('Dickson County, Tennessee', '0500000US47043')] - data[('Dickson Incorporated', 'None')]
HumphInc = [data[('McEwen city, Tennessee', '1600000US4744840')],data[('New Johnsonville city, Tennessee', '1600000US4752820')],
            data[('Waverly city, Tennessee', '1600000US4778560')]]
data[('Humphreys Incorporated', 'None')] = sum(HumphInc)
data[('Humphreys Unincorporated', 'None')] = data[('Humphreys County, Tennessee', '0500000US47085')] - data[('Humphreys Incorporated', 'None')]
data[('Montgomery Incorporated', 'None')] = data[('Clarksville city, Tennessee', '1600000US4715160')]
data[('Montgomery Unincorporated', 'None')] = data[('Montgomery County, Tennessee', '0500000US47125')] - data[('Montgomery Incorporated', 'None')]
data = data.transpose()

In [173]:
data.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,t_cost_ami,h_cost_ami,Households,Annual Median Household Income,Monthly Median Household Income
NAME,GEO_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Block Group 1, Census Tract 1018.02, Montgomery County, Tennessee",471251018021.0,15200.0,15108.0,1432.0,77174.0,6431.166667
"Block Group 1, Census Tract 801.04, Robertson County, Tennessee",471470801041.0,15267.0,16056.0,326.0,44167.0,3680.583333
"Block Group 1, Census Tract 806.05, Robertson County, Tennessee",471470806051.0,15664.0,13896.0,442.0,66944.0,5578.666667
"Block Group 2, Census Tract 408.09, Rutherford County, Tennessee",471490408092.0,15571.0,15900.0,296.0,55543.0,4628.583333
"Block Group 2, Census Tract 408.07, Rutherford County, Tennessee",471490408072.0,15562.0,19380.0,733.0,91645.0,7637.083333
"Block Group 1, Census Tract 405.02, Rutherford County, Tennessee",471490405021.0,15567.0,13200.0,936.0,49881.0,4156.75
"Block Group 3, Census Tract 405.01, Rutherford County, Tennessee",471490405013.0,15717.0,16368.0,722.0,75698.0,6308.166667
"Block Group 2, Census Tract 512.01, Williamson County, Tennessee",471870512012.0,15745.0,14580.0,658.0,46111.0,3842.583333
"Block Group 2, Census Tract 165, Davidson County, Tennessee",470370165002.0,,,0.0,-666666666.0,-55555555.5
"Block Group 2, Census Tract 1018.03, Montgomery County, Tennessee",471251018032.0,15669.0,17400.0,349.0,95625.0,7968.75


In [174]:
#create a list of columns not-indexed and perform mathematical operations for desired outputs
cols = list(data.columns)
data[cols] = data[cols].astype(float)
data['Annual Median Household Income'] = data['Annual Median Household Income']
data['Monthly Median Household Income'] = data['Annual Median Household Income']/12
data['Annual Transportation Cost'] = data['t_cost_ami']
data['Monthly Transportation Cost'] = data['t_cost_ami']/12
data['Annual Housing Cost'] = data['h_cost_ami']
data['Monthly Housing Cost'] = data['h_cost_ami']/12
data['Annual Housing and Transportation Cost'] = data['Annual Housing Cost'] + data['Annual Transportation Cost']
data['Monthly Housing and Transportation Cost'] = (data['Annual Housing Cost'] + data['Annual Transportation Cost'])/12
data['Transportation Cost as % of Monthly Median Household Income'] = percent(data['Monthly Transportation Cost'], data['Monthly Median Household Income'])
data['Transportation Cost as % of Annual Median Household Income'] = percent(data['Annual Transportation Cost'], data['Annual Median Household Income'])
data['Housing Cost as % of Monthly Median Household Income'] = percent(data['Monthly Housing Cost'], data['Monthly Median Household Income'])
data['Housing Cost as % of Annual Median Household Income'] = percent(data['Annual Housing Cost'], data['Annual Median Household Income'])
data['Housing and Transportation Cost as % of Monthly Median Household Income'] = percent(data['Monthly Housing and Transportation Cost'], data['Monthly Median Household Income'])
data['Housing and Transportation Cost as % of Annual Median Household Income'] = percent(data['Annual Housing and Transportation Cost'], data['Annual Median Household Income'])

In [175]:
#drop the input columms no longer needed
data = data.drop(columns = ['t_cost_ami', 'h_cost_ami']).reset_index()

In [176]:
#find the difference between cost and income overall
data['Difference Annual Median Household Income and Total Annual Costs'] = data['Annual Median Household Income'] - data['Annual Housing and Transportation Cost']

In [177]:
#remove CDPs
data = data.loc[~data['NAME'].str.contains('CDP')]

In [178]:
#commenting out to get block groups
# #just make sure it's the full census geoid
# data['GEO_ID'] = data['NAME'].map(geotogeoid)
# data.tail(20)

In [179]:
#set the source
data['Year'] = '2015'

In [180]:
fifteen = data

## 2019

In [3]:
#2015 ACS 5 Year Median Household Income, total occupied housing units for households
#counties
url_str= 'https://api.census.gov/data/2021/acs/acs5?key='+census_key
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "county:*"
predicates["in"]= "state:47" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
df = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
df = df.loc[df['GeoFIPS'].isin(GNRC)]
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "county:*"
predicates["in"]= "state:21" 
data = requests.get(url_str, params= predicates)                                                              
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
kycos = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
kycos = kycos.loc[kycos['GeoFIPS'].isin(KY)]
df = pd.concat([df, kycos], axis = 0)
#ky places call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "place:*"
predicates["in"]= "state:21" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
places=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
places=places.loc[places['GeoFIPS'].isin(shortkyplaces)]
df = pd.concat([df, places], axis = 0)
#places
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "place:*"
predicates["in"]= "state:47" 
data = requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS', 'GeoFIPS']
places=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
places=places.loc[places['GeoFIPS'].isin(shorttnplaces)]
df = pd.concat([df, places], axis = 0)
#state call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "state:47"
data= requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS']
state=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
state['GeoFIPS'] = '0'
df = pd.concat([df, state], axis = 0)
#national call
predicates= {}
get_vars= ["NAME", 'GEO_ID', 'B19013_001E', 'B25002_002E']
predicates["get"]= ",". join(get_vars)
predicates["for"]= "us:*"
data= requests.get(url_str, params= predicates)
col_names = ['NAME', 'GEO_ID', 'Median Household Income', 'Households', 'StateFIPS']
national=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
national['GeoFIPS'] = '0'
df = pd.concat([df, national], axis = 0)
# predicates= {} #block groups GNRC Region
# get_vars= ["NAME", 'B19013_001E', 'B25002_002E']
# predicates["get"]= ",". join(get_vars)
# predicates["for"]= "block group:*"
# predicates["in"]= "state:47, county:*, tract:*"
# data= requests.get(url_str, params = predicates)
# col_names = ['NAME', 'Median Household Income', 'Households', 'StateFIPS', 'CountyFIPS', 'Census Tract', 'Block Group']
# bg=pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
# bg['GEO_ID'] = bg['StateFIPS'] + bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
# bg['GeoFIPS'] = bg['CountyFIPS'] + bg['Census Tract'] + bg['Block Group']
# bg = bg.loc[bg['CountyFIPS'].isin(GNRC)]
# bg = bg.drop(columns = ['CountyFIPS', 'Census Tract', 'Block Group']).reset_index(drop = True)
#df = pd.concat([df, bg], axis = 0)
savename = df
print('Okay Finished')

Okay Finished


In [4]:
savename.head()

Unnamed: 0,NAME,GEO_ID,Median Household Income,Households,StateFIPS,GeoFIPS
10,"Cheatham County, Tennessee",0500000US47021,69132,15366,47,21
18,"Davidson County, Tennessee",0500000US47037,66047,293859,47,37
21,"Dickson County, Tennessee",0500000US47043,61388,19663,47,43
41,"Houston County, Tennessee",0500000US47083,46535,2860,47,83
42,"Humphreys County, Tennessee",0500000US47085,49745,6688,47,85


In [5]:
savename.to_csv('geostest.csv')

In [6]:
#drop unneeded columns and change columns that need to be to float
hhincome = savename.reset_index(drop = True)
hhincome = hhincome.drop(columns = ['StateFIPS', 'GeoFIPS'])
cols = ['Median Household Income', 'HouseholdsCensus']
hhincome[cols] = hhincome[cols].astype(float)

KeyError: "['HouseholdsCensus'] not in index"

In [54]:
#clarify the annual MHI and create a column for monthly MHI then drop the original column
hhincome['Annual Median Household Income'] = hhincome['Median Household Income']
#hhincome['Monthly Median Household Income'] = hhincome['Median Household Income']/12
hhincome = hhincome.drop(columns = 'Median Household Income')

In [55]:
#check before moving on to H&T
hhincome.tail()

Unnamed: 0,NAME,HouseholdsCensus,GEO_ID,Annual Median Household Income
4557,"Block Group 4, Census Tract 309.08, Wilson Cou...",638.0,471890309084,102667.0
4558,"Block Group 1, Census Tract 310, Wilson County...",675.0,471890310001,80820.0
4559,"Block Group 2, Census Tract 310, Wilson County...",616.0,471890310002,68152.0
4560,"Block Group 3, Census Tract 310, Wilson County...",438.0,471890310003,41750.0
4561,"Block Group 4, Census Tract 310, Wilson County...",624.0,471890310004,63203.0


In [56]:
# kycos = pd.read_csv('../../Data Downloads/CNT_KYCounties_2019_HT.csv')
# tncos = pd.read_csv('../../Data Downloads/CNT_TNCounties_2019_HT.csv')
# mpos = pd.read_csv('../../Data Downloads/CNT_MPOs_2019_HT.csv')
# tnplaces = pd.read_csv('../../Data Downloads/CNT_TNPlaces_2019_HT.csv')
# kyplaces = pd.read_csv('../../Data Downloads/CNT_KYPlaces_2019_HT.csv')
#tnbg = pd.read_csv('../../Data Downloads/CNT_TNBlockGroups_2019_HT.csv')
tnbg = pd.read_csv('../../Data Downloads/CNT_TNBlockGroups_2020_HT.csv')

In [57]:
tnbg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4562 entries, 0 to 4561
Data columns (total 57 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   blkgrp                       4562 non-null   object 
 1   cbsa                         4103 non-null   object 
 2   blkgrps                      4562 non-null   float64
 3   population                   4562 non-null   float64
 4   households                   4562 non-null   float64
 5   land_acres                   4562 non-null   float64
 6   ht_ami                       4527 non-null   float64
 7   ht_80ami                     4527 non-null   float64
 8   ht_nmi                       4527 non-null   float64
 9   h_ami                        4527 non-null   float64
 10  h_80ami                      4527 non-null   float64
 11  h_nmi                        4527 non-null   float64
 12  t_ami                        4527 non-null   float64
 13  t_80ami           

In [58]:
# mpos['name'] = mpos['name'].str.strip('\"')
# mpos['GEO_ID'] = mpos['mpo'].str.strip('\"')
# mpos = mpos.loc[mpos['name'] == 'Nashville Area MPO']

In [59]:
tnbg.head()

Unnamed: 0,blkgrp,cbsa,blkgrps,population,households,land_acres,ht_ami,ht_80ami,ht_nmi,h_ami,h_80ami,h_nmi,t_ami,t_80ami,t_nmi,co2_per_hh_local,co2_per_acre_local,autos_per_hh_ami,autos_per_hh_80ami,autos_per_hh_nmi,vmt_per_hh_ami,vmt_per_hh_80ami,vmt_per_hh_nmi,pct_transit_commuters_ami,pct_transit_commuters_80ami,pct_transit_commuters_nmi,t_cost_ami,t_cost_80ami,t_cost_nmi,auto_ownership_cost_ami,auto_ownership_cost_80ami,auto_ownership_cost_nmi,vmt_cost_ami,vmt_cost_80ami,vmt_cost_nmi,transit_cost_ami,transit_cost_80ami,transit_cost_nmi,transit_trips_ami,transit_trips_80ami,transit_trips_nmi,compact_ndx,emp_ovrll_ndx,res_density,gross_hh_density,hh_gravity,frac_sfd,emp_gravity,emp_ndx,block_size,intersection_density,avg_block_perimeter_meters,h_cost,median_smoc,median_gross_rent,pct_owner_occupied_hu,pct_renter_occupied_hu
0,"""470019801001""","""Knoxville, TN""",1.0,0.0,0.0,10874.39,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,
1,"""470210701021""","""Nashville-Davidson--Murfreesboro--Franklin, TN""",1.0,1687.0,593.0,11692.89,48.0,59.0,50.0,24.0,30.0,25.0,24.0,29.0,25.0,9.89,0.5,2.23,2.0,2.0,23327.0,23036.0,23320.0,0.0,0.0,0.0,16176.0,15783.0,16127.0,13312.0,12955.0,13264.0,2861.0,2826.0,2860.0,2.0,2.0,2.0,1.0,1.0,1.0,,,,0.05,3979.0,94.0,5342.0,81.0,377.0,,,1372.0,1438.0,998.0,86.0,14.0
2,"""470210702032""","""Nashville-Davidson--Murfreesboro--Franklin, TN""",1.0,2109.0,850.0,21884.83,53.0,65.0,55.0,29.0,36.0,31.0,24.0,29.0,25.0,9.98,0.39,2.22,2.0,2.0,24186.0,23889.0,24176.0,0.0,0.0,0.0,16275.0,15885.0,16225.0,13307.0,12953.0,13258.0,2967.0,2930.0,2965.0,2.0,2.0,2.0,1.0,1.0,1.0,,,,0.04,2894.0,71.0,2330.0,83.0,342.0,,,1652.0,1766.0,967.0,87.0,13.0
3,"""470210701022""","""Nashville-Davidson--Murfreesboro--Franklin, TN""",1.0,2341.0,808.0,28206.02,46.0,56.0,48.0,22.0,27.0,23.0,24.0,29.0,25.0,10.52,0.3,2.23,2.0,2.0,24363.0,24062.0,24342.0,0.0,0.0,0.0,16324.0,15935.0,16273.0,13334.0,12982.0,13286.0,2988.0,2951.0,2986.0,2.0,2.0,2.0,1.0,1.0,1.0,,,,0.03,3143.0,85.0,3269.0,82.0,522.0,,,1243.0,1266.0,1163.0,82.0,18.0
4,"""470210702011""","""Nashville-Davidson--Murfreesboro--Franklin, TN""",1.0,2030.0,782.0,3319.56,43.0,54.0,46.0,20.0,25.0,21.0,23.0,29.0,25.0,10.19,2.4,2.19,2.0,2.0,23879.0,23584.0,23862.0,0.0,0.0,0.0,16033.0,15679.0,15985.0,13102.0,12784.0,13056.0,2929.0,2893.0,2927.0,2.0,2.0,2.0,1.0,1.0,1.0,,,,0.24,3453.0,83.0,3514.0,82.0,66.0,,,1136.0,1146.0,1111.0,72.0,28.0


In [60]:
# #strip the extra characters from the GEOID imports
# tncos['GEO_ID'] = tncos['county'].str.strip('\"')
# kycos['GEO_ID'] = kycos['county'].str.strip('\"')
# tnplaces['GEO_ID'] = tnplaces['place'].str.strip('\"')
# kyplaces['GEO_ID'] = kyplaces['place'].str.strip('\"')
tnbg['GEO_ID'] = tnbg['blkgrp'].str.strip('\"')

In [61]:
# #select which columns to keep
# tncos = tncos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
# kycos = kycos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
# tnplaces = tnplaces[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
# kyplaces = kyplaces[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]
tnbg = tnbg[['GEO_ID', 'h_cost', 't_cost_ami', 'households']]
#mpos = mpos[['name', 'GEO_ID', 'h_cost', 't_cost_ami']]

In [62]:
# #get annual numbers
# tncos['h_cost_ami'] = tncos['h_cost']*12
# kycos['h_cost_ami'] = kycos['h_cost']*12
# tnplaces['h_cost_ami'] = tnplaces['h_cost']*12
# kyplaces['h_cost_ami'] = kyplaces['h_cost']*12
tnbg['h_cost_ami'] = tnbg['h_cost']*12
#mpos['h_cost_ami'] = mpos['h_cost']*12

In [63]:
# #drop original numbers
# tncos= tncos.drop(columns = 'h_cost')
# kycos= kycos.drop(columns = 'h_cost')
# tnplaces= tnplaces.drop(columns = 'h_cost')
# kyplaces= kyplaces.drop(columns = 'h_cost')
tnbg= tnbg.drop(columns = 'h_cost')
#mpos= mpos.drop(columns = 'h_cost')

In [64]:
# #remove full GEOIDs for filtering from custom module lists
# tncos['GEO_ID'] = tncos['GEO_ID'].str[2:]
# kycos['GEO_ID'] = kycos['GEO_ID'].str[2:]
# tnplaces['GEO_ID'] = '1600000US' + tnplaces['GEO_ID']
# kyplaces['GEO_ID'] = '1600000US' + kyplaces['GEO_ID']
tnbg['CountyFIPS'] = tnbg['GEO_ID'].str[2:5]

In [65]:
# #filter geographies
# tncos = tncos.loc[tncos['GEO_ID'].isin(GNRC)].reset_index(drop = True)
# kycos = kycos.loc[kycos['GEO_ID'].isin(KY)].reset_index(drop = True)
# tnplaces = tnplaces.loc[tnplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
# kyplaces = kyplaces.loc[kyplaces['GEO_ID'].isin(censusplaces)].reset_index(drop = True)
# tnbg = tnbg.loc[tnbg['CountyFIPS'].isin(GNRC)]
# tnbg = tnbg.drop(columns = ['CountyFIPS']).reset_index(drop = True)

In [66]:
# #create full geoid for joining
# tncos['GEO_ID'] = '0500000US47' + tncos['GEO_ID']
# kycos['GEO_ID'] = '0500000US21' + kycos['GEO_ID']

In [67]:
# #merge and set index
#df = pd.concat([tncos, kycos, tnplaces, kyplaces, tnbg]).reset_index(drop = True)

df = tnbg
data = df.merge(hhincome, on = 'GEO_ID')
#data = data.drop(columns = 'name')

In [68]:
data.head()

Unnamed: 0,GEO_ID,t_cost_ami,households,h_cost_ami,CountyFIPS,NAME,HouseholdsCensus,Annual Median Household Income
0,470019801001,,0.0,,1,"Block Group 1, Census Tract 9801, Anderson Cou...",0.0,-666666666.0
1,470210701021,16176.0,593.0,16464.0,21,"Block Group 1, Census Tract 701.02, Cheatham C...",587.0,67917.0
2,470210702032,16275.0,850.0,19824.0,21,"Block Group 2, Census Tract 702.03, Cheatham C...",869.0,64102.0
3,470210701022,16324.0,808.0,14916.0,21,"Block Group 2, Census Tract 701.02, Cheatham C...",854.0,76250.0
4,470210702011,16033.0,782.0,13632.0,21,"Block Group 1, Census Tract 702.01, Cheatham C...",846.0,80000.0


In [69]:
data.drop(columns = 'households', inplace = True)

In [70]:
data = data.rename(columns = {'HouseholdsCensus':'Households'})

In [71]:
# data = data.set_index('NAME').transpose()
# data = data.rename(columns = tofullcensus)
# data = data.transpose().reset_index(drop = False)

In [72]:
# data = data.set_index(['NAME', 'GEO_ID']).transpose()
# GNRCCounties = [data[('Stewart County, Tennessee', '0500000US47161')],data[('Montgomery County, Tennessee', '0500000US47125')],
#                 data[('Houston County, Tennessee', '0500000US47083')],data[('Humphreys County, Tennessee', '0500000US47085')],
#                 data[('Dickson County, Tennessee', '0500000US47043')],data[('Cheatham County, Tennessee', '0500000US47021')],
#                 data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
#                 data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
#                 data[('Trousdale County, Tennessee', '0500000US47169')],data[('Williamson County, Tennessee', '0500000US47187')],
#                 data[('Rutherford County, Tennessee', '0500000US47149')]]
# data['GNRC'] = sum(GNRCCounties)
# GNRCCountiesAll = [data[('Stewart County, Tennessee', '0500000US47161')],data[('Montgomery County, Tennessee', '0500000US47125')],
#                    data[('Houston County, Tennessee', '0500000US47083')],data[('Humphreys County, Tennessee', '0500000US47085')],
#                    data[('Dickson County, Tennessee', '0500000US47043')],data[('Cheatham County, Tennessee', '0500000US47021')],
#                    data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
#                    data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
#                    data[('Trousdale County, Tennessee', '0500000US47169')],data[('Williamson County, Tennessee', '0500000US47187')],
#                    data[('Rutherford County, Tennessee', '0500000US47149')],data[('Maury County, Tennessee', '0500000US47119')]]
# data['GNRC Region'] = sum(GNRCCountiesAll)
# MPOCounties = [data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
#                data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
#                data[('Williamson County, Tennessee', '0500000US47187')],data[('Rutherford County, Tennessee', '0500000US47149')],
#                data[('Maury County, Tennessee', '0500000US47119')]]
# data['MPO'] = sum(MPOCounties)
# RuthInc = [data[('Eagleville city, Tennessee', '1600000US4722360')],data[('La Vergne city, Tennessee', '1600000US4741200')],
#            data[('Murfreesboro city, Tennessee', '1600000US4751560')],data[('Smyrna town, Tennessee', '1600000US4769420')]]
# data[('Rutherford Incorporated', 'None')] = sum(RuthInc)
# data[('Rutherford Unincorporated', 'None')] = data[('Rutherford County, Tennessee', '0500000US47149')] - data[('Rutherford Incorporated', 'None')]
# WilsonInc = [data[('Lebanon city, Tennessee', '1600000US4741520')],data[('Mount Juliet city, Tennessee', '1600000US4750780')],
#              data[('Watertown city, Tennessee', '1600000US4778320')]]
# data[('Wilson Incorporated', 'None')] = sum(WilsonInc)
# data[('Wilson Unincorporated', 'None')] = data[('Wilson County, Tennessee', '0500000US47189')] - data[('Wilson Incorporated', 'None')]
# CheathInc = [data[('Ashland City town, Tennessee', '1600000US4702180')],data[('Kingston Springs town, Tennessee', '1600000US4739660')],
#              data[('Pegram town, Tennessee', '1600000US4757480')],data[('Pleasant View city, Tennessee', '1600000US4759560')]]
# data[('Cheatham Incorporated', 'None')] = sum(CheathInc)
# data[('Cheatham Unincorporated', 'None')] = data[('Cheatham County, Tennessee', '0500000US47021')] - data[('Cheatham Incorporated', 'None')]
# DicksInc = [data[('Burns town, Tennessee', '1600000US4709880')],data[('Charlotte town, Tennessee', '1600000US4713080')],
#             data[('Dickson city, Tennessee', '1600000US4720620')],data[('Slayden town, Tennessee', '1600000US4769080')],
#             data[('Vanleer town, Tennessee', '1600000US4776860')],data[('White Bluff town, Tennessee', '1600000US4779980')]]
# data[('Dickson Incorporated', 'None')] = sum(DicksInc)
# data[('Dickson Unincorporated', 'None')] = data[('Dickson County, Tennessee', '0500000US47043')] - data[('Dickson Incorporated', 'None')]
# HumphInc = [data[('McEwen city, Tennessee', '1600000US4744840')],data[('New Johnsonville city, Tennessee', '1600000US4752820')],
#             data[('Waverly city, Tennessee', '1600000US4778560')]]
# data[('Humphreys Incorporated', 'None')] = sum(HumphInc)
# data[('Humphreys Unincorporated', 'None')] = data[('Humphreys County, Tennessee', '0500000US47085')] - data[('Humphreys Incorporated', 'None')]
# data[('Montgomery Incorporated', 'None')] = data[('Clarksville city, Tennessee', '1600000US4715160')]
# data[('Montgomery Unincorporated', 'None')] = data[('Montgomery County, Tennessee', '0500000US47125')] - data[('Montgomery Incorporated', 'None')]
# data = data.transpose()

In [73]:
data.set_index('GEO_ID', inplace = True)
data.drop(columns = 'NAME', inplace = True)

In [74]:
#create a list of columns not-indexed and perform mathematical operations for desired outputs
cols = list(data.columns)
data[cols] = data[cols].astype(float)
data['Annual Median Household Income'] = data['Annual Median Household Income']
#data['Monthly Median Household Income'] = data['Annual Median Household Income']/12
data['Annual Transportation Cost'] = data['t_cost_ami']
#data['Monthly Transportation Cost'] = data['t_cost_ami']/12
data['Annual Housing Cost'] = data['h_cost_ami']
#data['Monthly Housing Cost'] = data['h_cost_ami']/12
data['Annual Housing and Transportation Cost'] = data['Annual Housing Cost'] + data['Annual Transportation Cost']
#data['Monthly Housing and Transportation Cost'] = (data['Annual Housing Cost'] + data['Annual Transportation Cost'])/12
#data['Transportation Cost as % of Monthly Median Household Income'] = percent(data['Monthly Transportation Cost'], data['Monthly Median Household Income'])
data['Transportation Cost as % of Annual Median Household Income'] = percent(data['Annual Transportation Cost'], data['Annual Median Household Income'])
#data['Housing Cost as % of Monthly Median Household Income'] = percent(data['Monthly Housing Cost'], data['Monthly Median Household Income'])
data['Housing Cost as % of Annual Median Household Income'] = percent(data['Annual Housing Cost'], data['Annual Median Household Income'])
#data['Housing and Transportation Cost as % of Monthly Median Household Income'] = percent(data['Monthly Housing and Transportation Cost'], data['Monthly Median Household Income'])
data['Housing and Transportation Cost as % of Annual Median Household Income'] = percent(data['Annual Housing and Transportation Cost'], data['Annual Median Household Income'])

In [75]:
#drop the input columms no longer needed
data = data.drop(columns = ['t_cost_ami', 'h_cost_ami']).reset_index()

In [76]:
data.head()

Unnamed: 0,GEO_ID,CountyFIPS,Households,Annual Median Household Income,Annual Transportation Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income
0,470019801001,1.0,0.0,-666666666.0,,,,,,
1,470210701021,21.0,587.0,67917.0,16176.0,16464.0,32640.0,23.817306,24.241353,48.05866
2,470210702032,21.0,869.0,64102.0,16275.0,19824.0,36099.0,25.389223,30.925712,56.314936
3,470210701022,21.0,854.0,76250.0,16324.0,14916.0,31240.0,21.408525,19.561967,40.970492
4,470210702011,21.0,846.0,80000.0,16033.0,13632.0,29665.0,20.04125,17.04,37.08125


In [77]:
#find the difference in cost and income overall
data['Difference Annual Median Household Income and Total Annual Costs'] = data['Annual Median Household Income'] - data['Annual Housing and Transportation Cost']

In [78]:
# #remove CDPs
# data = data.loc[~data['NAME'].str.contains('CDP')]

In [79]:
#commenting out to get bgs
# #just make sure it's the full census geoid
# data['GEO_ID'] = data['NAME'].map(geotogeoid)
# data.head(2)

In [81]:
#set source
data['Year'] = '2020'

In [82]:
nineteen = data

In [83]:
nineteen.head()

Unnamed: 0,GEO_ID,CountyFIPS,Households,Annual Median Household Income,Annual Transportation Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Transportation Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Difference Annual Median Household Income and Total Annual Costs,Year
0,470019801001,1.0,0.0,-666666666.0,,,,,,,,2020
1,470210701021,21.0,587.0,67917.0,16176.0,16464.0,32640.0,23.817306,24.241353,48.05866,35277.0,2020
2,470210702032,21.0,869.0,64102.0,16275.0,19824.0,36099.0,25.389223,30.925712,56.314936,28003.0,2020
3,470210701022,21.0,854.0,76250.0,16324.0,14916.0,31240.0,21.408525,19.561967,40.970492,45010.0,2020
4,470210702011,21.0,846.0,80000.0,16033.0,13632.0,29665.0,20.04125,17.04,37.08125,50335.0,2020


In [84]:
nineteen.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4562 entries, 0 to 4561
Data columns (total 12 columns):
 #   Column                                                                  Non-Null Count  Dtype  
---  ------                                                                  --------------  -----  
 0   GEO_ID                                                                  4562 non-null   object 
 1   CountyFIPS                                                              4562 non-null   float64
 2   Households                                                              4562 non-null   float64
 3   Annual Median Household Income                                          4562 non-null   float64
 4   Annual Transportation Cost                                              4527 non-null   float64
 5   Annual Housing Cost                                                     4527 non-null   float64
 6   Annual Housing and Transportation Cost                                  4527 non

In [86]:
nineteen.to_csv('CNT2020BGs.csv', index = False)

In [205]:
dfs = [fifteen, nineteen]
data = pd.concat(dfs)

In [206]:
#make sure year is formatted as an integer
data['Year'] = data['Year'].astype(int)
#create a list of years from the dataframe to pass through our "generate time frames" function to create a list of all possible time frames - need this here for later
years = list(data['Year'].unique().astype(int))
time_frames = generate_time_frames(years)

In [207]:
#create a multilevel column header with year and placeholder for time frames
#pivot the table and create a multiindex of year and column header
cols = list(data.columns)
cols.remove('NAME')
cols.remove('Year')
df_pivot = data.pivot_table(index = 'NAME', columns = ['Year'], values = cols)
df_pivot.head(2)

  df_pivot = data.pivot_table(index = 'NAME', columns = ['Year'], values = cols)


Unnamed: 0_level_0,Annual Housing Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Annual Housing and Transportation Cost,Annual Median Household Income,Annual Median Household Income,Annual Transportation Cost,Annual Transportation Cost,Difference Annual Median Household Income and Total Annual Costs,Difference Annual Median Household Income and Total Annual Costs,Households,Households,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Monthly Housing Cost,Monthly Housing Cost,Monthly Housing and Transportation Cost,Monthly Housing and Transportation Cost,Monthly Median Household Income,Monthly Median Household Income,Monthly Transportation Cost,Monthly Transportation Cost,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Monthly Median Household Income
Year,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019
NAME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2
"Adams city, Tennessee",14064.0,17520.0,29718.0,35557.0,56667.0,55357.0,15654.0,18037.0,26949.0,19800.0,274.0,210.0,24.818678,31.649114,24.818678,31.649114,52.443221,64.232166,52.443221,64.232166,1172.0,1460.0,2476.5,2963.083333,4722.25,4613.083333,1304.5,1503.083333,27.624543,32.583052,27.624543,32.583052
"Allen County, Kentucky",10116.0,10116.0,24351.0,26210.0,41326.0,44036.0,14235.0,16094.0,16975.0,17826.0,7774.0,7605.0,24.478537,22.972114,24.478537,22.972114,58.924164,59.519484,58.924164,59.519484,843.0,843.0,2029.25,2184.166667,3443.833333,3669.666667,1186.25,1341.166667,34.445627,36.54737,34.445627,36.54737


In [208]:
#add a level to the multiindex to accomodate the time period metrics
df_pivot.columns = pd.MultiIndex.from_tuples([(col[0], col[1], 'None') for col in df_pivot.columns])
df_pivot.head(3)

Unnamed: 0_level_0,Annual Housing Cost,Annual Housing Cost,Annual Housing and Transportation Cost,Annual Housing and Transportation Cost,Annual Median Household Income,Annual Median Household Income,Annual Transportation Cost,Annual Transportation Cost,Difference Annual Median Household Income and Total Annual Costs,Difference Annual Median Household Income and Total Annual Costs,Households,Households,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Monthly Housing Cost,Monthly Housing Cost,Monthly Housing and Transportation Cost,Monthly Housing and Transportation Cost,Monthly Median Household Income,Monthly Median Household Income,Monthly Transportation Cost,Monthly Transportation Cost,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Monthly Median Household Income,Transportation Cost as % of Monthly Median Household Income
Unnamed: 0_level_1,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019,2015,2019
Unnamed: 0_level_2,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None,None
NAME,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3
"Adams city, Tennessee",14064.0,17520.0,29718.0,35557.0,56667.0,55357.0,15654.0,18037.0,26949.0,19800.0,274.0,210.0,24.818678,31.649114,24.818678,31.649114,52.443221,64.232166,52.443221,64.232166,1172.0,1460.0,2476.5,2963.083333,4722.25,4613.083333,1304.5,1503.083333,27.624543,32.583052,27.624543,32.583052
"Allen County, Kentucky",10116.0,10116.0,24351.0,26210.0,41326.0,44036.0,14235.0,16094.0,16975.0,17826.0,7774.0,7605.0,24.478537,22.972114,24.478537,22.972114,58.924164,59.519484,58.924164,59.519484,843.0,843.0,2029.25,2184.166667,3443.833333,3669.666667,1186.25,1341.166667,34.445627,36.54737,34.445627,36.54737
"Ashland City town, Tennessee",11424.0,12624.0,25115.0,27110.0,39777.0,48654.0,13691.0,14486.0,14662.0,21544.0,1974.0,1986.0,28.720115,25.946479,28.720115,25.946479,63.139503,55.719982,63.139503,55.719982,952.0,1052.0,2092.916667,2259.166667,3314.75,4054.5,1140.916667,1207.166667,34.419388,29.773503,34.419388,29.773503


In [209]:
#get a list of the varaibles to loop through by indexing into the first level only of the column headers
first_level = df_pivot.columns.get_level_values(0).unique().tolist()
#remove percentages - don't want change metrics on them
first_level = [item for item in first_level if '%' not in item]

In [210]:
#pass the dataframe, the list of variables, time frames, and years through the "calculate change" function
data = calculate_changes(df_pivot, first_level, time_frames = time_frames, years = years)

In [211]:
#reformat and rename columns
data = data.stack([1, 1])
data = data.reset_index(drop = False)
data = data.rename(columns = {'level_1':'Year', 'level_2':'Time Frame'})

In [212]:
# data['GEO_ID'] = data['NAME'].map(geotogeoid)
data['Source'] = 'Center for Neighborhood Technology'

In [213]:
#final check
data.tail()

Unnamed: 0,NAME,Year,Time Frame,Annual Housing Cost,Annual Housing Cost % Change,Annual Housing Cost Change,Annual Housing and Transportation Cost,Annual Housing and Transportation Cost % Change,Annual Housing and Transportation Cost Change,Annual Median Household Income,Annual Median Household Income % Change,Annual Median Household Income Change,Annual Transportation Cost,Annual Transportation Cost % Change,Annual Transportation Cost Change,Difference Annual Median Household Income and Total Annual Costs,Difference Annual Median Household Income and Total Annual Costs % Change,Difference Annual Median Household Income and Total Annual Costs Change,Households,Households % Change,Households Change,Housing Cost as % of Annual Median Household Income,Housing Cost as % of Monthly Median Household Income,Housing and Transportation Cost as % of Annual Median Household Income,Housing and Transportation Cost as % of Monthly Median Household Income,Monthly Housing Cost,Monthly Housing Cost % Change,Monthly Housing Cost Change,Monthly Housing and Transportation Cost,Monthly Housing and Transportation Cost % Change,Monthly Housing and Transportation Cost Change,Monthly Median Household Income,Monthly Median Household Income % Change,Monthly Median Household Income Change,Monthly Transportation Cost,Monthly Transportation Cost % Change,Monthly Transportation Cost Change,Transportation Cost as % of Annual Median Household Income,Transportation Cost as % of Monthly Median Household Income,Source
3530,Wilson Incorporated,2019.0,,45348.0,,,92209.0,,,192788.0,,,46861.0,,,100579.0,,,24535.0,,,23.522211,23.522211,47.829222,47.829222,3779.0,,,7684.083333,,,16065.666667,,,3905.083333,,,24.307011,24.307011,Center for Neighborhood Technology
3531,Wilson Incorporated,,2015-2019,,9.854651,4068.0,,11.229192,9309.0,,25.615247,39313.0,,12.592504,5241.0,,42.513638,30004.0,,15.355682,3266.0,,,,,,9.854651,339.0,,11.229192,775.75,,25.615247,3276.083333,,12.592504,436.75,,,Center for Neighborhood Technology
3532,Wilson Unincorporated,2015.0,,-25344.0,,,-52668.0,,,-92405.0,,,-27324.0,,,-39737.0,,,23259.0,,,27.427087,27.427087,56.996916,56.996916,-2112.0,,,-4389.0,,,-7700.416667,,,-2277.0,,,29.569828,29.569828,Center for Neighborhood Technology
3533,Wilson Unincorporated,2019.0,,-27912.0,,,-58768.0,,,-116797.0,,,-30856.0,,,-58029.0,,,25129.0,,,23.897874,23.897874,50.316361,50.316361,-2326.0,,,-4897.333333,,,-9733.083333,,,-2571.333333,,,26.418487,26.418487,Center for Neighborhood Technology
3534,Wilson Unincorporated,,2015-2019,,10.132576,-2568.0,,11.581985,-6100.0,,26.39684,-24392.0,,12.926365,-3532.0,,46.032665,-18292.0,,8.039899,1870.0,,,,,,10.132576,-214.0,,11.581985,-508.333333,,26.39684,-2032.666667,,12.926365,-294.333333,,,Center for Neighborhood Technology


In [214]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3535 entries, 0 to 3534
Data columns (total 40 columns):
 #   Column                                                                     Non-Null Count  Dtype  
---  ------                                                                     --------------  -----  
 0   NAME                                                                       3535 non-null   object 
 1   Year                                                                       3535 non-null   object 
 2   Time Frame                                                                 3535 non-null   object 
 3   Annual Housing Cost                                                        2323 non-null   float64
 4   Annual Housing Cost % Change                                               1154 non-null   float64
 5   Annual Housing Cost Change                                                 1154 non-null   float64
 6   Annual Housing and Transportation Cost                  

In [215]:
#export to the SQLite database
conn = sq.connect('../../Outputs/Dem_Transpo_Housing_Collection.db')
data.to_sql('CNT_HT_Annual_Change', conn, if_exists = 'replace', index = False)

3535

In [216]:
data.to_csv('bgs.csv')