###### Imports and Settings

In [22]:
import pandas as pd
import numpy as np
import requests
import pickle
from collections import deque
from functools import reduce
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 150)
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None  # default='warn'

In [23]:
import sys
sys.path.append("../../../Functions and Dictionaries") # Adds higher directory to python modules path
import geodict
GNRC = geodict.GNRC
KY = geodict.KY
censusplaces = geodict.censusplaces

# American Community Survey 2021 1 Year Estimates

In [24]:
#read in API key
with open('api_keys.pkl', 'rb') as keys_file:
        keys_dict_2 = pickle.load(keys_file)

In [25]:
#variable containing Census API key
api_key = keys_dict_2['CENSUS']

## Read In Data Guide

In [26]:
dataguide = pd.read_csv('../../Data Guides/DATA GUIDE ACS 2020_2021 5YR.csv', dtype = str)
dataguide['ID'] = dataguide['ID'].astype(int)

In [27]:
dg1 = dataguide[dataguide['ID'].between(1, 46)]
dg2 = dataguide[dataguide['ID'].between(47, 92)]
dg3 = dataguide[dataguide['ID'].between(93, 138)]
dg4 = dataguide[dataguide['ID'].between(139, 184)]
dg5 = dataguide[dataguide['ID'].between(185, 230)]
dg6 = dataguide[dataguide['ID'].between(231, 276)]
dg7 = dataguide[dataguide['ID'].between(277, 322)]
dg8 = dataguide[dataguide['ID'].between(323, 368)]
dg9 = dataguide[dataguide['ID'].between(369, 414)]
dg10 = dataguide[dataguide['ID'].between(415, 460)]
dg11 = dataguide[dataguide['ID'].between(461, 506)]
dg12 = dataguide[dataguide['ID'].between(507, 552)]
dg13 = dataguide[dataguide['ID'].between(553, 598)]
dg14 = dataguide[dataguide['ID'].between(599, 644)]
dg15 = dataguide[dataguide['ID'].between(645, 690)]
dg16 = dataguide[dataguide['ID'].between(691, 736)]
dg17 = dataguide[dataguide['ID'].between(737, 782)]
dg18 = dataguide[dataguide['ID'].between(783, 828)]
dg19 = dataguide[dataguide['ID'].between(829, 874)]
dg20 = dataguide[dataguide['ID'].between(875, 920)]
dg21 = dataguide[dataguide['ID'].between(921, 966)]
dg22 = dataguide[dataguide['ID'].between(967, 1012)]
dg23 = dataguide[dataguide['ID'].between(1013, 1058)]
dg24 = dataguide[dataguide['ID'].between(1059, 1104)]
dg25 = dataguide[dataguide['ID'].between(1105, 1150)]
dg26 = dataguide[dataguide['ID'].between(1151, 1196)]
dg27 = dataguide[dataguide['ID'].between(1197, 1242)]
dg28 = dataguide[dataguide['ID'].between(1243, 1287)]
dg29 = dataguide[dataguide['ID'].between(1288, 1332)]
dg30 = dataguide[dataguide['ID'].between(1333, 1377)]
dg31 = dataguide[dataguide['ID'].between(1378, 1422)]
dg32 = dataguide[dataguide['ID'].between(1423, 1467)]
dg33 = dataguide[dataguide['ID'].between(1468, 1512)]
dg34 = dataguide[dataguide['ID'].between(1513, 1557)]
dg35 = dataguide[dataguide['ID'].between(1558, 1602)]
dg36 = dataguide[dataguide['ID'].between(1603, 1647)]
dg37 = dataguide[dataguide['ID'].between(1648, 1692)]
dg38 = dataguide[dataguide['ID'].between(1693, 1737)]
dg39 = dataguide[dataguide['ID'].between(1738, 1782)]
dg40 = dataguide[dataguide['ID'].between(1783, 1827)]
dg41 = dataguide[dataguide['ID'].between(1828, 1872)]
dg42 = dataguide[dataguide['ID'].between(1873, 1917)]
dg43 = dataguide[dataguide['ID'].between(1918, 1962)]
dg44 = dataguide[dataguide['ID'].between(1963, 2007)]
dg45 = dataguide[dataguide['ID'].between(2008, 2052)]
dg46 = dataguide[dataguide['ID'].between(2053, 2097)]
dg47 = dataguide[dataguide['ID'].between(2098, 2142)]
dg48 = dataguide[dataguide['ID'].between(2143, 2187)]
dg49 = dataguide[dataguide['ID'].between(2188, 2232)]
dg50 = dataguide[dataguide['ID'].between(2233, 2277)]
dg51 = dataguide[dataguide['ID'].between(2278, 2322)]
dg52 = dataguide[dataguide['ID'].between(2323, 2367)]
dg53 = dataguide[dataguide['ID'].between(2368, 2412)]
dg54 = dataguide[dataguide['ID'].between(2413, 2457)]
dg55 = dataguide[dataguide['ID'].between(2458, 2502)]
dg56 = dataguide[dataguide['ID'].between(2503, 2547)]
dg57 = dataguide[dataguide['ID'].between(2548, 2592)]
dg58 = dataguide[dataguide['ID'].between(2593, 2637)]
dg59 = dataguide[dataguide['ID'].between(2638, 2682)]
dg60 = dataguide[dataguide['ID'].between(2683, 2727)]
dg61 = dataguide[dataguide['ID'].between(2728, 2772)]
dg62 = dataguide[dataguide['ID'].between(2773, 2817)]
dg63 = dataguide[dataguide['ID'].between(2818, 2862)]
dg64 = dataguide[dataguide['ID'].between(2863, 2907)]
dg65 = dataguide[dataguide['ID'].between(2908, 2952)]
dg66 = dataguide[dataguide['ID'].between(2953, 2997)]
dg67 = dataguide[dataguide['ID'].between(2998, 3042)]
dg68 = dataguide[dataguide['ID'].between(3043, 3086)]
dg69 = dataguide[dataguide['ID'].between(3087, 3131)]
dg70 = dataguide[dataguide['ID'].between(3132, 3176)]
dg71 = dataguide[dataguide['ID'].between(3177, 3221)]
dg72 = dataguide[dataguide['ID'].between(3222, 3266)]
dg73 = dataguide[dataguide['ID'].between(3267, 3311)]

In [28]:
dfs = [dg1, dg2, dg3, dg4, dg5, dg6, dg7, dg8, dg9, dg10, dg11, dg12, dg13, dg14, dg15, dg16, dg17, dg18, dg19, dg20, dg21, dg22, dg23, dg24, dg25, 
       dg26, dg27, dg28, dg29, dg30, dg31, dg32, dg33, dg34, dg35, dg36, dg37, dg38, dg39, dg40, dg41, dg42, dg43, dg44, dg45, dg46, dg47, dg48, dg49,
       dg50, dg51, dg52, dg53, dg54, dg55, dg56, dg57, dg58, dg59, dg60, dg61, dg62, dg63, dg64, dg65, dg66, dg67, dg68, dg69, dg70, dg71, dg72, dg73]
dfs = [dg29]

In [29]:
droppers = ['ID', 'Data Point']
for df in dfs:
    df.drop(droppers, axis = 1, inplace = True)

In [30]:
# droppers = ['ID', 'Data Point']
# for df in dfs1:
#     df.drop(droppers, axis = 1, inplace = True)

In [31]:
#url string and list parameters for column head and tail
url_str= 'https://api.census.gov/data/2021/acs/acs1?key='+api_key
head1 = 'NAME' 
head2 = 'GEO_ID'
tail_cols1 = 'StateFIPS'
tail_cols2 = 'GeoFIPS'

In [32]:
results = []
for df in dfs:
    dataguide = df
    for col_name, col_data in df.items():
        var_list = list(dataguide['Variable']) #make variables list
        var_list = deque(var_list)
        var_list.appendleft(head2)
        var_list.appendleft(head1)
        col_list = list(dataguide['Column Name']) #make columns list
        col_list.append(tail_cols1)
        col_list.append(tail_cols2)
        col_list = deque(col_list)
        col_list.appendleft(head2)
        col_list.appendleft(head1)
        predicates= {} #call for all counties in state of TN
        get_vars= var_list
        predicates["get"]= ",". join(get_vars)
        predicates["for"]= "county:*"
        predicates["in"]= "state:47"                                                             
        data = requests.get(url_str, params= predicates)                                                                
        col_names = col_list
        df = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
        predicates= {} #call for all counties in state of KY
        get_vars= var_list
        predicates["get"]= ",". join(get_vars)
        predicates["for"]= "county:*"
        predicates["in"]= "state:21"                                                             
        data = requests.get(url_str, params= predicates)                                                                
        col_names = col_list
        kycos = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
        kycos = kycos.loc[kycos['GeoFIPS'].isin(KY)] #filter for counties of concern in KY
        df = pd.concat([df, kycos], axis = 0)
        col_list.remove(tail_cols2) #adjust the column list for different formats of geos 
        predicates= {} #call for state of TN
        get_vars= var_list
        predicates["get"]= ",". join(get_vars)
        predicates["for"]= "state:47"
        data= requests.get(url_str, params= predicates)
        col_names = col_list
        state = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
        state['GeoFIPS'] = '0' #fill in the blank GeoFIPS column
        df = pd.concat([df, state], axis = 0)
        predicates = {} #call for US
        get_vars = var_list
        predicates["get"] = ",". join(get_vars)
        predicates["for"] = "us:*"
        data = requests.get(url_str, params= predicates)
        col_names = col_list
        national = pd.DataFrame(columns=col_names, data=data.json()[1:], dtype=str)
        national['GeoFIPS'] = '0' #fill in the blank GeoFIPS column
        df = pd.concat([df, national], axis = 0)
        results.append(df)
new_df = pd.concat(results, axis = 1)
new_df = new_df.transpose().reset_index(drop = False).drop_duplicates()
new_df.columns = new_df.iloc[0]
new_df = new_df.set_index('NAME').transpose().drop(columns = ['StateFIPS', 'GeoFIPS']).reset_index(drop = True)
new_df = new_df.rename_axis(None, axis = 1)
print('Okay Finished')

Okay Finished


In [33]:
new_df.head()

Unnamed: 0,NAME,GEO_ID,lfstatus_f_62to64_inlf_civilian,lfstatus_f_62to64_inlf_civilian_employed,lfstatus_f_62to64_inlf_civilian_unemployed,lfstatus_f_62to64_notinlf,lfstatus_f_65to69,lfstatus_f_65to69_inlf,lfstatus_f_65to69_inlf_employed,lfstatus_f_65to69_inlf_unemployed,lfstatus_f_65to69_notinlf,lfstatus_f_70to74,lfstatus_f_70to74_inlf,lfstatus_f_70to74_inlf_employed,lfstatus_f_70to74_inlf_unemployed,lfstatus_f_70to74_notinlf,lfstatus_f_75andolder,lfstatus_f_75andolder_inlf,lfstatus_f_75andolder_inlf_employed,lfstatus_f_75andolder_inlf_unemployed,lfstatus_f_75andolder_notinlf,fb_yearentry_total_series,fb_yearentry_2010orlater,fb_yearentry_2010orlater_uscitizen,fb_yearentry_2010orlater_notuscitizen,fb_yearentry_2000to2009,fb_yearentry_2000to2009_uscitizen,fb_yearentry_2000to2009_notuscitizen,fb_yearentry_1990to1999,fb_yearentry_1990to1999_uscitizen,fb_yearentry_1990to1999_notuscitizen,fb_yearentry_before1990,fb_yearentry_before1990_uscitizen,fb_yearentry_before1990_notuscitizen,snap_total_households_series,snap_householdswith,snap_householdswithout,poverty_total_bysexbyage_series,poverty_belowlevel,poverty_belowlevel_m,poverty_belowlevel_m_u5,poverty_belowlevel_m_5,poverty_belowlevel_m_6to11,poverty_belowlevel_m_12to14,poverty_belowlevel_m_15,poverty_belowlevel_m_16to17,poverty_belowlevel_m_18to24
0,"Anderson County, Tennessee",0500000US47001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31735,4447,27288,75965,11371,5541,75,266,967,488,87,226,401
1,"Hamilton County, Tennessee",0500000US47065,3466.0,3466.0,0.0,4377.0,12278.0,3827.0,3781.0,46.0,8451.0,9509.0,1196.0,1156.0,40.0,8313.0,16172.0,604.0,554.0,50.0,15568.0,,,,,,,,,,,,,,151676,17012,134664,358746,44949,19783,2005,500,2864,1370,265,343,3103
2,"Knox County, Tennessee",0500000US47093,4809.0,4639.0,170.0,5631.0,13907.0,4067.0,3921.0,146.0,9840.0,12396.0,2251.0,2121.0,130.0,10145.0,17956.0,697.0,553.0,144.0,17259.0,,,,,,,,,,,,,,198914,20498,178416,474327,57309,27418,1458,441,1885,1069,342,208,8335
3,"Rutherford County, Tennessee",0500000US47149,2884.0,2884.0,0.0,2685.0,8829.0,2654.0,2583.0,71.0,6175.0,4934.0,748.0,709.0,39.0,4186.0,8275.0,264.0,264.0,0.0,8011.0,,,,,,,,,,,,,,121944,10097,111847,346049,29652,13057,723,116,1459,967,435,337,2719
4,"Sevier County, Tennessee",0500000US47155,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,36670,4767,31903,97691,14547,7089,452,281,854,583,0,90,810


In [34]:
data = new_df[['NAME', 'GEO_ID', 'poverty_total_bysexbyage_series', 'poverty_belowlevel']]

In [35]:
#data = new_df

In [36]:
data = data.set_index(['NAME', 'GEO_ID']).transpose()
data.head()

NAME,"Anderson County, Tennessee","Hamilton County, Tennessee","Knox County, Tennessee","Rutherford County, Tennessee","Sevier County, Tennessee","Sumner County, Tennessee","Davidson County, Tennessee","Greene County, Tennessee","Madison County, Tennessee","Maury County, Tennessee","Montgomery County, Tennessee","Blount County, Tennessee","Bradley County, Tennessee","Putnam County, Tennessee","Robertson County, Tennessee","Shelby County, Tennessee","Sullivan County, Tennessee","Washington County, Tennessee","Williamson County, Tennessee","Wilson County, Tennessee",Tennessee,United States
GEO_ID,0500000US47001,0500000US47065,0500000US47093,0500000US47149,0500000US47155,0500000US47165,0500000US47037,0500000US47059,0500000US47113,0500000US47119,0500000US47125,0500000US47009,0500000US47011,0500000US47141,0500000US47147,0500000US47157,0500000US47163,0500000US47179,0500000US47187,0500000US47189,0400000US47,0100000US
poverty_total_bysexbyage_series,75965,358746,474327,346049,97691,198956,682243,68465,94847,103377,223997,135715,106861,77649,73449,908687,157230,129239,254338,149101,6811613,324173084
poverty_belowlevel,11371,44949,57309,29652,14547,16600,102601,6508,16471,10694,25075,11953,10299,9560,8228,162280,28111,18935,8947,11833,927587,41393176


In [37]:
numcols = list(data.columns)
numcols
data[numcols] = data[numcols].astype(float)

In [38]:
GNRCCounties = [data[('Stewart County, Tennessee', '0500000US47161')],data[('Montgomery County, Tennessee', '0500000US47125')],
                data[('Houston County, Tennessee', '0500000US47083')],data[('Humphreys County, Tennessee', '0500000US47085')],
                data[('Dickson County, Tennessee', '0500000US47043')],data[('Cheatham County, Tennessee', '0500000US47021')],
                data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
                data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
                data[('Trousdale County, Tennessee', '0500000US47169')],data[('Williamson County, Tennessee', '0500000US47187')],
                data[('Rutherford County, Tennessee', '0500000US47149')]]
data['GNRC'] = sum(GNRCCounties)
GNRCCountiesAll = [data[('Stewart County, Tennessee', '0500000US47161')],data[('Montgomery County, Tennessee', '0500000US47125')],
                   data[('Houston County, Tennessee', '0500000US47083')],data[('Humphreys County, Tennessee', '0500000US47085')],
                   data[('Dickson County, Tennessee', '0500000US47043')],data[('Cheatham County, Tennessee', '0500000US47021')],
                   data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
                   data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
                   data[('Trousdale County, Tennessee', '0500000US47169')],data[('Williamson County, Tennessee', '0500000US47187')],
                   data[('Rutherford County, Tennessee', '0500000US47149')],data[('Maury County, Tennessee', '0500000US47119')]]
data['GNRC Region'] = sum(GNRCCountiesAll)
MPOCounties = [data[('Robertson County, Tennessee', '0500000US47147')],data[('Sumner County, Tennessee', '0500000US47165')],
               data[('Davidson County, Tennessee', '0500000US47037')],data[('Wilson County, Tennessee', '0500000US47189')],
               data[('Williamson County, Tennessee', '0500000US47187')],data[('Rutherford County, Tennessee', '0500000US47149')],
               data[('Maury County, Tennessee', '0500000US47119')]]
data['MPO'] = sum(MPOCounties)
RuthInc = [data[('Eagleville city, Tennessee', '1600000US4722360')],data[('La Vergne city, Tennessee', '1600000US4741200')],
           data[('Murfreesboro city, Tennessee', '1600000US4751560')],data[('Smyrna town, Tennessee', '1600000US4769420')]]
data[('Rutherford Incorporated', 'None')] = sum(RuthInc)
data[('Rutherford Unincorporated', 'None')] = data[('Rutherford County, Tennessee', '0500000US47149')] - data[('Rutherford Incorporated', 'None')]
WilsonInc = [data[('Lebanon city, Tennessee', '1600000US4741520')],data[('Mount Juliet city, Tennessee', '1600000US4750780')],
             data[('Watertown city, Tennessee', '1600000US4778320')]]
data[('Wilson Incorporated', 'None')] = sum(WilsonInc)
data[('Wilson Unincorporated', 'None')] = data[('Wilson County, Tennessee', '0500000US47189')] - data[('Wilson Incorporated', 'None')]
CheathInc = [data[('Ashland City town, Tennessee', '1600000US4702180')],data[('Kingston Springs town, Tennessee', '1600000US4739660')],
             data[('Pegram town, Tennessee', '1600000US4757480')],data[('Pleasant View city, Tennessee', '1600000US4759560')]]
data[('Cheatham Incorporated', 'None')] = sum(CheathInc)
data[('Cheatham Unincorporated', 'None')] = data[('Cheatham County, Tennessee', '0500000US47021')] - data[('Cheatham Incorporated', 'None')]
DicksInc = [data[('Burns town, Tennessee', '1600000US4709880')],data[('Charlotte town, Tennessee', '1600000US4713080')],
            data[('Dickson city, Tennessee', '1600000US4720620')],data[('Slayden town, Tennessee', '1600000US4769080')],
            data[('Vanleer town, Tennessee', '1600000US4776860')],data[('White Bluff town, Tennessee', '1600000US4779980')]]
data[('Dickson Incorporated', 'None')] = sum(DicksInc)
data[('Dickson Unincorporated', 'None')] = data[('Dickson County, Tennessee', '0500000US47043')] - data[('Dickson Incorporated', 'None')]
HumphInc = [data[('McEwen city, Tennessee', '1600000US4744840')],data[('New Johnsonville city, Tennessee', '1600000US4752820')],
            data[('Waverly city, Tennessee', '1600000US4778560')]]
data[('Humphreys Incorporated', 'None')] = sum(HumphInc)
data[('Humphreys Unincorporated', 'None')] = data[('Humphreys County, Tennessee', '0500000US47085')] - data[('Humphreys Incorporated', 'None')]
data[('Montgomery Incorporated', 'None')] = data[('Clarksville city, Tennessee', '1600000US4715160')]
data[('Montgomery Unincorporated', 'None')] = data[('Montgomery County, Tennessee', '0500000US47125')] - data[('Montgomery Incorporated', 'None')]

KeyError: ('Stewart County, Tennessee', '0500000US47161')

In [None]:
data = data.transpose().reset_index()
data.head()

In [None]:
data = data.loc[(data['NAME'] == 'GNRC')|(data['NAME'] == 'Tennessee')|(data['NAME'] == 'United States')]

In [None]:
data.info()

In [None]:
#data.to_feather('../../Raw Data/ACS20211YR')

In [None]:
#data.to_csv('../../Raw Data/ACS20211YR.csv')

In [None]:
data.to_csv('../../Raw Data/pov2021.csv')