In [1]:
from configparser import ConfigParser
from census import Census
import pandas as pd
import numpy as np

## Census Data Pull

This analysis utilizes data from the U.S. Census Bureau's American Community Survey (ACS), an annual survey covering social, economic, demographic, and housing characteristics. The Census Bureau maintains an API for easy access to their public datasets, including the ACS.

Request a Census Data API Key [here](https://api.census.gov/data/key_signup.html). For more information on the census library, a wrapper for the Census API, visit [Python Package Index](https://pypi.org/project/census/).

In [2]:
# Retrieve the Census API key from the local configuration file.
config = ConfigParser()

# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so a missing config.ini would otherwise only show up
# below as an opaque KeyError on 'CENSUS'. Fail early with a clear message.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "Could not read 'config.ini'. Create it with a [CENSUS] section "
        "containing an API entry that holds your Census Data API key."
    )

# The key is stored under the API option of the [CENSUS] section.
API_KEY = config['CENSUS']['API']

# Client object used by the later cells to query the Census API.
c = Census(API_KEY)

List variables needed from the [Census Bureau](https://api.census.gov/data/2022/acs/acs5/variables.html)

In [3]:
# ACS field codes to request, grouped by theme. The two long runs of
# sex-by-age codes are generated from their numeric suffixes; the age band
# behind each suffix is listed in the comment above each run.
variables = (
    'NAME',
    'B01003_001E',  # total population

    # --- Economic ---
    'B17001_002E',  # poverty
    'B19013_001E',  # median household income

    # --- Race & ethnicity ---
    'B02001_002E',  # White
    'B02001_003E',  # Black or African American
    'B02001_004E',  # American Indian and Alaska Native
    'B02001_005E',  # Asian
    'B02001_006E',  # Native Hawaiian and other Pacific Islander
    'B02001_007E',  # some other race
    'B02001_008E',  # two or more races
    'B03001_003E',  # Hispanic or Latino

    # --- Sex totals ---
    'B01001_002E',  # male
    'B01001_026E',  # female

    # --- Male by age: B01001_003E .. B01001_025E, in this order ---
    #   <5, 5-9, 10-14, 15-17, 18-19, 20, 21, 22-24, 25-29, 30-34, 35-39,
    #   40-44, 45-49, 50-54, 55-59, 60-61, 62-64, 65-66, 67-69, 70-74,
    #   75-79, 80-84, 85+
    *(f'B01001_{suffix:03d}E' for suffix in range(3, 26)),

    # --- Female by age: B01001_027E .. B01001_049E, same age bands ---
    #   <5, 5-9, 10-14, 15-17, 18-19, 20, 21, 22-24, 25-29, 30-34, 35-39,
    #   40-44, 45-49, 50-54, 55-59, 60-61, 62-64, 65-66, 67-69, 70-74,
    #   75-79, 80-84, 85+
    *(f'B01001_{suffix:03d}E' for suffix in range(27, 50)),
)

Instead of the boundaries of the United States Postal Service's ZIP Codes, the Census Bureau uses ZIP Code Tabulation Areas (ZCTAs). ZCTAs are geographic areas analogous to ZIP Codes, composed of Census tabulation blocks, the smallest geographic unit used by the Census Bureau.

In [4]:
# Request every field in `variables` from the ACS 5-year endpoint, for all
# ZCTAs at once.
# NOTE(review): no year is passed, so the data vintage is presumably whatever
# the census wrapper defaults to -- confirm it matches the intended ACS release.
zcta_wildcard = {"for": "zip code tabulation area:*"}  # "*" = all ZCTAs
print('Sending request to Census API.')
response = c.acs5.get(fields=variables, geo=zcta_wildcard)

# Load the API response into a pandas DataFrame for cleaning.
data = pd.DataFrame(response)

Sending request to Census API.


In [5]:
# create final dataframe
# Start from the two identifier columns of the raw pull, with the ZCTA column
# renamed to "Zip". The explicit copy keeps `data` untouched by the column
# assignments below.
# NOTE(review): 'GEO_ID' is not listed in `variables`; this cell relies on the
# API response carrying it anyway -- confirm.
census = (
    data.loc[:, ['GEO_ID', 'zip code tabulation area']]
    .copy(deep=True)
    .rename(columns={"zip code tabulation area": "Zip"})
)

# clean data
census['Pop'] = data['B01003_001E'].astype(int)

# -666666666 is replaced with None so it is not read as a real income value.
# NOTE(review): presumably the Census "estimate not available" sentinel -- confirm.
census['Income'] = np.where(data['B19013_001E'] == -666666666.0, None, data['B19013_001E'])

# Shares of total population. Where Pop is 0 the division is not a finite number.
# NOTE(review): B17001 may be counted over a narrower universe than total
# population -- confirm that Pop is the intended denominator for Poverty%.
census['Poverty%'] = data['B17001_002E'] / census['Pop']
census['White%'] = data['B02001_002E'] / census['Pop']
census['Black%'] = data['B02001_003E'] / census['Pop']
census['Hispanic%'] = data['B03001_003E'] / census['Pop']
# B02001_005E is the Asian count; B02001_004E (used here previously) is
# American Indian and Alaska Native.
census['Asian%'] = data['B02001_005E'] / census['Pop']
census['TwoOrMore%'] = data['B02001_008E'] / census['Pop']
census['Male%'] = data['B01001_002E'] / census['Pop']
census['Female%'] = data['B01001_026E'] / census['Pop']

# Age bands under 18: <5, 5-9, 10-14, 15-17 for males (003-006) and females (027-030).
under18_cols = [
    'B01001_003E', 'B01001_004E', 'B01001_005E', 'B01001_006E',
    'B01001_027E', 'B01001_028E', 'B01001_029E', 'B01001_030E',
]
census['Under18%'] = data[under18_cols].sum(axis=1) / census['Pop']

# Age bands 65 and over: 65-66 through 85+ for males (020-025) and females (044-049).
over65_cols = [
    'B01001_020E', 'B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_024E', 'B01001_025E',
    'B01001_044E', 'B01001_045E', 'B01001_046E', 'B01001_047E', 'B01001_048E', 'B01001_049E',
]
census['65+%'] = data[over65_cols].sum(axis=1) / census['Pop']

print(census)

               GEO_ID    Zip    Pop   Income  Poverty%    White%    Black%  \
0      860Z200US00601  00601  16834  17526.0  0.620173  0.841749  0.020672   
1      860Z200US00602  00602  37642  20260.0  0.472026  0.490914  0.014744   
2      860Z200US00603  00603  49075  17703.0  0.479898  0.737972  0.035028   
3      860Z200US00606  00606   5590  19603.0  0.540429  0.665653  0.001610   
4      860Z200US00610  00610  25542  22796.0  0.454036  0.437789  0.022551   
...               ...    ...    ...      ...       ...       ...       ...   
33769  860Z200US99923  99923     25     None  0.000000  1.000000  0.000000   
33770  860Z200US99925  99925    920  80313.0  0.155435  0.448913  0.004348   
33771  860Z200US99926  99926   1465  78365.0  0.151536  0.073720  0.003413   
33772  860Z200US99927  99927     14     None  0.000000  1.000000  0.000000   
33773  860Z200US99929  99929   2133  61125.0  0.099391  0.569620  0.000469   

       Hispanic%    Asian%  TwoOrMore%     Male%   Female%  Und