In [1]:
from configparser import ConfigParser
from census import Census
import pandas as pd
import numpy as np
import time
import os

## Census Data Pull

This analysis utilizes data from the U.S. Census Bureau's American Community Survey (ACS), an annual survey covering social, economic, demographic, and housing characteristics. The Census Bureau maintains an API for easy access to their public datasets, including the ACS.

Request a Census Data API Key [here](https://api.census.gov/data/key_signup.html). For more information on the census library, a wrapper for the Census API, visit [Python Package Index](https://pypi.org/project/census/).

In [2]:
# retrieve API from configuration file
config = ConfigParser()
config.read('config.ini')
API_KEY = config['CENSUS']['API']
c = Census(API_KEY)

See the list of variables available from the [Census Bureau](https://api.census.gov/data/2022/acs/acs5/variables.html).

In [3]:
variables = (
    'GEO_ID',
    'TRACT',
    'COUNTY',
    'STATE',
    'NAME',
    "B01003_001E", # Total Population
    # Economic
    "B17001_002E", # Poverty
    "B19013_001E", # Median Household Income
    "B23025_002E", # Total in Labor Force
    "B23025_004E", # Employed - Civilian Labor Force
    "B23025_006E", # Employed - Armed Forces
    # Race & Ethnicity
    "B02001_002E", # White
    "B02001_003E", # Black or African American 
    "B02001_004E", # American Indian and Alaska Native
    "B02001_005E", # Asian
    "B02001_006E", # Native Hawaiian and other Pacific Islander 
    "B02001_007E", # Some other race
    "B02001_008E", # Two or more races
    "B03001_003E", # Hispanic or Latino 
    # Age & Sex
    "B01001_002E", # Male
    "B01001_026E", # Female
    "B01001_003E", # Male < 5
    "B01001_004E", # Male 5-9
    "B01001_005E", # Male 10-14
    "B01001_006E", # Male 15-17
    "B01001_007E", # Male 18-19
    "B01001_008E", # Male 20
    "B01001_009E", # Male 21
    "B01001_010E", # Male 22-24
    "B01001_011E", # Male 25-29
    "B01001_012E", # Male 30-34
    "B01001_013E", # Male 35-39
    "B01001_014E", # Male 40-44
    "B01001_015E", # Male 45-49
    "B01001_016E", # Male 50-54
    "B01001_017E", # Male 55-59
    "B01001_018E", # Male 60-61
    "B01001_019E", # Male 62-64
    "B01001_020E", # Male 65-66
    "B01001_021E", # Male 67-69
    "B01001_022E", # Male 70-74
    "B01001_023E", # Male 75-79
    "B01001_024E", # Male 80-84
    "B01001_025E", # Male 85+
    "B01001_027E", # Female < 5
    "B01001_028E", # Female 5-9
    "B01001_029E", # Female 10-14
    "B01001_030E", # Female 15-17
    "B01001_031E", # Female 18-19
    "B01001_032E", # Female 20
    "B01001_033E", # Female 21
    "B01001_034E", # Female 22-24
    "B01001_035E", # Female 25-29
    "B01001_036E", # Female 30-34
    "B01001_037E", # Female 35-39
    "B01001_038E", # Female 40-44
    "B01001_039E", # Female 45-49
    "B01001_040E", # Female 50-54
    "B01001_041E", # Female 55-59
    "B01001_042E", # Female 60-61
    "B01001_043E", # Female 62-64
    "B01001_044E", # Female 65-66
    "B01001_045E", # Female 67-69
    "B01001_046E", # Female 70-74
    "B01001_047E", # Female 75-79
    "B01001_048E", # Female 80-84
    "B01001_049E", # Female 85+
    # Disability status
    "B18101_004E", # Male, <5 with a Disability
    "B18101_007E", # Male, 5-17 with a Disability
    "B18101_010E", # Male, 18-34 with a Disability
    "B18101_013E", # Male, 35-64 with a Disability
    "B18101_016E", # Male, 65-75 with a Disability
    "B18101_019E", # Male, 75+ with a Disability
    "B18101_023E", # Female, <5 with a Disability
    "B18101_026E", # Female, 5-17 with a Disability
    "B18101_029E", # Female, 18-34 with a Disability
    "B18101_032E", # Female, 35-64 with a Disability
    "B18101_035E", # Female, 65-75 with a Disability
    "B18101_038E", # Female, 75+ with a Disability
    # Housing
    "B25001_001E", # Total Housing Units
    "B25002_003E", # Vacant Housing Units
    "B25003_002E"  # Owner-occupied housing units
    
)

We will be pulling data for all census tracts in the U.S. The Census API requires that census tract queries be pulled one state at a time, so we first need a list of the state's [FIPS](https://en.wikipedia.org/wiki/Federal_Information_Processing_Standard_state_code) codes.

In [4]:
# FIPS codes for the 50 states and Washington D.C.
state_fips = [str(n).zfill(2) for n in range(1,57) if n not in (3,7,14,43,52)]


In [5]:
us_df = pd.DataFrame()

# make Census API call
print('Sending request to Census API ...')

for state in state_fips:
    response = c.acs5.state_county_tract(
                        fields = variables,
                        state_fips = state,
                        county_fips = Census.ALL,
                        tract = Census.ALL)

    # convert results to pd dataframe
    state_df = pd.DataFrame(response)

    # concat to final df
    us_df = pd.concat([us_df, state_df], ignore_index = True)

    time.sleep(1)

print('Request sent.')

Sending request to Census API ...
Request sent.


In [6]:
us_df

Unnamed: 0,GEO_ID,TRACT,COUNTY,STATE,NAME,B01003_001E,B17001_002E,B19013_001E,B23025_002E,B23025_004E,...,B18101_019E,B18101_023E,B18101_026E,B18101_029E,B18101_032E,B18101_035E,B18101_038E,B25001_001E,B25002_003E,B25003_002E
0,1400000US01001020100,020100,001,01,Census Tract 201; Autauga County; Alabama,1865.0,286.0,60563.0,738.0,713.0,...,68.0,0.0,6.0,9.0,75.0,18.0,38.0,733.0,33.0,519.0
1,1400000US01001020200,020200,001,01,Census Tract 202; Autauga County; Alabama,1861.0,105.0,57460.0,947.0,868.0,...,0.0,0.0,28.0,13.0,120.0,67.0,6.0,680.0,136.0,429.0
2,1400000US01001020300,020300,001,01,Census Tract 203; Autauga County; Alabama,3492.0,352.0,77371.0,1808.0,1748.0,...,24.0,0.0,0.0,5.0,126.0,64.0,77.0,1431.0,126.0,912.0
3,1400000US01001020400,020400,001,01,Census Tract 204; Autauga County; Alabama,3987.0,408.0,73191.0,1875.0,1837.0,...,163.0,0.0,18.0,51.0,145.0,52.0,189.0,1722.0,56.0,1306.0
4,1400000US01001020501,020501,001,01,Census Tract 205.01; Autauga County; Alabama,4121.0,323.0,79953.0,2504.0,2386.0,...,97.0,0.0,38.0,137.0,73.0,34.0,11.0,1857.0,74.0,971.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84410,1400000US56043000200,000200,043,56,Census Tract 2; Washakie County; Wyoming,3028.0,198.0,56627.0,1656.0,1625.0,...,53.0,0.0,9.0,0.0,93.0,21.0,41.0,1580.0,213.0,1019.0
84411,1400000US56043000301,000301,043,56,Census Tract 3.01; Washakie County; Wyoming,2323.0,90.0,61087.0,1156.0,1151.0,...,11.0,0.0,14.0,0.0,66.0,44.0,43.0,1128.0,180.0,612.0
84412,1400000US56043000302,000302,043,56,Census Tract 3.02; Washakie County; Wyoming,2374.0,215.0,72634.0,1221.0,1185.0,...,43.0,0.0,15.0,1.0,32.0,51.0,96.0,1148.0,49.0,815.0
84413,1400000US56045951100,951100,045,56,Census Tract 9511; Weston County; Wyoming,3275.0,459.0,67715.0,1354.0,1270.0,...,58.0,13.0,0.0,1.0,29.0,77.0,71.0,1557.0,184.0,1256.0


In [7]:
# create final dataframe
census = us_df.loc[:,['GEO_ID','NAME']].copy(deep = True)

# clean data
census['GEO_ID'] =      census['GEO_ID'].str[-11:]   
census['Pop'] =         us_df['B01003_001E'].astype(int)
census['Hous_Units'] =  us_df['B25001_001E'].astype(int)
census['Income'] =      np.where(us_df['B19013_001E'] == -666666666.0, None, us_df['B19013_001E'])
census['Poverty%'] =    us_df['B17001_002E']/census['Pop']
census['White%'] =      us_df['B02001_002E']/census['Pop'] 
census['Black%'] =      us_df['B02001_003E']/census['Pop'] 
census['Hispanic%'] =   us_df['B03001_003E']/census['Pop']
census['Asian%'] =      us_df['B02001_004E']/census['Pop']
census['TwoOrMore%'] =  us_df['B02001_008E']/census['Pop']  
census['Male%'] =       us_df['B01001_002E']/census['Pop']
census['Female%'] =     us_df['B01001_026E']/census['Pop']
census['Under18%'] =    us_df[['B01001_003E','B01001_004E','B01001_005E','B01001_006E','B01001_027E',
                              'B01001_028E','B01001_029E','B01001_030E']].sum(axis=1)/census['Pop']   
census['65+%'] =        us_df[['B01001_020E','B01001_021E','B01001_022E','B01001_023E','B01001_024E',
                              'B01001_025E','B01001_044E','B01001_045E','B01001_046E','B01001_047E',
                              'B01001_048E','B01001_049E']].sum(axis=1)/census['Pop']
census['Employment%'] = (us_df['B23025_004E']+us_df['B23025_006E'])/us_df['B23025_002E']
census['Disability%'] =  us_df[['B18101_004E','B18101_007E','B18101_010E','B18101_013E','B18101_016E',
                               'B18101_019E','B18101_023E','B18101_026E','B18101_029E','B18101_032E',
                               'B18101_035E','B18101_038E']].sum(axis=1)/census['Pop']
census['VacantHous%'] =  us_df['B25002_003E']/census['Hous_Units']
census['OwnOcpHous%'] =  us_df['B25003_002E']/census['Hous_Units']


In [8]:
census

Unnamed: 0,GEO_ID,NAME,Pop,Hous_Units,Income,Poverty%,White%,Black%,Hispanic%,Asian%,TwoOrMore%,Male%,Female%,Under18%,65+%,Employment%,Disability%,VacantHous%,OwnOcpHous%
0,01001020100,Census Tract 201; Autauga County; Alabama,1865,733,60563.0,0.153351,0.804290,0.111528,0.043432,0.000000,0.055228,0.519035,0.480965,0.239678,0.194638,0.974255,0.200000,0.045020,0.708049
1,01001020200,Census Tract 202; Autauga County; Alabama,1861,680,57460.0,0.056421,0.363246,0.559914,0.001075,0.000000,0.072542,0.544331,0.455669,0.098872,0.157442,0.946146,0.180548,0.200000,0.630882
2,01001020300,Census Tract 203; Autauga County; Alabama,3492,1431,77371.0,0.100802,0.691008,0.250859,0.012600,0.000000,0.048110,0.460195,0.539805,0.261741,0.134593,0.981748,0.158362,0.088050,0.637317
3,01001020400,Census Tract 204; Autauga County; Alabama,3987,1722,73191.0,0.102333,0.879609,0.074492,0.011036,0.001756,0.038375,0.510158,0.489842,0.145974,0.260095,0.990933,0.222222,0.032520,0.758420
4,01001020501,Census Tract 205.01; Autauga County; Alabama,4121,1857,79953.0,0.078379,0.796166,0.150449,0.036884,0.000000,0.029847,0.465664,0.534336,0.181267,0.133705,0.994409,0.173016,0.039849,0.522886
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84410,56043000200,Census Tract 2; Washakie County; Wyoming,3028,1580,56627.0,0.065390,0.910502,0.000330,0.063738,0.010568,0.026750,0.532034,0.467966,0.206407,0.211361,0.981280,0.105020,0.134810,0.644937
84411,56043000301,Census Tract 3.01; Washakie County; Wyoming,2323,1128,61087.0,0.038743,0.832975,0.000000,0.247094,0.007318,0.144210,0.565217,0.434783,0.289712,0.184675,0.995675,0.130435,0.159574,0.542553
84412,56043000302,Census Tract 3.02; Washakie County; Wyoming,2374,1148,72634.0,0.090564,0.822241,0.000000,0.141533,0.003370,0.120472,0.472199,0.527801,0.165122,0.308762,0.970516,0.170598,0.042683,0.709930
84413,56045951100,Census Tract 9511; Weston County; Wyoming,3275,1557,67715.0,0.140153,0.838168,0.001832,0.025954,0.022901,0.130382,0.551756,0.448244,0.170076,0.254962,0.946086,0.139542,0.118176,0.806680


In [9]:
census.to_csv("Census_ACS5_Export.csv", index = False)