In [1]:
## Census American Community Survey 5-year estimates; Demographics at county (city) geographic level


In [2]:
#see https://pygis.io/docs/d_access_census.html

In [3]:
# data source: 

In [4]:
# From https://www.census.gov/programs-surveys/acs/guidance/comparing-acs-data.html:
# "Due to the impact of the COVID-19 pandemic, the Census Bureau changed the 2020 ACS release. 
# Instead of providing the standard 1-year data products, the Census Bureau released experimental estimates from the 1-year data. 
# Data users should not compare 2020 ACS 1-year experimental estimates with any other data.""

In [5]:
# Dependencies
from census import Census
from config import (census_key)
from us import states
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

# Census API key
c = Census(census_key)

In [6]:
import os

In [7]:
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\cherry-tree-lane-analytics\\Medium-blog-post1\\census-demographics-city-level'

In [8]:
# need to change the working directory; be sure that the config file for the census API is also in this folder
#os.chdir('C:\\Users\\Jenkir\\Desktop\\ECO-iMPACT\\Medium-blog-post1')

In [9]:
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\cherry-tree-lane-analytics\\Medium-blog-post1\\census-demographics-city-level'

In [10]:
pd.set_option('display.max_rows', 200)

In [11]:
pd.set_option('max_colwidth', 200)

In [12]:
# Census American Community Survey 5-Year Data 

data = c.acs5.state_county(fields=('NAME', "B11005_002E","B19001_001E","B19301_001E","B15003_001E","B01003_001E", "B01002_001E",
                                         "B02001_002E", "B02001_003E","B03001_003E","B17001_002E","B17001_002E",
                                         "B19013_001E","B17001_002E","B23025_004E","B23025_005E","B15003_017E","B15003_021E",
                                         "B15003_022E","B25003_001E","B25003_002E","B02001_005E","B02001_008E","B25003_003E",
                                         "B19013A_001E","B19013B_001E","B19013D_001E","B19013G_001E","B19013G_001E",
                                         "B28003_005E", "B28003_006E"),
                                  state_fips = states.MD.fips,
                                  county_fips = "*",
                                  year = 2019)                          


In [13]:
# Create dataframe from the census data
counties_df2 = pd.DataFrame(data)


# Note, per https://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html,
# When estimate value is -666666666, it means estimate could not be computed bc there was insufficient number of sample observations.
counties_df2

Unnamed: 0,NAME,B11005_002E,B19001_001E,B19301_001E,B15003_001E,B01003_001E,B01002_001E,B02001_002E,B02001_003E,B03001_003E,...,B02001_008E,B25003_003E,B19013A_001E,B19013B_001E,B19013D_001E,B19013G_001E,B28003_005E,B28003_006E,state,county
0,"Worcester County, Maryland",4684.0,22089.0,38080.0,39386.0,51765.0,50.4,42965.0,6659.0,1811.0,...,1221.0,5507.0,66448.0,39614.0,117154.0,92596.0,1352.0,2542.0,24,47
1,"Talbot County, Maryland",4256.0,16826.0,49136.0,27939.0,37167.0,50.5,30818.0,3887.0,2513.0,...,1628.0,4998.0,78500.0,39474.0,72500.0,81583.0,958.0,1440.0,24,41
2,"Howard County, Maryland",43338.0,114170.0,54628.0,215300.0,318855.0,38.8,181711.0,60232.0,22012.0,...,14160.0,30650.0,131332.0,94192.0,134635.0,107102.0,3611.0,3706.0,24,27
3,"Prince George's County, Maryland",100967.0,311343.0,37191.0,616274.0,908670.0,37.1,150782.0,569478.0,167498.0,...,26891.0,118080.0,92392.0,85576.0,96080.0,83370.0,24163.0,17743.0,24,33
4,"Anne Arundel County, Maryland",69265.0,209814.0,46629.0,393584.0,571275.0,38.3,415470.0,95710.0,44621.0,...,23351.0,54197.0,105768.0,82360.0,100921.0,92992.0,8676.0,11221.0,24,3
5,"Baltimore County, Maryland",96036.0,313519.0,40105.0,575300.0,828018.0,39.4,501423.0,239308.0,44807.0,...,23112.0,106396.0,82334.0,68143.0,78245.0,73173.0,18773.0,25557.0,24,5
6,"Frederick County, Maryland",32990.0,92526.0,43582.0,170910.0,251422.0,39.1,202622.0,24411.0,24101.0,...,8337.0,22912.0,100271.0,76095.0,103710.0,82917.0,3994.0,6662.0,24,21
7,"Calvert County, Maryland",11665.0,31973.0,45783.0,62546.0,91511.0,40.7,74326.0,10709.0,3641.0,...,4041.0,4903.0,114801.0,70423.0,121845.0,106302.0,1304.0,1988.0,24,9
8,"Garrett County, Maryland",3370.0,12425.0,30617.0,21472.0,29235.0,46.2,28327.0,250.0,330.0,...,447.0,2654.0,52682.0,-666666666.0,81477.0,32778.0,991.0,1909.0,24,23
9,"Kent County, Maryland",1562.0,8025.0,36813.0,13933.0,19536.0,48.1,15774.0,2787.0,859.0,...,449.0,2468.0,66582.0,31742.0,-666666666.0,65521.0,752.0,1302.0,24,29


In [14]:
# Replace the value -666666666.0 with 0 for the entire dataframe
counties_df = counties_df2.replace(-666666666.0,0)
counties_df

Unnamed: 0,NAME,B11005_002E,B19001_001E,B19301_001E,B15003_001E,B01003_001E,B01002_001E,B02001_002E,B02001_003E,B03001_003E,...,B02001_008E,B25003_003E,B19013A_001E,B19013B_001E,B19013D_001E,B19013G_001E,B28003_005E,B28003_006E,state,county
0,"Worcester County, Maryland",4684.0,22089.0,38080.0,39386.0,51765.0,50.4,42965.0,6659.0,1811.0,...,1221.0,5507.0,66448.0,39614.0,117154.0,92596.0,1352.0,2542.0,24,47
1,"Talbot County, Maryland",4256.0,16826.0,49136.0,27939.0,37167.0,50.5,30818.0,3887.0,2513.0,...,1628.0,4998.0,78500.0,39474.0,72500.0,81583.0,958.0,1440.0,24,41
2,"Howard County, Maryland",43338.0,114170.0,54628.0,215300.0,318855.0,38.8,181711.0,60232.0,22012.0,...,14160.0,30650.0,131332.0,94192.0,134635.0,107102.0,3611.0,3706.0,24,27
3,"Prince George's County, Maryland",100967.0,311343.0,37191.0,616274.0,908670.0,37.1,150782.0,569478.0,167498.0,...,26891.0,118080.0,92392.0,85576.0,96080.0,83370.0,24163.0,17743.0,24,33
4,"Anne Arundel County, Maryland",69265.0,209814.0,46629.0,393584.0,571275.0,38.3,415470.0,95710.0,44621.0,...,23351.0,54197.0,105768.0,82360.0,100921.0,92992.0,8676.0,11221.0,24,3
5,"Baltimore County, Maryland",96036.0,313519.0,40105.0,575300.0,828018.0,39.4,501423.0,239308.0,44807.0,...,23112.0,106396.0,82334.0,68143.0,78245.0,73173.0,18773.0,25557.0,24,5
6,"Frederick County, Maryland",32990.0,92526.0,43582.0,170910.0,251422.0,39.1,202622.0,24411.0,24101.0,...,8337.0,22912.0,100271.0,76095.0,103710.0,82917.0,3994.0,6662.0,24,21
7,"Calvert County, Maryland",11665.0,31973.0,45783.0,62546.0,91511.0,40.7,74326.0,10709.0,3641.0,...,4041.0,4903.0,114801.0,70423.0,121845.0,106302.0,1304.0,1988.0,24,9
8,"Garrett County, Maryland",3370.0,12425.0,30617.0,21472.0,29235.0,46.2,28327.0,250.0,330.0,...,447.0,2654.0,52682.0,0.0,81477.0,32778.0,991.0,1909.0,24,23
9,"Kent County, Maryland",1562.0,8025.0,36813.0,13933.0,19536.0,48.1,15774.0,2787.0,859.0,...,449.0,2468.0,66582.0,31742.0,0.0,65521.0,752.0,1302.0,24,29


In [15]:
# Replace the census variable codes (such as "B19013_001E") in the dataframe with the name of the detailed table variables so it's understandable
# I have the table id numbers listed in order so that it is easy to see what tables I have included at this point - can rearrange the column order later
counties_df = counties_df.rename(columns={"B01003_001E": "Population",
                                      "B15003_001E": "Population 25 years and older",      
                                      "B01002_001E": "Median age", # universe: total population
                                      "B02001_003E": "Pop. Black", # universe: total population
                                      "B02001_002E": "Pop. white", # universe: total population
                                      "B02001_005E": "Pop. Asian",  # universe: total population      
                                      "B02001_008E": "Pop. 2 or more races", # universe: total population   
                                      "B03001_003E": "Pop. Hispanic origin", # universe: total population
                                      "B11005_002E": "Households with one or more people under 18 years",# universe: total households
                                      "B19301_001E":  "Per capita income", # universe: total population 
                                      "B15003_017E": "# persons age 25+ graduated high school", # universe: Population 25 years and over
                                      "B15003_021E": "# persons Associate's degree",   # universe: Population 25 years and over
                                      "B15003_022E": "# persons age 25+ with Bachelor's degree", # universe: Population 25 years and over
                                      "B17001_002E": " Persons for whom poverty status determined",    
                                      "B17001_002E": "Individuals- income below poverty level",# universe: persons for whom poverty status is determined
                                      "B19001_001E": "Total households",    
                                      "B19013_001E": "Median household income", # universe: total households
                                      "B19013A_001E": "Median HH income White Alone Householder", # universe: total households
                                      "B19013B_001E": "Median HH income Black or African American Alone Householder", # universe: total households
                                      "B19013D_001E": "Median HH income Asian Alone Householder", # universe: total households
                                      "B19013G_001E": "Median HH income two or more races Householder", # universe: total households
                                      "B19013G_001E": "Median HH income Hispanic or Latino Householder", # universe: total households 
                                      "B19301_001E":  "Per capita income", # universe: total population
                                      "B23025_002E": "In labor force", # universe: Population 16 years and over
                                      "B23025_004E": "Employed civilians", # universe: Population 16 years and over
                                      "B23025_005E": "Unemployedment civilians", # universe: Population 16 years and over
                                      "B25003_001E": "Total occupied units", # universe: Housing units
                                      "B25003_002E": "Total owner-occupied units", # universe: Housing units
                                      "B25003_003E": "Total renter-occupied units", # universe: Housing units
                                      "B28003_005E": "# households with computer, no internet subscription", # universe: total households
                                      "B28003_006E": "# households no computer",  # universe: total households  
                                      })
counties_df.head()

Unnamed: 0,NAME,Households with one or more people under 18 years,Total households,Per capita income,Population 25 years and older,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin,...,Pop. 2 or more races,Total renter-occupied units,Median HH income White Alone Householder,Median HH income Black or African American Alone Householder,Median HH income Asian Alone Householder,Median HH income Hispanic or Latino Householder,"# households with computer, no internet subscription",# households no computer,state,county
0,"Worcester County, Maryland",4684.0,22089.0,38080.0,39386.0,51765.0,50.4,42965.0,6659.0,1811.0,...,1221.0,5507.0,66448.0,39614.0,117154.0,92596.0,1352.0,2542.0,24,47
1,"Talbot County, Maryland",4256.0,16826.0,49136.0,27939.0,37167.0,50.5,30818.0,3887.0,2513.0,...,1628.0,4998.0,78500.0,39474.0,72500.0,81583.0,958.0,1440.0,24,41
2,"Howard County, Maryland",43338.0,114170.0,54628.0,215300.0,318855.0,38.8,181711.0,60232.0,22012.0,...,14160.0,30650.0,131332.0,94192.0,134635.0,107102.0,3611.0,3706.0,24,27
3,"Prince George's County, Maryland",100967.0,311343.0,37191.0,616274.0,908670.0,37.1,150782.0,569478.0,167498.0,...,26891.0,118080.0,92392.0,85576.0,96080.0,83370.0,24163.0,17743.0,24,33
4,"Anne Arundel County, Maryland",69265.0,209814.0,46629.0,393584.0,571275.0,38.3,415470.0,95710.0,44621.0,...,23351.0,54197.0,105768.0,82360.0,100921.0,92992.0,8676.0,11221.0,24,3


In [19]:
counties_df["Percent Black"] =(100 * \
    counties_df["Pop. Black"].astype(
        int) / counties_df["Population"].astype(
        int)).round(1) 



Unnamed: 0,NAME,Households with one or more people under 18 years,Total households,Per capita income,Population 25 years and older,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin,...,Total renter-occupied units,Median HH income White Alone Householder,Median HH income Black or African American Alone Householder,Median HH income Asian Alone Householder,Median HH income Hispanic or Latino Householder,"# households with computer, no internet subscription",# households no computer,state,county,Percent Black
0,"Worcester County, Maryland",4684.0,22089.0,38080.0,39386.0,51765.0,50.4,42965.0,6659.0,1811.0,...,5507.0,66448.0,39614.0,117154.0,92596.0,1352.0,2542.0,24,47,12.9
1,"Talbot County, Maryland",4256.0,16826.0,49136.0,27939.0,37167.0,50.5,30818.0,3887.0,2513.0,...,4998.0,78500.0,39474.0,72500.0,81583.0,958.0,1440.0,24,41,10.5
2,"Howard County, Maryland",43338.0,114170.0,54628.0,215300.0,318855.0,38.8,181711.0,60232.0,22012.0,...,30650.0,131332.0,94192.0,134635.0,107102.0,3611.0,3706.0,24,27,18.9
3,"Prince George's County, Maryland",100967.0,311343.0,37191.0,616274.0,908670.0,37.1,150782.0,569478.0,167498.0,...,118080.0,92392.0,85576.0,96080.0,83370.0,24163.0,17743.0,24,33,62.7
4,"Anne Arundel County, Maryland",69265.0,209814.0,46629.0,393584.0,571275.0,38.3,415470.0,95710.0,44621.0,...,54197.0,105768.0,82360.0,100921.0,92992.0,8676.0,11221.0,24,3,16.8
5,"Baltimore County, Maryland",96036.0,313519.0,40105.0,575300.0,828018.0,39.4,501423.0,239308.0,44807.0,...,106396.0,82334.0,68143.0,78245.0,73173.0,18773.0,25557.0,24,5,28.9
6,"Frederick County, Maryland",32990.0,92526.0,43582.0,170910.0,251422.0,39.1,202622.0,24411.0,24101.0,...,22912.0,100271.0,76095.0,103710.0,82917.0,3994.0,6662.0,24,21,9.7
7,"Calvert County, Maryland",11665.0,31973.0,45783.0,62546.0,91511.0,40.7,74326.0,10709.0,3641.0,...,4903.0,114801.0,70423.0,121845.0,106302.0,1304.0,1988.0,24,9,11.7
8,"Garrett County, Maryland",3370.0,12425.0,30617.0,21472.0,29235.0,46.2,28327.0,250.0,330.0,...,2654.0,52682.0,0.0,81477.0,32778.0,991.0,1909.0,24,23,0.9
9,"Kent County, Maryland",1562.0,8025.0,36813.0,13933.0,19536.0,48.1,15774.0,2787.0,859.0,...,2468.0,66582.0,31742.0,0.0,65521.0,752.0,1302.0,24,29,14.3


In [20]:
counties_df["Percent white"] =(100 * \
    counties_df["Pop. white"].astype(
        int) / counties_df["Population"].astype(
        int)).round(1) 

counties_df

Unnamed: 0,NAME,Households with one or more people under 18 years,Total households,Per capita income,Population 25 years and older,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin,...,Median HH income White Alone Householder,Median HH income Black or African American Alone Householder,Median HH income Asian Alone Householder,Median HH income Hispanic or Latino Householder,"# households with computer, no internet subscription",# households no computer,state,county,Percent Black,Percent white
0,"Worcester County, Maryland",4684.0,22089.0,38080.0,39386.0,51765.0,50.4,42965.0,6659.0,1811.0,...,66448.0,39614.0,117154.0,92596.0,1352.0,2542.0,24,47,12.9,83.0
1,"Talbot County, Maryland",4256.0,16826.0,49136.0,27939.0,37167.0,50.5,30818.0,3887.0,2513.0,...,78500.0,39474.0,72500.0,81583.0,958.0,1440.0,24,41,10.5,82.9
2,"Howard County, Maryland",43338.0,114170.0,54628.0,215300.0,318855.0,38.8,181711.0,60232.0,22012.0,...,131332.0,94192.0,134635.0,107102.0,3611.0,3706.0,24,27,18.9,57.0
3,"Prince George's County, Maryland",100967.0,311343.0,37191.0,616274.0,908670.0,37.1,150782.0,569478.0,167498.0,...,92392.0,85576.0,96080.0,83370.0,24163.0,17743.0,24,33,62.7,16.6
4,"Anne Arundel County, Maryland",69265.0,209814.0,46629.0,393584.0,571275.0,38.3,415470.0,95710.0,44621.0,...,105768.0,82360.0,100921.0,92992.0,8676.0,11221.0,24,3,16.8,72.7
5,"Baltimore County, Maryland",96036.0,313519.0,40105.0,575300.0,828018.0,39.4,501423.0,239308.0,44807.0,...,82334.0,68143.0,78245.0,73173.0,18773.0,25557.0,24,5,28.9,60.6
6,"Frederick County, Maryland",32990.0,92526.0,43582.0,170910.0,251422.0,39.1,202622.0,24411.0,24101.0,...,100271.0,76095.0,103710.0,82917.0,3994.0,6662.0,24,21,9.7,80.6
7,"Calvert County, Maryland",11665.0,31973.0,45783.0,62546.0,91511.0,40.7,74326.0,10709.0,3641.0,...,114801.0,70423.0,121845.0,106302.0,1304.0,1988.0,24,9,11.7,81.2
8,"Garrett County, Maryland",3370.0,12425.0,30617.0,21472.0,29235.0,46.2,28327.0,250.0,330.0,...,52682.0,0.0,81477.0,32778.0,991.0,1909.0,24,23,0.9,96.9
9,"Kent County, Maryland",1562.0,8025.0,36813.0,13933.0,19536.0,48.1,15774.0,2787.0,859.0,...,66582.0,31742.0,0.0,65521.0,752.0,1302.0,24,29,14.3,80.7


In [None]:
# Export file as a CSV, without the Pandas index, but with the header
#counties_df.to_csv("ACS_demographics_2019.csv", index = False, header=True)

In [None]:
# Plotly dashboard example   https://towardsdatascience.com/creating-an-interactive-dashboard-with-dash-plotly-using-crime-data-a217da841df3