In [1]:
## Census American Community Survey 5-year estimates; Demographics at county (city) geographic level


In [2]:
#see https://pygis.io/docs/d_access_census.html

In [3]:
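# Data source: U.S. Census Bureau, American Community Survey (ACS) 5-year estimates, retrieved through the Census API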

In [4]:
# From https://www.census.gov/programs-surveys/acs/guidance/comparing-acs-data.html:
# "Due to the impact of the COVID-19 pandemic, the Census Bureau changed the 2020 ACS release. 
# Instead of providing the standard 1-year data products, the Census Bureau released experimental estimates from the 1-year data. 
# Data users should not compare 2020 ACS 1-year experimental estimates with any other data."

In [5]:
# Dependencies
from census import Census
from config import (census_key)
from us import states
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

# Census API client, authenticated with the key imported from the local config module.
# Every Census request in this notebook goes through `c`.
c = Census(census_key)

In [6]:
import os

In [7]:
# Display the kernel's current working directory.
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\cherry-tree-lane-analytics\\Medium-blog-post1\\census-demographics-city-level'

In [8]:
# need to change the working directory; be sure that the config file for the census API is also in this folder
#os.chdir('C:\\Users\\Jenkir\\Desktop\\ECO-iMPACT\\Medium-blog-post1')

In [9]:
# Display the working directory again (the chdir call above is commented out, so it is unchanged).
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\cherry-tree-lane-analytics\\Medium-blog-post1\\census-demographics-city-level'

In [10]:
# Let pandas print up to 200 rows before truncating a displayed frame.
pd.options.display.max_rows = 200

In [11]:
# Show up to 200 characters per cell so long column values are not cut off.
pd.options.display.max_colwidth = 200

In [12]:
# Census American Community Survey 5-Year Data 
# For list of variables, see https://api.census.gov/data/2021/acs/acs5/variables.html
data = c.acs5.state_county(fields=('NAME', "B11005_002E","B19001_001E","B19301_001E","B15003_001E","B01003_001E", "B01002_001E",
                                         "B02001_002E", "B02001_003E","B03001_003E","B17001_002E","B17001_002E",
                                         "B19013_001E","B17001_002E","B23025_004E","B23025_005E","B15003_017E","B15003_021E",
                                         "B15003_022E","B25003_001E","B25003_002E","B02001_005E","B02001_008E","B25003_003E",
                                         "B19013A_001E","B19013B_001E","B19013D_001E","B19013G_001E","B19013G_001E",
                                         "B28003_005E", "B28003_006E"),
                                  state_fips = states.MD.fips,
                                  county_fips = "*",
                                  year = 2019)                          


CensusException: Sorry, the system is currently undergoing maintenance or is busy.  Please try again later.

In [None]:
# Create dataframe from the census data
counties_df2 = pd.DataFrame(data)


# Note, per https://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html,
# When estimate value is -666666666, it means estimate could not be computed bc there was insufficient number of sample observations.
counties_df2

In [None]:
# Replace the value -666666666.0 with 0 for the entire dataframe
counties_df = counties_df2.replace(-666666666.0,0)
counties_df

In [None]:
# Replace the census variable codes (such as "B19013_001E") in the dataframe with the name of the detailed table variables so it's understandable
# I have the table id numbers listed in order so that it is easy to see what tables I have included at this point - can rearrange the column order later
counties_df = counties_df.rename(columns={"B01003_001E": "Population",
                                      "B15003_001E": "Population 25 years and older",      
                                      "B01002_001E": "Median age", # universe: total population
                                      "B02001_003E": "Pop. Black", # universe: total population
                                      "B02001_002E": "Pop. white", # universe: total population
                                      "B02001_005E": "Pop. Asian",  # universe: total population      
                                      "B02001_008E": "Pop. 2 or more races", # universe: total population   
                                      "B03001_003E": "Pop. Hispanic origin", # universe: total population
                                      "B11005_002E": "Households with one or more people under 18 years",# universe: total households
                                      "B19301_001E":  "Per capita income", # universe: total population 
                                      "B15003_017E": "# persons age 25+ graduated high school", # universe: Population 25 years and over
                                      "B15003_021E": "# persons Associates degree",   # universe: Population 25 years and over
                                      "B15003_022E": "# persons age 25+ with Bachelors degree", # universe: Population 25 years and over
                                      "B17001_002E": " Persons for whom poverty status determined",    
                                      "B17001_002E": "Individuals- income below poverty level",# universe: persons for whom poverty status is determined
                                      "B19001_001E": "Total households",    
                                      "B19013_001E": "Median household income", # universe: total households
                                      "B19013A_001E": "Median HH income White", # universe: total households; "white alone householder"
                                      "B19013B_001E": "Median HH income Black", # universe: total households; "Black or African American Alone Householder"
                                      "B19013D_001E": "Median HH income Asian", # universe: total households; "Asian Alone Householder""
                                      "B19013G_001E": "Median HH income two or more races", # universe: total households
                                      "B19013G_001E": "Median HH income Hispanic/Latinx, any race", # universe: total households 
                                      "B19301_001E":  "Per capita income", # universe: total population
                                      "B23025_002E": "In labor force", # universe: Population 16 years and over
                                      "B23025_004E": "Employed civilians", # universe: Population 16 years and over
                                      "B23025_005E": "Unemployedment civilians", # universe: Population 16 years and over
                                      "B25003_001E": "Total occupied units", # universe: Housing units
                                      "B25003_002E": "Total owner-occupied units", # universe: Housing units
                                      "B25003_003E": "Total renter-occupied units", # universe: Housing units
                                      "B28003_005E": "# households with computer, no internet subscription", # universe: total households
                                      "B28003_006E": "# households no computer",  # universe: total households  
                                      })
counties_df.head()

In [None]:
# Give the API's "NAME" field a descriptive column label
counties_df = counties_df.rename({"NAME": "County"}, axis="columns")

In [None]:
# 

In [None]:
# Inspect the column labels after the renames above.
counties_df.columns

In [None]:
# Split the "County" value into a place name ("Location") and a state name ("State").
# n=1 splits on the first comma only, so the result always has exactly two parts even if
# a name itself contains a comma; strip() removes the space that follows the comma.
name_parts = counties_df["County"].str.split(",", n=1, expand=True)
counties_df["Location"] = name_parts[0].str.strip()
counties_df["State"] = name_parts[1].str.strip()
counties_df.head()

In [None]:
# Discard the API's 'state'/'county' geography columns and the intermediate name columns;
# only "Location" is kept to identify each row.
columns_to_remove = ['state', 'county', 'State', 'County']
counties_df = counties_df.drop(columns=columns_to_remove)
counties_df.head()

In [None]:
# Check the dtype pandas assigned to each column before doing arithmetic on them
counties_df.dtypes

In [None]:
# 

In [None]:
# Black residents as a percentage of total population, rounded to one decimal place
black_count = counties_df["Pop. Black"].astype(int)
total_count = counties_df["Population"].astype(int)
counties_df["Percent Black"] = (100 * black_count / total_count).round(1)



In [None]:
# White residents as a percentage of total population, rounded to one decimal place
white_count = counties_df["Pop. white"].astype(int)
population_count = counties_df["Population"].astype(int)
counties_df["Percent white"] = (100 * white_count / population_count).round(1)

counties_df.head()

In [None]:
# Remove the word "County" from all values in the "Location" column, then trim the
# whitespace the removal leaves behind ("X County" -> "X", not "X ").
# regex=False makes this a literal substring replacement regardless of pandas version.
counties_df['Location'] = (
    counties_df['Location']
    .str.replace('County', '', regex=False)
    .str.strip()
)
counties_df

In [None]:
# Change the order of the columns in the dataframe.
# .loc returns a NEW frame, so the result has to be assigned back to take effect.
ordered_columns = ["Location","Households with one or more people under 18 years",
       "Total households", "Per capita income",
       "Population 25 years and older", "Population", "Median age",
       "Pop. white", "Pop. Black", "Pop. Hispanic origin",
       "Individuals- income below poverty level", "Median household income",
       "Employed civilians", "Unemployedment civilians",
       "# persons age 25+ graduated high school",
       "# persons Associates degree",
       "# persons age 25+ with Bachelors degree", "Total occupied units",
       "Total owner-occupied units", "Pop. Asian", "Pop. 2 or more races",
       "Total renter-occupied units", "Median HH income White",
       "Median HH income Black", "Median HH income Asian",
       "Median HH income Hispanic/Latinx, any race",
       "# households with computer, no internet subscription",
       "# households no computer","Percent Black", "Percent white"]
# Columns not named above are appended at the end, so reordering never drops data.
remaining_columns = [col for col in counties_df.columns if col not in ordered_columns]
counties_df = counties_df.loc[:, ordered_columns + remaining_columns]
counties_df.head()

In [None]:
# Export file as a CSV, without the Pandas index, but with the header
#counties_df.to_csv("ACS_demographics_2019.csv", index = False, header=True)

## CHARTS

In [None]:
# Plotly dashboard example   https://towardsdatascience.com/creating-an-interactive-dashboard-with-dash-plotly-using-crime-data-a217da841df3

In [None]:
# Index the frame by place name. The original "NAME" column no longer exists at this point
# (it was renamed to "County" and then dropped), so "Location" is the identifying column.
counties_df = counties_df.set_index("Location")
