2019 Census Data for Selected Variables - Baltimore City

In [1]:
#pip install us

In [2]:
# pip install censusgeocode
# pip install censusdata
conda install geopandas

In [None]:
# From https://www.census.gov/programs-surveys/acs/guidance/comparing-acs-data.html:
# "Due to the impact of the COVID-19 pandemic, the Census Bureau changed the 2020 ACS release. 
# Instead of providing the standard 1-year data products, the Census Bureau released experimental estimates from the 1-year data. 
# Data users should not compare 2020 ACS 1-year experimental estimates with any other data."

In [None]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states
# import censusdata 
import censusgeocode as cg
#import gmaps
import geopandas as gpd
# Census API key, imported from the config module rather than hard-coded here
# (no gmaps key is imported; the gmaps lines in this cell are commented out)
from config import api_key
# Census API client used by the data request cell below; year=2019 is passed as its year setting
c = Census(api_key, year=2019)

# Configure gmaps
#gmaps.configure(api_key=gkey)

In [None]:
# testing search function
#sample = censusdata.search('acs1', 2015, 'concept', 'employment')
#print(sample)


In [None]:
#censusdata.printtable(censusdata.censustable('acs1', 2019, 'B08604'))

In [None]:
# Retrieve ACS 5-year data for every census tract in Baltimore City, MD
# (state FIPS "24", county FIPS "510", tract wildcard "*").
# Each variable ID is listed exactly once; the earlier version of this request
# repeated "B23025_005E". The trailing comments give the column name each
# variable is renamed to later in this notebook.
acs_variables = (
    "NAME",
    "B19013_001E",  # Median Household Income
    "B01003_001E",  # Population
    "B01002_001E",  # Median Age
    "B19301_001E",  # Per Capita Income
    "B17001_002E",  # Poverty Count
    "B23025_005E",  # Unemployment Count
    "B08136_003E",  # Time spent commuting (minutes)
    "B23025_004E",  # Number of Employed, age 16+
    "B15003_017E",  # Number of persons age 25+ graduated high school
)

census_data = c.acs5.state_county_tract(
    acs_variables,
    state_fips="24",
    county_fips="510",
    tract="*",
)

# convert to dataframe (one row per record returned by the API client)
census_pd = pd.DataFrame(census_data)
census_pd.head()
                         

In [None]:
# The first element of the shape is the row count, i.e. the number of
# census tracts in the dataframe; the second is the column count.
frame_shape = census_pd.shape
print("Number of rows, columns: ", frame_shape)

In [None]:
# Adapted from https://pygis.io/docs/d_access_census.html
# (that example reads a Virginia tract shapefile and reprojects it to
# UTM Zone 17N; the same steps are applied here to the Maryland file).
# Tract shapefiles can be downloaded from the Census Bureau's Cartographic
# Boundary Files page or the TIGER/Line Shapefiles page.

# Read the 2019 TIGER/Line census-tract shapefile for state FIPS 24 (Maryland)
# and reproject it to EPSG:6487 in a single chained expression.
# The EPSG code was found by searching "epsg maryland": https://epsg.io/6487
md_tract = gpd.read_file(
    "https://www2.census.gov/geo/tiger/TIGER2019/TRACT/tl_2019_24_tract.zip"
).to_crs(epsg=6487)

# Preview the first two rows of the GeoDataFrame and report its dimensions
print(md_tract.head(2))
print('Shape: ', md_tract.shape)

# Confirm which projection the GeoDataFrame now carries
projected_crs = md_tract.crs
print("\nThe shapefile projection is: {}".format(projected_crs))

# Background on UTM (used by the pygis example, not by this cell):
# the Universal Transverse Mercator system divides the world into sixty
# north-south zones, each 6 degrees of longitude wide. Zones are numbered
# consecutively beginning with Zone 1, which includes the westernmost point
# of Alaska, and progress eastward to Zone 19, which includes Maine.

In [None]:
# Rename the raw ACS variable IDs to readable column names
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Median Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B08136_003E": "Time spent commuting (minutes)",
                                      "B23025_004E": "Number of Employed, age 16+",
                                      "B23025_005E": "Unemployment Count",
                                      "B15003_017E": "Number of persons age 25+ graduated high school",
                                      "NAME": "Name", "state": "State"})

# Coerce the count columns to numbers once. errors="coerce" turns missing or
# non-numeric entries into NaN instead of raising, as astype(int) would.
population = pd.to_numeric(census_pd["Population"], errors="coerce")
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
employed = pd.to_numeric(census_pd["Number of Employed, age 16+"], errors="coerce")
unemployed = pd.to_numeric(census_pd["Unemployment Count"], errors="coerce")

# Poverty Rate (%) = Poverty Count / Population.
# Tracts with zero population yield NaN/inf rather than an exception.
census_pd["Poverty Rate"] = 100 * poverty_count / population

# Unemployment Rate (%) = unemployed / (employed + unemployed).
# The denominator is the labor force, not the total population: dividing by
# population counts residents outside the labor force and so understates
# the rate.
labor_force = employed + unemployed
census_pd["Unemployment Rate"] = 100 * unemployed / labor_force

# Final DataFrame: keep only the named columns, in presentation order
census_pd = census_pd[["State", "Name", "Population", "Median Age", "Median Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "Unemployment Rate",
                       "Time spent commuting (minutes)", "Number of Employed, age 16+", "Unemployment Count",
                       "Number of persons age 25+ graduated high school"]]

census_pd.head()

In [None]:
# Cast the poverty rate to float, then keep a single decimal place
poverty_rate = census_pd["Poverty Rate"].astype(float)
census_pd["Poverty Rate"] = poverty_rate.round(decimals=1)
census_pd.head()

In [None]:
# Cast the unemployment rate to float, then keep a single decimal place
unemployment_rate = census_pd["Unemployment Rate"].astype(float)
census_pd["Unemployment Rate"] = unemployment_rate.round(decimals=1)
census_pd.head()

In [None]:
# Number of non-missing values in each column (a quick check for gaps in the data)
census_pd.count()

In [None]:

# Split Name ("<tract>, <county>, <state>") into three separate columns.
# Splitting on ", " (comma + space) keeps the separator's space out of the
# County and State Name values, and n=2 caps the result at three parts so it
# always matches the three target columns.
census_pd[["Census_Tract", "County", "State Name"]] = (
    census_pd["Name"].str.split(", ", n=2, expand=True)
)
census_pd.head()

In [None]:
# Remove the two columns that are not carried forward in the analysis
columns_to_remove = ["State", "Time spent commuting (minutes)"]
census_pd = census_pd.drop(columns=columns_to_remove)
census_pd.head()

In [None]:
# List the current column labels before reorganizing them
census_pd.columns

In [None]:
# calculate daytime population; from: https://www.census.gov/topics/employment/commuting/guidance/calculations.html
# Two methods for calculating commuter-adjusted population estimates:

#Method 1: Total resident population + (total workers working in area - workers who lived and worked in same area) - (total workers living in area - workers who lived and worked in same area) OR;
#Method 2: Total resident population + Total workers working in area - Total workers living in area.




In [3]:
# Reorganize the columns using double brackets
census_pd = census_pd[["State Name","County","Census_Tract","Population",'Median Age', 'Median Household Income', 'Per Capita Income',
       'Poverty Count', 'Poverty Rate', 'Unemployment Rate',
       'Time spent commuting (minutes)', 'Number of Employed, age 16+',
       'Unemployment Count', 'Number of persons age 25+ graduated high school']]
census_pd

NameError: name 'census_pd' is not defined

In [None]:
# Reduce "Census Tract <id>" to just "<id>".
# regex=False treats the prefix as literal text; the trailing .str.strip()
# removes the whitespace that removing the prefix leaves behind.
census_pd["Census_Tract"] = (
    census_pd["Census_Tract"]
    .str.replace("Census Tract", "", regex=False)
    .str.strip()
)
# Preview a few rows rather than rendering the whole frame
census_pd.head()

In [None]:
# Calculations can also be performed on Series and added into DataFrames as new columns
#thousands_of_dollars = data_file_df["Amount"]/1000
#data_file_df["Thousands of Dollars"] = thousands_of_dollars

#data_file_df.head()

In [None]:
# Converting the membership days into weeks and then adding a column to the DataFrame
#weeks = training_df["Membership (Days)"]/7
#training_df["Membership (Weeks)"] = weeks

#training_df.head()

In [None]:
# Count the distinct census tracts in the DataFrame
# (dropna=False keeps a missing value counted as its own entry, as unique() does)
tract_count = census_pd["Census_Tract"].nunique(dropna=False)
tract_count

In [None]:
# Write the table to CSV with a header row and without the pandas index column
census_pd.to_csv(
    path_or_buf="Comm_Corridor_Stats_Draft1.csv",
    header=True,
    index=False,
)

In [None]:
# Join the attributes of the dataframes together
# Source: https://geopandas.org/docs/user_guide/mergingdata.html
# NOTE: template copied from the Virginia example and left disabled; the
# va_* names are not defined in this notebook.
# TODO: adapt to this notebook's Maryland tract GeoDataFrame and census table
# (requires a shared key column such as GEOID on both sides).
#va_merge = va_tract.merge(va_df, on = "GEOID")

# Show result
#print(va_merge.head(2))
#print('Shape: ', va_merge.shape)