### 2021 Census Data (ACS 5-year)for Selected Variables - Baltimore City



In [1]:
#pip install cenpy

In [2]:
#pip install census

In [3]:
#pip install censusdata

In [4]:
#pip install openpyxl

In [5]:
#pip install cenpy

In [6]:
# From https://cenpy-devs.github.io/cenpy/:
# Cenpy (pronounced sen-pie) is a package that automatically discovers US Census Bureau API endpoints and exposes them to Python in a consistent fashion. 
# It also provides easy-to-use access to certain well-used data products, like the American Community Survey (ACS) and 2010 Decennial Census.


In [7]:
# From https://www.census.gov/programs-surveys/acs/guidance/comparing-acs-data.html:
# "Due to the impact of the COVID-19 pandemic, the Census Bureau changed the 2020 ACS release. 
# Instead of providing the standard 1-year data products, the Census Bureau released experimental estimates from the 1-year data. 
# Data users should not compare 2020 ACS 1-year experimental estimates with any other data.""

In [8]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
#from us import states
import censusdata 
#import censusgeocode as cg
import cenpy
#from scipy.stats import linregress

# Census & gmaps API Keys
# the latest year available is the default year and so you do not need to specify the year

from config import (api_key, gkey)
c = Census(api_key)

# Configure gmaps
#gmaps.configure(api_key=gkey)

In [9]:
import os

In [10]:
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\BDC\\Commercial_corridors\\Commercial_Corridors_Demographics'

In [11]:
pd.set_option('display.max_rows', 200)

In [12]:
pd.set_option('max_colwidth', 250)

In [13]:
# see https://mcdc.missouri.edu/applications/acs/profiles/report.php?period=5&year=2020&g=06000US2451090000|05000US24510|04000US24|01000US
# for helpful cheatsheet of subject/ variables

In [14]:
# American Community Survey 5-Year Data DETAILED Tables 
# The data are population COUNTS

# See "When to Use 1-year or 5-year estimates": 
# https://www.census.gov/programs-surveys/acs/guidance/estimates.html

# Set this to false if you're trying to do this without an internet connection
# and data which would have been fetched from an API query will be read from cached files instead
INTERNET_IS_WORKING = True

if INTERNET_IS_WORKING:
    con = cenpy.remote.APIConnection('ACSDT5Y2021')
    variables = con.variables
else:
    variables = pd.read_csv('data/ACSDT5Y2021_variables.csv',index_col='Unnamed: 0')

# No matter which product you use, a cenpy APIConnection can show you the variables 
# which it can retrieve for you, returned as a pandas DataFrame.
print(f"ACSDT5Y2021 provides {len(variables)} variables.") # how many are there?
variables.head()

ACSDT5Y2021 provides 27927 variables.


Unnamed: 0,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required
for,Census API FIPS 'for' clause,Census API Geography Specification,fips-for,,0,True,,,
in,Census API FIPS 'in' clause,Census API Geography Specification,fips-in,,0,True,,,
ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,ucgid,,0,True,True,,
B24022_060E,Estimate!!Total:!!Female:!!Service occupations:!!Food preparation and serving related occupations,"SEX BY OCCUPATION AND MEDIAN EARNINGS IN THE PAST 12 MONTHS (IN 2021 INFLATION-ADJUSTED DOLLARS) FOR THE FULL-TIME, YEAR-ROUND CIVILIAN EMPLOYED POPULATION 16 YEARS AND OVER",int,B24022,0,,,"B24022_060EA,B24022_060M,B24022_060MA",
B19001B_014E,"Estimate!!Total:!!$100,000 to $124,999",HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2021 INFLATION-ADJUSTED DOLLARS) (BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER),int,B19001B,0,,,"B19001B_014EA,B19001B_014M,B19001B_014MA",


In [15]:
# Comments and code in this block are from https://github.com/censusreporter/nicar20-advanced-census-python/blob/master/workshop.ipynb:
# I'll use "Nicar20" as citation for the above site from here on out
# Values for 'group' are ACS table IDs; 
# For this data, when it's N/A, it's for other kinds of API variables so won't include those:
short_vars = variables[~(variables['group'] == 'N/A')] 

# Get a list of table IDs and their titles
short_vars[['group', 'concept']].drop_duplicates().sort_values('group').head(20) 

Unnamed: 0,group,concept
B01001_012E,B01001,SEX BY AGE
B01001A_002E,B01001A,SEX BY AGE (WHITE ALONE)
B01001B_029E,B01001B,SEX BY AGE (BLACK OR AFRICAN AMERICAN ALONE)
B01001C_008E,B01001C,SEX BY AGE (AMERICAN INDIAN AND ALASKA NATIVE ALONE)
B01001D_008E,B01001D,SEX BY AGE (ASIAN ALONE)
B01001E_013E,B01001E,SEX BY AGE (NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER ALONE)
B01001F_001E,B01001F,SEX BY AGE (SOME OTHER RACE ALONE)
B01001G_022E,B01001G,SEX BY AGE (TWO OR MORE RACES)
B01001H_001E,B01001H,"SEX BY AGE (WHITE ALONE, NOT HISPANIC OR LATINO)"
B01001I_027E,B01001I,SEX BY AGE (HISPANIC OR LATINO)


In [16]:
# (From Nicar20)
# Use when you know which group but still need specific API variable codes
# "attributes" column shows related variables you can request. The one that ends with M is the margin of error, and since we want to be responsible when we aggregate data, we'll be sure to aggregate the error as well. 
# Just fyi, The other two which end with A are "annotations." 
short_vars[short_vars['group'] == 'B01001'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B01001_001E,Estimate!!Total:,"B01001_001EA,B01001_001M,B01001_001MA"
B01001_002E,Estimate!!Total:!!Male:,"B01001_002EA,B01001_002M,B01001_002MA"
B01001_003E,Estimate!!Total:!!Male:!!Under 5 years,"B01001_003EA,B01001_003M,B01001_003MA"
B01001_004E,Estimate!!Total:!!Male:!!5 to 9 years,"B01001_004EA,B01001_004M,B01001_004MA"
B01001_005E,Estimate!!Total:!!Male:!!10 to 14 years,"B01001_005EA,B01001_005M,B01001_005MA"
B01001_006E,Estimate!!Total:!!Male:!!15 to 17 years,"B01001_006EA,B01001_006M,B01001_006MA"
B01001_007E,Estimate!!Total:!!Male:!!18 and 19 years,"B01001_007EA,B01001_007M,B01001_007MA"
B01001_008E,Estimate!!Total:!!Male:!!20 years,"B01001_008EA,B01001_008M,B01001_008MA"
B01001_009E,Estimate!!Total:!!Male:!!21 years,"B01001_009EA,B01001_009M,B01001_009MA"
B01001_010E,Estimate!!Total:!!Male:!!22 to 24 years,"B01001_010EA,B01001_010M,B01001_010MA"


In [17]:
short_vars[short_vars['group'] == 'B01002'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B01002_001E,Estimate!!Median age --!!Total:,"B01002_001EA,B01002_001M,B01002_001MA"
B01002_002E,Estimate!!Median age --!!Male,"B01002_002EA,B01002_002M,B01002_002MA"
B01002_003E,Estimate!!Median age --!!Female,"B01002_003EA,B01002_003M,B01002_003MA"


In [18]:
short_vars[short_vars['group'] == 'B05002'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B05002_001E,Estimate!!Total:,"B05002_001EA,B05002_001M,B05002_001MA"
B05002_002E,Estimate!!Total:!!Native:,"B05002_002EA,B05002_002M,B05002_002MA"
B05002_003E,Estimate!!Total:!!Native:!!Born in state of residence,"B05002_003EA,B05002_003M,B05002_003MA"
B05002_004E,Estimate!!Total:!!Native:!!Born in other state in the United States:,"B05002_004EA,B05002_004M,B05002_004MA"
B05002_005E,Estimate!!Total:!!Native:!!Born in other state in the United States:!!Northeast,"B05002_005EA,B05002_005M,B05002_005MA"
B05002_006E,Estimate!!Total:!!Native:!!Born in other state in the United States:!!Midwest,"B05002_006EA,B05002_006M,B05002_006MA"
B05002_007E,Estimate!!Total:!!Native:!!Born in other state in the United States:!!South,"B05002_007EA,B05002_007M,B05002_007MA"
B05002_008E,Estimate!!Total:!!Native:!!Born in other state in the United States:!!West,"B05002_008EA,B05002_008M,B05002_008MA"
B05002_009E,Estimate!!Total:!!Native:!!Born outside the United States:,"B05002_009EA,B05002_009M,B05002_009MA"
B05002_010E,Estimate!!Total:!!Native:!!Born outside the United States:!!Puerto Rico,"B05002_010EA,B05002_010M,B05002_010MA"


In [19]:
short_vars[short_vars['group'] == 'B07007PR'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B07007PR_001E,Estimate!!Total:,"B07007PR_001EA,B07007PR_001M,B07007PR_001MA"
B07007PR_002E,Estimate!!Total:!!Native,"B07007PR_002EA,B07007PR_002M,B07007PR_002MA"
B07007PR_003E,Estimate!!Total:!!Foreign born:,"B07007PR_003EA,B07007PR_003M,B07007PR_003MA"
B07007PR_004E,Estimate!!Total:!!Foreign born:!!Naturalized U.S. citizen,"B07007PR_004EA,B07007PR_004M,B07007PR_004MA"
B07007PR_005E,Estimate!!Total:!!Foreign born:!!Not a U.S. citizen,"B07007PR_005EA,B07007PR_005M,B07007PR_005MA"
B07007PR_006E,Estimate!!Total:!!Same house 1 year ago:,"B07007PR_006EA,B07007PR_006M,B07007PR_006MA"
B07007PR_007E,Estimate!!Total:!!Same house 1 year ago:!!Native,"B07007PR_007EA,B07007PR_007M,B07007PR_007MA"
B07007PR_008E,Estimate!!Total:!!Same house 1 year ago:!!Foreign born:,"B07007PR_008EA,B07007PR_008M,B07007PR_008MA"
B07007PR_009E,Estimate!!Total:!!Same house 1 year ago:!!Foreign born:!!Naturalized U.S. citizen,"B07007PR_009EA,B07007PR_009M,B07007PR_009MA"
B07007PR_010E,Estimate!!Total:!!Same house 1 year ago:!!Foreign born:!!Not a U.S. citizen,"B07007PR_010EA,B07007PR_010M,B07007PR_010MA"


In [20]:
short_vars[short_vars['group'] == 'B08301'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B08301_001E,Estimate!!Total:,"B08301_001EA,B08301_001M,B08301_001MA"
B08301_002E,"Estimate!!Total:!!Car, truck, or van:","B08301_002EA,B08301_002M,B08301_002MA"
B08301_003E,"Estimate!!Total:!!Car, truck, or van:!!Drove alone","B08301_003EA,B08301_003M,B08301_003MA"
B08301_004E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:","B08301_004EA,B08301_004M,B08301_004MA"
B08301_005E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 2-person carpool","B08301_005EA,B08301_005M,B08301_005MA"
B08301_006E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 3-person carpool","B08301_006EA,B08301_006M,B08301_006MA"
B08301_007E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 4-person carpool","B08301_007EA,B08301_007M,B08301_007MA"
B08301_008E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 5- or 6-person carpool","B08301_008EA,B08301_008M,B08301_008MA"
B08301_009E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 7-or-more-person carpool","B08301_009EA,B08301_009M,B08301_009MA"
B08301_010E,Estimate!!Total:!!Public transportation (excluding taxicab):,"B08301_010EA,B08301_010M,B08301_010MA"


In [21]:
short_vars[short_vars['group'] == 'B12006'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B12006_001E,Estimate!!Total:,"B12006_001EA,B12006_001M,B12006_001MA"
B12006_002E,Estimate!!Total:!!Never married:,"B12006_002EA,B12006_002M,B12006_002MA"
B12006_003E,Estimate!!Total:!!Never married:!!Male:,"B12006_003EA,B12006_003M,B12006_003MA"
B12006_004E,Estimate!!Total:!!Never married:!!Male:!!In labor force:,"B12006_004EA,B12006_004M,B12006_004MA"
B12006_005E,Estimate!!Total:!!Never married:!!Male:!!In labor force:!!Employed or in Armed Forces,"B12006_005EA,B12006_005M,B12006_005MA"
B12006_006E,Estimate!!Total:!!Never married:!!Male:!!In labor force:!!Unemployed,"B12006_006EA,B12006_006M,B12006_006MA"
B12006_007E,Estimate!!Total:!!Never married:!!Male:!!Not in labor force,"B12006_007EA,B12006_007M,B12006_007MA"
B12006_008E,Estimate!!Total:!!Never married:!!Female:,"B12006_008EA,B12006_008M,B12006_008MA"
B12006_009E,Estimate!!Total:!!Never married:!!Female:!!In labor force:,"B12006_009EA,B12006_009M,B12006_009MA"
B12006_010E,Estimate!!Total:!!Never married:!!Female:!!In labor force:!!Employed or in Armed Forces,"B12006_010EA,B12006_010M,B12006_010MA"


In [22]:
short_vars[short_vars['group'] == 'B15002'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B15002_001E,Estimate!!Total:,"B15002_001EA,B15002_001M,B15002_001MA"
B15002_002E,Estimate!!Total:!!Male:,"B15002_002EA,B15002_002M,B15002_002MA"
B15002_003E,Estimate!!Total:!!Male:!!No schooling completed,"B15002_003EA,B15002_003M,B15002_003MA"
B15002_004E,Estimate!!Total:!!Male:!!Nursery to 4th grade,"B15002_004EA,B15002_004M,B15002_004MA"
B15002_005E,Estimate!!Total:!!Male:!!5th and 6th grade,"B15002_005EA,B15002_005M,B15002_005MA"
B15002_006E,Estimate!!Total:!!Male:!!7th and 8th grade,"B15002_006EA,B15002_006M,B15002_006MA"
B15002_007E,Estimate!!Total:!!Male:!!9th grade,"B15002_007EA,B15002_007M,B15002_007MA"
B15002_008E,Estimate!!Total:!!Male:!!10th grade,"B15002_008EA,B15002_008M,B15002_008MA"
B15002_009E,Estimate!!Total:!!Male:!!11th grade,"B15002_009EA,B15002_009M,B15002_009MA"
B15002_010E,"Estimate!!Total:!!Male:!!12th grade, no diploma","B15002_010EA,B15002_010M,B15002_010MA"


In [23]:
# poverty 
# see poverty thresholds for census at https://www.census.gov/data/tables/time-series/demo/income-poverty/historical-poverty-thresholds.html

In [24]:
short_vars[short_vars['group'] == 'C17002'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
C17002_001E,Estimate!!Total:,"C17002_001EA,C17002_001M,C17002_001MA"
C17002_002E,Estimate!!Total:!!Under .50,"C17002_002EA,C17002_002M,C17002_002MA"
C17002_003E,Estimate!!Total:!!.50 to .99,"C17002_003EA,C17002_003M,C17002_003MA"
C17002_004E,Estimate!!Total:!!1.00 to 1.24,"C17002_004EA,C17002_004M,C17002_004MA"
C17002_005E,Estimate!!Total:!!1.25 to 1.49,"C17002_005EA,C17002_005M,C17002_005MA"
C17002_006E,Estimate!!Total:!!1.50 to 1.84,"C17002_006EA,C17002_006M,C17002_006MA"
C17002_007E,Estimate!!Total:!!1.85 to 1.99,"C17002_007EA,C17002_007M,C17002_007MA"
C17002_008E,Estimate!!Total:!!2.00 and over,"C17002_008EA,C17002_008M,C17002_008MA"


In [25]:
short_vars[short_vars['group'] == 'B19013'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B19013_001E,Estimate!!Median household income in the past 12 months (in 2021 inflation-adjusted dollars),"B19013_001EA,B19013_001M,B19013_001MA"


In [26]:
short_vars[short_vars['group'] == 'B07009'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B07009_001E,Estimate!!Total:,"B07009_001EA,B07009_001M,B07009_001MA"
B07009_002E,Estimate!!Total:!!Less than high school graduate,"B07009_002EA,B07009_002M,B07009_002MA"
B07009_003E,Estimate!!Total:!!High school graduate (includes equivalency),"B07009_003EA,B07009_003M,B07009_003MA"
B07009_004E,Estimate!!Total:!!Some college or associate's degree,"B07009_004EA,B07009_004M,B07009_004MA"
B07009_005E,Estimate!!Total:!!Bachelor's degree,"B07009_005EA,B07009_005M,B07009_005MA"
B07009_006E,Estimate!!Total:!!Graduate or professional degree,"B07009_006EA,B07009_006M,B07009_006MA"
B07009_007E,Estimate!!Total:!!Same house 1 year ago:,"B07009_007EA,B07009_007M,B07009_007MA"
B07009_008E,Estimate!!Total:!!Same house 1 year ago:!!Less than high school graduate,"B07009_008EA,B07009_008M,B07009_008MA"
B07009_009E,Estimate!!Total:!!Same house 1 year ago:!!High school graduate (includes equivalency),"B07009_009EA,B07009_009M,B07009_009MA"
B07009_010E,Estimate!!Total:!!Same house 1 year ago:!!Some college or associate's degree,"B07009_010EA,B07009_010M,B07009_010MA"


In [27]:
#short_vars[short_vars['group'] == 'B24080'][['label','attributes']].sort_index() 

In [28]:
short_vars[short_vars['group'] == 'B23025'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B23025_001E,Estimate!!Total:,"B23025_001EA,B23025_001M,B23025_001MA"
B23025_002E,Estimate!!Total:!!In labor force:,"B23025_002EA,B23025_002M,B23025_002MA"
B23025_003E,Estimate!!Total:!!In labor force:!!Civilian labor force:,"B23025_003EA,B23025_003M,B23025_003MA"
B23025_004E,Estimate!!Total:!!In labor force:!!Civilian labor force:!!Employed,"B23025_004EA,B23025_004M,B23025_004MA"
B23025_005E,Estimate!!Total:!!In labor force:!!Civilian labor force:!!Unemployed,"B23025_005EA,B23025_005M,B23025_005MA"
B23025_006E,Estimate!!Total:!!In labor force:!!Armed Forces,"B23025_006EA,B23025_006M,B23025_006MA"
B23025_007E,Estimate!!Total:!!Not in labor force,"B23025_007EA,B23025_007M,B23025_007MA"


In [29]:
short_vars[short_vars['group'] == 'B25003'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B25003_001E,Estimate!!Total:,"B25003_001EA,B25003_001M,B25003_001MA"
B25003_002E,Estimate!!Total:!!Owner occupied,"B25003_002EA,B25003_002M,B25003_002MA"
B25003_003E,Estimate!!Total:!!Renter occupied,"B25003_003EA,B25003_003M,B25003_003MA"


In [30]:
# universe: persons for whom poverty status is determined
short_vars[short_vars['group'] == 'B17021'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B17021_001E,Estimate!!Total:,"B17021_001EA,B17021_001M,B17021_001MA"
B17021_002E,Estimate!!Total:!!Income in the past 12 months below poverty level:,"B17021_002EA,B17021_002M,B17021_002MA"
B17021_003E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:,"B17021_003EA,B17021_003M,B17021_003MA"
B17021_004E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In married couple families:,"B17021_004EA,B17021_004M,B17021_004MA"
B17021_005E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In married couple families:!!All relatives,"B17021_005EA,B17021_005M,B17021_005MA"
B17021_006E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In married couple families:!!Non-relatives,"B17021_006EA,B17021_006M,B17021_006MA"
B17021_007E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:,"B17021_007EA,B17021_007M,B17021_007MA"
B17021_008E,"Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:!!Male householder, no spouse present:","B17021_008EA,B17021_008M,B17021_008MA"
B17021_009E,"Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:!!Male householder, no spouse present:!!All relatives","B17021_009EA,B17021_009M,B17021_009MA"
B17021_010E,"Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:!!Male householder, no spouse present:!!Non-relatives","B17021_010EA,B17021_010M,B17021_010MA"


In [31]:
short_vars[short_vars['group'] == 'B28003'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B28003_001E,Estimate!!Total:,"B28003_001EA,B28003_001M,B28003_001MA"
B28003_002E,Estimate!!Total:!!Has a computer:,"B28003_002EA,B28003_002M,B28003_002MA"
B28003_003E,Estimate!!Total:!!Has a computer:!!With dial-up Internet subscription alone,"B28003_003EA,B28003_003M,B28003_003MA"
B28003_004E,Estimate!!Total:!!Has a computer:!!With a broadband Internet subscription,"B28003_004EA,B28003_004M,B28003_004MA"
B28003_005E,Estimate!!Total:!!Has a computer:!!Without an Internet subscription,"B28003_005EA,B28003_005M,B28003_005MA"
B28003_006E,Estimate!!Total:!!No computer,"B28003_006EA,B28003_006M,B28003_006MA"


In [32]:
short_vars[short_vars['group'] == 'B08136'][['label','attributes']].sort_index() 


Unnamed: 0,label,attributes
B08136_001E,Estimate!!Aggregate travel time to work (in minutes):,"B08136_001EA,B08136_001M,B08136_001MA"
B08136_002E,"Estimate!!Aggregate travel time to work (in minutes):!!Car, truck, or van:","B08136_002EA,B08136_002M,B08136_002MA"
B08136_003E,"Estimate!!Aggregate travel time to work (in minutes):!!Car, truck, or van:!!Drove alone","B08136_003EA,B08136_003M,B08136_003MA"
B08136_004E,"Estimate!!Aggregate travel time to work (in minutes):!!Car, truck, or van:!!Carpooled:","B08136_004EA,B08136_004M,B08136_004MA"
B08136_005E,"Estimate!!Aggregate travel time to work (in minutes):!!Car, truck, or van:!!Carpooled:!!In 2-person carpool","B08136_005EA,B08136_005M,B08136_005MA"
B08136_006E,"Estimate!!Aggregate travel time to work (in minutes):!!Car, truck, or van:!!Carpooled:!!In 3-or-more-person carpool","B08136_006EA,B08136_006M,B08136_006MA"
B08136_007E,Estimate!!Aggregate travel time to work (in minutes):!!Public transportation (excluding taxicab):,"B08136_007EA,B08136_007M,B08136_007MA"
B08136_008E,Estimate!!Aggregate travel time to work (in minutes):!!Public transportation (excluding taxicab):!!Bus,"B08136_008EA,B08136_008M,B08136_008MA"
B08136_009E,"Estimate!!Aggregate travel time to work (in minutes):!!Public transportation (excluding taxicab):!!Subway or elevated rail, Light rail, streetcar, or trolley (carro público in Puerto Rico):","B08136_009EA,B08136_009M,B08136_009MA"
B08136_010E,Estimate!!Aggregate travel time to work (in minutes):!!Public transportation (excluding taxicab):!!Long-distance train or commuter rail or Ferryboat:,"B08136_010EA,B08136_010M,B08136_010MA"


In [33]:
#short_vars[short_vars['group'] == 'C24010'][['label','attributes']].sort_index() 

In [34]:
short_vars[short_vars['group'] == 'S2301'][['label']].sort_index() 

Unnamed: 0,label


In [35]:
short_vars[short_vars['group'] == 'C24030'][['label']].sort_index() 


Unnamed: 0,label
C24030_001E,Estimate!!Total:
C24030_002E,Estimate!!Total:!!Male:
C24030_003E,"Estimate!!Total:!!Male:!!Agriculture, forestry, fishing and hunting, and mining:"
C24030_004E,"Estimate!!Total:!!Male:!!Agriculture, forestry, fishing and hunting, and mining:!!Agriculture, forestry, fishing and hunting"
C24030_005E,"Estimate!!Total:!!Male:!!Agriculture, forestry, fishing and hunting, and mining:!!Mining, quarrying, and oil and gas extraction"
C24030_006E,Estimate!!Total:!!Male:!!Construction
C24030_007E,Estimate!!Total:!!Male:!!Manufacturing
C24030_008E,Estimate!!Total:!!Male:!!Wholesale trade
C24030_009E,Estimate!!Total:!!Male:!!Retail trade
C24030_010E,"Estimate!!Total:!!Male:!!Transportation and warehousing, and utilities:"


In [36]:
# CALCULATING MEDIAN HOUSEHOLD INCOME
# https://atcoordinates.info/2019/05/13/calculating-mean-income-for-groups-of-geographies-with-census-acs-data/
# https://mcdc.missouri.edu/help/measures-of-income/
# https://www.socialexplorer.com/help/faq/knowledge-base/calculating-medians
# https://s4.ad.brown.edu/Projects/Diversity/SUC/MHHINote.htm

In [37]:
# search function
# sample = censusdata.search('acs5', 2017, 'concept', 'age ')
# print(sample)

#  to see list of all tables in the ACS5:  c.acs5.tables()

In [38]:
c.acs5.tables()

[{'name': 'B17015',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS OF FAMILIES BY FAMILY TYPE BY SOCIAL SECURITY INCOME BY SUPPLEMENTAL SECURITY INCOME (SSI) AND CASH PUBLIC ASSISTANCE INCOME',
  'variables': 'http://api.census.gov/data/2018/acs/acs5/groups/B17015.json'},
 {'name': 'B18104',
  'description': 'SEX BY AGE BY COGNITIVE DIFFICULTY',
  'variables': 'http://api.census.gov/data/2018/acs/acs5/groups/B18104.json'},
 {'name': 'B17016',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS OF FAMILIES BY FAMILY TYPE BY WORK EXPERIENCE OF HOUSEHOLDER AND SPOUSE',
  'variables': 'http://api.census.gov/data/2018/acs/acs5/groups/B17016.json'},
 {'name': 'B18105',
  'description': 'SEX BY AGE BY AMBULATORY DIFFICULTY',
  'variables': 'http://api.census.gov/data/2018/acs/acs5/groups/B18105.json'},
 {'name': 'B17017',
  'description': 'POVERTY STATUS IN THE PAST 12 MONTHS BY HOUSEHOLD TYPE BY AGE OF HOUSEHOLDER',
  'variables': 'http://api.census.gov/data/2018/acs/acs5/groups/B

In [39]:
# censusdata.printtable(censusdata.censustable('acs5', 2020, 'B08604')) # didn't use bc got error message for this for 2020

In [40]:
# calculating median income (ACS) https://www.census.gov/quickfacts/fact/note/US/INC110221

In [41]:
# Run Census Search to retrieve data on Baltimore City, MD (all census tracts in Baltimore City)
# ***See https://api.census.gov/data/2019/acs/acs5/groups.html  for list of variables and groups for the ACS 5-year estimates***
# ex. "B23025_005E" is "unemployment count"
# The state FIPS code for MD is 24 and the FIPS code for Balt City is 510; * is to pull data for all census tracts in the 510 FIPs
census_data = c.acs5.state_county_tract(("NAME", "B11002_001E", 
                          "B01001_001E", "B01002_001E",
                          "B02001_002E",
                          "B02001_003E",
                          "B02001_005E",              
                          "B02001_008E",
                          "B03001_003E",
                          "B05002_013E",
                          "B08301_001E",               
                          "B08301_010E",
                          "B15003_001E",
                          "B15003_016E",               
                          "B15003_017E",
                          "B15003_021E",               
                          "B15003_022E", 
                          
                          "B17021_001E", 
                          "B17021_002E",
                          "B19001_001E",              
                          "B19013_001E",
                          "B19301_001E",               
                          "B23025_001E",
                          "B23025_002E",
                          "B23025_004E",
                          "B23025_005E",              
                          "B23025_007E",
                          "B25044_003E",
                          "B25044_010E",
                          "B25008_002E",
                          "B25003_001E",               
                          "B25003_002E",                                    
                          "B25003_003E",
                          "B28003_005E",
                          "B28003_006E",
                          "C24030_001E",            
                          "C24030_006E",                
                          "C24030_007E", 
                          "C24030_009E",                
                          "C24030_010E",                
                          "C24030_014E",                
                          "C24030_017E",                
                          "C24030_021E",                
                          "C24030_024E",
                          "C24030_028E",              
                          "C24030_033E",                
                          "C24030_034E",                
                          "C24030_036E",                
                          "C24030_037E",                
                          "C24030_041E",  
                          "C24030_044E",               
                          "C24030_048E",                
                          "C24030_051E",
                          "C24030_055E"),               
                          state_fips = "24",
                          county_fips = "510",
                          tract = "*")
census_pd = pd.DataFrame(census_data)
census_pd.head()      

Unnamed: 0,NAME,B11002_001E,B01001_001E,B01002_001E,B02001_002E,B02001_003E,B02001_005E,B02001_008E,B03001_003E,B05002_013E,...,C24030_037E,GEO_ID,state,county,tract,C24030_041E,C24030_044E,C24030_048E,C24030_051E,C24030_055E
0,"Census Tract 101, Baltimore city, Maryland",2867.0,3022.0,33.8,2621.0,227.0,135.0,39.0,58.0,221.0,...,26.0,1400000US24510010100,24,510,10100,137.0,169.0,545.0,66.0,45.0
1,"Census Tract 102, Baltimore city, Maryland",2869.0,2869.0,32.0,2593.0,107.0,107.0,53.0,69.0,211.0,...,0.0,1400000US24510010200,24,510,10200,93.0,157.0,404.0,37.0,102.0
2,"Census Tract 103, Baltimore city, Maryland",2571.0,2571.0,30.0,2287.0,39.0,165.0,80.0,99.0,275.0,...,40.0,1400000US24510010300,24,510,10300,73.0,193.0,439.0,85.0,33.0
3,"Census Tract 104, Baltimore city, Maryland",2473.0,2478.0,33.9,2114.0,62.0,160.0,65.0,101.0,218.0,...,18.0,1400000US24510010400,24,510,10400,46.0,224.0,218.0,71.0,44.0
4,"Census Tract 105, Baltimore city, Maryland",1949.0,1976.0,31.4,1769.0,103.0,31.0,34.0,294.0,216.0,...,0.0,1400000US24510010500,24,510,10500,16.0,103.0,447.0,23.0,22.0


In [42]:
check = c.acs5.state_county(("NAME", 
                          "C17002_001E",
                          "B17001_002E",
                          "B17021_001E",
                          "B17021_002E"),                                        
                          state_fips = "24",
                          county_fips = "510",
                          )
check_df = pd.DataFrame(check)
check_df      



Unnamed: 0,NAME,C17002_001E,B17001_002E,B17021_001E,B17021_002E,state,county
0,"Baltimore city, Maryland",591942.0,128829.0,591942.0,128829.0,24,510


In [43]:
# check variables that are similar
check = c.acs5.state_county(("NAME", 
                          "B17021_001E",
                          "B17010_001E",
                          "B17007_001E",
                          "B17001_002E",
                             
                          ),               
                          state_fips = "24",
                          county_fips = "510",
                          )
check_df = pd.DataFrame(check)
check_df      

Unnamed: 0,NAME,B17021_001E,B17010_001E,B17007_001E,B17001_002E,state,county
0,"Baltimore city, Maryland",591942.0,121958.0,175276.0,128829.0,24,510


In [44]:
# check variables that are similar

# C24030_001E is for "civilian employed population 16 years and over" so will use that for percent of workers for each industry
# B08301_001E is for "workers 16 years and over"

check3 = c.acs5.state_county(("NAME", 
                          "C24030_001E",
                          "B08301_001E"
                                       
                          ),               
                          state_fips = "24",
                          county_fips = "510",
                          )
check3_df = pd.DataFrame(check3)
check3_df   

Unnamed: 0,NAME,C24030_001E,B08301_001E,state,county
0,"Baltimore city, Maryland",279034.0,274479.0,24,510


In [45]:
short_vars[short_vars['group'] == 'B17021'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B17021_001E,Estimate!!Total:,"B17021_001EA,B17021_001M,B17021_001MA"
B17021_002E,Estimate!!Total:!!Income in the past 12 months below poverty level:,"B17021_002EA,B17021_002M,B17021_002MA"
B17021_003E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:,"B17021_003EA,B17021_003M,B17021_003MA"
B17021_004E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In married couple families:,"B17021_004EA,B17021_004M,B17021_004MA"
B17021_005E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In married couple families:!!All relatives,"B17021_005EA,B17021_005M,B17021_005MA"
B17021_006E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In married couple families:!!Non-relatives,"B17021_006EA,B17021_006M,B17021_006MA"
B17021_007E,Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:,"B17021_007EA,B17021_007M,B17021_007MA"
B17021_008E,"Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:!!Male householder, no spouse present:","B17021_008EA,B17021_008M,B17021_008MA"
B17021_009E,"Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:!!Male householder, no spouse present:!!All relatives","B17021_009EA,B17021_009M,B17021_009MA"
B17021_010E,"Estimate!!Total:!!Income in the past 12 months below poverty level:!!In family households:!!In other families:!!Male householder, no spouse present:!!Non-relatives","B17021_010EA,B17021_010M,B17021_010MA"


In [46]:
# check variables 
check2 = c.acs5.state_county(("NAME", 
                          "B08301_001E",
                          "B08301_010E",
                          "B23025_002E"),               
                          state_fips = "24",
                          county_fips = "510",
                          )
check_df2 = pd.DataFrame(check2)
check_df2     

Unnamed: 0,NAME,B08301_001E,B08301_010E,B23025_002E,state,county
0,"Baltimore city, Maryland",274479.0,49309.0,307536.0,24,510


In [47]:
short_vars[short_vars['group'] == 'B08301'][['label','attributes']].sort_index() 

Unnamed: 0,label,attributes
B08301_001E,Estimate!!Total:,"B08301_001EA,B08301_001M,B08301_001MA"
B08301_002E,"Estimate!!Total:!!Car, truck, or van:","B08301_002EA,B08301_002M,B08301_002MA"
B08301_003E,"Estimate!!Total:!!Car, truck, or van:!!Drove alone","B08301_003EA,B08301_003M,B08301_003MA"
B08301_004E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:","B08301_004EA,B08301_004M,B08301_004MA"
B08301_005E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 2-person carpool","B08301_005EA,B08301_005M,B08301_005MA"
B08301_006E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 3-person carpool","B08301_006EA,B08301_006M,B08301_006MA"
B08301_007E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 4-person carpool","B08301_007EA,B08301_007M,B08301_007MA"
B08301_008E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 5- or 6-person carpool","B08301_008EA,B08301_008M,B08301_008MA"
B08301_009E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 7-or-more-person carpool","B08301_009EA,B08301_009M,B08301_009MA"
B08301_010E,Estimate!!Total:!!Public transportation (excluding taxicab):,"B08301_010EA,B08301_010M,B08301_010MA"


In [48]:
census_pd.columns

Index(['NAME', 'B11002_001E', 'B01001_001E', 'B01002_001E', 'B02001_002E',
       'B02001_003E', 'B02001_005E', 'B02001_008E', 'B03001_003E',
       'B05002_013E', 'B08301_001E', 'B08301_010E', 'B15003_001E',
       'B15003_016E', 'B15003_017E', 'B15003_021E', 'B15003_022E',
       'B17021_001E', 'B17021_002E', 'B19001_001E', 'B19013_001E',
       'B19301_001E', 'B23025_001E', 'B23025_002E', 'B23025_004E',
       'B23025_005E', 'B23025_007E', 'B25044_003E', 'B25044_010E',
       'B25008_002E', 'B25003_001E', 'B25003_002E', 'B25003_003E',
       'B28003_005E', 'B28003_006E', 'C24030_001E', 'C24030_006E',
       'C24030_007E', 'C24030_009E', 'C24030_010E', 'C24030_014E',
       'C24030_017E', 'C24030_021E', 'C24030_024E', 'C24030_028E',
       'C24030_033E', 'C24030_034E', 'C24030_036E', 'C24030_037E', 'GEO_ID',
       'state', 'county', 'tract', 'C24030_041E', 'C24030_044E', 'C24030_048E',
       'C24030_051E', 'C24030_055E'],
      dtype='object')

In [49]:
# Pull values from the ACS 5 yr. census codes/ variables listed and save them in the variable "age_data"

age_data = c.acs5.state_county_tract(("NAME", "B01001_003E",                                    
                          "B01001_004E",
                          "B01001_005E",
                          "B01001_006E",
                          "B01001_007E",
                          "B01001_008E",
                          "B01001_009E",
                          "B01001_010E",
                          "B01001_011E",
                          "B01001_012E",
                          "B01001_013E",
                          "B01001_014E",
                          "B01001_015E",
                          "B01001_016E",            
                          "B01001_017E",
                          "B01001_018E",
                          "B01001_019E",
                          "B01001_020E",
                          "B01001_021E",
                          "B01001_022E",
                          "B01001_023E",
                          "B01001_024E",
                          "B01001_025E",
                          "B01001_027E",
                          "B01001_028E",
                          "B01001_029E",
                          "B01001_030E",
                          "B01001_031E",
                          "B01001_032E",
                          "B01001_033E",
                          "B01001_034E",
                          "B01001_035E",
                          "B01001_036E",
                          "B01001_037E",
                          "B01001_038E",
                          "B01001_039E",            
                          "B01001_040E",
                          "B01001_041E",
                          "B01001_042E",
                          "B01001_043E",
                          "B01001_044E",
                          "B01001_045E",
                          "B01001_046E",
                          "B01001_047E",
                          "B01001_048E",
                          "B01001_049E"),          
                          state_fips = "24",
                          county_fips = "510",
                          tract = "*")
age_pd = pd.DataFrame(age_data)
age_pd.head()      

Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,tract
0,"Census Tract 1901, Baltimore city, Maryland",55.0,66.0,43.0,32.0,23.0,0.0,0.0,0.0,75.0,...,8.0,24.0,7.0,25.0,53.0,38.0,27.0,24,510,190100
1,"Census Tract 1902, Baltimore city, Maryland",64.0,155.0,22.0,54.0,17.0,0.0,0.0,14.0,74.0,...,6.0,9.0,32.0,12.0,26.0,0.0,0.0,24,510,190200
2,"Census Tract 2201, Baltimore city, Maryland",43.0,50.0,169.0,0.0,0.0,0.0,29.0,62.0,387.0,...,15.0,12.0,173.0,81.0,111.0,47.0,61.0,24,510,220100
3,"Census Tract 2303, Baltimore city, Maryland",0.0,0.0,0.0,3.0,0.0,0.0,0.0,105.0,231.0,...,9.0,13.0,10.0,7.0,4.0,0.0,15.0,24,510,230300
4,"Census Tract 2502.07, Baltimore city, Maryland",144.0,51.0,100.0,20.0,55.0,21.0,14.0,30.0,83.0,...,54.0,32.0,22.0,58.0,8.0,37.0,23.0,24,510,250207


In [50]:
# Sum columns of age groups that are under 18 years old for male and female and add new column "Pop. <18 years"
columns_under18 = ["B01001_003E",                                    
                   "B01001_004E",
                   "B01001_005E",
                   "B01001_006E",
                   "B01001_027E",
                   "B01001_028E",
                   "B01001_029E",
                   "B01001_030E"]
age_pd['Pop. <18 years']= age_pd[columns_under18].sum(axis=1)
age_pd.head()

Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,tract,Pop. <18 years
0,"Census Tract 1901, Baltimore city, Maryland",55.0,66.0,43.0,32.0,23.0,0.0,0.0,0.0,75.0,...,24.0,7.0,25.0,53.0,38.0,27.0,24,510,190100,650.0
1,"Census Tract 1902, Baltimore city, Maryland",64.0,155.0,22.0,54.0,17.0,0.0,0.0,14.0,74.0,...,9.0,32.0,12.0,26.0,0.0,0.0,24,510,190200,446.0
2,"Census Tract 2201, Baltimore city, Maryland",43.0,50.0,169.0,0.0,0.0,0.0,29.0,62.0,387.0,...,12.0,173.0,81.0,111.0,47.0,61.0,24,510,220100,380.0
3,"Census Tract 2303, Baltimore city, Maryland",0.0,0.0,0.0,3.0,0.0,0.0,0.0,105.0,231.0,...,13.0,10.0,7.0,4.0,0.0,15.0,24,510,230300,16.0
4,"Census Tract 2502.07, Baltimore city, Maryland",144.0,51.0,100.0,20.0,55.0,21.0,14.0,30.0,83.0,...,32.0,22.0,58.0,8.0,37.0,23.0,24,510,250207,676.0


In [51]:
# Sum columns of age groups that are 18-64 years old (working age) for male and female and add new column "Pop. working age"
columns_working_age = [                                    
                   "B01001_007E",
                   "B01001_008E",
                   "B01001_009E",
                   "B01001_010E",
                   "B01001_011E",
                   "B01001_012E",   
                   "B01001_013E",
                   "B01001_014E",    
                   "B01001_015E",    
                   "B01001_016E",   
                   "B01001_017E", 
                   "B01001_018E",    
                   "B01001_019E",    
                   "B01001_031E",
                   "B01001_032E",
                   "B01001_033E",
                   "B01001_034E",
                   "B01001_035E",   
                   "B01001_036E",
                   "B01001_037E",    
                   "B01001_038E",    
                   "B01001_039E",   
                   "B01001_040E", 
                   "B01001_041E",    
                   "B01001_042E",    
                   "B01001_043E"]    
                           
age_pd['Pop. working age']= age_pd[columns_working_age].sum(axis=1)
age_pd.head()    
            
               


Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,tract,Pop. <18 years,Pop. working age
0,"Census Tract 1901, Baltimore city, Maryland",55.0,66.0,43.0,32.0,23.0,0.0,0.0,0.0,75.0,...,7.0,25.0,53.0,38.0,27.0,24,510,190100,650.0,1201.0
1,"Census Tract 1902, Baltimore city, Maryland",64.0,155.0,22.0,54.0,17.0,0.0,0.0,14.0,74.0,...,32.0,12.0,26.0,0.0,0.0,24,510,190200,446.0,1293.0
2,"Census Tract 2201, Baltimore city, Maryland",43.0,50.0,169.0,0.0,0.0,0.0,29.0,62.0,387.0,...,173.0,81.0,111.0,47.0,61.0,24,510,220100,380.0,3164.0
3,"Census Tract 2303, Baltimore city, Maryland",0.0,0.0,0.0,3.0,0.0,0.0,0.0,105.0,231.0,...,10.0,7.0,4.0,0.0,15.0,24,510,230300,16.0,1316.0
4,"Census Tract 2502.07, Baltimore city, Maryland",144.0,51.0,100.0,20.0,55.0,21.0,14.0,30.0,83.0,...,22.0,58.0,8.0,37.0,23.0,24,510,250207,676.0,1375.0


In [52]:
# Sum columns of age groups that are 65+ years old for male and female and add new column "Pop. 65+ years"
columns_senior = ["B01001_020E",
                  "B01001_021E",
                  "B01001_022E",
                  "B01001_023E",                                    
                  "B01001_024E",
                  "B01001_025E",
                  "B01001_044E",
                  "B01001_045E",
                  "B01001_046E",
                  "B01001_047E",                                    
                  "B01001_048E",
                  "B01001_049E"]               
                          
age_pd['Pop. 65+ years']= age_pd[columns_senior].sum(axis=1)
age_pd.head()                           

Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,tract,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Census Tract 1901, Baltimore city, Maryland",55.0,66.0,43.0,32.0,23.0,0.0,0.0,0.0,75.0,...,25.0,53.0,38.0,27.0,24,510,190100,650.0,1201.0,280.0
1,"Census Tract 1902, Baltimore city, Maryland",64.0,155.0,22.0,54.0,17.0,0.0,0.0,14.0,74.0,...,12.0,26.0,0.0,0.0,24,510,190200,446.0,1293.0,147.0
2,"Census Tract 2201, Baltimore city, Maryland",43.0,50.0,169.0,0.0,0.0,0.0,29.0,62.0,387.0,...,81.0,111.0,47.0,61.0,24,510,220100,380.0,3164.0,791.0
3,"Census Tract 2303, Baltimore city, Maryland",0.0,0.0,0.0,3.0,0.0,0.0,0.0,105.0,231.0,...,7.0,4.0,0.0,15.0,24,510,230300,16.0,1316.0,99.0
4,"Census Tract 2502.07, Baltimore city, Maryland",144.0,51.0,100.0,20.0,55.0,21.0,14.0,30.0,83.0,...,58.0,8.0,37.0,23.0,24,510,250207,676.0,1375.0,257.0


In [53]:
age_temp = age_pd[[ "state", "county", "tract", "Pop. <18 years", "Pop. working age", "Pop. 65+ years"]]
age_temp.head()

Unnamed: 0,state,county,tract,Pop. <18 years,Pop. working age,Pop. 65+ years
0,24,510,190100,650.0,1201.0,280.0
1,24,510,190200,446.0,1293.0,147.0
2,24,510,220100,380.0,3164.0,791.0
3,24,510,230300,16.0,1316.0,99.0
4,24,510,250207,676.0,1375.0,257.0


In [54]:
age_temp.dtypes

state                object
county               object
tract                object
Pop. <18 years      float64
Pop. working age    float64
Pop. 65+ years      float64
dtype: object

In [55]:
age_final = age_temp.copy()

In [56]:
# Create "GEOID" column, that we will later merge tables on
age_final["GEOID1"] = age_final[['state','county', 'tract']].agg(''.join,axis=1) 
age_final

Unnamed: 0,state,county,tract,Pop. <18 years,Pop. working age,Pop. 65+ years,GEOID1
0,24,510,190100,650.0,1201.0,280.0,24510190100
1,24,510,190200,446.0,1293.0,147.0,24510190200
2,24,510,220100,380.0,3164.0,791.0,24510220100
3,24,510,230300,16.0,1316.0,99.0,24510230300
4,24,510,250207,676.0,1375.0,257.0,24510250207
5,24,510,250303,543.0,1533.0,294.0,24510250303
6,24,510,260202,1205.0,3857.0,467.0,24510260202
7,24,510,272004,757.0,2125.0,1016.0,24510272004
8,24,510,120202,159.0,5626.0,190.0,24510120202
9,24,510,272005,918.0,1863.0,614.0,24510272005


In [57]:
# remove extraneous columns
age_final = age_final.drop(["state", "county", "tract"], axis=1)
age_final.head()

Unnamed: 0,Pop. <18 years,Pop. working age,Pop. 65+ years,GEOID1
0,650.0,1201.0,280.0,24510190100
1,446.0,1293.0,147.0,24510190200
2,380.0,3164.0,791.0,24510220100
3,16.0,1316.0,99.0,24510230300
4,676.0,1375.0,257.0,24510250207


In [58]:
# Did not add in daytime population - will use ESRI business analyst for this 
# See https://www.census.gov/topics/employment/commuting/guidance/calculations.html
# "commuter-adjusted daytime population estimates" =    
#         total resident population + total workers working in area - total workers living in area

# For "Workers in Workplace Geography," see https://www.census.gov/topics/employment/commuting/guidance/calculations.html
# "Total workers working in area:
# B08604 Total Workers for Workplace Geography
# B08604 is only available for data years 2011 and after. 
# The tables for workplace geography are only available for the following geographic summary levels: States; 
# Counties; Places; County Subdivisions in selected states (not MD); Combined Statistical Areas; Metropolitan 
# and Micropolitan Statistical Areas, and their associated Metropolitan Divisions and Principal Cities; 

census_data_workers = c.acs5.state_county(("NAME", 
                          "B08604_001E"),               
                          state_fips = "24",
                          county_fips = "510") 

# convert to dataframe
workers_df = pd.DataFrame(census_data_workers)
workers_df
                         

Unnamed: 0,NAME,B08604_001E,state,county
0,"Baltimore city, Maryland",382638.0,24,510


In [59]:
# Create Geographic Identifier ("GEOID") for each census tract by adding state fips code + county fips code + census tract code
# see https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html#:~:text=The%20full%20GEOID%20for%20many,codes%2C%20in%20which%20they%20nest.
census_pd["GEOID"] = census_pd['state'] + census_pd['county'] + census_pd['tract']
census_pd

Unnamed: 0,NAME,B11002_001E,B01001_001E,B01002_001E,B02001_002E,B02001_003E,B02001_005E,B02001_008E,B03001_003E,B05002_013E,...,GEO_ID,state,county,tract,C24030_041E,C24030_044E,C24030_048E,C24030_051E,C24030_055E,GEOID
0,"Census Tract 101, Baltimore city, Maryland",2867.0,3022.0,33.8,2621.0,227.0,135.0,39.0,58.0,221.0,...,1400000US24510010100,24,510,10100,137.0,169.0,545.0,66.0,45.0,24510010100
1,"Census Tract 102, Baltimore city, Maryland",2869.0,2869.0,32.0,2593.0,107.0,107.0,53.0,69.0,211.0,...,1400000US24510010200,24,510,10200,93.0,157.0,404.0,37.0,102.0,24510010200
2,"Census Tract 103, Baltimore city, Maryland",2571.0,2571.0,30.0,2287.0,39.0,165.0,80.0,99.0,275.0,...,1400000US24510010300,24,510,10300,73.0,193.0,439.0,85.0,33.0,24510010300
3,"Census Tract 104, Baltimore city, Maryland",2473.0,2478.0,33.9,2114.0,62.0,160.0,65.0,101.0,218.0,...,1400000US24510010400,24,510,10400,46.0,224.0,218.0,71.0,44.0,24510010400
4,"Census Tract 105, Baltimore city, Maryland",1949.0,1976.0,31.4,1769.0,103.0,31.0,34.0,294.0,216.0,...,1400000US24510010500,24,510,10500,16.0,103.0,447.0,23.0,22.0,24510010500
5,"Census Tract 201, Baltimore city, Maryland",1713.0,1713.0,31.9,1366.0,144.0,56.0,96.0,144.0,162.0,...,1400000US24510020100,24,510,20100,31.0,145.0,266.0,75.0,31.0,24510020100
6,"Census Tract 202, Baltimore city, Maryland",1835.0,1835.0,31.3,1396.0,197.0,161.0,57.0,308.0,379.0,...,1400000US24510020200,24,510,20200,14.0,98.0,311.0,20.0,58.0,24510020200
7,"Census Tract 203, Baltimore city, Maryland",3877.0,3877.0,35.5,3195.0,89.0,364.0,229.0,265.0,621.0,...,1400000US24510020300,24,510,20300,110.0,262.0,685.0,84.0,32.0,24510020300
8,"Census Tract 301, Baltimore city, Maryland",2604.0,2696.0,33.4,563.0,2036.0,48.0,11.0,242.0,206.0,...,1400000US24510030100,24,510,30100,23.0,97.0,201.0,21.0,127.0,24510030100
9,"Census Tract 302, Baltimore city, Maryland",2137.0,2327.0,36.4,1272.0,765.0,199.0,48.0,160.0,308.0,...,1400000US24510030200,24,510,30200,67.0,78.0,317.0,17.0,42.0,24510030200


In [60]:
# "Number of rows" = # of census tracts in the dataframe
print("Number of rows, columns: ", census_pd.shape)

Number of rows, columns:  (200, 59)


In [61]:
# Replace the census variable codes (such as "B19013_001E") in the dataframe with text so it's understandable
census_pd = census_pd.rename(columns={"B19001_001E": "Total households",
                                      "B01001_001E": "Population",
                                      "tract": "Census Tract",  
                                      "B08301_001E": "Workers_16_yrs_and_over",  
                                      "B08301_010E": "Commute_to_work_public_transportation",       
                                      "B01002_001E": "Median age",
                                      "B02001_002E": "Pop. white",
                                      "B02001_003E": "Pop. Black",
                                      "B02001_005E": "Pop. Asian",        
                                      "B02001_008E": "Pop. two or more races",
                                      "B03001_003E": "Pop. Hispanic origin",
                                      "B05002_013E": "# Foreign-born",
                                      "B11002_001E": "Household population",
                                      "B15003_001E": "Population_25_yrs_and_over",
                                      "B15003_016E": "# persons 12th grade, no diploma",
                                      "B15003_017E": "# persons graduated high school",
                                      "B15003_021E": "# persons Associate's degree",
                                      "B15003_022E": "# persons Bachelor's degree",
                                      "B19013_001E": "Median household income",
                                      "B19301_001E": "Per capita income", 
                                      "B17021_002E": "Income_past_12mos_below_pov_level",
                                      "B17021_001E": "Persons_poverty_status_determined",
                                      "B23025_001E": "Pop_16_yrs_and_over",
                                      "B23025_002E": "Pop_in_labor_force",
                                      "B23025_004E": "Employed_civilians",
                                      "B23025_005E": "Unemployed_civilians",
                                      "B23025_007E": "Pop_NOT_in_labor_force",
                                      "B25008_002E": "Total pop. in occupied housing units by tenure",
                                      "B25003_001E": "Total occupied units",
                                      "B25003_002E": "Total owner-occupied units",
                                      "B25003_003E": "Total renter-occupied units",
                                      "B25044_003E": "No_vehicle_available_owneroccupied_unit",
                                      "B25044_010E": "No_vehicle_available_renteroccupied_unit",
                                      "B28003_005E": "# households with computer, no internet subscription",
                                      "B28003_006E": "# households no computer",
                                      "C24030_001E": "Civilian_employed_pop_16yrs_and_over",
                                      "C24030_006E": "ConstructionM", 
                                      "C24030_007E": "ManufacturingM", 
                                      "C24030_009E": "Retail_TradeM",
                                      "C24030_010E": "Transportation_warehousing_utilitiesM",
                                      "C24030_014E": "Finance_insurance_realestateM",               
                                      "C24030_017E": "Professional_scientific_mgmt_administrativeM",              
                                      "C24030_021E": "Educational_healthcare_socialM",                
                                      "C24030_024E": "Accommodations_foodservices_arts_entertainmentM",
                                      "C24030_028E": "Public_administrationM",
                                      "C24030_033E": "ConstructionF",                
                                      "C24030_034E": "ManufacturingF",
                                      "C24030_036E": "Retail_TradeF",                
                                      "C24030_037E": "Transportation_warehousing_utilitiesF",                
                                      "C24030_041E": "Finance_insurance_realestateF",  
                                      "C24030_044E": "Professional_scientific_mgmt_administrativeF",               
                                      "C24030_048E": "Educational_healthcare_socialF",
                                      "C24030_051E": "Accommodations_foodservices_arts_entertainmentF",
                                      "C24030_055E": "Public_administrationF",
                                      "NAME": "Name", "state": "State", "GEOID": "GEOID"
                                     })       
census_pd.head()                              



Unnamed: 0,Name,Household population,Population,Median age,Pop. white,Pop. Black,Pop. Asian,Pop. two or more races,Pop. Hispanic origin,# Foreign-born,...,GEO_ID,State,county,Census Tract,Finance_insurance_realestateF,Professional_scientific_mgmt_administrativeF,Educational_healthcare_socialF,Accommodations_foodservices_arts_entertainmentF,Public_administrationF,GEOID
0,"Census Tract 101, Baltimore city, Maryland",2867.0,3022.0,33.8,2621.0,227.0,135.0,39.0,58.0,221.0,...,1400000US24510010100,24,510,10100,137.0,169.0,545.0,66.0,45.0,24510010100
1,"Census Tract 102, Baltimore city, Maryland",2869.0,2869.0,32.0,2593.0,107.0,107.0,53.0,69.0,211.0,...,1400000US24510010200,24,510,10200,93.0,157.0,404.0,37.0,102.0,24510010200
2,"Census Tract 103, Baltimore city, Maryland",2571.0,2571.0,30.0,2287.0,39.0,165.0,80.0,99.0,275.0,...,1400000US24510010300,24,510,10300,73.0,193.0,439.0,85.0,33.0,24510010300
3,"Census Tract 104, Baltimore city, Maryland",2473.0,2478.0,33.9,2114.0,62.0,160.0,65.0,101.0,218.0,...,1400000US24510010400,24,510,10400,46.0,224.0,218.0,71.0,44.0,24510010400
4,"Census Tract 105, Baltimore city, Maryland",1949.0,1976.0,31.4,1769.0,103.0,31.0,34.0,294.0,216.0,...,1400000US24510010500,24,510,10500,16.0,103.0,447.0,23.0,22.0,24510010500


In [62]:
# info. on calculating unemployment rate: https://www.investopedia.com/ask/answers/063015/how-does-us-bureau-labor-statistics-calculate-unemployment-rate-published-monthly.asp

# "How Does the U.S. Determine the Unemployment Rate?
# The U.S. determines the unemployment rate by dividing the unemployed individuals by the total number of individuals in the labor force."

In [63]:
census_pd.columns

Index(['Name', 'Household population', 'Population', 'Median age',
       'Pop. white', 'Pop. Black', 'Pop. Asian', 'Pop. two or more races',
       'Pop. Hispanic origin', '# Foreign-born', 'Workers_16_yrs_and_over',
       'Commute_to_work_public_transportation', 'Population_25_yrs_and_over',
       '# persons 12th grade, no diploma', '# persons graduated high school',
       '# persons Associate's degree', '# persons Bachelor's degree',
       'Persons_poverty_status_determined',
       'Income_past_12mos_below_pov_level', 'Total households',
       'Median household income', 'Per capita income', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Employed_civilians', 'Unemployed_civilians',
       'Pop_NOT_in_labor_force', 'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       '# ho

In [64]:
# Create a new column for each job industry by adding male and female values for that industry together 
census_pd["Construction"] = census_pd.apply(lambda row: row.ConstructionM + row.ConstructionF, axis=1)
census_pd.columns

Index(['Name', 'Household population', 'Population', 'Median age',
       'Pop. white', 'Pop. Black', 'Pop. Asian', 'Pop. two or more races',
       'Pop. Hispanic origin', '# Foreign-born', 'Workers_16_yrs_and_over',
       'Commute_to_work_public_transportation', 'Population_25_yrs_and_over',
       '# persons 12th grade, no diploma', '# persons graduated high school',
       '# persons Associate's degree', '# persons Bachelor's degree',
       'Persons_poverty_status_determined',
       'Income_past_12mos_below_pov_level', 'Total households',
       'Median household income', 'Per capita income', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Employed_civilians', 'Unemployed_civilians',
       'Pop_NOT_in_labor_force', 'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       '# ho

In [65]:
#census_pd.dtypes

In [66]:
census_pd["Construction count"] = census_pd.apply(lambda row: row.ConstructionM + row.ConstructionF, axis=1)

In [67]:
census_pd["Manufacturing count"] = census_pd.apply(lambda row: row.ManufacturingM + row.ManufacturingF, axis=1)

In [68]:
census_pd["Retail_Trade count"] = census_pd.apply(lambda row: row.Retail_TradeM + row.Retail_TradeF, axis=1)

In [69]:
census_pd["Transportation_warehousing_utilities count"] = census_pd.apply(lambda row: row.Transportation_warehousing_utilitiesM + row.Transportation_warehousing_utilitiesF, axis=1)

In [70]:
census_pd["Finance_insurance_realestate count"] = census_pd.apply(lambda row: row.Finance_insurance_realestateM + row.Finance_insurance_realestateF, axis=1)


In [71]:
census_pd["Professional_scientific_mgmt_administrative count"] = census_pd.apply(lambda row: row.Professional_scientific_mgmt_administrativeM + row.Professional_scientific_mgmt_administrativeF, axis=1)


In [72]:
census_pd["Educational_healthcare_social count"] = census_pd.apply(lambda row: row.Educational_healthcare_socialM + row.Educational_healthcare_socialF, axis=1)


In [73]:
census_pd["Accommodations_foodservices_arts_entertainment count"] = census_pd.apply(lambda row: row.Accommodations_foodservices_arts_entertainmentM + row.Accommodations_foodservices_arts_entertainmentF, axis=1)

In [74]:
census_pd["Public_administration count"] = census_pd.apply(lambda row: row.Public_administrationM + row.Public_administrationF, axis=1)

In [75]:
census_pd.columns

Index(['Name', 'Household population', 'Population', 'Median age',
       'Pop. white', 'Pop. Black', 'Pop. Asian', 'Pop. two or more races',
       'Pop. Hispanic origin', '# Foreign-born', 'Workers_16_yrs_and_over',
       'Commute_to_work_public_transportation', 'Population_25_yrs_and_over',
       '# persons 12th grade, no diploma', '# persons graduated high school',
       '# persons Associate's degree', '# persons Bachelor's degree',
       'Persons_poverty_status_determined',
       'Income_past_12mos_below_pov_level', 'Total households',
       'Median household income', 'Per capita income', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Employed_civilians', 'Unemployed_civilians',
       'Pop_NOT_in_labor_force', 'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       '# ho

In [76]:
# remove extraneous columns
census_pd = census_pd.drop(['ConstructionM', 'ManufacturingM',
       'Retail_TradeM', 'Transportation_warehousing_utilitiesM',
       'Finance_insurance_realestateM',
       'Professional_scientific_mgmt_administrativeM',
       'Educational_healthcare_socialM',
       'Accommodations_foodservices_arts_entertainmentM',
       'Public_administrationM', 'ConstructionF', 'ManufacturingF',
       'Retail_TradeF', 'Transportation_warehousing_utilitiesF',
       'Finance_insurance_realestateF',
       'Professional_scientific_mgmt_administrativeF',
       'Educational_healthcare_socialF',
       'Accommodations_foodservices_arts_entertainmentF',
       'Public_administrationF', 'State', 'county'], axis=1)
census_pd.columns

Index(['Name', 'Household population', 'Population', 'Median age',
       'Pop. white', 'Pop. Black', 'Pop. Asian', 'Pop. two or more races',
       'Pop. Hispanic origin', '# Foreign-born', 'Workers_16_yrs_and_over',
       'Commute_to_work_public_transportation', 'Population_25_yrs_and_over',
       '# persons 12th grade, no diploma', '# persons graduated high school',
       '# persons Associate's degree', '# persons Bachelor's degree',
       'Persons_poverty_status_determined',
       'Income_past_12mos_below_pov_level', 'Total households',
       'Median household income', 'Per capita income', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Employed_civilians', 'Unemployed_civilians',
       'Pop_NOT_in_labor_force', 'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       '# ho

In [77]:
# Calculate the number of unique census tracts in the DataFrame
tract_count = len(census_pd["Census Tract"].unique())
tract_count

200

In [78]:
# merge the census_pd dataframe with the age_final dataframe on the common column "GEOID"
census_joined2 = pd.concat([census_pd, age_final], axis=1)
census_joined2

Unnamed: 0,Name,Household population,Population,Median age,Pop. white,Pop. Black,Pop. Asian,Pop. two or more races,Pop. Hispanic origin,# Foreign-born,...,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Pop. <18 years,Pop. working age,Pop. 65+ years,GEOID1
0,"Census Tract 101, Baltimore city, Maryland",2867.0,3022.0,33.8,2621.0,227.0,135.0,39.0,58.0,221.0,...,48.0,254.0,401.0,684.0,119.0,102.0,650.0,1201.0,280.0,24510190100
1,"Census Tract 102, Baltimore city, Maryland",2869.0,2869.0,32.0,2593.0,107.0,107.0,53.0,69.0,211.0,...,58.0,276.0,335.0,623.0,84.0,203.0,446.0,1293.0,147.0,24510190200
2,"Census Tract 103, Baltimore city, Maryland",2571.0,2571.0,30.0,2287.0,39.0,165.0,80.0,99.0,275.0,...,74.0,211.0,458.0,668.0,153.0,70.0,380.0,3164.0,791.0,24510220100
3,"Census Tract 104, Baltimore city, Maryland",2473.0,2478.0,33.9,2114.0,62.0,160.0,65.0,101.0,218.0,...,50.0,182.0,475.0,357.0,107.0,143.0,16.0,1316.0,99.0,24510230300
4,"Census Tract 105, Baltimore city, Maryland",1949.0,1976.0,31.4,1769.0,103.0,31.0,34.0,294.0,216.0,...,19.0,70.0,230.0,603.0,79.0,73.0,676.0,1375.0,257.0,24510250207
5,"Census Tract 201, Baltimore city, Maryland",1713.0,1713.0,31.9,1366.0,144.0,56.0,96.0,144.0,162.0,...,23.0,106.0,242.0,389.0,118.0,72.0,543.0,1533.0,294.0,24510250303
6,"Census Tract 202, Baltimore city, Maryland",1835.0,1835.0,31.3,1396.0,197.0,161.0,57.0,308.0,379.0,...,6.0,77.0,248.0,522.0,88.0,107.0,1205.0,3857.0,467.0,24510260202
7,"Census Tract 203, Baltimore city, Maryland",3877.0,3877.0,35.5,3195.0,89.0,364.0,229.0,265.0,621.0,...,20.0,225.0,551.0,1073.0,180.0,273.0,757.0,2125.0,1016.0,24510272004
8,"Census Tract 301, Baltimore city, Maryland",2604.0,2696.0,33.4,563.0,2036.0,48.0,11.0,242.0,206.0,...,15.0,56.0,177.0,270.0,48.0,127.0,159.0,5626.0,190.0,24510120202
9,"Census Tract 302, Baltimore city, Maryland",2137.0,2327.0,36.4,1272.0,765.0,199.0,48.0,160.0,308.0,...,40.0,181.0,307.0,500.0,41.0,67.0,918.0,1863.0,614.0,24510272005


In [79]:
census_joined2.columns

Index(['Name', 'Household population', 'Population', 'Median age',
       'Pop. white', 'Pop. Black', 'Pop. Asian', 'Pop. two or more races',
       'Pop. Hispanic origin', '# Foreign-born', 'Workers_16_yrs_and_over',
       'Commute_to_work_public_transportation', 'Population_25_yrs_and_over',
       '# persons 12th grade, no diploma', '# persons graduated high school',
       '# persons Associate's degree', '# persons Bachelor's degree',
       'Persons_poverty_status_determined',
       'Income_past_12mos_below_pov_level', 'Total households',
       'Median household income', 'Per capita income', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Employed_civilians', 'Unemployed_civilians',
       'Pop_NOT_in_labor_force', 'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       '# ho

In [80]:
# see the values in the "GEOID" column to make sure it matches with tracts in "NAME" column (before we delete the "GEOID1" column)
#list = census_joined['GEOID'].tolist()
#print(list)


In [81]:
# Change order of columns in DataFrame by using double brackets

census_joined2 = census_joined2[["Census Tract", "GEOID", "GEOID1", "Population", "Total households", "Household population", 
                       "Median household income", "Per capita income", "Persons_poverty_status_determined", 
                       "Income_past_12mos_below_pov_level","Pop. <18 years", "Pop. working age", "Pop. 65+ years",
                       "Pop_16_yrs_and_over", "Pop_in_labor_force", "Pop_NOT_in_labor_force", "Employed_civilians", "Unemployed_civilians",
                       "Commute_to_work_public_transportation", "Workers_16_yrs_and_over", "Population_25_yrs_and_over",
                       "# persons 12th grade, no diploma",
                       "# persons graduated high school", "# persons Associate's degree",
                       "# persons Bachelor's degree",
                       "Median age","Pop. white", "Pop. Black", "Pop. two or more races", "Pop. Hispanic origin", 
                       "Pop. Asian","Total pop. in occupied housing units by tenure", "Total occupied units",
                       "Total owner-occupied units", 
                       "Total renter-occupied units", "No_vehicle_available_owneroccupied_unit",
                       "No_vehicle_available_renteroccupied_unit",
                       "# Foreign-born", "# households with computer, no internet subscription",
                       "# households no computer", "Civilian_employed_pop_16yrs_and_over",                                      
                       "Construction count", "Manufacturing count", "Retail_Trade count","Transportation_warehousing_utilities count",                
                       "Finance_insurance_realestate count", "Professional_scientific_mgmt_administrative count",               
                       "Educational_healthcare_social count", "Accommodations_foodservices_arts_entertainment count",
                       "Public_administration count"      
                        ]]

census_joined2.head()

Unnamed: 0,Census Tract,GEOID,GEOID1,Population,Total households,Household population,Median household income,Per capita income,Persons_poverty_status_determined,Income_past_12mos_below_pov_level,...,Civilian_employed_pop_16yrs_and_over,Construction count,Manufacturing count,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count
0,10100,24510010100,24510190100,3022.0,1398.0,2867.0,109808.0,63832.0,2867.0,124.0,...,2163.0,71.0,113.0,138.0,48.0,254.0,401.0,684.0,119.0,102.0
1,10200,24510010200,24510190200,2869.0,1393.0,2869.0,107617.0,56058.0,2860.0,169.0,...,2100.0,40.0,130.0,78.0,58.0,276.0,335.0,623.0,84.0,203.0
2,10300,24510010300,24510220100,2571.0,1079.0,2571.0,138686.0,68345.0,2571.0,110.0,...,2027.0,63.0,140.0,94.0,74.0,211.0,458.0,668.0,153.0,70.0
3,10400,24510010400,24510230300,2478.0,1388.0,2473.0,114453.0,93670.0,2478.0,143.0,...,1790.0,132.0,171.0,81.0,50.0,182.0,475.0,357.0,107.0,143.0
4,10500,24510010500,24510250207,1976.0,788.0,1949.0,99167.0,54193.0,1954.0,173.0,...,1310.0,57.0,46.0,35.0,19.0,70.0,230.0,603.0,79.0,73.0


In [82]:
# Remove extra GEOID/ tract columns 
census_joined2 = census_joined2.drop(["GEOID1", "Census Tract"], axis=1)
census_joined2.head()

Unnamed: 0,GEOID,Population,Total households,Household population,Median household income,Per capita income,Persons_poverty_status_determined,Income_past_12mos_below_pov_level,Pop. <18 years,Pop. working age,...,Civilian_employed_pop_16yrs_and_over,Construction count,Manufacturing count,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count
0,24510010100,3022.0,1398.0,2867.0,109808.0,63832.0,2867.0,124.0,650.0,1201.0,...,2163.0,71.0,113.0,138.0,48.0,254.0,401.0,684.0,119.0,102.0
1,24510010200,2869.0,1393.0,2869.0,107617.0,56058.0,2860.0,169.0,446.0,1293.0,...,2100.0,40.0,130.0,78.0,58.0,276.0,335.0,623.0,84.0,203.0
2,24510010300,2571.0,1079.0,2571.0,138686.0,68345.0,2571.0,110.0,380.0,3164.0,...,2027.0,63.0,140.0,94.0,74.0,211.0,458.0,668.0,153.0,70.0
3,24510010400,2478.0,1388.0,2473.0,114453.0,93670.0,2478.0,143.0,16.0,1316.0,...,1790.0,132.0,171.0,81.0,50.0,182.0,475.0,357.0,107.0,143.0
4,24510010500,1976.0,788.0,1949.0,99167.0,54193.0,1954.0,173.0,676.0,1375.0,...,1310.0,57.0,46.0,35.0,19.0,70.0,230.0,603.0,79.0,73.0


In [83]:
# Save the ArcGIS acreage/ square mile data compiled by Patrick (csv file) in variable

data_GEOID_SQMI = "raw_data/BaltimoreCensusTract_Area.csv"

In [84]:
# Create dataframes by reading the variable data from the code block above
df_GEOID_SQMI = pd.read_csv(data_GEOID_SQMI)
df_GEOID_SQMI

Unnamed: 0,GEOID,NAMELSAD,Shape_Area,Area_Acres,Area_mi2
0,24510272007,Census Tract 2720.07,8635608.0,198.245483,0.30976
1,24510120202,Census Tract 1202.02,10567490.0,242.595201,0.379057
2,24510272005,Census Tract 2720.05,9986757.0,229.263486,0.358226
3,24510120201,Census Tract 1202.01,3745909.0,85.993887,0.134366
4,24510272004,Census Tract 2720.04,7630553.0,175.172691,0.273708
5,24510272006,Census Tract 2720.06,7930308.0,182.054103,0.284461
6,24510280500,Census Tract 2805,9945122.0,228.30768,0.356732
7,24510260202,Census Tract 2602.02,11254630.0,258.369773,0.403704
8,24510260203,Census Tract 2602.03,9184329.0,210.842332,0.329442
9,24510260301,Census Tract 2603.01,6537325.0,150.075739,0.234494


In [85]:
# Need to rename the "GEOID " column as it has a space in the name so won't be able to merge on it (because other "GEOID" columns do not contain spaces)
# Replace the census variable codes (such as "B19013_001E") in the dataframe with text so it's understandable
df_GEOID_SQMI = df_GEOID_SQMI.rename(columns={"GEOID ": "GEOID"
                                    })

In [86]:
df_GEOID_SQMI.dtypes

GEOID           int64
NAMELSAD       object
Shape_Area    float64
Area_Acres    float64
Area_mi2      float64
dtype: object

In [87]:
# Change data type of the "GEOID" column so that it's a string/ text and not an integer
df_GEOID_SQMI["GEOID"] = df_GEOID_SQMI["GEOID"].astype('str')
df_GEOID_SQMI.dtypes

GEOID          object
NAMELSAD       object
Shape_Area    float64
Area_Acres    float64
Area_mi2      float64
dtype: object

In [88]:
# drop extraneous columns
GEOID_SQMI_df = df_GEOID_SQMI.drop(['NAMELSAD', 'Shape_Area'], axis=1)
GEOID_SQMI_df

Unnamed: 0,GEOID,Area_Acres,Area_mi2
0,24510272007,198.245483,0.30976
1,24510120202,242.595201,0.379057
2,24510272005,229.263486,0.358226
3,24510120201,85.993887,0.134366
4,24510272004,175.172691,0.273708
5,24510272006,182.054103,0.284461
6,24510280500,228.30768,0.356732
7,24510260202,258.369773,0.403704
8,24510260203,210.842332,0.329442
9,24510260301,150.075739,0.234494


In [89]:
SQMI_df = GEOID_SQMI_df.sort_values('GEOID')
SQMI_df

Unnamed: 0,GEOID,Area_Acres,Area_mi2
137,24510010100,133.820372,0.209095
128,24510010200,87.758257,0.137123
138,24510010300,169.103575,0.264225
139,24510010400,230.345352,0.359916
140,24510010500,38.359064,0.059936
141,24510020100,43.126141,0.067385
142,24510020200,48.815301,0.076274
143,24510020300,258.665092,0.404166
144,24510030100,105.613394,0.165022
175,24510030200,147.748239,0.230858


In [90]:
# combine GEOID_SQMI_df with census_joined2 df
census_temp = pd.merge(
    census_joined2, GEOID_SQMI_df, on="GEOID")
census_temp

Unnamed: 0,GEOID,Population,Total households,Household population,Median household income,Per capita income,Persons_poverty_status_determined,Income_past_12mos_below_pov_level,Pop. <18 years,Pop. working age,...,Manufacturing count,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2
0,24510010100,3022.0,1398.0,2867.0,109808.0,63832.0,2867.0,124.0,650.0,1201.0,...,113.0,138.0,48.0,254.0,401.0,684.0,119.0,102.0,133.820372,0.209095
1,24510010200,2869.0,1393.0,2869.0,107617.0,56058.0,2860.0,169.0,446.0,1293.0,...,130.0,78.0,58.0,276.0,335.0,623.0,84.0,203.0,87.758257,0.137123
2,24510010300,2571.0,1079.0,2571.0,138686.0,68345.0,2571.0,110.0,380.0,3164.0,...,140.0,94.0,74.0,211.0,458.0,668.0,153.0,70.0,169.103575,0.264225
3,24510010400,2478.0,1388.0,2473.0,114453.0,93670.0,2478.0,143.0,16.0,1316.0,...,171.0,81.0,50.0,182.0,475.0,357.0,107.0,143.0,230.345352,0.359916
4,24510010500,1976.0,788.0,1949.0,99167.0,54193.0,1954.0,173.0,676.0,1375.0,...,46.0,35.0,19.0,70.0,230.0,603.0,79.0,73.0,38.359064,0.059936
5,24510020100,1713.0,808.0,1713.0,90500.0,53678.0,1713.0,126.0,543.0,1533.0,...,68.0,52.0,23.0,106.0,242.0,389.0,118.0,72.0,43.126141,0.067385
6,24510020200,1835.0,761.0,1835.0,89375.0,47316.0,1835.0,282.0,1205.0,3857.0,...,59.0,40.0,6.0,77.0,248.0,522.0,88.0,107.0,48.815301,0.076274
7,24510020300,3877.0,2143.0,3877.0,108516.0,87413.0,3877.0,359.0,757.0,2125.0,...,140.0,112.0,20.0,225.0,551.0,1073.0,180.0,273.0,258.665092,0.404166
8,24510030100,2696.0,1054.0,2604.0,19741.0,17299.0,2696.0,1133.0,159.0,5626.0,...,0.0,57.0,15.0,56.0,177.0,270.0,48.0,127.0,105.613394,0.165022
9,24510030200,2327.0,1161.0,2137.0,77301.0,57793.0,2310.0,520.0,918.0,1863.0,...,68.0,21.0,40.0,181.0,307.0,500.0,41.0,67.0,147.748239,0.230858


In [91]:
census_temp2 = census_temp.sort_values('GEOID')
census_temp2

Unnamed: 0,GEOID,Population,Total households,Household population,Median household income,Per capita income,Persons_poverty_status_determined,Income_past_12mos_below_pov_level,Pop. <18 years,Pop. working age,...,Manufacturing count,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2
0,24510010100,3022.0,1398.0,2867.0,109808.0,63832.0,2867.0,124.0,650.0,1201.0,...,113.0,138.0,48.0,254.0,401.0,684.0,119.0,102.0,133.820372,0.209095
1,24510010200,2869.0,1393.0,2869.0,107617.0,56058.0,2860.0,169.0,446.0,1293.0,...,130.0,78.0,58.0,276.0,335.0,623.0,84.0,203.0,87.758257,0.137123
2,24510010300,2571.0,1079.0,2571.0,138686.0,68345.0,2571.0,110.0,380.0,3164.0,...,140.0,94.0,74.0,211.0,458.0,668.0,153.0,70.0,169.103575,0.264225
3,24510010400,2478.0,1388.0,2473.0,114453.0,93670.0,2478.0,143.0,16.0,1316.0,...,171.0,81.0,50.0,182.0,475.0,357.0,107.0,143.0,230.345352,0.359916
4,24510010500,1976.0,788.0,1949.0,99167.0,54193.0,1954.0,173.0,676.0,1375.0,...,46.0,35.0,19.0,70.0,230.0,603.0,79.0,73.0,38.359064,0.059936
5,24510020100,1713.0,808.0,1713.0,90500.0,53678.0,1713.0,126.0,543.0,1533.0,...,68.0,52.0,23.0,106.0,242.0,389.0,118.0,72.0,43.126141,0.067385
6,24510020200,1835.0,761.0,1835.0,89375.0,47316.0,1835.0,282.0,1205.0,3857.0,...,59.0,40.0,6.0,77.0,248.0,522.0,88.0,107.0,48.815301,0.076274
7,24510020300,3877.0,2143.0,3877.0,108516.0,87413.0,3877.0,359.0,757.0,2125.0,...,140.0,112.0,20.0,225.0,551.0,1073.0,180.0,273.0,258.665092,0.404166
8,24510030100,2696.0,1054.0,2604.0,19741.0,17299.0,2696.0,1133.0,159.0,5626.0,...,0.0,57.0,15.0,56.0,177.0,270.0,48.0,127.0,105.613394,0.165022
9,24510030200,2327.0,1161.0,2137.0,77301.0,57793.0,2310.0,520.0,918.0,1863.0,...,68.0,21.0,40.0,181.0,307.0,500.0,41.0,67.0,147.748239,0.230858


In [92]:
# GIS Analyst, Patrick, provided a csv (created from ARcGIS) that contains a key to match Baltimore's commercial corridors 
# with specific GEOIDS 

# Store filepath in a variable
corridor_key = "raw_data/corr_key.csv"

# Read the file with the pandas library
corr_key_df2 = pd.read_csv(corridor_key)
corr_key_df2.dtypes

GEOID        int64
Corridor    object
dtype: object

In [93]:
# Change data type of the "GEOID" column so that it's a string/ text and not an integer
corr_key_df2["GEOID"] = corr_key_df2["GEOID"].astype('str')
corr_key_df2.dtypes

GEOID       object
Corridor    object
dtype: object

In [94]:
corr_key_df2.columns

Index(['GEOID', 'Corridor'], dtype='object')

In [95]:
corr_key_df2

Unnamed: 0,GEOID,Corridor
0,24510260403,Highlandtown
1,24510230200,Hamilton Lauraville
2,24510260102,Pimlico
3,24510260303,Hamilton Lauraville
4,24510260800,Highlandtown
5,24510270401,Hamilton Lauraville
6,24510280401,Pimlico
7,24510270302,Hamilton Lauraville
8,24510270702,Pimlico
9,24510270903,Penn Ave


In [96]:
# see the number of rows (200) and columns (2) in the dataframe - so can compare it with the number of rows and columns in the census_joined df
corr_key_df2.shape

(200, 2)

In [97]:
corr_key_df = corr_key_df2.sort_values("GEOID")
corr_key_df

Unnamed: 0,GEOID,Corridor
138,24510010100,
46,24510010200,Highlandtown
76,24510010300,
123,24510010400,
139,24510010500,
72,24510020100,
124,24510020200,
44,24510020300,Brooklyn
125,24510030100,
126,24510030200,


In [98]:
# print all of the values in the "GEOID" column
#geoid_list = census_joined2['GEOID'].tolist()
#print(geoid_list)

In [161]:
# merge the census_temp2 dataframe with the corr_key_df dataframe on the common column "GEOID"

corridors_df = pd.merge(
    census_temp2, corr_key_df, on="GEOID")

# remove any columns with NaN ("Not a Number") - used for missing values, by using .dropna()
corridors_df = corridors_df.dropna()
corridors_df

Unnamed: 0,GEOID,Population,Total households,Household population,Median household income,Per capita income,Persons_poverty_status_determined,Income_past_12mos_below_pov_level,Pop. <18 years,Pop. working age,...,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Corridor
1,24510010200,2869.0,1393.0,2869.0,107617.0,56058.0,2860.0,169.0,446.0,1293.0,...,78.0,58.0,276.0,335.0,623.0,84.0,203.0,87.758257,0.137123,Highlandtown
7,24510020300,3877.0,2143.0,3877.0,108516.0,87413.0,3877.0,359.0,757.0,2125.0,...,112.0,20.0,225.0,551.0,1073.0,180.0,273.0,258.665092,0.404166,Brooklyn
10,24510040100,4404.0,2596.0,4379.0,62500.0,49876.0,4404.0,870.0,911.0,1767.0,...,65.0,100.0,206.0,612.0,1337.0,88.0,86.0,293.940854,0.459284,E Monument St
11,24510040200,901.0,228.0,295.0,34375.0,16596.0,311.0,66.0,1531.0,2481.0,...,30.0,10.0,20.0,29.0,181.0,27.0,22.0,107.657244,0.168215,Hamilton Lauraville
13,24510060200,3241.0,1079.0,3225.0,72465.0,32023.0,3236.0,864.0,233.0,1369.0,...,41.0,59.0,118.0,233.0,524.0,224.0,133.0,63.684102,0.099507,E Monument St
14,24510060300,1865.0,750.0,1840.0,68088.0,36889.0,1865.0,321.0,776.0,2019.0,...,79.0,48.0,44.0,182.0,412.0,63.0,42.0,46.233527,0.07224,E Monument St
15,24510060400,1376.0,761.0,1376.0,55368.0,41278.0,1371.0,197.0,366.0,1951.0,...,31.0,12.0,48.0,90.0,332.0,40.0,50.0,106.281673,0.166066,Waverly
16,24510070100,2565.0,749.0,2560.0,39602.0,15387.0,2565.0,939.0,764.0,1858.0,...,79.0,50.0,15.0,97.0,190.0,164.0,63.0,71.813331,0.112209,E Monument St
17,24510070200,3028.0,990.0,3015.0,32000.0,17596.0,3028.0,1233.0,786.0,1338.0,...,25.0,81.0,31.0,142.0,311.0,107.0,85.0,76.881283,0.120127,E Monument St
18,24510070300,856.0,307.0,833.0,29261.0,18544.0,856.0,367.0,535.0,1403.0,...,54.0,43.0,0.0,10.0,104.0,58.0,7.0,56.506963,0.088292,E Monument St


In [100]:
# Use .groupby and .agg to sum the amounts by corridor
corridors_sum = corridors_df.groupby(['Corridor'], as_index=False).agg(
    {"Population": sum,
     "Total households": sum,
     "Household population": sum,
     "Income_past_12mos_below_pov_level": sum,
     "Persons_poverty_status_determined": sum,
     "Pop_16_yrs_and_over": sum,
     "Pop_in_labor_force": sum,
     "Pop_NOT_in_labor_force": sum,
     "Employed_civilians": sum,
     "Unemployed_civilians": sum,
     "Workers_16_yrs_and_over": sum,
     "Population_25_yrs_and_over": sum,
     "# persons 12th grade, no diploma": sum,
     "# persons graduated high school": sum,
     "# persons Associate's degree": sum,
     "# persons Bachelor's degree": sum,
     
     "Pop. Asian": sum,
     "Pop. Black": sum,
     "Pop. Hispanic origin": sum,
     "Pop. white": sum,
     "Pop. two or more races": sum,
     "# Foreign-born":sum,
     "Total pop. in occupied housing units by tenure":sum,
     "Total occupied units": sum,
     "Total owner-occupied units": sum,
     "Total renter-occupied units": sum,
     "No_vehicle_available_owneroccupied_unit": sum,
     "No_vehicle_available_renteroccupied_unit": sum,
     "# households with computer, no internet subscription":sum,
     "# households no computer":sum,
     "Pop. <18 years": sum,
     "Pop. working age": sum,
     "Pop. 65+ years": sum,
     "Commute_to_work_public_transportation": sum,
     "Civilian_employed_pop_16yrs_and_over": sum,
     "Construction count": sum,               
     "Manufacturing count": sum,
     "Retail_Trade count": sum,                
     "Transportation_warehousing_utilities count": sum,                
     "Finance_insurance_realestate count": sum,  
     "Professional_scientific_mgmt_administrative count": sum,               
     "Educational_healthcare_social count": sum,
     "Accommodations_foodservices_arts_entertainment count": sum,
     "Public_administration count": sum, 
     "Area_Acres": sum,
     "Area_mi2":sum
    })
    
corridors_sum

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Manufacturing count,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,466.0,1206.0,1127.0,838.0,1746.0,4463.0,1165.0,1649.0,2745.531667,4.28991
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,366.0,998.0,252.0,339.0,966.0,1828.0,566.0,562.0,1424.617673,2.225974
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,723.0,1886.0,1019.0,1262.0,2860.0,8155.0,1913.0,1561.0,3084.743916,4.819932
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,518.0,448.0,131.0,450.0,886.0,1162.0,731.0,234.0,1697.990035,2.65312
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,924.0,1740.0,975.0,1031.0,2024.0,5802.0,1179.0,1419.0,3136.819934,4.901301
5,Hampden,27062.0,8497.0,19748.0,2467.0,19829.0,23804.0,12242.0,11562.0,11381.0,...,491.0,805.0,375.0,785.0,1495.0,4022.0,1169.0,632.0,1848.277959,2.887946
6,Highlandtown,26400.0,11368.0,25721.0,4471.0,25936.0,21875.0,14404.0,7471.0,13660.0,...,676.0,821.0,638.0,1187.0,1810.0,4091.0,964.0,1075.0,2607.840666,4.074767
7,Irvington,11585.0,4633.0,11291.0,1839.0,11334.0,9565.0,6193.0,3372.0,5845.0,...,543.0,966.0,294.0,194.0,653.0,1721.0,453.0,359.0,1272.453932,1.988217
8,North Ave,15602.0,5403.0,15490.0,4929.0,15452.0,12543.0,6784.0,5759.0,5218.0,...,130.0,515.0,468.0,262.0,538.0,1603.0,514.0,477.0,927.39036,1.449053
9,Park Heights,14908.0,6156.0,14475.0,3772.0,14737.0,12587.0,6940.0,5647.0,5988.0,...,240.0,381.0,401.0,308.0,835.0,2254.0,437.0,412.0,778.387215,1.216235


In [163]:
temp = corridors_df[["Corridor", "Median household income", "Median age"]]
temp

Unnamed: 0,Corridor,Median household income,Median age
1,Highlandtown,107617.0,32.0
7,Brooklyn,108516.0,35.5
10,E Monument St,62500.0,28.3
11,Hamilton Lauraville,34375.0,23.9
13,E Monument St,72465.0,31.4
14,E Monument St,68088.0,31.9
15,Waverly,55368.0,37.0
16,E Monument St,39602.0,27.6
17,E Monument St,32000.0,29.9
18,E Monument St,29261.0,36.8


In [102]:
corridors_sum["Home ownership rate"] = 100 * \
    corridors_sum["Total owner-occupied units"].astype(
        int) / corridors_sum["Total occupied units"].astype(
        int) 

corridors_sum.head()

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Retail_Trade count,Transportation_warehousing_utilities count,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Home ownership rate
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,1206.0,1127.0,838.0,1746.0,4463.0,1165.0,1649.0,2745.531667,4.28991,55.017218
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,998.0,252.0,339.0,966.0,1828.0,566.0,562.0,1424.617673,2.225974,38.58899
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,1886.0,1019.0,1262.0,2860.0,8155.0,1913.0,1561.0,3084.743916,4.819932,39.652966
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,448.0,131.0,450.0,886.0,1162.0,731.0,234.0,1697.990035,2.65312,61.377871
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,1740.0,975.0,1031.0,2024.0,5802.0,1179.0,1419.0,3136.819934,4.901301,68.813509


In [103]:
# Round the home ownership rate to one decimal point; using "float" instead of "int" because want to use decimal points
corridors_sum["Home ownership rate"] = corridors_sum["Home ownership rate"].astype(float).round(1)


In [104]:
# See https://www.census.gov/topics/income-poverty/poverty/guidance/poverty-measures.html
# for how census measures poverty
# Add a new column for poverty rate (Poverty Count / Population)
corridors_sum["Poverty rate"] = 100 * \
    corridors_sum["Income_past_12mos_below_pov_level"].astype(
        int) / corridors_sum["Persons_poverty_status_determined"].astype(int)

corridors_sum["Poverty rate"] = corridors_sum["Poverty rate"].astype(float).round(1)

In [105]:
# Add a new column for unemployment rate for civilians only (Employment Count / Population)
corridors_sum["Unemployment rate"] = 100 * \
    corridors_sum["Unemployed_civilians"].astype(
        int) / corridors_sum["Pop_16_yrs_and_over"].astype(int)

corridors_sum["Unemployment rate"] = corridors_sum["Unemployment rate"].astype(float).round(1)

corridors_sum.head()

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Finance_insurance_realestate count,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Home ownership rate,Poverty rate,Unemployment rate
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,838.0,1746.0,4463.0,1165.0,1649.0,2745.531667,4.28991,55.0,22.3,6.7
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,339.0,966.0,1828.0,566.0,562.0,1424.617673,2.225974,38.6,23.0,5.4
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,1262.0,2860.0,8155.0,1913.0,1561.0,3084.743916,4.819932,39.7,25.0,6.0
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,450.0,886.0,1162.0,731.0,234.0,1697.990035,2.65312,61.4,10.5,2.1
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,1031.0,2024.0,5802.0,1179.0,1419.0,3136.819934,4.901301,68.8,14.9,4.7


In [106]:
# Add a new column for average household size (Household population / Total households); see https://www.census.gov/quickfacts/fact/note/US/HSD410221#:~:text=Persons%20per%20household%2C%20or%20average,by%20the%20number%20of%20households.
corridors_sum["Average hh size"] = corridors_sum["Household population"].astype(
        int) / corridors_sum["Total households"].astype(int)

corridors_sum["Average hh size"] = corridors_sum["Average hh size"].astype(float).round(2)

corridors_sum.head()

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Professional_scientific_mgmt_administrative count,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Home ownership rate,Poverty rate,Unemployment rate,Average hh size
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,1746.0,4463.0,1165.0,1649.0,2745.531667,4.28991,55.0,22.3,6.7,2.67
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,966.0,1828.0,566.0,562.0,1424.617673,2.225974,38.6,23.0,5.4,2.49
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,2860.0,8155.0,1913.0,1561.0,3084.743916,4.819932,39.7,25.0,6.0,2.47
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,886.0,1162.0,731.0,234.0,1697.990035,2.65312,61.4,10.5,2.1,2.82
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,2024.0,5802.0,1179.0,1419.0,3136.819934,4.901301,68.8,14.9,4.7,2.66


In [107]:
# Calculate population density  see: https://www.census.gov/quickfacts/fact/note/US/LND110210
# Density is expressed as "population per square mile(kilometer)"
# Divide total population (or # of housing units)/ by land area of the entity measured in square miles
corridors_sum['Pop. density per sq mile'] = corridors_sum['Population'].astype(
                                    float) / corridors_sum['Area_mi2'].astype(
                                    float)
corridors_sum   

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Home ownership rate,Poverty rate,Unemployment rate,Average hh size,Pop. density per sq mile
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,4463.0,1165.0,1649.0,2745.531667,4.28991,55.0,22.3,6.7,2.67,8265.440713
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,1828.0,566.0,562.0,1424.617673,2.225974,38.6,23.0,5.4,2.49,6284.439926
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,8155.0,1913.0,1561.0,3084.743916,4.819932,39.7,25.0,6.0,2.47,10155.123262
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,1162.0,731.0,234.0,1697.990035,2.65312,61.4,10.5,2.1,2.82,4134.000657
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,5802.0,1179.0,1419.0,3136.819934,4.901301,68.8,14.9,4.7,2.66,7548.404367
5,Hampden,27062.0,8497.0,19748.0,2467.0,19829.0,23804.0,12242.0,11562.0,11381.0,...,4022.0,1169.0,632.0,1848.277959,2.887946,56.8,12.4,3.6,2.32,9370.674273
6,Highlandtown,26400.0,11368.0,25721.0,4471.0,25936.0,21875.0,14404.0,7471.0,13660.0,...,4091.0,964.0,1075.0,2607.840666,4.074767,54.5,17.2,3.4,2.26,6478.897517
7,Irvington,11585.0,4633.0,11291.0,1839.0,11334.0,9565.0,6193.0,3372.0,5845.0,...,1721.0,453.0,359.0,1272.453932,1.988217,45.5,16.2,2.3,2.44,5826.828111
8,North Ave,15602.0,5403.0,15490.0,4929.0,15452.0,12543.0,6784.0,5759.0,5218.0,...,1603.0,514.0,477.0,927.39036,1.449053,46.1,31.9,12.5,2.87,10767.030262
9,Park Heights,14908.0,6156.0,14475.0,3772.0,14737.0,12587.0,6940.0,5647.0,5988.0,...,2254.0,437.0,412.0,778.387215,1.216235,41.3,25.6,7.6,2.35,12257.500707


In [108]:
# Round the pop. density rate to one decimal point; using "float" instead of "int" because want to use decimal points
corridors_sum["Pop. density per sq mile"] = corridors_sum["Pop. density per sq mile"].astype(float).round(1)
corridors_sum

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Educational_healthcare_social count,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Home ownership rate,Poverty rate,Unemployment rate,Average hh size,Pop. density per sq mile
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,4463.0,1165.0,1649.0,2745.531667,4.28991,55.0,22.3,6.7,2.67,8265.4
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,1828.0,566.0,562.0,1424.617673,2.225974,38.6,23.0,5.4,2.49,6284.4
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,8155.0,1913.0,1561.0,3084.743916,4.819932,39.7,25.0,6.0,2.47,10155.1
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,1162.0,731.0,234.0,1697.990035,2.65312,61.4,10.5,2.1,2.82,4134.0
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,5802.0,1179.0,1419.0,3136.819934,4.901301,68.8,14.9,4.7,2.66,7548.4
5,Hampden,27062.0,8497.0,19748.0,2467.0,19829.0,23804.0,12242.0,11562.0,11381.0,...,4022.0,1169.0,632.0,1848.277959,2.887946,56.8,12.4,3.6,2.32,9370.7
6,Highlandtown,26400.0,11368.0,25721.0,4471.0,25936.0,21875.0,14404.0,7471.0,13660.0,...,4091.0,964.0,1075.0,2607.840666,4.074767,54.5,17.2,3.4,2.26,6478.9
7,Irvington,11585.0,4633.0,11291.0,1839.0,11334.0,9565.0,6193.0,3372.0,5845.0,...,1721.0,453.0,359.0,1272.453932,1.988217,45.5,16.2,2.3,2.44,5826.8
8,North Ave,15602.0,5403.0,15490.0,4929.0,15452.0,12543.0,6784.0,5759.0,5218.0,...,1603.0,514.0,477.0,927.39036,1.449053,46.1,31.9,12.5,2.87,10767.0
9,Park Heights,14908.0,6156.0,14475.0,3772.0,14737.0,12587.0,6940.0,5647.0,5988.0,...,2254.0,437.0,412.0,778.387215,1.216235,41.3,25.6,7.6,2.35,12257.5


In [109]:
corridors_sum.columns

Index(['Corridor', 'Population', 'Total households', 'Household population',
       'Income_past_12mos_below_pov_level',
       'Persons_poverty_status_determined', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Pop_NOT_in_labor_force', 'Employed_civilians',
       'Unemployed_civilians', 'Workers_16_yrs_and_over',
       'Population_25_yrs_and_over', '# persons 12th grade, no diploma',
       '# persons graduated high school', '# persons Associate's degree',
       '# persons Bachelor's degree', 'Pop. Asian', 'Pop. Black',
       'Pop. Hispanic origin', 'Pop. white', 'Pop. two or more races',
       '# Foreign-born', 'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       '# households with computer, no internet subscription',
       '# households no computer', 'Pop. <18 years', 'P

In [110]:
# Add a new column for # of occupied units with no vehicle available (sum column values of owner-occupied units with no vehicles available
# and renter-occupied units with no vehicles available)

corridors_sum["Number units with no vehicle available"] = corridors_sum.apply(lambda row: row.No_vehicle_available_owneroccupied_unit + row.No_vehicle_available_renteroccupied_unit, axis=1) 
corridors_sum.head()


Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Accommodations_foodservices_arts_entertainment count,Public_administration count,Area_Acres,Area_mi2,Home ownership rate,Poverty rate,Unemployment rate,Average hh size,Pop. density per sq mile,Number units with no vehicle available
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,1165.0,1649.0,2745.531667,4.28991,55.0,22.3,6.7,2.67,8265.4,3493.0
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,566.0,562.0,1424.617673,2.225974,38.6,23.0,5.4,2.49,6284.4,1361.0
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,1913.0,1561.0,3084.743916,4.819932,39.7,25.0,6.0,2.47,10155.1,6691.0
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,731.0,234.0,1697.990035,2.65312,61.4,10.5,2.1,2.82,4134.0,432.0
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,1179.0,1419.0,3136.819934,4.901301,68.8,14.9,4.7,2.66,7548.4,1877.0


In [111]:
# Add a new column for % of occupied units with no vehicle available 
# (owner-occupied units with no vehicle available + renter-occupied units with no vehicle available / Total occupied units)
corridors_sum["Percent units with no vehicle available"] = 100 * \
    corridors_sum["Number units with no vehicle available"].astype(
        int) / corridors_sum["Total occupied units"].astype(
        int) 

corridors_sum["Percent units with no vehicle available"] = corridors_sum["Percent units with no vehicle available"].astype(float).round(1)
corridors_sum

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Public_administration count,Area_Acres,Area_mi2,Home ownership rate,Poverty rate,Unemployment rate,Average hh size,Pop. density per sq mile,Number units with no vehicle available,Percent units with no vehicle available
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,1649.0,2745.531667,4.28991,55.0,22.3,6.7,2.67,8265.4,3493.0,28.0
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,562.0,1424.617673,2.225974,38.6,23.0,5.4,2.49,6284.4,1361.0,24.2
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,1561.0,3084.743916,4.819932,39.7,25.0,6.0,2.47,10155.1,6691.0,34.2
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,234.0,1697.990035,2.65312,61.4,10.5,2.1,2.82,4134.0,432.0,11.3
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,1419.0,3136.819934,4.901301,68.8,14.9,4.7,2.66,7548.4,1877.0,14.0
5,Hampden,27062.0,8497.0,19748.0,2467.0,19829.0,23804.0,12242.0,11562.0,11381.0,...,632.0,1848.277959,2.887946,56.8,12.4,3.6,2.32,9370.7,1578.0,18.6
6,Highlandtown,26400.0,11368.0,25721.0,4471.0,25936.0,21875.0,14404.0,7471.0,13660.0,...,1075.0,2607.840666,4.074767,54.5,17.2,3.4,2.26,6478.9,2078.0,18.3
7,Irvington,11585.0,4633.0,11291.0,1839.0,11334.0,9565.0,6193.0,3372.0,5845.0,...,359.0,1272.453932,1.988217,45.5,16.2,2.3,2.44,5826.8,1128.0,24.3
8,North Ave,15602.0,5403.0,15490.0,4929.0,15452.0,12543.0,6784.0,5759.0,5218.0,...,477.0,927.39036,1.449053,46.1,31.9,12.5,2.87,10767.0,2444.0,45.2
9,Park Heights,14908.0,6156.0,14475.0,3772.0,14737.0,12587.0,6940.0,5647.0,5988.0,...,412.0,778.387215,1.216235,41.3,25.6,7.6,2.35,12257.5,2302.0,37.4


In [112]:
corridors_sum["Percent commute to work public transportation"] = 100 * \
    corridors_sum["Commute_to_work_public_transportation"].astype(
        int) / corridors_sum["Workers_16_yrs_and_over"].astype(
        int) 

corridors_sum["Percent commute to work public transportation"] = corridors_sum["Percent commute to work public transportation"].astype(float).round(1)


In [113]:
corridors_sum["Percent Asian"] = 100 * \
    corridors_sum["Pop. Asian"].astype(
        int) / corridors_sum["Population"].astype(
        int) 

corridors_sum["Percent Asian"] = corridors_sum["Percent Asian"].astype(float).round(1)

In [114]:
corridors_sum["Percent Asian"] = 100 * \
    corridors_sum["Pop. Asian"].astype(
        int) / corridors_sum["Population"].astype(
        int) 

corridors_sum["Percent Asian"] = corridors_sum["Percent Asian"].astype(float).round(1)

In [115]:
corridors_sum["Percent Black"] = 100 * \
    corridors_sum["Pop. Black"].astype(
        int) / corridors_sum["Population"].astype(
        int) 

corridors_sum["Percent Black"] = corridors_sum["Percent Black"].astype(float).round(1)

In [116]:
corridors_sum["Percent Hispanic origin"] = 100 * \
    corridors_sum["Pop. Hispanic origin"].astype(
        int) / corridors_sum["Population"].astype(
        int) 

corridors_sum["Percent Hispanic origin"] = corridors_sum["Percent Hispanic origin"].astype(float).round(1)

In [117]:
corridors_sum["Percent White"] = 100 * \
    corridors_sum["Pop. white"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent White"] = corridors_sum["Percent White"].astype(float).round(1)


In [118]:
corridors_sum["Percent two or more races"] = 100 * \
    corridors_sum["Pop. two or more races"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent two or more races"] = corridors_sum["Percent two or more races"].astype(float).round(1)


In [119]:
corridors_sum["Percent 12th grade, no diploma"] = 100 * \
    corridors_sum["# persons 12th grade, no diploma"].astype(
        int) / corridors_sum["Population_25_yrs_and_over"].astype(
        int) 
corridors_sum["Percent 12th grade, no diploma"] = corridors_sum["Percent 12th grade, no diploma"].astype(float).round(1)


In [120]:
corridors_sum["Percent graduated high school"] = 100 * \
    corridors_sum["# persons graduated high school"].astype(
        int) / corridors_sum["Population_25_yrs_and_over"].astype(
        int) 
corridors_sum["Percent graduated high school"] = corridors_sum["Percent graduated high school"].astype(float).round(1)


In [121]:
corridors_sum["Percent earned Associate's degree"] = 100 * \
    corridors_sum["# persons Associate's degree"].astype(
        int) / corridors_sum["Population_25_yrs_and_over"].astype(
        int) 
corridors_sum["Percent earned Associate's degree"] = corridors_sum["Percent earned Associate's degree"].astype(float).round(1)


In [122]:
corridors_sum["Percent earned Bachelor's degree"] = 100 * \
    corridors_sum["# persons Bachelor's degree"].astype(
        int) / corridors_sum["Population_25_yrs_and_over"].astype(
        int) 
corridors_sum["Percent earned Bachelor's degree"] = corridors_sum["Percent earned Bachelor's degree"].astype(float).round(1)


In [123]:
corridors_sum["Percent computer, no internet subscription"] = 100 * \
    corridors_sum["# households with computer, no internet subscription"].astype(
        int) / corridors_sum["Total households"].astype(
        int) 
corridors_sum["Percent computer, no internet subscription"] = corridors_sum["Percent computer, no internet subscription"].astype(float).round(1)


In [124]:
corridors_sum["Percent no computer"] = 100 * \
    corridors_sum["# households no computer"].astype(
        int) / corridors_sum["Total households"].astype(
        int) 
corridors_sum["Percent no computer"] = corridors_sum["Percent no computer"].astype(float).round(1)


In [125]:
corridors_sum["Percent less than 18 yrs old"] = 100 * \
    corridors_sum["Pop. <18 years"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent less than 18 yrs old"] = corridors_sum["Percent less than 18 yrs old"].astype(float).round(1)


In [126]:
# working age is 18-64 yrs old
corridors_sum["Percent working age"] = 100 * \
    corridors_sum["Pop. working age"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent working age"] = corridors_sum["Percent working age"].astype(float).round(1)


In [127]:
corridors_sum["Percent 65 or older"] = 100 * \
    corridors_sum["Pop. 65+ years"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent 65 or older"] = corridors_sum["Percent 65 or older"].astype(float).round(1)


In [128]:
corridors_sum["Percent 65 or older"] = 100 * \
    corridors_sum["Pop. 65+ years"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent 65 or older"] = corridors_sum["Percent 65 or older"].astype(float).round(1)


In [129]:
corridors_sum["Percent Foreign-born"] = 100 * \
    corridors_sum["# Foreign-born"].astype(
        int) / corridors_sum["Population"].astype(
        int) 
corridors_sum["Percent Foreign-born"] = corridors_sum["Percent Foreign-born"].astype(float).round(1)
corridors_sum

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,"Percent 12th grade, no diploma",Percent graduated high school,Percent earned Associate's degree,Percent earned Bachelor's degree,"Percent computer, no internet subscription",Percent no computer,Percent less than 18 yrs old,Percent working age,Percent 65 or older,Percent Foreign-born
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,1.9,28.8,4.6,13.9,9.1,15.7,12.1,36.1,6.6,3.5
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,2.0,26.2,4.3,15.8,15.2,11.8,17.6,50.5,12.2,12.2
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,1.4,24.6,4.2,16.3,10.7,15.2,24.7,76.7,15.0,8.8
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,2.9,18.6,3.8,18.2,8.1,14.1,15.1,50.9,11.0,28.0
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,1.8,22.8,5.3,18.5,7.1,11.7,26.0,63.9,13.9,7.4
5,Hampden,27062.0,8497.0,19748.0,2467.0,19829.0,23804.0,12242.0,11562.0,11381.0,...,2.1,18.1,4.0,19.9,5.4,14.6,16.5,50.7,12.0,5.9
6,Highlandtown,26400.0,11368.0,25721.0,4471.0,25936.0,21875.0,14404.0,7471.0,13660.0,...,1.1,21.2,3.1,24.0,12.8,12.9,20.8,56.9,10.2,8.0
7,Irvington,11585.0,4633.0,11291.0,1839.0,11334.0,9565.0,6193.0,3372.0,5845.0,...,2.4,27.3,5.1,18.6,6.7,15.3,22.7,62.6,11.8,15.5
8,North Ave,15602.0,5403.0,15490.0,4929.0,15452.0,12543.0,6784.0,5759.0,5218.0,...,3.9,29.2,5.1,6.8,15.4,26.5,22.8,64.7,10.2,2.8
9,Park Heights,14908.0,6156.0,14475.0,3772.0,14737.0,12587.0,6940.0,5647.0,5988.0,...,2.7,24.4,4.9,16.1,13.3,16.9,17.4,53.4,12.4,12.2


In [130]:
# Cells below are PERCENTAGES for "Workers by industry"    Universe is "civilian employed population 16 years and over"so need to divide count of population in the industry by 
# "B08301_001E": "Workers_16_yrs_and_over"
# note: did not include "percentage" in the column names because will be easier in Tableau later 
# (in Tableau, don't want the word  "percentages" because then names are too long in the charts)

In [131]:
corridors_sum["Construction"] = 100 * \
    corridors_sum["Construction count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Construction"] = corridors_sum["Construction"].astype(float).round(1)


In [132]:
corridors_sum["Manufacturing"] = 100 * \
    corridors_sum["Manufacturing count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Manufacturing"] = corridors_sum["Manufacturing"].astype(float).round(1)


In [133]:
corridors_sum["Retail Trade"] = 100 * \
    corridors_sum["Retail_Trade count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Retail Trade"] = corridors_sum["Retail Trade"].astype(float).round(1)


In [134]:
corridors_sum["Transportation, warehousing, utilities"] = 100 * \
    corridors_sum["Transportation_warehousing_utilities count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Transportation, warehousing, utilities"] = corridors_sum["Transportation, warehousing, utilities"].astype(float).round(1)


In [135]:
corridors_sum["Finance, insurance, real estate"] = 100 * \
    corridors_sum["Finance_insurance_realestate count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Finance, insurance, real estate"] = corridors_sum["Finance, insurance, real estate"].astype(float).round(1)


In [136]:
corridors_sum["Professional, scientific, mgmt, administrative"] = 100 * \
    corridors_sum["Professional_scientific_mgmt_administrative count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Professional, scientific, mgmt, administrative"] = corridors_sum["Professional, scientific, mgmt, administrative"].astype(float).round(1)


In [137]:
corridors_sum["Educational services, health care, social assistance"] = 100 * \
    corridors_sum["Educational_healthcare_social count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Educational services, health care, social assistance"] = corridors_sum["Educational services, health care, social assistance"].astype(float).round(1)


In [138]:
corridors_sum["Accommodations, food services, arts, entertainment"] = 100 * \
    corridors_sum["Accommodations_foodservices_arts_entertainment count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Accommodations, food services, arts, entertainment"] = corridors_sum["Accommodations, food services, arts, entertainment"].astype(float).round(1)


In [139]:
corridors_sum["Public administration"] = 100 * \
    corridors_sum["Public_administration count"].astype(
        int) / corridors_sum["Civilian_employed_pop_16yrs_and_over"].astype(
        int) 
corridors_sum["Public administration"] = corridors_sum["Public administration"].astype(float).round(1)
corridors_sum

Unnamed: 0,Corridor,Population,Total households,Household population,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,Employed_civilians,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,Belair Rd,35458.0,12487.0,33302.0,7420.0,33273.0,27588.0,16396.0,11192.0,14534.0,...,3.5,4.4,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3
1,Brooklyn,13989.0,5613.0,13954.0,3203.0,13938.0,11356.0,7586.0,3770.0,6975.0,...,12.2,4.9,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1
2,E Monument St,48947.0,19537.0,48257.0,12082.0,48357.0,38934.0,24444.0,14490.0,22049.0,...,8.8,3.5,3.3,8.6,4.6,5.7,13.0,37.0,8.7,7.1
3,Greektown,10968.0,3832.0,10821.0,1118.0,10631.0,8559.0,5908.0,2651.0,5716.0,...,28.0,11.3,9.1,7.8,2.3,7.9,15.5,20.3,12.8,4.1
4,Hamilton Lauraville,36997.0,13384.0,35548.0,5274.0,35323.0,28914.0,18848.0,10066.0,17503.0,...,7.4,5.1,5.3,9.9,5.6,5.9,11.6,33.1,6.7,8.1
5,Hampden,27062.0,8497.0,19748.0,2467.0,19829.0,23804.0,12242.0,11562.0,11381.0,...,5.9,3.3,4.3,7.1,3.3,6.9,13.1,35.3,10.3,5.6
6,Highlandtown,26400.0,11368.0,25721.0,4471.0,25936.0,21875.0,14404.0,7471.0,13660.0,...,8.0,7.7,4.9,6.0,4.7,8.7,13.3,29.9,7.1,7.9
7,Irvington,11585.0,4633.0,11291.0,1839.0,11334.0,9565.0,6193.0,3372.0,5845.0,...,15.5,4.2,9.3,16.5,5.0,3.3,11.2,29.4,7.8,6.1
8,North Ave,15602.0,5403.0,15490.0,4929.0,15452.0,12543.0,6784.0,5759.0,5218.0,...,2.8,5.2,2.5,9.9,9.0,5.0,10.3,30.7,9.9,9.1
9,Park Heights,14908.0,6156.0,14475.0,3772.0,14737.0,12587.0,6940.0,5647.0,5988.0,...,12.2,3.6,4.0,6.4,6.7,5.1,13.9,37.6,7.3,6.9


In [166]:
# combine the dataframes on the common column "Corridor"
merge_temp = pd.merge(corridors_sum, temp, how='left', on="Corridor")
merge_temp

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,Median household income,Median age
0,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,63542.0,35.2
1,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,33711.0,37.2
2,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,13074.0,31.1
3,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,43951.0,27.4
4,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,33920.0,36.3
5,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,57356.0,26.7
6,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,100167.0,38.8
7,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3,62945.0,35.1
8,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1,108516.0,35.5
9,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1,28750.0,39.2


In [140]:
# Change order of columns in DataFrame by using double brackets
corridors_sum = corridors_sum[["Corridor", "Population", "Total households", "Household population",
       "Pop. density per sq mile","Income_past_12mos_below_pov_level",
       "Persons_poverty_status_determined", "Pop_16_yrs_and_over",
       "Pop_in_labor_force", "Pop_NOT_in_labor_force", "Employed_civilians",
       "Unemployed_civilians", "Workers_16_yrs_and_over",
       "Population_25_yrs_and_over", "# persons 12th grade, no diploma",
       "# persons graduated high school", "# persons Associate's degree",
       "# persons Bachelor's degree", "Pop. Asian", "Pop. Black",
       "Pop. Hispanic origin", "Pop. white", "Pop. two or more races",
       "# Foreign-born", "Total pop. in occupied housing units by tenure",
       "Total occupied units", "Total owner-occupied units",
       "Total renter-occupied units",
       "No_vehicle_available_owneroccupied_unit",
       "No_vehicle_available_renteroccupied_unit",
       "# households with computer, no internet subscription",
       "# households no computer", "Pop. <18 years", "Pop. working age",
       "Pop. 65+ years", "Commute_to_work_public_transportation",
       "Civilian_employed_pop_16yrs_and_over", "Construction count", "Manufacturing count",
       "Retail_Trade count", "Transportation_warehousing_utilities count",
       "Finance_insurance_realestate count",
       "Professional_scientific_mgmt_administrative count",
       "Educational_healthcare_social count",
       "Accommodations_foodservices_arts_entertainment count",
       "Public_administration count", "Area_Acres", "Area_mi2",
       "Home ownership rate", "Poverty rate", "Unemployment rate",
       "Average hh size", 
       "Number units with no vehicle available",
       "Percent units with no vehicle available",
       "Percent commute to work public transportation", "Percent Asian",
       "Percent Black", "Percent Hispanic origin", "Percent White",
       "Percent two or more races", "Percent 12th grade, no diploma",
       "Percent graduated high school", "Percent earned Associate's degree",
       "Percent earned Bachelor's degree",
       "Percent computer, no internet subscription", "Percent no computer",
       "Percent less than 18 yrs old", "Percent working age",
       "Percent 65 or older", "Percent Foreign-born", "Construction","Manufacturing",
       "Retail Trade",
       "Transportation, warehousing, utilities",
       "Finance, insurance, real estate",
       "Professional, scientific, mgmt, administrative",
       "Educational services, health care, social assistance",
       "Accommodations, food services, arts, entertainment",
       "Public administration"
        ]]


corridors_sum


Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.5,4.4,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3
1,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,12.2,4.9,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1
2,E Monument St,48947.0,19537.0,48257.0,10155.1,12082.0,48357.0,38934.0,24444.0,14490.0,...,8.8,3.5,3.3,8.6,4.6,5.7,13.0,37.0,8.7,7.1
3,Greektown,10968.0,3832.0,10821.0,4134.0,1118.0,10631.0,8559.0,5908.0,2651.0,...,28.0,11.3,9.1,7.8,2.3,7.9,15.5,20.3,12.8,4.1
4,Hamilton Lauraville,36997.0,13384.0,35548.0,7548.4,5274.0,35323.0,28914.0,18848.0,10066.0,...,7.4,5.1,5.3,9.9,5.6,5.9,11.6,33.1,6.7,8.1
5,Hampden,27062.0,8497.0,19748.0,9370.7,2467.0,19829.0,23804.0,12242.0,11562.0,...,5.9,3.3,4.3,7.1,3.3,6.9,13.1,35.3,10.3,5.6
6,Highlandtown,26400.0,11368.0,25721.0,6478.9,4471.0,25936.0,21875.0,14404.0,7471.0,...,8.0,7.7,4.9,6.0,4.7,8.7,13.3,29.9,7.1,7.9
7,Irvington,11585.0,4633.0,11291.0,5826.8,1839.0,11334.0,9565.0,6193.0,3372.0,...,15.5,4.2,9.3,16.5,5.0,3.3,11.2,29.4,7.8,6.1
8,North Ave,15602.0,5403.0,15490.0,10767.0,4929.0,15452.0,12543.0,6784.0,5759.0,...,2.8,5.2,2.5,9.9,9.0,5.0,10.3,30.7,9.9,9.1
9,Park Heights,14908.0,6156.0,14475.0,12257.5,3772.0,14737.0,12587.0,6940.0,5647.0,...,12.2,3.6,4.0,6.4,6.7,5.1,13.9,37.6,7.3,6.9


In [141]:
corridors_2021_final = corridors_sum.copy()
corridors_2021_final

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.5,4.4,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3
1,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,12.2,4.9,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1
2,E Monument St,48947.0,19537.0,48257.0,10155.1,12082.0,48357.0,38934.0,24444.0,14490.0,...,8.8,3.5,3.3,8.6,4.6,5.7,13.0,37.0,8.7,7.1
3,Greektown,10968.0,3832.0,10821.0,4134.0,1118.0,10631.0,8559.0,5908.0,2651.0,...,28.0,11.3,9.1,7.8,2.3,7.9,15.5,20.3,12.8,4.1
4,Hamilton Lauraville,36997.0,13384.0,35548.0,7548.4,5274.0,35323.0,28914.0,18848.0,10066.0,...,7.4,5.1,5.3,9.9,5.6,5.9,11.6,33.1,6.7,8.1
5,Hampden,27062.0,8497.0,19748.0,9370.7,2467.0,19829.0,23804.0,12242.0,11562.0,...,5.9,3.3,4.3,7.1,3.3,6.9,13.1,35.3,10.3,5.6
6,Highlandtown,26400.0,11368.0,25721.0,6478.9,4471.0,25936.0,21875.0,14404.0,7471.0,...,8.0,7.7,4.9,6.0,4.7,8.7,13.3,29.9,7.1,7.9
7,Irvington,11585.0,4633.0,11291.0,5826.8,1839.0,11334.0,9565.0,6193.0,3372.0,...,15.5,4.2,9.3,16.5,5.0,3.3,11.2,29.4,7.8,6.1
8,North Ave,15602.0,5403.0,15490.0,10767.0,4929.0,15452.0,12543.0,6784.0,5759.0,...,2.8,5.2,2.5,9.9,9.0,5.0,10.3,30.7,9.9,9.1
9,Park Heights,14908.0,6156.0,14475.0,12257.5,3772.0,14737.0,12587.0,6940.0,5647.0,...,12.2,3.6,4.0,6.4,6.7,5.1,13.9,37.6,7.3,6.9


In [142]:
# Export df as an Excel file
#corridors_2021_final.to_excel("CommCorr_demographics_2021.xlsx", index = False)

In [143]:
# import commercial vacancy data from HUD/USPS
# import csv file created from Commercial_corridors/vacant_comm_bldgs Jupyter nb
# save the data from the csv in variable, "vacants_2021" 

vacants_2021 = "vacants_USPS_2021Q4.csv"

# Create dataframe ("vacants_USPS_2021") by reading the variable data from the code block above
vacants_USPS_2021 = pd.read_csv(vacants_2021)
vacants_USPS_2021

Unnamed: 0,Corridor,Business vacancies <24 months,Business vacancies >24 months,Business no-stat <24 months,Business no-stat >24 months,Year
0,Belair Rd,18,43,206,207,2021
1,Brooklyn,16,94,323,300,2021
2,E Monument St,35,141,359,379,2021
3,Greektown,9,27,192,201,2021
4,Hamilton Lauraville,25,107,383,369,2021
5,Hampden,38,80,181,163,2021
6,Highlandtown,39,91,318,332,2021
7,Irvington,6,44,111,112,2021
8,North Ave,7,109,174,173,2021
9,Park Heights,7,41,172,170,2021


In [144]:
# combine the dataframes on the common column "Corridor"
corridors_vacants = pd.merge(corridors_2021_final, vacants_USPS_2021, on="Corridor")
corridors_vacants = corridors_vacants.reset_index(drop=True)
corridors_vacants

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,"Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,Business vacancies <24 months,Business vacancies >24 months,Business no-stat <24 months,Business no-stat >24 months,Year
0,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,5.8,12.0,30.7,8.0,11.3,18,43,206,207,2021
1,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,4.9,13.8,26.2,8.1,8.1,16,94,323,300,2021
2,E Monument St,48947.0,19537.0,48257.0,10155.1,12082.0,48357.0,38934.0,24444.0,14490.0,...,5.7,13.0,37.0,8.7,7.1,35,141,359,379,2021
3,Greektown,10968.0,3832.0,10821.0,4134.0,1118.0,10631.0,8559.0,5908.0,2651.0,...,7.9,15.5,20.3,12.8,4.1,9,27,192,201,2021
4,Hamilton Lauraville,36997.0,13384.0,35548.0,7548.4,5274.0,35323.0,28914.0,18848.0,10066.0,...,5.9,11.6,33.1,6.7,8.1,25,107,383,369,2021
5,Hampden,27062.0,8497.0,19748.0,9370.7,2467.0,19829.0,23804.0,12242.0,11562.0,...,6.9,13.1,35.3,10.3,5.6,38,80,181,163,2021
6,Highlandtown,26400.0,11368.0,25721.0,6478.9,4471.0,25936.0,21875.0,14404.0,7471.0,...,8.7,13.3,29.9,7.1,7.9,39,91,318,332,2021
7,Irvington,11585.0,4633.0,11291.0,5826.8,1839.0,11334.0,9565.0,6193.0,3372.0,...,3.3,11.2,29.4,7.8,6.1,6,44,111,112,2021
8,North Ave,15602.0,5403.0,15490.0,10767.0,4929.0,15452.0,12543.0,6784.0,5759.0,...,5.0,10.3,30.7,9.9,9.1,7,109,174,173,2021
9,Park Heights,14908.0,6156.0,14475.0,12257.5,3772.0,14737.0,12587.0,6940.0,5647.0,...,5.1,13.9,37.6,7.3,6.9,7,41,172,170,2021


In [145]:
# export dataframe as csv file to use for creating charts
corridors_vacants.to_csv("csvs_for_creating_charts/corridors_vacants_2021.csv", index = False, header=True)

In [146]:
#corridors_2021_final.columns

In [147]:
# import csv file of Baltimore City (as a whole) demographics
# first save the data from the csv in variable, "baltcity_2021" 

baltcity_2021 = "BaltCity_demographics_2021.csv"

In [148]:
# Create dataframes by reading the variable data from the code block above
balt_df2 = pd.read_csv(baltcity_2021)
balt_df2

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,"Baltimore city, Maryland",614700.0,238436.0,590812.0,6661.2,128829.0,591942.0,498965.0,307536.0,191429.0,...,8.1,4.5,4.4,9.4,5.8,5.6,12.5,31.7,9.1,8.1


In [149]:
balt_df2.columns

Index(['Corridor', 'Population', 'Total households', 'Household population',
       'Pop. density per sq mile', 'Income_past_12mos_below_pov_level',
       'Persons_poverty_status_determined', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Pop_NOT_in_labor_force', 'Employed_civilians',
       'Unemployed_civilians', 'Workers_16_yrs_and_over',
       'Population_25_yrs_and_over', '# persons 12th grade, no diploma',
       '# persons graduated high school', '# persons Associate's degree',
       '# persons Bachelor's degree', 'Pop. Asian', 'Pop. Black',
       'Pop. Hispanic origin', 'Pop. white', 'Pop. two or more races',
       '# Foreign-born', 'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       '# households with computer, no internet subscription',
       '# households no com

In [150]:
balt_df = balt_df2.reset_index(drop=True)

In [151]:
# merge the dataframes on the common column "Corridor"
corridors_balt_merge = pd.concat([corridors_2021_final, balt_df], axis=0)
corridors_balt_merge = corridors_balt_merge.reset_index(drop=True)
corridors_balt_merge.columns

Index(['Corridor', 'Population', 'Total households', 'Household population',
       'Pop. density per sq mile', 'Income_past_12mos_below_pov_level',
       'Persons_poverty_status_determined', 'Pop_16_yrs_and_over',
       'Pop_in_labor_force', 'Pop_NOT_in_labor_force', 'Employed_civilians',
       'Unemployed_civilians', 'Workers_16_yrs_and_over',
       'Population_25_yrs_and_over', '# persons 12th grade, no diploma',
       '# persons graduated high school', '# persons Associate's degree',
       '# persons Bachelor's degree', 'Pop. Asian', 'Pop. Black',
       'Pop. Hispanic origin', 'Pop. white', 'Pop. two or more races',
       '# Foreign-born', 'Total pop. in occupied housing units by tenure',
       'Total occupied units', 'Total owner-occupied units',
       'Total renter-occupied units',
       'No_vehicle_available_owneroccupied_unit',
       'No_vehicle_available_renteroccupied_unit',
       '# households with computer, no internet subscription',
       '# households no com

In [152]:
# import csv file of Maryland demographics
# first save the data from the csv in variable, "MD_2021" 

MD_2021 = "MD_demographics_2021.csv"

In [153]:
# Create dataframe by reading the variable data from the code block above
MD_df = pd.read_csv(MD_2021)
MD_df

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,Maryland,6003435.0,2192518.0,5862883.0,483.9,553496.0,5862050.0,4809210.0,3261655.0,1547555.0,...,15.1,6.8,4.4,9.6,4.6,6.0,15.5,23.8,8.5,10.9


In [154]:
MD_df = MD_df.reset_index(drop=True)

In [155]:
# merge the dataframes on the common column "Corridor"
Demog = pd.concat([corridors_balt_merge, MD_df], axis=0)
Demog = Demog.reset_index(drop=True)
Demog

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,Belair Rd,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.5,4.4,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3
1,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,12.2,4.9,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1
2,E Monument St,48947.0,19537.0,48257.0,10155.1,12082.0,48357.0,38934.0,24444.0,14490.0,...,8.8,3.5,3.3,8.6,4.6,5.7,13.0,37.0,8.7,7.1
3,Greektown,10968.0,3832.0,10821.0,4134.0,1118.0,10631.0,8559.0,5908.0,2651.0,...,28.0,11.3,9.1,7.8,2.3,7.9,15.5,20.3,12.8,4.1
4,Hamilton Lauraville,36997.0,13384.0,35548.0,7548.4,5274.0,35323.0,28914.0,18848.0,10066.0,...,7.4,5.1,5.3,9.9,5.6,5.9,11.6,33.1,6.7,8.1
5,Hampden,27062.0,8497.0,19748.0,9370.7,2467.0,19829.0,23804.0,12242.0,11562.0,...,5.9,3.3,4.3,7.1,3.3,6.9,13.1,35.3,10.3,5.6
6,Highlandtown,26400.0,11368.0,25721.0,6478.9,4471.0,25936.0,21875.0,14404.0,7471.0,...,8.0,7.7,4.9,6.0,4.7,8.7,13.3,29.9,7.1,7.9
7,Irvington,11585.0,4633.0,11291.0,5826.8,1839.0,11334.0,9565.0,6193.0,3372.0,...,15.5,4.2,9.3,16.5,5.0,3.3,11.2,29.4,7.8,6.1
8,North Ave,15602.0,5403.0,15490.0,10767.0,4929.0,15452.0,12543.0,6784.0,5759.0,...,2.8,5.2,2.5,9.9,9.0,5.0,10.3,30.7,9.9,9.1
9,Park Heights,14908.0,6156.0,14475.0,12257.5,3772.0,14737.0,12587.0,6940.0,5647.0,...,12.2,3.6,4.0,6.4,6.7,5.1,13.9,37.6,7.3,6.9


In [156]:
# Change names of corridors (replace string using apply() function with lambda)
Demographics_Master = Demog.apply(lambda x: x.replace({"Baltimore city, Maryland":"Baltimore City", "Penn Ave": "Pennsylvania Avenue",
                                                       "Belair Rd": "Belair Road", "North Ave": "North Avenue"}, regex=True))
Demographics_Master                   

Unnamed: 0,Corridor,Population,Total households,Household population,Pop. density per sq mile,Income_past_12mos_below_pov_level,Persons_poverty_status_determined,Pop_16_yrs_and_over,Pop_in_labor_force,Pop_NOT_in_labor_force,...,Percent Foreign-born,Construction,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration
0,Belair Road,35458.0,12487.0,33302.0,8265.4,7420.0,33273.0,27588.0,16396.0,11192.0,...,3.5,4.4,3.2,8.3,7.8,5.8,12.0,30.7,8.0,11.3
1,Brooklyn,13989.0,5613.0,13954.0,6284.4,3203.0,13938.0,11356.0,7586.0,3770.0,...,12.2,4.9,5.2,14.3,3.6,4.9,13.8,26.2,8.1,8.1
2,E Monument St,48947.0,19537.0,48257.0,10155.1,12082.0,48357.0,38934.0,24444.0,14490.0,...,8.8,3.5,3.3,8.6,4.6,5.7,13.0,37.0,8.7,7.1
3,Greektown,10968.0,3832.0,10821.0,4134.0,1118.0,10631.0,8559.0,5908.0,2651.0,...,28.0,11.3,9.1,7.8,2.3,7.9,15.5,20.3,12.8,4.1
4,Hamilton Lauraville,36997.0,13384.0,35548.0,7548.4,5274.0,35323.0,28914.0,18848.0,10066.0,...,7.4,5.1,5.3,9.9,5.6,5.9,11.6,33.1,6.7,8.1
5,Hampden,27062.0,8497.0,19748.0,9370.7,2467.0,19829.0,23804.0,12242.0,11562.0,...,5.9,3.3,4.3,7.1,3.3,6.9,13.1,35.3,10.3,5.6
6,Highlandtown,26400.0,11368.0,25721.0,6478.9,4471.0,25936.0,21875.0,14404.0,7471.0,...,8.0,7.7,4.9,6.0,4.7,8.7,13.3,29.9,7.1,7.9
7,Irvington,11585.0,4633.0,11291.0,5826.8,1839.0,11334.0,9565.0,6193.0,3372.0,...,15.5,4.2,9.3,16.5,5.0,3.3,11.2,29.4,7.8,6.1
8,North Avenue,15602.0,5403.0,15490.0,10767.0,4929.0,15452.0,12543.0,6784.0,5759.0,...,2.8,5.2,2.5,9.9,9.0,5.0,10.3,30.7,9.9,9.1
9,Park Heights,14908.0,6156.0,14475.0,12257.5,3772.0,14737.0,12587.0,6940.0,5647.0,...,12.2,3.6,4.0,6.4,6.7,5.1,13.9,37.6,7.3,6.9


In [157]:
#Demographics_Master.to_excel("CommCorr_Tableau-2021/CommCorr_Tableau_2021rev.xlsx")

In [158]:
# Export file as a CSV, without the Pandas index, but with the header
# Do not run this last code block until you have all previous code blocks in their final form:

#Demographics_Tableau.to_csv("CommCorr_demograph2021_Tableau.csv", index = False, header=True)

In [159]:
#census_2021_formatted = census_2021
#census_2021_formatted.head()

In [160]:
"""
# Use .map to format columns (helpful resource for this: https://towardsdatascience.com/apply-thousand-separator-and-other-formatting-to-pandas-dataframe-45f2f4c7ab01)
# Note: once you format values in a column, they are changed to strings (see cell below to see data types of each column)
# I will use the census_2017_FINAL dataframe to use for analysis as needed (can do calculations with number data types but not strings)
# You may need to restart the kernel after you format
census_2020_formatted["Median household income"] = census_2020_FINAL["Median household income"].map("${:.2f}".format)
census_2020_formatted["Per capita income"] = census_2020_FINAL["Per capita income"].map("${:.2f}".format)
census_2020_formatted["Population"] = census_2020_formatted["Population"].map("{:,.0f}".format)
census_2020_formatted["Poverty count"] = census_2020_formatted["Poverty count"].map("{:,.0f}".format)
census_2020_formatted["Poverty rate"] = census_2020_formatted["Poverty rate"].map("{:.2%}".format)
census_2020_formatted["Unemployment rate"] = census_2020_formatted["Unemployment rate"].map("{:.2%}".format)

census_2020_formatted = census_2020_formatted.reset_index(drop=True)
census_2020_formatted.head()
"""

'\n# Use .map to format columns (helpful resource for this: https://towardsdatascience.com/apply-thousand-separator-and-other-formatting-to-pandas-dataframe-45f2f4c7ab01)\n# Note: once you format values in a column, they are changed to strings (see cell below to see data types of each column)\n# I will use the census_2017_FINAL dataframe to use for analysis as needed (can do calculations with number data types but not strings)\n# You may need to restart the kernel after you format\ncensus_2020_formatted["Median household income"] = census_2020_FINAL["Median household income"].map("${:.2f}".format)\ncensus_2020_formatted["Per capita income"] = census_2020_FINAL["Per capita income"].map("${:.2f}".format)\ncensus_2020_formatted["Population"] = census_2020_formatted["Population"].map("{:,.0f}".format)\ncensus_2020_formatted["Poverty count"] = census_2020_formatted["Poverty count"].map("{:,.0f}".format)\ncensus_2020_formatted["Poverty rate"] = census_2020_formatted["Poverty rate"].map("{:.2