In [1]:
### 2021 Census Data (ACS 5-year)for Selected Variables - Baltimore City Totals



In [2]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states
import censusdata 
import censusgeocode as cg
import cenpy
import gmaps
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt

# Census & gmaps API Keys
# the latest year available is the default year and so you do not need to specify the year

from config import (api_key, gkey)
c = Census(api_key)

# Configure gmaps
#gmaps.configure(api_key=gkey)

In [3]:
import os

In [4]:
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\BDC\\Commercial_corridors\\LUC_Corr_Analysis\\BaltimoreCity'

In [5]:
pd.set_option('display.max_rows', 200)

In [6]:
pd.set_option('max_colwidth', 100)

In [7]:
# Save the acreage/ square mile data (csv file) in variable

data_SQMI = "BaltCity_SQ_mile.csv"

In [8]:
# Create dataframe by reading the variable data from the code block above
Balt_SQMI_df = pd.read_csv(data_SQMI)
Balt_SQMI_df

Unnamed: 0,Name,Area_Acres,Area_mi2
0,"Baltimore city, Maryland",59060,92.28


In [9]:
Balt_SQMI_df.dtypes

Name           object
Area_Acres      int64
Area_mi2      float64
dtype: object

In [10]:
# American Community Survey 5-Year Data DETAILED Tables 
# The data are population COUNTS

# See "When to Use 1-year or 5-year estimates": 
# https://www.census.gov/programs-surveys/acs/guidance/estimates.html

# Set this to false if you're trying to do this without an internet connection
# and data which would have been fetched from an API query will be read from cached files instead
INTERNET_IS_WORKING = True

if INTERNET_IS_WORKING:
    con = cenpy.remote.APIConnection('ACSDT5Y2021')
    variables = con.variables
else:
    variables = pd.read_csv('data/ACSDT5Y2021_variables.csv',index_col='Unnamed: 0')


variables.head()

Unnamed: 0,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required
for,Census API FIPS 'for' clause,Census API Geography Specification,fips-for,,0,True,,,
in,Census API FIPS 'in' clause,Census API Geography Specification,fips-in,,0,True,,,
ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,ucgid,,0,True,True,,
B24022_060E,Estimate!!Total:!!Female:!!Service occupations:!!Food preparation and serving related occupations,SEX BY OCCUPATION AND MEDIAN EARNINGS IN THE PAST 12 MONTHS (IN 2021 INFLATION-ADJUSTED DOLLARS)...,int,B24022,0,,,"B24022_060EA,B24022_060M,B24022_060MA",
B19001B_014E,"Estimate!!Total:!!$100,000 to $124,999",HOUSEHOLD INCOME IN THE PAST 12 MONTHS (IN 2021 INFLATION-ADJUSTED DOLLARS) (BLACK OR AFRICAN AM...,int,B19001B,0,,,"B19001B_014EA,B19001B_014M,B19001B_014MA",


In [11]:
# Run Census Search to retrieve data on Baltimore City, MD 
# ***See https://api.census.gov/data/2019/acs/acs5/groups.html  for list of variables and groups for the ACS 5-year estimates***
# ex. "B23025_005E" is "unemployment count"
# The state FIPS code for MD is 24 and the FIPS code for Balt City is 510
census_data = c.acs5.state_county(("NAME","B11002_001E", 
                          "B19001_001E","B19013_001E", "B01003_001E", "B01002_001E",
                          "B02001_002E",
                          "B02001_003E",
                          "B02001_005E",              
                          "B02001_008E",
                          "B03001_003E",
                          "B05002_013E",
                          "B08301_001E",               
                          "B08301_010E",              
                          "B15003_016E",               
                          "B15003_017E",
                          "B15003_021E",               
                          "B15003_022E", 
                          "B17001_002E",
                          "B19301_001E",               
                          "B23025_001E",
                          "B23025_002E",
                          "B23025_004E",
                          "B23025_005E",              
                          "B23025_007E",
                          "B25044_003E",
                          "B25044_010E",
                          "B25008_002E",
                          "B25003_001E",               
                          "B25003_002E",                                    
                          "B25003_003E",
                          "B28003_005E",
                          "B28003_006E",
                          "C17002_001E",              
                          "C24030_006E",                
                          "C24030_007E", 
                          "C24030_009E",                
                          "C24030_010E",                
                          "C24030_014E",                
                          "C24030_017E",                
                          "C24030_021E",                
                          "C24030_024E",
                          "C24030_028E",              
                          "C24030_033E",                
                          "C24030_034E",                
                          "C24030_036E",                
                          "C24030_037E",                
                          "C24030_041E",  
                          "C24030_044E",               
                          "C24030_048E",                
                          "C24030_051E",
                          "C24030_055E ),
                          state_fips = "24",
                          county_fips = "510",
                          )
census_pd = pd.DataFrame(census_data)
census_pd.head()      

Unnamed: 0,NAME,B19001_001E,B19013_001E,B01003_001E,B01002_001E,B19301_001E,B17001_002E,B23025_005E,B23025_004E,B15003_016E,...,C24030_034E,C24030_036E,C24030_037E,C24030_041E,C24030_044E,C24030_048E,C24030_051E,C24030_055E,state,county
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,3966.0,13288.0,4605.0,8087.0,15521.0,63444.0,12201.0,13515.0,24,510


In [12]:
# Pull values from the ACS 5 yr. census codes/ variables listed and save them in the variable "age_data"

age_data = c.acs5.state_county(("NAME", "B01001_003E",                                    
                          "B01001_004E",
                          "B01001_005E",
                          "B01001_006E",
                          "B01001_007E",
                          "B01001_008E",
                          "B01001_009E",
                          "B01001_010E",
                          "B01001_011E",
                          "B01001_012E",
                          "B01001_013E",
                          "B01001_014E",
                          "B01001_015E",
                          "B01001_016E",            
                          "B01001_017E",
                          "B01001_018E",
                          "B01001_019E",
                          "B01001_020E",
                          "B01001_021E",
                          "B01001_022E",
                          "B01001_023E",
                          "B01001_024E",
                          "B01001_025E",
                          "B01001_027E",
                          "B01001_028E",
                          "B01001_029E",
                          "B01001_030E",
                          "B01001_031E",
                          "B01001_032E",
                          "B01001_033E",
                          "B01001_034E",
                          "B01001_035E",
                          "B01001_036E",
                          "B01001_037E",
                          "B01001_038E",
                          "B01001_039E",            
                          "B01001_040E",
                          "B01001_041E",
                          "B01001_042E",
                          "B01001_043E",
                          "B01001_044E",
                          "B01001_045E",
                          "B01001_046E",
                          "B01001_047E",
                          "B01001_048E",
                          "B01001_049E"),          
                          state_fips = "24",
                          county_fips = "510",
                         )
age_pd = pd.DataFrame(age_data)
age_pd.head()      

Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county
0,"Baltimore city, Maryland",20357.0,18021.0,16926.0,9570.0,8068.0,4682.0,4790.0,12019.0,29510.0,...,8708.0,12208.0,7092.0,8953.0,11030.0,8274.0,6375.0,6730.0,24,510


In [13]:
# Sum columns of age groups that are under 18 years old for male and female and add new column "Pop. <18 years"
columns_under18 = ["B01001_003E",                                    
                   "B01001_004E",
                   "B01001_005E",
                   "B01001_006E",
                   "B01001_027E",
                   "B01001_028E",
                   "B01001_029E",
                   "B01001_030E"]
age_pd['Pop. <18 years']= age_pd[columns_under18].sum(axis=1)
age_pd.head()

Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,Pop. <18 years
0,"Baltimore city, Maryland",20357.0,18021.0,16926.0,9570.0,8068.0,4682.0,4790.0,12019.0,29510.0,...,12208.0,7092.0,8953.0,11030.0,8274.0,6375.0,6730.0,24,510,128219.0


In [14]:
# Sum columns of age groups that are 18-64 years old (working age) for male and female and add new column "Pop. working age"
columns_working_age = [                                    
                   "B01001_007E",
                   "B01001_008E",
                   "B01001_009E",
                   "B01001_010E",
                   "B01001_011E",
                   "B01001_012E",   
                   "B01001_013E",
                   "B01001_014E",    
                   "B01001_015E",    
                   "B01001_016E",   
                   "B01001_017E", 
                   "B01001_018E",    
                   "B01001_019E",    
                   "B01001_031E",
                   "B01001_032E",
                   "B01001_033E",
                   "B01001_034E",
                   "B01001_035E",   
                   "B01001_036E",
                   "B01001_037E",    
                   "B01001_038E",    
                   "B01001_039E",   
                   "B01001_040E", 
                   "B01001_041E",    
                   "B01001_042E",    
                   "B01001_043E"]    
                           
age_pd['Pop. working age']= age_pd[columns_working_age].sum(axis=1)
age_pd.head()     


Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,Pop. <18 years,Pop. working age
0,"Baltimore city, Maryland",20357.0,18021.0,16926.0,9570.0,8068.0,4682.0,4790.0,12019.0,29510.0,...,7092.0,8953.0,11030.0,8274.0,6375.0,6730.0,24,510,128219.0,405635.0


In [15]:
# Sum columns of age groups that are 65+ years old for male and female and add new column "Pop. 65+ years"
columns_senior = ["B01001_020E",
                  "B01001_021E",
                  "B01001_022E",
                  "B01001_023E",                                    
                  "B01001_024E",
                  "B01001_025E",
                  "B01001_044E",
                  "B01001_045E",
                  "B01001_046E",
                  "B01001_047E",                                    
                  "B01001_048E",
                  "B01001_049E"]               
                          
age_pd['Pop. 65+ years']= age_pd[columns_senior].sum(axis=1)
age_pd.head()                           

Unnamed: 0,NAME,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore city, Maryland",20357.0,18021.0,16926.0,9570.0,8068.0,4682.0,4790.0,12019.0,29510.0,...,8953.0,11030.0,8274.0,6375.0,6730.0,24,510,128219.0,405635.0,80846.0


In [16]:
# Rename "NAME" column so we can join/ merge it with other dfs later on the "Name" column
age_pd = age_pd.rename(columns={"NAME": "Name"
                               })
age_pd                                     

Unnamed: 0,Name,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,...,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore city, Maryland",20357.0,18021.0,16926.0,9570.0,8068.0,4682.0,4790.0,12019.0,29510.0,...,8953.0,11030.0,8274.0,6375.0,6730.0,24,510,128219.0,405635.0,80846.0


In [17]:
age_temp = age_pd[[ "Name","state", "county", "Pop. <18 years", "Pop. working age", "Pop. 65+ years"]]
age_temp.head()

Unnamed: 0,Name,state,county,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore city, Maryland",24,510,128219.0,405635.0,80846.0


In [18]:
age_temp.dtypes

Name                 object
state                object
county               object
Pop. <18 years      float64
Pop. working age    float64
Pop. 65+ years      float64
dtype: object

In [19]:
age_final = age_temp.copy()

In [20]:
# Did not add in daytime population - will use ESRI business analyst for this 
# See https://www.census.gov/topics/employment/commuting/guidance/calculations.html
# "commuter-adjusted daytime population estimates" =    
#         total resident population + total workers working in area - total workers living in area

# For "Workers in Workplace Geography," see https://www.census.gov/topics/employment/commuting/guidance/calculations.html
# "Total workers working in area:
# B08604 Total Workers for Workplace Geography
# B08604 is only available for data years 2011 and after. 
# The tables for workplace geography are only available for the following geographic summary levels: States; 
# Counties; Places; County Subdivisions in selected states (not MD); Combined Statistical Areas; Metropolitan 
# and Micropolitan Statistical Areas, and their associated Metropolitan Divisions and Principal Cities; 

census_data_workers = c.acs5.state_county(("NAME", 
                          "B08604_001E"),               
                          state_fips = "24",
                          county_fips = "510") 

# convert to dataframe
workers_df = pd.DataFrame(census_data_workers)
workers_df                         

Unnamed: 0,NAME,B08604_001E,state,county
0,"Baltimore city, Maryland",382638.0,24,510


In [21]:
# Replace the census variable codes (such as "B19013_001E") in the dataframe with text so it's understandable
census_pd = census_pd.rename(columns={"B19001_001E": "Total households",
                                      "B01003_001E": "Population",
                                      "tract": "Census Tract", "C17002_001E": "poverty", 
                                      "B08301_001E": "Workers_16_yrs_and_over",  
                                      "B08301_010E": "Commute_to_work_public_transportation",       
                                      "B01002_001E": "Median age",
                                      "B02001_002E": "Pop. white",
                                      "B02001_003E": "Pop. Black",
                                      "B02001_005E": "Pop. Asian",        
                                      "B02001_008E": "Pop. two or more races",
                                      "B03001_003E": "Pop. Hispanic origin",
                                      "B05002_013E": "# Foreign-born",
                                      "B11002_001E": "Household population",
                                      "B15003_016E": "# persons 12th grade, no diploma",
                                      "B15003_017E": "# persons age 25+ graduated high school",
                                      "B15003_021E": "# persons Associate's degree",
                                      "B15003_022E": "# persons age 25+ with Bachelor's degree",
                                      "B19013_001E": "Median household income",
                                      "B19301_001E": "Per capita income", 
                                      "B17001_002E": "Poverty count",
                                      "B23025_001E": "Pop_16_yrs_and_over",
                                      "B23025_002E": "Pop_in_labor_force",
                                      "B23025_004E": "Employed_civilians",
                                      "B23025_005E": "Unemployed_civilians",
                                      "B23025_007E": "Pop_NOT_in_labor_force",
                                      "B25008_002E": "Total pop. in occupied housing units by tenure",
                                      "B25003_001E": "Total occupied units",
                                      "B25003_002E": "Total owner-occupied units",
                                      "B25003_003E": "Total renter-occupied units",
                                      "B25044_003E": "No_vehicle_available_owneroccupied_unit",
                                      "B25044_010E": "No_vehicle_available_renteroccupied_unit",
                                      "B28003_005E": "# households with computer, no internet subscription",
                                      "B28003_006E": "# households no computer",
                                      "C24030_006E": "ConstructionM", 
                                      "C24030_007E": "ManufacturingM", 
                                      "C24030_009E": "Retail_TradeM",
                                      "C24030_010E": "Transportation_warehousing_utilitiesM",
                                      "C24030_014E": "Finance_insurance_realestateM",               
                                      "C24030_017E": "Professional_scientific_mgmt_administrativeM",              
                                      "C24030_021E": "Educational_healthcare_socialM",                
                                      "C24030_024E": "Accommodations_foodservices_arts_entertainmentM",
                                      "C24030_028E": "Public_administrationM",
                                      "C24030_033E": "ConstructionF",                
                                      "C24030_034E": "ManufacturingF",
                                      "C24030_036E": "Retail_TradeF",                
                                      "C24030_037E": "Transportation_warehousing_utilitiesF",                
                                      "C24030_041E": "Finance_insurance_realestateF",  
                                      "C24030_044E": "Professional_scientific_mgmt_administrativeF",               
                                      "C24030_048E": "Educational_healthcare_socialF",
                                      "C24030_051E": "Accommodations_foodservices_arts_entertainmentF",
                                      "C24030_055E": "Public_administrationF",
                                      "NAME": "Name", "state": "State", "GEOID": "GEOID"
                                     })
census_pd 

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,State,county
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,3966.0,13288.0,4605.0,8087.0,15521.0,63444.0,12201.0,13515.0,24,510


In [22]:
census_pd.columns

Index(['Name', 'Total households', 'Median household income', 'Population',
       'Median age', 'Per capita income', 'Poverty count',
       'Unemployment count', '# employed, age 16+',
       '# persons 12th grade, no diploma',
       '# persons age 25+ graduated high school',
       '# persons Associate's degree',
       '# persons age 25+ with Bachelor's degree', 'Pop. white', 'Pop. Black',
       'Pop. Asian', 'Pop. two or more races', 'Pop. Hispanic origin',
       '# Foreign-born', 'Total pop. in occupied housing units by tenure',
       'Total owner-occupied units', 'Total renter-occupied units',
       '# households with computer, no internet subscription',
       '# households no computer', 'Construction-M', 'Manufacturing-M',
       'Retail Trade-M', 'Transportation, warehousing, utilities-M',
       'Finance, insurance, real estate-M',
       'Professional, scientific, mgmt, administrative-M',
       'Educational services, health care, social assistance-M',
       'Accommod

In [23]:
census_pd["Construction"] = census_pd["Construction-M"].astype(
        int) + census_pd["Construction"].astype(
        int) 
census_pd.head()

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Manufacturing,Retail Trade,"Transportation, warehousing, utilities","Finance, insurance, real estate","Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,State,county
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,3966.0,13288.0,4605.0,8087.0,15521.0,63444.0,12201.0,13515.0,24,510


In [24]:
census_pd["Manufacturing"] = census_pd["Manufacturing-M"].astype(
        int) + census_pd["Manufacturing"].astype(
        int) 

In [25]:
census_pd["Retail Trade"] = census_pd["Retail Trade-M"].astype(
        int) + census_pd["Retail Trade"].astype(
        int) 

In [26]:
census_pd["Transportation, warehousing, utilities"] = census_pd["Transportation, warehousing, utilities-M"].astype(
        int) + census_pd["Transportation, warehousing, utilities"].astype(
        int) 

In [27]:
census_pd["Finance, insurance, real estate"] = census_pd["Finance, insurance, real estate-M"].astype(
        int) + census_pd["Finance, insurance, real estate"].astype(
        int) 

In [28]:
census_pd["Professional, scientific, mgmt, administrative"] = census_pd["Professional, scientific, mgmt, administrative-M"].astype(
        int) + census_pd["Professional, scientific, mgmt, administrative"].astype(
        int) 

In [29]:
census_pd["Educational services, health care, social assistance"] = census_pd["Educational services, health care, social assistance-M"].astype(
        int) + census_pd["Educational services, health care, social assistance"].astype(
        int) 

In [30]:
census_pd["Accommodations, food services, arts, entertainment"] = census_pd["Accommodations, food services, arts, entertainment-M"].astype(
        int) + census_pd["Accommodations, food services, arts, entertainment"].astype(
        int) 

In [31]:
census_pd["Public administration"] = census_pd["Public administration-M"].astype(
        int) + census_pd["Public administration"].astype(
        int) 

In [32]:
# merge the census_pd dataframe with the age_final dataframe on the common column "NAME"
census_Balt = pd.merge(
    census_pd, age_final, on="Name"
                        )
census_Balt

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,"Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,State,county_x,state,county_y,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,88531,25274,22659,24,510,24,510,128219.0,405635.0,80846.0


In [33]:
# merge the census_Balt dataframe with the Balt_SQMI_dfdataframe on the common column "Name"
census_Balt1 = pd.merge(
    census_Balt, Balt_SQMI_df, on="Name"
                        )
census_Balt1

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Public administration,State,county_x,state,county_y,Pop. <18 years,Pop. working age,Pop. 65+ years,Area_Acres,Area_mi2
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,22659,24,510,24,510,128219.0,405635.0,80846.0,59060,92.28


In [34]:
census_Balt1.columns

Index(['Name', 'Total households', 'Median household income', 'Population',
       'Median age', 'Per capita income', 'Poverty count',
       'Unemployment count', '# employed, age 16+',
       '# persons 12th grade, no diploma',
       '# persons age 25+ graduated high school',
       '# persons Associate's degree',
       '# persons age 25+ with Bachelor's degree', 'Pop. white', 'Pop. Black',
       'Pop. Asian', 'Pop. two or more races', 'Pop. Hispanic origin',
       '# Foreign-born', 'Total pop. in occupied housing units by tenure',
       'Total owner-occupied units', 'Total renter-occupied units',
       '# households with computer, no internet subscription',
       '# households no computer', 'Construction-M', 'Manufacturing-M',
       'Retail Trade-M', 'Transportation, warehousing, utilities-M',
       'Finance, insurance, real estate-M',
       'Professional, scientific, mgmt, administrative-M',
       'Educational services, health care, social assistance-M',
       'Accommod

In [35]:
# Drop extraneous columns
census_Balt1 = census_Balt1.drop(["State", "county_x", "state", "county_y"], axis=1)

In [36]:
census_Balt_final = census_Balt1.copy()

In [37]:
census_Balt_final["Total occupied units"] = census_Balt_final.loc[:, ["Total owner-occupied units", "Total renter-occupied units"]].sum(axis=1)
census_Balt_final

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,"Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,Pop. <18 years,Pop. working age,Pop. 65+ years,Area_Acres,Area_mi2,Total occupied units
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,34809,88531,25274,22659,128219.0,405635.0,80846.0,59060,92.28,238436.0


In [38]:
Balt_percentages = census_Balt_final.copy()
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,"Professional, scientific, mgmt, administrative","Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,Pop. <18 years,Pop. working age,Pop. 65+ years,Area_Acres,Area_mi2,Total occupied units
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,34809,88531,25274,22659,128219.0,405635.0,80846.0,59060,92.28,238436.0


In [39]:
# Calculate population density  see: https://www.census.gov/quickfacts/fact/note/US/LND110210
# Density is expressed as "population per square mile(kilometer)"
# Divide total population (or # of housing units)/ by land area of the entity measured in square miles
Balt_percentages['Pop. density per Sq Mile'] = Balt_percentages['Population'].astype(
                                    float) / Balt_percentages['Area_mi2'].astype(
                                    float)
Balt_percentages   

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,"Educational services, health care, social assistance","Accommodations, food services, arts, entertainment",Public administration,Pop. <18 years,Pop. working age,Pop. 65+ years,Area_Acres,Area_mi2,Total occupied units,Pop. density per Sq Mile
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,88531,25274,22659,128219.0,405635.0,80846.0,59060,92.28,238436.0,6661.248375


In [40]:
Balt_percentages["Home ownership rate"] = 100 * \
    Balt_percentages["Total owner-occupied units"].astype(
        int) / Balt_percentages["Total occupied units"].astype(
        int) 

Balt_percentages.head()

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,"Accommodations, food services, arts, entertainment",Public administration,Pop. <18 years,Pop. working age,Pop. 65+ years,Area_Acres,Area_mi2,Total occupied units,Pop. density per Sq Mile,Home ownership rate
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,25274,22659,128219.0,405635.0,80846.0,59060,92.28,238436.0,6661.248375,47.348974


In [41]:
# Round the home ownership rate to one decimal point; using "float" instead of "int" because want to use decimal points
Balt_percentages["Home ownership rate"] = Balt_percentages["Home ownership rate"].astype(float).round(1)


In [42]:
# Add a new column for poverty rate (Poverty Count / Population)
Balt_percentages["Poverty rate"] = 100 * \
    Balt_percentages["Poverty count"].astype(
        int) / Balt_percentages["Population"].astype(int)

Balt_percentages["Poverty rate"] = Balt_percentages["Poverty rate"].astype(float).round(1)


# Add a new column for unemployment rate (Employment Count / Population)
Balt_percentages["Unemployment rate"] = 100 * \
    Balt_percentages["Unemployment count"].astype(
        int) / Balt_percentages["Population"].astype(int)

Balt_percentages["Unemployment rate"] = Balt_percentages["Unemployment rate"].astype(float).round(1)

Balt_percentages.head()

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Pop. <18 years,Pop. working age,Pop. 65+ years,Area_Acres,Area_mi2,Total occupied units,Pop. density per Sq Mile,Home ownership rate,Poverty rate,Unemployment rate
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,128219.0,405635.0,80846.0,59060,92.28,238436.0,6661.248375,47.3,21.0,4.5


In [None]:
# Add a new column for average household size (Household population / Total households); see https://www.census.gov/quickfacts/fact/note/US/HSD410221#:~:text=Persons%20per%20household%2C%20or%20average,by%20the%20number%20of%20households.
Balt_percentages["Average hh size"] = Balt_percentages["Household population"].astype(
        int) / Balt_percentages["Total households"].astype(int)

Balt_percentages["Average hh size"] = Balt_percentages["Average hh size"].astype(float).round(2)

Balt_percentages.head()

In [None]:
# Add a new column for # of occupied units with no vehicle available (sum column values of owner-occupied units with no vehicles available
# and renter-occupied units with no vehicles available)

Balt_percentages["Number units with no vehicle available"] = Balt_percentages.apply(lambda row: row.No_vehicle_available_owneroccupied_unit + row.No_vehicle_available_renteroccupied_unit, axis=1) 
Balt_percentages.head()


In [None]:
# Add a new column for % of occupied units with no vehicle available 
# (owner-occupied units with no vehicle available + renter-occupied units with no vehicle available / Total occupied units)
Balt_percentages["Percent units with no vehicle available"] = 100 * \
    Balt_percentages["Number units with no vehicle available"].astype(
        int) / Balt_percentages["Total occupied units"].astype(
        int) 

Balt_percentages["Percent units with no vehicle available"] = Balt_percentages["Percent units with no vehicle available"].astype(float).round(1)
Balt_percentages

In [None]:
Balt_percentages["Percent commute to work public transportation"] = 100 * \
    Balt_percentages["Commute_to_work_public_transportation"].astype(
        int) / Balt_percentages["Workers_16_yrs_and_over"].astype(
        int) 

Balt_percentages["Percent commute to work public transportation"] = Balt_percentages["Percent commute to work public transportation"].astype(float).round(1)


In [43]:
Balt_percentages["Percent Black"] = 100 * \
    Balt_percentages["Pop. Black"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 

Balt_percentages["Percent Black"] = Balt_percentages["Percent Black"].astype(float).round(1)

In [44]:
Balt_percentages["Percent white"] = 100 * \
    Balt_percentages["Pop. white"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent white"] = Balt_percentages["Percent white"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Pop. 65+ years,Area_Acres,Area_mi2,Total occupied units,Pop. density per Sq Mile,Home ownership rate,Poverty rate,Unemployment rate,Percent Black,Percent white
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,80846.0,59060,92.28,238436.0,6661.248375,47.3,21.0,4.5,62.5,30.4


In [45]:
Balt_percentages["Percent Asian"] = 100 * \
    Balt_percentages["Pop. Asian"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Asian"] = Balt_percentages["Percent Asian"].astype(float).round(1)


In [46]:
Balt_percentages["Percent Hispanic origin"] = 100 * \
    Balt_percentages["Pop. Hispanic origin"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Hispanic origin"] = Balt_percentages["Percent Hispanic origin"].astype(float).round(1)


In [47]:
Balt_percentages["Percent two or more races"] = 100 * \
    Balt_percentages["Pop. two or more races"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent two or more races"] = Balt_percentages["Percent two or more races"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Total occupied units,Pop. density per Sq Mile,Home ownership rate,Poverty rate,Unemployment rate,Percent Black,Percent white,Percent Asian,Percent Hispanic origin,Percent two or more races
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,238436.0,6661.248375,47.3,21.0,4.5,62.5,30.4,2.6,5.1,2.5


In [48]:
Balt_percentages["Percent 12th grade, no diploma"] = 100 * \
    Balt_percentages["# persons 12th grade, no diploma"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent 12th grade, no diploma"] = Balt_percentages["Percent 12th grade, no diploma"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Pop. density per Sq Mile,Home ownership rate,Poverty rate,Unemployment rate,Percent Black,Percent white,Percent Asian,Percent Hispanic origin,Percent two or more races,"Percent 12th grade, no diploma"
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,6661.248375,47.3,21.0,4.5,62.5,30.4,2.6,5.1,2.5,1.4


In [49]:
Balt_percentages["Percent graduated high school"] = 100 * \
    Balt_percentages["# persons age 25+ graduated high school"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent graduated high school"] = Balt_percentages["Percent graduated high school"].astype(float).round(1)


In [None]:
Balt_percentages["Percent earned Associate's degree"] = 100 * \
    Balt_percentages["# persons Associate's degree"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent earned Associate's degree"] = Balt_percentages["Percent earned Associate's degree"].astype(float).round(1)


In [50]:
Balt_percentages["Percent earned Bachelor's degree"] = 100 * \
    Balt_percentages["# persons age 25+ with Bachelor's degree"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent earned Bachelor's degree"] = Balt_percentages["Percent earned Bachelor's degree"].astype(float).round(1)


In [51]:
Balt_percentages["Percent computer, no internet subscription"] = 100 * \
    Balt_percentages["# households with computer, no internet subscription"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent computer, no internet subscription"] = Balt_percentages["Percent computer, no internet subscription"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Unemployment rate,Percent Black,Percent white,Percent Asian,Percent Hispanic origin,Percent two or more races,"Percent 12th grade, no diploma",Percent graduated high school,Percent earned Bachelor's degree,"Percent computer, no internet subscription"
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,4.5,62.5,30.4,2.6,5.1,2.5,1.4,17.2,11.3,4.7


In [52]:
Balt_percentages["Percent no computer"] = 100 * \
    Balt_percentages["# households no computer"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent no computer"] = Balt_percentages["Percent no computer"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Percent Black,Percent white,Percent Asian,Percent Hispanic origin,Percent two or more races,"Percent 12th grade, no diploma",Percent graduated high school,Percent earned Bachelor's degree,"Percent computer, no internet subscription",Percent no computer
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,62.5,30.4,2.6,5.1,2.5,1.4,17.2,11.3,4.7,6.2


In [53]:
Balt_percentages["Percent less than 18 yrs old"] = 100 * \
    Balt_percentages["Pop. <18 years"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent less than 18 yrs old"] = Balt_percentages["Percent less than 18 yrs old"].astype(float).round(1)


In [54]:
# working age is 18-64 yrs old
Balt_percentages["Percent working age"] = 100 * \
    Balt_percentages["Pop. working age"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent working age"] = Balt_percentages["Percent working age"].astype(float).round(1)


In [55]:
Balt_percentages["Percent 65 or older"] = 100 * \
    Balt_percentages["Pop. 65+ years"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent 65 or older"] = Balt_percentages["Percent 65 or older"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Percent Hispanic origin,Percent two or more races,"Percent 12th grade, no diploma",Percent graduated high school,Percent earned Bachelor's degree,"Percent computer, no internet subscription",Percent no computer,Percent less than 18 yrs old,Percent working age,Percent 65 or older
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,5.1,2.5,1.4,17.2,11.3,4.7,6.2,20.9,66.0,13.2


In [56]:
Balt_percentages["Percent Foreign-born"] = 100 * \
    Balt_percentages["# Foreign-born"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Foreign-born"] = Balt_percentages["Percent Foreign-born"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Percent two or more races,"Percent 12th grade, no diploma",Percent graduated high school,Percent earned Bachelor's degree,"Percent computer, no internet subscription",Percent no computer,Percent less than 18 yrs old,Percent working age,Percent 65 or older,Percent Foreign-born
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,2.5,1.4,17.2,11.3,4.7,6.2,20.9,66.0,13.2,8.1


In [57]:
Balt_percentages["Percent Manufacturing"] = 100 * \
    Balt_percentages["Manufacturing"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Manufacturing"] = Balt_percentages["Percent Manufacturing"].astype(float).round(1)


In [58]:
Balt_percentages["Percent Retail Trade"] = 100 * \
    Balt_percentages["Retail Trade"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Retail Trade"] = Balt_percentages["Percent Retail Trade"].astype(float).round(1)

In [59]:
Balt_percentages["Percent Transportation, warehousing, utilities"] = 100 * \
    Balt_percentages["Transportation, warehousing, utilities"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Transportation, warehousing, utilities"] = Balt_percentages["Percent Transportation, warehousing, utilities"].astype(float).round(1)

In [60]:
Balt_percentages["Percent Finance, insurance, real estate"] = 100 * \
    Balt_percentages["Finance, insurance, real estate"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Finance, insurance, real estate"] = Balt_percentages["Percent Finance, insurance, real estate"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,"Percent computer, no internet subscription",Percent no computer,Percent less than 18 yrs old,Percent working age,Percent 65 or older,Percent Foreign-born,Percent Manufacturing,Percent Retail Trade,"Percent Transportation, warehousing, utilities","Percent Finance, insurance, real estate"
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,4.7,6.2,20.9,66.0,13.2,8.1,2.0,4.3,2.6,2.6


In [61]:
Balt_percentages["Percent Professional, scientific, mgmt, administrative"] = 100 * \
    Balt_percentages["Professional, scientific, mgmt, administrative"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Professional, scientific, mgmt, administrative"] = Balt_percentages["Percent Professional, scientific, mgmt, administrative"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Percent no computer,Percent less than 18 yrs old,Percent working age,Percent 65 or older,Percent Foreign-born,Percent Manufacturing,Percent Retail Trade,"Percent Transportation, warehousing, utilities","Percent Finance, insurance, real estate","Percent Professional, scientific, mgmt, administrative"
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,6.2,20.9,66.0,13.2,8.1,2.0,4.3,2.6,2.6,5.7


In [62]:
Balt_percentages["Percent Educational services, health care, social assistance"] = 100 * \
    Balt_percentages["Educational services, health care, social assistance"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Educational services, health care, social assistance"] = Balt_percentages["Percent Educational services, health care, social assistance"].astype(float).round(1)

In [63]:
Balt_percentages["Percent Accommodations, food services, arts, entertainment"] = 100 * \
    Balt_percentages["Accommodations, food services, arts, entertainment"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Accommodations, food services, arts, entertainment"] = Balt_percentages["Percent Accommodations, food services, arts, entertainment"].astype(float).round(1)

In [64]:
Balt_percentages["Percent Public administration"] = 100 * \
    Balt_percentages["Public administration"].astype(
        int) / Balt_percentages["Population"].astype(
        int) 
Balt_percentages["Percent Public administration"] = Balt_percentages["Percent Public administration"].astype(float).round(1)
Balt_percentages

Unnamed: 0,Name,Total households,Median household income,Population,Median age,Per capita income,Poverty count,Unemployment count,"# employed, age 16+","# persons 12th grade, no diploma",...,Percent 65 or older,Percent Foreign-born,Percent Manufacturing,Percent Retail Trade,"Percent Transportation, warehousing, utilities","Percent Finance, insurance, real estate","Percent Professional, scientific, mgmt, administrative","Percent Educational services, health care, social assistance","Percent Accommodations, food services, arts, entertainment",Percent Public administration
0,"Baltimore city, Maryland",238436.0,48840.0,614700.0,35.1,29700.0,128829.0,27963.0,279034.0,8895.0,...,13.2,8.1,2.0,4.3,2.6,2.6,5.7,14.4,4.1,3.7


In [65]:
# Replace column name "Name" so we can match it on "Corridor" column at a later step
Balt_percentages = Balt_percentages.rename(columns={"Name": "Corridor",
                                      "Percent white": "Percent White",
                                      "Percent 2 or more races": "Percent Two or more races"
                                           })

In [66]:
Balt_percentages.columns

Index(['Corridor', 'Total households', 'Median household income', 'Population',
       'Median age', 'Per capita income', 'Poverty count',
       'Unemployment count', '# employed, age 16+',
       '# persons 12th grade, no diploma',
       '# persons age 25+ graduated high school',
       '# persons Associate's degree',
       '# persons age 25+ with Bachelor's degree', 'Pop. white', 'Pop. Black',
       'Pop. Asian', 'Pop. two or more races', 'Pop. Hispanic origin',
       '# Foreign-born', 'Total pop. in occupied housing units by tenure',
       'Total owner-occupied units', 'Total renter-occupied units',
       '# households with computer, no internet subscription',
       '# households no computer', 'Construction-M', 'Manufacturing-M',
       'Retail Trade-M', 'Transportation, warehousing, utilities-M',
       'Finance, insurance, real estate-M',
       'Professional, scientific, mgmt, administrative-M',
       'Educational services, health care, social assistance-M',
       'Acco

In [67]:
# Export file as an Excel file (will merge this file with the corridor info. in "ACS5YR-detailed-tables-2021.ipynb")
#Balt_percentages.to_excel("BaltCity_demographics_2021.xlsx")

In [73]:
# export as csv - will the import into the "ACS5YR-detailed-tables-2021" nb and merge with dataframe
# ignore "percentages" in name - file includes counts as well as percentages
Balt_percentages.to_csv("BaltCity_demographics_2021.csv", index = False, header=True)

In [69]:
# If want to transpose....

In [70]:
#Greektown_percentages = Greektown_percentages.T
#Greektown_percentages

In [71]:
#Greektown_percentages.reset_index().rename(columns={'index':'Category'})
#Greektown_percentages

In [72]:
# make the first row the header
#Greektown_percentages = Greektown_percentages.rename(columns=Greektown_percentages.iloc[0]).drop(Greektown_percentages.index[0])
#Greektown_percentages = Greektown_percentages.reset_index().rename(columns={'index': 'Metric'})
#Greektown_percentages.drop(index=Greektown_percentages.index[0], axis=0,inplace=True)
#Greektown_percentages