In [1]:
## Grow the City's Population:
### Households with children, total households, working age population
## 2017

In [2]:
# data source: 2017 Census Data (ACS-1 year) for Selected Variables - Baltimore City


In [3]:
#pip install cenpy
# From https://cenpy-devs.github.io/cenpy/

In [4]:
# From https://www.census.gov/programs-surveys/acs/guidance/comparing-acs-data.html:
# "Due to the impact of the COVID-19 pandemic, the Census Bureau changed the 2020 ACS release. 
# Instead of providing the standard 1-year data products, the Census Bureau released experimental estimates from the 1-year data. 
# Data users should not compare 2020 ACS 1-year experimental estimates with any other data.""

In [5]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

from us import states
import censusdata 

In [6]:
import os

In [7]:
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\BDC\\Baltimore_Together\\measuring-success-metrics\\analysis_nbs'

In [8]:
# need to change the working directory; be sure that the config file for the census API is also in this folder
os.chdir('C:\\Users\\Jenkir\\Desktop\\BDC\\Baltimore_Together\\measuring-success-metrics\\analysis_nbs')


In [9]:
os.getcwd()

'C:\\Users\\Jenkir\\Desktop\\BDC\\Baltimore_Together\\measuring-success-metrics\\analysis_nbs'

In [10]:
pd.set_option('display.max_rows', 200)

In [11]:
pd.set_option('max_colwidth', 100)

In [12]:
# American Community Survey 1-Year Data 

data = censusdata.download('acs1', 2017,
                          censusdata.censusgeo([('state', '24'),
                                         ('county', '510')]),
                                        ["B11005_002E","B19001_001E","B01003_001E", "B01002_001E",
                                         "B02001_002E", "B02001_003E","B03001_003E"])
                          
census_2017 = pd.DataFrame(data)
census_2017

Unnamed: 0,B11005_002E,B19001_001E,B01003_001E,B01002_001E,B02001_002E,B02001_003E,B03001_003E
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",59831,240280,611648,35.3,183988,383508,32495


In [13]:
# Pull age values from the ACS 1 yr. census codes/ variables listed and save them in the variable "age_data"
age_data = censusdata.download('acs1', 2017,
                          censusdata.censusgeo([('state', '24'),
                                         ('county', '510')]),
                                         ["B01001_003E",                                    
                                          "B01001_004E",
                                          "B01001_005E",
                                          "B01001_006E",
                                          "B01001_007E",
                                          "B01001_008E",
                                          "B01001_009E",
                                          "B01001_010E",
                                          "B01001_011E",
                                          "B01001_012E",
                                          "B01001_013E",
                                          "B01001_014E",
                                          "B01001_015E",
                                          "B01001_016E",            
                                          "B01001_017E",
                                          "B01001_018E",
                                          "B01001_019E",
                                          "B01001_020E",
                                          "B01001_021E",
                                          "B01001_022E",
                                          "B01001_023E",
                                          "B01001_024E",
                                          "B01001_025E",
                                          "B01001_027E",
                                          "B01001_028E",
                                          "B01001_029E",
                                          "B01001_030E",
                                          "B01001_031E",
                                          "B01001_032E",
                                          "B01001_033E",
                                          "B01001_034E",
                                          "B01001_035E",
                                          "B01001_036E",
                                          "B01001_037E",
                                          "B01001_038E",
                                          "B01001_039E",            
                                          "B01001_040E",
                                          "B01001_041E",
                                          "B01001_042E",
                                          "B01001_043E",
                                          "B01001_044E",
                                          "B01001_045E",
                                          "B01001_046E",
                                          "B01001_047E",
                                          "B01001_048E",
                                          "B01001_049E"])
age_2017 = pd.DataFrame(age_data)
age_2017.head()      

Unnamed: 0,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,...,B01001_040E,B01001_041E,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",19712,16273,18113,9206,7787,4586,4190,11723,29837,26254,...,19870,22400,8812,11456,7162,9460,11378,7309,6981,7057


In [14]:
# Sum columns of age groups that are under 18 years old for male and female and add new column "Pop. <18 years"
columns_under18 = ["B01001_003E",                                    
                   "B01001_004E",
                   "B01001_005E",
                   "B01001_006E",
                   "B01001_027E",
                   "B01001_028E",
                   "B01001_029E",
                   "B01001_030E"]
age_2017['Pop. <18 years']= age_2017[columns_under18].sum(axis=1)
age_2017.head()

Unnamed: 0,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,...,B01001_041E,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,Pop. <18 years
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",19712,16273,18113,9206,7787,4586,4190,11723,29837,26254,...,22400,8812,11456,7162,9460,11378,7309,6981,7057,126007


In [15]:
# Sum columns of age groups that are 18-64 years old (working age) for male and female and add new column "Pop. working age"
columns_working_age_2017 = [                                    
                   "B01001_007E",
                   "B01001_008E",
                   "B01001_009E",
                   "B01001_010E",
                   "B01001_011E",
                   "B01001_012E",   
                   "B01001_013E",
                   "B01001_014E",    
                   "B01001_015E",    
                   "B01001_016E",   
                   "B01001_017E", 
                   "B01001_018E",    
                   "B01001_019E",    
                   "B01001_031E",
                   "B01001_032E",
                   "B01001_033E",
                   "B01001_034E",
                   "B01001_035E",   
                   "B01001_036E",
                   "B01001_037E",    
                   "B01001_038E",    
                   "B01001_039E",   
                   "B01001_040E", 
                   "B01001_041E",    
                   "B01001_042E",    
                   "B01001_043E"]    
                           
age_2017['Pop. working age']= age_2017[columns_working_age_2017].sum(axis=1)
age_2017.head()                


Unnamed: 0,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,...,B01001_042E,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,Pop. <18 years,Pop. working age
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",19712,16273,18113,9206,7787,4586,4190,11723,29837,26254,...,8812,11456,7162,9460,11378,7309,6981,7057,126007,403089


In [16]:
# Sum columns of age groups that are 65+ years old for male and female and add new column "Pop. 65+ years"
columns_senior_2017 = ["B01001_020E",
                  "B01001_021E",
                  "B01001_022E",
                  "B01001_023E",                                    
                  "B01001_024E",
                  "B01001_025E",
                  "B01001_044E",
                  "B01001_045E",
                  "B01001_046E",
                  "B01001_047E",                                    
                  "B01001_048E",
                  "B01001_049E"]               
                          
age_2017['Pop. 65+ years']= age_2017[columns_senior_2017].sum(axis=1)
age_2017.head()                           

Unnamed: 0,B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,...,B01001_043E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,Pop. <18 years,Pop. working age,Pop. 65+ years
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",19712,16273,18113,9206,7787,4586,4190,11723,29837,26254,...,11456,7162,9460,11378,7309,6981,7057,126007,403089,82552


In [17]:
age_final_2017 = age_2017[["Pop. <18 years", "Pop. working age", "Pop. 65+ years"]]
age_final_2017.head()

Unnamed: 0,Pop. <18 years,Pop. working age,Pop. 65+ years
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",126007,403089,82552


In [18]:
age_final = age_final_2017.copy()
age_final

Unnamed: 0,Pop. <18 years,Pop. working age,Pop. 65+ years
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",126007,403089,82552


In [19]:
age_final.index = ['Baltimore, 2017']
age_final

Unnamed: 0,Pop. <18 years,Pop. working age,Pop. 65+ years
"Baltimore, 2017",126007,403089,82552


In [20]:
age_final = age_final.reset_index()
age_final

Unnamed: 0,index,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore, 2017",126007,403089,82552


In [21]:
age_final = age_final.rename(columns={"index": "Year"})                                           
age_final 

Unnamed: 0,Year,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore, 2017",126007,403089,82552


In [22]:
age_final.dtypes

Year                object
Pop. <18 years       int64
Pop. working age     int64
Pop. 65+ years       int64
dtype: object

In [23]:
# Replace the census variable codes (such as "B19013_001E") in the dataframe with text so it's understandable
census_2017 = census_2017.rename(columns={"B11005_002E": "Households with one or more people under 18 years",
                                      "B19001_001E": "Total households",
                                      "B01003_001E": "Population",
                                      "B01002_001E": "Median age",
                                      "B02001_003E": "Pop. Black",
                                      "B02001_002E": "Pop. white",
                                      "B03001_003E": "Pop. Hispanic origin"
                                         })
census_2017.head()

Unnamed: 0,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",59831,240280,611648,35.3,183988,383508,32495


In [24]:
census = census_2017.copy()
census

Unnamed: 0,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin
"Baltimore city, Maryland: Summary level: 050, state:24> county:510",59831,240280,611648,35.3,183988,383508,32495


In [25]:
census.index = ['Baltimore, 2017']
census

Unnamed: 0,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin
"Baltimore, 2017",59831,240280,611648,35.3,183988,383508,32495


In [26]:
census = census.reset_index()
census

Unnamed: 0,index,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin
0,"Baltimore, 2017",59831,240280,611648,35.3,183988,383508,32495


In [27]:
census = census.rename(columns={"index": "Year"})                                           
census 

Unnamed: 0,Year,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin
0,"Baltimore, 2017",59831,240280,611648,35.3,183988,383508,32495


In [28]:
# combine the census_2017 dataframe with the age_final_2017 dataframe 
population_2017 = pd.merge(
    census, age_final, on="Year", how="outer")
population_2017

Unnamed: 0,Year,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin,Pop. <18 years,Pop. working age,Pop. 65+ years
0,"Baltimore, 2017",59831,240280,611648,35.3,183988,383508,32495,126007,403089,82552


In [29]:
population_2017["Percent working age"] = 100 * \
    population_2017["Pop. working age"].astype(
        int) / population_2017["Population"].astype(
        int) 

population_2017["Percent working age"] = population_2017["Percent working age"].astype(int)

In [30]:
population_2017["Percent households with children"] = 100 * \
    population_2017["Households with one or more people under 18 years"].astype(
        int) / population_2017["Total households"].astype(
        int) 

population_2017["Percent households with children"] = population_2017["Percent households with children"].astype(int)
population_2017

Unnamed: 0,Year,Households with one or more people under 18 years,Total households,Population,Median age,Pop. white,Pop. Black,Pop. Hispanic origin,Pop. <18 years,Pop. working age,Pop. 65+ years,Percent working age,Percent households with children
0,"Baltimore, 2017",59831,240280,611648,35.3,183988,383508,32495,126007,403089,82552,65,24


In [31]:
# Export file as a CSV, without the Pandas index, but with the header
population_2017.to_csv("ACS_population_2017.csv", index = False, header=True)