In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
import os
import json
import time
# Census API Key
from config import api_key

In [2]:
#census_data = c.acs5.get().json()
query_url = f"https://api.census.gov/data/2019/acs/acs5"
display(query_url)
#https://api.census.gov/data#/2019/acs/acs5&{api_key}#?get=NAME,group(B01001)&for=us:1&key={api_key}

'https://api.census.gov/data/2019/acs/acs5'

In [3]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

cols = ["Zipcode", "Household Income", "Population",
        "Median Contract Rent", "Median Gross Rent", "Median Home Value",
        "Median Monthly Owner Costs", "Year"]

df = pd.DataFrame(columns = cols)

#years = [2014, 2019]
#years = [2015, 2015, 2016, 2017, 2018, 2019]
years = [2019]
for year in years:

    c = Census(api_key, year=year)
    census_data = c.acs5.get(("NAME", "B19013_001E", 
                              "B01003_001E",
                              "B25058_001E",
                              "B25064_001E", 
                              "B25077_001E", 
                              "B25088_002E"), {'for': 'zip code tabulation area:*'})

    # Convert to DataFrame
    census_pd = pd.DataFrame(census_data)

    # Column Reordering
    census_pd = census_pd.rename(columns={"B19013_001E": "Household Income",
                                          "B01003_001E": "Population",
                                          "B25058_001E": "Median Contract Rent",
                                          "B25064_001E": "Median Gross Rent",
                                          "B25077_001E": "Median Home Value",
                                          "B25088_002E": "Median Monthly Owner Costs",
                                          "NAME": "Name", "zip code tabulation area": "Zipcode"})


    # Final DataFrame
    census_pd['Year']=str(year)
    
    census_pd = census_pd[cols]
    
    
    concat_df = pd.concat([df, census_pd])

# Visualize
#print(len(census_pd))
display(concat_df.head())

Unnamed: 0,Zipcode,Household Income,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Year
0,601,14361.0,17113.0,292.0,383.0,83900.0,771.0,2019
1,602,16807.0,37751.0,293.0,400.0,85300.0,877.0,2019
2,603,16049.0,47081.0,328.0,433.0,118400.0,832.0,2019
3,606,12119.0,6392.0,196.0,275.0,80800.0,526.0,2019
4,610,19898.0,26686.0,338.0,427.0,87600.0,751.0,2019


In [4]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
concat_df.to_csv("census_data.csv", encoding="utf-8", index=False)

In [5]:
!ls

__pycache__
api_keys.py
census_data.csv
census_data_2017Years.csv
census_data2019.csv
Census_Demo.ipynb
config.py
files
README.md


In [6]:
#create DataFrame for MSA
#https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
msa_df = pd.read_csv('../project_1/files/ScanUSZipCode2017A.csv',low_memory=False)
msa_df

Unnamed: 0,ZIP,MA,MANAME
0,400,35620.0,New York-Newark-Jersey City NY-NJ-PA
1,501,35620.0,New York-Newark-Jersey City NY-NJ-PA
2,544,35620.0,New York-Newark-Jersey City NY-NJ-PA
3,1001,44140.0,Springfield MA
4,1002,44140.0,Springfield MA
...,...,...,...
40932,99926,,
40933,99927,,
40934,99928,,
40935,99929,,


In [7]:
#Comparing Data Types between the two diffrent dataframes to make certain they match for the merge
print(msa_df.dtypes)

ZIP         int64
MA        float64
MANAME     object
dtype: object


In [8]:
print(concat_df.dtypes) 
    

Zipcode                        object
Household Income              float64
Population                    float64
Median Contract Rent          float64
Median Gross Rent             float64
Median Home Value             float64
Median Monthly Owner Costs    float64
Year                           object
dtype: object


In [9]:
#convert Zipcode datatype in concat_df to be int64
#https://www.kite.com/python/answers/how-to-convert-a-pandas-dataframe-column-from-object-to-int-in-python
concat_df["Zipcode"] = concat_df["Zipcode"].astype(object).astype(int)

In [10]:
#Rename ZIP column in MSA to match Zipcode from Census data
#https://note.nkmk.me/en/python-pandas-dataframe-rename/
msa_df.rename(columns={'ZIP': 'Zipcode'}, inplace=True)


In [11]:
#Merge data frames and drop the values in the census data with -666666666
merged_census_df = pd.merge(concat_df, msa_df, how="left", on=["Zipcode", "Zipcode"])
merged_census_df
merged_census_df.drop(merged_census_df[merged_census_df["Household Income"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Population"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Contract Rent"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Gross Rent"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Home Value"] == -666666666].index, inplace = True)       
merged_census_df.drop(merged_census_df[merged_census_df["Median Monthly Owner Costs"] == -666666666].index, inplace = True)      
       

In [12]:
# Save as a csv to check full data set
# Note to avoid any issues later, use encoding="utf-8"
merged_census_df.to_csv("census_data_2019Years.csv", encoding="utf-8", index=False)

In [13]:
# Remove zips with no MA
merged_census_df['MA'].replace('', np.nan, inplace = True)
merged_census_df.dropna(subset=['MA'], inplace=True)
merged_census_df.head()

Unnamed: 0,Zipcode,Household Income,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Year,MA,MANAME
131,1001,63949.0,17312.0,1025.0,1148.0,215700.0,1506.0,2019,44140.0,Springfield MA
132,1002,61159.0,30014.0,1280.0,1380.0,355500.0,2330.0,2019,44140.0,Springfield MA
134,1005,67302.0,5128.0,831.0,965.0,236400.0,1555.0,2019,49340.0,Worcester MA-CT
135,1007,91191.0,15005.0,909.0,975.0,276900.0,1884.0,2019,44140.0,Springfield MA
138,1010,70063.0,3658.0,703.0,703.0,272200.0,1880.0,2019,44140.0,Springfield MA


In [14]:
# Create dataframe of home ownership
msa_home_ownership = pd.read_csv('../project_1/files/Census_Home_Ownership_2015_2020.csv',low_memory=False)
msa_home_ownership


Unnamed: 0,MANAME,2015,2016,2017,2018,2019,2020,Average
0,Inside Metropolitan Statistical Areas,62.2,61.9,62.3,62.9,63.1,65.2,62.9
1,Akron OH,74.0,74.9,67.5,65.6,70.8,69.5,70.4
2,Albany-Schenectady-Troy NY,65.9,61.3,64.1,62.2,61.2,63.7,63.1
3,Albuquerque NM,64.3,66.9,67.0,67.9,70.0,69.5,67.6
4,Allentown-Bethlehem-Easton PA-NJ,69.2,68.9,73.1,72.1,67.8,68.8,70.0
...,...,...,...,...,...,...,...,...
71,Tulsa OK,65.2,65.4,66.8,68.3,70.5,70.1,67.7
72,Urban Honolulu HI,59.6,57.9,53.8,57.7,59.0,56.9,57.5
73,Virginia Beach-Norfolk-Newport News VA-N,59.4,59.6,65.3,62.8,63.0,65.8,62.7
74,Washington-Arlington-Alexandria DC-VA-MD,64.6,63.1,63.3,62.9,64.7,67.9,64.4


In [15]:
# Merge census home ownership with existing data
full_census_merge = pd.merge(merged_census_df, msa_home_ownership, how="left", on=["MANAME", "MANAME"])
full_census_merge

Unnamed: 0,Zipcode,Household Income,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Year,MA,MANAME,2015,2016,2017,2018,2019,2020,Average
0,1001,63949.0,17312.0,1025.0,1148.0,215700.0,1506.0,2019,44140.0,Springfield MA,,,,,,,
1,1002,61159.0,30014.0,1280.0,1380.0,355500.0,2330.0,2019,44140.0,Springfield MA,,,,,,,
2,1005,67302.0,5128.0,831.0,965.0,236400.0,1555.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
3,1007,91191.0,15005.0,909.0,975.0,276900.0,1884.0,2019,44140.0,Springfield MA,,,,,,,
4,1010,70063.0,3658.0,703.0,703.0,272200.0,1880.0,2019,44140.0,Springfield MA,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16915,99701,59955.0,17258.0,1087.0,1182.0,192400.0,1673.0,2019,21820.0,Fairbanks AK,,,,,,,
16916,99705,80875.0,23131.0,1185.0,1241.0,225200.0,1782.0,2019,21820.0,Fairbanks AK,,,,,,,
16917,99709,87055.0,29288.0,1005.0,1134.0,253600.0,1913.0,2019,21820.0,Fairbanks AK,,,,,,,
16918,99712,88502.0,14837.0,917.0,1054.0,273000.0,2031.0,2019,21820.0,Fairbanks AK,,,,,,,


In [16]:
#Filter out zips not contained in top 75 largest MSAs
full_census_merge.dropna(inplace=True)
full_census_merge

Unnamed: 0,Zipcode,Household Income,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Year,MA,MANAME,2015,2016,2017,2018,2019,2020,Average
2,1005,67302.0,5128.0,831.0,965.0,236400.0,1555.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
14,1031,38173.0,1135.0,734.0,901.0,198500.0,1400.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
29,1068,88571.0,1833.0,942.0,1153.0,294800.0,1919.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
40,1083,60323.0,3094.0,640.0,682.0,220000.0,1444.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
45,1092,108725.0,1278.0,679.0,958.0,215200.0,1525.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16768,98682,75634.0,60887.0,1252.0,1397.0,283100.0,1645.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6
16769,98683,70099.0,33219.0,1275.0,1372.0,339100.0,1760.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6
16770,98684,66781.0,29396.0,1294.0,1410.0,295000.0,1664.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6
16771,98685,103120.0,29792.0,1271.0,1469.0,370500.0,1969.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6


In [17]:
# Check curreent dataframe types
print(full_census_merge.dtypes)

Zipcode                         int32
Household Income              float64
Population                    float64
Median Contract Rent          float64
Median Gross Rent             float64
Median Home Value             float64
Median Monthly Owner Costs    float64
Year                           object
MA                            float64
MANAME                         object
2015                          float64
2016                          float64
2017                          float64
2018                          float64
2019                          float64
2020                          float64
Average                       float64
dtype: object


In [18]:
# Convert median monthly owner cost to integer
concat_df["Median Monthly Owner Costs"] = concat_df["Median Monthly Owner Costs"].astype(object).astype(float)

In [19]:
# Confirm data type conversion
print(full_census_merge.dtypes)

Zipcode                         int32
Household Income              float64
Population                    float64
Median Contract Rent          float64
Median Gross Rent             float64
Median Home Value             float64
Median Monthly Owner Costs    float64
Year                           object
MA                            float64
MANAME                         object
2015                          float64
2016                          float64
2017                          float64
2018                          float64
2019                          float64
2020                          float64
Average                       float64
dtype: object


In [22]:
# Create weighted cost column
full_census_merge["Weighted Cost"] = full_census_merge["Median Gross Rent"] * (1-full_census_merge["Average"] / 100) + full_census_merge["Median Monthly Owner Costs"] * full_census_merge["Average"] / 100
full_census_merge

Unnamed: 0,Zipcode,Household Income,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Year,MA,MANAME,2015,2016,2017,2018,2019,2020,Average,Weighted Cost
2,1005,67302.0,5128.0,831.0,965.0,236400.0,1555.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1344.960
14,1031,38173.0,1135.0,734.0,901.0,198500.0,1400.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1222.356
29,1068,88571.0,1833.0,942.0,1153.0,294800.0,1919.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1646.304
40,1083,60323.0,3094.0,640.0,682.0,220000.0,1444.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1172.728
45,1092,108725.0,1278.0,679.0,958.0,215200.0,1525.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1323.148
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16768,98682,75634.0,60887.0,1252.0,1397.0,283100.0,1645.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1547.288
16769,98683,70099.0,33219.0,1275.0,1372.0,339100.0,1760.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1607.128
16770,98684,66781.0,29396.0,1294.0,1410.0,295000.0,1664.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1563.924
16771,98685,103120.0,29792.0,1271.0,1469.0,370500.0,1969.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1772.000


In [25]:
# Create Affordability Index
full_census_merge["Affordability"] = full_census_merge["Weighted Cost"] / full_census_merge["Household Income"] * 12
full_census_merge

Unnamed: 0,Zipcode,Household Income,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Year,MA,MANAME,2015,2016,2017,2018,2019,2020,Average,Weighted Cost,Affordability
2,1005,67302.0,5128.0,831.0,965.0,236400.0,1555.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1344.960,0.239807
14,1031,38173.0,1135.0,734.0,901.0,198500.0,1400.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1222.356,0.384258
29,1068,88571.0,1833.0,942.0,1153.0,294800.0,1919.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1646.304,0.223049
40,1083,60323.0,3094.0,640.0,682.0,220000.0,1444.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1172.728,0.233290
45,1092,108725.0,1278.0,679.0,958.0,215200.0,1525.0,2019,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4,1323.148,0.146036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16768,98682,75634.0,60887.0,1252.0,1397.0,283100.0,1645.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1547.288,0.245491
16769,98683,70099.0,33219.0,1275.0,1372.0,339100.0,1760.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1607.128,0.275119
16770,98684,66781.0,29396.0,1294.0,1410.0,295000.0,1664.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1563.924,0.281024
16771,98685,103120.0,29792.0,1271.0,1469.0,370500.0,1969.0,2019,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6,1772.000,0.206206
