In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
import os
import json
import time
# Census API Key
from config import api_key

In [2]:
#census_data = c.acs5.get().json()
query_url = f"https://api.census.gov/data/2019/acs/acs5"
display(query_url)
#https://api.census.gov/data#/2019/acs/acs5&{api_key}#?get=NAME,group(B01001)&for=us:1&key={api_key}

'https://api.census.gov/data/2019/acs/acs5'

In [3]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

cols = ["Zipcode", "Household Income", "Population",
        "Median Contract Rent", "Median Gross Rent", "Median Home Value",
        "Median Monthly Owner Costs", "Year"]


years = [2014, 2019]
#years = [2015, 2015, 2016, 2017, 2018, 2019]
#years = [2019]
for year in years:

    c = Census(api_key, year=year)
    census_data = c.acs5.get(("NAME", "B19013_001E", 
                              "B01003_001E",
                              "B25058_001E",
                              "B25064_001E", 
                              "B25077_001E", 
                              "B25088_002E"), {'for': 'zip code tabulation area:*'})

    # Convert to DataFrame
    census_pd = pd.DataFrame(census_data)

    # Column Reordering
    census_pd = census_pd.rename(columns={"B19013_001E": f"Household Income_{year}",
                                          "B01003_001E": f"Population_{year}",
                                          "B25058_001E": f"Median Contract Rent_{year}",
                                          "B25064_001E": f"Median Gross Rent_{year}",
                                          "B25077_001E": f"Median Home Value_{year}",
                                          "B25088_002E": f"Median Monthly Owner Costs_{year}",
                                          "NAME": "Name",
                                          "zip code tabulation area": "Zipcode"})
    
    census_pd = census_pd.drop(columns=['Name'])
    # Final DataFrame
    
    
    if year==2014:
        df = census_pd
    if year==2019:
        df = df.merge(census_pd, how='inner', on=['Zipcode', 'state'])

# Visualize
#print(len(census_pd))
display(df.head())

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,Median Contract Rent_2019,Median Gross Rent_2019,Median Home Value_2019,Median Monthly Owner Costs_2019
0,10833.0,18088.0,282,367.0,105400.0,722,72,601,14361.0,17113.0,292.0,383.0,83900.0,771.0
1,16353.0,40859.0,329,411.0,91200.0,843,72,602,16807.0,37751.0,293.0,400.0,85300.0,877.0
2,16323.0,53162.0,267,369.0,128700.0,841,72,603,16049.0,47081.0,328.0,433.0,118400.0,832.0
3,14138.0,6415.0,234,326.0,105800.0,569,72,606,12119.0,6392.0,196.0,275.0,80800.0,526.0
4,17265.0,28805.0,324,419.0,113700.0,752,72,610,19898.0,26686.0,338.0,427.0,87600.0,751.0


In [5]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
df.to_csv("census_data.csv", encoding="utf-8", index=False)

In [6]:
!ls

Census_Demo.ipynb
README.md
Untitled.ipynb
__pycache__
api_keys.py
census_data.csv
census_data_2014Years.csv
census_data_2017Years.csv
census_data_2019Years.csv
config.py
files
untitled.txt


In [7]:
#create DataFrame for MSA
#https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
msa_df = pd.read_csv('../project_1/files/ScanUSZipCode2017A.csv',low_memory=False)
msa_df

Unnamed: 0,ZIP,MA,MANAME
0,400,35620.0,New York-Newark-Jersey City NY-NJ-PA
1,501,35620.0,New York-Newark-Jersey City NY-NJ-PA
2,544,35620.0,New York-Newark-Jersey City NY-NJ-PA
3,1001,44140.0,Springfield MA
4,1002,44140.0,Springfield MA
...,...,...,...
40932,99926,,
40933,99927,,
40934,99928,,
40935,99929,,


In [8]:
#Comparing Data Types between the two diffrent dataframes to make certain they match for the merge
print(msa_df.dtypes)

ZIP         int64
MA        float64
MANAME     object
dtype: object


In [9]:
print(df.dtypes) 
    

Household Income_2014              float64
Population_2014                    float64
Median Contract Rent_2014           object
Median Gross Rent_2014             float64
Median Home Value_2014             float64
Median Monthly Owner Costs_2014     object
state                               object
Zipcode                             object
Household Income_2019              float64
Population_2019                    float64
Median Contract Rent_2019          float64
Median Gross Rent_2019             float64
Median Home Value_2019             float64
Median Monthly Owner Costs_2019    float64
dtype: object


In [10]:
#convert Zipcode datatype in concat_df to be int64
#https://www.kite.com/python/answers/how-to-convert-a-pandas-dataframe-column-from-object-to-int-in-python
df["Zipcode"] = df["Zipcode"].astype(object).astype(int)

In [11]:
#Rename ZIP column in MSA to match Zipcode from Census data
#https://note.nkmk.me/en/python-pandas-dataframe-rename/
msa_df.rename(columns={'ZIP': 'Zipcode'}, inplace=True)


In [13]:
#Merge data frames and drop the values in the census data with -666666666
merged_census_df = pd.merge(df, msa_df, how="left", on=["Zipcode", "Zipcode"])
merged_census_df
merged_census_df.drop(merged_census_df[merged_census_df["Household Income_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Population_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Contract Rent_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Gross Rent_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Home Value_2014"] == -666666666].index, inplace = True)       
merged_census_df.drop(merged_census_df[merged_census_df["Median Monthly Owner Costs_2014"] == -666666666].index, inplace = True) 
merged_census_df.drop(merged_census_df[merged_census_df["Household Income_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Population_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Contract Rent_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Gross Rent_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Home Value_2019"] == -666666666].index, inplace = True)       
merged_census_df.drop(merged_census_df[merged_census_df["Median Monthly Owner Costs_2019"] == -666666666].index, inplace = True)

 

In [14]:
# Save as a csv to check full data set
# Note to avoid any issues later, use encoding="utf-8"
merged_census_df.to_csv("census_data_2014_2019Years.csv", encoding="utf-8", index=False)

In [15]:
# Remove zips with no MA
merged_census_df['MA'].replace('', np.nan, inplace = True)
merged_census_df.dropna(subset=['MA'], inplace=True)
merged_census_df.head()

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,Median Contract Rent_2019,Median Gross Rent_2019,Median Home Value_2019,Median Monthly Owner Costs_2019,MA,MANAME
131,60775.0,17141.0,800,895.0,212900.0,1586,25,1001,63949.0,17312.0,1025.0,1148.0,215700.0,1506.0,44140.0,Springfield MA
132,55082.0,29844.0,1026,1117.0,341600.0,2094,25,1002,61159.0,30014.0,1280.0,1380.0,355500.0,2330.0,44140.0,Springfield MA
134,68361.0,5113.0,753,906.0,208000.0,1642,25,1005,67302.0,5128.0,831.0,965.0,236400.0,1555.0,49340.0,Worcester MA-CT
135,74221.0,14774.0,755,876.0,255800.0,1795,25,1007,91191.0,15005.0,909.0,975.0,276900.0,1884.0,44140.0,Springfield MA
138,82365.0,3667.0,615,683.0,270400.0,1693,25,1010,70063.0,3658.0,703.0,703.0,272200.0,1880.0,44140.0,Springfield MA


In [16]:
# Create dataframe of home ownership
msa_home_ownership = pd.read_csv('../project_1/files/Census_Home_Ownership_2015_2020.csv',low_memory=False)
msa_home_ownership


Unnamed: 0,MANAME,2015,2016,2017,2018,2019,2020,Average
0,Inside Metropolitan Statistical Areas,62.2,61.9,62.3,62.9,63.1,65.2,62.9
1,Akron OH,74.0,74.9,67.5,65.6,70.8,69.5,70.4
2,Albany-Schenectady-Troy NY,65.9,61.3,64.1,62.2,61.2,63.7,63.1
3,Albuquerque NM,64.3,66.9,67.0,67.9,70.0,69.5,67.6
4,Allentown-Bethlehem-Easton PA-NJ,69.2,68.9,73.1,72.1,67.8,68.8,70.0
...,...,...,...,...,...,...,...,...
71,Tulsa OK,65.2,65.4,66.8,68.3,70.5,70.1,67.7
72,Urban Honolulu HI,59.6,57.9,53.8,57.7,59.0,56.9,57.5
73,Virginia Beach-Norfolk-Newport News VA-N,59.4,59.6,65.3,62.8,63.0,65.8,62.7
74,Washington-Arlington-Alexandria DC-VA-MD,64.6,63.1,63.3,62.9,64.7,67.9,64.4


In [17]:
# Merge census home ownership with existing data
full_census_merge = pd.merge(merged_census_df, msa_home_ownership, how="left", on=["MANAME", "MANAME"])
full_census_merge

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,...,Median Monthly Owner Costs_2019,MA,MANAME,2015,2016,2017,2018,2019,2020,Average
0,60775.0,17141.0,800,895.0,212900.0,1586,25,1001,63949.0,17312.0,...,1506.0,44140.0,Springfield MA,,,,,,,
1,55082.0,29844.0,1026,1117.0,341600.0,2094,25,1002,61159.0,30014.0,...,2330.0,44140.0,Springfield MA,,,,,,,
2,68361.0,5113.0,753,906.0,208000.0,1642,25,1005,67302.0,5128.0,...,1555.0,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
3,74221.0,14774.0,755,876.0,255800.0,1795,25,1007,91191.0,15005.0,...,1884.0,44140.0,Springfield MA,,,,,,,
4,82365.0,3667.0,615,683.0,270400.0,1693,25,1010,70063.0,3658.0,...,1880.0,44140.0,Springfield MA,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16669,55268.0,18575.0,982,1097.0,187000.0,1883,02,99701,59955.0,17258.0,...,1673.0,21820.0,Fairbanks AK,,,,,,,
16670,76085.0,24311.0,1151,1377.0,208900.0,1902,02,99705,80875.0,23131.0,...,1782.0,21820.0,Fairbanks AK,,,,,,,
16671,73871.0,31658.0,944,1041.0,227600.0,1870,02,99709,87055.0,29288.0,...,1913.0,21820.0,Fairbanks AK,,,,,,,
16672,88517.0,12134.0,1000,1250.0,236400.0,1924,02,99712,88502.0,14837.0,...,2031.0,21820.0,Fairbanks AK,,,,,,,


In [18]:
#Filter out zips not contained in top 75 largest MSAs
full_census_merge.dropna(inplace=True)
full_census_merge

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,...,Median Monthly Owner Costs_2019,MA,MANAME,2015,2016,2017,2018,2019,2020,Average
2,68361.0,5113.0,753,906.0,208000.0,1642,25,1005,67302.0,5128.0,...,1555.0,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
14,43583.0,1389.0,599,758.0,196800.0,1473,25,1031,38173.0,1135.0,...,1400.0,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
29,80577.0,1791.0,775,1039.0,257100.0,1757,25,1068,88571.0,1833.0,...,1919.0,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
40,37126.0,3108.0,665,823.0,199600.0,1573,25,1083,60323.0,3094.0,...,1444.0,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
44,48679.0,1078.0,538,538.0,250600.0,1487,25,1092,108725.0,1278.0,...,1525.0,49340.0,Worcester MA-CT,64.2,65.5,64.9,63.4,62.7,65.9,64.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16525,59325.0,56326.0,900,1036.0,185100.0,1538,53,98682,75634.0,60887.0,...,1645.0,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6
16526,58874.0,31908.0,866,986.0,250200.0,1741,53,98683,70099.0,33219.0,...,1760.0,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6
16527,57216.0,27442.0,913,1065.0,208100.0,1521,53,98684,66781.0,29396.0,...,1664.0,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6
16528,79265.0,26954.0,1058,1249.0,257100.0,1856,53,98685,103120.0,29792.0,...,1969.0,38900.0,Portland-Vancouver-Hillsboro OR-WA,58.9,61.8,61.1,59.2,60.0,62.5,60.6


In [19]:
# Check curreent dataframe types
print(full_census_merge.dtypes)

Household Income_2014              float64
Population_2014                    float64
Median Contract Rent_2014           object
Median Gross Rent_2014             float64
Median Home Value_2014             float64
Median Monthly Owner Costs_2014     object
state                               object
Zipcode                              int32
Household Income_2019              float64
Population_2019                    float64
Median Contract Rent_2019          float64
Median Gross Rent_2019             float64
Median Home Value_2019             float64
Median Monthly Owner Costs_2019    float64
MA                                 float64
MANAME                              object
2015                               float64
2016                               float64
2017                               float64
2018                               float64
2019                               float64
2020                               float64
Average                            float64
dtype: obje

In [21]:
# Convert median monthly owner cost to integer
###############ERIC, the main data frame is called df.  Also, the columns are the original names is _2014 and _2019 as a suffix
df["Median Monthly Owner Costs"] = df["Median Monthly Owner Costs"].astype(object).astype(float)

KeyError: 'Median Monthly Owner Costs'

In [None]:
# Confirm data type conversion
print(full_census_merge.dtypes)

In [None]:
# Create weighted cost column
full_census_merge["Weighted Cost"] = full_census_merge["Median Gross Rent"] * (1-full_census_merge["Average"] / 100) + full_census_merge["Median Monthly Owner Costs"] * full_census_merge["Average"] / 100
full_census_merge

In [None]:
# Create Affordability Index
full_census_merge["Affordability"] = full_census_merge["Weighted Cost"] / full_census_merge["Household Income"] * 12
full_census_merge