In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
import os
import json
import time
# Census API Key
from config import api_key

In [2]:
# Base endpoint of the 2019 ACS5 dataset on the Census API.
# Earlier experiments, kept for reference:
#   census_data = c.acs5.get().json()
#   https://api.census.gov/data#/2019/acs/acs5&{api_key}#?get=NAME,group(B01001)&for=us:1&key={api_key}
query_url = "https://api.census.gov/data/2019/acs/acs5"
display(query_url)

'https://api.census.gov/data/2019/acs/acs5'

In [3]:
# Run a Census search to retrieve ACS5 data on all zip code tabulation areas,
# one request per entry in `years`, and combine the results into `concat_df`.
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

cols = ["Zipcode", "Household Income", "Income per Capita", "Population",
        "Median Contract Rent", "Median Gross Rent", "Median Home Value",
        "Median Monthly Owner Costs","Median Age", "Year"]

# Census variables requested from the API, in request order.
census_fields = ("NAME", "B19013_001E",
                 "B19301_001E",
                 "B01003_001E",
                 "B25058_001E",
                 "B25064_001E",
                 "B25077_001E",
                 "B25088_002E",
                 "B01002_001E")

# Census variable code -> readable column name used in `cols`.
variable_labels = {"B19013_001E": "Household Income",
                   "B19301_001E": "Income per Capita",
                   "B01003_001E": "Population",
                   "B25058_001E": "Median Contract Rent",
                   "B25064_001E": "Median Gross Rent",
                   "B25077_001E": "Median Home Value",
                   "B25088_002E": "Median Monthly Owner Costs",
                   "B01002_001E": "Median Age",
                   "NAME": "Name", "zip code tabulation area": "Zipcode"}

# Empty frame with the final column layout; it is the result when `years` is empty.
df = pd.DataFrame(columns = cols)

# Other year selections that were tried:
#years = [2014, 2019]
#years = [2015, 2015, 2016, 2017, 2018, 2019]
years = [2017]

# Collect one frame per year and concatenate once after the loop, so that
# every requested year ends up in the result rather than only the last one.
yearly_frames = []
for year in years:

    c = Census(api_key, year=year)
    census_data = c.acs5.get(census_fields, {'for': 'zip code tabulation area:*'})

    # Convert to DataFrame, apply readable column names, tag each row with its
    # year (stored as a string), and keep only the columns in `cols`, in that order.
    census_pd = (
        pd.DataFrame(census_data)
        .rename(columns=variable_labels)
        .assign(Year=str(year))
    )[cols]

    yearly_frames.append(census_pd)

# Final DataFrame: all years stacked, with a fresh 0..n-1 index.
if yearly_frames:
    concat_df = pd.concat(yearly_frames, ignore_index=True)
else:
    concat_df = df.copy()

# Visualize
display(concat_df.head())

Unnamed: 0,Zipcode,Household Income,Income per Capita,Population,Median Contract Rent,Median Gross Rent,Median Home Value,Median Monthly Owner Costs,Median Age,Year
0,601,11757.0,7041.0,17599.0,291.0,373.0,82500.0,748.0,38.9,2017
1,602,16190.0,8978.0,39209.0,304.0,380.0,87300.0,846.0,40.9,2017
2,603,16645.0,10897.0,50135.0,306.0,400.0,122300.0,867.0,40.4,2017
3,606,13387.0,5960.0,6304.0,185.0,284.0,92700.0,538.0,42.8,2017
4,610,18741.0,9266.0,27590.0,322.0,415.0,90300.0,733.0,41.4,2017


In [4]:
# TODO: clean or drop the rows that contain the -666666666 placeholder value.

In [5]:
# Write the combined census frame to a CSV file.
# encoding="utf-8" is set explicitly to avoid encoding issues later;
# index=False leaves the row index out of the file.
census_csv_path = "census_data.csv"
concat_df.to_csv(census_csv_path, index=False, encoding="utf-8")

In [6]:
# Shell escape: list the contents of the current working directory.
!ls

Census_Demo.ipynb
README.md
__pycache__
api_keys.py
census_data.csv
census_data2019.csv
census_data_2017Years.csv
config.py
files


In [7]:
# Load the MSA lookup CSV into a DataFrame and show it.
# low_memory=False is passed to avoid the mixed-types DtypeWarning on import; see
# https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
msa_csv_path = '../project_1/files/ScanUSZipCode2017A.csv'
msa_df = pd.read_csv(msa_csv_path, low_memory=False)
msa_df

Unnamed: 0,ZIP,MA,MANAME
0,400,35620.0,New York-Newark-Jersey City NY-NJ-PA
1,501,35620.0,New York-Newark-Jersey City NY-NJ-PA
2,544,35620.0,New York-Newark-Jersey City NY-NJ-PA
3,1001,44140.0,Springfield MA
4,1002,44140.0,Springfield MA
...,...,...,...
40932,99926,,
40933,99927,,
40934,99928,,
40935,99929,,


In [8]:
# Compare the column dtypes of the two DataFrames so the merge key types match.
# First: the MSA lookup frame.
msa_dtypes = msa_df.dtypes
print(msa_dtypes)

ZIP         int64
MA        float64
MANAME     object
dtype: object


In [9]:
# Second: the census frame, for comparison with the MSA dtypes.
census_dtypes = concat_df.dtypes
print(census_dtypes)
    

Zipcode                        object
Household Income              float64
Income per Capita             float64
Population                    float64
Median Contract Rent          float64
Median Gross Rent             float64
Median Home Value             float64
Median Monthly Owner Costs    float64
Median Age                    float64
Year                           object
dtype: object


In [10]:
# Convert the Zipcode column of concat_df from object to int64 so it can be
# merged against the integer ZIP column of the MSA frame.
# "int64" is spelled out because plain `int` can map to a platform-dependent
# integer width; a prior astype(object) is unnecessary on an object column.
# https://www.kite.com/python/answers/how-to-convert-a-pandas-dataframe-column-from-object-to-int-in-python
concat_df["Zipcode"] = concat_df["Zipcode"].astype("int64")

In [11]:
# Give the MSA key column the same name as the census key column ("Zipcode")
# so both frames can be merged on it.
# https://note.nkmk.me/en/python-pandas-dataframe-rename/
zip_column_map = {'ZIP': 'Zipcode'}
msa_df.rename(columns=zip_column_map, inplace=True)


In [12]:
# Left-join the MSA frame onto the census rows by Zipcode, then drop every row
# in which any census measure holds the -666666666 placeholder value.
MISSING_VALUE = -666666666
census_value_columns = [
    "Household Income",
    "Income per Capita",
    "Population",
    "Median Contract Rent",
    "Median Gross Rent",
    "Median Home Value",
    "Median Monthly Owner Costs",
    "Median Age",
]

# how="left" keeps every census row, whether or not the MSA frame has a match.
merged_census_df = pd.merge(concat_df, msa_df, how="left", on="Zipcode")

# True for rows where at least one measure equals the placeholder.
has_missing_value = merged_census_df[census_value_columns].eq(MISSING_VALUE).any(axis=1)

# Drop by index label so the surviving rows keep their post-merge index values.
merged_census_df = merged_census_df.drop(index=merged_census_df.index[has_missing_value])
       

In [13]:
# Write the merged, filtered frame to a CSV file so the full data set can be checked.
# encoding="utf-8" is set explicitly to avoid encoding issues later;
# index=False leaves the row index out of the file.
merged_csv_path = "census_data_2017Years.csv"
merged_census_df.to_csv(merged_csv_path, index=False, encoding="utf-8")

In [14]:
#pushed