In [11]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
import os
import json
import time
# Census API Key
from config import api_key

In [12]:
#census_data = c.acs5.get().json()
query_url = f"https://api.census.gov/data/2019/acs/acs5"
display(query_url)
#https://api.census.gov/data#/2019/acs/acs5&{api_key}#?get=NAME,group(B01001)&for=us:1&key={api_key}

'https://api.census.gov/data/2019/acs/acs5'

In [None]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

cols = ["Zipcode", "Household Income", "Population",
        "Median Contract Rent", "Median Gross Rent", "Median Home Value",
        "Median Monthly Owner Costs", "Year"]

df = pd.DataFrame(columns = cols)

#years = [2014, 2019]
#years = [2015, 2015, 2016, 2017, 2018, 2019]
years = [2019]
for year in years:

    c = Census(api_key, year=year)
    census_data = c.acs5.get(("NAME", "B19013_001E", 
                              "B01003_001E",
                              "B25058_001E",
                              "B25064_001E", 
                              "B25077_001E", 
                              "B25088_002E"), {'for': 'zip code tabulation area:*'})

    # Convert to DataFrame
    census_pd = pd.DataFrame(census_data)

    # Column Reordering
    census_pd = census_pd.rename(columns={"B19013_001E": "Household Income",
                                          "B01003_001E": "Population",
                                          "B25058_001E": "Median Contract Rent",
                                          "B25064_001E": "Median Gross Rent",
                                          "B25077_001E": "Median Home Value",
                                          "B25088_002E": "Median Monthly Owner Costs",
                                          "NAME": "Name", "zip code tabulation area": "Zipcode"})


    # Final DataFrame
    census_pd['Year']=str(year)
    
    census_pd = census_pd[cols]
    
    
    concat_df = pd.concat([df, census_pd])

# Visualize
#print(len(census_pd))
display(concat_df.head())

In [None]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
concat_df.to_csv("census_data.csv", encoding="utf-8", index=False)

In [None]:
!ls

In [14]:
#create DataFrame for MSA
#https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
msa_df = pd.read_csv('../project_1/files/ScanUSZipCode2017A.csv',low_memory=False)
msa_df

Unnamed: 0,ZIP,NAME,COUNTY,COUNTYNAME,STATE,STATENAME,ZIPNAME,AREA,LATITUDE,LONGITUDE,...,INCCYPCAP,POPCY20_UP,RACCYWHITE,RACCYBLACK,RACCYAMIND,RACCYASIAN,RACCYHAWAI,RACCYOTHER,RACCYMULT,HISCYHISP
0,400,Central Park,36061,New York County,36,New York,Central Park,1.32716,40.782487,-73.965625,...,130905,139,121,27,0,13,0,2,2,16
1,501,IRS Service Center,36103,Suffolk County,36,New York,IRS Service Center,5.06117,40.816471,-73.044987,...,0,0,0,0,0,0,0,0,0,0
2,544,IRS Service Center,36103,Suffolk County,36,New York,IRS Service Center,0.00000,40.816602,-73.044987,...,0,0,0,0,0,0,0,0,0,0
3,1001,Agawam,25013,Hampden County,25,Massachusetts,Agawam,12.19081,42.070443,-72.627799,...,33164,13570,15271,565,36,484,0,189,314,1015
4,1002,Amherst,25015,Hampshire County,25,Massachusetts,Amherst,51.80159,42.373177,-72.511979,...,25647,25305,25885,2402,87,4333,13,871,1454,2995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40932,99926,Metlakatla,2198,Prince of Wales-Hyde,2,Alaska,Metlakatla,146.40996,55.126562,-131.573372,...,27074,1163,114,9,730,3,6,24,765,45
40933,99927,Point Baker,2198,Prince of Wales-Hyde,2,Alaska,Point Baker,245.54774,56.351172,-133.626432,...,27313,85,92,0,5,0,1,0,10,3
40934,99928,Ward Cove,2130,Ketchikan Gateway Bo,2,Alaska,Ward Cove,5130.43945,55.410352,-131.722201,...,0,0,0,0,0,0,0,0,0,0
40935,99929,Wrangell,2275,Wrangell City and Bo,2,Alaska,Wrangell,2595.99097,56.471875,-132.387305,...,30432,1819,1643,11,251,46,1,9,390,49


In [15]:
#Comparing Data Types between the two diffrent dataframes to make certain they match for the merge
print(msa_df.dtypes)

ZIP             int64
NAME           object
COUNTY          int64
COUNTYNAME     object
STATE           int64
STATENAME      object
ZIPNAME        object
AREA          float64
LATITUDE      float64
LONGITUDE     float64
CBSA          float64
CBSANAME       object
NECTA         float64
NECTANAME      object
CA            float64
CANAME         object
MA            float64
MANAME         object
POPCY           int64
INCCYPCAP       int64
POPCY20_UP      int64
RACCYWHITE      int64
RACCYBLACK      int64
RACCYAMIND      int64
RACCYASIAN      int64
RACCYHAWAI      int64
RACCYOTHER      int64
RACCYMULT       int64
HISCYHISP       int64
dtype: object


In [16]:
print(concat_df.dtypes) 
    

Zipcode                        object
Household_Income              float64
Population                    float64
Median_Contract_Rent          float64
Median_Gross_Rent             float64
Median_Home_Value              object
Median_Monthly_Owner_Costs    float64
Year                           object
dtype: object


In [17]:
#convert Zipcode datatype in concat_df to be int64
#https://www.kite.com/python/answers/how-to-convert-a-pandas-dataframe-column-from-object-to-int-in-python
concat_df["Zipcode"] = concat_df["Zipcode"].astype(object).astype(int)

In [18]:
#Rename ZIP column in MSA to match Zipcode from Census data
#https://note.nkmk.me/en/python-pandas-dataframe-rename/
msa_df.rename(columns={'ZIP': 'Zipcode'}, inplace=True)


In [19]:
#Merge data frames and drop the values in the census data with -666666666
merged_census_df = pd.merge(concat_df, msa_df, how="left", on=["Zipcode", "Zipcode"])
merged_census_df
merged_census_df.drop(merged_census_df[merged_census_df["Household Income"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Population"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Contract Rent"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Gross Rent"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Home Value"] == -666666666].index, inplace = True)       
merged_census_df.drop(merged_census_df[merged_census_df["Median Monthly Owner Costs"] == -666666666].index, inplace = True)      
       

KeyError: 'Household Income'

In [None]:
# Save as a csv to check full data set
# Note to avoid any issues later, use encoding="utf-8"
merged_census_df.to_csv("census_data_2017Years.csv", encoding="utf-8", index=False)

In [10]:
# Remove zips with no MA
merged_census_df['MA'].replace('', np.nan, inplace = True)


NameError: name 'merged_census_df' is not defined