In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API client: the key is imported from the local `config` module and
# the client is created for the 2017 data year.
from config import census_api_key

ACS_YEAR = 2017
c = Census(census_api_key, year=ACS_YEAR)

In [2]:
# Query the ACS 5-year endpoint for every county (geography filter 'county:*'),
# using the data year the client `c` was created with.
# Library documentation: https://github.com/CommerceDataService/census-wrapper
# Variable-code labels:  https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b
acs_fields = (
    "NAME",
    "B19013_001E",  # later renamed to "Household Income"
    "B01003_001E",  # later renamed to "Population"
    "B01002_001E",  # later renamed to "Median Age"
    "B19301_001E",  # later renamed to "Per Capita Income"
    "B17001_002E",  # later renamed to "Poverty Count"
)
county_filter = {'for': 'county:*'}
census_data = c.acs5.get(acs_fields, county_filter)

# Convert the records returned by the API call into a DataFrame
census_pd = pd.DataFrame(census_data)

# Rename the ACS variable codes to readable column labels
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name"})

# Add in Poverty Rate (Poverty Count / Population), expressed as a percentage.
# pd.to_numeric is used instead of astype(int) so that a missing value in
# either column yields NaN for that row rather than raising and aborting the
# whole cell; values that are present produce the same rate as before.
poverty_count = pd.to_numeric(census_pd["Poverty Count"])
population = pd.to_numeric(census_pd["Population"])
census_pd["Poverty Rate (%)"] = 100 * poverty_count / population

# Final DataFrame: keep only the labelled columns, in presentation order
census_df = census_pd[["Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate (%)"]]

# Visualize: row count, then the frame itself as the cell's output
print(len(census_df))
census_df

3220


Unnamed: 0,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate (%)
0,"Corozal Municipio, Puerto Rico",34933.0,38.1,14752.0,7887.0,19146.0,54.807775
1,"Maunabo Municipio, Puerto Rico",11297.0,43.5,17636.0,8830.0,5945.0,52.624591
2,"Peñuelas Municipio, Puerto Rico",21661.0,36.4,16868.0,7983.0,12379.0,57.148793
3,"Ponce Municipio, Puerto Rico",148863.0,39.5,16561.0,10775.0,75187.0,50.507514
4,"San Sebastián Municipio, Puerto Rico",38970.0,42.0,14275.0,8072.0,21472.0,55.098794
...,...,...,...,...,...,...,...
3215,"Iowa County, Wisconsin",23576.0,42.5,60017.0,31717.0,2100.0,8.907363
3216,"Pepin County, Wisconsin",7282.0,45.8,51470.0,27901.0,938.0,12.881077
3217,"Ozaukee County, Wisconsin",87817.0,43.9,80526.0,45820.0,5087.0,5.792728
3218,"Portage County, Wisconsin",70371.0,36.6,54620.0,28363.0,8757.0,12.444047


In [3]:
# Persist the county-level census table to disk as CSV.
# NOTE(review): to_csv writes the row index as an extra leading column by
# default; confirm that downstream readers expect it.
acs5_csv_path = "CSV_files/acs5_2017.csv"
census_df.to_csv(acs5_csv_path)

In [4]:
# Load the cleaned broadband table from CSV, then keep only the rows whose
# "Year" value equals 2017. The filtered rows keep their original index labels.
broadband_df = pd.read_csv("CSV_files/clean_broadband.csv")
is_2017 = broadband_df["Year"] == 2017
broadband_2017_data = broadband_df[is_2017]
broadband_2017_data.head()

Unnamed: 0,State,County,Year,GEO ID,Broadband (%),CFIPS
0,Alabama,Autauga County,2017,0500000US01001,0.618182,1001
12,Alabama,Baldwin County,2017,0500000US01003,0.820682,1003
14,Alabama,Barbour County,2017,0500000US01005,0.478992,1005
16,Alabama,Bibb County,2017,0500000US01007,0.606742,1007
18,Alabama,Blount County,2017,0500000US01009,0.586694,1009


In [10]:
# Build the merge key: one "<County>, <State>" string per 2017 broadband row
# (e.g. "Autauga County, Alabama"), matching the layout of the census "Name"
# column. Zipping the two columns produces the same strings as looping over
# iterrows() without constructing a Series object for every row.
name = [f"{county}, {state}"
        for county, state in zip(broadband_2017_data["County"],
                                 broadband_2017_data["State"])]

# Frame to be merged: the key plus the two broadband columns that are kept.
# `name` is a plain list, so it is assigned positionally; the two Series
# supply the index, which is why the filtered row labels are preserved.
broadband_2017_df = pd.DataFrame({"Name": name,
                                  "Broadband (%)": broadband_2017_data["Broadband (%)"],
                                  "GEO ID": broadband_2017_data["GEO ID"]})
broadband_2017_df.head()

Unnamed: 0,Name,Broadband (%),GEO ID
0,"Autauga County, Alabama",0.618182,0500000US01001
12,"Baldwin County, Alabama",0.820682,0500000US01003
14,"Barbour County, Alabama",0.478992,0500000US01005
16,"Bibb County, Alabama",0.606742,0500000US01007
18,"Blount County, Alabama",0.586694,0500000US01009


In [11]:
# Inner-join the census table with the broadband table on their shared "Name"
# column; a row whose name does not appear on both sides is dropped.
census_2017_acs5_df = census_df.merge(broadband_2017_df, on="Name", how="inner")

# Write the joined table to CSV, then preview its first rows
merged_csv_path = "CSV_files/census_2017_acs5_df.csv"
census_2017_acs5_df.to_csv(merged_csv_path)
census_2017_acs5_df.head()

Unnamed: 0,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate (%),Broadband (%),GEO ID
0,"Monroe County, Alabama",21745.0,41.7,26036.0,17264.0,7249.0,33.336399,0.538071,0500000US01099
1,"Lawrence County, Alabama",33288.0,42.0,43779.0,22419.0,5490.0,16.49243,0.666667,0500000US01079
2,"Lee County, Alabama",156597.0,31.0,47564.0,26170.0,33132.0,21.157493,0.829926,0500000US01081
3,"Marion County, Alabama",30058.0,44.1,35719.0,21835.0,5175.0,17.216714,0.55303,0500000US01093
4,"Pickens County, Alabama",20170.0,42.1,36220.0,20089.0,4340.0,21.517105,0.572917,0500000US01107
