In [1]:
# Dependencies, grouped: standard library, third-party, then project-local.
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

# The Census API key is read from the project-local `config` module so the
# key itself never appears in the notebook.
from config import census_api_key

# Load the broadband data from CSV into a dataframe. Later cells read its
# "Year", "GEO ID" and "Broadband (%)" columns.
broadband_df = pd.read_csv("CSV_files/clean_broadband.csv")

In [2]:
# Assume year is 2017

# Base URL of the 2017 `ecnbasic` dataset on the Census API.
base_url = "https://api.census.gov/data/2017/ecnbasic"

# Request the NAICS label, employee count, county name and GEO ID for every
# county, restricted to NAICS code 51.
# NOTE: query_url embeds the API key -- avoid printing or logging it.
query_url = base_url + f"?get=NAICS2017_LABEL,EMP,NAME,GEO_ID&for=county:*&NAICS2017=51&key={census_api_key}"

# A timeout keeps a stalled connection from hanging the kernel indefinitely,
# and raise_for_status() turns an HTTP error reply into an explicit exception
# instead of an opaque JSON decoding failure on the next line.
census_reply = requests.get(query_url, timeout=30)
census_reply.raise_for_status()

# Parsed JSON payload; the next cell treats it as a list of rows whose first
# row holds the Census column labels.
response = census_reply.json()

In [3]:
# Dataframe column names for the four requested fields, in the order they
# were requested (NAICS2017_LABEL, EMP, NAME, GEO_ID).
county_columns = ["NAIC2017 Label", "Number Employed", "County, State", "GEO ID"]

# The first row of the response holds the Census column labels, not data, so
# it is skipped. Only the first four fields of each remaining row are kept.
county_rows = [row[:4] for row in response[1:]]

# Rows are compiled into a dataframe in a single step. Row labels start at 1
# so they line up with the row positions in the raw response (row 0 is the
# header). An empty response simply yields an empty dataframe.
county_2017_df = pd.DataFrame(
    county_rows,
    columns=county_columns,
    index=pd.RangeIndex(start=1, stop=len(county_rows) + 1),
)

# The Census API delivers values as text in its JSON payload; convert the
# employee counts to numbers so they can be sorted, aggregated and plotted.
# This is a no-op if the values are already numeric, and raises ValueError if
# a value cannot be parsed, so malformed data is noticed rather than carried on.
county_2017_df["Number Employed"] = pd.to_numeric(county_2017_df["Number Employed"])

# Output the first rows of the resulting dataframe.
county_2017_df.head()

Unnamed: 0,NAIC2017 Label,Number Employed,"County, State",GEO ID
1,Information,1180,"Montgomery County, Virginia",0500000US51121
2,Information,20,"New Kent County, Virginia",0500000US51127
3,Information,96,"Augusta County, Virginia",0500000US51015
4,Information,18,"Cumberland County, Virginia",0500000US51049
5,Information,0,"Dickenson County, Virginia",0500000US51051


In [4]:
# Restrict the broadband data to 2017 and keep only the three columns needed
# for the join, to prevent clutter.
year_is_2017 = broadband_df["Year"] == 2017
broadband_2017_df = broadband_df.loc[year_is_2017, ["Broadband (%)", "GEO ID", "Year"]]

# Join the Census county rows to the 2017 broadband rows on "GEO ID". An inner
# join keeps only the GEO IDs that appear in both dataframes.
combined_2017_df = pd.merge(county_2017_df, broadband_2017_df, how="inner", on="GEO ID")

# The CSV file name is built from the smallest "Year" value and the smallest
# (alphabetically first) "NAIC2017 Label" value in the combined dataframe.
output_csv = f'CSV_files/{combined_2017_df["Year"].min()}_{combined_2017_df["NAIC2017 Label"].min()}_ecnbasic.csv'

# Write the combined data to disk without the row index, then show the first rows.
combined_2017_df.to_csv(output_csv, index=False)
combined_2017_df.head()

Unnamed: 0,NAIC2017 Label,Number Employed,"County, State",GEO ID,Broadband (%),Year
0,Information,1180,"Montgomery County, Virginia",0500000US51121,0.918623,2017
1,Information,20,"New Kent County, Virginia",0500000US51127,0.621302,2017
2,Information,96,"Augusta County, Virginia",0500000US51015,0.803108,2017
3,Information,18,"Cumberland County, Virginia",0500000US51049,0.47191,2017
4,Information,0,"Dickenson County, Virginia",0500000US51051,0.542636,2017
