In [44]:
## The following notebook outlines how the data was grabbed and cleaned to prepare it for analysis.

In [45]:
# The following program cleans and then outputs the cleaned broadband data.
# The data was originally grabbed as an excel file from the following link, then saved as a csv.
#    https://techdatasociety.asu.edu/broadband-data-portal/dataaccess/countydata

# Dependencies
import pandas as pd

# Paths to be used
import_path = "CSV_files/broadband_long2000-2018rev.csv"
output_path = "CSV_files/clean_broadband.csv"

In [46]:
# rows containing any invalid values are dropped.
broadband_df = pd.read_csv(import_path)
clean_df = broadband_df.dropna(how = "any")
clean_df = clean_df.rename(columns={"statenam" : "State",
                                    "county":"County",
                                    "year": "Year",
                                   "id":"GEO ID",
                                   "broadband":"Broadband (%)",
                                   "cfips":"CFIPS"})

# Broadband access is turned into a percentage then the dataframe is output
clean_df["Broadband (%)"] = 100 * clean_df["Broadband (%)"] 
clean_df

Unnamed: 0,State,County,Year,GEO ID,Broadband (%),CFIPS
17,Alabama,Autauga County,2017,0500000US01001,61.818182,1001
18,Alabama,Autauga County,2018,0500000US01001,78.899997,1001
26,Alabama,Baldwin County,2007,0500000US01003,58.852088,1003
27,Alabama,Baldwin County,2008,0500000US01003,59.151804,1003
28,Alabama,Baldwin County,2009,0500000US01003,59.451514,1003
...,...,...,...,...,...,...
59488,Wyoming,Uinta County,2018,0500000US56041,88.200003,56041
59506,Wyoming,Washakie County,2017,0500000US56043,62.295079,56043
59507,Wyoming,Washakie County,2018,0500000US56043,78.299993,56043
59525,Wyoming,Weston County,2017,0500000US56045,58.620691,56045


In [47]:
# Check that all broadband access data is in an acceptable range
print(f'The maximum value is {clean_df["Broadband (%)"].max()}')
print(f'The minimum value is {clean_df["Broadband (%)"].min()}')

The maximum value is 97.388148
The minimum value is 1.125842


In [48]:
# Drop all duplicate values, which will be one's with the same year and location
clean_df.drop_duplicates(["Year", "GEO ID"])

Unnamed: 0,State,County,Year,GEO ID,Broadband (%),CFIPS
17,Alabama,Autauga County,2017,0500000US01001,61.818182,1001
18,Alabama,Autauga County,2018,0500000US01001,78.899997,1001
26,Alabama,Baldwin County,2007,0500000US01003,58.852088,1003
27,Alabama,Baldwin County,2008,0500000US01003,59.151804,1003
28,Alabama,Baldwin County,2009,0500000US01003,59.451514,1003
...,...,...,...,...,...,...
59488,Wyoming,Uinta County,2018,0500000US56041,88.200003,56041
59506,Wyoming,Washakie County,2017,0500000US56043,62.295079,56043
59507,Wyoming,Washakie County,2018,0500000US56043,78.299993,56043
59525,Wyoming,Weston County,2017,0500000US56045,58.620691,56045


In [49]:
# Clean dataframe is output as a csv file.
clean_df.to_csv(output_path, index=False)

In [50]:
## Now that the broadband data is in a usable format, the census data needs to be grabbed.  However, ther are 2 ways to grab
##    data from the census, and we used both.  One can either use the census module in pandas, or one can use query urls.
##    The first method is shown below for only 2 cells, as after that it is identical to the second.

In [51]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from scipy import stats

# Census API Key
from config import census_api_key
c = Census(census_api_key, year=2017)

In [52]:
# The following code is a slightly modified version of that found in the bootcamp on sections pertaining to the census module.
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E"), {'for': 'county:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Renaming so it is readable by humans
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate (%)"] = 100 * census_pd["Poverty Count"].astype(int) / census_pd["Population"].astype(int)

# Final DataFrame
census_df = census_pd[["Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate (%)"]]

# Output dataframe to csv and screen
census_df.to_csv("CSV_files/acs5_2017.csv")
census_df

Unnamed: 0,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate (%)
0,"Corozal Municipio, Puerto Rico",34933.0,38.1,14752.0,7887.0,19146.0,54.807775
1,"Maunabo Municipio, Puerto Rico",11297.0,43.5,17636.0,8830.0,5945.0,52.624591
2,"Peñuelas Municipio, Puerto Rico",21661.0,36.4,16868.0,7983.0,12379.0,57.148793
3,"Ponce Municipio, Puerto Rico",148863.0,39.5,16561.0,10775.0,75187.0,50.507514
4,"San Sebastián Municipio, Puerto Rico",38970.0,42.0,14275.0,8072.0,21472.0,55.098794
...,...,...,...,...,...,...,...
3215,"Iowa County, Wisconsin",23576.0,42.5,60017.0,31717.0,2100.0,8.907363
3216,"Pepin County, Wisconsin",7282.0,45.8,51470.0,27901.0,938.0,12.881077
3217,"Ozaukee County, Wisconsin",87817.0,43.9,80526.0,45820.0,5087.0,5.792728
3218,"Portage County, Wisconsin",70371.0,36.6,54620.0,28363.0,8757.0,12.444047


In [53]:
## The second method of extracting data (url's) is shown below.  Initially, it does do for exactly one NAICS label.

In [54]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import requests
from pprint import pprint

# Import csv broadband data to dataframe
broadband_df = pd.read_csv("CSV_files/clean_broadband.csv")

# Census API Key
from config import census_api_key

In [55]:
# Assume year is 2017
# The census database is the following:
#    https://www.census.gov/data/developers/data-sets/economic-census.html
# The documentation used is the following:
#    https://api.census.gov/data/2017/ecnbasic/variables.html

# Craft base url
base_url = "https://api.census.gov/data/2017/ecnbasic"

# Craft query url
query_url = base_url + f"?get=NAICS2017_LABEL,EMP,NAME,GEO_ID&for=county:*&NAICS2017=51&key={census_api_key}"

# Grabs relevant data from census
response = requests.get(query_url).json()
pprint(response)

[['NAICS2017_LABEL', 'EMP', 'NAME', 'GEO_ID', 'NAICS2017', 'state', 'county'],
 ['Information',
  '0',
  'Benton County, Tennessee',
  '0500000US47005',
  '51',
  '47',
  '005'],
 ['Information',
  '199',
  'Warren County, Tennessee',
  '0500000US47177',
  '51',
  '47',
  '177'],
 ['Information',
  '1424',
  'Washington County, Tennessee',
  '0500000US47179',
  '51',
  '47',
  '179'],
 ['Information',
  '131',
  'Weakley County, Tennessee',
  '0500000US47183',
  '51',
  '47',
  '183'],
 ['Information',
  '0',
  'Campbell County, Tennessee',
  '0500000US47013',
  '51',
  '47',
  '013'],
 ['Information',
  '11',
  'Cannon County, Tennessee',
  '0500000US47015',
  '51',
  '47',
  '015'],
 ['Information',
  '387',
  'Maury County, Tennessee',
  '0500000US47119',
  '51',
  '47',
  '119'],
 ['Information',
  '185',
  'Polk County, Texas',
  '0500000US48373',
  '51',
  '48',
  '373'],
 ['Information',
  '4',
  'Refugio County, Texas',
  '0500000US48391',
  '51',
  '48',
  '391'],
 ['Informati

  '48',
  '251'],
 ['Information',
  '230',
  'Hood County, Texas',
  '0500000US48221',
  '51',
  '48',
  '221'],
 ['Information',
  '159',
  'Hopkins County, Texas',
  '0500000US48223',
  '51',
  '48',
  '223'],
 ['Information',
  '38',
  'Ochiltree County, Texas',
  '0500000US48357',
  '51',
  '48',
  '357'],
 ['Information',
  '1137',
  'McLennan County, Texas',
  '0500000US48309',
  '51',
  '48',
  '309'],
 ['Information',
  '88',
  'Cherokee County, Texas',
  '0500000US48073',
  '51',
  '48',
  '073'],
 ['Information',
  '35',
  'Eastland County, Texas',
  '0500000US48133',
  '51',
  '48',
  '133'],
 ['Information',
  '0',
  'Jones County, Texas',
  '0500000US48253',
  '51',
  '48',
  '253'],
 ['Information',
  '105',
  'Jim Wells County, Texas',
  '0500000US48249',
  '51',
  '48',
  '249'],
 ['Information',
  '1924',
  'Montgomery County, Texas',
  '0500000US48339',
  '51',
  '48',
  '339'],
 ['Information',
  '0',
  'Dallam County, Texas',
  '0500000US48111',
  '51',
  '48',
  '

  '0',
  'Jefferson County, Arkansas',
  '0500000US05069',
  '51',
  '05',
  '069'],
 ['Information',
  '0',
  'Marion County, Arkansas',
  '0500000US05089',
  '51',
  '05',
  '089'],
 ['Information',
  '998',
  'Sebastian County, Arkansas',
  '0500000US05131',
  '51',
  '05',
  '131'],
 ['Information',
  '156',
  'Crawford County, Arkansas',
  '0500000US05033',
  '51',
  '05',
  '033'],
 ['Information',
  '2100',
  'Benton County, Arkansas',
  '0500000US05007',
  '51',
  '05',
  '007'],
 ['Information',
  '11',
  'Chicot County, Arkansas',
  '0500000US05017',
  '51',
  '05',
  '017'],
 ['Information',
  '0',
  'Hot Spring County, Arkansas',
  '0500000US05059',
  '51',
  '05',
  '059'],
 ['Information',
  '0',
  'Johnson County, Arkansas',
  '0500000US05071',
  '51',
  '05',
  '071'],
 ['Information',
  '0',
  'Madison County, Arkansas',
  '0500000US05087',
  '51',
  '05',
  '087'],
 ['Information',
  '0',
  'Desha County, Arkansas',
  '0500000US05041',
  '51',
  '05',
  '041'],
 ['Inf

 ['Information',
  '7',
  'Lake County, Colorado',
  '0500000US08065',
  '51',
  '08',
  '065'],
 ['Information',
  '265',
  'La Plata County, Colorado',
  '0500000US08067',
  '51',
  '08',
  '067'],
 ['Information',
  '0',
  'Moffat County, Colorado',
  '0500000US08081',
  '51',
  '08',
  '081'],
 ['Information',
  '307',
  'Pitkin County, Colorado',
  '0500000US08097',
  '51',
  '08',
  '097'],
 ['Information',
  '31',
  'Rio Grande County, Colorado',
  '0500000US08105',
  '51',
  '08',
  '105'],
 ['Information',
  '3254',
  'Larimer County, Colorado',
  '0500000US08069',
  '51',
  '08',
  '069'],
 ['Information',
  '0',
  'Logan County, Colorado',
  '0500000US08075',
  '51',
  '08',
  '075'],
 ['Information',
  '80',
  'Montezuma County, Colorado',
  '0500000US08083',
  '51',
  '08',
  '083'],
 ['Information',
  '49',
  'San Miguel County, Colorado',
  '0500000US08113',
  '51',
  '08',
  '113'],
 ['Information',
  '50',
  'Teller County, Colorado',
  '0500000US08119',
  '51',
  '08'

  '0',
  'Grand County, Colorado',
  '0500000US08049',
  '51',
  '08',
  '049'],
 ['Information',
  '26452',
  'Arapahoe County, Colorado',
  '0500000US08005',
  '51',
  '08',
  '005'],
 ['Information',
  '127',
  'Otero County, Colorado',
  '0500000US08089',
  '51',
  '08',
  '089'],
 ['Information',
  '4606',
  'Broomfield County, Colorado',
  '0500000US08014',
  '51',
  '08',
  '014'],
 ['Information',
  '5809',
  'New Castle County, Delaware',
  '0500000US10003',
  '51',
  '10',
  '003'],
 ['Information',
  '560',
  'Sussex County, Delaware',
  '0500000US10005',
  '51',
  '10',
  '005'],
 ['Information',
  '800',
  'Kent County, Delaware',
  '0500000US10001',
  '51',
  '10',
  '001'],
 ['Information',
  '23787',
  'District of Columbia, District of Columbia',
  '0500000US11001',
  '51',
  '11',
  '001'],
 ['Information',
  '8217',
  'New Haven County, Connecticut',
  '0500000US09009',
  '51',
  '09',
  '009'],
 ['Information',
  '725',
  'Litchfield County, Connecticut',
  '0500000

  '067'],
 ['Information',
  '41',
  'Roosevelt County, Montana',
  '0500000US30085',
  '51',
  '30',
  '085'],
 ['Information',
  '10',
  'Sweet Grass County, Montana',
  '0500000US30097',
  '51',
  '30',
  '097'],
 ['Information',
  '36',
  'Richland County, Montana',
  '0500000US30083',
  '51',
  '30',
  '083'],
 ['Information',
  '0',
  'Teton County, Montana',
  '0500000US30099',
  '51',
  '30',
  '099'],
 ['Information',
  '6',
  'Prairie County, Montana',
  '0500000US30079',
  '51',
  '30',
  '079'],
 ['Information',
  '27',
  'Toole County, Montana',
  '0500000US30101',
  '51',
  '30',
  '101'],
 ['Information',
  '25',
  'Glacier County, Montana',
  '0500000US30035',
  '51',
  '30',
  '035'],
 ['Information',
  '20',
  'Fallon County, Montana',
  '0500000US30025',
  '51',
  '30',
  '025'],
 ['Information',
  '0',
  'Daniels County, Montana',
  '0500000US30019',
  '51',
  '30',
  '019'],
 ['Information',
  '162',
  'Silver Bow County, Montana',
  '0500000US30093',
  '51',
  '30

  '51',
  '13',
  '295'],
 ['Information',
  '13474',
  'Gwinnett County, Georgia',
  '0500000US13135',
  '51',
  '13',
  '135'],
 ['Information',
  '133',
  'Barrow County, Georgia',
  '0500000US13013',
  '51',
  '13',
  '013'],
 ['Information',
  '10',
  'Clinch County, Georgia',
  '0500000US13065',
  '51',
  '13',
  '065'],
 ['Information',
  '359',
  'Bartow County, Georgia',
  '0500000US13015',
  '51',
  '13',
  '015'],
 ['Information',
  '245',
  'Douglas County, Georgia',
  '0500000US13097',
  '51',
  '13',
  '097'],
 ['Information',
  '22',
  'Grady County, Georgia',
  '0500000US13131',
  '51',
  '13',
  '131'],
 ['Information',
  '0',
  'Whitfield County, Georgia',
  '0500000US13313',
  '51',
  '13',
  '313'],
 ['Information',
  '830',
  'Troup County, Georgia',
  '0500000US13285',
  '51',
  '13',
  '285'],
 ['Information',
  '1482',
  'Bibb County, Georgia',
  '0500000US13021',
  '51',
  '13',
  '021'],
 ['Information',
  '0',
  'Harris County, Georgia',
  '0500000US13145',
 

  '216',
  'Campbell County, Wyoming',
  '0500000US56005',
  '51',
  '56',
  '005'],
 ['Information',
  '112',
  'Lincoln County, Wyoming',
  '0500000US56023',
  '51',
  '56',
  '023'],
 ['Information',
  '33',
  'Johnson County, Wyoming',
  '0500000US56019',
  '51',
  '56',
  '019'],
 ['Information',
  '524',
  'Natrona County, Wyoming',
  '0500000US56025',
  '51',
  '56',
  '025'],
 ['Information',
  '1408',
  'Laramie County, Wyoming',
  '0500000US56021',
  '51',
  '56',
  '021'],
 ['Information',
  '0',
  'Converse County, Wyoming',
  '0500000US56009',
  '51',
  '56',
  '009'],
 ['Information',
  '339',
  'Teton County, Wyoming',
  '0500000US56039',
  '51',
  '56',
  '039'],
 ['Information',
  '153',
  'Sweetwater County, Wyoming',
  '0500000US56037',
  '51',
  '56',
  '037'],
 ['Information',
  '270',
  'Bulloch County, Georgia',
  '0500000US13031',
  '51',
  '13',
  '031'],
 ['Information',
  '1392',
  'Clarke County, Georgia',
  '0500000US13059',
  '51',
  '13',
  '059'],
 ['Inf

  'Sandoval County, New Mexico',
  '0500000US35043',
  '51',
  '35',
  '043'],
 ['Information',
  '131',
  'Taos County, New Mexico',
  '0500000US35055',
  '51',
  '35',
  '055'],
 ['Information',
  '82',
  'Grant County, New Mexico',
  '0500000US35017',
  '51',
  '35',
  '017'],
 ['Information',
  '266',
  'Curry County, New Mexico',
  '0500000US35009',
  '51',
  '35',
  '009'],
 ['Information',
  '0',
  'Colfax County, New Mexico',
  '0500000US35007',
  '51',
  '35',
  '007'],
 ['Information',
  '223',
  'Eddy County, New Mexico',
  '0500000US35015',
  '51',
  '35',
  '015'],
 ['Information',
  '397',
  'San Juan County, New Mexico',
  '0500000US35045',
  '51',
  '35',
  '045'],
 ['Information',
  '0',
  'Quay County, New Mexico',
  '0500000US35037',
  '51',
  '35',
  '037'],
 ['Information',
  '253',
  'Park County, Wyoming',
  '0500000US56029',
  '51',
  '56',
  '029'],
 ['Information',
  '201',
  'Sheridan County, Wyoming',
  '0500000US56033',
  '51',
  '56',
  '033'],
 ['Informat

 ['Information',
  '134',
  'Franklin County, New York',
  '0500000US36033',
  '51',
  '36',
  '033'],
 ['Information',
  '86',
  'Herkimer County, New York',
  '0500000US36043',
  '51',
  '36',
  '043'],
 ['Information',
  '886',
  'Ontario County, New York',
  '0500000US36069',
  '51',
  '36',
  '069'],
 ['Information',
  '7007',
  'Queens County, New York',
  '0500000US36081',
  '51',
  '36',
  '081'],
 ['Information',
  '382',
  'Steuben County, New York',
  '0500000US36101',
  '51',
  '36',
  '101'],
 ['Information',
  '10',
  'Hamilton County, New York',
  '0500000US36041',
  '51',
  '36',
  '041'],
 ['Information',
  '83',
  'Washington County, New York',
  '0500000US36115',
  '51',
  '36',
  '115'],
 ['Information',
  '541',
  'Chautauqua County, New York',
  '0500000US36013',
  '51',
  '36',
  '013'],
 ['Information',
  '14182',
  'Kings County, New York',
  '0500000US36047',
  '51',
  '36',
  '047'],
 ['Information',
  '182079',
  'New York County, New York',
  '0500000US3606

  '0',
  'Hoke County, North Carolina',
  '0500000US37093',
  '51',
  '37',
  '093'],
 ['Information',
  '330',
  'Henderson County, North Carolina',
  '0500000US37089',
  '51',
  '37',
  '089'],
 ['Information',
  '13',
  'Hyde County, North Carolina',
  '0500000US37095',
  '51',
  '37',
  '095'],
 ['Information',
  '591',
  'Iredell County, North Carolina',
  '0500000US37097',
  '51',
  '37',
  '097'],
 ['Information',
  '25974',
  'Mecklenburg County, North Carolina',
  '0500000US37119',
  '51',
  '37',
  '119'],
 ['Information',
  '1050',
  'Pitt County, North Carolina',
  '0500000US37147',
  '51',
  '37',
  '147'],
 ['Information',
  '293',
  'Rowan County, North Carolina',
  '0500000US37159',
  '51',
  '37',
  '159'],
 ['Information',
  '509',
  'Vance County, North Carolina',
  '0500000US37181',
  '51',
  '37',
  '181'],
 ['Information',
  '70',
  'Cherokee County, North Carolina',
  '0500000US37039',
  '51',
  '37',
  '039'],
 ['Information',
  '0',
  'Currituck County, North C

  'Davie County, North Carolina',
  '0500000US37059',
  '51',
  '37',
  '059'],
 ['Information',
  '201',
  'Davidson County, North Carolina',
  '0500000US37057',
  '51',
  '37',
  '057'],
 ['Information',
  '41',
  'Duplin County, North Carolina',
  '0500000US37061',
  '51',
  '37',
  '061'],
 ['Information',
  '68',
  'Hertford County, North Carolina',
  '0500000US37091',
  '51',
  '37',
  '091'],
 ['Information',
  '164',
  'Lee County, North Carolina',
  '0500000US37105',
  '51',
  '37',
  '105'],
 ['Information',
  '3404',
  'New Hanover County, North Carolina',
  '0500000US37129',
  '51',
  '37',
  '129'],
 ['Information',
  '34',
  'Pender County, North Carolina',
  '0500000US37141',
  '51',
  '37',
  '141'],
 ['Information',
  '325',
  'Wayne County, North Carolina',
  '0500000US37191',
  '51',
  '37',
  '191'],
 ['Information',
  '70',
  'Scotland County, North Carolina',
  '0500000US37165',
  '51',
  '37',
  '165'],
 ['Information',
  '363',
  'Watauga County, North Carolina'

 ['Information',
  '421',
  'Clinton County, Iowa',
  '0500000US19045',
  '51',
  '19',
  '045'],
 ['Information',
  '48',
  'Delaware County, Iowa',
  '0500000US19055',
  '51',
  '19',
  '055'],
 ['Information',
  '116',
  'Jefferson County, Iowa',
  '0500000US19101',
  '51',
  '19',
  '101'],
 ['Information',
  '0',
  'Wayne County, Iowa',
  '0500000US19185',
  '51',
  '19',
  '185'],
 ['Information',
  '30',
  'Allamakee County, Iowa',
  '0500000US19005',
  '51',
  '19',
  '005'],
 ['Information',
  '4758',
  'Linn County, Iowa',
  '0500000US19113',
  '51',
  '19',
  '113'],
 ['Information',
  '1352',
  'Scott County, Iowa',
  '0500000US19163',
  '51',
  '19',
  '163'],
 ['Information',
  '621',
  'Dallas County, Iowa',
  '0500000US19049',
  '51',
  '19',
  '049'],
 ['Information',
  '78',
  "O'Brien County, Iowa",
  '0500000US19141',
  '51',
  '19',
  '141'],
 ['Information',
  '45',
  'Sherman County, Kansas',
  '0500000US20181',
  '51',
  '20',
  '181'],
 ['Information',
  '72',


  'Trumbull County, Ohio',
  '0500000US39155',
  '51',
  '39',
  '155'],
 ['Information',
  '4153',
  'Warren County, Ohio',
  '0500000US39165',
  '51',
  '39',
  '165'],
 ['Information',
  '946',
  'Wood County, Ohio',
  '0500000US39173',
  '51',
  '39',
  '173'],
 ['Information',
  '126',
  'Ashland County, Ohio',
  '0500000US39005',
  '51',
  '39',
  '005'],
 ['Information',
  '239',
  'Ashtabula County, Ohio',
  '0500000US39007',
  '51',
  '39',
  '007'],
 ['Information',
  '0',
  'Dewey County, Oklahoma',
  '0500000US40043',
  '51',
  '40',
  '043'],
 ['Information',
  '132',
  'Kay County, Oklahoma',
  '0500000US40071',
  '51',
  '40',
  '071'],
 ['Information',
  '0',
  'Logan County, Oklahoma',
  '0500000US40083',
  '51',
  '40',
  '083'],
 ['Information',
  '0',
  'Craig County, Oklahoma',
  '0500000US40035',
  '51',
  '40',
  '035'],
 ['Information',
  '11634',
  'Tulsa County, Oklahoma',
  '0500000US40143',
  '51',
  '40',
  '143'],
 ['Information',
  '224',
  'Washington Co

  '10',
  'Adair County, Oklahoma',
  '0500000US40001',
  '51',
  '40',
  '001'],
 ['Information',
  '45',
  'Seminole County, Oklahoma',
  '0500000US40133',
  '51',
  '40',
  '133'],
 ['Information',
  '0',
  'Stephens County, Oklahoma',
  '0500000US40137',
  '51',
  '40',
  '137'],
 ['Information',
  '42',
  'Blaine County, Oklahoma',
  '0500000US40011',
  '51',
  '40',
  '011'],
 ['Information',
  '220',
  'Carter County, Oklahoma',
  '0500000US40019',
  '51',
  '40',
  '019'],
 ['Information',
  '0',
  'Kingfisher County, Oklahoma',
  '0500000US40073',
  '51',
  '40',
  '073'],
 ['Information',
  '0',
  'McClain County, Oklahoma',
  '0500000US40087',
  '51',
  '40',
  '087'],
 ['Information',
  '0',
  'McIntosh County, Oklahoma',
  '0500000US40091',
  '51',
  '40',
  '091'],
 ['Information',
  '0',
  'Murray County, Oklahoma',
  '0500000US40099',
  '51',
  '40',
  '099'],
 ['Information',
  '8',
  'Noble County, Oklahoma',
  '0500000US40103',
  '51',
  '40',
  '103'],
 ['Informatio

  '51',
  '21',
  '179'],
 ['Information',
  '80',
  'Wayne County, Kentucky',
  '0500000US21231',
  '51',
  '21',
  '231'],
 ['Information',
  '0',
  'Marion County, Kentucky',
  '0500000US21155',
  '51',
  '21',
  '155'],
 ['Information',
  '34',
  'Trigg County, Kentucky',
  '0500000US21221',
  '51',
  '21',
  '221'],
 ['Information',
  '39',
  'Woodford County, Kentucky',
  '0500000US21239',
  '51',
  '21',
  '239'],
 ['Information',
  '1170',
  'Boone County, Kentucky',
  '0500000US21015',
  '51',
  '21',
  '015'],
 ['Information',
  '19',
  'Leslie County, Kentucky',
  '0500000US21131',
  '51',
  '21',
  '131'],
 ['Information',
  '250',
  'Jessamine County, Kentucky',
  '0500000US21113',
  '51',
  '21',
  '113'],
 ['Information',
  '118',
  'Knox County, Kentucky',
  '0500000US21121',
  '51',
  '21',
  '121'],
 ['Information',
  '29',
  'Letcher County, Kentucky',
  '0500000US21133',
  '51',
  '21',
  '133'],
 ['Information',
  '811',
  'McCracken County, Kentucky',
  '0500000US

 ['Information',
  '381',
  'Floyd County, Kentucky',
  '0500000US21071',
  '51',
  '21',
  '071'],
 ['Information',
  '472',
  'Franklin County, Kentucky',
  '0500000US21073',
  '51',
  '21',
  '073'],
 ['Information',
  '302',
  'Madison County, Kentucky',
  '0500000US21151',
  '51',
  '21',
  '151'],
 ['Information',
  '15',
  'Magoffin County, Kentucky',
  '0500000US21153',
  '51',
  '21',
  '153'],
 ['Information',
  '51',
  'Rowan County, Kentucky',
  '0500000US21205',
  '51',
  '21',
  '205'],
 ['Information',
  '53',
  'Simpson County, Kentucky',
  '0500000US21213',
  '51',
  '21',
  '213'],
 ['Information',
  '77',
  'Taylor County, Kentucky',
  '0500000US21217',
  '51',
  '21',
  '217'],
 ['Information',
  '0',
  'Clark County, Kentucky',
  '0500000US21049',
  '51',
  '21',
  '049'],
 ['Information',
  '0',
  'Carroll County, Kentucky',
  '0500000US21041',
  '51',
  '21',
  '041'],
 ['Information',
  '0',
  'Estill County, Kentucky',
  '0500000US21065',
  '51',
  '21',
  '065

  '165'],
 ['Information',
  '4919',
  'Williamson County, Tennessee',
  '0500000US47187',
  '51',
  '47',
  '187'],
 ['Information',
  '135',
  'Bedford County, Tennessee',
  '0500000US47003',
  '51',
  '47',
  '003'],
 ['Information',
  '201',
  'Greene County, Tennessee',
  '0500000US47059',
  '51',
  '47',
  '059'],
 ['Information',
  '321',
  'Cumberland County, Tennessee',
  '0500000US47035',
  '51',
  '47',
  '035'],
 ['Information',
  '6555',
  'Shelby County, Tennessee',
  '0500000US47157',
  '51',
  '47',
  '157'],
 ['Information',
  '0',
  'Claiborne County, Tennessee',
  '0500000US47025',
  '51',
  '47',
  '025'],
 ['Information',
  '51',
  'Henderson County, Tennessee',
  '0500000US47077',
  '51',
  '47',
  '077'],
 ['Information',
  '22',
  'Hickman County, Tennessee',
  '0500000US47081',
  '51',
  '47',
  '081'],
 ['Information',
  '22',
  'Humphreys County, Tennessee',
  '0500000US47085',
  '51',
  '47',
  '085'],
 ['Information',
  '53',
  'Jefferson County, Tennessee'

In [56]:
# Initialize empty arrays
NAICS2017_label = []
number_employed = []
county_name = []
GEO_ID = []

# Creates arrays by appending relevant data to array,which maintains index.
for county in response:
    NAICS2017_label.append(county[0])
    number_employed.append(county[1])
    county_name.append(county[2])
    GEO_ID.append(county[3])
    
# Arrays are compiled into a dataframe
county_2017_df = pd.DataFrame({"NAICS2017 Label":NAICS2017_label,
                              "Number Employed":number_employed,
                              "County, State":county_name,
                              "GEO ID":GEO_ID})

# As the first row of the dataframe is the labels from the Census, it must be dropped.
county_2017_df.drop(index=0,inplace=True)

# Output the resulting array.
county_2017_df.head()

Unnamed: 0,NAICS2017 Label,Number Employed,"County, State",GEO ID
1,Information,0,"Benton County, Tennessee",0500000US47005
2,Information,199,"Warren County, Tennessee",0500000US47177
3,Information,1424,"Washington County, Tennessee",0500000US47179
4,Information,131,"Weakley County, Tennessee",0500000US47183
5,Information,0,"Campbell County, Tennessee",0500000US47013


In [57]:
# 2017 Broadband dataframe is crafted only grabbing relevant columns to prevent clutter
broadband_2017_df = broadband_df.loc[broadband_df["Year"] == 2017][["Broadband (%)","GEO ID","Year"]]

# Broadband dataframe is combined with county 2017 data from census.  Inner merge is used to drop all empty data points.
combined_2017_df = county_2017_df.merge(broadband_2017_df, how="inner", on="GEO ID")

# Data is output to screen and as a csv.
combined_2017_df.to_csv(f'CSV_files/{combined_2017_df["Year"].min()}_{combined_2017_df["NAICS2017 Label"].min()}_ecnbasic.csv',
                        index=False)
combined_2017_df.head()

Unnamed: 0,NAICS2017 Label,Number Employed,"County, State",GEO ID,Broadband (%),Year
0,Information,0,"Benton County, Tennessee",0500000US47005,56.153846,2017
1,Information,199,"Warren County, Tennessee",0500000US47177,56.739813,2017
2,Information,1424,"Washington County, Tennessee",0500000US47179,85.897809,2017
3,Information,131,"Weakley County, Tennessee",0500000US47183,62.890625,2017
4,Information,0,"Campbell County, Tennessee",0500000US47013,55.232561,2017


In [58]:
## Of course, this method isn't particularly efficient at grabbing lots of data, so it was turned into a for loop to run
##    through lots of NAICS labels

In [59]:
# List of single labels to examine for all that the census has data on that doesn't crash.
# This was done by looking at census documentation, writing out each NAICS label into a list, and removing values that crashed
# The original list was [21,22,23,31,32,33,42,44,45,48,49,51,52,53,54,55,56,61,62,71,72,81]
NAICS_list = [22,51,52,53,54,56,61,62,71,72,81]

# Assume year is 2017
# Grab csv for acs5 in 2017 then cut out all irrelevant columns
acs5_data = pd.read_csv("CSV_files/acs5_2017.csv")
acs5_df = acs5_data[["Name","Population"]]


# Craft base url
base_url = "https://api.census.gov/data/2017/ecnbasic"

# Empty list of file paths and total number employed in label are made
NAICS_2017_csv_paths = []

# for loop runs through each label generating a csv of the relevant dataframe.
for label in NAICS_list:
    
    # As this label is printed first, if a csv is crafted, the next number appears. Otherwise, the last label crashed it.
    print(label)
 
    # query url is crafted
    query_url = base_url + f"?get=NAICS2017_LABEL,EMP,NAME,GEO_ID&for=county:*&NAICS2017={label}&key={census_api_key}"
   
    # census is called
    response = requests.get(query_url).json()
    
    # Initialize empty arrays
    NAICS2017_label = []
    number_employed = []
    county_name = []
    GEO_ID = []

    # Creates arrays by appending relevant data to array,which maintains index.
    for county in response:
        NAICS2017_label.append(county[0])
        number_employed.append(county[1])
        county_name.append(county[2])
        GEO_ID.append(county[3])
    
    # Arrays are compiled into a dataframe
    county_2017_data = pd.DataFrame({"NAICS2017 Label":NAICS2017_label,
                                  "Number Employed":number_employed,
                                  "Name":county_name,
                                  "GEO ID":GEO_ID})
    
    
    # Populaton data is merged into dataframe.
    county_2017_df = pd.DataFrame.merge(county_2017_data ,acs5_df, how="inner")

    # As the first row of the dataframe is the labels from the Census, it must be dropped.
    county_2017_df.drop(index=0,inplace=True)
    
    # 2017 Broadband dataframe is crafted only grabbing relevant columns to prevent clutter
    broadband_2017_df = broadband_df.loc[broadband_df["Year"] == 2017][["Broadband (%)","GEO ID","Year"]]

    # Broadband dataframe is combined with county 2017 data from census.  Inner merge is used to drop all empty data points.
    combined_2017_df = county_2017_df.merge(broadband_2017_df, how="inner", on="GEO ID")


    # Column needed for math is saved as int to ensure math and not concatenation
    # Total number employed in label is appended to relevant array
    # New column for percent employed in label is calculated then made into new column for combined dataframe
    combined_2017_df["Number Employed"] = combined_2017_df["Number Employed"].astype(float)
    combined_2017_df["Percent Employed"] = combined_2017_df["Number Employed"] / combined_2017_df["Population"]
    
    # Data is saved as a csv.
    combined_2017_df.to_csv(f'CSV_files/{combined_2017_df["Year"].min()}_{combined_2017_df["NAICS2017 Label"].min()}_ecnbasic.csv',
                            index=False)
    
    # File path for csv file just written to is appended to relevant array
    NAICS_2017_csv_paths.append(f'CSV_files/{combined_2017_df["Year"].min()}_{combined_2017_df["NAICS2017 Label"].min()}_ecnbasic.csv')

    
    # Did the loop finish?
    print("Successfully retrieved")
    
# List of csv paths is turned into a dataframe then saved as a csv.
NAICS_2017_csv_paths_df = pd.DataFrame({"Paths": NAICS_2017_csv_paths})
NAICS_2017_csv_paths_df.to_csv(f'CSV_files/NAICS_2017_csv_paths.csv', index=False) 

22
Successfully retrieved
51
Successfully retrieved
52
Successfully retrieved
53
Successfully retrieved
54
Successfully retrieved
56
Successfully retrieved
61
Successfully retrieved
62
Successfully retrieved
71
Successfully retrieved
72
Successfully retrieved
81
Successfully retrieved
