### Querying the BEA API

In [1]:
import requests
from pprint import pprint
from config import api_key
import pandas as pd
import csv
import os

In [2]:
url = "https://apps.bea.gov/api/data?"

### Build query URL

### Personal income for 2017 for all counties, in JSON format 

In [3]:
income_query_url = url + "UserID=" + api_key + "&method=GetData&Datasetname=RegionalIncome&TableName=CA1&LineCode=1&GeoFips=COUNTY&Year=2017&ResultFormat=JSON"
income_data = requests.get(income_query_url).json()
pprint(income_data)

{'BEAAPI': {'Request': {'RequestParam': [{'ParameterName': 'GEOFIPS',
                                          'ParameterValue': 'COUNTY'},
                                         {'ParameterName': 'RESULTFORMAT',
                                          'ParameterValue': 'JSON'},
                                         {'ParameterName': 'DATASETNAME',
                                          'ParameterValue': 'REGIONALINCOME'},
                                         {'ParameterName': 'USERID',
                                          'ParameterValue': '29E04437-F92A-4CAE-9F6E-5E49B1790A37'},
                                         {'ParameterName': 'LINECODE',
                                          'ParameterValue': '1'},
                                         {'ParameterName': 'TABLENAME',
                                          'ParameterValue': 'CA1'},
                                         {'ParameterName': 'YEAR',
                                          'Param

In [4]:
income=income_data["BEAAPI"]["Results"]["Data"]

In [6]:
income[0]

{'Code': 'CA1-1',
 'GeoFips': '00000',
 'GeoName': 'United States',
 'TimePeriod': '2017',
 'CL_UNIT': 'Thousands of dollars',
 'UNIT_MULT': '3',
 'DataValue': '16,820,250,000'}

### Loop through each result and append results into lists

In [7]:
county_id=[]
geo=[]
Year=[]
Unit=[]
Income=[]
for i in range(len(income)):
    county_id.append(income[i]["GeoFips"])
    geo.append(income[i]["GeoName"])
    Year.append(income[i]["TimePeriod"])
    Unit.append(income[i]["CL_UNIT"])
    Income.append(income[i]["DataValue"])

### Create Dataframe from the column above

In [8]:
income_df = pd.DataFrame({
    "County_id": county_id,
    "Geo": geo,
    "Year":Year,
    "Unit":Unit,
    "Income":Income
})



### Split the Geo column into two separate columns with county and state information

In [9]:
income_df[['county_name','state_code']] = income_df['Geo'].str.split(',',n=1, expand=True)
income_df


Unnamed: 0,County_id,Geo,Year,Unit,Income,county_name,state_code
0,00000,United States,2017,Thousands of dollars,16820250000,United States,
1,01000,Alabama,2017,Thousands of dollars,198916425,Alabama,
2,01001,"Autauga, AL",2017,Thousands of dollars,2247046,Autauga,AL
3,01003,"Baldwin, AL",2017,Thousands of dollars,9372465,Baldwin,AL
4,01005,"Barbour, AL",2017,Thousands of dollars,845349,Barbour,AL
5,01007,"Bibb, AL",2017,Thousands of dollars,680543,Bibb,AL
6,01009,"Blount, AL",2017,Thousands of dollars,1955420,Blount,AL
7,01011,"Bullock, AL",2017,Thousands of dollars,283496,Bullock,AL
8,01013,"Butler, AL",2017,Thousands of dollars,697770,Butler,AL
9,01015,"Calhoun, AL",2017,Thousands of dollars,4159490,Calhoun,AL


In [15]:
clean_income_df=income_df.drop(columns=['Geo'])
clean_income_df

Unnamed: 0,County_id,Year,Unit,Income,county_name,state_code
0,00000,2017,Thousands of dollars,16820250000,United States,
1,01000,2017,Thousands of dollars,198916425,Alabama,
2,01001,2017,Thousands of dollars,2247046,Autauga,AL
3,01003,2017,Thousands of dollars,9372465,Baldwin,AL
4,01005,2017,Thousands of dollars,845349,Barbour,AL
5,01007,2017,Thousands of dollars,680543,Bibb,AL
6,01009,2017,Thousands of dollars,1955420,Blount,AL
7,01011,2017,Thousands of dollars,283496,Bullock,AL
8,01013,2017,Thousands of dollars,697770,Butler,AL
9,01015,2017,Thousands of dollars,4159490,Calhoun,AL


### Export the income resutls into csv file

In [16]:
clean_income_df.to_csv('income.csv')

### Real per capita personal income for all states, all years, in JSON format 

In [27]:
capi_url = url + "UserID=" + api_key + "&method=GetData&Datasetname=RegionalIncome&TableName=RPI1&lineCode=2&Year=ALL&GeoFips=STATE&ResultFormat=JSON"
capi_data = requests.get(capi_url).json()
pprint(capi_data)

{'BEAAPI': {'Request': {'RequestParam': [{'ParameterName': 'GEOFIPS',
                                          'ParameterValue': 'STATE'},
                                         {'ParameterName': 'TABLENAME',
                                          'ParameterValue': 'RPI1'},
                                         {'ParameterName': 'METHOD',
                                          'ParameterValue': 'GETDATA'},
                                         {'ParameterName': 'YEAR',
                                          'ParameterValue': 'ALL'},
                                         {'ParameterName': 'RESULTFORMAT',
                                          'ParameterValue': 'JSON'},
                                         {'ParameterName': 'LINECODE',
                                          'ParameterValue': '2'},
                                         {'ParameterName': 'USERID',
                                          'ParameterValue': '29E04437-F92A-4CAE-9F6E-5E49B179

In [42]:
capi_income = capi_data["BEAAPI"]["Results"]["Data"]
capi_income[1]

{'Code': 'RPI1-2',
 'GeoFips': '00000',
 'GeoName': 'United States',
 'TimePeriod': '2009',
 'CL_UNIT': 'Chained 2012 dollars',
 'UNIT_MULT': '0',
 'DataValue': '41,750'}

In [31]:
county_id=[]
geo=[]
Year=[]
Income=[]
for i in range(len(capi_income)):
    county_id.append(capi_income[i]["GeoFips"])
    geo.append(capi_income[i]["GeoName"])
    Year.append(capi_income[i]["TimePeriod"])
    Income.append(capi_income[i]["DataValue"])

In [36]:
state_income_df = pd.DataFrame({
    "County_id": county_id,
    "State_name": geo,
    "Year":Year,
    "Income":Income
})


In [39]:
state_income_df.head()

Unnamed: 0,County_id,State_name,Year,Income
0,0,United States,2008,43431
1,0,United States,2009,41750
2,0,United States,2010,42364
3,0,United States,2011,43541
4,0,United States,2012,44582


In [40]:
state_income_df.to_csv('states_all_years_income.csv')

In [2]:
url = 'https://www.census.gov/geo/reference/codes/cousub.html'

In [5]:
tables = pd.read_html(url)
tables[0]


Unnamed: 0,Field Name,Field Description,Example
0,STATE,State postal code,MA
1,STATEFP,State FIPS code,25
2,COUNTYFP,County FIPS code,027
3,COUNTYNAME,County name and legal/statistical area descrip...,Worcester County
4,COUSUBFP,County subdivision FIPS code,28740
5,COUSUBNAME,County subdivision name and legal/statistical ...,Hardwick town
6,FUNCSTAT,Functional Status,A
7,Functional Status Codes A: identifies an activ...,,


In [15]:
# Make a reference to the water_system.csv file path
csv_path = os.path.join( "water_system_geographic_area.csv")
# Import the water_system.csv file as a DataFrame
water_system_df = pd.read_csv(csv_path,encoding="ISO-8859-1")
water_system_df.head()

Unnamed: 0,PWS ID,PWS Name,EPA Region,Primacy Agency,PWS Type,Primacy Type,Area Type,County Served,City Served,Zip Code Served,Activity Status,Deactivation Date,Is Source Water Protected,Population Served Count,State Code,Zip Code
0,NJ1710003,PICNIC GROVE MOBILE HOMES,2,New Jersey,Community water system,State,County,Salem,-,-,Active,-,N,250,NJ,8328
1,LA1063022,TOWN OF ALBANY WATER SYSTEM,6,Louisiana,Community water system,State,County,Livingston Parish,-,-,Active,-,N,6063,LA,70711
2,NY0140001,NEW SALEM WD,2,New York,Community water system,State,County,Albany,-,-,Active,-,-,350,NY,12159
3,MN1230013,Wykoff,5,Minnesota,Community water system,State,County,Fillmore,-,-,Active,-,-,444,MN,55990
4,WY5601596,BENNOR ESTATES & IMPROVEMENT DISTRICT,8,Wyoming,Community water system,State,County,Campbell,-,-,Active,-,N,174,WY,82717


In [16]:

water_system_df.rename(columns={"County Served":"county_name"}, inplace=True)

In [17]:
water_system_df

Unnamed: 0,PWS ID,PWS Name,EPA Region,Primacy Agency,PWS Type,Primacy Type,Area Type,county_name,City Served,Zip Code Served,Activity Status,Deactivation Date,Is Source Water Protected,Population Served Count,State Code,Zip Code
0,NJ1710003,PICNIC GROVE MOBILE HOMES,2,New Jersey,Community water system,State,County,Salem,-,-,Active,-,N,250,NJ,08328
1,LA1063022,TOWN OF ALBANY WATER SYSTEM,6,Louisiana,Community water system,State,County,Livingston Parish,-,-,Active,-,N,6063,LA,70711
2,NY0140001,NEW SALEM WD,2,New York,Community water system,State,County,Albany,-,-,Active,-,-,350,NY,12159
3,MN1230013,Wykoff,5,Minnesota,Community water system,State,County,Fillmore,-,-,Active,-,-,444,MN,55990
4,WY5601596,BENNOR ESTATES & IMPROVEMENT DISTRICT,8,Wyoming,Community water system,State,County,Campbell,-,-,Active,-,N,174,WY,82717
5,OK3003801,GOTEBO,6,Oklahoma,Community water system,State,County,Kiowa,-,-,Active,-,N,272,OK,73041
6,OK2000811,GRACEMONT PWA,6,Oklahoma,Community water system,State,County,Caddo,-,-,Active,-,N,336,OK,73042
7,OK3000606,GREENFIELD PWA,6,Oklahoma,Community water system,State,County,Blaine,-,-,Active,-,N,123,OK,73043
8,OK2004223,RODEO CORNER,6,Oklahoma,Transient non-community system,State,County,Logan,-,-,Active,-,N,25,PA,19426
9,OK2004255,LAZY E TRAINING CENTER,6,Oklahoma,Non-Transient non-community system,State,County,Logan,-,-,Active,-,N,30,OK,73044


In [18]:
water_system_df.to_csv('water_system.csv')