In [1]:
# import dependencies
import requests
import json
from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# connect to API key
from config import gkey


In [2]:
# establish base URL 
base_url = "https://api.census.gov/data/2017/acs/acs5/profile"

# print the response object to the console
response = requests.get(base_url)
print(response)


<Response [200]>


In [3]:
# translate keys from Census API to variable for easier reading
percent_hh_snap = "DP03_0074PE"
percent_single_hh = "DP02_0011PE"
avg_commute = "DP03_0025E"
percent_no_vehicles = "DP04_0058PE"
avg_income = "DP03_0063E"
zip_code = "zip%20code%20tabulation%20area:*"

# set variables for query URL
key1 = percent_hh_snap
key2 = percent_single_hh
key3 = avg_commute
key4 = percent_no_vehicles
key5 = avg_income
geography = zip_code 

# build query URL
query_url = f"{base_url}?get={key1},{key2},{key3},{key4},{key5},NAME&for={geography}&key={gkey}"
print(query_url)


https://api.census.gov/data/2017/acs/acs5/profile?get=DP03_0074PE,DP02_0011PE,DP03_0025E,DP04_0058PE,DP03_0063E,NAME&for=zip%20code%20tabulation%20area:*&key=391a26e50b873d297dfed1954142d1f1dee85f9e


In [6]:
# convert to json
response = requests.get(query_url).json()
#pprint(response)


In [7]:
# create a dataframe 
df_raw = pd.DataFrame(response, columns=['% SNAP HH', '% Single HH','Avg Commute (min)','% No Cars', 'Avg Income ($)','ZCTA', 'Zip Code'])
df_raw = df_raw[['Zip Code','Avg Income ($)','% SNAP HH','% Single HH','% No Cars','Avg Commute (min)']]
df_raw = df_raw.drop([0], axis=0, inplace= False)

#alt_gender_stats_df["Percentage of Players"]= alt_gender_stats_df["Percentage of Players"].astype(float).map("{:,.2f}%".format)
#df["Avg Income ($)"]= df["Avg Income ($)"].astype(float).map("${:,.0f}".format)

df_raw.head()


Unnamed: 0,Zip Code,Avg Income ($),% SNAP HH,% Single HH,% No Cars,Avg Commute (min)
1,1379,66682,22.4,30.7,5.2,32.2
2,1440,62384,21.1,29.8,11.1,26.8
3,1505,119858,1.8,23.2,2.5,29.0
4,1524,86771,6.9,28.4,3.9,31.1
5,1529,104300,7.5,17.9,3.5,31.7


In [9]:
# see what data types we have
df_raw.dtypes


Zip Code             object
Avg Income ($)       object
% SNAP HH            object
% Single HH          object
% No Cars            object
Avg Commute (min)    object
dtype: object

In [10]:
# convert data types where necessary to floats
df_raw = df_raw.astype({"Avg Income ($)": float, "% SNAP HH": float, "% Single HH": float, "% No Cars": float, "Avg Commute (min)": float})
df_raw.dtypes


Zip Code              object
Avg Income ($)       float64
% SNAP HH            float64
% Single HH          float64
% No Cars            float64
Avg Commute (min)    float64
dtype: object

In [23]:
# remove any negative numbers

df = df_raw[(df_raw["Avg Income ($)"]>=0) & (df_raw["% SNAP HH"]>=0) & (df_raw["% Single HH"]>=0) & (df_raw["% No Cars"]>=0) & (df_raw["Avg Commute (min)"]>=0)]
df.head()
#print(len(df)) = 30440


Unnamed: 0,Zip Code,Avg Income ($),% SNAP HH,% Single HH,% No Cars,Avg Commute (min)
1,1379,66682.0,22.4,30.7,5.2,32.2
2,1440,62384.0,21.1,29.8,11.1,26.8
3,1505,119858.0,1.8,23.2,2.5,29.0
4,1524,86771.0,6.9,28.4,3.9,31.1
5,1529,104300.0,7.5,17.9,3.5,31.7


In [24]:
# run some calculations to understand the data

print(df["Avg Income ($)"].count())
print(df["Avg Income ($)"].median())
print(df["Avg Income ($)"].mean())
print("------------------------------------")
print(df.mean())

30440
65430.0
72782.25985545335
------------------------------------
Zip Code                      inf
Avg Income ($)       7.278226e+04
% SNAP HH            1.241990e+01
% Single HH          2.678986e+01
% No Cars            6.411570e+00
Avg Commute (min)    2.617808e+01
dtype: float64


In [25]:
highest_mean_income = df["Avg Income ($)"].max()
lowest_mean_income = df["Avg Income ($)"].min()

print(f"The highest average income of any US zip code is ${highest_mean_income:,.0f}")
print(f"The lowest average income of any US zip code is ${lowest_mean_income:,.0f}")

#df.loc[df["Avg Income ($)"] == 9398]
#highest = 21056: Gibson Island, MD
#lowest = 64147: Martin City, MO

The highest average income of any US zip code is $598,492
The lowest average income of any US zip code is $9,398
