In [1]:
# Import Dependencies

import json
import numpy as np
import pandas as pd
import matplotlib as plt
import requests
import time
from config import api_key


In [None]:
#total_pop = "B01001_001E"

## Age Group Codes
#Males age 18 to 19 = B01001_007E
#Males age 20 = B01001_008E
#Males age 21 = B01001_009E
#Males age 22 to 24 = B01001_010E
#Males age 25 to 29 = B01001_011E
#Males age 30 to 34 = B01001_012E
#Males age 35 to 39 = B01001_013E

#Females age 18 to 19 = B01001_031E
#Females age 20 = B01001_032E
#Females age 21 = B01001_033E
#Females age 22 to 24 = B01001_034E
#Females age 25 to 29 = B01001_035E
#Females age 30 to 34 = B01001_036E
#Females age 35 to 39 = B01001_037E

## Ethnicity Group Codes
#White = B02001_002E
#Black = B02001_003E
#Latino = B03001_003E
#Native Indian = B02001_004E
#Asian = B02001_005E
#Pacific Islander = B02001_006E
#Other = B02001_007E

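## Income Level
# NOTE(review): B25095 appears to be the ACS table "Household Income by Selected Monthly Owner Costs
# as a Percentage of Household Income", whose universe is owner-occupied housing units rather than
# all households -- confirm. If overall household income is wanted, table B19001 may be the intended one.
#income_pop = B25095_001E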
#Less than $10,000 = B25095_002E
#$10,000 to $19,999 = B25095_011E
#$20,000 to $34,999 = B25095_020E
#$35,000 to $49,999 = B25095_029E
#$50,000 to $74,999 = B25095_038E
#$75,000 to $99,999 = B25095_047E
#$100,000 to $149,999 = B25095_056E
#$150,000 or more = B25095_065E

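## Education Level
# NOTE(review): B23006 appears to be "Educational Attainment by Employment Status for the Population
# 25 to 64 Years", so edu_pop would cover ages 25 to 64 only -- confirm.
#edu_pop = B23006_001E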
#Less than high school graduate = B23006_002E
#High school graduate = B23006_009E
#Some college or associates = B23006_016E
#Bachelor's or higher = B23006_023E


In [2]:
# Build query URL for exploring Census API
#
# Every name below is a fragment of an ACS 5-year (2016) query string. A full
# query is assembled as: base URL + "for" geography + "in" parent geography + key.

# Base URL (asks only for the NAME field)
url = "https://api.census.gov/data/2016/acs/acs5?get=NAME"

# API Key call, note this should always be at the end of the URL
key = "&key=" + api_key

# Call for states, note that the State's designated number should follow the colon
state = "&in=state:"

# Call for individual counties. The county call must be combined with the state call; it cannot be
# used by itself. The county number should follow the colon
county = "&for=county:"

# Places API call, which allows specific cities to be referred to. Requires the "state" call to be
# made as well, otherwise it might return an error.
city = "&for=place:"

# Metropolitan / micropolitan statistical area API call
metro = "&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:"

# ZIP code tabulation area (ZCTA) API call
zip_code = "&for=zip%20code%20tabulation%20area:"

# Basic query url structure should look like this (Note that "*" can be used in place of a code to
# call all values, e.g. all cities within a state). Here: place 04000 in state 13.
query_url = url + city + "04000" + state + "13" + key

# Json request. The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error (bad key, bad geography) into a clear exception instead of
# a confusing JSON decode failure.
response = requests.get(query_url, timeout=30)
response.raise_for_status()
request = response.json()

# Display result of request
request


[['NAME', 'state', 'place'], ['Atlanta city, Georgia', '13', '04000']]

In [3]:
# Query URLs, one per topic, each requesting NAME plus that topic's variable codes

# Age: total population, then the male and female 18-39 brackets
age_url = "https://api.census.gov/data/2016/acs/acs5?get=" + ",".join([
    "NAME",
    "B01001_001E",  # total population
    "B01001_007E",  # males 18 to 19
    "B01001_008E",  # males 20
    "B01001_009E",  # males 21
    "B01001_010E",  # males 22 to 24
    "B01001_011E",  # males 25 to 29
    "B01001_012E",  # males 30 to 34
    "B01001_013E",  # males 35 to 39
    "B01001_031E",  # females 18 to 19
    "B01001_032E",  # females 20
    "B01001_033E",  # females 21
    "B01001_034E",  # females 22 to 24
    "B01001_035E",  # females 25 to 29
    "B01001_036E",  # females 30 to 34
    "B01001_037E",  # females 35 to 39
])

# Ethnicity: total population, then one code per group
ethnicity_url = "https://api.census.gov/data/2016/acs/acs5?get=" + ",".join([
    "NAME",
    "B01001_001E",  # total population
    "B02001_002E",  # White
    "B02001_003E",  # Black
    "B03001_003E",  # Latino
    "B02001_004E",  # Native Indian
    "B02001_005E",  # Asian
    "B02001_006E",  # Pacific Islander
    "B02001_007E",  # Other
])

# Income: table total, then one code per income bracket
income_url = "https://api.census.gov/data/2016/acs/acs5?get=" + ",".join([
    "NAME",
    "B25095_001E",  # income_pop
    "B25095_002E",  # less than $10,000
    "B25095_011E",  # $10,000 to $19,999
    "B25095_020E",  # $20,000 to $34,999
    "B25095_029E",  # $35,000 to $49,999
    "B25095_038E",  # $50,000 to $74,999
    "B25095_047E",  # $75,000 to $99,999
    "B25095_056E",  # $100,000 to $149,999
    "B25095_065E",  # $150,000 or more
])

# Education: table total, then one code per attainment level
edu_url = "https://api.census.gov/data/2016/acs/acs5?get=" + ",".join([
    "NAME",
    "B23006_001E",  # edu_pop
    "B23006_002E",  # less than high school graduate
    "B23006_009E",  # high school graduate
    "B23006_016E",  # some college or associates
    "B23006_023E",  # bachelor's or higher
])

# All four URLs, in the order the API loop walks them
url_list = [age_url, ethnicity_url, income_url, edu_url]


In [4]:
# State name -> Census API state code
state_dict = {
    "Georgia": "13",
    "Washington": "53",
    "Florida": "12",
    "New York": "36",
    "California": "06",
    "New Jersey": "34",
    "Oregon": "41",
    "Minnesota": "27",
    "Kansas": "20",
    "Massachusetts": "25",
    "Utah": "49",
    "Washington D.C.": "11",
    "Texas": "48",
    "Illinois": "17",
    "Pennsylvania": "42",
    "Ohio": "39",
    "Colorado": "08",
}

# City name -> Census API place code. Place codes are only unique within a state,
# which is why New York and Harrison can share "51000".
# NOTE(review): in this notebook's outputs, place 51000 in state 34 comes back as
# "Newark city, New Jersey" -- confirm Newark (rather than Harrison) is intended.
city_dict = {
    "Atlanta": "04000",
    "Seattle": "63000",
    "Orlando": "53000",
    "New York": "51000",
    "Carson": "11530",
    "Harrison": "51000",
    "Portland": "59000",
    "Minneapolis": "43000",
    "San Jose": "68000",
    "Kansas City": "36000",
    "Foxborough": "07000",
    "Sandy": "67440",
    "Washington D.C.": "50000",
    "Houston": "35000",
    "Bridgeview": "14000",
    "Chester": "60000",
    "Columbus": "18000",
    "Commerce City": "20000",
    "Frisco": "27684",
}

# State code for each city, listed in the same order as city_dict, for the API call iteration.
# California and Texas each appear twice because two cities sit in each.
state_codes = [
    state_dict[state_name]
    for state_name in (
        "Georgia", "Washington", "Florida", "New York", "California", "New Jersey",
        "Oregon", "Minnesota", "California", "Kansas", "Massachusetts", "Utah",
        "Washington D.C.", "Texas", "Illinois", "Pennsylvania", "Ohio", "Colorado",
        "Texas",
    )
]

# City codes in city_dict's insertion order, for the API call iteration
city_codes = list(city_dict.values())


In [5]:
# API request iteration (NOTE: running this cell takes time due to the time.sleep! Only run it
# if you want the data itself. It makes one request per table per city and pauses one second
# after each, so expect well over a minute before it finishes.)
#
# Relies on the URL fragments `city` ("&for=place:"), `state` and `key`, plus `url_list`,
# `city_codes` and `state_codes`, all defined in earlier cells.

# Initialize lists to store data retrieved from the API
age_list = []
ethnicity_list = []
income_list = []
edu_list = []

# Route each table URL to the list that collects its rows
rows_by_url = {
    age_url: age_list,
    ethnicity_url: ethnicity_list,
    income_url: income_list,
    edu_url: edu_list,
}

# The two code lists are paired by position, so a length mismatch would silently
# pair cities with the wrong states (or drop cities) -- fail loudly instead.
if len(city_codes) != len(state_codes):
    raise ValueError("city_codes and state_codes must have the same length")

for counter, (city_code, state_code) in enumerate(zip(city_codes, state_codes), start=1):

    # `table_url` (not `url`) so the base `url` from the query-building cell is not clobbered
    for table_url in url_list:

        # Query URL construction that pulls from the city & state codes lists
        query_url = table_url + city + city_code + state + state_code + key

        # Timeout avoids hanging on a stalled connection; raise_for_status() surfaces
        # HTTP errors directly instead of as a JSON decode failure.
        response = requests.get(query_url, timeout=30)
        response.raise_for_status()
        request = response.json()

        # request[0] is the header row; request[1] is the single data row for this place
        if table_url in rows_by_url:
            rows_by_url[table_url].append(request[1])

        # Pause between calls to stay gentle on the API
        time.sleep(1)

    print(f"#{counter} has completed all calls")


#1 has completed all calls
#2 has completed all calls
#3 has completed all calls
#4 has completed all calls
#5 has completed all calls
#6 has completed all calls
#7 has completed all calls
#8 has completed all calls
#9 has completed all calls
#10 has completed all calls
#11 has completed all calls
#12 has completed all calls
#13 has completed all calls
#14 has completed all calls
#15 has completed all calls
#16 has completed all calls
#17 has completed all calls
#18 has completed all calls
#19 has completed all calls


In [6]:
# Convert the age counts to integers and add each male bracket to the matching female
# bracket, giving one combined count per age group for every city.
#
# Each row in age_list follows the order of the age query:
#   [0] name, [1] total population, [2:9] seven male brackets, [9:16] seven female brackets
# (the API appends the state and place codes after those; they are not used here).

age_combine = []

# Iterate over whatever was fetched rather than a fixed count, and use `city_name` rather than
# `city` so the "&for=place:" URL fragment named `city` is not overwritten (the API loop needs it).
for age_row in age_list:

    city_name = age_row[0]
    total_pop = int(age_row[1])

    male_counts = [int(value) for value in age_row[2:9]]
    female_counts = [int(value) for value in age_row[9:16]]

    # 18-19, 20, 21, 22-24, 25-29, 30-34, 35-39 -- male + female per bracket
    bracket_totals = [male + female for male, female in zip(male_counts, female_counts)]

    age_combine.append([city_name, total_pop] + bracket_totals)

age_combine
    

[['Atlanta city, Georgia',
  456378,
  17968,
  10038,
  9296,
  26121,
  48140,
  44074,
  34064],
 ['Seattle city, Washington',
  668849,
  17064,
  9191,
  10748,
  37047,
  79496,
  68842,
  55087],
 ['Orlando city, Florida',
  263306,
  4996,
  4493,
  3029,
  14157,
  30607,
  25357,
  21989],
 ['New York city, New York',
  8461961,
  195586,
  118066,
  115445,
  387231,
  782039,
  715712,
  608591],
 ['Carson city, California', 92780, 2490, 1419, 1768, 4208, 6942, 5992, 5970],
 ['Newark city, New Jersey',
  280139,
  8735,
  4749,
  4467,
  13773,
  24356,
  22000,
  20797],
 ['Portland city, Oregon',
  620589,
  12541,
  7337,
  7128,
  25869,
  60058,
  61753,
  56580],
 ['Minneapolis city, Minnesota',
  404670,
  14799,
  7478,
  8377,
  25431,
  49138,
  40420,
  28914],
 ['San Jose city, California',
  1009363,
  23266,
  13739,
  12796,
  42729,
  78552,
  78713,
  74365],
 ['Kansas City city, Kansas',
  149755,
  3599,
  1564,
  2011,
  6594,
  11271,
  11337,
  10016],

In [8]:
# Build one pandas DataFrame per data list; the combined age table comes first

age_df = pd.DataFrame(age_combine)

# Label the columns: city name, total population, then one column per combined age bracket

age_df.columns = ["City & State", "Total Population"] + [
    "Age " + bracket
    for bracket in ("18 to 19", "20", "21", "22 to 24", "25 to 29", "30 to 34", "35 to 39")
]

# Show the resulting table

age_df


Unnamed: 0,City & State,Total Population,Age 18 to 19,Age 20,Age 21,Age 22 to 24,Age 25 to 29,Age 30 to 34,Age 35 to 39
0,"Atlanta city, Georgia",456378,17968,10038,9296,26121,48140,44074,34064
1,"Seattle city, Washington",668849,17064,9191,10748,37047,79496,68842,55087
2,"Orlando city, Florida",263306,4996,4493,3029,14157,30607,25357,21989
3,"New York city, New York",8461961,195586,118066,115445,387231,782039,715712,608591
4,"Carson city, California",92780,2490,1419,1768,4208,6942,5992,5970
5,"Newark city, New Jersey",280139,8735,4749,4467,13773,24356,22000,20797
6,"Portland city, Oregon",620589,12541,7337,7128,25869,60058,61753,56580
7,"Minneapolis city, Minnesota",404670,14799,7478,8377,25431,49138,40420,28914
8,"San Jose city, California",1009363,23266,13739,12796,42729,78552,78713,74365
9,"Kansas City city, Kansas",149755,3599,1564,2011,6594,11271,11337,10016


In [9]:
# DataFrame for ethnicity

ethnicity_df = pd.DataFrame(ethnicity_list)

# Rename columns. The last two hold the state and place codes the API appends to every row.

ethnicity_df.columns=["City & State","Total Population","White","Black/Afro-American","Latino","Native Indian","Asian",\
                      "Pacific Islander","Other","State Code","City Code"]

# Convert the count columns to numbers so they can be summed and compared, matching the age table
# (which converts its counts with int()). Presumably the API delivers these as strings -- if they
# are already numeric this is a no-op. The code columns are left untouched so any leading zeros
# in them are preserved.

ethnicity_count_cols = list(ethnicity_df.columns[1:-2])
ethnicity_df[ethnicity_count_cols] = ethnicity_df[ethnicity_count_cols].apply(pd.to_numeric)

# Display the DataFrame

ethnicity_df

Unnamed: 0,City & State,Total Population,White,Black/Afro-American,Latino,Native Indian,Asian,Pacific Islander,Other,State Code,City Code
0,"Atlanta city, Georgia",456378,182794,239005,21838,1308,18235,202,4802,13,4000
1,"Seattle city, Washington",668849,462673,47493,43868,4091,94619,2644,14045,53,63000
2,"Orlando city, Florida",263306,160607,69572,77009,671,10109,78,15101,12,53000
3,"New York city, New York",8461961,3644982,2064927,2457945,33388,1159883,3995,1277265,36,51000
4,"Carson city, California",92780,27282,19830,37011,712,23963,2088,11679,6,11530
5,"Newark city, New Jersey",280139,68226,140750,100950,1407,4914,153,56702,34,51000
6,"Portland city, Oregon",620589,482410,35465,60040,4280,47045,4014,15111,41,59000
7,"Minneapolis city, Minnesota",404670,262130,76070,38832,4974,23911,84,17830,27,43000
8,"San Jose city, California",1009363,425883,31096,329135,5285,344188,4477,148307,6,68000
9,"Kansas City city, Kansas",149755,88953,37530,43727,986,5786,280,10559,20,36000


In [10]:
# DataFrame for Income Level

income_df = pd.DataFrame(income_list)

# Rename the columns. The last two hold the state and place codes the API appends to every row.

income_df.columns=["City & State","Total Households","Less than 10k","10k to 19k","20k to 34k","35k to 49k",\
                  "50k to 74k","75k to 99k","100k to 149k","More than 150k","State Code","City Code"]

# Convert the count columns to numbers so they can be summed and compared, matching the age table
# (which converts its counts with int()). Presumably the API delivers these as strings -- if they
# are already numeric this is a no-op. The code columns are left untouched so any leading zeros
# in them are preserved.

income_count_cols = list(income_df.columns[1:-2])
income_df[income_count_cols] = income_df[income_count_cols].apply(pd.to_numeric)

# Display the DataFrame

income_df

Unnamed: 0,City & State,Total Households,Less than 10k,10k to 19k,20k to 34k,35k to 49k,50k to 74k,75k to 99k,100k to 149k,More than 150k,State Code,City Code
0,"Atlanta city, Georgia",81417,3349,5046,8118,8149,12465,8653,13183,22454,13,4000
1,"Seattle city, Washington",140436,3436,4381,8490,10041,18831,17223,29579,48455,53,63000
2,"Orlando city, Florida",38542,1965,3087,4812,4629,7311,4977,6116,5645,12,53000
3,"New York city, New York",1000242,35762,52923,92109,90589,152194,132604,193397,250664,36,51000
4,"Carson city, California",18979,451,934,1820,1876,3413,3067,4791,2627,6,11530
5,"Newark city, New Jersey",20502,823,1481,2628,2832,4061,3221,3784,1672,34,51000
6,"Portland city, Oregon",136114,4132,6404,11586,14229,23719,21121,28852,26071,41,59000
7,"Minneapolis city, Minnesota",80520,2291,3638,7678,8240,13799,12213,16449,16212,27,43000
8,"San Jose city, California",181122,3623,5836,10736,12344,21927,20422,38912,67322,6,68000
9,"Kansas City city, Kansas",30796,1443,2705,5612,5225,6530,4210,3874,1197,20,36000


In [54]:
# DataFrame for Education Level

edu_df = pd.DataFrame(edu_list)

# Rename the columns. The last two hold the state and place codes the API appends to every row.

edu_df.columns=["City & State","Edu Total Pop","Less Than High School Grad","High School Grad","Some College or Associates",\
               "Bachelor's Degree or Higher","State Code","City Code"]

# Convert the count columns to numbers so they can be summed and compared, matching the age table
# (which converts its counts with int()). Presumably the API delivers these as strings -- if they
# are already numeric this is a no-op. The code columns are left untouched so any leading zeros
# in them are preserved.

edu_count_cols = list(edu_df.columns[1:-2])
edu_df[edu_count_cols] = edu_df[edu_count_cols].apply(pd.to_numeric)

# Display the dataframe

edu_df


Unnamed: 0,City & State,Edu Total Pop,Less Than High School Grad,High School Grad,Some College or Associates,Bachelor's Degree or Higher,State Code,City Code
0,"Atlanta city, Georgia",258046,23374,46860,57206,130606,13,4000
1,"Seattle city, Washington",413133,21214,38922,93974,259023,53,63000
2,"Orlando city, Florida",153051,13306,35951,47965,55829,12,53000
3,"New York city, New York",4752442,778896,1092575,1037451,1843520,36,51000
4,"Carson city, California",48771,8577,10814,16800,12580,6,11530
5,"Newark city, New Jersey",152531,36047,55367,38992,22125,34,51000
6,"Portland city, Oregon",381246,29657,56281,109969,185339,41,59000
7,"Minneapolis city, Minnesota",232168,23886,36065,58364,113853,27,43000
8,"San Jose city, California",564344,87769,94749,143961,237865,6,68000
9,"Kansas City city, Kansas",76939,17076,24198,22884,12781,20,36000
