In [38]:
# Import Dependencies

import json
import numpy as np
import pandas as pd
import matplotlib as plt
import requests
import time
from config import api_key


In [None]:
#total_pop = "B01001_001E"

## Age Group Codes
#Males age 18 to 19 = B01001_007E
#Males age 20 = B01001_008E
#Males age 21 = B01001_009E
#Males age 22 to 24 = B01001_010E
#Males age 25 to 29 = B01001_011E
#Males age 30 to 34 = B01001_012E
#Males age 35 to 39 = B01001_013E
#Males age 40 to 44 = B01001_014E

#Females age 18 to 19 = B01001_031E
#Females age 20 = B01001_032E
#Females age 21 = B01001_033E
#Females age 22 to 24 = B01001_034E
#Females age 25 to 29 = B01001_035E
#Females age 30 to 34 = B01001_036E
#Females age 35 to 39 = B01001_037E
#Females age 40 to 44 = B01001_038E

## Ethnicity Group Codes
#White = B02001_002E
#Black = B02001_003E
#Latino = B03001_003E
#Native Indian = B02001_004E
#Asian = B02001_005E
#Pacific Islander = B02001_006E
#Other = B02001_007E

## NEW Income Level
#income_pop = B25121_001E
#Less than $10,000 = B25121_002E
#$10,000 to $19,999 = B25121_017E
#$20,000 to $34,999 = B25121_032E
#$35,000 to $49,999 = B25121_047E
#$50,000 to $74,999 = B25121_062E
#$75,000 to $99,999 = B25121_077E
#$100,000 or more = B25121_092E

## Income Level (older B25095 codes; the income URL below requests the B25121 codes listed above)
#income_pop = B25095_001E
#Less than $10,000 = B25095_002E
#$10,000 to $19,999 = B25095_011E
#$20,000 to $34,999 = B25095_020E
#$35,000 to $49,999 = B25095_029E
#$50,000 to $74,999 = B25095_038E
#$75,000 to $99,999 = B25095_047E
#$100,000 to $149,999 = B25095_056E
#$150,000 or more = B25095_065E

## Education Level
#edu_pop = B23006_001E
#Less than high school graduate = B23006_002E
#High school graduate = B23006_009E
#Some college or associates = B23006_016E
#Bachelor's or higher = B23006_023E


In [39]:
# Build query URL for exploring Census API

# Base URL
url = "https://api.census.gov/data/2016/acs/acs5?get=NAME"

# API key parameter; note this should always be at the end of the URL
key = "&key=" + api_key

# Call for states, note that the State's designated number should follow the colon
state = "&in=state:"

# Call for individual counties, note that you must make the county call with the state call, it cannot be called
# by itself. Also, the county number should follow the colon
county = "&for=county:"

# Places API call, which allows for specific cities to be referred to. Requires the "state" call to be made as well,
# otherwise it might return an error.
city = "&for=place:"

# Metropolitan API call
metro = "&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:"

# ZIP code ZCTA API call
zip_code = "&for=zip%20code%20tabulation%20area:"

# A query is assembled as: base URL + geography call + its code + state call + its code + key.
# A "*" in place of a code calls all values (e.g. all places within that state).
# The example below asks for place 04000 within state 13.
query_url = url + city + "04000" + state + "13" + key

# The timeout keeps the cell from hanging on a dead connection, and raise_for_status
# surfaces an HTTP error directly rather than leaving .json() to fail on an error body
response = requests.get(query_url, timeout=30)
response.raise_for_status()

# Parsed JSON payload (kept under the original name `request`)
request = response.json()

# Display result of request
request


[['NAME', 'state', 'place'], ['Atlanta city, Georgia', '13', '04000']]

In [40]:
# Endpoint fragments for each data set, with the variable codes already filled in.
# A full request is base_url + year + one of these fragments + geography + key.

# Root of every request; the survey year is appended directly after it
base_url = "https://api.census.gov/data/"

# Every fragment starts the same way: the ACS 5-year path plus the NAME column
acs5_prefix = "/acs/acs5?get=NAME,"

# Age: total population, the male 18-44 bands, then the female 18-44 bands
age_url = acs5_prefix + ",".join([
    "B01001_001E",
    "B01001_007E", "B01001_008E", "B01001_009E", "B01001_010E",
    "B01001_011E", "B01001_012E", "B01001_013E", "B01001_014E",
    "B01001_031E", "B01001_032E", "B01001_033E", "B01001_034E",
    "B01001_035E", "B01001_036E", "B01001_037E", "B01001_038E",
])

# Ethnicity: total population, White, Black, Latino, Asian
ethnicity_url = acs5_prefix + ",".join([
    "B01001_001E", "B02001_002E", "B02001_003E", "B03001_003E", "B02001_005E",
])

# Income: table total followed by the seven income brackets
income_url = acs5_prefix + ",".join([
    "B25121_001E", "B25121_002E", "B25121_017E", "B25121_032E",
    "B25121_047E", "B25121_062E", "B25121_077E", "B25121_092E",
])

# Education: table total followed by the four attainment levels
edu_url = acs5_prefix + ",".join([
    "B23006_001E", "B23006_002E", "B23006_009E", "B23006_016E", "B23006_023E",
])

# Years and endpoints the API loop iterates over
year_list = [str(survey_year) for survey_year in range(2012, 2017)]
url_list = [age_url, ethnicity_url, income_url, edu_url]


In [41]:
# Lookup table: state name -> state code used by the Census API
state_dict = {
    "Georgia": "13",
    "Washington": "53",
    "Florida": "12",
    "New York": "36",
    "California": "06",
    "New Jersey": "34",
    "Oregon": "41",
    "Minnesota": "27",
    "Kansas": "20",
    "Massachusetts": "25",
    "Utah": "49",
    "Washington D.C.": "11",
    "Texas": "48",
    "Illinois": "17",
    "Pennsylvania": "42",
    "Ohio": "39",
    "Colorado": "08",
}

# Lookup table: city name -> place code used by the Census API
city_dict = {
    "Atlanta": "04000",
    "Seattle": "63000",
    "Orlando": "53000",
    "New York": "51000",
    "Carson": "11530",
    "Harrison": "51000",
    "Portland": "59000",
    "Minneapolis": "43000",
    "San Jose": "68000",
    "Kansas City": "36000",
    "Foxborough": "24855",
    "Sandy": "67440",
    "Washington D.C.": "50000",
    "Houston": "35000",
    "Bridgeview": "08225",
    "Chester": "13208",
    "Columbus": "18000",
    "Commerce City": "16495",
    "Frisco": "27684",
}

# State of each city, written in the same order as the entries of city_dict
state_order = [
    "Georgia", "Washington", "Florida", "New York", "California", "New Jersey", "Oregon",
    "Minnesota", "California", "Kansas", "Massachusetts", "Utah", "Washington D.C.", "Texas",
    "Illinois", "Pennsylvania", "Ohio", "Colorado", "Texas",
]

# State codes in city order, for use in the API call iteration
state_codes = [state_dict[state_name] for state_name in state_order]

# City codes in the same order, for use in the API call iteration
city_codes = [place_code for place_code in city_dict.values()]


In [42]:
# API request iteration (NOTE: running this cell takes time due to the time.sleep! Only run
# if you want the data itself! Running this will take at least 8 MINUTES before it finishes)

# One result list per data set, filled in request order: year first, then city
age_list = []
ethnicity_list = []
income_list = []
edu_list = []

# Routes each endpoint's response to the list that stores it
results_by_endpoint = {
    age_url: age_list,
    ethnicity_url: ethnicity_list,
    income_url: income_list,
    edu_url: edu_list,
}

# Key-less URLs of every call that failed, so gaps in the lists are visible afterwards
failed_urls = []

for year in year_list:

    # zip keeps every city code paired with its state code and avoids a hard-coded city count
    for counter, (city_code, state_code) in enumerate(zip(city_codes, state_codes), start=1):

        # Named `endpoint` so the loop does not overwrite the `url` defined in the exploration cell
        for endpoint in url_list:

            # Build the URL without the key first so the key never ends up in printed output
            public_url = base_url + year + endpoint + city + city_code + state + state_code
            query_url = public_url + key

            try:
                response = requests.get(query_url, timeout=30)
                response.raise_for_status()

                # Row 0 of the payload holds the column names; row 1 is the data for this place
                results_by_endpoint[endpoint].append(response.json()[1])

            except Exception as err:
                # Only the error type is shown: the error text itself can repeat the full URL and key
                failed_urls.append(public_url)
                print(f"There was an error with this url: {public_url} ({type(err).__name__})")

            finally:
                # Pause after every call, including failed ones, to stay gentle on the API
                time.sleep(1)

        print(f"#{counter} for Year {year} has completed all calls")

# Later cells expect one row per city per year in each list, so flag any gaps
if failed_urls:
    print(f"{len(failed_urls)} request(s) failed; the result lists are incomplete until those calls are retried")


#1 for Year 2012 has completed all calls
#2 for Year 2012 has completed all calls
#3 for Year 2012 has completed all calls
#4 for Year 2012 has completed all calls
#5 for Year 2012 has completed all calls
#6 for Year 2012 has completed all calls
#7 for Year 2012 has completed all calls
#8 for Year 2012 has completed all calls
#9 for Year 2012 has completed all calls
#10 for Year 2012 has completed all calls
#11 for Year 2012 has completed all calls
#12 for Year 2012 has completed all calls
#13 for Year 2012 has completed all calls
#14 for Year 2012 has completed all calls
#15 for Year 2012 has completed all calls
#16 for Year 2012 has completed all calls
#17 for Year 2012 has completed all calls
#18 for Year 2012 has completed all calls
#19 for Year 2012 has completed all calls
#1 for Year 2013 has completed all calls
#2 for Year 2013 has completed all calls
#3 for Year 2013 has completed all calls
#4 for Year 2013 has completed all calls
#5 for Year 2013 has completed all calls
#6 for

In [44]:
# Adding years to the age list
# The API loop fills age_list one year at a time with one row per city, so a row's
# position in the list determines its survey year.

cities_per_year = len(city_codes)
expected_rows = cities_per_year * len(year_list)

# A missing row would shift every later row into the wrong year, so stop early
if len(age_list) != expected_rows:
    raise ValueError(
        f"age_list has {len(age_list)} rows but {expected_rows} were expected; "
        "re-run the API cell before adding years"
    )

# row + [year] builds a new list and leaves age_list untouched, so re-running this
# cell does not add a second year to rows that were already tagged
age_by_year = [
    row + [year_list[position // cities_per_year]]
    for position, row in enumerate(age_list)
]
    

In [45]:
# Adding years to the ethnicity list
# The API loop fills ethnicity_list one year at a time with one row per city, so a row's
# position in the list determines its survey year.

cities_per_year = len(city_codes)
expected_rows = cities_per_year * len(year_list)

# A missing row would shift every later row into the wrong year, so stop early
if len(ethnicity_list) != expected_rows:
    raise ValueError(
        f"ethnicity_list has {len(ethnicity_list)} rows but {expected_rows} were expected; "
        "re-run the API cell before adding years"
    )

# row + [year] builds a new list and leaves ethnicity_list untouched, so re-running this
# cell does not add a second year to rows that were already tagged
ethnicity_by_year = [
    row + [year_list[position // cities_per_year]]
    for position, row in enumerate(ethnicity_list)
]

In [46]:
# Adding years to the income list
# The API loop fills income_list one year at a time with one row per city, so a row's
# position in the list determines its survey year.

cities_per_year = len(city_codes)
expected_rows = cities_per_year * len(year_list)

# A missing row would shift every later row into the wrong year, so stop early
if len(income_list) != expected_rows:
    raise ValueError(
        f"income_list has {len(income_list)} rows but {expected_rows} were expected; "
        "re-run the API cell before adding years"
    )

# row + [year] builds a new list and leaves income_list untouched, so re-running this
# cell does not add a second year to rows that were already tagged
income_by_year = [
    row + [year_list[position // cities_per_year]]
    for position, row in enumerate(income_list)
]

In [47]:
# Adding years to the education list
# The API loop fills edu_list one year at a time with one row per city, so a row's
# position in the list determines its survey year.

cities_per_year = len(city_codes)
expected_rows = cities_per_year * len(year_list)

# A missing row would shift every later row into the wrong year, so stop early
if len(edu_list) != expected_rows:
    raise ValueError(
        f"edu_list has {len(edu_list)} rows but {expected_rows} were expected; "
        "re-run the API cell before adding years"
    )

# row + [year] builds a new list and leaves edu_list untouched, so re-running this
# cell does not add a second year to rows that were already tagged
edu_by_year = [
    row + [year_list[position // cities_per_year]]
    for position, row in enumerate(edu_list)
]

In [48]:
# Convert the age values into integers and sum the matching male and female bands,
# thus combining male and female data into one total per age group.
#
# Row layout, following the age URL: name, total population, eight male bands
# (columns 2-9), eight female bands (columns 10-17), state code, place code, year.

age_combine = []

for row in age_by_year:

    # Named city_name / survey_year so the `city` URL fragment and the `year` loop
    # variable used by the API cell are not overwritten
    city_name = row[0]
    pop = int(row[1])

    # Pair each male band with the female band eight columns later
    band_totals = [int(row[col]) + int(row[col + 8]) for col in range(2, 10)]

    # The year was appended last, so it is always the final element
    survey_year = row[-1]

    # First five bands cover ages 18-29, the remaining three cover ages 30-44
    tot1829 = sum(band_totals[:5])
    tot3044 = sum(band_totals[5:])

    age_combine.append([city_name, survey_year, pop, tot1829, tot3044])

age_combine
    

[['Atlanta city, Georgia', '2012', 425931, 107143, 101845],
 ['Seattle city, Washington', '2012', 612916, 141245, 157786],
 ['Orlando city, Florida', '2012', 240185, 57870, 56140],
 ['New York city, New York', '2012', 8199221, 1593289, 1828653],
 ['Carson city, California', '2012', 91937, 16448, 18000],
 ['Newark city, New Jersey', '2012', 276478, 56131, 64131],
 ['Portland city, Oregon', '2012', 585888, 113642, 150432],
 ['Minneapolis city, Minnesota', '2012', 385023, 101775, 88109],
 ['San Jose city, California', '2012', 954379, 163311, 224103],
 ['Kansas City city, Kansas', '2012', 145605, 25380, 28695],
 ['Foxborough CDP, Massachusetts', '2012', 5524, 954, 1292],
 ['Sandy city, Utah', '2012', 87952, 13687, 17071],
 ['Washington city, District of Columbia', '2012', 605759, 153569, 138705],
 ['Houston city, Texas', '2012', 2107449, 423559, 472953],
 ['Bridgeview village, Illinois', '2012', 16394, 2585, 2976],
 ['Chester city, Pennsylvania', '2012', 34127, 7910, 5555],
 ['Columbus cit

In [57]:
# Convert the values in the income list (all years) to integers
#
# Row layout, following the income URL: name, table total, seven income brackets
# (columns 2-8), state code, place code, year.

income_combine = []

for row in income_by_year:

    # Named city_name / survey_year so the `city` URL fragment and the `year` loop
    # variable used by the API cell are not overwritten
    city_name = row[0]

    # The year was appended last, so it is always the final element
    survey_year = row[-1]

    # Columns 1-8: the table total followed by the seven brackets, in URL order
    counts = [int(value) for value in row[1:9]]

    income_combine.append([city_name, survey_year] + counts)

income_combine

[['Atlanta city, Georgia',
  '2012',
  82411,
  3927,
  5767,
  8848,
  8415,
  12978,
  9297,
  33179],
 ['Seattle city, Washington',
  '2012',
  135156,
  3116,
  4740,
  9032,
  10904,
  21507,
  19591,
  66266],
 ['Orlando city, Florida',
  '2012',
  39155,
  1592,
  3130,
  5172,
  5452,
  8160,
  4956,
  10693],
 ['New York city, New York',
  '2012',
  990609,
  32020,
  55634,
  99465,
  94989,
  159436,
  137947,
  411118],
 ['Carson city, California',
  '2012',
  18831,
  436,
  975,
  1756,
  1990,
  3318,
  3627,
  6729],
 ['Newark city, New Jersey',
  '2012',
  21652,
  851,
  1690,
  2796,
  2643,
  4780,
  3071,
  5821],
 ['Portland city, Oregon',
  '2012',
  133777,
  3948,
  7286,
  13251,
  15980,
  26782,
  20941,
  45589],
 ['Minneapolis city, Minnesota',
  '2012',
  83163,
  2419,
  4179,
  8173,
  9708,
  16267,
  13284,
  29133],
 ['San Jose city, California',
  '2012',
  178328,
  3451,
  5992,
  10920,
  13542,
  24284,
  23536,
  96603],
 ['Kansas City city, Ka

In [51]:
# First of the per-topic DataFrames: age

# Column headers, in the order the combined age rows were built
age_headers = ["City & State", "Year", "Total Population", "Age 18 to 29", "Age 30 to 44"]

# Load the combined rows, then label the columns for reference
age_df = pd.DataFrame(age_combine)
age_df.columns = age_headers

# Write the table to disk
age_df.to_csv("Output/Age_Data.csv")

# Show the table
age_df


Unnamed: 0,City & State,Year,Total Population,Age 18 to 29,Age 30 to 44
0,"Atlanta city, Georgia",2012,425931,107143,101845
1,"Seattle city, Washington",2012,612916,141245,157786
2,"Orlando city, Florida",2012,240185,57870,56140
3,"New York city, New York",2012,8199221,1593289,1828653
4,"Carson city, California",2012,91937,16448,18000
5,"Newark city, New Jersey",2012,276478,56131,64131
6,"Portland city, Oregon",2012,585888,113642,150432
7,"Minneapolis city, Minnesota",2012,385023,101775,88109
8,"San Jose city, California",2012,954379,163311,224103
9,"Kansas City city, Kansas",2012,145605,25380,28695


In [None]:
# Create new dataframe using previous dataframe to hold percentile values




In [56]:
# DataFrame for ethnicity

# Headers for every element of an ethnicity row, in the order the rows were built
ethnicity_headers = ["City & State", "Total Population", "White", "Black/Afro-American", "Latino", "Asian",
                     "State Code", "City Code", "Year"]

# Columns that hold population counts
ethnicity_counts = ["Total Population", "White", "Black/Afro-American", "Latino", "Asian"]

ethnicity_df = pd.DataFrame(ethnicity_by_year)

# Rename columns
ethnicity_df.columns = ethnicity_headers

# These rows were never passed through int() the way the age and income rows were, so
# convert the counts here; otherwise arithmetic on them would act on text, not numbers
ethnicity_df[ethnicity_counts] = ethnicity_df[ethnicity_counts].apply(pd.to_numeric)

# Remove extraneous columns (state and city codes) and put the year next to the city
ethnicity_df = ethnicity_df.loc[:, ["City & State", "Year"] + ethnicity_counts]

# Save DataFrame
ethnicity_df.to_csv("Output/Ethnicity_Data.csv")

# Display the DataFrame
ethnicity_df

Unnamed: 0,City & State,Year,Total Population,White,Black/Afro-American,Latino,Asian
0,"Atlanta city, Georgia",2012,425931,166054,229317,22929,14160
1,"Seattle city, Washington",2012,612916,432714,47887,37880,85935
2,"Orlando city, Florida",2012,240185,139251,69373,61886,7873
3,"New York city, New York",2012,8199221,3646181,2059279,2343458,1053649
4,"Carson city, California",2012,91937,29620,17968,36340,23769
5,"Newark city, New Jersey",2012,276478,76275,143713,91931,4508
6,"Portland city, Oregon",2012,585888,454159,37922,54420,42426
7,"Minneapolis city, Minnesota",2012,385023,263089,67461,38156,21153
8,"San Jose city, California",2012,954379,448526,29674,315241,310783
9,"Kansas City city, Kansas",2012,145605,86500,39568,40187,4067


In [58]:
# Income-level DataFrame

# Column headers, in the order the combined income rows were built
income_headers = [
    "City & State", "Year", "Total Households",
    "Less Than 10k", "10k to 19k", "20k to 34k", "35k to 49k",
    "50k to 74k", "75k to 99k", "100k or More",
]

# Load the combined rows, then label the columns
income_df = pd.DataFrame(income_combine)
income_df.columns = income_headers

# Write the table to disk
income_df.to_csv("Output/Income_Data.csv")

# Show the table
income_df

Unnamed: 0,City & State,Year,Total Households,Less Than 10k,10k to 19k,20k to 34k,35k to 49k,50k to 74k,75k to 99k,100k or More
0,"Atlanta city, Georgia",2012,82411,3927,5767,8848,8415,12978,9297,33179
1,"Seattle city, Washington",2012,135156,3116,4740,9032,10904,21507,19591,66266
2,"Orlando city, Florida",2012,39155,1592,3130,5172,5452,8160,4956,10693
3,"New York city, New York",2012,990609,32020,55634,99465,94989,159436,137947,411118
4,"Carson city, California",2012,18831,436,975,1756,1990,3318,3627,6729
5,"Newark city, New Jersey",2012,21652,851,1690,2796,2643,4780,3071,5821
6,"Portland city, Oregon",2012,133777,3948,7286,13251,15980,26782,20941,45589
7,"Minneapolis city, Minnesota",2012,83163,2419,4179,8173,9708,16267,13284,29133
8,"San Jose city, California",2012,178328,3451,5992,10920,13542,24284,23536,96603
9,"Kansas City city, Kansas",2012,32123,1880,2936,5906,5109,7287,4232,4773


In [55]:
# DataFrame for Education Level

# Headers for every element of an education row, in the order the rows were built
edu_headers = ["City & State", "Edu Total Pop", "Less Than High School Grad", "High School Grad",
               "Some College or Associates", "Bachelor's Degree or Higher", "State Code", "City Code", "Year"]

# Columns that hold population counts
edu_counts = ["Edu Total Pop", "Less Than High School Grad", "High School Grad",
              "Some College or Associates", "Bachelor's Degree or Higher"]

edu_df = pd.DataFrame(edu_by_year)

# Rename the columns
edu_df.columns = edu_headers

# These rows were never passed through int() the way the age and income rows were, so
# convert the counts here; otherwise arithmetic on them would act on text, not numbers
edu_df[edu_counts] = edu_df[edu_counts].apply(pd.to_numeric)

# Remove extraneous columns (state and city codes) and put the year next to the city
edu_df = edu_df.loc[:, ["City & State", "Year"] + edu_counts]

# Save DataFrame
edu_df.to_csv("Output/Edu_Data.csv")

# Display the dataframe
edu_df


Unnamed: 0,City & State,Year,Edu Total Pop,Less Than High School Grad,High School Grad,Some College or Associates,Bachelor's Degree or Higher
0,"Atlanta city, Georgia",2012,240515,25188,47446,50395,117486
1,"Seattle city, Washington",2012,379354,21675,39284,93888,224507
2,"Orlando city, Florida",2012,136311,14059,34168,42055,46029
3,"New York city, New York",2012,4565255,810354,1067343,1015411,1672147
4,"Carson city, California",2012,48161,8142,10830,17185,12004
5,"Newark city, New Jersey",2012,149541,40128,53380,36402,19631
6,"Portland city, Oregon",2012,355659,31090,58526,105895,160148
7,"Minneapolis city, Minnesota",2012,219340,24032,34830,55916,104562
8,"San Jose city, California",2012,530213,82858,94340,146017,206998
9,"Kansas City city, Kansas",2012,74709,15471,25025,22402,11811
