In [65]:
import requests
import pandas as pd
import numpy as np
import json
import regex as re
pd.set_option('display.max_columns', 500)

In [2]:
def get_request(endpoint, parameters=None, timeout=30):
    """Send a GET request to the Census Bureau data API and return the parsed JSON.

    Args:
        endpoint: Path (optionally including a query string) appended to
            ``https://api.census.gov/data/``.
        parameters: Optional mapping of query-string parameters handed to
            ``requests.get``. ``None`` (the default) sends none.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the response status code is not 200. The
            message carries the URL, status code and response text.
    """
    api_url = 'https://api.census.gov/data/'
    url = api_url + endpoint
    response = requests.get(url, params=parameters, timeout=timeout)
    if response.status_code != 200:
        # Fail here with the server's message instead of falling through to
        # json.loads on an error body, which would surface as an unrelated
        # JSON decoding error.
        raise requests.HTTPError(
            "Request to {} failed. Error code {}:{}".format(url, response.status_code, response.text),
            response=response,
        )

    return json.loads(response.text)

In [168]:
# Record the possibilities for variables
# var_file = open('variable_possibilites.txt', 'w')

In [159]:
# Build one lookup of variable metadata for the 2019 ACS 1-year data:
# subject-table variables first, then data-profile variables merged on top.
variables = {}
for url in ('/acs/acs1/subject/variables.json', '/acs/acs1/profile/variables.json'):
    response = get_request(str(2019) + url)
    variables.update(response['variables'])

In [171]:
# Collect the codes of variables whose (lower-cased) label is a percent estimate
# under "age by educational attainment" mentioning "18 to 24" or "25 years".
edu_pattern = re.compile('^estimate!!percent!!age by educational attainment!!.+(18 to 24|25 years).+!!')
education = [
    code
    for code, meta in variables.items()
    if re.match(edu_pattern, meta['label'].lower())
]

# Echo the matching labels so the selection can be checked by eye.
for code in education:
    print(variables[code]['label'])

Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Less than 9th grade
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!9th to 12th grade, no diploma
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Bachelor's degree or higher
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!High school graduate (includes equivalency)
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Some college or associate's degree
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Less than high school graduate
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate (includes equivalency)
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Bachelor's degree or higher
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Graduate or professional degree


In [161]:
# Show the variable codes collected in `education`.
print(education)

['S1501_C02_007E', 'S1501_C02_008E', 'S1501_C02_005E', 'S1501_C02_003E', 'S1501_C02_004E', 'S1501_C02_002E', 'S1501_C02_009E', 'S1501_C02_017E', 'S1501_C02_018E', 'S1501_C02_015E', 'S1501_C02_013E', 'S1501_C02_014E', 'S1501_C02_011E', 'S1501_C02_012E', 'S1501_C02_010E', 'S1501_C02_027E', 'S1501_C02_026E', 'S1501_C02_023E', 'S1501_C02_024E', 'S1501_C02_021E', 'S1501_C02_020E']


In [178]:
# Select the "Percent Male" / "Percent Female" totals for the civilian employed
# population, restricted to the occupation-by-sex concept.
# The pattern and the lower-cased concept are loop invariants, so build them once.
occupation_re = re.compile('^estimate!!percent (male|female)!!civilian employed population 16 years and over$')
cat_str = 'OCCUPATION BY SEX FOR THE CIVILIAN EMPLOYED POPULATION 16 YEARS AND OVER'
cat_lower = cat_str.lower()

occupation = list()
for var, meta in variables.items():
    # The label is tested first: 'concept' is only read for entries whose label
    # already matched, exactly as the short-circuiting `and` did before.
    if occupation_re.match(meta['label'].lower()) and cat_lower in meta['concept'].lower():
        occupation.append(var)
        print(meta['label'])

Estimate!!Percent Female!!Civilian employed population 16 years and over
Estimate!!Percent Male!!Civilian employed population 16 years and over


In [163]:
# Number of variable codes collected in `occupation`.
print(len(occupation))

72


In [164]:
# Select employment-status variables and record their labels in the notes file.
# Both patterns are loop invariants, so compile them once up front.
emp_re = re.compile('^estimate!!.*employment status!!.*population 16 years and over!!')
concept_re = re.compile('^population.*and over in the united states$')

# The label is tested first so 'concept' is only read for entries whose label matched.
employment = [
    var
    for var, meta in variables.items()
    if emp_re.match(meta['label'].lower()) and concept_re.match(meta['concept'].lower())
]

# Open the notes file inside this cell so it runs on a fresh kernel (no module-level
# `var_file` handle is required) and the handle is always closed. Mode 'w' starts
# the file over, so re-running the notebook does not accumulate duplicate sections.
with open('variable_possibilites.txt', 'w') as var_file:
    var_file.write("----------{Employment Status}----------\n")
    var_file.writelines("{}\n".format(variables[var]['label']) for var in employment)

In [165]:
# Show the variable codes collected in `employment`.
print(employment)

['S0102_C02_068E', 'S0102_C02_069E', 'S0102_C02_067E', 'S0102_C02_072E', 'S0102_C02_073E', 'S0102_C02_070E', 'S0102_C02_071E', 'S0103_C02_070E', 'S0103_C02_071E', 'S0103_C02_068E', 'S0103_C02_067E', 'S0103_C02_069E', 'S0102_C01_073E', 'S0102_C01_072E', 'S0102_C01_070E', 'S0102_C01_071E', 'S0102_C01_069E', 'S0102_C01_068E', 'S0102_C01_067E', 'S0103_C01_069E', 'S0103_C01_068E', 'S0103_C01_067E', 'S0103_C01_071E', 'S0103_C01_070E']


In [166]:
# Select family income-and-benefits variables (estimate and percent) from the
# "selected economic characteristics" concept and record their labels.
income_re = re.compile("(percent|estimate)!!income and benefits.+!!families!!.+")
income_str = 'selected economic characteristics'

# Some metadata entries have no 'concept' key, so that check must come first.
income = [
    var
    for var, meta in variables.items()
    if 'concept' in meta
    and income_re.match(meta['label'].lower())
    and income_str.lower() == meta['concept'].lower()
]

# Open the notes file inside this cell so it runs on a fresh kernel (no module-level
# `var_file` handle is required) and the handle is always closed. Append mode
# keeps anything already written to the file.
with open('variable_possibilites.txt', 'a') as var_file:
    var_file.write("----------{Income}----------\n")
    var_file.writelines("{}\n".format(variables[var]['label']) for var in income)

In [167]:
# Show the variable codes collected in `income`.
print(income)

['DP03_0079PE', 'DP03_0084PE', 'DP03_0085PE', 'DP03_0078E', 'DP03_0079E', 'DP03_0076E', 'DP03_0077E', 'DP03_0082E', 'DP03_0083E', 'DP03_0080E', 'DP03_0081E', 'DP03_0077PE', 'DP03_0082PE', 'DP03_0086E', 'DP03_0087E', 'DP03_0084E', 'DP03_0085E', 'DP03_0078PE', 'DP03_0083PE', 'DP03_0080PE', 'DP03_0076PE', 'DP03_0081PE', 'DP03_0086PE', 'DP03_0087PE']


In [179]:
# Variable codes to request. Earlier experiments used education.copy()
# (optionally extended with income) or employment here instead.
params = occupation

# 2019 ACS 1-year subject tables for every metropolitan/micropolitan statistical area.
# NOTE(review): this path targets the *subject* tables; codes taken from the profile
# metadata (e.g. the DP03 income codes) presumably need the profile path — confirm.
endpoint_template = '2019/acs/acs1/subject?get=NAME,{}&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:*'
requested_codes = ','.join(params)
endpoint = endpoint_template.format(requested_codes)

results = get_request(endpoint)

In [180]:
# The API payload is a list of rows whose first row is the header.
# Unpacking leaves `results` itself untouched.
header, *rows = results
raw_df = pd.DataFrame(rows, columns=header)

# Identifier columns keep their API names; every other kept column is a variable
# code that gets replaced by its human-readable label.
skip = ['NAME', 'metropolitan statistical area/micropolitan statistical area']
# Excluding `skip` here prevents a duplicated column if the variable metadata
# also lists an identifier such as NAME.
variable_cols = [col for col in header if col in variables and col not in skip]
selected = variable_cols + skip

# .copy() gives an independent frame, so adding a column below cannot trigger
# pandas' SettingWithCopyWarning.
df = raw_df[selected].copy()
df.columns = [col if col in skip else variables[col]['label'] for col in selected]

# Flag rows whose name contains the literal text 'Metro Area'.
df['metro area'] = df['NAME'].str.contains('Metro Area', regex=False)

df[df['metro area']].head(2)

Unnamed: 0,Estimate!!Percent Female!!Civilian employed population 16 years and over,Estimate!!Percent Male!!Civilian employed population 16 years and over,NAME,metropolitan statistical area/micropolitan statistical area,metro area
1,50.6,49.4,"Abilene, TX Metro Area",10180,True
3,43.5,56.5,"Aguadilla-Isabela, PR Metro Area",10380,True


In [73]:
# (rows, columns) of `df`.
df.shape

(518, 25)

In [181]:
# Write only the rows flagged as metro areas to disk (index included).
metro_rows = df.loc[df['metro area'].eq(True)]
metro_rows.to_csv('acs_survey.csv')

In [51]:
# List metro areas whose name contains 'AL', with the ' Metro Area' suffix removed.
# NOTE(review): this is a plain substring test, so multi-state areas such as
# 'Columbus, GA-AL' are included; any name containing the letters 'AL' would be too.
cities = df['NAME']
alabama = []
for name in cities:
    if 'AL' in name and 'Metro Area' in name:
        alabama.append(name.replace(' Metro Area', ''))
print(alabama)

['Anniston-Oxford, AL', 'Auburn-Opelika, AL', 'Birmingham-Hoover, AL', 'Columbus, GA-AL', 'Daphne-Fairhope-Foley, AL', 'Decatur, AL', 'Dothan, AL', 'Florence-Muscle Shoals, AL', 'Gadsden, AL', 'Huntsville, AL', 'Mobile, AL', 'Montgomery, AL', 'Tuscaloosa, AL']
