In [9]:
import requests
import pandas as pd
import numpy as np
import json
import regex as re

In [2]:
def get_request(endpoint, parameters=None, timeout=30):
    """Fetch a Census Data API endpoint and return the decoded JSON payload.

    Parameters
    ----------
    endpoint : str
        Path (optionally including a query string) appended to
        ``https://api.census.gov/data/``.
    parameters : dict, optional
        Query-string parameters forwarded to ``requests.get``. ``None``
        (the default) sends no extra parameters.
    timeout : float, optional
        Seconds ``requests`` waits for the server before giving up, so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict or list
        The JSON-decoded response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with any status code other than 200.
    """
    api_url = 'https://api.census.gov/data/'
    response = requests.get(api_url + endpoint, params=parameters, timeout=timeout)
    if response.status_code != 200:
        # Fail loudly here instead of printing and then trying to JSON-decode
        # an error page, which only produced a confusing secondary error.
        raise requests.HTTPError(
            "Request to {} failed. Error code {}:{}".format(api_url + endpoint, response.status_code, response.text),
            response=response,
        )

    return json.loads(response.text)

In [31]:
# Download the 2019 variable catalogue for the acs/acs1/subject dataset and
# keep only the mapping stored under its 'variables' key.
url = '/acs/acs1/subject/variables.json'
catalogue = get_request('{}{}'.format(2019, url))
variables = catalogue['variables']

In [32]:
# Gather the IDs of every variable whose label contains the education
# marker (case-insensitive), echoing each matching label as it is found.
education = []
edu_str = 'percent!!age by educational attainment!!'
for var, meta in variables.items():
    label = meta['label']
    if edu_str.lower() not in label.lower():
        continue
    education.append(var)
    print(label)

Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Less than 9th grade
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!9th to 12th grade, no diploma
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Bachelor's degree or higher
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!High school graduate (includes equivalency)
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Some college or associate's degree
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Less than high school graduate
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate (includes equivalency)
Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 35 t

In [33]:
# Show the variable IDs collected in `education`.
print(education)

['S1501_C02_007E', 'S1501_C02_008E', 'S1501_C02_005E', 'S1501_C02_006E', 'S1501_C02_003E', 'S1501_C02_004E', 'S1501_C02_001E', 'S1501_C02_002E', 'S1501_C02_009E', 'S1501_C02_019E', 'S1501_C02_017E', 'S1501_C02_018E', 'S1501_C02_015E', 'S1501_C02_016E', 'S1501_C02_013E', 'S1501_C02_014E', 'S1501_C02_011E', 'S1501_C02_012E', 'S1501_C02_010E', 'S1501_C02_027E', 'S1501_C02_025E', 'S1501_C02_026E', 'S1501_C02_023E', 'S1501_C02_024E', 'S1501_C02_021E', 'S1501_C02_022E', 'S1501_C02_020E']


In [46]:
# Select the "Percent Male" / "Percent Female" totals for the civilian
# employed population from the occupation-by-sex concept.
# The pattern and the concept text do not change between iterations, so they
# are prepared once instead of on every pass through the loop.
occupation_re = re.compile('^estimate!!percent (male|female)!!civilian employed population 16 years and over$')
cat_str = 'OCCUPATION BY SEX FOR THE CIVILIAN EMPLOYED POPULATION 16 YEARS AND OVER'
cat_lower = cat_str.lower()

occupation = []
for var, meta in variables.items():
    label = meta['label']
    # The income selection guards against entries without a 'concept' key;
    # do the same here by treating a missing/empty concept as "no match".
    concept = (meta.get('concept') or '').lower()
    if occupation_re.match(label.lower()) and cat_lower in concept:
        occupation.append(var)
        print(label)

Estimate!!Percent Female!!Civilian employed population 16 years and over
Estimate!!Percent Male!!Civilian employed population 16 years and over


In [35]:
# Sanity check: how many variable IDs ended up in `occupation`.
print(len(occupation))

2


In [47]:
# Select employment-status variables for the "population 16 years and over"
# rows whose concept reads "Population ... and over in the United States".
# Both patterns are loop-invariant, so they are compiled once up front.
emp_re = re.compile('^estimate!!.*employment status!!.*population 16 years and over$')
concept_re = re.compile('^population.*and over in the united states$')

employment = []
for var, meta in variables.items():
    label = meta['label']
    # The income selection guards against entries without a 'concept' key;
    # do the same here by treating a missing/empty concept as "no match".
    concept = (meta.get('concept') or '').lower()
    if emp_re.match(label.lower()) and concept_re.match(concept):
        employment.append(var)
        print(label)

Estimate!!60 years and over!!EMPLOYMENT STATUS!!Population 16 years and over
Estimate!!65 years and over!!EMPLOYMENT STATUS!!Civilian population 16 years and over
Estimate!!Total!!EMPLOYMENT STATUS!!Population 16 years and over
Estimate!!Total!!EMPLOYMENT STATUS!!Civilian population 16 years and over


In [37]:
# Show the variable IDs collected in `employment`.
print(employment)

['S0102_C02_066E', 'S0103_C02_066E', 'S0102_C01_066E', 'S0103_C01_066E']


In [38]:
# Pick the "HOUSEHOLD INCOME / All households" variables that belong to the
# mean-income concept; entries without a 'concept' key are skipped.
income = []
income_re = re.compile("^estimate!!.+!!household income!!all households$")
income_str = 'MEAN INCOME IN THE PAST 12 MONTHS (IN 2019 INFLATION-ADJUSTED DOLLARS)'
for var, meta in variables.items():
    if 'concept' not in meta:
        continue
    if not re.match(income_re, meta['label'].lower()):
        continue
    if meta['concept'].lower() != income_str.lower():
        continue
    income.append(var)
    print(meta['label'])

Estimate!!Percent Distribution!!HOUSEHOLD INCOME!!All households
Estimate!!Mean income (dollars)!!HOUSEHOLD INCOME!!All households
Estimate!!Number!!HOUSEHOLD INCOME!!All households


In [39]:
# Show the variable IDs collected in `income`.
print(income)

['S1902_C02_001E', 'S1902_C03_001E', 'S1902_C01_001E']


In [40]:
# Assemble one query for all selected variables, in the order
# education -> occupation -> income -> employment, and request them for
# every metropolitan/micropolitan statistical area.
params = [*education, *occupation, *income, *employment]
variable_csv = ','.join(params)
endpoint = '2019/acs/acs1/subject?get=NAME,{}&for=metropolitan%20statistical%20area/micropolitan%20statistical%20area:*'.format(variable_csv)

results = get_request(endpoint)

In [41]:
# Build a labelled DataFrame from the API payload, whose first row is the
# header and whose remaining rows are the data.
MAX_NULLS = 30  # a variable column is kept only while it has fewer nulls than this

# Work on a shallow copy so pop() does not strip the header from `results`,
# which keeps this cell safe to re-run.
results_cp = results.copy()
columns = results_cp.pop(0)
df = pd.DataFrame(results_cp, columns=columns)

# Keep catalogue variables that are mostly populated. Names listed in `skip`
# are appended separately below, so exclude them here to avoid duplicates.
# NOTE(review): the -888888888 values visible in the output are not nulls, so
# they pass this filter -- confirm whether those columns should be dropped.
skip = ['NAME', 'metropolitan statistical area/micropolitan statistical area']
columns = [
    i for i in columns
    if (i in variables) and (i not in skip) and (df[i].isnull().sum() < MAX_NULLS)
]
columns.extend(skip)

# .copy() yields an independent frame, so adding a column below does not
# write into a selection of the original (avoids SettingWithCopyWarning).
df = df[columns].copy()

# Replace variable IDs with their human-readable labels; the identifier
# columns in `skip` keep their names.
columns = [variables[i]['label'] if i not in skip else i for i in columns]
df.columns = columns

# Flag rows whose name contains the literal text 'Metro Area'.
df['metro area'] = df['NAME'].str.contains('Metro Area', regex=False, na=False)

df[df['metro area']].head(2)

Unnamed: 0,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Less than 9th grade,"Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!9th to 12th grade, no diploma",Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Bachelor's degree or higher,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!High school graduate (includes equivalency),Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Some college or associate's degree,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!Less than high school graduate,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate (includes equivalency),Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 35 to 44 years,...,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 45 to 64 years,Estimate!!Percent!!AGE BY EDUCATIONAL ATTAINMENT!!Population 35 to 44 years!!High school graduate or higher,Estimate!!Percent Female!!Civilian employed population 16 years and over,Estimate!!Percent Male!!Civilian employed population 16 years and over,Estimate!!Percent Distribution!!HOUSEHOLD INCOME!!All households,Estimate!!Mean income (dollars)!!HOUSEHOLD INCOME!!All households,Estimate!!Number!!HOUSEHOLD INCOME!!All households,NAME,metropolitan statistical area/micropolitan statistical area,metro area
1,3.6,6.5,9.5,-888888888,41.0,40.6,-888888888,8.8,32.1,-888888888,...,-888888888,88.7,50.6,49.4,61750,71517,61750,"Abilene, TX Metro Area",10180,True
3,20.0,9.1,12.1,-888888888,22.5,56.8,-888888888,8.6,29.0,-888888888,...,-888888888,81.8,43.5,56.5,101568,25781,101568,"Aguadilla-Isabela, PR Metro Area",10380,True


In [44]:
# Persist the labelled table to the working directory. With the default
# arguments the DataFrame index is written as the first CSV column.
df.to_csv('acs_survey.csv')

In [51]:
def _metro_areas_in_state(names, state):
    """Return metro-area names, without the ' Metro Area' suffix, located in `state`.

    Parameters
    ----------
    names : iterable of str
        Area names such as 'Columbus, GA-AL Metro Area'.
    state : str
        Two-letter state abbreviation to look for, e.g. 'AL'.

    Returns
    -------
    list of str
        Matching names in their original order, suffix removed.
    """
    matches = []
    for name in names:
        if 'Metro Area' not in name:
            continue
        place = name.replace(' Metro Area', '')
        # The state field follows the last ", " and may list several states
        # joined by "-" (e.g. "GA-AL"). Comparing against that field avoids
        # matching the letters of `state` elsewhere in the name.
        states = place.rpartition(', ')[2].split('-')
        if state in states:
            matches.append(place)
    return matches


cities = df['NAME']
alabama = _metro_areas_in_state(cities, 'AL')
print(alabama)

['Anniston-Oxford, AL', 'Auburn-Opelika, AL', 'Birmingham-Hoover, AL', 'Columbus, GA-AL', 'Daphne-Fairhope-Foley, AL', 'Decatur, AL', 'Dothan, AL', 'Florence-Muscle Shoals, AL', 'Gadsden, AL', 'Huntsville, AL', 'Mobile, AL', 'Montgomery, AL', 'Tuscaloosa, AL']
