In [1]:
import pandas as pd
import requests


def get_data(year, dataset, file_dict, geodf):

    
    
    tblid = file_dict['file'].lower()
    #define urls for data and geography, update for new 2021 1yr location
    years = {
        2021: f'https://www2.census.gov/programs-surveys/acs/summary_file/{year}/table-based-SF/data/{dataset}YRData/acsdt{dataset}y{year}-{tblid}.dat',
        2020: f'https://www2.census.gov/programs-surveys/acs/summary_file/{year}/prototype/{dataset}YRData/acsdt{dataset}y{year}-{tblid}.dat'
    }
    data_url = years[year]#f"https://www2.census.gov/programs-surveys/acs/summary_file/{year}/table-based-SF/data/{dataset}YRData/acsdt{dataset}y{year}-{tblid}.dat"
    # geo_url = f"https://www2.census.gov/programs-surveys/acs/summary_file/{year}/table-based-SF/documentation/Geos{year}{dataset}YR.txt"

    #read data into dataframe
    # data = pd.read_csv(data_url, sep='|', index_col="GEO_ID")
    reader = pd.read_csv(data_url, sep='|', index_col='GEO_ID', chunksize=5000)
    data = None
    for chunk in reader:
        chunk = chunk[file_dict['fields']]
        chunk = chunk.join(geodf)
        chunk = chunk[file_dict['fields']]
        data = chunk if data is None else pd.concat([data,chunk])

    # data = pd.concat([x[file_dict['fields']] for x in reader], ignore_index=True)
    # geos = pd.read_csv(geo_url, sep='|', index_col="GEO_ID")

    # data = data.drop(data.columns.difference(cols), axis=1)
    data = data.rename(dict(zip(file_dict['fields'], file_dict['names'])), axis=1)

    #add geo file names and search for state
    # data = data.join(geos[["NAME", "STUSAB"]])
    # if state != 'ALL':
    #     data = data.loc[data["STUSAB"]==state]

    #output
    # data.to_csv(f"{tblid}.dat", sep="|")
    # print(f"Done. {tblid}.dat created")
    return data


def get_geo(year, dataset, sumlevels):

    geo_url = f"https://www2.census.gov/programs-surveys/acs/summary_file/{year}/table-based-SF/documentation/Geos{year}{dataset}YR.txt"
    geo_url = 'https://www2.census.gov/programs-surveys/acs/summary_file/2020/prototype/Geos20205YR.csv'
    idx = 'GEOID'
    sep = ','
    #read data into dataframe
    # geos = pd.read_csv(geo_url, sep='|', index_col="GEO_ID")
    reader = pd.read_csv(geo_url, sep=sep, index_col=idx, chunksize=5000, encoding='latin')
    geos = pd.concat([x[x.SUMLEVEL.isin(sumlevels)] for x in reader])

    return geos

def get_drive_time(x):
    col_map = {
        'B08303_E002': 5,
        'B08303_E003': 7,	
        'B08303_E004': 12,  
        'B08303_E005': 17,
        'B08303_E006': 22,
        'B08303_E007': 27,
        'B08303_E008': 32,
        'B08303_E009': 37,
        'B08303_E010': 42,
        'B08303_E011': 52,
        'B08303_E012': 75,
        'B08303_E013': 90
    }
    cumsum = x['B08303_E002']
    field = 2
    while cumsum < x['B08303_E001'] * .5 and field < 13:
        cumsum += x['B08303_E0{}'.format(str(field).zfill(2))]
        field += 1
    return col_map['B08303_E0{}'.format(str(field).zfill(2))]

def get_education(x, maxf):
    cols = [
        ('B15002_E003','B15002_E020','No schooling completed')
        ,('B15002_E004','B15002_E021','Nursery to 4th grade')
        ,('B15002_E005','B15002_E022','5th and 6th grade')
        ,('B15002_E006','B15002_E023','7th and 8th grade')
        ,('B15002_E007','B15002_E024','9th grade')
        ,('B15002_E008','B15002_E025','10th grade')
        ,('B15002_E009','B15002_E026','11th grade')
        ,('B15002_E010','B15002_E027','12th grade, no diploma')
        ,('B15002_E011','B15002_E028','High school graduate (includes equivalency)')
        ,('B15002_E012','B15002_E029','Some college, less than 1 year')
        ,('B15002_E013','B15002_E030','Some college, 1 or more years, no degree')
        ,('B15002_E014','B15002_E031',"Associate's degree")
        ,('B15002_E015','B15002_E032',"Bachelor's degree")
        ,('B15002_E016','B15002_E033',"Master's degree")
        ,('B15002_E017','B15002_E034','Professional school degree')
        ,('B15002_E018','B15002_E035','Doctorate degree')
    ][::-1]

    total = x['B15002_E001']

    cumsum = 0
    for col in cols:
        cumsum += (x[col[0]] + x[col[1]])
        if col[2] == maxf:
            break
    return cumsum / total
def get_unemployment(x):
    unemployment_fields = [
        'B12006_E006',
        'B12006_E011',
        'B12006_E017',
        'B12006_E022',
        'B12006_E028',
        'B12006_E033',
        'B12006_E039',
        'B12006_E044',
        'B12006_E050',
        'B12006_E055'
    ]

    labor_force_fields = [
        'B12006_E004',
        'B12006_E009',
        'B12006_E015',
        'B12006_E020',
        'B12006_E026',
        'B12006_E031',
        'B12006_E037',
        'B12006_E042',
        'B12006_E048',
        'B12006_E053'
    ]

    uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
    return uer
def read_apply(year, dataset, file_dict, geodf):
    tblid = file_dict['file'].lower()
    func_map = file_dict['func_map']
    #define urls for data and geography, update for new 2021 1yr location
    # data_url = f"https://www2.census.gov/programs-surveys/acs/summary_file/{year}/table-based-SF/data/{dataset}YRData/acsdt{dataset}y{year}-{tblid}.dat"
    years = {
        2021: f'https://www2.census.gov/programs-surveys/acs/summary_file/{year}/table-based-SF/data/{dataset}YRData/acsdt{dataset}y{year}-{tblid}.dat',
        2020: f'https://www2.census.gov/programs-surveys/acs/summary_file/{year}/prototype/{dataset}YRData/acsdt{dataset}y{year}-{tblid}.dat'
    }
    data_url = years[year]

    reader = pd.read_csv(data_url, sep='|', index_col='GEO_ID', chunksize=5000, encoding='latin')
    data = None
    for chunk in reader:
        for k,v in func_map.items():
            chunk[k] = chunk.apply(v, axis=1)
        chunk = chunk[func_map.keys()]
        chunk = chunk.join(geodf)
        chunk = chunk[func_map.keys()]
        data = chunk if data is None else pd.concat([data,chunk])
    return data

In [1]:
api_cols = [
    'B01001A_001E',
    'B01001_001E',
    'B01002_001E',
    'B19013_001E',
    'B19301_001E',
    'B23020_001E',
    'B25058_001E',
    'B25105_001E',
    'B25071_001E',
    'B24031_003E',
    'B24031_004E',
    'B24031_005E',
    'B24031_006E',
    'B24031_007E',
    'B24031_008E',
    'B24031_010E',
    'B24031_011E',
    'B24031_012E',
    'B24031_014E',
    'B24031_015E',
    'B24031_017E',
    'B24031_018E',
    'B24031_019E',
    'B24031_021E',
    'B24031_022E',
    'B24031_024E',
    'B24031_025E',
    'B24031_026E',
    'B24031_027E',
    'B12006_004E',
    'B12006_009E',
    'B12006_015E',
    'B12006_020E',
    'B12006_026E',
    'B12006_031E',
    'B12006_037E',
    'B12006_042E',
    'B12006_048E',
    'B12006_053E',
    'B12006_006E',
    'B12006_011E',
    'B12006_017E',
    'B12006_022E',
    'B12006_028E',
    'B12006_033E',
    'B12006_039E',
    'B12006_044E',
    'B12006_050E',
    'B12006_055E',
    'B15002_001E',
    'B15002_004E',
    'B15002_005E',
    'B15002_006E',
    'B15002_007E',
    'B15002_008E',
    'B15002_009E',
    'B15002_010E',
    'B15002_011E',
    'B15002_012E',
    'B15002_013E',
    'B15002_014E',
    'B15002_015E',
    'B15002_016E',
    'B15002_017E',
    'B15002_018E',
    'B15002_021E',
    'B15002_022E',
    'B15002_023E',
    'B15002_024E',
    'B15002_025E',
    'B15002_026E',
    'B15002_027E',
    'B15002_028E',
    'B15002_029E',
    'B15002_030E',
    'B15002_031E',
    'B15002_032E',
    'B15002_033E',
    'B15002_034E',
    'B15002_035E',
    'B15002_020E',
    'B15002_003E',
    'B08303_002E',
    'B08303_003E',
    'B08303_004E',
    'B08303_005E',
    'B08303_006E',
    'B08303_007E',
    'B08303_008E',
    'B08303_009E',
    'B08303_010E',
    'B08303_011E',
    'B08303_012E',
    'B08303_013E',
    'B08303_001E',
]

geo_cols = [
    'NAME',
    'GEO_ID',
    'STATE',
    'COUNTY',
    'REGION',
    'PLACE',
    'SUMLEVEL',
    'UA',
    'CBSA'
]


url = 'https://api.census.gov/data/{year}/acs/acs5?get={fields}&for={geo}:*&key={key}'
geo = 'state'
year = 2021

fields = [','.join(geo_cols + api_cols[0:40]),','.join(geo_cols + api_cols[40:])]

key = 'c8763b1f3e686c707cf581321b8c16152e9d30e3'
url.format(year=year, fields=fields, geo=geo, key=key)



"https://api.census.gov/data/2021/acs/acs5?get=['NAME,GEO_ID,STATE,COUNTY,REGION,PLACE,SUMLEVEL,UA,CBSA,B01001A_001E,B01001_001E,B01002_001E,B19013_001E,B19301_001E,B23020_001E,B25058_001E,B25105_001E,B25071_001E,B24031_003E,B24031_004E,B24031_005E,B24031_006E,B24031_007E,B24031_008E,B24031_010E,B24031_011E,B24031_012E,B24031_014E,B24031_015E,B24031_017E,B24031_018E,B24031_019E,B24031_021E,B24031_022E,B24031_024E,B24031_025E,B24031_026E,B24031_027E,B12006_004E,B12006_009E,B12006_015E,B12006_020E,B12006_026E,B12006_031E,B12006_037E,B12006_042E,B12006_048E,B12006_053E,B12006_006E', 'NAME,GEO_ID,STATE,COUNTY,REGION,PLACE,SUMLEVEL,UA,CBSA,B12006_011E,B12006_017E,B12006_022E,B12006_028E,B12006_033E,B12006_039E,B12006_044E,B12006_050E,B12006_055E,B15002_001E,B15002_004E,B15002_005E,B15002_006E,B15002_007E,B15002_008E,B15002_009E,B15002_010E,B15002_011E,B15002_012E,B15002_013E,B15002_014E,B15002_015E,B15002_016E,B15002_017E,B15002_018E,B15002_021E,B15002_022E,B15002_023E,B15002_024E,B15002_02

In [114]:
import json
# filters = {
#         2009: ['B23020_001E','B12006_039E','B12006_044E','B12006_055E','B12006_026E','B24031_010E','B24031_019E','B12006_053E','B24031_017E']
#     }
# with open('filters.json', 'w') as f:
#     f.write(json.dumps(filters,indent=4))

In [2]:
import os
import json
import requests
import pandas as pd

url = 'https://api.census.gov/data/{year}/acs/acs5?get={fields}&for={geo}:*&key={key}'
# geo = 'state'
# year = 2009

geos = [
    ('state','state'),
    ('county','county'),
    ('place','place'),
    ('metro', 'metropolitan/micropolitan statistical area')
]

years = list(range(2009,2022))

files = os.listdir('data')

with open('filters.json', 'r') as f:
    filters = json.load(f)

def divide_chunks(l, n):
     
    # looping till length l
    for i in range(0, len(l), n):
        yield l[i:i + n]

filtered = list(set(api_cols) - set(filters.get(str(year),[])))
field_groups = [geo_cols + x for x in list(divide_chunks(filtered,40))]


for year in years:
    filtered = list(set(api_cols) - set(filters.get(str(year),[])))
    field_groups = [geo_cols + x for x in list(divide_chunks(filtered,40))]
    for geo in geos[:3]:
        if f'{geo[0]}_{year}_census_api.csv' not in files:
            base_df = None
            for group in field_groups:
                count = 0
                error = True
                while error and count < 200:
                    error = False
                    print(f'attempt #{count}')
                    count += 1
                    key = 'c8763b1f3e686c707cf581321b8c16152e9d30e3'
                    filtered_cols = list(set(group) - set(filters.get(str(year),[])))
                    fields = ','.join(filtered_cols)
                    formatted_url = url.format(year=year, fields=fields, geo=geo[1], key=key)

                    x = requests.get(formatted_url)



                    if x.status_code == 400 and 'error: error: unknown variable ' in x.text:
                        bad_var = x.text.replace('error: error: unknown variable ','').replace("'",'')
                        filters[str(year)] = filters.get(str(year),[]) + [bad_var]
                        with open('filters.json', 'w') as f:
                            json.dump(filters, f, indent=4)


                    # print(formatted_url)
                    # print(x.status_code)
                    
                    if x.status_code == 200:
                        try:
                            data = x.json()
                        except:
                            error = True
                            print(f'failed to parse json')
                    else:
                        error = True
                        print(f'failed with code: {x.status_code}')

                    # print(x.text)

                tmp_df = pd.DataFrame(data[1:],columns=data[0],).set_index('GEO_ID')
                if base_df is None:
                    base_df = tmp_df
                else:
                    cols_to_use = tmp_df.columns.difference(base_df.columns)
                    base_df = base_df.join(tmp_df[cols_to_use])
                # print(base_df.head())

            base_df['year'] = year
            base_df['geo_type'] = geo[0]
            base_df.to_csv(f'data/{geo[0]}_{year}_census_api.csv')
    

attempt #0
failed to parse json
attempt #1
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
failed to parse json
attempt #3
failed to parse json
attempt #4
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
failed to parse json
attempt #3
failed to parse json
attempt #4
attempt #0
failed to parse json
attempt #1
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
attempt #0
attempt #0
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
attempt #0
failed to parse json
attempt #1
failed to parse json
attempt #2
failed to parse json
attempt #3
failed to parse json
attempt #4
failed to parse json
attempt #5
failed to parse json
attempt #6
failed to parse json
attempt #7
fail

In [2]:


files = [
    {'file':'B01001',	'fields':	['B01001_E001'], 'names':	['total_population']},
    {'file':'B01002',	'fields':	['B01002_E001'], 'names':	['median_age']},
    {'file':'B01001A',	'fields':	['B01001A_E001'], 'names':['population_white']},
    {'file':'B19013',	'fields':	['B19013_E001'], 'names':	['median_household_income']},
    {'file':'B19301',	'fields':	['B19301_E001'], 'names':	['per_capita_income']},
    {'file':'B23020',	'fields':	['B23020_E001'], 'names':	['mean_hours_worked']},
    {'file':'B25058',	'fields':	['B25058_E001'], 'names':	['Median_contract_rent']},
    {'file':'B25105',	'fields':	['B25105_E001'], 'names':	['Median_monthly_housing_costs']},
    {'file':'B25071',	'fields':	['B25071_E001'], 'names':	['med_gross_rent_pct_hshld_income']},
    {
        'file':'B24031',
        'fields':[
            'B24031_E003',
            'B24031_E004',
            'B24031_E005',
            'B24031_E006',
            'B24031_E007',
            'B24031_E008',
            'B24031_E010',
            'B24031_E011',
            'B24031_E012',
            'B24031_E014',
            'B24031_E015',
            'B24031_E017',
            'B24031_E018',
            'B24031_E019',
            'B24031_E021',
            'B24031_E022',
            'B24031_E024',
            'B24031_E025',
            'B24031_E026',
            'B24031_E027',
        ],
        'names':[
            'med_inc_agriculture_forestry_fishing_hunting', 
            'med_inc_mining_quarrying_oil_gas_extraction', 
            'med_inc_construction', 
            'med_inc_manufacturing', 
            'med_inc_wholesale_trade', 
            'med_inc_retail_trade', 
            'med_inc_transportation_warehousing', 
            'med_inc_utilities', 
            'med_inc_information', 
            'med_inc_finance_insurance', 
            'med_inc_real_estate_rental_leasing', 
            'med_inc_professional_scientific_technical_services', 
            'med_inc_management_of_companies_enterprises', 
            'med_inc_administrative_support_waste_management_services', 
            'med_inc_educational_services', 
            'med_inc_health_care_social_assistance', 
            'med_inc_arts_entertainment_recreation', 
            'med_inc_accommodation_food_services', 
            'med_inc_other_services_except_public_administration', 
            'med_inc_public_administration'
        ]

    }
]

files2 = [
    {'file':'B08303',	'func_map':{
        'median_commute': get_drive_time
    }},
    {'file':'B15002',	'func_map':{
        'high_school': lambda x: get_education(x, 'High school graduate (includes equivalency)'),
        'associates': lambda x: get_education(x, "Associate's degree"),
        'bachelors': lambda x: get_education(x, "Bachelor's degree"),
        'masters': lambda x: get_education(x, "Master's degree"),
        'doctorate': lambda x: get_education(x, "Doctorate degree"),
    }},
    {
        'file': 'B12006', 'func_map':{
            'unemployment_rate': get_unemployment
        }
    }
]


In [3]:
# names=[
#             'Agriculture, forestry, fishing and hunting',
#             'Mining, quarrying, and oil and gas extraction',
#             'Construction',
#             'Manufacturing',
#             'Wholesale trade',
#             'Retail trade',
#             'Transportation and warehousing',
#             'Utilities',
#             'Information',
#             'Finance and insurance ',
#             'Real estate and rental and leasing ',
#             'Professional, scientific, and technical services',
#             'Management of companies and enterprises',
#             'Administrative and support and waste management services',
#             'Educational services',
#             'Health care and social assistance',
#             'Arts, entertainment, and recreation',
#             'Accommodation and food services',
#             'Other services, except public administration',
#             'Public administration'
#          ]

# print([
#     n.replace(',','').replace(' and ','_').replace(' ','_').lower() for n in names
# ])

In [4]:
year = 2020
dataset = 5
keep = [40,50,140,160,310]

df = get_geo(year, dataset, sumlevels=keep)

for f in files:
    print(f)
    data = get_data(year,dataset,f, df)
    df = df.join(data)



for f in files2:
    print(f)
    if f['file'] == 'B12006':
        df.to_csv(f"acs_{year}_main.dat", sep="|")
    data = read_apply(year,dataset,f, df)
    df = df.join(data)

df.to_csv(f"acs_{year}.csv", sep=",", index=False)

{'file': 'B01001', 'fields': ['B01001_E001'], 'names': ['total_population']}
{'file': 'B01002', 'fields': ['B01002_E001'], 'names': ['median_age']}
{'file': 'B01001A', 'fields': ['B01001A_E001'], 'names': ['population_white']}
{'file': 'B19013', 'fields': ['B19013_E001'], 'names': ['median_household_income']}
{'file': 'B19301', 'fields': ['B19301_E001'], 'names': ['per_capita_income']}
{'file': 'B23020', 'fields': ['B23020_E001'], 'names': ['mean_hours_worked']}
{'file': 'B25058', 'fields': ['B25058_E001'], 'names': ['Median_contract_rent']}
{'file': 'B25105', 'fields': ['B25105_E001'], 'names': ['Median_monthly_housing_costs']}
{'file': 'B25071', 'fields': ['B25071_E001'], 'names': ['med_gross_rent_pct_hshld_income']}
{'file': 'B24031', 'fields': ['B24031_E003', 'B24031_E004', 'B24031_E005', 'B24031_E006', 'B24031_E007', 'B24031_E008', 'B24031_E010', 'B24031_E011', 'B24031_E012', 'B24031_E014', 'B24031_E015', 'B24031_E017', 'B24031_E018', 'B24031_E019', 'B24031_E021', 'B24031_E022', '

  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum / total
  return cumsum 

{'file': 'B12006', 'func_map': {'unemployment_rate': <function get_unemployment at 0x7fcc28bb7dc0>}}


  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])
  uer = sum([x[f] for f in unemployment_fields]) / sum([x[f] for f in labor_force_fields])

In [10]:
df = pd.read_csv('/home/shumway743/cis5500_group/census/acs_2021.dat',sep='|')
df['pct_minority'] = 1 - (df['population_white'] / df['total_population'])
df = df.drop('population_white',axis=1)
df.to_csv('acs_2021.csv')

  df = pd.read_csv('/home/shumway743/cis5500_group/census/acs_2021.dat',sep='|')


In [10]:
df = pd.read_csv('acs_2021.dat', delimiter='|')
df.head()

  df = pd.read_csv('acs_2021.dat', delimiter='|')


Unnamed: 0,GEO_ID,FILEID,STUSAB,SUMLEVEL,COMPONENT,US,REGION,DIVISION,STATE,COUNTY,...,TL_GEO_ID,total_population,median_age,population_white,median_household_income,per_capita_income,mean_hours_worked,Median_contract_rent,Median_monthly_housing_costs,med_gross_rent_pct_hshld_income
0,0400000US01,ACSSF,AL,40,0,,,,1.0,,...,1.0,4997675,39.3,3338590,54943,30458.0,39.2,630,823,28.9
1,0400000US02,ACSSF,AK,40,0,,,,2.0,,...,2.0,735951,35.0,458520,80287,39236.0,41.3,1153,1372,28.1
2,0400000US04,ACSSF,AZ,40,0,,,,4.0,,...,4.0,7079203,38.1,4985729,65913,34644.0,38.8,994,1147,29.3
3,0400000US05,ACSSF,AR,40,0,,,,5.0,,...,5.0,3006309,38.3,2227020,52123,29210.0,39.5,612,770,27.4
4,0400000US06,ACSSF,CA,40,0,,,,6.0,,...,6.0,39455353,37.0,20553732,84097,41276.0,38.1,1547,1791,32.2


In [None]:
{
    20:'Region',
    30:'Division',
    40:'State',
    50:'State-County',
    60:'State-County-County Subdivision',
    67:'State-County-County Subdivision-Subminor Civil Division',
    140:'State-County-Census Tract',
    150:'State-County-Census Tract-Block Group',
    160:'State-Place',
    170:'State-Consolidated City',
    230:'State-Alaska Native Regional Corporation',
    250:'American Indian Area/Alaska Native Area/Hawaiian Home Land',
    251:'American Indian Area-Tribal Subdivision/Remainder',
    252:'American Indian Area/Alaska Native Area (Reservation or Statistical Entity Only)',
    254:'American Indian Area (Off-Reservation Trust Land Only)/Hawaiian Home Land',
    256:'American Indian Area-Tribal Census Tract',
    258:'American Indian Area-Tribal Census Tract-Tribal Block Group',
    310:'Metropolitan Statistical Area/Micropolitan Statistical Area',
    314:'Metropolitan Statistical Area-Metropolitan Division',
    330:'Combined Statistical Area',
    332:'Combined Statistical Area-Metropolitan Statistical Area/Micropolitan Statistical Area',
    335:'Combined New England City and Town Area',
    337:'Combined New England City and Town Area-New England City and Town Area',
    350:'New England City and Town Area',
    352:'New England City and Town Area-State-Principal City',
    355:'New England City and Town Area (NECTA)-NECTA Division',
    361:'State-New England City and Town Area-Principal City',
    500:'State-Congressional District (111th)',
    610:'State-State Legislative District (Upper Chamber)',
    620:'State-State Legislative District (Lower Chamber)',
    700:'State-County-Voting District/Remainder',
    860:'5-Digit ZIP code Tabulation Area',
    950:'State-School District (Elementary)/Remainder',
    960:'State-School District (Secondary)/Remainder',
    970:'State-School District (Unified)/Remainder'
}

keep = [40,50,140,160,310]