# Get Census Data

This script is meant to access American Community Survey data using the Census API. When run in completion, this script will retrieve the data and export a CSV file where each row is a census tract and each column is one of the requested variables.

Currently the script returns data for each census **tract** within the states and counties specified.  The script will need to be slightly modified if **block group** information is desired.

## Initialize parameters: regions & variables of interest

In [14]:
from census import Census
from us import states
import pandas as pd
import time
import math

pd.set_option('display.max_columns', None) # This ensures we can view all the columns

# Specify state and county to download (select one)

loc_name, state_codes, county_codes = "balt_city", list([states.MD.fips]), list([510]) # Baltimore
# loc_name, state_codes, county_codes = "greater_balt", states.MD.fips, list([510, 5]) # Baltimore City and County
#loc_name, state_codes, county_codes = "maryland", states.MD.fips, None # All of Maryland
#loc_name, state_codes, county_codes = "delmarva", [states.MD.fips, states.DE.fips, states.VA.fips], None # All of DelMarVa

# CENSUS API Stuff
CENSUS_API = # Your API key here
c = Census(CENSUS_API) # Initialize census class with API key

# Generate codes for census variables of interest
var_ids = []
var_ids.extend(["B19001_0{:02d}E".format(x) for x in range(2, 18)]) # Household income over 12 months
var_ids.extend(["B19037_0{:02d}E".format(x) for x in range(1, 70)]) # Household income over 12 months by age

# Local Storage Parameters
LOCAL_DATA_DIR = './data/'
ATTR_FILE_END = '_census_data.csv'
attr_outfile = LOCAL_DATA_DIR + loc_name + ATTR_FILE_END


## Define helper functions

The following functions are useful for getting the census data and converting it to a pandas dataframe.

In [19]:
def build_tract_fips(record):
    fips_code = record['state'] + record['county'] + record['tract']
    return str(fips_code)

def census_tracts_to_dataframe(var_list, state_codes, county_codes):
    
    CALL_LIM = 30 # Can only request 50 records at a time
    fips_codes = []
    all_records = []
    
    census_df = []
    census_dict = {}
    num_chunks = int(math.ceil(1.0 * len(var_list) / CALL_LIM))
    
    for state_id in state_codes:
        print("State: {0}".format(state_id))
        
        for county_id in county_codes:
            print("County: {0}".format(county_id))
            
            for chunk_num in range(0, num_chunks):
                census_data = []
                
                chunk_start = chunk_num * CALL_LIM
                chunk_end = (chunk_num + 1) * CALL_LIM
                if chunk_end > len(var_list): 
                    chunk_end = len(var_list)

                print("Chunk start: {0}".format(chunk_start))
                print("Chunk end: {0}".format(chunk_end))
                    
                var_sublist = var_list[chunk_start:chunk_end]

                census_data = c.acs.get(var_sublist, {'for': 'tract:*', 'in': 'state:{0} county{1}'.format(state_id, county_id)})
                
                print("Got {0} records.".format(len(census_data)))
                
                for idx, record in enumerate(census_data):

                    # Build fips codes
                    fips_code = build_tract_fips(record)
                    #census_data[idx]["fips"] = fips_code
                    
                    # Eliminate original code components
                    key_list = ['state', 'county', 'tract']
                    for key in key_list:
                        if key in record: 
                            del record[key]
                    
                    if fips_code in census_dict:
                        census_dict[fips_code].update(record)
                    else:
                        census_dict[fips_code] = record

    census_df = pd.DataFrame(census_dict)
    census_df = census_df.transpose()
    census_df.index.name = "fips"
    
    return census_df


## Make call for census data, write to file

In [20]:
# This segment of code will get household income estimates for each block group in Baltimore city
census_df = census_tracts_to_dataframe(var_ids, state_codes, county_codes)
census_df.to_csv(attr_outfile) # Write to csv

State: 24
County: 510
Chunk start: 0
Chunk end: 30
Got 1406 records.
Chunk start: 30
Chunk end: 60
Got 1406 records.
Chunk start: 60
Chunk end: 85
Got 1406 records.
