In [2]:
import pandas as pd # package for high-performance, easy-to-use data structures and data analysis
import numpy as np # fundamental package for scientific computing with Python

In [3]:
from subprocess import check_output
# Show which files are available in the input directory.
input_listing = check_output(["ls", "./input_data"])
print(input_listing.decode("utf8"))

fip_codes_2016.csv
geojson



In [4]:
# Load the semicolon-separated FIPS code table into a DataFrame.
fip_data = pd.read_csv("./input_data/fip_codes_2016.csv", sep=";")

In [5]:
# Preview the first five rows to confirm the columns were parsed as expected.
fip_data.head(n=5)

Unnamed: 0,Summary Level,State Code (FIPS),State,County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
0,50,1,Alabama,1,0,0,0,Autauga County
1,50,1,Alabama,3,0,0,0,Baldwin County
2,50,1,Alabama,5,0,0,0,Barbour County
3,50,1,Alabama,7,0,0,0,Bibb County
4,50,1,Alabama,9,0,0,0,Blount County


In [6]:
import os

def make_sure_directory_exists(path):
    """Create the directory ``path`` (including missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. permissions).
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and then calling os.makedirs().
    os.makedirs(path, exist_ok=True)
        
def export(path, filename, df):
    """Write ``df`` as a comma-separated CSV file named ``filename`` inside ``path``.

    The target directory is created first when it does not exist yet.
    The DataFrame index is not written to the file.
    """
    make_sure_directory_exists(path)
    df.to_csv(os.path.join(path, filename), index=False, sep=',')
    

In [None]:
# Exports all state codes, county codes and census tracts for 2016 from the
# HMDA database: one CSV per state, one CSV with all states, and one CSV with
# the tracts whose census tract number is missing.
import requests

verbose = True
path = './export/census_tracts_lookup'
states = fip_data['State'].unique()

# Positions of the fields read from each fip_data row.
STATE_CODE_POS = 1
STATE_NAME_POS = 2
COUNTY_CODE_POS = 3
COUNTY_NAME_POS = 7

# Column layout shared by every exported CSV file.
EXPORT_COLUMNS = ['state_code', 'state', 'county_code', 'county', 'census_tracts', 'census_tract_number']

# Upper bound (seconds) for a single API request so a stalled connection
# cannot hang the whole export.
REQUEST_TIMEOUT_SECONDS = 60

# The county code is sent zero-padded to three digits.
url_template = 'https://api.consumerfinance.gov/data/hmda/slice/census_tracts.json?$where=state_code={}+AND+county_code+IN+({:03d})&$limit=6000'

err_data = []
us_data = []
# One session is shared so the connection is reused across the many requests.
with requests.Session() as session:
    for state in states:
        if verbose:
            print("State {}".format(state))
        fip_data_state = fip_data.loc[fip_data['State'] == state]

        state_data = []
        for _, row in fip_data_state.iterrows():
            # .iloc makes the positional access explicit; plain row[n] on a
            # label-indexed Series is deprecated in recent pandas versions.
            state_code = row.iloc[STATE_CODE_POS]
            state_name = row.iloc[STATE_NAME_POS]
            county_code = row.iloc[COUNTY_CODE_POS]
            county_name = row.iloc[COUNTY_NAME_POS]

            if verbose:
                print("\tCounty {}".format(county_name))
            url = url_template.format(state_code, county_code)
            http_response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
            http_response.raise_for_status()
            response = http_response.json()

            for census in response['results']:
                result = [
                    state_code,
                    state_name,
                    county_code,
                    county_name,
                    census['census_tracts'],
                    census['census_tract_number'],
                ]
                state_data.append(result)
                us_data.append(result)
                # Checked per tract, so every record with a missing tract
                # number is reported and counties without results are safe.
                if census['census_tract_number'] is None or census['census_tract_number'] == '':
                    err_data.append(result)

        # Name the file after the state being processed rather than a
        # variable left over from the county loop.
        filename = '{}.csv'.format(state)
        filepath = os.path.join(path, filename)
        if verbose:
            print("// Exporting to '{}'".format(filepath))
        state_df = pd.DataFrame(state_data, dtype=str, columns=EXPORT_COLUMNS)
        export(path, filename, state_df)

filename = 'all_states.csv'
filepath = os.path.join(path, filename)
if verbose:
    print("// Final Export to '{}'".format(filepath))
us_df = pd.DataFrame(us_data, dtype=str, columns=EXPORT_COLUMNS)
export(path, filename, us_df)

filename = 'faulty_census_tracts.csv'
filepath = os.path.join(path, filename)
if verbose:
    print("// Export {} of {} faulty census tracks to '{}'".format(len(err_data), len(us_data), filepath))
err_df = pd.DataFrame(err_data, dtype=str, columns=EXPORT_COLUMNS)
export(path, filename, err_df)


State Alabama
	County Autauga County
	County Baldwin County
	County Barbour County
	County Bibb County
	County Blount County
	County Bullock County
	County Butler County
	County Calhoun County
	County Chambers County
	County Cherokee County
	County Chilton County
	County Choctaw County
	County Clarke County
	County Clay County
	County Cleburne County
	County Coffee County
	County Colbert County
	County Conecuh County
	County Coosa County
	County Covington County
	County Crenshaw County
	County Cullman County
	County Dale County
	County Dallas County
	County DeKalb County
	County Elmore County
	County Escambia County
	County Etowah County
	County Fayette County
	County Franklin County
	County Geneva County
	County Greene County
	County Hale County
	County Henry County
	County Houston County
	County Jackson County
	County Jefferson County
	County Lamar County
	County Lauderdale County
	County Lawrence County
	County Lee County
	County Limestone County
	County Lowndes County
	County Macon

// Exporting to './export/census_tracts_lookup/District of Columbia.csv'
State Florida
	County Alachua County
	County Baker County
	County Bay County
	County Bradford County
	County Brevard County
	County Broward County
	County Calhoun County
	County Charlotte County
	County Citrus County
	County Clay County
	County Collier County
	County Columbia County
	County DeSoto County
	County Dixie County
	County Duval County
	County Escambia County
	County Flagler County
	County Franklin County
	County Gadsden County
	County Gilchrist County
	County Glades County
	County Gulf County
	County Hamilton County
	County Hardee County
	County Hendry County
	County Hernando County
	County Highlands County
	County Hillsborough County
	County Holmes County
	County Indian River County
	County Jackson County
	County Jefferson County
	County Lafayette County
	County Lake County
	County Lee County
	County Leon County
	County Levy County
	County Liberty County
	County Madison County
	County Manatee County
	C

	County Peoria County
	County Perry County
	County Piatt County
	County Pike County
	County Pope County
	County Pulaski County
	County Putnam County
	County Randolph County
	County Richland County
	County Rock Island County
	County St. Clair County
	County Saline County
	County Sangamon County
	County Schuyler County
	County Scott County
	County Shelby County
	County Stark County
