# Download 2010 Census Tract and Block Group Shapes

In [93]:
import csv, glob, json, os, re, shutil
import subprocess, sys, threading, traceback, urllib2

def exec_ipynb(filename_or_url):
    """Execute all code cells of another notebook in this module's globals.

    Parameters
    ----------
    filename_or_url : str
        Path of a local notebook file, or a URL beginning with ``http:`` or
        ``https:`` (fetched with ``urllib2.urlopen``).

    The notebook JSON is parsed, the source of every cell whose
    ``cell_type`` is ``'code'`` is joined into a single program, and that
    program is executed with ``globals()`` as its namespace, so anything it
    defines becomes available here.

    NOTE(review): this runs whatever code the notebook contains; only point
    it at files or URLs you trust.
    """
    if re.match(r'https?:', filename_or_url):
        handle = urllib2.urlopen(filename_or_url)
    else:
        handle = open(filename_or_url)
    # Close the handle explicitly instead of leaving it to the garbage
    # collector. try/finally is used because the object returned by
    # urllib2.urlopen is not a context manager on Python 2.
    try:
        nb = handle.read()
    finally:
        handle.close()

    jsonNb = json.loads(nb)

    # nbformat 4 keeps cells at the top level with their code under
    # 'source'; other versions are read from the first worksheet, with the
    # code under 'input'.
    if jsonNb['nbformat'] == 4:
        cells = jsonNb['cells']
        source_key = 'source'
    else:
        cells = jsonNb['worksheets'][0]['cells']
        source_key = 'input'

    code = '\n'.join(
        ''.join(cell[source_key])
        for cell in cells
        if cell['cell_type'] == 'code'
    )
    # The parenthesised two-argument form is equivalent to
    # `exec code in globals()` on Python 2.
    exec(code, globals())

# Run the code cells of timelapse-utilities.ipynb in this notebook's globals.
# NOTE(review): helpers used below but not defined in this notebook
# (download_file, unzip_file, subprocess_check, SimpleThreadPoolExecutor)
# presumably come from here -- confirm.
exec_ipynb('timelapse-utilities.ipynb')

Census 2010 subdivisions, for U.S. only:
    
    ~50 states
    ~73K census tracts (approx 4000 people each)
    ~218K block groups (approx 1500 people each)
    ~11M blocks

### Download TIGER2010 shapefiles for 2010 Census tracts

In [90]:
def state_tract_geojson_path(state_id):
    """Return the path of the tract GeoJSON file for the given state id."""
    path_template = 'capture/tiger2010_census2010_tracts/{state_id}.geojson'
    return path_template.format(state_id=state_id)

def download_and_convert_state(state_id):
    tracts_geojson_path = state_tract_geojson_path(state_id)
    if os.path.exists(tracts_geojson_path):
        sys.stderr.write('{tracts_geojson_path} already exists, skipping\n'.format(**locals()))
        return
    
    src_zipfile = 'https://www2.census.gov/geo/tiger/TIGER2010/TRACT/2010/tl_2010_{state_id}_tract10.zip'.format(**locals())
    local_zipfile = 'capture/tiger2010_census2010_tracts/tl_2010_{state_id}_tract10.zip'.format(**locals())
    download_file(src_zipfile, local_zipfile)
    unzip_file(local_zipfile)
    local_zipfile_dir = os.path.splitext(local_zipfile)[0]

    tracts_shapefile = glob.glob(local_zipfile_dir + '/*.shp')[0]
    tracts_geojson_path_tmp = tracts_geojson_path + '.tmp.geojson'

    try:
        os.unlink(tracts_geojson_path_tmp)
    except:
        pass

    cmd = 'ogr2ogr -f GeoJSON -t_srs crs:84 {tracts_geojson_path_tmp} {tracts_shapefile}'.format(**locals())
    subprocess_check(cmd)
    os.rename(tracts_geojson_path_tmp, tracts_geojson_path)
    os.unlink(local_zipfile)
    shutil.rmtree(local_zipfile_dir)
    
    !ls -lh $tracts_geojson_path
    
    return tracts_geojson_path

In [87]:
# Submit one download-and-convert job per state to a pool with max_workers=5.
# NOTE(review): SimpleThreadPoolExecutor and state_ids are not defined in any
# cell visible here -- presumably they come from timelapse-utilities.ipynb;
# confirm.
pool = SimpleThreadPoolExecutor(max_workers=5)

for state_id in state_ids:
    pool.submit(download_and_convert_state, state_id)

# presumably waits for the submitted jobs to finish; the recorded output
# shows a completion summary -- TODO confirm
pool.shutdown()
# A bare None as the last expression keeps the cell from displaying a value.
None

SimpleThreadPoolExecutor succeeded: all 51 jobs completed


capture/tiger2010_census2010_tracts/01.geojson already exists, skipping
capture/tiger2010_census2010_tracts/02.geojson already exists, skipping
capture/tiger2010_census2010_tracts/04.geojson already exists, skipping
capture/tiger2010_census2010_tracts/05.geojson already exists, skipping
capture/tiger2010_census2010_tracts/06.geojson already exists, skipping
capture/tiger2010_census2010_tracts/08.geojson already exists, skipping
capture/tiger2010_census2010_tracts/09.geojson already exists, skipping
capture/tiger2010_census2010_tracts/10.geojson already exists, skipping
capture/tiger2010_census2010_tracts/11.geojson already exists, skipping
capture/tiger2010_census2010_tracts/13.geojson already exists, skipping
capture/tiger2010_census2010_tracts/12.geojson already exists, skipping
capture/tiger2010_census2010_tracts/15.geojson already exists, skipping
capture/tiger2010_census2010_tracts/16.geojson already exists, skipping
capture/tiger2010_census2010_tracts/17.geojson already exists, s

In [100]:
# Collect the GEOID10 property of every tract feature across the per-state
# GeoJSON files, reporting a count per file and a sorted overall summary.
all_geoids = []

for state_id in state_ids:
    path = state_tract_geojson_path(state_id)
    # Close each file as soon as it has been parsed rather than leaving one
    # open handle per state for the garbage collector.
    with open(path) as geojson_file:
        geojson = json.load(geojson_file)
    geoids = [feature['properties']['GEOID10'] for feature in geojson['features']]
    # Single-argument print(...) emits the same text as the print statement.
    print('%s has %d tracts' % (path, len(geoids)))
    all_geoids.extend(geoids)

all_geoids = sorted(all_geoids)

if all_geoids:
    print('%d geoids, from %s to %s' % (len(all_geoids), all_geoids[0], all_geoids[-1]))
else:
    # Nothing was loaded; indexing [0] / [-1] would raise IndexError.
    print('0 geoids')

capture/tiger2010_census2010_tracts/01.geojson has 1181 tracts
capture/tiger2010_census2010_tracts/02.geojson has 167 tracts
capture/tiger2010_census2010_tracts/04.geojson has 1526 tracts
capture/tiger2010_census2010_tracts/05.geojson has 686 tracts
capture/tiger2010_census2010_tracts/06.geojson has 8057 tracts
capture/tiger2010_census2010_tracts/08.geojson has 1249 tracts
capture/tiger2010_census2010_tracts/09.geojson has 833 tracts
capture/tiger2010_census2010_tracts/10.geojson has 218 tracts
capture/tiger2010_census2010_tracts/11.geojson has 179 tracts
capture/tiger2010_census2010_tracts/12.geojson has 4245 tracts
capture/tiger2010_census2010_tracts/13.geojson has 1969 tracts
capture/tiger2010_census2010_tracts/15.geojson has 351 tracts
capture/tiger2010_census2010_tracts/16.geojson has 298 tracts
capture/tiger2010_census2010_tracts/17.geojson has 3123 tracts
capture/tiger2010_census2010_tracts/18.geojson has 1511 tracts
capture/tiger2010_census2010_tracts/19.geojson has 825 tracts


In [25]:
# Show the properties dict of the first feature in `tracts`.
# NOTE(review): `tracts` is not assigned in any cell visible here --
# presumably a parsed per-state tract GeoJSON left over from an earlier
# kernel session; define it explicitly so this cell survives Restart & Run All.
tracts['features'][0]['properties']

{u'ALAND10': 352811329.0,
 u'AWATER10': 499970.0,
 u'COUNTYFP10': u'005',
 u'FUNCSTAT10': u'S',
 u'GEOID10': u'01005950300',
 u'INTPTLAT10': u'+31.7908933',
 u'INTPTLON10': u'-085.5670514',
 u'MTFCC10': u'G5020',
 u'NAME10': u'9503',
 u'NAMELSAD10': u'Census Tract 9503',
 u'STATEFP10': u'01',
 u'TRACTCE10': u'950300'}