Download, unpack, and import into database the 2000 and 2010 Census Block Shapefiles as defined by TIGER 2010
=============================================================================================================

Note:  There are two definitions of the 2000 decennial census block shapes.  The shapes originally created in 2000 by TIGER2K are quite inaccurate and should no longer be used.  In 2010, TIGER 2010 created block shapes for the 2010 Decennial Census and, at the same time, recreated the 2000 Decennial Census block shapes in a much more accurate version.

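This script downloads the TIGER 2010 versions of both the 2000 and 2010 decennial census block shapes.  The cells below compute the area of every 2010 block, in Web Mercator (EPSG:3857) units, from the shapefiles under `capture/tiger2010_census2010_blocks/` and save the areas as a float32 column file under `columncache/geometry_block2010/`.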

In [29]:
import csv, json, os, re, shutil
import subprocess, sys, threading, traceback, urllib2

import pandas as pd
import geopandas as gpd
import numpy as np
from geopandas import GeoSeries, GeoDataFrame
%matplotlib inline

from IPython.core.display import display, HTML
# Inject CSS that shifts #notebook-container 14px to the left and widens it to 100% + 27px.
display(HTML("<style>#notebook-container { margin-left:-14px; width:calc(100% + 27px) !important; }</style>"))

def exec_ipynb(filename_or_url):
    """Run every code cell of a notebook inside this module's global namespace.

    Args:
        filename_or_url: Path to a local notebook file, or a URL beginning with
            ``http:`` / ``https:`` (fetched with ``urllib2.urlopen``).

    Returns:
        None. Whatever the notebook's code cells define or do takes effect in
        ``globals()``.

    NOTE(review): the notebook's code is executed as-is with no sandboxing;
    only point this at notebooks you trust.
    """
    if re.match(r'https?:', filename_or_url):
        handle = urllib2.urlopen(filename_or_url)
    else:
        handle = open(filename_or_url)
    # Close the file / HTTP response explicitly instead of leaving it to the
    # garbage collector.
    try:
        nb = handle.read()
    finally:
        handle.close()
    jsonNb = json.loads(nb)
    # nbformat 4 keeps cells at the top level with their code under 'source';
    # the other layout handled here nests cells in worksheets[0] under 'input'.
    if jsonNb['nbformat'] == 4:
        sources = [''.join(cell['source']) for cell in jsonNb['cells'] if cell['cell_type'] == 'code']
    else:
        sources = [''.join(cell['input']) for cell in jsonNb['worksheets'][0]['cells'] if cell['cell_type'] == 'code']
    # Call-style exec with an explicit namespace is accepted by both the
    # Python 2 exec statement and the Python 3 exec() builtin.
    exec('\n'.join(sources), globals())

# Run the code cells of the shared utilities notebook in this notebook's namespace.
# NOTE(review): presumably this is where `state_ids` and `SimpleProcessPoolExecutor`
# (used below but not defined in this notebook) come from -- confirm.
exec_ipynb('timelapse-utilities.ipynb')

Census 2010 subdivisions, for U.S. only:
    
    ~50 states
    ~73K census tracts (approx 4000 people)
    ~218K block groups (approx 1500 people)
    ~11M blocks

In [18]:
def compute_areas(state_id):
    """Compute the area of every 2010 census block in one state and cache it as JSON.

    Reads the state's TIGER 2010 ``tabblock10`` shapefile, reprojects it to
    EPSG:3857 (Web Mercator) and writes a ``{GEOID10: area}`` mapping to
    ``capture/tiger2010_census2010_blocks/<state_id>-areas.json``. Areas are
    measured in the projected Web Mercator plane.

    Args:
        state_id: State identifier as it appears in the shapefile path
            (e.g. ``'01'``).

    Returns:
        None. If the destination JSON already exists the state is skipped and
        only a message is written to stderr.
    """
    src = 'capture/tiger2010_census2010_blocks/tl_2010_%s_tabblock10/tl_2010_%s_tabblock10.shp' % (state_id, state_id)
    dest = 'capture/tiger2010_census2010_blocks/%s-areas.json' % state_id

    if os.path.exists(dest):
        sys.stderr.write('%s already exists, skipping\n' % dest)
        return

    sys.stderr.write('Reading %s\n' % src)

    g = gpd.read_file(src)
    # convert to web mercator
    m = g.to_crs(epsg=3857)
    areas = dict(zip(m['GEOID10'], m.geometry.area))
    # Write to a temp file and make sure it is flushed and closed before the
    # rename, so dest only ever appears with complete contents.
    tmp = dest + '.tmp'
    with open(tmp, 'w') as out:
        json.dump(areas, out)
    os.rename(tmp, dest)
    sys.stderr.write('Created %s with %d blocks\n' % (dest, len(areas)))

    
# Submit one compute_areas job per state, then shut the pool down.
# compute_areas itself skips states whose JSON output already exists.
# NOTE(review): SimpleProcessPoolExecutor and state_ids are not defined in this
# notebook (presumably they come from timelapse-utilities.ipynb), and 8 is
# presumably the number of worker processes -- confirm.
pool = SimpleProcessPoolExecutor(8)
for state_id in state_ids:
    pool.submit(compute_areas, state_id)
    
pool.shutdown()
# Trailing None so the cell's last expression produces no Out[] value.
None

Reading capture/tiger2010_census2010_blocks/tl_2010_01_tabblock10/tl_2010_01_tabblock10.shp
capture/tiger2010_census2010_blocks/05-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/04-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/06-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/08-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/02-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/09-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/10-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/11-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/12-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/13-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/15-areas.json already exists, skipping
capture/tiger2010_census2010_blocks/18-areas.json already exists, skipping
capture/

SimpleProcessPoolExecutor succeeded: all 51 jobs completed


# Aggregate all the state block areas

In [19]:
# Merge every per-state areas JSON file into a single dict.
areas = {}
for state_id in state_ids:
    filename = 'capture/tiger2010_census2010_blocks/%s-areas.json' % state_id
    # Close each file as soon as it is parsed instead of leaking one open
    # handle per state.
    with open(filename) as state_file:
        state_areas = json.load(state_file)
    print('Read %d blocks from %s' % (len(state_areas), filename))
    areas.update(state_areas)
print('Total of %d block areas read' % len(areas))

Read 252266 blocks from capture/tiger2010_census2010_blocks/01-areas.json
Read 45292 blocks from capture/tiger2010_census2010_blocks/02-areas.json
Read 241666 blocks from capture/tiger2010_census2010_blocks/04-areas.json
Read 186211 blocks from capture/tiger2010_census2010_blocks/05-areas.json
Read 710145 blocks from capture/tiger2010_census2010_blocks/06-areas.json
Read 201062 blocks from capture/tiger2010_census2010_blocks/08-areas.json
Read 67578 blocks from capture/tiger2010_census2010_blocks/09-areas.json
Read 24115 blocks from capture/tiger2010_census2010_blocks/10-areas.json
Read 6507 blocks from capture/tiger2010_census2010_blocks/11-areas.json
Read 484481 blocks from capture/tiger2010_census2010_blocks/12-areas.json
Read 291086 blocks from capture/tiger2010_census2010_blocks/13-areas.json
Read 25016 blocks from capture/tiger2010_census2010_blocks/15-areas.json
Read 149842 blocks from capture/tiger2010_census2010_blocks/16-areas.json
Read 451554 blocks from capture/tiger2010_ce

In [20]:
# Load the 2010 block GEOIDs; the file is closed as soon as it has been parsed.
with open('block_geoids_2010.json') as geoids_file:
    block_geoids_2010 = json.load(geoids_file)

In [21]:
# Number of entries loaded from block_geoids_2010.json.
len(block_geoids_2010)

11078297

In [24]:
# The sorted keys of the merged areas must equal block_geoids_2010 exactly.
assert sorted(areas) == block_geoids_2010

In [27]:
!mkdir columncache/geometry_block2010

In [30]:
# Scratch check: build a float32 array from a plain Python list.
np.asarray([1, 2, 3, 4, 5], dtype='float32')

array([1., 2., 3., 4., 5.], dtype=float32)

In [31]:
# Block areas listed in sorted-GEOID order; preview the first ten.
area_values = [areas[geoid] for geoid in sorted(areas)]
area_values[:10]

[679987.0344843781,
 678.8211526162452,
 4924.9755513186155,
 721107.0802927441,
 1054.15165899667,
 895.506225840995,
 2538.783843693782,
 1139299.9552203414,
 7735.269762136278,
 297776.74127220816]

In [32]:
# Pack the areas into a float32 array with an extra 0.0 in front, so the first
# real block area sits at index 1.
# NOTE(review): presumably index 0 is a reserved placeholder slot in the column
# file format -- confirm.
np_areas = np.asarray([0.0] + area_values, dtype=np.float32)
np_areas

array([0.0000000e+00, 6.7998706e+05, 6.7882117e+02, ..., 1.5491511e+04,
       1.1459661e+05, 1.6444244e+05], dtype=float32)

In [41]:
# Write the array with ndarray.tofile: raw float32 values (4 bytes each) with no header.
np_areas.tofile('columncache/geometry_block2010/area_web_mercator_sqm.float32')

In [42]:
# Show the written file's size in bytes.
!ls -l columncache/geometry_block2010/area_web_mercator_sqm.float32

-rw-rw-r-- 1 rsargent rsargent 44313192 Jul 23 14:25 columncache/geometry_block2010/area_web_mercator_sqm.float32


In [43]:
# Number of float32 values in the file (4 bytes each), derived from the file on
# disk rather than from a byte count pasted out of the `ls` output.
os.path.getsize('columncache/geometry_block2010/area_web_mercator_sqm.float32') // 4

11078298