In [40]:
import csv, json, os, re, shutil
import subprocess, sys, threading, time, urllib2

def exec_ipynb(filename_or_url):
    """Execute every code cell of a notebook in this module's global namespace.

    Args:
        filename_or_url: Path of a local ``.ipynb`` file, or an ``http:`` /
            ``https:`` URL from which the notebook is fetched.

    Raises:
        IOError / urllib2.URLError: if the notebook cannot be opened.
        ValueError: if the notebook is not valid JSON.
        KeyError: if the expected notebook keys are missing.
        Any exception raised by the executed notebook code itself.

    NOTE(review): this runs whatever code the notebook contains. Only point
    it at files or URLs you trust.
    """
    if re.match(r'https?:', filename_or_url):
        source = urllib2.urlopen(filename_or_url)
    else:
        source = open(filename_or_url)
    try:
        nb = source.read()
    finally:
        # Release the file handle / HTTP connection even when read() fails.
        source.close()

    jsonNb = json.loads(nb)
    # Notebook format v4 stores cells at the top level with their code under
    # 'source'; the other layout handled here nests them in the first
    # worksheet with the code under 'input'.
    if jsonNb['nbformat'] == 4:
        cells, code_key = jsonNb['cells'], 'source'
    else:
        cells, code_key = jsonNb['worksheets'][0]['cells'], 'input'

    # A list comprehension (not a generator expression) is used on purpose:
    # it keeps this function free of nested scopes on Python 2, where older
    # 2.7 releases reject the tuple form of exec next to nested scopes.
    code = '\n'.join([''.join(cell[code_key])
                      for cell in cells if cell['cell_type'] == 'code'])
    # Equivalent to "exec code in globals()"; this spelling also parses on
    # Python 3.
    exec(code, globals())

# Run the code cells of timelapse-utilities.ipynb in this notebook's globals.
# NOTE(review): names used below but not defined in this notebook
# (download_file, gunzip_file, psql, state_names) presumably come from that
# notebook -- confirm.
exec_ipynb('timelapse-utilities.ipynb')

In [2]:
try:
    import psycopg2
except:
    !pip install psycopg2
import psycopg2

In [3]:
# Data year; substituted into the download URLs, the CSV file names and the
# od_{jt}_{year} table name used below.
year = 2011
# Job-type code substituted into the same URLs, file names and table name.
# NOTE(review): 'JT01' presumably selects "primary jobs" in LODES naming --
# confirm against the LODES technical documentation.
jt = 'JT01'

Census 2010 subdivisions, for U.S. only:
    
    ~50 states
    ~73K census tracts (approx 4000 people)
    ~218K block groups (approx 1500 people)
    ~11M blocks

From https://lehd.ces.census.gov/doc/help/onthemap/OnTheMapDataOverview.pdf:

Job dominance is a feature of the LODES data made available because the LEHD data
infrastructure tracks jobs as its primary unit instead of people. As such, the system naturally
captures more than one job per person when available in the data. The dominant (or primary)
job for an individual is defined as the job that earned the individual the most money.
Constructed this way, the number of primary jobs should be equal to the number of workers.
LODES and OnTheMap release “Primary” jobs and “All” jobs so that “Non-primary” jobs can be
calculated through subtraction.

<img src="lodes-table-description.png">

Download all state LODES data
-----------------------------

In [19]:
def download_lodes7(state):
    """Download and gunzip the 'main' and 'aux' LODES7 OD files for one state.

    Files are stored under ``capture/lodes7/<state>/`` using the file name
    taken from the download URL. The job type and year come from the
    module-level ``jt`` and ``year`` settings.

    Args:
        state: State code as it appears in the LODES URLs (e.g. ``'ak'``).
    """
    # URL layout: http://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.0.pdf
    url_template = 'http://lehd.ces.census.gov/data/lodes/LODES7/{state}/od/{state}_od_{part}_{jt}_{year}.csv.gz'
    dest_dir = "capture/lodes7/%s/" % state
    for part in ('main', 'aux'):
        url = url_template.format(state=state, part=part, year=year, jt=jt)
        gz_path = dest_dir + os.path.basename(url)
        download_file(url, gz_path)
        gunzip_file(gz_path)

# Fan out one download thread per state, then block until all have finished.
threads = []

for name in state_names:
    worker = threading.Thread(target=download_lodes7, args=(name,))
    worker.start()
    threads.append(worker)

for worker in threads:
    worker.join()

Downloading http://lehd.ces.census.gov/data/lodes/LODES7/ak/od/ak_od_main_JT01_2011.csv.gz to capture/lodes7/ak/ak_od_main_JT01_2011.csv.gz
Downloading http://lehd.ces.census.gov/data/lodes/LODES7/al/od/al_od_main_JT01_2011.csv.gz to capture/lodes7/al/al_od_main_JT01_2011.csv.gz
Downloading http://lehd.ces.census.gov/data/lodes/LODES7/ar/od/ar_od_main_JT01_2011.csv.gz to capture/lodes7/ar/ar_od_main_JT01_2011.csv.gz
Downloading http://lehd.ces.census.gov/data/lodes/LODES7/ca/od/ca_od_main_JT01_2011.csv.gz to capture/lodes7/ca/ca_od_main_JT01_2011.csv.gz
Downloading http://lehd.ces.census.gov/data/lodes/LODES7/az/od/az_od_main_JT01_2011.csv.gz to capture/lodes7/az/az_od_main_JT01_2011.csv.gz
Downloading http://lehd.ces.census.gov/data/lodes/LODES7/co/od/co_od_main_JT01_2011.csv.gz to capture/lodes7/co/co_od_main_JT01_2011.csv.gz
Downloading http://lehd.ces.census.gov/data/lodes/LODES7/ct/od/ct_od_main_JT01_2011.csv.gz to capture/lodes7/ct/ct_od_main_JT01_2011.csv.gz
Downloading http://l

<h1>Import LODES Origin-Destination (OD) CSV files into psql</h1>

In [24]:
def drop_and_create_od_table():
    """Drop and recreate the ``od_{jt}_{year}`` table together with its indexes.

    Builds one SQL script (DROP TABLE IF EXISTS, CREATE TABLE, then a unique
    index on ``gid`` and plain indexes on ``w_geocode``, ``h_geocode`` and
    ``s000``) and passes it to ``psql``. The table name is derived from the
    module-level ``jt`` and ``year`` settings.
    """
    table = 'od_{jt}_{year}'.format(jt=jt, year=year)

    column_defs = [
        'gid serial NOT NULL',
        'w_geocode character varying(15)',
        'h_geocode character varying(15)',
    ]
    # All count columns share the same integer type.
    column_defs += ['%s integer' % column for column in (
        'S000', 'SA01', 'SA02', 'SA03', 'SE01',
        'SE02', 'SE03', 'SI01', 'SI02', 'SI03')]
    column_defs.append('createdate character varying(8)')

    statements = [
        'DROP TABLE IF EXISTS %s;' % table,
        'CREATE TABLE %s ( %s);' % (table, ', '.join(column_defs)),
        'CREATE UNIQUE INDEX ON %s (gid);' % table,
        'CREATE INDEX ON %s (w_geocode);' % table,
        'CREATE INDEX ON %s (h_geocode);' % table,
        'CREATE INDEX ON %s (s000);' % table,
    ]
    # Each statement is terminated by a newline, the last one included.
    psql(''.join([statement + '\n' for statement in statements]))

# Destructive: drops any existing od_{jt}_{year} table (and its rows) and
# recreates it empty before the import below.
drop_and_create_od_table()

def import_csv_to_psql(filename):
    """Load one LODES OD CSV file into the ``od_{jt}_{year}`` table via COPY.

    The file is expected to be comma-delimited with a header row (the COPY
    statement uses ``DELIMITER ',' CSV HEADER``). The table name is derived
    from the module-level ``jt`` and ``year`` settings.

    Args:
        filename: Path of the CSV file; it is converted to an absolute path
            before being placed in the SQL statement.

    NOTE(review): ``COPY ... FROM '<file>'`` is presumably read by the
    database server process, so the path must be visible to it -- confirm
    for your setup.
    """
    # Double embedded single quotes so a path such as /data/o'brien/x.csv
    # cannot end the SQL string literal early and break the statement.
    quoted_path = os.path.abspath(filename).replace("'", "''")
    cmd = (
        "COPY od_{jt}_{year}("
        "w_geocode,h_geocode,S000,SA01,"
        "SA02,SA03,SE01,SE02,SE03,SI01,"
        "SI02,SI03,createdate) FROM "
        "'{filename}' "
        "DELIMITER ',' CSV HEADER;\n"
    ).format(jt=jt, year=year, filename=quoted_path)
    psql(cmd)

# Start a fresh worker list for the import threads (the same name was used
# for the download threads in the earlier cell).
threads = []

def import_lodes7_state(state):
    """Import the 'main' and 'aux' OD CSV files of one state into PostgreSQL.

    Writes a progress line to stdout, then hands each CSV under
    ``capture/lodes7/<state>/`` to ``import_csv_to_psql``. The job type and
    year come from the module-level ``jt`` and ``year`` settings.

    Args:
        state: State code as used in the capture directory and file names.
    """
    sys.stdout.write('Importing state %s\n' % state)
    path_template = 'capture/lodes7/{state}/{state}_od_{part}_{jt}_{year}.csv'
    csv_paths = [path_template.format(jt=jt, state=state, part=part, year=year)
                 for part in ('main', 'aux')]
    for csv_path in csv_paths:
        import_csv_to_psql(csv_path)
# Fan out one import thread per state, then block until all have finished.
for state in state_names:
    importer = threading.Thread(target=import_lodes7_state, args=(state,))
    importer.start()
    threads.append(importer)

for importer in threads:
    importer.join()

DROP TABLE IF EXISTS od_JT01_2011;
CREATE TABLE od_JT01_2011 ( gid serial NOT NULL, w_geocode character varying(15), h_geocode character varying(15), S000 integer, SA01 integer, SA02 integer, SA03 integer, SE01 integer, SE02 integer, SE03 integer, SI01 integer, SI02 integer, SI03 integer, createdate character varying(8));
CREATE UNIQUE INDEX ON od_JT01_2011 (gid);
CREATE INDEX ON od_JT01_2011 (w_geocode);
CREATE INDEX ON od_JT01_2011 (h_geocode);
CREATE INDEX ON od_JT01_2011 (s000);
Finished execution: DROP TABLE
CREATE TABLE
CREATE INDEX
CREATE INDEX
CREATE INDEX
CREATE INDEX
Importing state ak
Importing state al
Importing state ar
COPY od_JT01_2011(w_geocode,h_geocode,S000,SA01,SA02,SA03,SE01,SE02,SE03,SI01,SI02,SI03,createdate) FROM '/mnt/ssd/rsargent/projects/unemployed-dotmap/unemployed-dotmap/data-visualization-tools/examples/lodes/capture/lodes7/ak/ak_od_main_JT01_2011.csv' DELIMITER ',' CSV HEADER;
Importing state az
Importing state ca
COPY od_JT01_2011(w_geocode,h_geocode,S000

Table structure
---------------

    For od_{JT00|JT01}_{year}


In [15]:
# Show the structure of the od_{jt}_{year} table. The raw string keeps the
# backslash of psql's \d meta-command literal instead of relying on '\d'
# being an unrecognized (and, on Python 3, deprecated) escape sequence.
psql(r'\d od_{jt}_{year}'.format(jt=jt, year=year))

\d od_jt00_2011
Finished execution: Table "public.od_jt00_2011"
   Column   |         Type          |                         Modifiers                          
------------+-----------------------+------------------------------------------------------------
 gid        | integer               | not null default nextval('od_jt00_2011_gid_seq'::regclass)
 w_geocode  | character varying(15) | 
 h_geocode  | character varying(15) | 
 s000       | integer               | 
 sa01       | integer               | 
 sa02       | integer               | 
 sa03       | integer               | 
 se01       | integer               | 
 se02       | integer               | 
 se03       | integer               | 
 si01       | integer               | 
 si02       | integer               | 
 si03       | integer               | 
 createdate | character varying(8)  | 
 distance   | double precision      | 
Indexes:
    "od_jt00_2011_gid_idx" UNIQUE, btree (gid)
    "od_jt00_2011_h_geocode_idx" btree (h

In [None]:
# Row count of the tl_2010_tabblock10 table.
# NOTE(review): presumably the 2010 census-block table; it is not created in
# this part of the notebook -- confirm where it is loaded.
psql('SELECT COUNT(*) FROM tl_2010_tabblock10')