# LODES Data Analysis
## Prepare Workbook

In [22]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
import urllib
from urllib2 import urlopen
from StringIO import StringIO
import gzip

import requests
import json
import os
from copy import deepcopy

from pandas.io.json import json_normalize

In [2]:
# Set create working folder and set as active directory
os.chdir('C:\Users\dcapizzi\Documents\GitHub')
if not os.path.exists('lodes'):
    os.makedirs('lodes')
    
os.chdir('C:\Users\dcapizzi\Documents\GitHub\lodes')

OSError: [Errno 2] No such file or directory: 'C:\\Users\\dcapizzi\\Documents\\GitHub'

## Load initial LODES data

In [23]:
# Collect user input for the year and states to download for the analysis
year = raw_input('Enter a year: ')
input_list = raw_input("Enter states to include  separated by commas (no spaces): ")
state_list = input_list.split(',')

Enter a year: 2014
Enter states to include  separated by commas (no spaces): MD


In [24]:
# Define final data frames to aggregate all state data

lodes_columns = ['w_geocode', 'h_geocode', 'tot_jobs', 'age_29_bel_jobs',
       'age_30_54_jobs', 'age_55_over_jobs', 'sal_1250_bel_jobs',
       'sal_1250_3333_jobs', 'sal_3333_over_jobs', 'goods_prod_jobs',
       'trade_transp_jobs', 'all_other_svc_jobs', 'createdate', 'state',
       'w_block', 'h_block', 'w_2010_block', 'w_state', 'w_county_name',
       'w_block_group_code', 'w_block_group_name', 'w_metro_name',
       'w_zip_code', 'w_place_name', 'w_county_sub_name', 'w_createdate',
       'h_2010_block', 'h_state', 'h_county_name', 'h_block_group_code',
       'h_block_group_name', 'h_metro_name', 'h_zip_code', 'h_place_name',
       'h_county_sub_name', 'h_createdate']

lodes_data = pd.DataFrame([],columns=lodes_columns)

In [25]:
# Create dictionaries to house downloaded files

dict_lodes = {}
dict_xwalk = {}

# Loop through all states selected by user, download the relevant files from the Census website, unzip, read, and load into dictionaries 
# Process takes some time, please be patient

for state in state_list:
    
    # Sets url for primary "LODES" data set - which provides data on the home Census block, work Census block, and commuters in between
    lodes_url = 'http://lehd.ces.census.gov/data/lodes/LODES7/' + state.lower() + '/od/' + state.lower() + '_od_main_JT00_' + year + '.csv.gz'
    
    # Sets url for "cross-walk" data with the city, state, ZIP, etc. for each Census block
    xwalk_url = 'http://lehd.ces.census.gov/data/lodes/LODES7/' + state.lower() + '/' + state.lower() + '_xwalk.csv.gz'
    
    # Names the files
    lodes_filename = 'lodes_' + state + "_" + year + '.csv.gz'
    xwalk_filename =  'xwalk_' + state + "_" + year + '.csv.gz'
    
    # Downloads the files
    urllib.urlretrieve(lodes_url, lodes_filename)
    urllib.urlretrieve(xwalk_url, xwalk_filename)
    
    print 'Data downloaded for '+state
    
    # Unzips the files
    unzip_lodes = gzip.open(lodes_filename, 'rb')
    unzip_xwalk = gzip.open(xwalk_filename, 'rb')
    
    # Reads the files to disk 
    unzip_lodes = unzip_lodes.read()
    unzip_xwalk = unzip_xwalk.read()

    # Saves as objects in teh created dictionaries 
    dict_lodes[state]=pd.read_csv(StringIO(unzip_lodes))
    dict_xwalk[state]=pd.read_csv(StringIO(unzip_xwalk))
    print 'Data tables created for '+state
    
    # Removes unnecessary fields and names the columns to consistent, human-readable names
    dict_lodes[state].columns = ['w_geocode','h_geocode','tot_jobs','age_29_bel_jobs',
              'age_30_54_jobs','age_55_over_jobs','sal_1250_bel_jobs','sal_1250_3333_jobs','sal_3333_over_jobs',
              'goods_prod_jobs','trade_transp_jobs','all_other_svc_jobs','createdate']

    dict_xwalk[state] = DataFrame(dict_xwalk[state],columns=['tabblk2010','stusps','ctyname', 'bgrp','bgrpname','cbsaname','zcta','stplcname','ctycsubname','createdate'])
    dict_xwalk[state].columns = ['2010_block', 'state', 'county_name', 'block_group_code', 'block_group_name','metro_name', 'zip_code','place_name', 'county_sub_name','createdate']
    
    print 'Column names defined for '+state
    
    # Creates 'block-group-level' field to join LODES to xwalk and centroid lat/longs (Census block group codes are the first 12 digits of Census block codes)
    left = lambda x: str(int(x))[:12]
    dict_lodes[state]['w_block'] = dict_lodes[state]['w_geocode'].apply(left)
    dict_lodes[state]['w_block'] = dict_lodes[state]['w_geocode'].apply(left)
    dict_lodes[state]['h_block'] = dict_lodes[state]['h_geocode'].apply(left)
    dict_xwalk[state]['block_group_code']= dict_xwalk[state]['block_group_code'].apply(left)
    
    dict_lodes[state]['state'] = state
    
    print 'New fields created for '+state
    
print 'Process complete!'

Data downloaded for MD
Data tables created for MD
Column names defined for MD
New fields created for MD
Process complete!


In [26]:
# Create blank dictionaries to join or merge cross-walk data with LODES data

dict_xwalk_w = {}
dict_xwalk_h = {}

# Duplicay (copy) cross-walk data, with columns one for work, one for home
for state in dict_xwalk:
    dict_xwalk_w[state] = deepcopy(dict_xwalk[state]) 
    dict_xwalk_h[state] = deepcopy(dict_xwalk[state]) 
    dict_xwalk_w[state].rename(columns=lambda x: "w_"+x, inplace="True")
    dict_xwalk_h[state].rename(columns=lambda x: "h_"+x, inplace="True")

In [27]:
# For each state in dict_lodes, merge once on the "work" Census block (w_geocode) and once on the "home" Census block (h_geocode)
# This data will provide an idea of the city/state/zip for both the work and home block code groups

for state in dict_lodes:
    dict_lodes[state] = pd.merge(dict_lodes[state], dict_xwalk_w[state], how='left', left_on='w_geocode', right_on='w_2010_block')
    dict_lodes[state] = pd.merge(dict_lodes[state], dict_xwalk_h[state], how='left', left_on='h_geocode', right_on='h_2010_block')
    lodes_data = lodes_data.append(dict_lodes[state])

In [28]:
lodes_data.columns

Index([u'age_29_bel_jobs', u'age_30_54_jobs', u'age_55_over_jobs',
       u'all_other_svc_jobs', u'createdate', u'goods_prod_jobs',
       u'h_2010_block', u'h_block', u'h_block_group_code',
       u'h_block_group_name', u'h_county_name', u'h_county_sub_name',
       u'h_createdate', u'h_geocode', u'h_metro_name', u'h_place_name',
       u'h_state', u'h_zip_code', u'sal_1250_3333_jobs', u'sal_1250_bel_jobs',
       u'sal_3333_over_jobs', u'state', u'tot_jobs', u'trade_transp_jobs',
       u'w_2010_block', u'w_block', u'w_block_group_code',
       u'w_block_group_name', u'w_county_name', u'w_county_sub_name',
       u'w_createdate', u'w_geocode', u'w_metro_name', u'w_place_name',
       u'w_state', u'w_zip_code'],
      dtype='object')

## Transform LODES data for analysis

In [29]:
# Create new field "home to work" with both home and work geocodes
lodes_data['unique'] = lodes_data['h_geocode'].map('{0:f}'.format).astype(str).apply(lambda x: x[:15]) + ' to ' + lodes_data['w_geocode'].map('{0:f}'.format).astype(str).apply(lambda x: x[:15]) 

In [30]:
# Take new data set, and split into "home" and "work" tables to be flattened

lodes_data_home = DataFrame(lodes_data, columns = ['unique','h_geocode', 'tot_jobs', 'age_29_bel_jobs',
       'age_30_54_jobs', 'age_55_over_jobs', 'sal_1250_bel_jobs',
       'sal_1250_3333_jobs', 'sal_3333_over_jobs', 'goods_prod_jobs',
       'trade_transp_jobs', 'all_other_svc_jobs',
       'h_block', 'h_state', 'h_county_name',
       'h_block_group_code', 'h_block_group_name', 'h_metro_name',
       'h_zip_code', 'h_place_name', 'h_county_sub_name'])
lodes_data_home['type']='Home'
lodes_data_home['path']=1

lodes_data_work = DataFrame(lodes_data, columns = ['unique','w_geocode', 'tot_jobs', 'age_29_bel_jobs',
       'age_30_54_jobs', 'age_55_over_jobs', 'sal_1250_bel_jobs',
       'sal_1250_3333_jobs', 'sal_3333_over_jobs', 'goods_prod_jobs',
       'trade_transp_jobs', 'all_other_svc_jobs',
       'w_block', 'w_state', 'w_county_name',
       'w_block_group_code', 'w_block_group_name', 'w_metro_name',
       'w_zip_code', 'w_place_name', 'w_county_sub_name'])

lodes_data_work['type']='Work'
lodes_data_work['path']=2

In [31]:
# Rename columns to be the same for both new tables
new_columns = ['unique','geocode', 'tot_jobs', 'age_29_bel_jobs',
       'age_30_54_jobs', 'age_55_over_jobs', 'sal_1250_bel_jobs',
       'sal_1250_3333_jobs', 'sal_3333_over_jobs', 'goods_prod_jobs',
       'trade_transp_jobs', 'all_other_svc_jobs',
       'block', 'state', 'county_name',
       'block_group_code', 'block_group_name', 'metro_name',
       'zip_code', 'place_name', 'county_sub_name','type','path']

lodes_data_home.columns = new_columns
lodes_data_work.columns = new_columns

In [32]:
# Append both tables and sort by Path ID
lodes_data_flat = lodes_data_home.append(lodes_data_work)
lodes_data_flat = lodes_data_flat.sort(['unique','path']).reset_index(drop=True)
lodes_data_flat[:3]

  app.launch_new_instance()


Unnamed: 0,unique,geocode,tot_jobs,age_29_bel_jobs,age_30_54_jobs,age_55_over_jobs,sal_1250_bel_jobs,sal_1250_3333_jobs,sal_3333_over_jobs,goods_prod_jobs,...,state,county_name,block_group_code,block_group_name,metro_name,zip_code,place_name,county_sub_name,type,path
0,240010001001001 to 240010001003147,240010000000000.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,MD,"Allegany County, MD",240010001001,"1 (Tract 1, Allegany, MD)","Cumberland, MD-WV",21766.0,,"District 1, Orleans (Allegany, MD)",Home,1
1,240010001001001 to 240010001003147,240010000000000.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,MD,"Allegany County, MD",240010001003,"3 (Tract 1, Allegany, MD)","Cumberland, MD-WV",21555.0,,"District 2, Oldtown (Allegany, MD)",Work,2
2,240010001001001 to 240010002002031,240010000000000.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,MD,"Allegany County, MD",240010001001,"1 (Tract 1, Allegany, MD)","Cumberland, MD-WV",21766.0,,"District 1, Orleans (Allegany, MD)",Home,1


## Add additional data on latitude, longitude, and demographics into data set

In [33]:
# read in data with latitudes, longitudes, and other data sources
latlong = pd.read_csv('DDL_census_data.csv')

# Rename columns
latlong.columns = ['state', 'county', 'tract', 'blockgrouppiece', 'full_geo_id', 'geoid',
       'name', u'lsad', 'land_area', 'water_area', 'latitude', 'longitude', 'id',
       'geoid2', 'geoid3', 'geo_display','median_income','moe_median_income',
       'geoid4', 'geoid5', 'geo_display2', 'total','moe_total:',
       'foodstamps','moe_foodstamps',
       'foodstamps_disability','moe_foodstamps_disability','foodstamps_nodisability','moe_foodstamps_nodisability',
       'nofoodstamps','moe_nofoodstamps',
       'nofoodstamps_disability','moe_nofoodstamps_disability',
       'nofoodstamps_nodisability','moe_nofoodstamps_nodisability']

# Reformat columns
latlong['full_geo_id'] = latlong['full_geo_id'].apply(lambda x: x[9:])

# Eliminate unnecessary columns
latlong = DataFrame(latlong, columns = ['full_geo_id', 'latitude', 'longitude',
        'foodstamps','moe_foodstamps',
       'foodstamps_disability','moe_foodstamps_disability','foodstamps_nodisability','moe_foodstamps_nodisability',
       'nofoodstamps','moe_nofoodstamps',
       'nofoodstamps_disability','moe_nofoodstamps_disability',
       'nofoodstamps_nodisability','moe_nofoodstamps_nodisability'])

In [34]:
lodes_data_full = pd.merge(lodes_data_flat, latlong, how='left', left_on='block_group_code', right_on='full_geo_id') 

## Add additional data on transit for metro

In [35]:
lodes_data_full['category']='lodes'
lodes_data_full

Unnamed: 0,unique,geocode,tot_jobs,age_29_bel_jobs,age_30_54_jobs,age_55_over_jobs,sal_1250_bel_jobs,sal_1250_3333_jobs,sal_3333_over_jobs,goods_prod_jobs,...,moe_foodstamps_disability,foodstamps_nodisability,moe_foodstamps_nodisability,nofoodstamps,moe_nofoodstamps,nofoodstamps_disability,moe_nofoodstamps_disability,nofoodstamps_nodisability,moe_nofoodstamps_nodisability,category
0,240010001001001 to 240010001003147,2.400100e+14,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,,,,,,,,,,lodes
1,240010001001001 to 240010001003147,2.400100e+14,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,,,,,,,,,,lodes
2,240010001001001 to 240010002002031,2.400100e+14,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,,,,,,,,,,lodes
3,240010001001001 to 240010002002031,2.400100e+14,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,,,,,,,,,,lodes
4,240010001001001 to 240010005002106,2.400100e+14,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,,,,,,,,,,lodes
5,240010001001001 to 240010005002106,2.400100e+14,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,,,,,,,,,,lodes
6,240010001001001 to 240010011001053,2.400100e+14,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,,,,,,,,,,lodes
7,240010001001001 to 240010011001053,2.400100e+14,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,,,,,,,,,,lodes
8,240010001001001 to 240010014013053,2.400100e+14,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,,,,,,,,,,lodes
9,240010001001001 to 240010014013053,2.400100e+14,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,,,,,,,,,,lodes


In [18]:
from sqlalchemy import create_engine
sqlite_file = 'sqlite://///Users/Kruthika/Projects/DDL/04-team3/census.db'
engine = create_engine(sqlite_file)
from pandas.io import sql
sql.execute('DROP TABLE IF EXISTS lodes_data',engine)
lodes_data_full.to_sql('lodes_data', engine)

In [25]:
import requests
import json
import pandas as pd
from pandas.io.json import json_normalize
from urllib2 import urlopen

In [26]:
#Get station-level descriptive data from WMATA API, including latitude and longitude of stations and line codes
r = requests.get('https://api.wmata.com/Rail.svc/json/jStations?api_key=fb7119a0d3464673825a26e94db74451')

In [28]:
data_list = []
for entrances in r.json()['Stations']:
    for e in entrances.keys():
        if e not in data_list:
            data_list.append(e)
print data_list

[u'Code', u'Name', u'StationTogether2', u'LineCode4', u'LineCode2', u'LineCode3', u'LineCode1', u'Lon', u'Address', u'Lat', u'StationTogether1']


In [29]:
metro_stations = json_normalize(r.json()['Stations'])
metro_stations.head(3)

Unnamed: 0,Address.City,Address.State,Address.Street,Address.Zip,Code,Lat,LineCode1,LineCode2,LineCode3,LineCode4,Lon,Name,StationTogether1,StationTogether2
0,Washington,DC,607 13th St. NW,20005,A01,38.898303,RD,,,,-77.028099,Metro Center,C01,
1,Washington,DC,1001 Connecticut Avenue NW,20036,A02,38.903192,RD,,,,-77.039766,Farragut North,,
2,Washington,DC,1525 20th St. NW,20036,A03,38.909499,RD,,,,-77.04362,Dupont Circle,,


In [30]:
metro_stations.to_csv('stations.csv')

In [None]:
#Get bus route descriptive data from WMATA API, including latitude and longitude of stations and route codes
r1 = requests.get('https://api.wmata.com/Bus.svc/json/jStops?api_key=fb7119a0d3464673825a26e94db74451')

In [None]:
stops_list = []
for stops in r1.json()['Stops']:
    for s in stops.keys():
        if s not in stops_list:
            stops_list.append(s)
print stops_list

In [33]:
bus_stops = json_normalize(r1.json()['Stops'])
bus_stops.head(3)

Unnamed: 0,Lat,Lon,Name,Routes,StopID
0,38.670006,-77.010283,LIVINGSTON RD + INDIAN HEAD HWY,"[W19, W19v2]",3000037
1,38.670116,-77.010666,LIVINGSTON RD + INDIAN HEAD HWY,"[W19, W19v1, W19v2]",3000038
2,38.710349,-76.982108,MADISON DR + MONROE AVE,[W13],3000064


In [34]:
s = bus_stops.apply(lambda x: pd.Series(x['Routes']),axis=1).stack().reset_index(level=1, drop=True)

In [37]:
s.name = 'Routes'
bus_routes = bus_stops.drop('Routes', axis=1).join(s)
bus_routes['category'] = 'bus'
bus_routes['type'] = 'bus'

In [38]:
bus_routes.columns = ['latitude','longitude','name','unique','detail','category','type']
bus_routes[:6]

Unnamed: 0,latitude,longitude,name,unique,detail,category,type
0,38.670006,-77.010283,LIVINGSTON RD + INDIAN HEAD HWY,3000037,W19,bus,bus
0,38.670006,-77.010283,LIVINGSTON RD + INDIAN HEAD HWY,3000037,W19v2,bus,bus
1,38.670116,-77.010666,LIVINGSTON RD + INDIAN HEAD HWY,3000038,W19,bus,bus
1,38.670116,-77.010666,LIVINGSTON RD + INDIAN HEAD HWY,3000038,W19v1,bus,bus
1,38.670116,-77.010666,LIVINGSTON RD + INDIAN HEAD HWY,3000038,W19v2,bus,bus
2,38.710349,-76.982108,MADISON DR + MONROE AVE,3000064,W13,bus,bus


In [39]:
bus_routes.to_csv('busroutes.csv')

In [40]:
#Get path-level train  data from WMATA API, including latitude and longitude of stations and line codes
rblue = requests.get('https://api.wmata.com/Rail.svc/json/jPath?FromStationCode=J03&ToStationCode=G05&api_key=fb7119a0d3464673825a26e94db74451')
rgreen = requests.get('https://api.wmata.com/Rail.svc/json/jPath?FromStationCode=F11&ToStationCode=E10&api_key=fb7119a0d3464673825a26e94db74451')
rorange = requests.get('https://api.wmata.com/Rail.svc/json/jPath?FromStationCode=K08&ToStationCode=D13&api_key=fb7119a0d3464673825a26e94db74451')
rred = requests.get('https://api.wmata.com/Rail.svc/json/jPath?FromStationCode=A15&ToStationCode=B11&api_key=fb7119a0d3464673825a26e94db74451')
rsilver = requests.get('https://api.wmata.com/Rail.svc/json/jPath?FromStationCode=N06&ToStationCode=G05&api_key=fb7119a0d3464673825a26e94db74451')
ryellow = requests.get('https://api.wmata.com/Rail.svc/json/jPath?FromStationCode=C15&ToStationCode=E06&api_key=fb7119a0d3464673825a26e94db74451')

In [41]:
data_list = []
for paths in rblue.json()['Path']:
    for p in paths.keys():
        if p not in data_list:
            data_list.append(p)
print data_list

dfblue = json_normalize(rblue.json()['Path'])
dfgreen = json_normalize(rgreen.json()['Path'])
dforange = json_normalize(rorange.json()['Path'])
dfred = json_normalize(rred.json()['Path'])
dfsilver = json_normalize(rsilver.json()['Path'])
dfyellow = json_normalize(ryellow.json()['Path'])

[u'StationCode', u'SeqNum', u'LineCode', u'StationName', u'DistanceToPrev']


In [42]:
metro_lines = pd.concat([dfblue, dfgreen, dforange, dfred, dfsilver, dfyellow], ignore_index=True)

In [43]:
metro_lines.head(3)

Unnamed: 0,DistanceToPrev,LineCode,SeqNum,StationCode,StationName
0,0,BL,1,J03,Franconia-Springfield
1,18695,BL,2,J02,Van Dorn Street
2,20246,BL,3,C13,King St-Old Town


In [56]:
metro_combined = pd.merge(metro_lines, metro_stations, how='left', left_on='StationCode', right_on='Code')
metro_combined.head(3)

Unnamed: 0,DistanceToPrev,LineCode,SeqNum,StationCode,StationName,Address.City,Address.State,Address.Street,Address.Zip,Code,Lat,LineCode1,LineCode2,LineCode3,LineCode4,Lon,Name,StationTogether1,StationTogether2
0,0,BL,1,J03,Franconia-Springfield,Springfield,VA,6880 Frontier Drive,22150,J03,38.766129,BL,,,,-77.168797,Franconia-Springfield,,
1,18695,BL,2,J02,Van Dorn Street,Alexandria,VA,5690 Eisenhower Avenue,22310,J02,38.799193,BL,,,,-77.129407,Van Dorn Street,,
2,20246,BL,3,C13,King St-Old Town,Alexandria,VA,1900 King Street,22301,C13,38.806474,BL,YL,,,-77.061115,King St-Old Town,,


In [57]:
metro_combined = DataFrame(metro_combined,columns=['LineCode','SeqNum', 'StationName','Address.City','Address.State','Address.Zip','Lat','Lon'])
metro_combined.columns = ['unique','path','name','metro_name','state','zip','latitude','longitude']
metro_combined['type']='train'
metro_combined['category']='train'
metro_combined.head(3)

Unnamed: 0,unique,path,name,metro_name,state,zip,latitude,longitude,type,category
0,BL,1,Franconia-Springfield,Springfield,VA,22150,38.766129,-77.168797,train,train
1,BL,2,Van Dorn Street,Alexandria,VA,22310,38.799193,-77.129407,train,train
2,BL,3,King St-Old Town,Alexandria,VA,22301,38.806474,-77.061115,train,train


In [49]:
metro_combined.to_csv('trainandroute.csv')

## Blend all data sets together

In [58]:
lodes_transit_data = pd.concat([lodes_data_full, bus_routes, metro_combined], ignore_index=True)
lodes_transit_data[:3] 

Unnamed: 0,age_29_bel_jobs,age_30_54_jobs,age_55_over_jobs,all_other_svc_jobs,block,block_group_code,block_group_name,category,county_name,county_sub_name,...,sal_1250_3333_jobs,sal_1250_bel_jobs,sal_3333_over_jobs,state,tot_jobs,trade_transp_jobs,type,unique,zip,zip_code
0,0,0,1,1,110010001001,110010001001,"1 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,0,1,DC,1,0,Home,110010001001000 to 110010001004010,,20007
1,0,0,1,1,110010001004,110010001004,"4 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,0,1,DC,1,0,Work,110010001001000 to 110010001004010,,20007
2,1,0,0,0,110010001001,110010001001,"1 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,1,0,DC,1,1,Home,110010001001000 to 110010001004013,,20007
3,1,0,0,0,110010001004,110010001004,"4 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,1,0,DC,1,1,Work,110010001001000 to 110010001004013,,20007
4,0,0,1,1,110010001001,110010001001,"1 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,1,0,DC,1,0,Home,110010001001000 to 110010001004016,,20007
5,0,0,1,1,110010001004,110010001004,"4 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,1,0,DC,1,0,Work,110010001001000 to 110010001004016,,20007
6,0,0,1,1,110010001001,110010001001,"1 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,0,1,DC,1,0,Home,110010001001000 to 110010001004022,,20007
7,0,0,1,1,110010001004,110010001004,"4 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,0,0,1,DC,1,0,Work,110010001001000 to 110010001004022,,20007
8,0,1,1,2,110010001001,110010001001,"1 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,1,0,1,DC,2,0,Home,110010001001000 to 110010001004042,,20007
9,0,1,1,2,110010001004,110010001004,"4 (Tract 1, District of Columbia, DC)",lodes,"District of Columbia, DC","Washington city (District of Columbia, DC)",...,1,0,1,DC,2,0,Work,110010001001000 to 110010001004042,,20007


In [60]:
lodes_transit_data [lodes_transit_data['category']=='train'][:5]

Unnamed: 0,age_29_bel_jobs,age_30_54_jobs,age_55_over_jobs,all_other_svc_jobs,block,block_group_code,block_group_name,category,county_name,county_sub_name,...,sal_1250_3333_jobs,sal_1250_bel_jobs,sal_3333_over_jobs,state,tot_jobs,trade_transp_jobs,type,unique,zip,zip_code
4255565,,,,,,,,train,,,...,,,,VA,,,train,BL,22150,
4255566,,,,,,,,train,,,...,,,,VA,,,train,BL,22310,
4255567,,,,,,,,train,,,...,,,,VA,,,train,BL,22301,
4255568,,,,,,,,train,,,...,,,,VA,,,train,BL,22301,
4255569,,,,,,,,train,,,...,,,,VA,,,train,BL,22202,


In [53]:
lodes_transit_data.to_csv('lodes_final_output.csv')

IOError: [Errno 13] Permission denied