In [1]:
import pandas as pd
import geopandas as gpd
from functools import reduce

In [2]:
hbd = pd.read_csv('./original_data/est_commuters_HBD.csv', index_col=0)
ipums = pd.read_csv('./original_data/commuter_origin_counts.csv',index_col=0)
ipums = ipums[ipums['YEAR']==2019].reset_index(drop=True)

#### We will only visualize common transmodes of hbd & ipums

In [3]:
transmode = list(set(hbd.TransMode.unique()) & set(ipums.TransMode.unique()))
print('In Both:',transmode)
hbd = hbd[hbd['TransMode'].isin(transmode)].reset_index(drop=True)

In Both: ['Subway', 'AutoOccupants', 'Bicycle', 'CommuterRail', 'Bus', 'Ferry']


#### Check the PointEntryExit of each sector

In [4]:
def entry_check(dataframe):
    hbd_sector_entry = dataframe[["Sector","PointEntryExit"]].drop_duplicates().reset_index(drop=True)
    entry_counts = pd.DataFrame(hbd_sector_entry['PointEntryExit'].value_counts())
    repeat_entry = entry_counts[entry_counts['PointEntryExit']>1].index
    return hbd_sector_entry[hbd_sector_entry['PointEntryExit'].isin(repeat_entry)]
entry_check(hbd)

Unnamed: 0,Sector,PointEntryExit
34,Brooklyn,Ferry
43,NewJersey,AmtrakNECorridor
45,NewJersey,Ferry
54,Queens,AmtrakNECorridor
55,Queens,Ferry
61,StatenIsland,Ferry


In [5]:
### this step is only for the convenience of visualization
### distinguish the PointEntryExit with the same name in each sector
hbd.loc[(hbd['PointEntryExit']=='Ferry') & (hbd['Sector']=='Brooklyn'),'PointEntryExit'] = 'Ferry_Brooklyn'
hbd.loc[(hbd['PointEntryExit']=='Ferry') & (hbd['Sector']=='Queens'),'PointEntryExit'] = 'Ferry_Queens'
hbd.loc[(hbd['PointEntryExit']=='Ferry') & (hbd['Sector']=='NewJersey'),'PointEntryExit'] = 'Ferry_NewJersey'
hbd.loc[(hbd['PointEntryExit']=='Ferry') & (hbd['Sector']=='StatenIsland'),'PointEntryExit'] = 'Ferry_StatenIsland'
hbd.loc[(hbd['PointEntryExit']=='AmtrakNECorridor') & (hbd['Sector']=='NewJersey'),'PointEntryExit'] = 'AmtrakNECorridor_NewJersey'
hbd.loc[(hbd['PointEntryExit']=='AmtrakNECorridor') & (hbd['Sector']=='Queens'),'PointEntryExit'] = 'AmtrakNECorridor_Queens'

#### Check what kinds of TransMode each entry has, and whether they are 'one-way'
For example, as shown in the below chart, '10thAve', '1stAve', '2ndAve'... are one-way road (bus and autos),\
but there are some slow traffic (bicycle) pass these entries in the wrong direction

In [6]:
pd.set_option('display.max_rows', None)

hbd_entry_dir = hbd.groupby(by=['PointEntryExit','TransMode','Direction']).agg({"Estimated_Commuters":"sum"}).reset_index()

## build a complete dataframe for 'sanity check', 62*6*2=744 (entry*mode*dir)
PointEntryExit = pd.DataFrame(hbd['PointEntryExit'].unique(),columns=['PointEntryExit'])
TransMode = pd.DataFrame(transmode,columns=['TransMode'])
Direction = pd.DataFrame(['In','Out'],columns=['Direction'])
PointEntryExit['tmp'] = 1
TransMode['tmp'] = 1
Direction['tmp'] = 1
tmp = reduce(lambda left,right:pd.merge(left,right,on='tmp',how='inner'),[PointEntryExit,TransMode,Direction]).drop('tmp',axis=1)

## using this table, we can observe what kinds of TransMode each entry has, and whether they are 'one-way'
check = tmp.merge(right=hbd_entry_dir,on=['PointEntryExit','TransMode','Direction'],how='outer')
check = check.pivot_table(values='Estimated_Commuters',index=['PointEntryExit'],columns=['TransMode','Direction'])
check.fillna('-')

TransMode,AutoOccupants,AutoOccupants,Bicycle,Bicycle,Bus,Bus,CommuterRail,CommuterRail,Ferry,Ferry,Subway,Subway
Direction,In,Out,In,Out,In,Out,In,Out,In,Out,In,Out
PointEntryExit,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
10thAveAmsterdamAve,-,25841.0,219.0,1504.0,-,1697.0,-,-,-,-,-,-
11thAveWestEndAve,18030.0,12663.0,791.0,91.0,1191.0,943.0,-,-,-,-,-,-
12thAveWestSideHighway,74175.0,64316.0,42.0,140.0,-,-,-,-,-,-,-,-
14thStTunnel,-,-,-,-,-,-,-,-,-,-,123513.0,123278.0
1stAve,-,32855.0,79.0,3515.0,-,8942.0,-,-,-,-,-,-
2ndAve,52336.0,-,4275.0,142.0,8245.0,-,-,-,-,-,-,-
2ndAveLocal,-,-,-,-,-,-,-,-,-,-,79827.0,72200.0
3rdAve,-,34899.0,88.0,2227.0,-,5488.0,-,-,-,-,-,-
53rdStTunnel,-,-,-,-,-,-,-,-,-,-,145109.0,145534.0
5thAve,28001.0,-,2498.0,32.0,12817.0,-,-,-,-,-,-,-


#### Reshaping the HBD dataset for easier spatial visualization

In [7]:
pd.reset_option('display.max_rows', None)
hbd_reshape = hbd.pivot_table(values='Estimated_Commuters', index=['Sector','PointEntryExit','TransMode','Hour'], columns='Direction').fillna(0).astype(int).reset_index()
hbd_reshape

Direction,Sector,PointEntryExit,TransMode,Hour,In,Out
0,60thSt,10thAveAmsterdamAve,AutoOccupants,0,0,693
1,60thSt,10thAveAmsterdamAve,AutoOccupants,1,0,432
2,60thSt,10thAveAmsterdamAve,AutoOccupants,2,0,308
3,60thSt,10thAveAmsterdamAve,AutoOccupants,3,0,319
4,60thSt,10thAveAmsterdamAve,AutoOccupants,4,0,350
...,...,...,...,...,...,...
2395,StatenIsland,Ferry_StatenIsland,Ferry,19,772,2310
2396,StatenIsland,Ferry_StatenIsland,Ferry,20,638,1331
2397,StatenIsland,Ferry_StatenIsland,Ferry,21,458,925
2398,StatenIsland,Ferry_StatenIsland,Ferry,22,295,730


In [12]:
hbd_reshape.TransMode.unique()

array(['AutoOccupants', 'Bicycle', 'Bus', 'Subway', 'CommuterRail',
       'Ferry'], dtype=object)

#### Add the location information of each PointEntryExit

In [23]:
# Add 'ALL' (the sum of all TransMode) in the TransMode
hbd_allmodes = hbd_reshape.groupby(by=['Sector','PointEntryExit','Hour']).agg({"In":"sum","Out":"sum"}).reset_index()
hbd_allmodes['TransMode'] = 'ALL'
hbd_viz = pd.concat([hbd_reshape,hbd_allmodes],axis=0)

hbd_viz['In+Out'] = hbd_viz['In'] + hbd_viz['Out']    # to measure the total traffic volumn
hbd_viz['|In-Out|'] = abs(hbd_viz['In'] - hbd_viz['Out'])    # to measure the 'net' traffic volumn

def inout(row):
    if row['In'] > row['Out']:
        return 'In +'
    else:
        return 'Out +'
hbd_viz['Net Flow Dir'] = hbd_viz.apply(lambda row: inout(row), axis=1)

entryexit_point = gpd.read_file('./spatial_data/entryexit_point.geojson')
entryexit_point['lon'] = entryexit_point['geometry'].x
entryexit_point['lat'] = entryexit_point['geometry'].y
hbd_viz = hbd_viz.merge(entryexit_point,on=['Sector','PointEntryExit'])
hbd_viz = hbd_viz.drop(['Type','geometry'], axis=1)
hbd_viz['Hour'].astype(str)
hbd_viz['Hour'] = hbd_viz['Hour'].astype(str).str.zfill(2)
hbd_viz['Hour'] = '2019-10-23 ' + hbd_viz['Hour'] + ':00:00'
hbd_viz['Hour'] = pd.to_datetime(hbd_viz['Hour'])
hbd_viz = hbd_viz[hbd_viz['TransMode']!='ALL'].reset_index()
hbd_viz

Unnamed: 0,index,Sector,PointEntryExit,TransMode,Hour,In,Out,In+Out,|In-Out|,Net Flow Dir,lon,lat
0,0,60thSt,10thAveAmsterdamAve,AutoOccupants,2019-10-23 00:00:00,0,693,693,693,Out +,-73.987987,40.769873
1,1,60thSt,10thAveAmsterdamAve,AutoOccupants,2019-10-23 01:00:00,0,432,432,432,Out +,-73.987987,40.769873
2,2,60thSt,10thAveAmsterdamAve,AutoOccupants,2019-10-23 02:00:00,0,308,308,308,Out +,-73.987987,40.769873
3,3,60thSt,10thAveAmsterdamAve,AutoOccupants,2019-10-23 03:00:00,0,319,319,319,Out +,-73.987987,40.769873
4,4,60thSt,10thAveAmsterdamAve,AutoOccupants,2019-10-23 04:00:00,0,350,350,350,Out +,-73.987987,40.769873
...,...,...,...,...,...,...,...,...,...,...,...,...
2395,3859,StatenIsland,Ferry_StatenIsland,Ferry,2019-10-23 19:00:00,772,2310,3082,1538,Out +,-74.012935,40.700181
2396,3860,StatenIsland,Ferry_StatenIsland,Ferry,2019-10-23 20:00:00,638,1331,1969,693,Out +,-74.012935,40.700181
2397,3861,StatenIsland,Ferry_StatenIsland,Ferry,2019-10-23 21:00:00,458,925,1383,467,Out +,-74.012935,40.700181
2398,3862,StatenIsland,Ferry_StatenIsland,Ferry,2019-10-23 22:00:00,295,730,1025,435,Out +,-74.012935,40.700181


In [9]:
hbd_viz_all = hbd_viz.groupby(by=['Hour','lon','lat']).agg({"In":"sum","Out":"sum"}).reset_index()
hbd_viz_all['In+Out'] = hbd_viz_all['In'] + hbd_viz_all['Out']
hbd_viz_all['Net Flow Dir'] = hbd_viz_all.apply(lambda row: inout(row), axis=1)
# hbd_viz_all.to_csv('/Users/jingrong/Desktop/tecnyc/scenarios/hbd_viz_all.csv',index=0)
hbd_viz_all

Unnamed: 0,Hour,lon,lat,In,Out,In+Out,Net Flow Dir
0,2019-10-23 00:00:00,-74.018565,40.713469,71,367,438,Out +
1,2019-10-23 00:00:00,-74.018213,40.715044,2,12,14,Out +
2,2019-10-23 00:00:00,-74.014598,40.699967,132,671,803,Out +
3,2019-10-23 00:00:00,-74.012935,40.700181,49,428,477,Out +
4,2019-10-23 00:00:00,-74.012074,40.726428,975,1254,2229,Out +
...,...,...,...,...,...,...,...
1099,2019-10-23 23:00:00,-73.959739,40.758666,518,598,1116,Out +
1100,2019-10-23 23:00:00,-73.958949,40.758352,3567,3145,6712,In +
1101,2019-10-23 23:00:00,-73.958259,40.758466,3329,4967,8296,Out +
1102,2019-10-23 23:00:00,-73.957980,40.758740,0,0,0,Out +


In [14]:
hbd_viz.TransMode.unique()

array(['ALL'], dtype=object)

In [16]:
# hbd_viz.groupby(by=['Hour']).agg({"In":"sum","Out":"sum"}).reset_index().to_csv('/Users/jingrong/Desktop/tecnyc/scenarios/hbd_viz_all_all.csv',index=0)

In [20]:
# hbd_viz.groupby(by=['Hour']).agg({"In":"sum","Out":"sum"}).reset_index().to_csv('/Users/jingrong/Desktop/tecnyc/scenarios/hbd_viz_all_autos.csv',index=0)

In [26]:
inout_bymode = hbd_viz.groupby(by=['Hour','TransMode']).agg({"In":"sum","Out":"sum"}).reset_index()
inout_bymode.pivot_table(values=['In','Out'], index=['Hour'], columns='TransMode').fillna(0).astype(int).reset_index().to_csv('/Users/jingrong/Desktop/tecnyc/scenarios/hbd_viz_all_bymode.csv',index=0)

#### Now, the dataframe is good for spatial visualiation and analysis.

In [10]:
### if we want to use the timebar function of kepler.gl, time column must be 'time' type
# hbd_viz['Hour'] = pd.to_datetime(hbd_viz['Hour'], unit='h')
hbd_viz.to_csv('hbd_viz.csv',index=0)

In [11]:
### check the total number, match perfectly
sum_by_sector_dir = hbd_viz[(hbd_viz['TransMode']!='ALL')].groupby(by=["Sector"]).agg({"In":"sum","Out":"sum"})
sum_by_sector_dir

Unnamed: 0_level_0,In,Out
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1
