In [43]:
# This notebook joins links.shp and v12_link.json via A-B
# for Asana task "publish the roadway network on Tableau / ArcGIS Online": https://app.asana.com/0/572982923864207/1201640719412254
# this script is aborted when I realised "lanes" are not always numeric in v12_link.json

In [1]:
import os
import pandas as pd
import geopandas as gpd

In [21]:
# input and output directories

# directory where the standard network is saved
std_dir = os.path.join('\\..\\..\\..\\..\\Users',
                       'ftsang',
                       'Documents',
                       'tm2_network_exploration',
                       'version_12')

# directory where the outputs will be saved
out_dir = os.path.join('\\..\\..\\..\\..\\Users',
                       'ftsang',
                       'Documents',
                       'tm2_network_exploration',
                       'tm2_roadway_QA')

In [20]:
os.listdir(std_net_dir)

['ACE_2017_3_20_transit.lin',
 'ACTransit_2015_8_14_transit.lin',
 'agency.txt',
 'BART_2015_8_3_transit.lin',
 'Blue&Gold_gtfs_10_4_2017_transit.lin',
 'Caltrain_2015_5_13_transit.lin',
 'Capitol_2017_3_20_transit.lin',
 'CCTA_2015_8_11_transit.lin',
 'commuteDOTorg_GTFSImportExport_20160127_final_mj_transit.lin',
 'complete_network.net',
 'Emeryville_2016_10_26_transit.lin',
 'emme_archive',
 'emme_drive_network.zip',
 'emme_maz_active_modes_network_subregion_north.zip',
 'emme_maz_active_modes_network_subregion_south.zip',
 'emme_tap_transit_network.zip',
 'Fairfield_2015_10_14_transit.lin',
 'fareMatrix.txt',
 'fares.far',
 'faresystem_crosswalk.txt',
 'fare_attributes.txt',
 'fare_rules.txt',
 'frequencies.txt',
 'GGTransit_2015_9_3_transit.lin',
 'link.zip',
 'links.cpg',
 'links.dbf',
 'links.prj',
 'links.shp',
 'links.shx',
 'make_complete_network_from_fixed_width_file.s',
 'Marguerite_2016_10_10_transit.lin',
 'MarinTransit_2015_8_31_transit.lin',
 'model_net.pickle',
 'model

In [23]:
# read tm2 shapes as geodataframe
links_shp_gdf = gpd.read_file(os.path.join(std_dir,'links.shp'))

In [24]:
# get info for links.shp
links_shp_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2240283 entries, 0 to 2240282
Data columns (total 10 columns):
 #   Column      Dtype   
---  ------      -----   
 0   A           int64   
 1   B           int64   
 2   assignable  int64   
 3   link_id     int64   
 4   name        object  
 5   tollbooth   int64   
 6   cntype      object  
 7   lanes_AM    int64   
 8   useclass_A  int64   
 9   geometry    geometry
dtypes: geometry(1), int64(7), object(2)
memory usage: 170.9+ MB


In [29]:
# read tm2 network link attributes
# this zip file constains the json file v12_link.json
tm2_attrs_df = pd.read_json(os.path.join(work_dir,'v12_link.zip'))

In [31]:
# get info for tm2 attributes 
tm2_attrs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2235780 entries, 0 to 2235779
Data columns (total 43 columns):
 #   Column              Dtype 
---  ------              ----- 
 0   access              object
 1   bike_access         int64 
 2   drive_access        int64 
 3   fromIntersectionId  object
 4   lanes               object
 5   maxspeed            object
 6   name                object
 7   oneWay              object
 8   ref                 object
 9   roadway             object
 10  shstGeometryId      object
 11  shstReferenceId     object
 12  toIntersectionId    object
 13  u                   object
 14  v                   object
 15  walk_access         int64 
 16  wayId               object
 17  county              object
 18  model_link_id       int64 
 19  A                   int64 
 20  B                   int64 
 21  rail_traveltime     object
 22  rail_only           int64 
 23  locationReferences  object
 24  ft_cal              object
 25  ft                

In [33]:
# look at what facility types are in v12_link.json
tm2_attrs_df['ft'].value_counts()

99    890204
8     600958
7     538675
6      96409
5      58376
4      39050
3       6438
2       2975
1       2695
Name: ft, dtype: int64

In [36]:
# Perform an outer join, keeping all entries in the shapefile and the attribute file  
tm2_net_attrs_gdf = pd.merge(links_shp_gdf,
                             tm2_attrs_df,
                             how='left',
                             left_on=['A','B'], 
                             right_on = ['A','B'],
                             indicator=True)

In [37]:
# Review the merge indicator
tm2_net_attrs_gdf['_merge'].value_counts()

both          2235780
left_only        4503
right_only          0
Name: _merge, dtype: int64

In [38]:
# How come some shapes in links.shp that don't have attributes from v12_link.json? 
# Of 4,503 links, 
# 1,505 links are "parallel managed lanes needed for Cube"
# 2,998 links are "access/egress dummies needed for Cube"

In [45]:
tm2_net_attrs_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 2240283 entries, 0 to 2240282
Data columns (total 54 columns):
 #   Column              Dtype   
---  ------              -----   
 0   A                   int64   
 1   B                   int64   
 2   assignable_x        int64   
 3   link_id             int64   
 4   name_x              object  
 5   tollbooth_x         int64   
 6   cntype              object  
 7   lanes_AM            int64   
 8   useclass_A          int64   
 9   geometry            geometry
 10  A-B                 object  
 11  access              object  
 12  bike_access         float64 
 13  drive_access        float64 
 14  fromIntersectionId  object  
 15  lanes               object  
 16  maxspeed            object  
 17  name_y              object  
 18  oneWay              object  
 19  ref                 object  
 20  roadway             object  
 21  shstGeometryId      object  
 22  shstReferenceId     object  
 23  toIntersectionId    obje

In [46]:
# Simplify dataset for export
# These are varaibles in the Cube
# To keep this as a geodataframe, the variable 'geometry' needs to be kept 
export_cols = [
    'A',
    'B',
    'assignable_y',
    'bike_access',
    'bus_only',
    'county',
    'drive_access',
    'ft',
    'managed',
    'model_link_id',
    'name_y',
    'rail_only',
    'segment_id',
    'shstGeometryId',
    'tollbooth_y',
    'tollseg',
    'transit',
    'walk_access',
    'lanes',
    'useclass',
    'geometry'
]
tm2_net_gdf = tm2_net_attrs_gdf[export_cols].copy()

In [56]:
# drop all centroid connectors (ft=8)
tm2_net_noFT8n99_gdf = tm2_net_gdf[tm2_net_gdf['ft']<8]

In [49]:
#export to shape
tm2_net_noFT8n99_gdf.to_file(os.path.join(out_dir,'tm2_standard_format.shp'))

  tm2_net_noFT8_gdf.to_file(os.path.join(out_dir,'tm2_standard_format.shp'))


In [51]:
#export all the centroid connectors as its own layer
tm2_net_FT8_gdf = tm2_net_gdf[tm2_net_gdf['ft']==8]
tm2_net_FT8_gdf.to_file(os.path.join(out_dir,'tm2_CentroidConnectors.shp'))

  tm2_net_FT8_gdf.to_file(os.path.join(out_dir,'tm2_CentroidConnectors.shp'))


In [52]:
#prepare to export by county
tm2_attrs_df['county'].value_counts()


Santa Clara      660008
Alameda          443532
Contra Costa     326621
San Mateo        203882
Sonoma           178792
Solano           157454
San Francisco    133512
Marin             84883
Napa              47068
External             28
Name: county, dtype: int64

In [55]:
# export by county
tm2_ContraCosta_gdf = tm2_net_noFT8n99_gdf[tm2_net_noFT8n99_gdf['county']=='Contra Costa']
tm2_ContraCosta_gdf.to_file(os.path.join(out_dir,'tm2_ContraCosta.shp'))

  tm2_ContraCosta_gdf.to_file(os.path.join(out_dir,'tm2_ContraCosta.shp'))


In [58]:
# aborted the idea of publishing the netework in standard format when I realised "lanes" are not always numeric
tm2_attrs_df['lanes'].value_counts()

TypeError: unhashable type: 'dict'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'dict'


1                                                                                                                                                  2087484
2                                                                                                                                                   104031
3                                                                                                                                                    22211
4                                                                                                                                                    13305
5                                                                                                                                                     6261
6                                                                                                                                                      785
{'default': 4, 'timeofday': [{'time': [21600, 36000], 'value': 3}, {'t