In [26]:
# This notebook creates the tm2 roadway network
# by joining the shapes from the standard network (links.shp) to the model network attributes (complete_network.dbf).
# It also splits the roadway network into smaller chunks, to make rendering and reviewing in ArcGIS easier.

import os
import pandas as pd
import geopandas as gpd
from simpledbf import Dbf5

In [27]:
# input and output directories
# all three live under the same exploration folder, so build that root once
tm2_exploration_dir = os.path.join('\\..\\..\\..\\..\\Users',
                                   'ftsang',
                                   'Documents',
                                   'tm2_network_exploration')

# directory where the outputs will be saved
output_dir = os.path.join(tm2_exploration_dir, 'tm2_roadway_QA')

# directory where the cube export (dbf) is saved (a sub-folder of the output directory)
input_dir1 = os.path.join(output_dir, 'Cube_export_V12')

# directory where the tm2 shapes are saved
input_dir2 = os.path.join(tm2_exploration_dir, 'version_12')

In [28]:
# list what is in the cube export directory
os.listdir(input_dir1)

['.ipynb_checkpoints',
 'complete_network.dbf',
 'complete_network.prj',
 'complete_network.shp',
 'complete_network.shx',
 'Create_roadway_by_county.ipynb']

In [29]:
# open the attribute table (dbf) exported from the cube network
complete_net_dbf_path = os.path.join(input_dir1, 'complete_network.dbf')
complete_net_dbf = Dbf5(complete_net_dbf_path)

In [30]:
# convert the dbf table opened above into a pandas dataframe
tm2_attrs_df = complete_net_dbf.to_dataframe()

In [31]:
# show the row count, column names and dtypes read from complete_network.dbf
tm2_attrs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240283 entries, 0 to 2240282
Data columns (total 30 columns):
 #   Column       Dtype  
---  ------       -----  
 0   A            int64  
 1   B            int64  
 2   ASSIGNABLE   int64  
 3   BIKE_ACCESS  int64  
 4   BUS_ONLY     int64  
 5   COUNTY       object 
 6   DISTANCE     float64
 7   DRIVE_ACCES  int64  
 8   FT           int64  
 9   MANAGED      int64  
 10  MODEL_LINK_  int64  
 11  NAME         object 
 12  RAIL_ONLY    int64  
 13  SEGMENT_ID   int64  
 14  SHSTGEOMETR  object 
 15  TOLLBOOTH    int64  
 16  TOLLSEG      int64  
 17  TRANSIT      int64  
 18  WALK_ACCESS  int64  
 19  CNTYPE       object 
 20  LANES_EA     int64  
 21  LANES_AM     int64  
 22  LANES_MD     int64  
 23  LANES_PM     int64  
 24  LANES_EV     int64  
 25  USECLASS_EA  int64  
 26  USECLASS_AM  int64  
 27  USECLASS_MD  int64  
 28  USECLASS_PM  int64  
 29  USECLASS_EV  int64  
dtypes: float64(1), int64(25), object(4)
memory usage: 

In [32]:
# load the tm2 link shapes into a geodataframe
links_shp_path = os.path.join(input_dir2, 'links.shp')
links_shp_gdf = gpd.read_file(links_shp_path)

In [33]:
# Attach the cube attributes to the link shapes, matching on the A/B node pair.
# An outer join keeps rows that exist on only one side, and indicator=True adds
# a '_merge' column recording which side(s) each row came from.
join_keys = ['A', 'B']
tm2_net_attrs_gdf = pd.merge(
    links_shp_gdf,
    tm2_attrs_df,
    on=join_keys,
    how='outer',
    indicator=True,
)

In [34]:
# review the merge indicator:
#   'both'       - the A/B pair was found in the shapes and in the attributes
#   'left_only'  - the A/B pair was found only in the shapes (links_shp_gdf)
#   'right_only' - the A/B pair was found only in the attributes (tm2_attrs_df)
tm2_net_attrs_gdf['_merge'].value_counts()

both          2240283
left_only           0
right_only          0
Name: _merge, dtype: int64

In [35]:
# show the row count, column names and dtypes of the joined geodataframe
tm2_net_attrs_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 2240283 entries, 0 to 2240282
Data columns (total 39 columns):
 #   Column       Dtype   
---  ------       -----   
 0   A            int64   
 1   B            int64   
 2   assignable   int64   
 3   link_id      int64   
 4   name         object  
 5   tollbooth    int64   
 6   cntype       object  
 7   lanes_AM     int64   
 8   useclass_A   int64   
 9   geometry     geometry
 10  ASSIGNABLE   int64   
 11  BIKE_ACCESS  int64   
 12  BUS_ONLY     int64   
 13  COUNTY       object  
 14  DISTANCE     float64 
 15  DRIVE_ACCES  int64   
 16  FT           int64   
 17  MANAGED      int64   
 18  MODEL_LINK_  int64   
 19  NAME         object  
 20  RAIL_ONLY    int64   
 21  SEGMENT_ID   int64   
 22  SHSTGEOMETR  object  
 23  TOLLBOOTH    int64   
 24  TOLLSEG      int64   
 25  TRANSIT      int64   
 26  WALK_ACCESS  int64   
 27  CNTYPE       object  
 28  LANES_EA     int64   
 29  LANES_AM     int64   
 30  LANES_MD  

In [44]:
# Shorten the long attribute names to prepare for exporting to ESRI shapefile,
# whose field names can hold at most 10 characters.
# NOTE(review): 'ModelLinkID' is 11 characters long, so it is still over that limit.
shapefile_field_names = {
    'BIKE_ACCESS': 'BikeAccess',
    'DRIVE_ACCES': 'DrvAccess',
    'MODEL_LINK_': 'ModelLinkID',
    'SHSTGEOMETR': 'SHSTGEOMET',
    'WALK_ACCESS': 'WalkAccess',
    'USECLASS_EA': 'UseClassEA',
    'USECLASS_AM': 'UseClassAM',
    'USECLASS_MD': 'UseClassMD',
    'USECLASS_PM': 'UseClassPM',
    'USECLASS_EV': 'UseClassEV',
}
tm2_net_attrs_gdf.rename(columns=shapefile_field_names, inplace=True)

In [45]:
# count the links in each county
tm2_net_attrs_gdf['COUNTY'].value_counts()

Santa Clara      662698
Alameda          444167
Contra Costa     327165
San Mateo        203945
Sonoma           178969
Solano           157577
San Francisco    133512
Marin             85148
Napa              47068
External             34
Name: COUNTY, dtype: int64

In [46]:
# count the links in each facility type (FT)
tm2_net_attrs_gdf['FT'].value_counts()

99    890204
8     603956
7     538684
6      96409
5      58376
4      39051
3       6452
1       3666
2       3485
Name: FT, dtype: int64

In [52]:
# Simplify the dataset for export.
# Keep only the variables that come from the Cube network, plus 'geometry',
# which has to stay for the result to remain a geodataframe.
export_cols = [
    'A',
    'B',
    'ASSIGNABLE',
    'BikeAccess',
    'BUS_ONLY',
    'COUNTY',
    'DISTANCE',
    'DrvAccess',
    'FT',
    'MANAGED',
    'ModelLinkID',
    'NAME',
    'RAIL_ONLY',
    'SEGMENT_ID',
    'SHSTGEOMET',
    'TOLLBOOTH',
    'TOLLSEG',
    'TRANSIT',
    'WalkAccess',
    'CNTYPE',
    'LANES_EA',
    'LANES_AM',
    'LANES_MD',
    'LANES_PM',
    'LANES_EV',
    'UseClassEA',
    'UseClassAM',
    'UseClassMD',
    'UseClassPM',
    'UseClassEV',
    'geometry'
]

# 'ModelLinkID' is 11 characters, one more than a shapefile field name can hold,
# so every export below would otherwise have it shortened implicitly.
# Shorten it explicitly on the export frame instead. 'ModelLinkI' is simply its
# first 10 characters, which is presumably what the implicit shortening wrote
# before -- TODO confirm against an existing output layer.
# rename() returns a new frame, so no separate .copy() is needed.
tm2_net_gdf = (
    tm2_net_attrs_gdf[export_cols]
    .rename(columns={'ModelLinkID': 'ModelLinkI'})
)

In [56]:
# keep only links with ft below 8, i.e. drop ft=8 (centroid connectors) and
# ft=99 (bike/pedestrian only paths), to make the output shapefile smaller
tm2_noFT8n99_gdf = tm2_net_gdf.loc[tm2_net_gdf['FT'].lt(8)]

In [57]:
# export the network without ft=8 and ft=99 links to a shapefile in the output directory
tm2_noFT8n99_gdf.to_file(os.path.join(output_dir,'tm2_noFT8n99.shp'))

  tm2_noFT8n99_gdf.to_file(os.path.join(output_dir,'tm2_noFT8n99.shp'))


In [58]:
# before exporting by county, count the remaining links in each county
tm2_noFT8n99_gdf['COUNTY'].value_counts()

Santa Clara      184784
Alameda          149414
Contra Costa     124701
Sonoma            70525
San Mateo         67979
Solano            59203
San Francisco     37417
Marin             33894
Napa              18206
Name: COUNTY, dtype: int64

In [62]:
# Alameda: pull this county's links out of the trimmed network and save them as their own layer
tm2_Alameda_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Alameda')]
tm2_Alameda_gdf.to_file(os.path.join(output_dir, 'tm2_Alameda.shp'))

  tm2_Alameda_gdf.to_file(os.path.join(output_dir,'tm2_Alameda.shp'))


In [63]:
# Contra Costa: pull this county's links out of the trimmed network and save them as their own layer
tm2_ContraCosta_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Contra Costa')]
tm2_ContraCosta_gdf.to_file(os.path.join(output_dir, 'tm2_ContraCosta.shp'))

  tm2_ContraCosta_gdf.to_file(os.path.join(output_dir,'tm2_ContraCosta.shp'))


In [64]:
# Marin: pull this county's links out of the trimmed network and save them as their own layer
tm2_Marin_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Marin')]
tm2_Marin_gdf.to_file(os.path.join(output_dir, 'tm2_Marin.shp'))

  tm2_Marin_gdf.to_file(os.path.join(output_dir,'tm2_Marin.shp'))


In [65]:
# Napa: pull this county's links out of the trimmed network and save them as their own layer
tm2_Napa_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Napa')]
tm2_Napa_gdf.to_file(os.path.join(output_dir, 'tm2_Napa.shp'))

  tm2_Napa_gdf.to_file(os.path.join(output_dir,'tm2_Napa.shp'))


In [66]:
# San Francisco: pull this county's links out of the trimmed network and save them as their own layer
tm2_SanFrancisco_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('San Francisco')]
tm2_SanFrancisco_gdf.to_file(os.path.join(output_dir, 'tm2_SanFrancisco.shp'))

  tm2_SanFrancisco_gdf.to_file(os.path.join(output_dir,'tm2_SanFrancisco.shp'))


In [67]:
# San Mateo: pull this county's links out of the trimmed network and save them as their own layer
tm2_SanMateo_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('San Mateo')]
tm2_SanMateo_gdf.to_file(os.path.join(output_dir, 'tm2_SanMateo.shp'))

  tm2_SanMateo_gdf.to_file(os.path.join(output_dir,'tm2_SanMateo.shp'))


In [68]:
# Santa Clara: pull this county's links out of the trimmed network and save them as their own layer
tm2_SantaClara_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Santa Clara')]
tm2_SantaClara_gdf.to_file(os.path.join(output_dir, 'tm2_SantaClara.shp'))

  tm2_SantaClara_gdf.to_file(os.path.join(output_dir,'tm2_SantaClara.shp'))


In [69]:
# Solano: pull this county's links out of the trimmed network and save them as their own layer
tm2_Solano_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Solano')]
tm2_Solano_gdf.to_file(os.path.join(output_dir, 'tm2_Solano.shp'))

  tm2_Solano_gdf.to_file(os.path.join(output_dir,'tm2_Solano.shp'))


In [70]:
# Sonoma: pull this county's links out of the trimmed network and save them as their own layer
tm2_Sonoma_gdf = tm2_noFT8n99_gdf.loc[tm2_noFT8n99_gdf['COUNTY'].eq('Sonoma')]
tm2_Sonoma_gdf.to_file(os.path.join(output_dir, 'tm2_Sonoma.shp'))

  tm2_Sonoma_gdf.to_file(os.path.join(output_dir,'tm2_Sonoma.shp'))


In [72]:
# Write the centroid connectors (ft=8) out as a layer of their own
tm2_ft8_gdf = tm2_net_gdf.loc[tm2_net_gdf['FT'].eq(8)]
tm2_ft8_gdf.to_file(os.path.join(output_dir, 'tm2_ft8_CentroidConnectors.shp'))

  tm2_ft8_gdf.to_file(os.path.join(output_dir,'tm2_ft8_CentroidConnectors.shp'))


In [None]:
# Write the bike/pedestrian only paths (ft=99) out as a layer of their own
tm2_ft99_gdf = tm2_net_gdf.loc[tm2_net_gdf['FT'].eq(99)]
tm2_ft99_gdf.to_file(os.path.join(output_dir, 'tm2_ft99_BikePedOnlyPaths.shp'))

  tm2_ft99_gdf.to_file(os.path.join(output_dir,'tm2_ft99_BikePedOnlyPaths.shp'))
