In [1]:
# this notebook reads the geopackage file
import os
import pandas as pd
import geopandas as gpd

In [3]:
# Input / output locations.
# Work is done on the local disk to minimize latency; once finished, the output is
# moved by hand to Box (C:\Users\ftsang\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild\travel-model-two-networks\tm2_network_exploration_Flavia\tm2_roadway_QA)
# NOTE(review): both paths below are tied to one user's machine -- adjust before running elsewhere.

# directory where the outputs will be saved
output_dir = os.path.join('\\..\\..\\..\\..\\Users',
                          'ftsang',
                          'Documents',
                          'tm2_network_exploration',
                          'tm2_roadway_gpkg_QA')

# directory where the tm2 geopackage is saved: the 'network_gpkg' folder inside output_dir
input_dir = os.path.join(output_dir, 'network_gpkg')

In [4]:
# list the contents of the input directory to confirm the geopackage is there
os.listdir(input_dir)

['model_net.gpkg']

In [21]:
# load the 'links' layer of the tm2 geopackage into a GeoDataFrame
links_gdf = gpd.read_file(
    os.path.join(input_dir, 'model_net.gpkg'),
    layer='links',
)

In [22]:
# summarize the links layer read from the geopackage: index range, column names and dtypes
links_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2240283 entries, 0 to 2240282
Data columns (total 31 columns):
 #   Column          Dtype   
---  ------          -----   
 0   A               int64   
 1   B               int64   
 2   model_link_id   int64   
 3   shstGeometryId  object  
 4   name            object  
 5   ft              int64   
 6   assignable      int64   
 7   cntype          object  
 8   distance        float64 
 9   county          object  
 10  bike_access     int64   
 11  drive_access    int64   
 12  walk_access     int64   
 13  rail_only       int64   
 14  bus_only        int64   
 15  transit         int64   
 16  managed         int64   
 17  tollbooth       int64   
 18  tollseg         int64   
 19  segment_id      int64   
 20  lanes_EA        int64   
 21  lanes_AM        int64   
 22  lanes_MD        int64   
 23  lanes_PM        int64   
 24  lanes_EV        int64   
 25  useclass_EA     int64   
 26  useclass_AM     int64   
 27  usec

In [23]:
# add a blank (empty-string) field for CTA comments
# this is done on the full links table before any filtering, so every subset
# derived from links_gdf afterwards carries the column as well
links_gdf['CTA_comments'] = '' 

In [25]:
# keep only links whose facility type (ft) is below 8; this removes all centroid
# connectors and bike/ped links (ft=8 and ft=99) so the output layer is more readable
links_noFT8n99_gdf = links_gdf.loc[links_gdf['ft'] < 8]

In [27]:
# FT=8 also includes the access/egress dummy links for managed lanes, which the
# ft<8 filter removed. Save these dummy links in their own frame (any link whose
# name contains 'Dummy'; links with a missing name are treated as non-dummy) ...
dummy_gdf = links_gdf[links_gdf['name'].str.contains('Dummy', na=False)]

# ... and then append (concat) them back.
# Only dummy links that are not already in links_noFT8n99_gdf are appended, matched on
# the row index both frames inherit from links_gdf. This keeps the cell safe to re-run:
# a second run finds every dummy link already present and adds nothing, instead of
# duplicating them. It also avoids duplicates if a dummy link ever has ft<8.
missing_dummy_gdf = dummy_gdf[~dummy_gdf.index.isin(links_noFT8n99_gdf.index)]
if not missing_dummy_gdf.empty:
    links_noFT8n99_gdf = pd.concat([links_noFT8n99_gdf, missing_dummy_gdf])

In [35]:
# look at data by facility type before and after the data processing
# before: number of links per ft value in the full, unfiltered links table
links_gdf['ft'].value_counts()

99    890204
8     603956
7     538684
6      96409
5      58376
4      39051
3       6452
1       3666
2       3485
Name: ft, dtype: int64

In [36]:
# after: number of links per ft value once ft>=8 links are dropped and the
# managed-lane dummy links are appended back (the only ft=8 links that should remain)
links_noFT8n99_gdf['ft'].value_counts()

7    538684
6     96409
5     58376
4     39051
3      6452
1      3666
2      3485
8      2998
Name: ft, dtype: int64

In [37]:
# before exporting by county, look at data by county
# first with all links included (number of links per value of the 'county' column)
links_gdf['county'].value_counts()

Santa Clara      662698
Alameda          444167
Contra Costa     327165
San Mateo        203945
Sonoma           178969
Solano           157577
San Francisco    133512
Marin             85148
Napa              47068
External             34
Name: county, dtype: int64

In [38]:
# second without centroid connectors and bike ped
# (the managed-lane dummy links that were appended back are still counted here)
links_noFT8n99_gdf['county'].value_counts()

Santa Clara      186576
Alameda          149835
Contra Costa     125062
Sonoma            70643
San Mateo         68019
Solano            59287
San Francisco     37417
Marin             34070
Napa              18206
External              6
Name: county, dtype: int64

In [46]:
# Alameda: select the county's links and write them as layer 'tm2_Alameda' of tm2QA.gpkg
tm2_Alameda_gdf = links_noFT8n99_gdf.loc[links_noFT8n99_gdf['county'].eq('Alameda')]
tm2_Alameda_gdf.to_file(
    os.path.join(output_dir, "tm2QA.gpkg"),
    layer='tm2_Alameda',
    driver="GPKG",
)

In [47]:
# Contra Costa: select the county's links and write them as layer 'tm2_ContraCosta' of tm2QA.gpkg
tm2_ContraCosta_gdf = links_noFT8n99_gdf.loc[links_noFT8n99_gdf['county'].eq('Contra Costa')]
tm2_ContraCosta_gdf.to_file(
    os.path.join(output_dir, "tm2QA.gpkg"),
    layer='tm2_ContraCosta',
    driver="GPKG",
)
                            
# make sure the final geopackage has multiple layers 