# 01 Attribute Network and Make Scenario Pickle

In [1]:
import os
import sys
import yaml

import pandas as pd

# https://github.com/BayAreaMetro/network_wrangler/tree/generic_agency
from network_wrangler import RoadwayNetwork
from network_wrangler import TransitNetwork
from network_wrangler import ProjectCard
from network_wrangler import Scenario
from network_wrangler import WranglerLogger

# https://github.com/BayAreaMetro/Lasso/tree/mtc_parameters
from lasso import ModelRoadwayNetwork
from lasso import StandardTransit
from lasso import Parameters
from lasso import mtc

import pickle

In [2]:
# Reload all modules (except those excluded by %aimport) every time before executing the Python code typed.
# Note: this can cause the pickle.dump to fail with
# PicklingError: Can't pickle <class 'network_wrangler.transitnetwork.DotDict'>: 
# it's not the same object as network_wrangler.transitnetwork.DotDict
%load_ext autoreload
%autoreload 2

## Remote I/O

In [3]:
# input directories - use Box
TM2_REBUILD_BOX_DIR      = r"C:\Users\{}\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild".format(os.getenv('USERNAME'))
TM2_NETWORKS_DATA_DIR    = os.path.join(TM2_REBUILD_BOX_DIR,    "travel-model-two-networks", "data")
input_data_interim_dir   = os.path.join(TM2_NETWORKS_DATA_DIR,  "interim")
input_data_processed_dir = os.path.join(TM2_NETWORKS_DATA_DIR,  "processed", "version_12")
input_data_external_dir  = os.path.join(TM2_NETWORKS_DATA_DIR,  "external")

# Lasso
LASSO_DIR               = 'C:/Users/{}/Documents/GitHub/Lasso'.format(os.getenv('USERNAME'))

In [4]:
# output folders - use local, most likely
output_dir = r"C:\Users\{}\Documents\scratch\tm2_network_building\processed\version_12".format(os.getenv('USERNAME'))

In [5]:
parameters = Parameters(lasso_base_dir = LASSO_DIR)

2022-01-13 14:45:29, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 14:45:29, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso


## Data Reads

In [6]:
%%time
# Wall time: ~10min

# These are zipped in [input_data_processed_dir]\standard_roadway_pre_base_project_cards
# So used unzipped copy in interim dir
link_file  = os.path.join(input_data_interim_dir,'step8_standard_format','link.json')
node_file  = os.path.join(input_data_interim_dir,'step8_standard_format','node.geojson')
shape_file = os.path.join(input_data_interim_dir,'step8_standard_format','shape.geojson')

roadway_net = RoadwayNetwork.read(
    link_filename = link_file, 
    node_filename = node_file, 
    shape_filename = shape_file
)

2022-01-13 14:45:29, INFO: Reading RoadwayNetwork
2022-01-13 14:50:03, INFO: Read 1634790 links from C:\Users\lzorn\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild\travel-model-two-networks\data\interim\step8_standard_format\link.json
2022-01-13 14:50:03, INFO: Read 644480 nodes from C:\Users\lzorn\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild\travel-model-two-networks\data\interim\step8_standard_format\node.geojson
2022-01-13 14:50:03, INFO: Read 869986 shapes from C:\Users\lzorn\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild\travel-model-two-networks\data\interim\step8_standard_format\shape.geojson
Wall time: 11min 32s


In [7]:
roadway_net.links_df[
    roadway_net.links_df.model_link_id.isin([4166,38545])].A

88233     1004047
818710    1024160
Name: A, dtype: int64

In [8]:
roadway_net.links_df.model_link_id.nunique()

1634790

*Please note*: a third party library is used to read in the GTFS feed and it will generate warnings such as, `INFO: Removing calendar.txt from transit network config because file not found`. These warnings can be ignored, as the standard Network Wrangler transit network does not need these files. 

In [9]:
transit_net = TransitNetwork.read(feed_path = os.path.join(input_data_processed_dir, 
                                                           'standard_transit_pre_base_project_cards'))

2022-01-13 14:57:03, INFO: Read in transit feed from: C:\Users\lzorn\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild\travel-model-two-networks\data\processed\version_12\standard_transit_pre_base_project_cards
pruning routes.txt
pruning agency.txt
pruning routes.txt
pruning agency.txt
pruning calendar.txt
pruning calendar.txt
2022-01-13 14:57:03, INFO: Removing calendar.txt from transit network config because file not found
pruning calendar_dates.txt
pruning calendar_dates.txt
2022-01-13 14:57:03, INFO: Removing calendar_dates.txt from transit network config because file not found
pruning stop_times.txt
pruning stops.txt
Pruned stops.txt based on stop_id from 22153 rows to 21881 rows
pruning fare_rules.txt
Pruned fare_rules.txt based on origin_id from 3249 rows to 3162 rows
pruning fare_rules.txt
Pruned fare_rules.txt based on destination_id from 3162 rows to 3111 rows
pruning fare_rules.txt
pruning fare_rules.txt
Pruned fare_rules.txt 

In [10]:
transit_net.feed.fare_rules.shape

(3249, 7)

In [11]:
transit_net.feed.fare_attributes.shape

(805, 8)

## Attribute the Network

In [12]:
%%time
# Wall time: ~5min
r_net = mtc.determine_number_of_lanes(
    roadway_network = roadway_net,
    parameters = parameters,
    network_variable = 'lanes'
)

r_net.links_df.lanes.value_counts()

2022-01-13 14:57:09, INFO: Determining number of lanes
2022-01-13 14:57:09, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 14:57:09, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:02:22, INFO: Finished determining number of lanes using variable: lanes
Wall time: 5min 13s


1    1486418
2     104207
3      22455
4      14124
5       6504
6        929
7        134
8         18
9          1
Name: lanes, dtype: int64

In [13]:
r_net = mtc.calculate_facility_type(
    roadway_network = r_net,
    parameters = parameters,
    network_variable = 'ft')

r_net.links_df.ft.value_counts()

2022-01-13 15:02:23, INFO: Calculating Facility Type
2022-01-13 15:02:23, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:02:23, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:04:59, INFO: Finished calculating roadway class variable: ft


99    890174
7     538666
6      96883
5      58344
4      38332
3       6365
2       3252
1       2774
Name: ft, dtype: int64

In [14]:
r_net = mtc.calculate_useclass(
    roadway_network = r_net,
    parameters = parameters,
    network_variable = 'useclass')

r_net.links_df.useclass.value_counts()

2022-01-13 15:05:00, INFO: Determining useclass
2022-01-13 15:05:00, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:00, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:00, INFO: Calculating and adding roadway network variable: useclass
2022-01-13 15:05:00, INFO: Finished determining variable: useclass


0    1634790
Name: useclass, dtype: int64

In [15]:
r_net = mtc.calculate_assignable(
    roadway_network = r_net,
    parameters = parameters,
    network_variable = 'assignable')

r_net.links_df.assignable.value_counts()

2022-01-13 15:05:00, INFO: Determining assignable
2022-01-13 15:05:00, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:00, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:00, INFO: Calculating and adding roadway network variable: assignable
2022-01-13 15:05:10, INFO: Finished determining assignable using variable: assignable


0.0    1245221
1.0     388313
Name: assignable, dtype: int64

In [16]:
r_net = mtc.calculate_transit(
    roadway_network = r_net,
    parameters = parameters,
    network_variable = 'transit')

r_net.links_df.transit.value_counts()

2022-01-13 15:05:11, INFO: Determining transit
2022-01-13 15:05:11, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:11, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:11, INFO: Calculating and adding roadway network variable: transit
2022-01-13 15:05:11, INFO: Finished determining transit-only variable: transit


0    1634040
1        750
Name: transit, dtype: int64

In [17]:
transit_net.feed.stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21881 entries, 0 to 21880
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   stop_name            21881 non-null  object 
 1   stop_lat             21881 non-null  float64
 2   stop_lon             21881 non-null  float64
 3   zone_id              8233 non-null   object 
 4   agency_raw_name      21881 non-null  object 
 5   stop_code            15289 non-null  object 
 6   location_type        5475 non-null   float64
 7   parent_station       249 non-null    object 
 8   stop_desc            409 non-null    object 
 9   stop_url             261 non-null    object 
 10  stop_timezone        126 non-null    object 
 11  wheelchair_boarding  377 non-null    float64
 12  platform_code        52 non-null     object 
 13  position             0 non-null      object 
 14  direction            0 non-null      object 
 15  * used by routes     124 non-null   

In [18]:
transit_net.feed.fare_rules.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3249 entries, 0 to 3248
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   fare_id            3249 non-null   object 
 1   route_id_original  586 non-null    object 
 2   agency_raw_name    3249 non-null   object 
 3   origin_id          2875 non-null   object 
 4   destination_id     2875 non-null   object 
 5   contains_id        19 non-null     object 
 6   route_id           492 non-null    float64
dtypes: float64(1), object(6)
memory usage: 177.8+ KB


In [19]:
r_net = mtc.calculate_farezone(
    roadway_network = r_net,
    transit_network = transit_net,
    parameters = parameters,
    network_variable = 'farezone',
)

r_net.nodes_df.farezone.value_counts()

2022-01-13 15:05:12, INFO: Determining farezone
2022-01-13 15:05:12, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:12, INFO: Lasso base directory set as: C:/Users/lzorn/Documents/GitHub/Lasso
2022-01-13 15:05:12, INFO: Calculating and adding roadway network variable: farezone
2022-01-13 15:05:13, INFO: Finished determining variable: farezone


90.0     1532
83.0      226
72.0      208
103.0     179
95.0      130
         ... 
70.0        1
71.0        1
73.0        1
24.0        1
32.0        1
Name: farezone, Length: 120, dtype: int64

In [20]:
r_net.nodes_df.county.isnull().sum()

0

In [21]:
r_net.links_df.county.isnull().sum()

0

## Create a Scenario

In [22]:
base_scenario = {"road_net": r_net, "transit_net": transit_net}
working_scenario = Scenario.create_scenario(base_scenario = base_scenario)

2022-01-13 15:05:15, INFO: Creating Scenario


## Write to Disk

In [23]:
working_scenario_filename = os.path.join(output_dir, 'working_scenario_01.pickle')
pickle.dump(working_scenario, open(working_scenario_filename, 'wb'))

In [24]:
pd.crosstab(r_net.links_df.ft, r_net.links_df.roadway)

roadway,cycleway,footway,motorway,motorway_link,primary,primary_link,residential,secondary,secondary_link,service,tertiary,tertiary_link,trunk,trunk_link
ft,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0,0,2774,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,3252,0
3,0,0,0,4983,0,0,0,0,0,0,0,0,0,1382
4,0,0,0,0,15505,0,0,17933,0,0,4894,0,0,0
5,0,0,0,0,8432,0,0,24996,0,0,24034,0,882,0
6,0,0,0,0,2555,1506,0,25600,1566,0,65019,611,26,0
7,0,0,0,0,0,0,538666,0,0,0,0,0,0,0
99,98614,247489,0,0,0,0,0,0,0,541983,0,0,0,0
