In [1]:
# this notebook creates a crosstab of heuristic number by facility type
# the main input is v12_link.json

In [2]:
import os
import pandas as pd
import geopandas as gpd

In [3]:
# input and output locations

# both locations sit under the same 'Users\ftsang' folder, so build that prefix once
# NOTE(review): this is a machine-specific path; it will only resolve where that folder exists
user_dir = os.path.join('\\..\\..\\..\\..\\Users', 'ftsang')

# folder holding the standard network files (read by this notebook)
input_dir = os.path.join(
    user_dir,
    'Box',
    'Modeling and Surveys',
    'Development',
    'Travel Model Two Development',
    'Travel Model Two Network Rebuild',
    'travel-model-two-networks',
    'data',
    'processed',
    'version_12',
    'network_standard_v12.03',
)

# folder where the outputs will be saved
output_dir = os.path.join(
    user_dir,
    'Documents',
    'tm2_network_exploration',
    'tm2_roadway_QA',
)

In [4]:
# list the files available in the standard network directory
os.listdir(input_dir)

['v12_agency.txt',
 'v12_fare_attributes.txt',
 'v12_fare_rules.txt',
 'v12_frequencies.txt',
 'v12_link.feather',
 'v12_link.json',
 'v12_node.geojson',
 'v12_routes.txt',
 'v12_shape.geojson',
 'v12_shapes.txt',
 'v12_stops.txt',
 'v12_stop_times.txt',
 'v12_trips.txt',
 'working_scenario_v12.pickle']

In [5]:
# load the tm2 network link attributes
# source file: v12_link.json inside the standard network directory
link_json_path = os.path.join(input_dir, 'v12_link.json')
tm2_attrs_df = pd.read_json(link_json_path)

In [6]:
# summarize the tm2 link attributes: row count, column names and dtypes
tm2_attrs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2235780 entries, 0 to 2235779
Data columns (total 43 columns):
 #   Column              Dtype 
---  ------              ----- 
 0   access              object
 1   bike_access         object
 2   drive_access        object
 3   fromIntersectionId  object
 4   lanes               object
 5   maxspeed            object
 6   name                object
 7   oneWay              object
 8   ref                 object
 9   roadway             object
 10  shstGeometryId      object
 11  shstReferenceId     object
 12  toIntersectionId    object
 13  u                   object
 14  v                   object
 15  walk_access         object
 16  wayId               object
 17  county              object
 18  model_link_id       int64 
 19  A                   int64 
 20  B                   int64 
 21  rail_traveltime     object
 22  rail_only           object
 23  locationReferences  object
 24  heuristic_num       object
 25  ft_cal            

In [7]:
# count rows by facility type code ('ft')
# NOTE(review): the following cell runs this exact same count; this cell looks like a leftover duplicate
tm2_attrs_df['ft'].value_counts()

99    890204
8     600958
7     538675
6      96405
5      58321
4      39109
3       6438
2       2975
1       2695
Name: ft, dtype: int64

In [8]:
# look at what facility types are in v12_link.json
# (number of rows for each 'ft' code, most frequent first)
tm2_attrs_df['ft'].value_counts()

99    890204
8     600958
7     538675
6      96405
5      58321
4      39109
3       6438
2       2975
1       2695
Name: ft, dtype: int64

In [9]:
# look at the distribution of heuristic number
# (number of rows for each 'heuristic_num' value; value_counts leaves out missing values by default)
# NOTE(review): the saved output includes a blank value (60 rows) next to the numeric codes,
# so this column is not purely numeric - confirm what the blank means
tm2_attrs_df['heuristic_num'].value_counts()

17    575932
15    551234
18    342765
11     96867
6      31276
5      20484
4       3782
13      3720
16      2942
2       2528
1        997
7        874
14       737
10       550
          60
8         44
3         38
12        11
9          7
Name: heuristic_num, dtype: int64

In [10]:
# label the facility type
def label_FacilityTypes(series):
    """Return the display label for a single facility type code.

    Despite the parameter name, `series` is one value, not a pandas Series:
    the function is meant to be called once per element (e.g. via
    ``Series.apply``).

    Parameters
    ----------
    series : scalar
        Facility type code. Codes 1-8 and 99 are recognised.

    Returns
    -------
    str
        The "NN-Name" label for a recognised code, otherwise "Unknown".
    """
    known_types = (
        (1, "01-Freeway"),
        (2, "02-Expressway"),
        (3, "03-Ramp"),
        (4, "04-Divided Arterial"),
        (5, "05-Undivided Arterial"),
        (6, "06-Collector"),
        (7, "07-Local"),
        (8, "08-Connectors"),
        (99, "99-Bike/ped"),
    )

    # test with == in table order, so every kind of input (ints, floats,
    # strings, missing values) is compared the same way
    for code, label in known_types:
        if series == code:
            return label

    return "Unknown"
    
# translate every facility type code into its readable label
ft_label_values = tm2_attrs_df['ft'].apply(label_FacilityTypes)
tm2_attrs_df['ft_label'] = ft_label_values

# check how many rows received each label
tm2_attrs_df['ft_label'].value_counts()


99-Bike/ped              890204
08-Connectors            600958
07-Local                 538675
06-Collector              96405
05-Undivided Arterial     58321
04-Divided Arterial       39109
03-Ramp                    6438
02-Expressway              2975
01-Freeway                 2695
Name: ft_label, dtype: int64

In [11]:
# label the heuristic
# may still want to refine the label to include corroborated, only data source, etc

def label_HeuristicNumber(series):
    """Return the display label for a single heuristic number.

    Despite the parameter name, `series` is one value, not a pandas Series:
    the function is meant to be called once per element (e.g. via
    ``Series.apply``).

    Parameters
    ----------
    series : scalar
        Heuristic number. Values 1-18 are recognised.

    Returns
    -------
    str
        The "NN-source" label for a recognised number, otherwise "Unknown"
        (this includes blank and missing values).
    """
    known_heuristics = (
        (1, "01-pems"),
        (2, "02-sfcta_min"),
        (3, "03-pems"),
        (4, "04-osm_min"),
        (5, "05-tm2_marin"),
        (6, "06-sfcta_min"),
        (7, "07-osm_min"),
        (8, "08-osm_min"),
        (9, "09-pems"),
        (10, "10-sfcta_min"),
        (11, "11-osm_min"),
        (12, "12-osm_min"),
        (13, "13-osm_min"),
        (14, "14-osm_min"),
        (15, "15-tm2_marin"),
        (16, "16-tom"),
        (17, "17-asserted one"),
        (18, "18-asserted one"),
    )

    # test with == in table order, so every kind of input (ints, floats,
    # strings, missing values) is compared the same way
    for number, label in known_heuristics:
        if series == number:
            return label

    return "Unknown"
    
# translate every heuristic number into its readable label
heuristic_label_values = tm2_attrs_df['heuristic_num'].apply(label_HeuristicNumber)
tm2_attrs_df['heuristic_label'] = heuristic_label_values

# check how many rows received each label (unrecognised values show up as "Unknown")
tm2_attrs_df['heuristic_label'].value_counts()


Unknown            600992
17-asserted one    575932
15-tm2_marin       551234
18-asserted one    342765
11-osm_min          96867
06-sfcta_min        31276
05-tm2_marin        20484
04-osm_min           3782
13-osm_min           3720
16-tom               2942
02-sfcta_min         2528
01-pems               997
07-osm_min            874
14-osm_min            737
10-sfcta_min          550
08-osm_min             44
03-pems                38
12-osm_min             11
09-pems                 7
Name: heuristic_label, dtype: int64

In [12]:
# cross-tabulate row counts: heuristic label (rows) by facility type label (columns)
pd.crosstab(
    index=tm2_attrs_df['heuristic_label'],
    columns=tm2_attrs_df['ft_label'],
)

ft_label,01-Freeway,02-Expressway,03-Ramp,04-Divided Arterial,05-Undivided Arterial,06-Collector,07-Local,08-Connectors,99-Bike/ped
heuristic_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
01-pems,997,0,0,0,0,0,0,0,0
02-sfcta_min,0,50,11,1992,144,153,112,0,66
03-pems,38,0,0,0,0,0,0,0,0
04-osm_min,944,0,2836,0,0,0,2,0,0
05-tm2_marin,308,1691,810,12378,784,1885,2328,0,300
06-sfcta_min,0,24,9,1109,1972,5174,21624,0,1364
07-osm_min,372,0,499,0,0,0,3,0,0
08-osm_min,9,0,35,0,0,0,0,0,0
09-pems,7,0,0,0,0,0,0,0,0
10-sfcta_min,0,0,0,202,127,215,6,0,0
