In [1]:
import os
import pandas as pd
import geopandas as gpd

In [6]:
#set working directory
# The input folder can be overridden with the TM2_WORK_DIR environment variable so
# the notebook can run on machines other than the author's. When the variable is
# unset (or empty), the original hard-coded location is used, so existing runs
# behave exactly as before.
work_dir = os.environ.get('TM2_WORK_DIR') or os.path.join(
    '/Users',
    'jcroff',
    'Box',
    'MTC Data for Toole Design',
    'final_nw_datasets',
    'travel_model_nw',
)

In [7]:
# List the contents of the working directory to confirm the input files are present.
os.listdir(work_dir)

['v11_link.json', 'v11_shape.geojson']

In [8]:
# Load the TM2 link shapes (GeoJSON) from the working directory into a GeoDataFrame.
shape_path = os.path.join(work_dir, 'v11_shape.geojson')

tm2_gdf = gpd.read_file(shape_path, driver='GeoJSON')

In [9]:
# Load the TM2 network link attributes (JSON) from the working directory
# into a plain pandas DataFrame.
link_path = os.path.join(work_dir, 'v11_link.json')

tm2_attrs_df = pd.read_json(link_path)

In [12]:
# Summarise the TM2 shapes GeoDataFrame: row count, column names,
# non-null counts, dtypes, and memory usage.

tm2_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1152275 entries, 0 to 1152274
Data columns (total 7 columns):
 #   Column              Non-Null Count    Dtype   
---  ------              --------------    -----   
 0   backReferenceId     869968 non-null   object  
 1   forwardReferenceId  869968 non-null   object  
 2   fromIntersectionId  1152215 non-null  object  
 3   id                  1152275 non-null  object  
 4   shape_id            869968 non-null   float64 
 5   toIntersectionId    869968 non-null   object  
 6   geometry            1152275 non-null  geometry
dtypes: float64(1), geometry(1), object(5)
memory usage: 61.5+ MB


In [13]:
# Summarise the TM2 link-attribute DataFrame: row count, column names, and dtypes.

tm2_attrs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2199321 entries, 0 to 2199320
Data columns (total 44 columns):
 #   Column              Dtype 
---  ------              ----- 
 0   access              object
 1   bike_access         int64 
 2   drive_access        int64 
 3   fromIntersectionId  object
 4   lanes               object
 5   maxspeed            object
 6   name                object
 7   oneWay              object
 8   ref                 object
 9   roadway             object
 10  shstGeometryId      object
 11  shstReferenceId     object
 12  toIntersectionId    object
 13  u                   object
 14  v                   object
 15  walk_access         int64 
 16  wayId               object
 17  county              object
 18  model_link_id       int64 
 19  A                   int64 
 20  B                   int64 
 21  rail_traveltime     object
 22  rail_only           int64 
 23  locationReferences  object
 24  shape_id            object
 25  ft_cal            

In [17]:
# Left-join the link attributes onto the shapes using the shared 'id' column.
# A left join keeps every shape row whether or not it has matching attributes.
# The GeoDataFrame is passed as the left frame so the result of the join is
# still a GeoDataFrame. indicator=True adds a '_merge' column recording whether
# each row was found in both frames or only in the left one.

tm2_nw_attrs_gdf = pd.merge(left=tm2_gdf,
                            right=tm2_attrs_df,
                            on='id',
                            how='left',
                            indicator=True)

In [21]:
# Count the merged rows whose key was found in both frames.
# Summing the boolean mask gives the same number as filtering and reading .shape[0].

matched_in_both = tm2_nw_attrs_gdf['_merge'] == 'both'
int(matched_in_both.sum())

2199321

In [22]:
# Count the merged rows that exist only in the left (shapes) frame.
#
# A result of 0 means every shape has at least one matching attribute row.
# Because the number of matched rows is larger than the number of shapes, the
# relationship from shapes to link attributes appears to be one-to-many.
# For example, a single shape 'A' might represent a two-way street and so carry
# one attribute row per direction.

unmatched_left = tm2_nw_attrs_gdf['_merge'] == 'left_only'
int(unmatched_left.sum())

0

In [32]:
# Inspect the rows flagged as not one-way to test the one-to-many theory.
# The comparison is against the string 'False', not the boolean.

is_two_way = tm2_nw_attrs_gdf['oneWay'] == 'False'
tm2_nw_attrs_gdf[is_two_way]

Unnamed: 0,backReferenceId,forwardReferenceId,fromIntersectionId_x,id,shape_id_x,toIntersectionId_x,geometry,access,bike_access,drive_access,...,managed,ML_lanes,segment_id,ML_tollbooth,ML_useclass,ML_access,ML_egress,ML_tollseg,tollseg,_merge
0,d0e1dddc808772de514b913b8f6875c7,1f25555f0c2f4a6c1205f4f0079c818f,fc62d018be0236d6b9e43536ebc51830,89d807e34d7630c619bbc23a64e1313e,1.0,61c9208c38c5d942c222859cb7073e30,"LINESTRING (-121.81724 38.00970, -121.81667 38...",,1,1,...,0,,0,,,,,,0,both
1,d0e1dddc808772de514b913b8f6875c7,1f25555f0c2f4a6c1205f4f0079c818f,fc62d018be0236d6b9e43536ebc51830,89d807e34d7630c619bbc23a64e1313e,1.0,61c9208c38c5d942c222859cb7073e30,"LINESTRING (-121.81724 38.00970, -121.81667 38...",,1,1,...,0,,0,,,,,,0,both
2,f38e31579534d3a5abb518ef31bb0517,89dd7cf0f842c091241b9a139a5e9506,1bf4572e933b7f6d81535ff217053926,95451c235af17ca9cde815dfc3f4b6ce,2.0,d60ae663adb14804616d2ca06c3ff8fb,"LINESTRING (-121.81720 38.00839, -121.81726 38...",,1,1,...,0,,0,,,,,,0,both
3,f38e31579534d3a5abb518ef31bb0517,89dd7cf0f842c091241b9a139a5e9506,1bf4572e933b7f6d81535ff217053926,95451c235af17ca9cde815dfc3f4b6ce,2.0,d60ae663adb14804616d2ca06c3ff8fb,"LINESTRING (-121.81720 38.00839, -121.81726 38...",,1,1,...,0,,0,,,,,,0,both
4,80c4235b61bd0fe1b1538b8b36b40958,31529c3bf7a3c1a1aa9f76a607762487,fc62d018be0236d6b9e43536ebc51830,6c28c78ee3ac710cf40194e2cac3730d,3.0,d60ae663adb14804616d2ca06c3ff8fb,"LINESTRING (-121.81724 38.00970, -121.81726 38...",,1,1,...,0,,0,,,,,,0,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632695,4e1fda1d8942b1011e71c2c449a3178a,7e0cbda37cbeb9c7854fd9dc66a81b33,689f83c2d6f85789f3b6b3b0f8258f50,c9040be5b4952b2cfe6f44445c1a3180,868565.0,4561d73ab01329336eeb9b2d57bdf6bd,"LINESTRING (-122.18719 37.70980, -122.18730 37...",,1,1,...,0,,0,,,,,,0,both
1632696,e39be5081874c71a7891c2b2fc5bf4e7,cc4dfbef35824731bc1e8bcbc8ead468,4561d73ab01329336eeb9b2d57bdf6bd,0af0522a64f003526c57aeeeb102fe0b,868566.0,4681e885ad919694b5a070fe9e9a8edd,"LINESTRING (-122.18730 37.70998, -122.18631 37...",,1,1,...,0,,0,,,,,,0,both
1632697,e39be5081874c71a7891c2b2fc5bf4e7,cc4dfbef35824731bc1e8bcbc8ead468,4561d73ab01329336eeb9b2d57bdf6bd,0af0522a64f003526c57aeeeb102fe0b,868566.0,4681e885ad919694b5a070fe9e9a8edd,"LINESTRING (-122.18730 37.70998, -122.18631 37...",,1,1,...,0,,0,,,,,,0,both
1632698,b4e67f60a17d510eb3051df9505889c2,f4ad5ca6ad1ed100643b9f9f5df044ce,53036ff9adcd35f66194420de89af276,441d8d0eaec8cde88fe1ab6a1442197c,868567.0,fff61e17ae37b95488ffce3b4ab2d097,"LINESTRING (-122.18917 37.70894, -122.18929 37...",,1,1,...,0,,0,,,,,,0,both


In [34]:
# Look up a single id taken from the two-way rows shown above.
# Two rows come back for the one id, which supports the one-to-many reading.

sample_id = '89d807e34d7630c619bbc23a64e1313e'
tm2_nw_attrs_gdf[tm2_nw_attrs_gdf['id'].eq(sample_id)]

Unnamed: 0,backReferenceId,forwardReferenceId,fromIntersectionId_x,id,shape_id_x,toIntersectionId_x,geometry,access,bike_access,drive_access,...,managed,ML_lanes,segment_id,ML_tollbooth,ML_useclass,ML_access,ML_egress,ML_tollseg,tollseg,_merge
0,d0e1dddc808772de514b913b8f6875c7,1f25555f0c2f4a6c1205f4f0079c818f,fc62d018be0236d6b9e43536ebc51830,89d807e34d7630c619bbc23a64e1313e,1.0,61c9208c38c5d942c222859cb7073e30,"LINESTRING (-121.81724 38.00970, -121.81667 38...",,1,1,...,0,,0,,,,,,0,both
1,d0e1dddc808772de514b913b8f6875c7,1f25555f0c2f4a6c1205f4f0079c818f,fc62d018be0236d6b9e43536ebc51830,89d807e34d7630c619bbc23a64e1313e,1.0,61c9208c38c5d942c222859cb7073e30,"LINESTRING (-121.81724 38.00970, -121.81667 38...",,1,1,...,0,,0,,,,,,0,both


In [37]:
# Prepare dataset for export: list every column of the merged frame so the
# export columns can be picked from it.

list(tm2_nw_attrs_gdf.columns)

['backReferenceId',
 'forwardReferenceId',
 'fromIntersectionId_x',
 'id',
 'shape_id_x',
 'toIntersectionId_x',
 'geometry',
 'access',
 'bike_access',
 'drive_access',
 'fromIntersectionId_y',
 'lanes',
 'maxspeed',
 'name',
 'oneWay',
 'ref',
 'roadway',
 'shstGeometryId',
 'shstReferenceId',
 'toIntersectionId_y',
 'u',
 'v',
 'walk_access',
 'wayId',
 'county',
 'model_link_id',
 'A',
 'B',
 'rail_traveltime',
 'rail_only',
 'locationReferences',
 'shape_id_y',
 'ft_cal',
 'ft',
 'useclass',
 'assignable_cal',
 'assignable',
 'transit',
 'tollbooth',
 'bus_only',
 'transit_access',
 'managed',
 'ML_lanes',
 'segment_id',
 'ML_tollbooth',
 'ML_useclass',
 'ML_access',
 'ML_egress',
 'ML_tollseg',
 'tollseg',
 '_merge']

In [38]:
# Columns to keep for export, in output order. Where the merge produced suffixed
# duplicates, the '_x' version (from the left frame, tm2_gdf) is kept; the '_y'
# duplicates and the '_merge' indicator are left out.
export_cols = [
    # ids and cross-references
    'id', 'backReferenceId', 'forwardReferenceId',
    'fromIntersectionId_x', 'toIntersectionId_x', 'shape_id_x',
    'shstGeometryId', 'shstReferenceId', 'wayId', 'model_link_id',
    # link attributes
    'access', 'bike_access', 'drive_access', 'lanes', 'maxspeed', 'name',
    'oneWay', 'ref', 'roadway', 'u', 'v', 'walk_access', 'county', 'A', 'B',
    'rail_traveltime', 'rail_only', 'locationReferences',
    'ft_cal', 'ft', 'useclass', 'assignable_cal', 'assignable', 'transit',
    'tollbooth', 'bus_only', 'transit_access', 'managed', 'ML_lanes',
    'segment_id', 'ML_tollbooth', 'ML_useclass', 'ML_access', 'ML_egress',
    'ML_tollseg', 'tollseg',
    # geometry goes last
    'geometry',
]

# Replace the merged frame with an independent copy limited to the export columns.
tm2_nw_attrs_gdf = tm2_nw_attrs_gdf[export_cols].copy()

In [39]:
# Drop the '_x' merge suffix from the three retained columns so the exported
# names match the original shape columns.
suffix_renames = {
    'fromIntersectionId_x': 'fromIntersectionId',
    'toIntersectionId_x': 'toIntersectionId',
    'shape_id_x': 'shape_id',
}
tm2_nw_attrs_gdf = tm2_nw_attrs_gdf.rename(columns=suffix_renames)

In [40]:
# Check the final column set and dtypes after selecting and renaming the export columns.
tm2_nw_attrs_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 2199321 entries, 0 to 2199320
Data columns (total 47 columns):
 #   Column              Dtype   
---  ------              -----   
 0   id                  object  
 1   backReferenceId     object  
 2   forwardReferenceId  object  
 3   fromIntersectionId  object  
 4   toIntersectionId    object  
 5   shape_id            float64 
 6   shstGeometryId      object  
 7   shstReferenceId     object  
 8   wayId               object  
 9   model_link_id       int64   
 10  access              object  
 11  bike_access         int64   
 12  drive_access        int64   
 13  lanes               object  
 14  maxspeed            object  
 15  name                object  
 16  oneWay              object  
 17  ref                 object  
 18  roadway             object  
 19  u                   object  
 20  v                   object  
 21  walk_access         int64   
 22  county              object  
 23  A                   int6

In [None]:
#export to geojson

# The output path is relative to the notebook's current working directory.
out_path = 'data/tm2_network_and_link_attributes.geojson'

# Make sure the target folder exists before writing: the export would otherwise
# fail when 'data/' has not been created yet (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(out_path), exist_ok=True)

tm2_nw_attrs_gdf.to_file(out_path,
                         driver='GeoJSON')