In [1]:
import os
import pandas as pd
import geopandas as gpd

In [6]:
#set working directory
#
#Defaults to the original Box folder. Set the TM2_WORK_DIR environment variable to
#point the notebook at a copy of the data stored somewhere else, without editing
#this cell.
work_dir = os.environ.get(
    'TM2_WORK_DIR',
    os.path.join('/Users',
                 'jcroff',
                 'Box',
                 'MTC Data for Toole Design',
                 'final_nw_datasets',
                 'travel_model_nw'))

In [7]:
#list the files available in the working directory
os.listdir(work_dir)

['v11_link.json', 'v11_shape.geojson']

In [8]:
#load the tm2 link shapes (GeoJSON file in the working directory) as a geodataframe

tm2_shape_path = os.path.join(work_dir, 'v11_shape.geojson')
tm2_gdf = gpd.read_file(tm2_shape_path, driver='GeoJSON')

In [9]:
#load the tm2 network link attributes (JSON file in the working directory) as a dataframe

tm2_link_path = os.path.join(work_dir, 'v11_link.json')
tm2_attrs_df = pd.read_json(tm2_link_path)

In [12]:
#summarise the tm2 shape geodataframe: row count, columns, non-null counts and dtypes

tm2_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1152275 entries, 0 to 1152274
Data columns (total 7 columns):
 #   Column              Non-Null Count    Dtype   
---  ------              --------------    -----   
 0   backReferenceId     869968 non-null   object  
 1   forwardReferenceId  869968 non-null   object  
 2   fromIntersectionId  1152215 non-null  object  
 3   id                  1152275 non-null  object  
 4   shape_id            869968 non-null   float64 
 5   toIntersectionId    869968 non-null   object  
 6   geometry            1152275 non-null  geometry
dtypes: float64(1), geometry(1), object(5)
memory usage: 61.5+ MB


In [13]:
#summarise the tm2 link attributes dataframe: row count, columns and dtypes

tm2_attrs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2199321 entries, 0 to 2199320
Data columns (total 44 columns):
 #   Column              Dtype 
---  ------              ----- 
 0   access              object
 1   bike_access         int64 
 2   drive_access        int64 
 3   fromIntersectionId  object
 4   lanes               object
 5   maxspeed            object
 6   name                object
 7   oneWay              object
 8   ref                 object
 9   roadway             object
 10  shstGeometryId      object
 11  shstReferenceId     object
 12  toIntersectionId    object
 13  u                   object
 14  v                   object
 15  walk_access         int64 
 16  wayId               object
 17  county              object
 18  model_link_id       int64 
 19  A                   int64 
 20  B                   int64 
 21  rail_traveltime     object
 22  rail_only           int64 
 23  locationReferences  object
 24  shape_id            object
 25  ft_cal            

In [43]:
#preview the first 10 rows of the link attributes
tm2_attrs_df.head(10)

Unnamed: 0,access,bike_access,drive_access,fromIntersectionId,lanes,maxspeed,name,oneWay,ref,roadway,...,transit_access,managed,ML_lanes,segment_id,ML_tollbooth,ML_useclass,ML_access,ML_egress,ML_tollseg,tollseg
0,,1,1,505d64eb98f1da8d812a3b3801034308,1,,,True,,service,...,,0,,0,,,,,,0
1,,1,1,473979c78435732f01ca5a168afb62e0,1,,Yolanda Circle,False,,residential,...,,0,,0,,,,,,0
2,,1,1,fc7b575d5d8c961d4a70fca846ae7f80,1,25 mph,Molino Avenue,False,,tertiary,...,,0,,0,,,,,,0
3,,1,1,6c60cf34e9dc3e123eefb829fe80c76a,1,,Mina Way,False,,residential,...,,0,,0,,,,,,0
4,,1,1,013e1f994fd86c1f226098f8364f7286,1,,,False,,service,...,,0,,0,,,,,,0
5,,1,1,c180e4b9ba43a15fb576f5decab10cd0,2,40 mph,Saratoga-Sunnyvale Road,True,,primary,...,,0,,0,,,,,,0
6,,1,0,a3d4daf5130420a2c27d87892b718574,1,,,False,,cycleway,...,,0,,0,,,,,,0
7,,1,1,2003d46b59a6ff6d782d7e13623962d9,1,,,False,,service,...,,0,,0,,,,,,0
8,,1,1,55d370526a55a5d348e23751aad86ac0,1,,29th Avenue,False,,residential,...,,0,,0,,,,,,0
9,,1,1,fff3b94580adeda21a912b9fa5bc82cc,1,,,False,,service,...,,0,,0,,,,,,0


In [17]:
#Left-join the link attributes onto the link shapes using the shared 'id' column, so
#every shape row is kept whether or not a matching attribute row exists.
#The geodataframe is passed as the left frame so the output of the join is a geodataframe.
#indicator=True adds a '_merge' column recording which side(s) each row came from.

tm2_nw_attrs_gdf = pd.merge(
    left=tm2_gdf,
    right=tm2_attrs_df,
    on='id',
    how='left',
    indicator=True,
)

In [21]:
#Count the merged rows that were found in both the shapes and the attributes

len(tm2_nw_attrs_gdf.loc[tm2_nw_attrs_gdf['_merge'] == 'both'])

2199321

In [22]:
#Count the merged rows that exist only in the left (shapes) frame

#0 left_only rows means that every link shape has at least one matching attribute row.
#The number of 'both' rows suggests that link shapes to link attributes is a
#one-to-many relationship.
#For example, a link 'A' might be a two-way street.

len(tm2_nw_attrs_gdf.loc[tm2_nw_attrs_gdf['_merge'] == 'left_only'])

0

In [None]:
#Check two-way street attribute to confirm theory
#
#oneWay is an object column parsed from JSON, so two-way links may be stored as the
#boolean False rather than the string 'False'. Comparing against 'False' directly
#silently returns no rows when the values are booleans; comparing on the string
#form matches either representation.

tm2_nw_attrs_gdf[tm2_nw_attrs_gdf['oneWay'].astype(str) == 'False'].head(5)

In [34]:
#Look up one id taken from the filtered list of two-way roads.
#Two rows come back for this single id, so the dataset appears to be one to many.

tm2_nw_attrs_gdf.loc[tm2_nw_attrs_gdf['id'] == '89d807e34d7630c619bbc23a64e1313e']

Unnamed: 0,backReferenceId,forwardReferenceId,fromIntersectionId_x,id,shape_id_x,toIntersectionId_x,geometry,access,bike_access,drive_access,...,managed,ML_lanes,segment_id,ML_tollbooth,ML_useclass,ML_access,ML_egress,ML_tollseg,tollseg,_merge
0,d0e1dddc808772de514b913b8f6875c7,1f25555f0c2f4a6c1205f4f0079c818f,fc62d018be0236d6b9e43536ebc51830,89d807e34d7630c619bbc23a64e1313e,1.0,61c9208c38c5d942c222859cb7073e30,"LINESTRING (-121.81724 38.00970, -121.81667 38...",,1,1,...,0,,0,,,,,,0,both
1,d0e1dddc808772de514b913b8f6875c7,1f25555f0c2f4a6c1205f4f0079c818f,fc62d018be0236d6b9e43536ebc51830,89d807e34d7630c619bbc23a64e1313e,1.0,61c9208c38c5d942c222859cb7073e30,"LINESTRING (-121.81724 38.00970, -121.81667 38...",,1,1,...,0,,0,,,,,,0,both


In [37]:
#Prepare dataset for export: list every column of the merged frame

list(tm2_nw_attrs_gdf.columns)

['backReferenceId',
 'forwardReferenceId',
 'fromIntersectionId_x',
 'id',
 'shape_id_x',
 'toIntersectionId_x',
 'geometry',
 'access',
 'bike_access',
 'drive_access',
 'fromIntersectionId_y',
 'lanes',
 'maxspeed',
 'name',
 'oneWay',
 'ref',
 'roadway',
 'shstGeometryId',
 'shstReferenceId',
 'toIntersectionId_y',
 'u',
 'v',
 'walk_access',
 'wayId',
 'county',
 'model_link_id',
 'A',
 'B',
 'rail_traveltime',
 'rail_only',
 'locationReferences',
 'shape_id_y',
 'ft_cal',
 'ft',
 'useclass',
 'assignable_cal',
 'assignable',
 'transit',
 'tollbooth',
 'bus_only',
 'transit_access',
 'managed',
 'ML_lanes',
 'segment_id',
 'ML_tollbooth',
 'ML_useclass',
 'ML_access',
 'ML_egress',
 'ML_tollseg',
 'tollseg',
 '_merge']

In [38]:
#Columns to keep for export, in output order. Where the merge produced '_x'/'_y'
#duplicates, the '_x' (left / shapes) version is the one kept; '_merge' is dropped.
export_cols = [
    #identifiers
    'id', 'backReferenceId', 'forwardReferenceId',
    'fromIntersectionId_x', 'toIntersectionId_x', 'shape_id_x',
    'shstGeometryId', 'shstReferenceId', 'wayId', 'model_link_id',
    #link attributes
    'access', 'bike_access', 'drive_access', 'lanes', 'maxspeed',
    'name', 'oneWay', 'ref', 'roadway', 'u', 'v', 'walk_access',
    'county', 'A', 'B', 'rail_traveltime', 'rail_only', 'locationReferences',
    'ft_cal', 'ft', 'useclass', 'assignable_cal', 'assignable',
    'transit', 'tollbooth', 'bus_only', 'transit_access',
    #managed lane / toll fields
    'managed', 'ML_lanes', 'segment_id', 'ML_tollbooth', 'ML_useclass',
    'ML_access', 'ML_egress', 'ML_tollseg', 'tollseg',
    #geometry last
    'geometry',
]

#keep only the export columns, as an independent copy of the merged frame
tm2_nw_attrs_gdf = tm2_nw_attrs_gdf[export_cols].copy()

In [39]:
#rename columns to remove the '_x' suffix added by the merge

tm2_nw_attrs_gdf = tm2_nw_attrs_gdf.rename(
    columns={
        'fromIntersectionId_x': 'fromIntersectionId',
        'toIntersectionId_x': 'toIntersectionId',
        'shape_id_x': 'shape_id',
    }
)

In [44]:
#find columns containing lists

def has_list(x):
    """Return True if at least one element of the iterable ``x`` is a ``list``.

    Elements are checked with ``isinstance(element, list)``; iteration stops at
    the first list found. An empty iterable gives False.
    """
    for element in x:
        if isinstance(element, list):
            return True
    return False

#run has_list on each column (axis=0); the result is one boolean per column
list_cols = tm2_nw_attrs_gdf.apply(has_list, axis=0)

In [45]:
#show, for each column, whether any of its values is a list
list_cols

id                    False
backReferenceId       False
forwardReferenceId    False
fromIntersectionId    False
toIntersectionId      False
shape_id              False
shstGeometryId        False
shstReferenceId       False
wayId                 False
model_link_id         False
access                False
bike_access           False
drive_access          False
lanes                 False
maxspeed              False
name                  False
oneWay                False
ref                   False
roadway               False
u                     False
v                     False
walk_access           False
county                False
A                     False
B                     False
rail_traveltime       False
rail_only             False
locationReferences     True
ft_cal                False
ft                    False
useclass              False
assignable_cal        False
assignable            False
transit               False
tollbooth             False
bus_only            

In [49]:
#Convert list columns to string columns
#
#Every non-missing value in these columns is replaced by its str() form. Missing
#values (None / NaN) are left as nulls: calling str() on them would write the
#literal text 'None' / 'nan' into the exported data.

def _to_export_str(value):
    """Return ``str(value)``, or None when ``value`` is None or a float NaN."""
    is_missing = value is None or (isinstance(value, float) and value != value)
    return None if is_missing else str(value)

for col in ['locationReferences', 'ML_access', 'ML_egress']:
    tm2_nw_attrs_gdf[col] = tm2_nw_attrs_gdf[col].apply(_to_export_str)

In [None]:
#export to geojson

out_path = 'data/tm2_network_and_link_attributes.geojson'

#make sure the output folder exists first, so the export does not fail on a
#missing 'data' directory after the long-running merge
os.makedirs(os.path.dirname(out_path), exist_ok=True)

tm2_nw_attrs_gdf.to_file(out_path,
                         driver='GeoJSON')