In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import pyproj
from shapely.geometry import Point, shape, LineString
import json
import os

%matplotlib inline

import matplotlib.pyplot as plt

def _format_float(value):
    """Render a float with exactly three digits after the decimal point."""
    return '%.3f' % value


# Show floats with three decimals whenever pandas displays a frame or series.
pd.set_option('display.float_format', _format_float)

In [2]:
from methods import link_df_to_geojson
from methods import point_df_to_geojson
from methods import reproject

In [3]:
# input directories - use Box
# Root of the network rebuild folder under the current user's Box directory;
# the user name is taken from the USERNAME environment variable.
_box_root_template = r"C:\Users\{}\Box\Modeling and Surveys\Development\Travel Model Two Development\Travel Model Two Network Rebuild"
TM2_REBUILD_BOX_DIR = _box_root_template.format(os.getenv('USERNAME'))

# Data folders nested under the rebuild root.
TM2_NETWORKS_DATA_DIR = os.path.join(TM2_REBUILD_BOX_DIR, "travel-model-two-networks", "data")
input_data_interim_dir = os.path.join(TM2_NETWORKS_DATA_DIR, "interim")
input_data_external_dir = os.path.join(TM2_NETWORKS_DATA_DIR, "external")

# Inputs for this notebook: version_12 of the step6_gtfs interim outputs.
input_step6_interim_dir = os.path.join(input_data_interim_dir, "step6_gtfs", "version_12")

In [4]:
# output folders
# Scratch location under the current user's Documents folder; created on
# first use and left untouched if it already exists.
_scratch_template = r"C:\Users\{}\Documents\scratch\tm2_network_building\interim\step8_standard_format\\"
output_data_interim_dir = _scratch_template.format(os.getenv('USERNAME'))
os.makedirs(output_data_interim_dir, exist_ok=True)

In [5]:
%%time
# Wall time: ~3min
# Load the three step6 inputs: the link table (feather) plus the node and
# shape layers (GeoJSON).
link_file = os.path.join(input_step6_interim_dir, "link.feather")
node_file = os.path.join(input_step6_interim_dir, "node.geojson")
shape_file = os.path.join(input_step6_interim_dir, "shape.geojson")

print('read link')
link_df = pd.read_feather(link_file)

print('read node')
node_gdf = gpd.read_file(node_file)

print('read shape')
shape_gdf = gpd.read_file(shape_file)

read link
read node
read shape
Wall time: 2min 51s


In [6]:
# Attach each link's geometry by left-joining the shape layer on the shared
# "id" column; links without a matching shape keep a missing geometry.
shape_geometry = shape_gdf[["id", "geometry"]]
link_gdf = link_df.merge(shape_geometry, how="left", on="id")

# Promote the merged table to a GeoDataFrame declared as EPSG:4326.
link_gdf = gpd.GeoDataFrame(
    link_gdf,
    geometry=link_gdf["geometry"],
    crs=pyproj.CRS("EPSG:4326"),
)

In [7]:
# Compute each link's length from its geometry.
#
# Lengths are measured after reprojecting from EPSG:4326 (degrees) to a
# projected CRS with metre units. The previous target, EPSG:26915, is
# NAD83 / UTM zone 15N (96W-90W); measuring geometries far outside that zone
# distorts distances. EPSG:26910 is NAD83 / UTM zone 10N (126W-120W).
# NOTE(review): assumes the network lies in the San Francisco Bay Area (as the
# Travel Model Two paths suggest) -- confirm, and confirm that no downstream
# step relies on the lengths produced with the old CRS.
LENGTH_CRS_EPSG = 26910

geom_length = link_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg=LENGTH_CRS_EPSG)
geom_length["length"] = geom_length.length

# geom_length keeps link_gdf's index, so this assignment lines up row by row.
link_gdf["length"] = geom_length["length"]

In [8]:
def _build_location_references(link_row):
    """Return the two-entry location reference list for one link row.

    The first entry describes the A end (with the link length as the distance
    to the next reference and a constant bearing of 0); the second entry
    describes the B end.
    """
    start_ref = {
        'sequence': 1,
        'point': link_row['A_point'],
        'distanceToNextRef': link_row['length'],
        'bearing': 0,
        'intersectionId': link_row['fromIntersectionId'],
    }
    end_ref = {
        'sequence': 2,
        'point': link_row['B_point'],
        'intersectionId': link_row['toIntersectionId'],
    }
    return [start_ref, end_ref]


# Expose node coordinates as plain columns and as [x, y] lists.
node_gdf['X'] = [pt.x for pt in node_gdf['geometry']]
node_gdf['Y'] = [pt.y for pt in node_gdf['geometry']]
node_gdf['point'] = [[x, y] for x, y in zip(node_gdf['X'], node_gdf['Y'])]

# Lookup from model_node_id to that node's [x, y] list.
node_dict = dict(zip(node_gdf['model_node_id'], node_gdf['point']))

# Resolve both link ends to coordinates, then assemble the reference list.
link_gdf['A_point'] = link_gdf['A'].map(node_dict)
link_gdf['B_point'] = link_gdf['B'].map(node_dict)
link_gdf['locationReferences'] = link_gdf.apply(_build_location_references, axis=1)

In [9]:
# Number the shapes 1..N in their current row order.
n_shapes = len(shape_gdf)
shape_gdf["shape_id"] = range(1, n_shapes + 1)

# Carry the new shape_id over to the links through the shared "id" column.
shape_id_dict = dict(zip(shape_gdf["id"], shape_gdf["shape_id"]))
link_gdf["shape_id"] = link_gdf["id"].map(shape_id_dict)

In [10]:
%%time
# Wall time: ~5min
print("-------write out link shape geojson---------")

# Columns passed to the helper as the property list for each shape.
# NOTE(review): link_df_to_geojson comes from the local `methods` module;
# presumably it returns a JSON-serializable GeoJSON structure -- confirm.
shape_prop = [
    'id',
    'fromIntersectionId',
    'toIntersectionId',
    'forwardReferenceId',
    'backReferenceId',
]
shape_geojson = link_df_to_geojson(shape_gdf, shape_prop)

shape_out_path = os.path.join(output_data_interim_dir, "shape.geojson")
with open(shape_out_path, "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 4min 16s


In [11]:
# Sanity check: list the shape table's columns, dtypes and non-null counts.
shape_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 869986 entries, 0 to 869985
Data columns (total 7 columns):
 #   Column              Non-Null Count   Dtype   
---  ------              --------------   -----   
 0   id                  869986 non-null  object  
 1   fromIntersectionId  869986 non-null  object  
 2   toIntersectionId    869986 non-null  object  
 3   forwardReferenceId  869986 non-null  object  
 4   backReferenceId     869986 non-null  object  
 5   geometry            869986 non-null  geometry
 6   shape_id            869986 non-null  int32   
dtypes: geometry(1), int32(1), object(5)
memory usage: 43.1+ MB


In [12]:
%%time
# Wall time: ~1min
# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

# Columns excluded from the exported link attributes.
link_json_excluded = [
    'geometry', 'A_point', 'B_point', "service", "roundabout", 'est_width',
    'highway', 'junction', 'key', 'landuse', 'link', 'area', 'id', 'length',
    'width', 'bridge', 'tunnel',
    'shape_id',
]
link_prop = list(link_gdf.drop(columns=link_json_excluded).columns)

# Select the kept columns by name, then serialize as a JSON array of records.
out = link_gdf[link_prop].to_json(orient="records")

link_json_path = os.path.join(output_data_interim_dir, "link.json")
with open(link_json_path, 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 50.5 s


In [13]:
print("-------write out link feather---------")

# Columns excluded from the feather copy of the link attributes.
link_feather_excluded = [
    'geometry', 'A_point', 'B_point', "service", "roundabout", 'est_width',
    'highway', 'junction', 'key', 'landuse', 'link', 'area', 'id', 'length',
    'width', 'bridge', 'tunnel',
    'shape_id',
]
link_feather = link_gdf.drop(columns=link_feather_excluded).copy()

link_feather_path = os.path.join(output_data_interim_dir, 'link.feather')
link_feather.to_feather(link_feather_path)

-------write out link feather---------



This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  if __name__ == '__main__':


In [14]:
# Sanity check: list the exported link table's columns, dtypes and non-null counts.
link_feather.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1634790 entries, 0 to 1634789
Data columns (total 24 columns):
 #   Column              Non-Null Count    Dtype  
---  ------              --------------    -----  
 0   access              1632702 non-null  object 
 1   bike_access         1634790 non-null  int64  
 2   drive_access        1634790 non-null  int64  
 3   fromIntersectionId  1632702 non-null  object 
 4   lanes               1632702 non-null  object 
 5   maxspeed            1632702 non-null  object 
 6   name                1632702 non-null  object 
 7   oneWay              1632702 non-null  object 
 8   ref                 1632702 non-null  object 
 9   roadway             1632702 non-null  object 
 10  shstGeometryId      1634790 non-null  object 
 11  shstReferenceId     1632702 non-null  object 
 12  toIntersectionId    1632702 non-null  object 
 13  u                   1632702 non-null  float64
 14  v                   1632702 non-null  float64
 15  walk_ac

In [15]:
%%time
# Wall time: ~4min
print("-------write out node geojson---------")

# Every node column except the geometry and the helper [x, y] list is passed
# to the helper as a property name.
# NOTE(review): point_df_to_geojson comes from the local `methods` module;
# presumably it returns a JSON-serializable GeoJSON structure -- confirm.
node_prop = list(node_gdf.drop(columns=["geometry", "point"]).columns)
node_geojson = point_df_to_geojson(node_gdf, node_prop)

node_out_path = os.path.join(output_data_interim_dir, "node.geojson")
with open(node_out_path, "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 3min 27s


In [16]:
# Sanity check: list the node table's columns, dtypes and non-null counts.
node_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 644480 entries, 0 to 644479
Data columns (total 12 columns):
 #   Column         Non-Null Count   Dtype   
---  ------         --------------   -----   
 0   osm_node_id    643811 non-null  float64 
 1   shst_node_id   644480 non-null  object  
 2   county         644480 non-null  object  
 3   drive_access   644480 non-null  int64   
 4   walk_access    644480 non-null  int64   
 5   bike_access    644480 non-null  int64   
 6   model_node_id  644480 non-null  int64   
 7   rail_only      644480 non-null  int64   
 8   geometry       644480 non-null  geometry
 9   X              644480 non-null  float64 
 10  Y              644480 non-null  float64 
 11  point          644480 non-null  object  
dtypes: float64(3), geometry(1), int64(5), object(3)
memory usage: 59.0+ MB
