In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, shape, LineString
import json


%matplotlib inline

import matplotlib.pyplot as plt

pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
from methods import link_df_to_geojson
from methods import point_df_to_geojson
from methods import reproject

In [3]:
step6_output_folder = "../../data/interim/step6_gtfs/"
data_interim_dir = "../../data/interim/"

In [4]:
link_file = step6_output_folder + "link.feather"
link_df = pd.read_feather(link_file)

node_file = step6_output_folder + "node.geojson"
node_gdf = gpd.read_file(node_file)

shape_file = step6_output_folder + "shape.geojson"
shape_gdf = gpd.read_file(shape_file)

In [5]:
link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                   how = "left",
                   on = "id")

link_gdf = gpd.GeoDataFrame(link_gdf, geometry = link_gdf["geometry"],
                            crs={"init" : "epsg:4326"})

In [6]:
geom_length = link_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

link_gdf["length"] = geom_length["length"]

In [7]:
node_gdf['X'] = node_gdf['geometry'].apply(lambda p: p.x)
node_gdf['Y'] = node_gdf['geometry'].apply(lambda p: p.y)
node_gdf['point'] = [list(xy) for xy in zip(node_gdf.X, node_gdf.Y)]
node_dict = dict(zip(node_gdf.model_node_id, node_gdf.point))
    
link_gdf['A_point'] = link_gdf['A'].map(node_dict)
link_gdf['B_point'] = link_gdf['B'].map(node_dict)
link_gdf['locationReferences'] = link_gdf.apply(lambda x: [{'sequence':1, 
                                                             'point': x['A_point'],
                                                             'distanceToNextRef':x['length'],
                                                            'bearing' : 0,
                                                             'intersectionId':x['fromIntersectionId']},
                                                                         {'sequence':2, 
                                                             'point': x['B_point'],
                                                             'intersectionId':x['toIntersectionId']}],
                                                   axis = 1)

In [8]:
link_gdf.locationReferences.iloc[0]

[{'sequence': 1,
  'point': [-122.33155420000001, 37.9812044],
  'distanceToNextRef': 84.64524959467963,
  'bearing': 0,
  'intersectionId': '505d64eb98f1da8d812a3b3801034308'},
 {'sequence': 2,
  'point': [-122.33199630000001, 37.9818082],
  'intersectionId': 'fb8b1bcb4bb81380f0dc83c1aa5006f4'}]

In [9]:
shape_gdf["shape_id"] = range(1, 1+ len(shape_gdf))

shape_id_dict = dict(zip(shape_gdf.id, shape_gdf.shape_id))

link_gdf["shape_id"] = link_gdf["id"].map(shape_id_dict)

In [10]:
%%time

print("-------write out link shape geojson---------")

shape_prop = ['id', 'shape_id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(shape_gdf, shape_prop)

with open(data_interim_dir + "step8_standard_format/shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

-------write out link shape geojson---------
Wall time: 3min 30s


In [11]:
link_gdf.columns

Index(['access', 'area', 'bike_access', 'bridge', 'drive_access', 'est_width',
       'fromIntersectionId', 'highway', 'id', 'junction', 'key', 'landuse',
       'lanes', 'link', 'maxspeed', 'name', 'oneWay', 'ref', 'roadway',
       'roundabout', 'service', 'shstGeometryId', 'shstReferenceId',
       'toIntersectionId', 'tunnel', 'u', 'v', 'walk_access', 'wayId', 'width',
       'county', 'length', 'model_link_id', 'A', 'B', 'rail_traveltime',
       'rail_only', 'geometry', 'A_point', 'B_point', 'locationReferences',
       'shape_id'],
      dtype='object')

In [12]:
%%time

# write out link variable json
# link unique handle "shstReferenceId" + "shstGeometryId"

print("-------write out link json---------")

link_prop = link_gdf.drop(['geometry', 'A_point', 'B_point', "service", "roundabout", 'est_width',
                          'highway', 'junction', 'key', 'landuse', 'link', 'area', 'id', 'length',
                          'width', 'bridge', 'tunnel'], 
                          axis = 1).columns.tolist()

out = link_gdf[link_prop].to_json(orient = "records")

with open(data_interim_dir + "step8_standard_format/link.json", 'w') as f:
    f.write(out)

-------write out link json---------
Wall time: 40.5 s


In [13]:
print("-------write out link feather---------")

link_feather = link_gdf.drop(['geometry', 'A_point', 'B_point', "service", "roundabout", 'est_width',
                          'highway', 'junction', 'key', 'landuse', 'link', 'area', 'id', 'length',
                          'width', 'bridge', 'tunnel'], 
                          axis = 1).copy()

link_feather.to_feather(data_interim_dir + 'step8_standard_format/link.feather')

-------write out link feather---------


In [14]:
link_feather.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1634769 entries, 0 to 1634768
Data columns (total 25 columns):
access                1632702 non-null object
bike_access           1634769 non-null int64
drive_access          1634769 non-null int64
fromIntersectionId    1632702 non-null object
lanes                 1632702 non-null object
maxspeed              1632702 non-null object
name                  1632702 non-null object
oneWay                1632702 non-null object
ref                   1632702 non-null object
roadway               1632702 non-null object
shstGeometryId        1634769 non-null object
shstReferenceId       1632702 non-null object
toIntersectionId      1632702 non-null object
u                     1632702 non-null float64
v                     1632702 non-null float64
walk_access           1634769 non-null int64
wayId                 1632702 non-null object
county                1634769 non-null object
model_link_id         1634769 non-null int64
A      

In [15]:
node_gdf.columns

Index(['osm_node_id', 'shst_node_id', 'county', 'drive_access', 'walk_access',
       'bike_access', 'model_node_id', 'rail_only', 'geometry', 'X', 'Y',
       'point'],
      dtype='object')

In [16]:
%%time

print("-------write out node geojson---------")

node_prop = node_gdf.drop(["geometry", "point"], axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(node_gdf, node_prop)

with open(data_interim_dir + "step8_standard_format/node.geojson", "w") as f:
    json.dump(node_geojson, f)

-------write out node geojson---------
Wall time: 2min 40s


In [17]:
link_gdf[link_gdf.model_link_id == 7000783]

Unnamed: 0,access,area,bike_access,bridge,drive_access,est_width,fromIntersectionId,highway,id,junction,...,model_link_id,A,B,rail_traveltime,rail_only,geometry,A_point,B_point,locationReferences,shape_id
9248,,,1,,1,,409aedbef6c920728691b0f3e27d5e31,secondary,52ca3e033440fc92ef3536f97d83281e,,...,7000783,4500793,4521115,,0,"LINESTRING (-122.7244187 38.4227762, -122.7243...","[-122.7244187, 38.4227762]","[-122.7243586, 38.4223055]","[{'sequence': 1, 'point': [-122.7244187, 38.42...",139078


In [18]:
link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1634769 entries, 0 to 1634768
Data columns (total 42 columns):
access                1632702 non-null object
area                  1632702 non-null object
bike_access           1634769 non-null int64
bridge                1632702 non-null object
drive_access          1634769 non-null int64
est_width             1632702 non-null object
fromIntersectionId    1632702 non-null object
highway               1632702 non-null object
id                    1634769 non-null object
junction              1632702 non-null object
key                   1632702 non-null object
landuse               1632702 non-null object
lanes                 1632702 non-null object
link                  1632702 non-null object
maxspeed              1632702 non-null object
name                  1632702 non-null object
oneWay                1632702 non-null object
ref                   1632702 non-null object
roadway               1632702 non-null object
roundab

In [19]:
shape_gdf.shape

(869970, 7)