In [15]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json

In [2]:
from methods import link_df_to_geojson
from methods import point_df_to_geojson
from methods import identify_dead_end_nodes

In [3]:
data_interim_dir = "../../data/interim/"

# Read network

In [4]:
link_file = data_interim_dir + "step4_conflate_with_tomtom/" + "link.json"
with open(link_file) as f:
    link_json = json.load(f)
link_df = pd.DataFrame(link_json)

node_file = data_interim_dir + "step3_join_shst_extraction_with_osm/" + "node.geojson"
node_gdf = gpd.read_file(node_file)

shape_gdf = gpd.read_file(data_interim_dir + "step3_join_shst_extraction_with_osm/" 
                          + "shape.geojson")

In [5]:
link_gdf = pd.merge(link_df,
                    shape_gdf[["id", "geometry"]],
                   how = "left",
                   on = "id")

link_gdf = gpd.GeoDataFrame(link_gdf,
                            crs="EPSG:4326")

In [6]:
link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 37 columns):
FRC                   float64
F_JNCTID              float64
ID                    float64
LANES                 float64
T_JNCTID              float64
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
pp_tomtom_link_id     float64
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunne

# Add length

In [7]:
# add length in meters

geom_length = link_gdf[['geometry']].copy()
geom_length = geom_length.to_crs(epsg = 26915)
geom_length["length"] = geom_length.length

link_gdf["length"] = geom_length["length"]

# Flag drive dead end

In [8]:
non_dead_end_link_handle_df = link_gdf[(link_gdf.drive_access == 1)][["u", "v"]]

dead_end_node_list = identify_dead_end_nodes(non_dead_end_link_handle_df)

cumulative_dead_end_node_list = []

while len(dead_end_node_list) > 0:
    cumulative_dead_end_node_list = cumulative_dead_end_node_list + dead_end_node_list
    
    non_dead_end_link_handle_df = non_dead_end_link_handle_df[~(non_dead_end_link_handle_df.u.isin(dead_end_node_list)) & 
                                            ~(non_dead_end_link_handle_df.v.isin(dead_end_node_list))].copy()
    
    dead_end_node_list = identify_dead_end_nodes(non_dead_end_link_handle_df)

In [9]:
len(cumulative_dead_end_node_list)

142359

In [10]:
cumulative_dead_end_node_list

[26117855,
 26408927,
 29409975,
 31845971,
 31866760,
 33242031,
 33947187,
 35719197,
 35719237,
 35719242,
 35725887,
 52978502,
 52978511,
 52978530,
 52978548,
 52978787,
 52978791,
 52979353,
 52979660,
 52979702,
 52979741,
 52979746,
 52979819,
 52980213,
 52980607,
 52981509,
 52981521,
 52981528,
 52982126,
 52982289,
 52982540,
 52982567,
 52982608,
 52982610,
 52982925,
 52982938,
 52982952,
 52983422,
 52983441,
 52983476,
 52983477,
 52983483,
 52983936,
 52983940,
 52984451,
 52984958,
 52985470,
 52986212,
 52986214,
 52986385,
 52986401,
 52986402,
 52986566,
 52986578,
 52986586,
 52986595,
 52986752,
 52987302,
 52987692,
 52987693,
 52987961,
 52988038,
 52988041,
 52988102,
 52988140,
 52988144,
 52988147,
 52988152,
 52988328,
 52989564,
 52989590,
 52989626,
 52990130,
 52990143,
 52990473,
 52990722,
 52990785,
 52991502,
 52991995,
 52992052,
 52992056,
 52992078,
 52992100,
 52992350,
 52992360,
 52992414,
 52993138,
 52993437,
 52993498,
 52993549,
 52994043,

In [11]:
non_dead_end_link_handle_df

Unnamed: 0,u,v
0,2401244716,2401244712
1,57839068,57869731
2,1024388950,110424978
3,65561433,1325928459
4,4545575571,4545575563
5,3377850399,4313871331
6,4305402275,4305402282
8,4925258551,4332522355
9,65308539,65318508
10,4932240373,4616479310


In [12]:
link_gdf.drive_access.value_counts()

1    1335880
0     369892
Name: drive_access, dtype: int64

In [13]:
node_gdf.drive_access.value_counts()

1    555491
0    105668
Name: drive_access, dtype: int64

In [16]:
# update node and link drive access
# if u/v in dead end node list, then drive access = 0
# if osm_node_id in dead end node list, then drive access = 0

link_gdf['drive_access'] = np.where(((link_gdf.u.isin(cumulative_dead_end_node_list)) | 
                                               (link_gdf.v.isin(cumulative_dead_end_node_list))),
                                               0,
                                               link_gdf.drive_access)

node_gdf["drive_access"] = np.where(node_gdf.osm_node_id.isin(cumulative_dead_end_node_list),
                                     0,
                                     node_gdf.drive_access)

In [17]:
link_gdf.drive_access.value_counts()

1    1047357
0     658415
Name: drive_access, dtype: int64

In [18]:
node_gdf.drive_access.value_counts()

1    413132
0    248027
Name: drive_access, dtype: int64

# Rename attributes

In [29]:
link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1705772 entries, 0 to 1705771
Data columns (total 38 columns):
FRC                   float64
F_JNCTID              float64
tomtom_ID             float64
LANES                 float64
T_JNCTID              float64
access                object
area                  object
bike_access           int64
bridge                object
drive_access          int64
est_width             object
fromIntersectionId    object
highway               object
id                    object
junction              object
key                   object
landuse               object
lanes                 object
link                  object
maxspeed              object
name                  object
oneWay                object
tomtom_unique_id      float64
ref                   object
roadway               object
roundabout            object
service               object
shstGeometryId        object
shstReferenceId       object
toIntersectionId      object
tunne

In [28]:
link_gdf.rename(columns = {"ID" : "tomtom_ID", "pp_tomtom_link_id" : "tomtom_unique_id"},
               inplace = True)

not_to_export_link_json = ["id", "link"]

In [30]:
link_gdf.roadway.value_counts()

service           567356
residential       558660
footway           250954
cycleway          118938
tertiary           95311
secondary          69775
primary            27601
motorway_link       4988
trunk               4249
motorway            2871
secondary_link      1567
primary_link        1506
trunk_link          1384
tertiary_link        612
Name: roadway, dtype: int64

# Write out

In [31]:
print("-------write out link shape geojson---------")

shape_prop = ['id', 'fromIntersectionId', 'toIntersectionId', 'forwardReferenceId', 'backReferenceId']
shape_geojson = link_df_to_geojson(shape_gdf, shape_prop)

with open("../../data/interim/step5_tidy_roadway/shape.geojson", "w") as f:
    json.dump(shape_geojson, f)

    
print("-------write out link json---------")

link_prop = link_gdf.drop(["geometry"] + not_to_export_link_json, axis = 1).columns.tolist()

out = link_gdf[link_prop].to_json(orient = "records")

with open('../../data/interim/step5_tidy_roadway/link.json', 'w') as f:
    f.write(out)

    
print("-------write out node geojson---------")

node_prop = node_gdf.drop("geometry", axis = 1).columns.tolist()
node_geojson = point_df_to_geojson(node_gdf, node_prop)

with open("../../data/interim/step5_tidy_roadway/node.geojson", "w") as f:
    json.dump(node_geojson, f)  

-------write out link json---------
-------write out node geojson---------
