# Cleaning small roads

In [187]:
import numpy as np
import pandas as pd
import json
import keplergl
import networkx as nx
from directed_road_network import DirectedRoadGraphGenerator

In [2]:
linkData = pd.read_csv("data/linkData-TokyoArea-v2.csv")
leftovers = linkData.loc[((linkData.roadType == 'unclassified') | (linkData.roadType == 'residential') | (linkData.roadType == 'living_street') | (linkData.roadType == 'pedestrian'))]
leftovers

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,type,id,nodes,roadType,roadName,oneWay,speedLimit,roadWidth
67,way,4848943,"[31254395, 6114579097, 1105125598, 1105125100,...",unclassified,,yes,,
69,way,4849049,"[499185617, 6114407385, 499185467]",unclassified,,yes,30,
122,way,4857047,"[31330006, 31330057, 6439922815, 31330058, 195...",unclassified,,yes,,
158,way,5170998,"[35926493, 35926496]",unclassified,,,,
159,way,5170999,"[35926495, 35926500, 35926501, 6877559285, 687...",unclassified,,yes,,
...,...,...,...,...,...,...,...,...
781035,way,812626290,"[1391658116, 7590140392]",residential,,,,
781036,way,812626291,"[7590140394, 7590140393, 1615202001]",residential,,,,
781037,way,812728992,"[2030079963, 7591153650, 7591153651]",residential,,,,
781038,way,812728998,"[3724132549, 3724132548, 3724132547]",residential,,,,


## What is the distribution of the road types?

In [3]:
linkData.groupby('roadType').agg('count').id / linkData.shape[0]

roadType
living_street     0.007662
motorway          0.005427
motorway_link     0.005461
pedestrian        0.011131
primary           0.014076
primary_link      0.001172
residential       0.524599
road              0.000563
secondary         0.011291
secondary_link    0.000484
tertiary          0.064568
tertiary_link     0.000769
trunk             0.011296
trunk_link        0.001741
unclassified      0.339759
Name: id, dtype: float64

Seems like there are a lot of small roads (residential and unclassified) occupying roughly 80% of the edges

## How much data are missing on each road type?

In [5]:
leftovers.loc[leftovers.roadType == 'unclassified'].isnull().sum() / leftovers.shape[0]

type          0.000000
id            0.000000
nodes         0.000000
roadType      0.000000
roadName      0.379661
oneWay        0.358725
speedLimit    0.379391
roadWidth     0.384035
dtype: float64

In [6]:
leftovers.loc[leftovers.roadType == 'residential'].isnull().sum() / leftovers.shape[0]

type          0.000000
id            0.000000
nodes         0.000000
roadType      0.000000
roadName      0.591791
oneWay        0.568345
speedLimit    0.589477
roadWidth     0.592950
dtype: float64

In [7]:
leftovers.loc[leftovers.roadType == 'living_street'].isnull().sum() / leftovers.shape[0]

type          0.000000
id            0.000000
nodes         0.000000
roadType      0.000000
roadName      0.008659
oneWay        0.007468
speedLimit    0.008630
roadWidth     0.008630
dtype: float64

In [8]:
leftovers.loc[leftovers.roadType == 'pedestrian'].isnull().sum() / leftovers.shape[0]

type          0.000000
id            0.000000
nodes         0.000000
roadType      0.000000
roadName      0.011782
oneWay        0.012037
speedLimit    0.012600
roadWidth     0.012477
dtype: float64

## How to decide what to fill the missing values with?

In [9]:
leftovers.loc[leftovers.roadType == 'unclassified'].oneWay.mode

<bound method Series.mode of 67        yes
69        yes
122       yes
158       NaN
159       yes
         ... 
781002    NaN
781015    yes
781018     no
781028    NaN
781029    NaN
Name: oneWay, Length: 265365, dtype: object>

In [17]:
leftovers.loc[leftovers.roadType == 'unclassified'].speedLimit.dropna().max

<bound method Series.max of 69        30
259       30
260       40
320       30
335       20
          ..
779649    30
779650    30
780675    30
780850    30
780851    30
Name: speedLimit, Length: 3670, dtype: object>

In [18]:
linkData.nodes.map(lambda node_list: set(node_list) & node_set)

Unnamed: 0,type,id,nodes,roadType,roadName,oneWay,speedLimit,roadWidth
0,way,4847506,"[31236733, 621545916, 621545917, 31236732, 183...",motorway,首都高速11号台場線,yes,,
1,way,4847507,"[31300417, 31300416, 31300384, 3799099877, 498...",motorway,首都高速湾岸線,yes,60,
2,way,4847509,"[31236654, 2805815266, 31236651, 2805815274, 3...",motorway_link,,yes,,
3,way,4847513,"[1832774262, 1832774263]",tertiary,,yes,,
4,way,4847514,"[31252838, 573283112, 31252812, 1876066967, 31...",tertiary,,yes,,
...,...,...,...,...,...,...,...,...
781035,way,812626290,"[1391658116, 7590140392]",residential,,,,
781036,way,812626291,"[7590140394, 7590140393, 1615202001]",residential,,,,
781037,way,812728992,"[2030079963, 7591153650, 7591153651]",residential,,,,
781038,way,812728998,"[3724132549, 3724132548, 3724132547]",residential,,,,


## Filter out the nodes and edges

In [39]:
node_data = pd.read_csv("data/leftover_nodes_v1.csv")
node_data

Unnamed: 0.1,Unnamed: 0,id,lat,lon,thisPoint,elevation
0,42287,1456908236,35.608371,139.239738,POINT (139.2397382 35.6083708),415.190002
1,42288,1456908266,35.608469,139.241025,POINT (139.2410253 35.6084687),393.260010
2,42289,1456908343,35.608457,139.241306,POINT (139.2413057 35.6084568),388.769989
3,42290,1456908374,35.608478,139.241423,POINT (139.2414228 35.6084781),386.750000
4,42291,1456908461,35.608547,139.241728,POINT (139.2417278 35.60854699999999),379.260010
...,...,...,...,...,...,...
1175378,3694804,6502211476,35.571015,139.671924,POINT (139.6719244 35.5710154),7.800000
1175379,3694805,6502211477,35.571014,139.671329,POINT (139.6713289 35.57101420000001),5.350000
1175380,3694806,6502211688,35.570163,139.672337,POINT (139.6723373 35.57016289999999),7.670000
1175381,3694807,6502211693,35.569847,139.671568,POINT (139.6715677 35.5698474),5.640000


In [40]:
edge_data = pd.read_csv("data/leftover_edges_v1.csv")
edge_data

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,id,roadType,roadName,oneWay,speedLimit,roadWidth,driveSpeed,source,target
0,4848943,unclassified,,1,10,10,10,31254395,6114579097
1,4848943,unclassified,,1,10,10,10,6114579097,1105125598
2,4848943,unclassified,,1,10,10,10,1105125598,1105125100
3,4848943,unclassified,,1,10,10,10,1105125100,1105125490
4,4848943,unclassified,,1,10,10,10,1105125490,1105125313
...,...,...,...,...,...,...,...,...,...
3281768,812728992,residential,,0,6,4,6,2030079963,7591153650
3281769,812728992,residential,,0,6,4,6,7591153650,7591153651
3281770,812728998,residential,,0,6,4,6,3724132549,3724132548
3281771,812728998,residential,,0,6,4,6,3724132548,3724132547


In [41]:
# Main coverage
min_lon = 139.2019327633228
min_lat = 35.12569127247789
max_lon = 140.4100021566109
max_lat = 36.10592212339746

# Filter nodes
filter_cond = (node_data.lon > min_lon) & (node_data.lon < max_lon) & (node_data.lat > min_lat) & (node_data.lat < max_lat)
node_data = node_data.loc[filter_cond]

# Filter edges
node_set = set(node_data.id)
edge_data = edge_data.loc[edge_data.source.map(lambda s: s in node_set) & edge_data.target.map(lambda t: t in node_set)]

In [145]:
node_data = pd.read_csv("data/filtered-nodeData-TokyoArea-v5.csv")
# node_data.iloc[:, 1:].to_csv("data/filtered-nodeData-TokyoArea-v5.csv", index=False)

In [142]:
link_data = pd.read_csv("data/filtered-linkData-TokyoArea-v5.csv")
# link_data.to_csv("data/filtered-linkData-TokyoArea-v5.csv", index=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [69]:
graph_gen = DirectedRoadGraphGenerator(version=5)
graph_gen.load_graph("data/roadNetwork-Directed-TokyoArea-v5.json", full_name=False)

In [70]:
G = graph_gen.graph
list(G.nodes(data=True))[:5], list(G.edges(data=True))[:5]

([(31254395,
   {'lat': 35.6698425,
    'lon': 139.77472450000002,
    'thisPoint': 'POINT (139.7747245 35.6698425)',
    'elevation': 3.009999990463257,
    'modality': 'road'}),
  (6114579097,
   {'lat': 35.6698176,
    'lon': 139.7747877,
    'thisPoint': 'POINT (139.7747877 35.6698176)',
    'elevation': 2.740000009536743,
    'modality': 'road'}),
  (1105125598,
   {'lat': 35.66968660000001,
    'lon': 139.7751197,
    'thisPoint': 'POINT (139.7751197 35.66968660000001)',
    'elevation': 2.009999990463257,
    'modality': 'road'}),
  (1105125100,
   {'lat': 35.669650299999994,
    'lon': 139.7751501,
    'thisPoint': 'POINT (139.7751501 35.66965029999999)',
    'elevation': 1.7899999618530271,
    'modality': 'road'}),
  (1105125490,
   {'lat': 35.669593,
    'lon': 139.7751478,
    'thisPoint': 'POINT (139.7751478 35.669593)',
    'elevation': 1.7899999618530271,
    'modality': 'road'})],
 [(31254395,
   6114579097,
   {'roadType': 'unclassified',
    'roadName': '',
    'oneWa

In [68]:
node_data.isnull().sum()

id           0
lat          0
lon          0
thisPoint    0
elevation    0
dtype: int64

In [73]:
filename = "data/roadNetwork-Directed-TokyoArea-v5.json"
with open(filename, encoding='utf-8-sig') as f:
    js_graph = json.load(f)

In [77]:
links = pd.DataFrame(js_graph['links'])
nodes = pd.DataFrame(js_graph['nodes'])

In [78]:
check_map = keplergl.KeplerGl(height=400, data={"links": links, "nodes": nodes})
check_map.save_to_html(file_name="leftover_map.html")

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to leftover_map.html!


In [80]:
originalLinkData = pd.read_csv("data/elevationLinkData-TokyoArea-v4.csv")
originalLinkData

Unnamed: 0,id,roadType,roadName,oneWay,speedLimit,roadWidth,driveSpeed,source,target,elevationGain
0,4847506,motorway,首都高速11号台場線,1,80,21.0,60,31236733,621545916,-0.49
1,4847506,motorway,首都高速11号台場線,1,80,21.0,60,621545916,621545917,-0.16
2,4847506,motorway,首都高速11号台場線,1,80,21.0,60,621545917,31236732,-0.42
3,4847506,motorway,首都高速11号台場線,1,80,21.0,60,31236732,1832774251,-0.03
4,4847506,motorway,首都高速11号台場線,1,80,21.0,60,1832774251,1832774248,0.00
...,...,...,...,...,...,...,...,...,...,...
910716,812570409,primary,厚木街道,0,30,9.0,5,1565442394,7570072604,
910717,812570409,primary,厚木街道,0,30,9.0,5,7570072604,1565442396,
910718,812570409,primary,厚木街道,0,30,9.0,5,1565442396,7570072605,
910719,812570409,primary,厚木街道,0,30,9.0,5,7570072605,7309294738,


In [82]:
filename = "data/roadNetwork-Directed-TokyoArea-with-elevation.json"
with open(filename, encoding='utf-8-sig') as f:
    js_graph = json.load(f)

In [83]:
thick_links = pd.DataFrame(js_graph['links'])

In [92]:
overall_map = keplergl.KeplerGl(height=400, data={"small_roads": links, "large_roads": thick_links})
overall_map.save_to_html(file_name="overall_map.html")

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to overall_map.html!


In [91]:
thick_links.roadType.unique(), links.roadType.unique()

(array(['motorway', 'motorway_link', 'tertiary', 'tertiary_link',
        'primary', 'secondary', 'trunk', 'primary_link', 'secondary_link',
        'trunk_link', 'road'], dtype=object),
 array(['unclassified', 'residential', 'pedestrian', 'living_street'],
       dtype=object))

In [113]:
savepoint = pd.read_csv("savepoint_linkElevationData.csv")
savepoint.elevationGain.isnull().sum() / savepoint.shape[0]

  interactivity=interactivity, compiler=compiler, result=result)


0.5193948126531692

In [96]:
len(thick_links.columns), len(links.columns)

(18, 17)

In [99]:
thick_links = thick_links.drop(columns='elevationGain')
thick_links.columns

Index(['roadType', 'roadName', 'oneWay', 'speedLimit', 'roadWidth',
       'driveSpeed', 'capacity', 'numLanes', 'modality', 'x1', 'y1', 'x2',
       'y2', 'distance', 'timeWeight', 'source', 'target'],
      dtype='object')

In [100]:
overall_links = pd.concat([links, thick_links])
overall_links

Unnamed: 0,roadType,roadName,oneWay,speedLimit,roadWidth,driveSpeed,source,target,capacity,numLanes,modality,x1,y1,x2,y2,distance,timeWeight
0,unclassified,,1,10.0,10,10,31254395,6114579097,500,1,road,139.774725,35.669843,139.774788,35.669818,0,0.0
1,unclassified,,1,10.0,10,10,6114579097,1105125598,500,1,road,139.774788,35.669818,139.775120,35.669687,0,0.0
2,unclassified,,1,10.0,10,10,1105125598,1105125100,500,1,road,139.775120,35.669687,139.775150,35.669650,0,0.0
3,unclassified,,1,10.0,10,10,1105125100,1105125490,500,1,road,139.775150,35.669650,139.775148,35.669593,0,0.0
4,unclassified,,1,10.0,10,10,1105125490,1105125313,500,1,road,139.775148,35.669593,139.774943,35.669055,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595788,primary,厚木街道,0,30.0,9,5,7570072604,1565442394,8000,2,road,139.487006,35.466292,139.487283,35.466309,0,0.0
1595789,primary,厚木街道,0,30.0,9,5,1565442396,7570072605,8000,2,road,139.487354,35.466313,139.487421,35.466315,0,0.0
1595790,primary,厚木街道,0,30.0,9,5,1565442396,7570072604,8000,2,road,139.487283,35.466309,139.487354,35.466313,0,0.0
1595791,primary,厚木街道,0,30.0,9,5,7570072605,7309294738,8000,2,road,139.487421,35.466315,139.487489,35.466318,0,0.0


In [103]:
overall_links.isnull().sum() / overall_links.shape[0]

roadType      0.0
roadName      0.0
oneWay        0.0
speedLimit    0.0
roadWidth     0.0
driveSpeed    0.0
source        0.0
target        0.0
capacity      0.0
numLanes      0.0
modality      0.0
x1            0.0
y1            0.0
x2            0.0
y2            0.0
distance      0.0
timeWeight    0.0
dtype: float64

In [107]:
filter_cond_source = (thick_links.y1 > min_lat) & (thick_links.y1 < max_lat) & (thick_links.x1> min_lon) & (thick_links.x1 < max_lon)
filter_cond_target = (thick_links.y2 > min_lat) & (thick_links.y2 < max_lat) & (thick_links.x2 > min_lon) & (thick_links.x2 < max_lon)
filtered_thick_links = thick_links.loc[filter_cond_source & filter_cond_target]

In [110]:
overall_links = pd.concat([links, filtered_thick_links])

In [112]:
overall_links.to_csv("data/kepler_viz_data.csv", index=False)

In [114]:
elevationLinkData = pd.read_csv("data/filtered-elevationLinkData-TokyoArea-v5.csv")

In [116]:
elevationLinkData.isnull().sum()/elevationLinkData.shape[0]

id               0.000000
roadType         0.000000
roadName         0.976103
oneWay           0.000000
speedLimit       0.000000
roadWidth        0.000000
driveSpeed       0.000000
source           0.000000
target           0.000000
elevationGain    0.000000
dtype: float64

In [122]:
filename = "data/roadNetwork-Directed-TokyoArea-with-elevation-v5.json"
with open(filename, encoding='utf-8-sig') as f:
    js_graph = json.load(f)

In [123]:
list(js_graph['links'])[0], list(js_graph['nodes'])[0]

({'roadType': 'unclassified',
  'roadName': '',
  'oneWay': 1,
  'speedLimit': 10.0,
  'roadWidth': 10.0,
  'driveSpeed': 10,
  'elevationGain': -0.2699999809265137,
  'capacity': 500,
  'numLanes': 1,
  'modality': 'road',
  'x1': 139.77472450000002,
  'y1': 35.6698425,
  'x2': 139.7747877,
  'y2': 35.6698176,
  'distance': 0,
  'timeWeight': 0.0,
  'source': 31254395,
  'target': 6114579097},
 {'lat': 35.6698425,
  'lon': 139.77472450000002,
  'thisPoint': 'POINT (139.7747245 35.6698425)',
  'elevation': 3.009999990463257,
  'modality': 'road',
  'id': 31254395})

In [124]:
elevation_test = pd.DataFrame(js_graph['links'])

In [125]:
elevation_test.isnull().sum() / elevation_test.shape[0]

roadType         0.000000
roadName         0.000000
oneWay           0.000000
speedLimit       0.000000
roadWidth        0.000000
driveSpeed       0.000000
elevationGain    0.475369
capacity         0.000000
numLanes         0.000000
modality         0.000000
x1               0.000000
y1               0.000000
x2               0.000000
y2               0.000000
distance         0.000000
timeWeight       0.000000
source           0.000000
target           0.000000
dtype: float64

In [127]:
node_set1 = set([(lat, lon) for lat, lon in zip(elevation_test.y1, elevation_test.x1)])
node_set2 = set([(lat, lon) for lat, lon in zip(elevation_test.y2, elevation_test.x2)])
link_node_set = node_set1 | node_set2
len(link_node_set)

972801

In [129]:
len(js_graph['nodes'])

972814

In [131]:
large_elevation_links = pd.read_csv("data/elevationLinkData-TokyoArea-v4.csv")
large_elevation_links.roadType.unique()

array(['motorway', 'motorway_link', 'tertiary', 'primary', 'secondary',
       'trunk', 'tertiary_link', 'trunk_link', 'primary_link',
       'secondary_link', 'road'], dtype=object)

In [133]:
large_elevation_links.isnull().sum() / large_elevation_links.shape[0]

id               0.000000
roadType         0.000000
roadName         0.491654
oneWay           0.000000
speedLimit       0.000000
roadWidth        0.000000
driveSpeed       0.000000
source           0.000000
target           0.000000
elevationGain    0.640390
dtype: float64

In [135]:
filename = "data/roadNetwork-Directed-TokyoArea-v4.json"
with open(filename, encoding='utf-8-sig') as f:
    js_graph_v4 = json.load(f)

In [136]:
old_links = pd.DataFrame(js_graph_v4['links'])
old_links.isnull().sum() / old_links.shape[0]

roadType      0.0
roadName      0.0
oneWay        0.0
speedLimit    0.0
roadWidth     0.0
driveSpeed    0.0
source        0.0
target        0.0
capacity      0.0
numLanes      0.0
modality      0.0
x1            0.0
y1            0.0
x2            0.0
y2            0.0
distance      0.0
timeWeight    0.0
dtype: float64

In [138]:
filtered_nodes = pd.DataFrame(js_graph['nodes'])

In [140]:
filtered_nodes.isnull().sum()

lat          0
lon          0
thisPoint    0
elevation    0
modality     0
id           0
dtype: int64

In [146]:
len(set(link_data.source) | set(link_data.target)), node_data.shape[0]

(972814, 1175383)

In [147]:
node_data.isnull().sum()

id           0
lat          0
lon          0
thisPoint    0
elevation    0
dtype: int64

In [151]:
node_set = set(node_data.id)
len(node_set), node_data.shape[0]

(1175383, 1175383)

In [152]:
link_data.source.map(lambda s: s in node_set).all()

True

In [153]:
link_data.target.map(lambda t: t in node_set).all()

True

In [154]:
node_data

Unnamed: 0,id,lat,lon,thisPoint,elevation
0,1456908236,35.608371,139.239738,POINT (139.2397382 35.6083708),415.190002
1,1456908266,35.608469,139.241025,POINT (139.2410253 35.6084687),393.260010
2,1456908343,35.608457,139.241306,POINT (139.2413057 35.6084568),388.769989
3,1456908374,35.608478,139.241423,POINT (139.2414228 35.6084781),386.750000
4,1456908461,35.608547,139.241728,POINT (139.2417278 35.60854699999999),379.260010
...,...,...,...,...,...
1175378,6502211476,35.571015,139.671924,POINT (139.6719244 35.5710154),7.800000
1175379,6502211477,35.571014,139.671329,POINT (139.6713289 35.57101420000001),5.350000
1175380,6502211688,35.570163,139.672337,POINT (139.6723373 35.57016289999999),7.670000
1175381,6502211693,35.569847,139.671568,POINT (139.6715677 35.5698474),5.640000


In [155]:
filename = "data/roadNetwork-Directed-TokyoArea-with-elevation-v5.json"
with open(filename, encoding='utf-8-sig') as f:
    js_graph_v5 = json.load(f)

In [156]:
new_link_data = pd.DataFrame(js_graph_v5['links'])

In [157]:
new_link_data

Unnamed: 0,roadType,roadName,oneWay,speedLimit,roadWidth,driveSpeed,source,target,capacity,numLanes,modality,x1,y1,x2,y2,distance,timeWeight,elevationGain
0,unclassified,,1,10.0,10,10,31254395,6114579097,500,1,road,139.774725,35.669843,139.774788,35.669818,0,0.0,-0.270000
1,unclassified,,1,10.0,10,10,6114579097,1105125598,500,1,road,139.774788,35.669818,139.775120,35.669687,0,0.0,-0.730000
2,unclassified,,1,10.0,10,10,1105125598,1105125100,500,1,road,139.775120,35.669687,139.775150,35.669650,0,0.0,-0.220000
3,unclassified,,1,10.0,10,10,1105125100,1105125490,500,1,road,139.775150,35.669650,139.775148,35.669593,0,0.0,0.000000
4,unclassified,,1,10.0,10,10,1105125490,1105125313,500,1,road,139.775148,35.669593,139.774943,35.669055,0,0.0,-0.380000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2027910,residential,,0,6.0,4,6,7590138379,7590138378,500,1,road,139.571120,35.509055,139.571403,35.508881,0,0.0,7.880001
2027911,residential,,0,6.0,4,6,7590140392,1391658116,500,1,road,139.567870,35.506318,139.567525,35.506309,0,0.0,
2027912,residential,,0,6.0,4,6,7590140394,7590140393,500,1,road,139.565425,35.508689,139.565562,35.508780,0,0.0,-1.269999
2027913,residential,,0,6.0,4,6,7590140393,1615202001,500,1,road,139.565562,35.508780,139.565608,35.508829,0,0.0,0.000000


In [169]:
new_link_data.loc[new_link_data.elevationGain.isnull()]

Unnamed: 0,roadType,roadName,oneWay,speedLimit,roadWidth,driveSpeed,source,target,capacity,numLanes,modality,x1,y1,x2,y2,distance,timeWeight,elevationGain
40,unclassified,,0,10.0,10,10,35926496,35926493,500,1,road,139.782301,35.556638,139.782247,35.556608,0,0.0,
49,unclassified,,0,10.0,10,10,35930919,35930918,500,1,road,139.760529,35.573737,139.761934,35.573729,0,0.0,
51,unclassified,,0,10.0,10,10,35930915,35930919,500,1,road,139.761934,35.573729,139.761928,35.572530,0,0.0,
54,unclassified,,0,10.0,10,10,35932470,35932472,500,1,road,139.770388,35.571289,139.770272,35.571295,0,0.0,
56,unclassified,,0,10.0,10,10,2896895382,35932470,500,1,road,139.770272,35.571295,139.770131,35.571296,0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2027906,residential,,0,6.0,4,6,7589504712,7589504713,500,1,road,139.564984,35.505776,139.565178,35.505695,0,0.0,
2027908,residential,,0,6.0,4,6,7589504794,7589504795,500,1,road,139.561113,35.505531,139.561247,35.505664,0,0.0,
2027909,residential,,0,6.0,4,6,7589504793,7589504794,500,1,road,139.561247,35.505664,139.561379,35.505771,0,0.0,
2027911,residential,,0,6.0,4,6,7590140392,1391658116,500,1,road,139.567870,35.506318,139.567525,35.506309,0,0.0,


In [162]:
source_id = 7590140392
target_id = 1391658116
source_elevation = node_data.loc[node_data.id == source_id].elevation.values[0]
target_elevation = node_data.loc[node_data.id == target_id].elevation.values[0]
target_elevation - source_elevation

-0.9699993133544922

In [164]:
G = nx.json_graph.node_link_graph(js_graph_v5, directed=True, multigraph=False)

In [165]:
G.edges[source_id, target_id]

{'roadType': 'residential',
 'roadName': '',
 'oneWay': 0,
 'speedLimit': 6.0,
 'roadWidth': 4.0,
 'driveSpeed': 6,
 'x1': 139.5678703,
 'y1': 35.5063176,
 'x2': 139.567525,
 'y2': 35.506309,
 'distance': 0,
 'timeWeight': 0.0,
 'modality': 'road',
 'capacity': 500,
 'numLanes': 1}

In [170]:
new_link_data.shape[0]

2027915

In [179]:
nodes_from_links = set(new_link_data.source) | set(new_link_data.target)

In [177]:
node_set = set(pd.read_csv("data/filtered-nodeData-TokyoArea-v5.csv").id)

In [180]:
len(nodes_from_links & node_set)

972814

In [189]:
def calculate_gain(row):
    if np.isnan(row.elevationGain):
        sourceHeight = node_data.loc[node_data.id == row.source].elevation.values[0]
        targetHeight = node_data.loc[node_data.id == row.target].elevation.values[0]
        print(f"{targetHeight - sourceHeight}")
        return targetHeight - sourceHeight
    else:
        return row.elevationGain

In [196]:
for i in range(new_link_data.shape[0]):
    elevationGain = new_link_data.loc[i, 'elevationGain']
    if np.isnan(elevationGain):
        row = new_link_data.loc[i, ['source', 'target']]
        sourceHeight = node_data.loc[node_data.id == row.source].elevation.values[0]
        targetHeight = node_data.loc[node_data.id == row.target].elevation.values[0]
        new_link_data.loc[i, 'elevationGain'] = targetHeight - sourceHeight
        if i % 10000 == 0:
            print(f"{i}th row: {targetHeight - sourceHeight}")
    if i % 10000 == 0:
        print(f"{i/964007*100}% processed!")

# new_link_data.loc[:, 'elevationGain'] = new_link_data.apply(calculate_gain, axis=1)

0.0% processed!
10000th row: -0.05000305175782671
1.0373368658111404% processed!
2.0746737316222807% processed!
3.112010597433421% processed!
4.1493474632445615% processed!
50000th row: -0.40000152587890625
5.186684329055701% processed!
6.224021194866842% processed!
7.261358060677983% processed!
8.298694926489123% processed!
9.336031792300263% processed!
100000th row: -0.09000015258789773
10.373368658111403% processed!
110000th row: 0.0
11.410705523922545% processed!
12.448042389733684% processed!
13.485379255544824% processed!
14.522716121355966% processed!
15.560052987167106% processed!
16.597389852978246% processed!
170000th row: -0.059999942779541016
17.634726718789388% processed!
180000th row: -0.030002593994126414
18.672063584600526% processed!
190000th row: -1.3600006103515696
19.709400450411668% processed!
20.746737316222806% processed!
210000th row: -0.019999980926513672
21.784074182033947% processed!
220000th row: -0.7899999618530273
22.82141104784509% processed!
230000th row

187.7579727118164% processed!
188.79530957762756% processed!
1830000th row: 0.9599990844726562
189.8326464434387% processed!
1840000th row: 0.07999992370605469
190.86998330924982% processed!
1850000th row: -0.55999755859375
191.907320175061% processed!
192.9446570408721% processed!
1870000th row: 0.279998779296875
193.98199390668324% processed!
195.01933077249438% processed!
196.05666763830553% processed!
1900000th row: -0.05999755859375
197.09400450411667% processed!
198.1313413699278% processed!
1920000th row: 0.3299999237060547
199.16867823573895% processed!
1930000th row: -2.4800014495849503
200.20601510155007% processed!
1940000th row: 0.16000032424926847
201.24335196736124% processed!
202.28068883317238% processed!
1960000th row: 0.43000030517578125
203.3180256989835% processed!
204.35536256479466% processed!
1980000th row: 0.19999980926513672
205.3926994306058% processed!
1990000th row: -0.059999465942386365
206.43003629641692% processed!
2000000th row: -1.5
207.46737316222809% 

In [193]:
new_link_data.isnull().sum()

roadType              0
roadName              0
oneWay                0
speedLimit            0
roadWidth             0
driveSpeed            0
source                0
target                0
capacity              0
numLanes              0
modality              0
x1                    0
y1                    0
x2                    0
y2                    0
distance              0
timeWeight            0
elevationGain    964007
dtype: int64

In [197]:
old_links.shape

(1595793, 17)

In [None]:
new_link_data