# OpenStreetMapの道路データをもとにしたTripsLayer用データの準備


In [1]:
from pathlib import Path
import json

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import tqdm.notebook as tqdm

## 道路データ


source: Boeing, Geoff, 2020, "Global Urban Street Networks Node/Edge Lists", https://doi.org/10.7910/DVN/DC7U0A, Harvard Dataverse, V2 - [japan-JPN_nelist.zip - Global Urban Street Networks](https://dataverse.harvard.edu/file.xhtml?fileId=4287688&version=2.0)


### Nodes


In [2]:
CITY_DIR = Path("./japan-JPN_nelist/tokyo-12964")

In [3]:
df_nodes = pd.read_csv(CITY_DIR / "node_list.csv")
df_nodes

Unnamed: 0,osmid,x,y,elevation,elevation_aster,elevation_srtm,ref,highway
0,36421509,139.785895,35.512138,6,6.0,,,motorway_junction
1,36421510,139.785994,35.512034,5,5.0,,,motorway_junction
2,36579154,139.793723,35.517239,5,5.0,,,
3,38433194,139.767616,35.502292,10,11.0,10.0,,
4,38433200,139.765447,35.500852,5,17.0,5.0,,
...,...,...,...,...,...,...,...,...
898425,8273761488,139.372988,35.958693,31,30.0,31.0,,
898426,8273761489,139.372903,35.958645,34,31.0,34.0,,
898427,8273761490,139.373186,35.958432,31,28.0,31.0,,
898428,8273761492,139.373675,35.958638,31,25.0,31.0,,


In [4]:
%%time
df_nodes["geometry"] = df_nodes.apply(lambda row: Point(row.x, row.y), axis=1)
gdf_nodes = gpd.GeoDataFrame(df_nodes, geometry="geometry")
gdf_nodes

CPU times: user 12.9 s, sys: 243 ms, total: 13.2 s
Wall time: 13.3 s


Unnamed: 0,osmid,x,y,elevation,elevation_aster,elevation_srtm,ref,highway,geometry
0,36421509,139.785895,35.512138,6,6.0,,,motorway_junction,POINT (139.78589 35.51214)
1,36421510,139.785994,35.512034,5,5.0,,,motorway_junction,POINT (139.78599 35.51203)
2,36579154,139.793723,35.517239,5,5.0,,,,POINT (139.79372 35.51724)
3,38433194,139.767616,35.502292,10,11.0,10.0,,,POINT (139.76762 35.50229)
4,38433200,139.765447,35.500852,5,17.0,5.0,,,POINT (139.76545 35.50085)
...,...,...,...,...,...,...,...,...,...
898425,8273761488,139.372988,35.958693,31,30.0,31.0,,,POINT (139.37299 35.95869)
898426,8273761489,139.372903,35.958645,34,31.0,34.0,,,POINT (139.37290 35.95864)
898427,8273761490,139.373186,35.958432,31,28.0,31.0,,,POINT (139.37319 35.95843)
898428,8273761492,139.373675,35.958638,31,25.0,31.0,,,POINT (139.37368 35.95864)


### Edges


In [5]:
df_edges = pd.read_csv(CITY_DIR / "edge_list.csv")
df_edges.head().T

  df_edges = pd.read_csv(CITY_DIR / "edge_list.csv")


Unnamed: 0,0,1,2,3,4
u,36421509,36421509,36421510,36579154,36579154
v,739475894,5383763947,739502890,1122774810,1122774810
key,0,0,0,0,1
oneway,True,True,True,False,False
highway,motorway,motorway_link,motorway,unclassified,"['tertiary', 'unclassified']"
name,首都高速湾岸線,首都高速神奈川6号川崎線,首都高速湾岸線,,
length,754.976,323.935,1847.197,802.279,809.346
grade,0.012,-0.006,0.001,-0.006,-0.006
grade_abs,0.012,0.006,0.001,0.006,0.006
lanes,,,3,,


## 地域による絞り込み


[国土数値情報 | 行政区域データ](https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-v2_3.html)


In [6]:
gdf_regions = gpd.read_file(
    "./N03-190101_13_GML/N03-19_13_190101.shp", encoding="shift-jis"
)
gdf_regions

Unnamed: 0,N03_001,N03_002,N03_003,N03_004,N03_007,geometry
0,東京都,,,千代田区,13101,"POLYGON ((139.77287 35.70370, 139.77279 35.703..."
1,東京都,,,中央区,13102,"POLYGON ((139.78341 35.69645, 139.78459 35.696..."
2,東京都,,,港区,13103,"POLYGON ((139.77129 35.62841, 139.77128 35.628..."
3,東京都,,,港区,13103,"POLYGON ((139.76689 35.62774, 139.76718 35.627..."
4,東京都,,,港区,13103,"POLYGON ((139.77022 35.63199, 139.77046 35.631..."
...,...,...,...,...,...,...
6182,東京都,,,所属未定地,,"POLYGON ((139.84130 35.64702, 139.84131 35.647..."
6183,東京都,,,所属未定地,,"POLYGON ((139.80438 35.60061, 139.80399 35.600..."
6184,東京都,,,所属未定地,,"POLYGON ((139.81937 35.60899, 139.81923 35.608..."
6185,東京都,,,所属未定地,,"POLYGON ((139.81009 35.61355, 139.81069 35.613..."


In [17]:
print(gdf_regions[gdf_regions["N03_004"] == "新宿区"].iloc[0].geometry.centroid)

POINT (139.7089732914856 35.701071169990925)


In [7]:
gdf_nodes_target = gdf_nodes[
    gdf_nodes.within(gdf_regions[gdf_regions["N03_004"] == "新宿区"].iloc[0].geometry)
]

print(
    f"{gdf_nodes_target.shape[0]:,} nodes ({(gdf_nodes_target.shape[0] / gdf_nodes.shape[0]):.2%})"
)

5,783 nodes (0.64%)


In [8]:
# find all edges that connect nodes within 新宿区
df_edges_target = df_edges[
    df_edges["u"].isin(gdf_nodes_target["osmid"])
    | df_edges["v"].isin(gdf_nodes_target["osmid"])
]
df_edges_target

print(
    f"{df_edges_target.shape[0]:,} edges ({(df_edges_target.shape[0] / df_edges.shape[0]):.2%})"
)

14,487 edges (0.59%)


## ランダムなパスの作成


In [9]:
target_node_ids = gdf_nodes_target["osmid"].tolist()

In [10]:
def get_node_coords(node_id):
    node = df_nodes[df_nodes["osmid"] == node_id].iloc[0]
    return [node.x, node.y]


get_node_coords(target_node_ids[0])

[139.7045284, 35.6903957]

In [11]:
def get_waypoints(start_node_id, max_path_length):
    waypoints = [get_node_coords(start_node_id)]

    known_edges = set()
    for _ in range(1, max_path_length):
        possible_edges = df_edges_target[
            (df_edges_target["u"] == start_node_id)
            & (~df_edges_target["osmid"].isin(known_edges))
        ]

        if possible_edges.shape[0] == 0:
            return waypoints

        known_edges.add(possible_edges.iloc[0]["osmid"])

        next_edge = possible_edges.sample(1).iloc[0]
        start_node_id = next_edge["v"]
        waypoints.append(get_node_coords(start_node_id))

    return waypoints

In [12]:
all_waypoints = []
max_path_length = 5
for start_node_id in tqdm.tqdm(target_node_ids):
    waypoints = get_waypoints(start_node_id, max_path_length)
    all_waypoints.append(waypoints)

for i in range(1, max_path_length):
    len_i_waypoints = [wp for wp in all_waypoints if len(wp) == i]
    print(f"{i}.\t{len(len_i_waypoints)}")

  0%|          | 0/5783 [00:00<?, ?it/s]

1.	2
2.	247
3.	808
4.	938


In [13]:
trips_data = [
    {"waypoints": wp, "timestamps": list(range(len(wp)))}
    for wp in all_waypoints
    if len(wp) == max_path_length
]

trips_data[0], len(trips_data)

({'waypoints': [[139.7045284, 35.6903957],
   [139.704596, 35.6904896],
   [139.7033748, 35.6910464],
   [139.7033046, 35.6906542],
   [139.7032002, 35.6902105]],
  'timestamps': [0, 1, 2, 3, 4]},
 3788)

In [14]:
with open("../public/trips.json", "w") as f:
    json.dump(trips_data, f, indent=2)