# OpenStreetMapの道路データをもとにしたLineLayer用データの準備


In [1]:
from pathlib import Path
import json

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import tqdm.notebook as tqdm

## 道路データ


source: Boeing, Geoff, 2020, "Global Urban Street Networks Node/Edge Lists", https://doi.org/10.7910/DVN/DC7U0A, Harvard Dataverse, V2 - [japan-JPN_nelist.zip - Global Urban Street Networks](https://dataverse.harvard.edu/file.xhtml?fileId=4287688&version=2.0)


### Nodes


In [2]:
CITY_DIR = Path("./japan-JPN_nelist/tokyo-12964")

In [3]:
df_nodes = pd.read_csv(CITY_DIR / "node_list.csv")
df_nodes

Unnamed: 0,osmid,x,y,elevation,elevation_aster,elevation_srtm,ref,highway
0,36421509,139.785895,35.512138,6,6.0,,,motorway_junction
1,36421510,139.785994,35.512034,5,5.0,,,motorway_junction
2,36579154,139.793723,35.517239,5,5.0,,,
3,38433194,139.767616,35.502292,10,11.0,10.0,,
4,38433200,139.765447,35.500852,5,17.0,5.0,,
...,...,...,...,...,...,...,...,...
898425,8273761488,139.372988,35.958693,31,30.0,31.0,,
898426,8273761489,139.372903,35.958645,34,31.0,34.0,,
898427,8273761490,139.373186,35.958432,31,28.0,31.0,,
898428,8273761492,139.373675,35.958638,31,25.0,31.0,,


In [4]:
%%time
df_nodes["geometry"] = df_nodes.apply(lambda row: Point(row.x, row.y), axis=1)
gdf_nodes = gpd.GeoDataFrame(df_nodes, geometry="geometry")
gdf_nodes

CPU times: user 10 s, sys: 98 ms, total: 10.1 s
Wall time: 10.4 s


Unnamed: 0,osmid,x,y,elevation,elevation_aster,elevation_srtm,ref,highway,geometry
0,36421509,139.785895,35.512138,6,6.0,,,motorway_junction,POINT (139.78589 35.51214)
1,36421510,139.785994,35.512034,5,5.0,,,motorway_junction,POINT (139.78599 35.51203)
2,36579154,139.793723,35.517239,5,5.0,,,,POINT (139.79372 35.51724)
3,38433194,139.767616,35.502292,10,11.0,10.0,,,POINT (139.76762 35.50229)
4,38433200,139.765447,35.500852,5,17.0,5.0,,,POINT (139.76545 35.50085)
...,...,...,...,...,...,...,...,...,...
898425,8273761488,139.372988,35.958693,31,30.0,31.0,,,POINT (139.37299 35.95869)
898426,8273761489,139.372903,35.958645,34,31.0,34.0,,,POINT (139.37290 35.95864)
898427,8273761490,139.373186,35.958432,31,28.0,31.0,,,POINT (139.37319 35.95843)
898428,8273761492,139.373675,35.958638,31,25.0,31.0,,,POINT (139.37368 35.95864)


### Edges


In [5]:
df_edges = pd.read_csv(CITY_DIR / "edge_list.csv")
df_edges.head().T

  df_edges = pd.read_csv(CITY_DIR / "edge_list.csv")


Unnamed: 0,0,1,2,3,4
u,36421509,36421509,36421510,36579154,36579154
v,739475894,5383763947,739502890,1122774810,1122774810
key,0,0,0,0,1
oneway,True,True,True,False,False
highway,motorway,motorway_link,motorway,unclassified,"['tertiary', 'unclassified']"
name,首都高速湾岸線,首都高速神奈川6号川崎線,首都高速湾岸線,,
length,754.976,323.935,1847.197,802.279,809.346
grade,0.012,-0.006,0.001,-0.006,-0.006
grade_abs,0.012,0.006,0.001,0.006,0.006
lanes,,,3,,


## 地域による絞り込み


[国土数値情報 | 行政区域データ](https://nlftp.mlit.go.jp/ksj/gml/datalist/KsjTmplt-N03-v2_3.html)


In [6]:
gdf_regions = gpd.read_file(
    "./N03-190101_13_GML/N03-19_13_190101.shp", encoding="shift-jis"
)
gdf_regions

Unnamed: 0,N03_001,N03_002,N03_003,N03_004,N03_007,geometry
0,東京都,,,千代田区,13101,"POLYGON ((139.77287 35.70370, 139.77279 35.703..."
1,東京都,,,中央区,13102,"POLYGON ((139.78341 35.69645, 139.78459 35.696..."
2,東京都,,,港区,13103,"POLYGON ((139.77129 35.62841, 139.77128 35.628..."
3,東京都,,,港区,13103,"POLYGON ((139.76689 35.62774, 139.76718 35.627..."
4,東京都,,,港区,13103,"POLYGON ((139.77022 35.63199, 139.77046 35.631..."
...,...,...,...,...,...,...
6182,東京都,,,所属未定地,,"POLYGON ((139.84130 35.64702, 139.84131 35.647..."
6183,東京都,,,所属未定地,,"POLYGON ((139.80438 35.60061, 139.80399 35.600..."
6184,東京都,,,所属未定地,,"POLYGON ((139.81937 35.60899, 139.81923 35.608..."
6185,東京都,,,所属未定地,,"POLYGON ((139.81009 35.61355, 139.81069 35.613..."


In [7]:
print(gdf_regions[gdf_regions["N03_004"] == "新宿区"].iloc[0].geometry.centroid)

POINT (139.7089732914856 35.701071169990925)


In [8]:
gdf_nodes_target = gdf_nodes[
    gdf_nodes.within(gdf_regions[gdf_regions["N03_004"] == "新宿区"].iloc[0].geometry)
]

print(
    f"{gdf_nodes_target.shape[0]:,} nodes ({(gdf_nodes_target.shape[0] / gdf_nodes.shape[0]):.2%})"
)

5,783 nodes (0.64%)


In [9]:
# find all edges that connect nodes within 新宿区
df_edges_target = df_edges[
    df_edges["u"].isin(gdf_nodes_target["osmid"])
    | df_edges["v"].isin(gdf_nodes_target["osmid"])
]
df_edges_target

print(
    f"{df_edges_target.shape[0]:,} edges ({(df_edges_target.shape[0] / df_edges.shape[0]):.2%})"
)

14,487 edges (0.59%)


## 道路の属性情報


In [10]:
def get_column_non_null_ratio(df, col):
    n_total = df.shape[0]
    n_non_null = df[col].count()
    print(
        "{:,.0f} / {:,.0f} ({:.1%})".format(n_non_null, n_total, n_non_null / n_total)
    )

In [11]:
for col in ["length", "grade", "oneway", "highway", "name", "lanes", "maxspeed"]:
    print(col, end=":\t")
    get_column_non_null_ratio(df_edges_target, col)

length:	14,487 / 14,487 (100.0%)
grade:	14,487 / 14,487 (100.0%)
oneway:	14,487 / 14,487 (100.0%)
highway:	14,487 / 14,487 (100.0%)
name:	2,767 / 14,487 (19.1%)
lanes:	2,249 / 14,487 (15.5%)
maxspeed:	2,029 / 14,487 (14.0%)


In [12]:
df_edges_target["length"].describe()

count    14487.000000
mean        56.591572
std         64.993620
min          0.625000
25%         27.754500
50%         43.805000
75%         69.858000
max       2108.612000
Name: length, dtype: float64

In [13]:
df_edges_target["grade"].describe()

count    14487.000000
mean        -0.000114
std          0.107212
min         -5.329000
25%         -0.021000
50%          0.000000
75%          0.020000
max          5.329000
Name: grade, dtype: float64

In [14]:
df_edges_target["highway"].value_counts()

highway
residential                         6446
unclassified                        4838
tertiary                            1135
primary                              988
secondary                            755
trunk                                 88
primary_link                          70
['residential', 'unclassified']       69
living_street                         26
secondary_link                        21
motorway_link                         19
motorway                              10
tertiary_link                          4
['residential', 'living_street']       4
['tertiary', 'unclassified']           4
road                                   4
trunk_link                             3
['tertiary', 'residential']            2
['primary', 'secondary']               1
Name: count, dtype: int64

In [15]:
df_edges_target["maxspeed"].value_counts()

maxspeed
40              736
30              580
50              400
20              309
60                2
['40', '50']      1
['60', '30']      1
Name: count, dtype: int64

In [16]:
df_edges_target["lanes"].value_counts()

lanes
2             1647
4              230
1              179
3              171
['2', '3']       8
5                4
['2', '1']       3
6                3
['4', '3']       2
['5', '3']       1
['2', '4']       1
Name: count, dtype: int64

In [17]:
df_edges_target["name"].value_counts()

name
大久保通り                                           227
早稲田通り                                           157
外苑東通り                                           134
新宿通り                                            117
靖国通り                                            105
                                               ... 
['桜ヶ池通り', '北原橋']                                  1
['東京都道新宿副都心八号線', '東京都道新宿副都心八号線(新都心歩道橋下交差点)']      1
['首都高速4号新宿線', '赤坂トンネル']                           1
思い出横丁                                             1
(20時から翌日7時は右折可)                                   1
Name: count, Length: 169, dtype: int64

## LineLayer用データ作成


In [18]:
def get_node_coords(node_id):
    node = gdf_nodes[gdf_nodes["osmid"] == node_id].iloc[0]
    # return [node.x, node.y, int(node.elevation)]
    return [node.x, node.y, 0]  # 高さは0にする

In [19]:
line_data = []

for row in tqdm.tqdm(
    df_edges_target.itertuples(index=False), total=df_edges_target.shape[0]
):
    edge = row._asdict()

    if isinstance(edge["name"], list):
        break

    name = edge["name"]

    line_data.append(
        {
            "from": get_node_coords(edge["u"]),
            "to": get_node_coords(edge["v"]),
            "length": edge["length"],
            "grade": edge["grade"],
            "oneway": edge["oneway"],
            "highway": edge["highway"],
            "name": edge["name"] if not pd.isna(edge["name"]) else "",
            "maxspeed": edge["maxspeed"] if not pd.isna(edge["maxspeed"]) else None,
            "lanes": edge["lanes"] if not pd.isna(edge["lanes"]) else None,
        }
    )

  0%|          | 0/14487 [00:00<?, ?it/s]

In [20]:
with open("../public/lines.json", "w") as f:
    json.dump(line_data, f, indent=2, ensure_ascii=False)