In [5]:
# download data
import os
from urllib.request import urlopen
import shutil

def download_if_not_exists(url, filename):
  """Download ``url`` to ``filename`` unless that file already exists.

  Args:
    url: Address passed to ``urllib.request.urlopen``.
    filename: Destination path. Missing parent directories are created.

  Returns:
    None. Prints a notice when the file already exists, otherwise prints
    the elapsed wall time of the download.

  Raises:
    Whatever ``urlopen``, ``open`` or the copy raises; in that case no
    file is left at ``filename``.
  """
  # Plain-Python timing instead of the ``%time`` magic so the function is
  # also usable (and testable) outside IPython.
  import time

  if os.path.isfile(filename):
    print("{} already exists".format(filename))
    return

  # dirname() is "" for a bare file name, and os.makedirs("") raises.
  directory = os.path.dirname(filename)
  if directory:
    os.makedirs(directory, exist_ok=True)

  # Stream into a sibling ".part" file and rename it at the very end, so an
  # interrupted download never leaves a truncated file that the isfile()
  # check above would later report as "already exists".
  partial = "{}.part".format(filename)
  started = time.perf_counter()
  try:
    # Both the HTTP response and the output file are closed on exit.
    with urlopen(url) as response, open(partial, "wb") as f:
      shutil.copyfileobj(response, f)
    os.replace(partial, filename)
  except BaseException:
    # Also covers KeyboardInterrupt (interrupting the kernel mid-download).
    if os.path.exists(partial):
      os.remove(partial)
    raise
  print("Wall time: {:.2f} s".format(time.perf_counter() - started))

# Fetch the N05-19 archive once; later runs reuse the local copy.
download_if_not_exists(
  url="https://nlftp.mlit.go.jp/ksj/gml/data/N05/N05-19/N05-19_GML.zip",
  filename="tmp/railway.zip",
)

tmp/railway.zip already exists


In [6]:
# unzip the downloaded files
from zipfile import ZipFile
import os

def unzip(filename, directory):
  with ZipFile(filename) as z:
    %time z.extractall(path=directory)

# Unpack the downloaded archive next to it.
unzip(filename="tmp/railway.zip", directory="tmp/railway")

CPU times: user 347 ms, sys: 73.4 ms, total: 421 ms
Wall time: 432 ms


In [7]:
# read the shapefiles with geopandas
import geopandas as gpd

%time railways = gpd.read_file("tmp/railway/N05-19_RailroadSection2.shp", encoding="cp932")
%time stations = gpd.read_file("tmp/railway/N05-19_Station2.shp", encoding="cp932")

print(railways.shape)
display(railways.head())

print(stations.shape)
display(stations.head())

CPU times: user 2.18 s, sys: 101 ms, total: 2.28 s
Wall time: 3.4 s
CPU times: user 5.52 s, sys: 71.1 ms, total: 5.59 s
Wall time: 5.58 s
(2576, 12)


Unnamed: 0,N05_001,N05_002,N05_003,N05_004,N05_005b,N05_005e,N05_006,N05_007,N05_008,N05_009,N05_010,geometry
0,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1950,1968,EB02_11218,,,,,"LINESTRING (140.72649 41.77398, 140.72655 41.7..."
1,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1950,1968,EB02_11218,,,,,"LINESTRING (140.58803 42.10630, 140.58861 42.1..."
2,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1969,9999,EB02_11218,,,,,"LINESTRING (140.72649 41.77398, 140.72655 41.7..."
3,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1969,9999,EB02_11218,,,,,"LINESTRING (140.58803 42.10630, 140.58861 42.1..."
4,2,江差線,北海道旅客鉄道（旧国鉄）,1913,1950,2013,EB02_11219,,,,,"LINESTRING (140.12797 41.85666, 140.12805 41.8..."


(23505, 12)


Unnamed: 0,N05_001,N05_002,N05_003,N05_004,N05_005b,N05_005e,N05_006,N05_007,N05_008,N05_009,N05_011,geometry
0,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218106,,,,旭川,POINT (142.35792 43.76344)
1,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218099,,,,妹背牛,POINT (141.96665 43.69096)
2,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218103,,,,伊納,POINT (142.27170 43.76320)
3,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218100,,,,深川,POINT (142.04145 43.72125)
4,2,函館線,北海道旅客鉄道（旧国鉄）,1911,1950,9999,EB03_11218105,,,,近文,POINT (142.32534 43.78933)


In [9]:
# geometry -> wkt
from shapely import wkt
def geometry_to_wkt(x):
  """Add a ``wkt`` column holding the WKT text of each geometry.

  Args:
    x: Frame with a ``geometry`` column whose items expose a ``.wkt``
      attribute. Missing geometries (``None``) are allowed.

  Returns:
    The same object ``x``. The column is added in place, so the caller's
    frame is modified as well. Rows with a missing geometry get ``None``.
  """
  # Plain-Python timing instead of the ``%time`` magic so the function is
  # also usable (and testable) outside IPython.
  import time

  started = time.perf_counter()
  # A missing geometry has no ``.wkt``; keep it missing instead of raising
  # AttributeError.
  x["wkt"] = [None if g is None else g.wkt for g in x.geometry]
  print("Wall time: {:.3f} s".format(time.perf_counter() - started))
  return x

# Add the WKT column to both layers (railways first), then preview them.
railways, stations = (geometry_to_wkt(layer) for layer in (railways, stations))

display(railways.head(), stations.head())

CPU times: user 306 ms, sys: 12.5 ms, total: 319 ms
Wall time: 319 ms
CPU times: user 354 ms, sys: 1.23 ms, total: 356 ms
Wall time: 356 ms


Unnamed: 0,N05_001,N05_002,N05_003,N05_004,N05_005b,N05_005e,N05_006,N05_007,N05_008,N05_009,N05_010,geometry,wkt
0,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1950,1968,EB02_11218,,,,,"LINESTRING (140.72649 41.77398, 140.72655 41.7...","LINESTRING (140.726488 41.773982, 140.72655 41..."
1,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1950,1968,EB02_11218,,,,,"LINESTRING (140.58803 42.10630, 140.58861 42.1...","LINESTRING (140.58803 42.1063, 140.58861 42.10..."
2,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1969,9999,EB02_11218,,,,,"LINESTRING (140.72649 41.77398, 140.72655 41.7...","LINESTRING (140.726488 41.773982, 140.72655 41..."
3,2,函館線,北海道旅客鉄道（旧国鉄）,1880,1969,9999,EB02_11218,,,,,"LINESTRING (140.58803 42.10630, 140.58861 42.1...","LINESTRING (140.58803 42.1063, 140.58861 42.10..."
4,2,江差線,北海道旅客鉄道（旧国鉄）,1913,1950,2013,EB02_11219,,,,,"LINESTRING (140.12797 41.85666, 140.12805 41.8...","LINESTRING (140.127975 41.85666, 140.12805 41...."


Unnamed: 0,N05_001,N05_002,N05_003,N05_004,N05_005b,N05_005e,N05_006,N05_007,N05_008,N05_009,N05_011,geometry,wkt
0,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218106,,,,旭川,POINT (142.35792 43.76344),POINT (142.35792 43.76344)
1,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218099,,,,妹背牛,POINT (141.96665 43.69096),POINT (141.96665 43.690955)
2,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218103,,,,伊納,POINT (142.27170 43.76320),POINT (142.2717 43.763205)
3,2,函館線,北海道旅客鉄道（旧国鉄）,1898,1950,9999,EB03_11218100,,,,深川,POINT (142.04145 43.72125),POINT (142.04145 43.721251)
4,2,函館線,北海道旅客鉄道（旧国鉄）,1911,1950,9999,EB03_11218105,,,,近文,POINT (142.32534 43.78933),POINT (142.325344 43.789326)


In [11]:
# rename and select necessary columns
def _select_renamed(frame, mapping):
  """Return ``frame`` restricted to the keys of ``mapping``, renamed to its values."""
  return frame[mapping.keys()].rename(columns=mapping)

# Source attribute code -> readable column name; unlisted columns are dropped.
cols = dict(N05_002="line", N05_003="company", N05_005e="endyear", wkt="wkt")
railways2 = _select_renamed(railways, cols)

# Stations additionally keep N05_011 as the station name.
cols = dict(N05_002="line", N05_003="company", N05_005e="endyear",
            N05_011="station", wkt="wkt")
stations2 = _select_renamed(stations, cols)

display(railways2.head(), stations2.head())

Unnamed: 0,line,company,endyear,wkt
0,函館線,北海道旅客鉄道（旧国鉄）,1968,"LINESTRING (140.726488 41.773982, 140.72655 41..."
1,函館線,北海道旅客鉄道（旧国鉄）,1968,"LINESTRING (140.58803 42.1063, 140.58861 42.10..."
2,函館線,北海道旅客鉄道（旧国鉄）,9999,"LINESTRING (140.726488 41.773982, 140.72655 41..."
3,函館線,北海道旅客鉄道（旧国鉄）,9999,"LINESTRING (140.58803 42.1063, 140.58861 42.10..."
4,江差線,北海道旅客鉄道（旧国鉄）,2013,"LINESTRING (140.127975 41.85666, 140.12805 41...."


Unnamed: 0,line,company,endyear,station,wkt
0,函館線,北海道旅客鉄道（旧国鉄）,9999,旭川,POINT (142.35792 43.76344)
1,函館線,北海道旅客鉄道（旧国鉄）,9999,妹背牛,POINT (141.96665 43.690955)
2,函館線,北海道旅客鉄道（旧国鉄）,9999,伊納,POINT (142.2717 43.763205)
3,函館線,北海道旅客鉄道（旧国鉄）,9999,深川,POINT (142.04145 43.721251)
4,函館線,北海道旅客鉄道（旧国鉄）,9999,近文,POINT (142.325344 43.789326)


In [12]:
# filter rows to avoid no-more-space error

# Railways: keep only lines that still exist (end year "9999") ...
flg1 = railways2["endyear"].eq("9999")
# ... and only Shinkansen lines (line name contains "新幹線").
flg3 = railways2["line"].str.contains("新幹線")
railways3 = (
  railways2.loc[flg1 & flg3]
  .reset_index(drop=True)
  .drop(columns="endyear")
  .rename(columns={"line": "line_name", "wkt": "geometry"})
)
print(railways3.shape)
display(railways3)

# Stations: keep only stations that still exist (end year "9999").
flg1 = stations2["endyear"].eq("9999")
stations3 = (
  stations2.loc[flg1]
  .reset_index(drop=True)
  .drop(columns="endyear")
  .rename(columns={"line": "line_name", "station": "station_name", "wkt": "geometry"})
)
print(stations3.shape)
display(stations3)

(9, 3)


Unnamed: 0,line_name,company,geometry
0,東北新幹線,東日本旅客鉄道（旧国鉄）,"LINESTRING (139.768108 35.681098, 139.76807 35..."
1,上越新幹線,東日本旅客鉄道（旧国鉄）,"LINESTRING (139.061745 37.912005, 139.05937 37..."
2,九州新幹線,九州旅客鉄道（旧国鉄）,"LINESTRING (130.421175 33.589575, 130.42108 33..."
3,東海道新幹線,東海旅客鉄道（旧国鉄）,"LINESTRING (135.500035 34.73368, 135.50225 34...."
4,山陽新幹線,西日本旅客鉄道（旧国鉄）,"LINESTRING (130.421175 33.589575, 130.42127 33..."
5,北陸新幹線,東日本旅客鉄道（旧国鉄）,"LINESTRING (138.19025 36.64375, 138.19073342 3..."
6,北陸新幹線,東日本旅客鉄道（旧国鉄）,"LINESTRING (139.012993 36.322956, 139.01283 36..."
7,北陸新幹線,西日本旅客鉄道（旧国鉄）,"LINESTRING (138.24847132 37.08136715, 138.2473..."
8,北海道新幹線,北海道旅客鉄道（旧国鉄）,"LINESTRING (140.64709716 41.90503286, 140.6486..."


(10252, 4)


Unnamed: 0,line_name,company,station_name,geometry
0,函館線,北海道旅客鉄道（旧国鉄）,旭川,POINT (142.35792 43.76344)
1,函館線,北海道旅客鉄道（旧国鉄）,妹背牛,POINT (141.96665 43.690955)
2,函館線,北海道旅客鉄道（旧国鉄）,伊納,POINT (142.2717 43.763205)
3,函館線,北海道旅客鉄道（旧国鉄）,深川,POINT (142.04145 43.721251)
4,函館線,北海道旅客鉄道（旧国鉄）,近文,POINT (142.325344 43.789326)
...,...,...,...,...
10247,三国芦原線,えちぜん鉄道,西春江ハートピア,POINT (136.20622 36.1389)
10248,三国芦原線,えちぜん鉄道,太郎丸エンゼルランド,POINT (136.20569 36.131335)
10249,三国芦原線,えちぜん鉄道,下兵庫こうふく,POINT (136.2026 36.17012)
10250,大手町線,伊予鉄道,大手町駅前,POINT (132.75547 33.840315)


In [13]:
import os

# Write each table as a gzip-compressed UTF-8 CSV (without the index) into its
# own directory: railways first, then stations.
for dirname, frame in [
  ("../data/facilities/shinkansen", railways3),
  ("../data/facilities/stations", stations3),
]:
  os.makedirs(dirname, exist_ok=True)
  frame.to_csv(f"{dirname}/data.csv.gz", index=False, encoding="utf8", compression="gzip")

  # (Re)create the directory's .gitignore with the single entry "data.csv";
  # mode "w" replaces any existing content.
  with open(f"{dirname}/.gitignore", "w") as f:
    f.write("data.csv")

In [14]:
# Length (in characters) of the longest station name
stations3["station_name"].str.len().max()

17

In [15]:
# Stations whose name is longer than 10 characters
stations3.loc[stations3["station_name"].str.len().gt(10)]

Unnamed: 0,line_name,company,station_name,geometry
1541,阿武隈急行線,阿武隈急行,やながわ希望の森公園前,POINT (140.61565 37.85787)
1972,鹿島線,東日本旅客鉄道（旧国鉄）,鹿島サッカースタジアム,POINT (140.63591 35.993065)
2267,ディズニーリゾートライン,舞浜リゾートライン,東京ディズニーシー･ステーション,POINT (139.88949 35.627235)
2268,ディズニーリゾートライン,舞浜リゾートライン,リゾートゲートウェイ･ステーション,POINT (139.88495 35.63511)
2269,ディズニーリゾートライン,舞浜リゾートライン,東京ディズニーランド･ステーション,POINT (139.878445 35.635845)
2270,ディズニーリゾートライン,舞浜リゾートライン,ベイサイド･ステーション,POINT (139.876345 35.627855)
2412,大洗鹿島線,鹿島臨海鉄道,長者ヶ浜潮騒はまなす公園前,POINT (140.618855 36.03129)
2414,大洗鹿島線,鹿島臨海鉄道,鹿島サッカースタジアム,POINT (140.63591 35.993065)
3196,空港線,京浜急行電鉄,羽田空港国際線ターミナル,POINT (139.767067 35.54498)
3197,空港線,京浜急行電鉄,羽田空港国内線ターミナル,POINT (139.785858 35.54972)
