In [1]:
import json
import urllib
import urllib.request

import networkx as nx
import pandas as pd
from geopy import distance

from calculate_distance_between_stations import *

#### 1.提取北京地铁站点数据

In [None]:
# Download the Beijing subway description served by the AMap (Gaode) subway
# map, https://map.amap.com/subway/index.html, and load it as JSON.

url = "https://map.amap.com/service/subway?_1715741114049&srhdata=1100_drw_beijing.json"
filename = "subway_data.json"

# urlretrieve is provided by the urllib.request submodule, so that submodule
# must be imported explicitly (a bare `import urllib` does not load it).
# It saves the response to `filename` and returns a (filename, headers) tuple.
file = urllib.request.urlretrieve(url, filename)
local_path = file[0]

# Parse the downloaded JSON file.
with open(local_path, "r", encoding='utf-8') as f:
    data = json.load(f)
# The payload is saved in 'subway_data.json'; in it 'n' is a station name and
# 'sl' holds the station's longitude/latitude.

In [3]:
subwayline = data['l']

# Collect one record per (line, station) pair: station name, longitude,
# latitude and line name. Each entry of `subwayline` describes one subway
# line: 'ln' is the line name and 'st' the list of its stations; a station
# stores its name under 'n' and a "lng,lat" string under 'sl'.
# The records are gathered in a plain list and turned into a DataFrame once,
# instead of appending to the DataFrame row by row (which copies the frame on
# every insertion).
station_columns = ['StationName','lng','lat','line']
station_records = []
for line_info in subwayline:
    line = line_info['ln']
    for station in line_info['st']:
        # Coordinates are kept exactly as the strings found in the JSON.
        coords = station['sl'].split(',')
        station_records.append([station['n'], coords[0], coords[1], line])

df = pd.DataFrame(station_records, columns=station_columns)
df.head()

Unnamed: 0,StationName,lng,lat,line
0,苹果园,116.178945,39.925686,S1线
1,金安桥,116.162586,39.923298,S1线
2,四道桥,116.13401,39.91603,S1线
3,桥户营,116.125809,39.912383,S1线
4,上岸,116.122225,39.905138,S1线


In [4]:
# Split the station table by line name. The result is a list of
# (line_name, stations_of_that_line) tuples, one per subway line.
lines_tuple = [(line_name, stations) for line_name, stations in df.groupby('line')]

#### 2.构建北京地铁graph

##### 2.1构建没有权重的undirected graph

In [5]:
# Build the unweighted, undirected graph: consecutive stations of a line are
# joined by an edge. Lines 2 and 10 are loop lines, so their last station is
# additionally joined back to their first station.
loop_lines = ['2号线','10号线']
G = nx.Graph()
for line_name, stations in lines_tuple:
    names = stations['StationName'].tolist()
    # Edges between each station and the next one along the line.
    G.add_edges_from(zip(names, names[1:]))
    if line_name in loop_lines:
        # Close the loop between the first and the last station.
        G.add_edge(names[0], names[-1])

##### 2.2计算地铁站点间距离，此处使用曼哈顿距离

In [6]:
# Build one row per pair of consecutive stations on a line: the start station
# (S* columns), the following station (E* columns) and the line name, then
# compute the distance between the two stations.
segment_columns = ['SStation','S_lng','S_lat','line','EStation','E_lng','E_lat']
routes = []
for line_name, stations in lines_tuple:
    route = stations.copy()
    route.columns = ['SStation','S_lng','S_lat','line']
    # Pair every station with the next one on the same line; the last station
    # has no successor and gets NaN in the E* columns.
    route['EStation'] = route['SStation'].shift(-1)
    route['E_lng'] = route['S_lng'].shift(-1)
    route['E_lat'] = route['S_lat'].shift(-1)

    # Lines 2 and 10 are loop lines: the last station connects back to the
    # first one, so its E* columns are filled from the first row's S* columns.
    if line_name in ['2号线','10号线']:
        # .to_numpy() makes the assignment purely positional; the source
        # labels (SStation, S_lng, S_lat) differ from the target labels
        # (EStation, E_lng, E_lat), so label alignment must not take place.
        route.iloc[-1,-3:] = route.iloc[0,:3].to_numpy()
    routes.append(route)

# Concatenate all routes in one call instead of repeatedly concatenating onto
# a growing (initially empty) frame inside the loop.
if routes:
    lines = pd.concat(routes, axis=0)
else:
    lines = pd.DataFrame(columns=segment_columns)
# Drop the rows without a following station (the terminal station of every
# non-loop line).
lines.dropna(inplace=True,axis=0)

lines['manhattan_distance'] = lines.apply(lambda row:manhattan_distance(row['S_lat'],row['S_lng'],row['E_lat'],row['E_lng']),axis=1)

##### 2.3给每个edge赋予权重，权重为两地铁站点之间的距离

In [7]:
# Attach the station-to-station distance stored in `lines` to every edge of G
# as its 'weight' attribute.

# Index the distances by unordered station pair so that an edge (u, v) matches
# a segment recorded in either direction. setdefault keeps the first matching
# row when the same pair of stations appears more than once.
segment_distance = {}
for start, end, dist in zip(lines['SStation'].to_numpy(),
                            lines['EStation'].to_numpy(),
                            lines['manhattan_distance'].to_numpy()):
    segment_distance.setdefault(frozenset((start, end)), dist)

for u, v in G.edges():
    pair = frozenset((u, v))
    # Edges without a matching segment are left without a weight.
    if pair in segment_distance:
        # Deliberately not called `distance`: that name is imported from geopy
        # at the top of the notebook and must not be overwritten.
        edge_weight = segment_distance[pair]
        G[u][v]['weight'] = edge_weight

In [8]:
# Sanity check: report every edge whose weight is missing (None) or zero.
unweighted_edges = [
    (u, v, weight)
    for u, v, weight in G.edges(data='weight')
    if weight is None or weight == 0
]
for u, v, weight in unweighted_edges:
    print(f"Edge ({u}, {v}) has weight {weight}")

In [9]:
# Compute the closeness centrality of every station, using the edge attribute
# 'weight' as the distance between adjacent stations, and tabulate the result.
closeness = nx.closeness_centrality(G, distance='weight')
closeness_centrality = pd.DataFrame(
    list(closeness.items()),
    columns=['StationName','closeness_centrality'],
)
closeness_centrality

Unnamed: 0,StationName,closeness_centrality
0,巴沟,0.042203
1,苏州街,0.043602
2,海淀黄庄,0.045069
3,知春里,0.045759
4,知春路,0.046571
...,...,...
381,万安,0.032618
382,国家植物园,0.030224
383,香山,0.029325
384,3号航站楼,0.019505


In [10]:
# Save the per-station closeness centrality table as CSV, without the index.
output_csv = 'closeness_centrality.csv'
closeness_centrality.to_csv(output_csv, index=False)