In [1]:
# Call libraries
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
from shapely import geometry
import libpysal
import time
from rasterstats import zonal_stats

import warnings
warnings.filterwarnings("ignore")

In [2]:
# WGS84和GJC坐标系转换函数
# -*- coding: utf-8 -*-
import json
import urllib
import math
# import numpy as np

x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626  # π
a = 6378245.0  # 长半轴
ee = 0.00669342162296594323  # 偏心率平方


'''
输入（经度，维度）
'''
def bd09_to_gcj02(bd_lon, bd_lat):
    """
    百度坐标系(BD-09)转火星坐标系(GCJ-02)
    百度——>谷歌、高德
    :param bd_lat:百度坐标纬度
    :param bd_lon:百度坐标经度
    :return:转换后的坐标列表形式
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gg_lng = z * math.cos(theta)
    gg_lat = z * math.sin(theta)
    return [gg_lng, gg_lat]
def gcj02_to_wgs84(lng, lat):
    """
    GCJ02(火星坐标系)转GPS84
    :param lng:火星坐标系的经度
    :param lat:火星坐标系纬度
    :return:
    """
    if out_of_china(lng, lat):
        return [lng, lat]
    dlat = _transformlat(lng - 105.0, lat - 35.0)
    dlng = _transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]
def bd09_to_wgs84(bd_lon, bd_lat):
    lon, lat = bd09_to_gcj02(bd_lon, bd_lat)
    return gcj02_to_wgs84(lon, lat)
def bd09_to_wgs84(bd_lon, bd_lat):
    lon, lat = bd09_to_gcj02(bd_lon, bd_lat)
    return gcj02_to_wgs84(lon, lat)
def gcj02_to_bd09(lng, lat):
    """
    火星坐标系(GCJ-02)转百度坐标系(BD-09)
    谷歌、高德——>百度
    :param lng:火星坐标经度
    :param lat:火星坐标纬度
    :return:
    """
    z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    bd_lng = z * math.cos(theta) + 0.0065
    bd_lat = z * math.sin(theta) + 0.006
    return [bd_lng, bd_lat]
def wgs84_to_gcj02(lng, lat):
    """
    WGS84转GCJ02(火星坐标系)
    :param lng:WGS84坐标系的经度
    :param lat:WGS84坐标系的纬度
    :return:
    """
    if out_of_china(lng, lat):  # 判断是否在国内
        return [lng, lat]
    dlat = _transformlat(lng - 105.0, lat - 35.0)
    dlng = _transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]
def wgs84_to_bd09(lon, lat):
    lon, lat = wgs84_to_gcj02(lon, lat)
    return gcj02_to_bd09(lon, lat)

def out_of_china(lng, lat):
    """
    判断是否在国内，不在国内不做偏移
    :param lng:
    :param lat:
    :return:
    """
    return not (lng > 73.66 and lng < 135.05 and lat > 3.86 and lat < 53.55)

def _transformlng(lng, lat):
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
          0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret
def _transformlat(lng, lat):
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
          0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret


In [3]:
# 转换WGS84坐标为GCJ02的函数
def transform_coordinates(row):
    lng_x, lat_x = wgs84_to_gcj02(row['longitude_x'], row['latitude_x'])
    lng_y, lat_y = wgs84_to_gcj02(row['longitude_y'], row['latitude_y'])
    return pd.Series({'lng_x_gcj02': lng_x, 'lat_x_gcj02': lat_x, 'lng_y_gcj02': lng_y, 'lat_y_gcj02': lat_y})

In [4]:
# 按照经纬度计算距离的函数
from math import radians, cos, sin, asin, sqrt
 
def haversine(lon1, lat1, lon2, lat2): # 经度1，纬度1，经度2，纬度2 （十进制度数）
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # 将十进制度数转化为弧度
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
 
    # haversine公式
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # 地球平均半径，单位为公里
    return c * r * 1000

In [5]:
pop_resid = gpd.read_file('residential_pop.shp') 
pop_resid = pop_resid.to_crs(epsg = 4326)

pop_resid = pop_resid[(pop_resid['sum_pop'] >= 1)]
pop_resid['cen'] = pop_resid['geometry'].centroid
pop_resid['Popid'] = pop_resid.index + 1

pop_resid['longitude'] = pop_resid['cen'].x
pop_resid['latitude'] = pop_resid['cen'].y

In [338]:
pop_resid = pop_resid.to_crs(epsg = 3857)
pop_resid['area'] = pop_resid.geometry.area

In [339]:
pop_resid

Unnamed: 0,name,sum_pop,geometry,cen,Popid,longitude,latitude,index,area
0,,7276.136558,"POLYGON ((12742557.833 2583965.856, 12742480.9...",POINT (114.47232 22.59552),1,114.472319,22.595523,1,2.228591e+06
1,,1217.979429,"POLYGON ((12745413.133 2576195.874, 12745295.9...",POINT (114.48832 22.53681),2,114.488317,22.536814,1,6.199168e+05
3,?????,2048.236732,"POLYGON ((12709691.109 2588710.751, 12709781.1...",POINT (114.17423 22.63940),4,114.174231,22.639398,1,2.236799e+05
4,,11243.644833,"POLYGON ((12702176.654 2585733.443, 12702091.5...",POINT (114.10326 22.61540),5,114.103263,22.615398,1,4.572930e+05
5,,875.631427,"POLYGON ((12711043.062 2583066.362, 12711070.5...",POINT (114.18960 22.59443),6,114.189601,22.594434,1,3.634558e+05
...,...,...,...,...,...,...,...,...,...
2076,????,325.056286,"POLYGON ((12690104.222 2594879.791, 12690110.1...",POINT (113.99786 22.69175),2077,113.997858,22.691749,1,1.643225e+04
2077,???,6592.324279,"POLYGON ((12668653.302 2594538.785, 12668256.1...",POINT (113.80420 22.69361),2078,113.804203,22.693606,1,3.842189e+05
2078,????,1589.763618,"POLYGON ((12685408.176 2594097.317, 12685432.6...",POINT (113.95554 22.68481),2079,113.955540,22.684806,1,8.869985e+04
2079,????,149.804736,"POLYGON ((12688879.018 2592578.554, 12688876.6...",POINT (113.98661 22.67397),2080,113.986614,22.673974,1,5.573208e+03


In [341]:
pop_resid.describe()

Unnamed: 0,sum_pop,Popid,longitude,latitude,index,area
count,1828.0,1828.0,1828.0,1828.0,1828.0,1828.0
mean,1029.052911,1038.438184,114.067542,22.613699,1.0,63721.32
std,1684.320138,611.388241,0.164655,0.080969,0.0,92463.96
min,3.078762,1.0,113.773104,22.474726,1.0,1103.434
25%,249.581774,505.75,113.929448,22.547742,1.0,19223.22
50%,540.680846,1020.5,114.041011,22.59478,1.0,36627.41
75%,1112.315046,1587.25,114.146364,22.682122,1.0,74867.4
max,26058.146784,2081.0,114.585661,22.808439,1.0,2228591.0


In [343]:
print(pop_resid['sum_pop'].sum())

1881108.7217623326


In [342]:
pop_resid = pop_resid.to_crs(epsg = 4326)

In [9]:
city_park = gpd.read_file('entrance_sim/city_park_entrance.geojson').to_crs(epsg = 4326)
nature_park = gpd.read_file('entrance_sim/nature_park_entrance.geojson').to_crs(epsg = 4326)

In [331]:
len(city_park)

208

In [332]:
len(nature_park)

58

## Public transport

In [22]:
# 调用高德路径规划api-公交，计算路径距离distance（米），耗时cost（秒）
# 路径规划1.0
import json
from urllib import request

def fetch_GaodeMap_publictrans(df):
    urlbase = 'https://restapi.amap.com/v3/direction/transit/integrated?&key=225cfe7d506a6037debd6a9f4d5aa583&origin={0},{1}&destination={2},{3}&city=0755&time=10:00'
    distances = []
    costs = []
    exceptions = []  # 用来收集引发异常的行索引

    for i, row in df.iterrows():
        x1 = row['lng_x_gcj02']
        y1 = row['lat_x_gcj02']
        x2 = row['lng_y_gcj02']
        y2 = row['lat_y_gcj02']
        url = urlbase.format(x1, y1, x2, y2)

        try:
            html = request.urlopen(url, timeout=15).read()
            js = json.loads(html)
            distance = js['route']['transits'][0]['distance']
            cost = js['route']['transits'][0]['duration']
        except Exception as e:
            print(f"Error processing row {i}: {e}")
            distance = None
            cost = None
            exceptions.append(i)  # 将引发异常的行索引添加到列表中

        distances.append(distance)
        costs.append(cost)

    df['distance'] = distances
    df['cost'] = costs

    return df, exceptions

### d) Public transport——City park——Travel time thresholds =30min     

In [85]:
# 1 连接pop和park
pop_resid['index'] = 1
city_park['index'] = 1
OD_d = pd.merge(pop_resid, city_park, on='index').drop('index', axis=1)
len(OD_d)

380224

In [86]:
# 2 根据OD左边计算直线距离，得到新列length
OD_d['length'] = OD_d[['longitude_x', 'latitude_x','longitude_y', 'latitude_y',]].\
apply(lambda x:haversine(x[0],x[1],x[2],x[3]),axis=1)

In [87]:
# 3 筛选lenth<=12km的OD对
f_OD_d = OD_d[(OD_d['length'] <= 12000)]
# 只保留同一个公园直线距离最近的入口
idx = f_OD_d.groupby(['Popid', 'Cityparkid'])['length'].idxmin()
f_OD_d = f_OD_d.loc[idx]
len(f_OD_d)

41910

In [88]:
# 4 转换WGS84坐标为GCJ02，便于下一步高德api计算
new_columns = f_OD_d.apply(transform_coordinates, axis=1)
# 合并原始 DataFrame 和新列
f_OD_d_GCJ02 = pd.concat([f_OD_d, new_columns], axis=1)
len(f_OD_d_GCJ02)

41910

In [129]:
f_OD_d_GCJ02.head(2)

Unnamed: 0,name,sum_pop,geometry_x,cen,Popid,longitude_x,latitude_x,area,Cityparkid,count,...,longitude_y,latitude_y,polygonStr,geometry_y,length,lng_x_gcj02,lat_x_gcj02,lng_y_gcj02,lat_y_gcj02,geometry
177,,7276.136558,"POLYGON ((114.46834 22.60207, 114.46765 22.601...",POINT (114.47232 22.59552),1,114.472319,22.595523,189186.224851,72,4,...,114.423113,22.629912,"POLYGON ((114.42576 22.631069, 114.425851 22.6...",POINT (114.42311 22.62991),6335.085074,114.477123,22.592745,114.42804,22.627234,POINT (114.47712 22.59275)
570,?????,2048.236732,"POLYGON ((114.17310 22.64141, 114.17391 22.641...",POINT (114.17423 22.63940),4,114.174231,22.639398,135493.847185,61,2,...,114.114103,22.554264,"POLYGON ((114.110073 22.555798, 114.110462 22....",POINT (114.11410 22.55426),11301.136973,114.179205,22.636635,114.11921,22.551578,POINT (114.17921 22.63663)


In [113]:
# 导入手动获取的，以为park entrance为起点，在高德公交达到圈界面获取的35-45min公交等时圈
geo_city_capture = gpd.read_file('tmp/geo_city_capture.geojson')

In [114]:
geo_city_capture

Unnamed: 0,Cityparkid,longitude,latitude,gcj02_lng,gcj02_lat,gcj02_lng_lat,geometry
0,1,113.900289,22.481475,113.905151,22.478420,"113.905151,22.478420","MULTIPOLYGON (((113.92358 22.50266, 113.92325 ..."
1,1,113.894065,22.499114,113.898934,22.496061,"113.898934,22.496061","MULTIPOLYGON (((113.91162 22.49606, 113.90527 ..."
2,1,113.888705,22.494954,113.893580,22.491903,"113.893580,22.491903","MULTIPOLYGON (((113.91468 22.51992, 113.91431 ..."
3,1,113.911896,22.508349,113.916754,22.505297,"113.916754,22.505297","MULTIPOLYGON (((113.92529 22.53038, 113.92505 ..."
4,1,113.901735,22.508829,113.906598,22.505774,"113.906598,22.505774","MULTIPOLYGON (((113.91553 22.53482, 113.91537 ..."
...,...,...,...,...,...,...,...
203,80,114.388525,22.725322,114.393480,22.722672,"114.393480,22.722672","MULTIPOLYGON (((114.40619 22.72267, 114.39983 ..."
204,81,114.370117,22.738877,114.375064,22.736214,"114.375064,22.736214","MULTIPOLYGON (((114.37045 22.76054, 114.37011 ..."
205,82,114.235663,22.736794,114.240511,22.733969,"114.240511,22.733969","MULTIPOLYGON (((114.24630 22.75324, 114.24591 ..."
206,83,114.229912,22.742795,114.234768,22.739976,"114.234768,22.739976","MULTIPOLYGON (((114.24388 22.75372, 114.24351 ..."


In [102]:
type(geo_city_capture)

geopandas.geodataframe.GeoDataFrame

In [118]:
# 将相同Cityparkid的公园等时圈多边形合并为1个

geo_city_capture['geometry'] = geo_city_capture.geometry.buffer(0)

# 假设你的GeoDataFrame名称是geo_city_capture
dissolved_city_capture = geo_city_capture.dissolve(by='Cityparkid')

# 重置索引以便Cityparkid的公园等时圈多边形合并为1个成为一个正常的列而不是索引
dissolved_city_capture.reset_index(inplace=True)

# 打印结果以确认变化
len(dissolved_city_capture)


83

In [115]:
# 筛选在对应公园的公交等时圈中的居住点

import geopandas as gpd
from shapely.geometry import Point

# 假设你已经有了f_OD_d_GCJ02和geo_city_capture两个DataFrame

# 将f_OD_d_GCJ02转换为GeoDataFrame
geometry = [Point(xy) for xy in zip(f_OD_d_GCJ02.lng_x_gcj02, f_OD_d_GCJ02.lat_x_gcj02)]
f_OD_d_gdf = gpd.GeoDataFrame(f_OD_d_GCJ02, geometry=geometry)


In [124]:
# 设置geo_city_capture的index为Cityparkid，以便后续快速索引
dissolved_city_capture = dissolved_city_capture.set_index('Cityparkid')

In [126]:
# 筛选点在对应多边形内的行
filtered_rows = []

for idx, row in f_OD_d_gdf.iterrows():
    citypark_id = row['Cityparkid']
    if citypark_id in dissolved_city_capture.index:
        polygon = dissolved_city_capture.loc[citypark_id, 'geometry']
        if row['geometry'].within(polygon):
            filtered_rows.append(row)

# 创建新的DataFrame
filtered_df = pd.DataFrame(filtered_rows).reset_index(drop=True)

# 结果为filtered_df
print(filtered_df)

       name       sum_pop                                         geometry_x  \
0      None  11243.644833  POLYGON ((114.1055943 22.6167258, 114.1048299 ...   
1      None    875.631427  POLYGON ((114.1852426 22.5946077, 114.1854894 ...   
2      None    244.115629  POLYGON ((114.1832581 22.595519, 114.1804257 2...   
3     ?????   1987.896567  POLYGON ((114.1294901 22.611478, 114.1293185 2...   
4     ?????   1987.896567  POLYGON ((114.1294901 22.611478, 114.1293185 2...   
...     ...           ...                                                ...   
4447   None    176.790457  POLYGON ((113.9286903 22.7403265, 113.9264084 ...   
4448   ????    330.748679  POLYGON ((113.8713233 22.7855158, 113.8712439 ...   
4449   ????    330.748679  POLYGON ((113.8713233 22.7855158, 113.8712439 ...   
4450   ????    504.475622  POLYGON ((114.0656916 22.6903258, 114.0656006 ...   
4451   ????   1589.763618  POLYGON ((113.9549605 22.6860643, 113.95518 22...   

                                       

In [133]:
print(len(pop_resid['Popid'].unique()))

1828


In [None]:
print(len(filtered_df['Popid'].unique()))

1314


In [161]:
len(filtered_df)

4452

In [134]:
# 5 使用api计算高德地图时间距离
start_time = time.time()

ODd_Gaode, exception_rows = fetch_GaodeMap_publictrans(filtered_df)
print("Exception rows:", exception_rows)

end_time = time.time()
execution_time = (end_time - start_time)/60
print("Run time: ", execution_time, "mins")

Error processing row 24: list index out of range
Error processing row 45: list index out of range
Error processing row 53: list index out of range
Error processing row 67: list index out of range
Error processing row 72: list index out of range
Error processing row 77: list index out of range
Error processing row 130: list index out of range
Error processing row 133: list index out of range
Error processing row 142: list index out of range
Error processing row 170: list index out of range
Error processing row 181: The read operation timed out
Error processing row 213: list index out of range
Error processing row 244: list index out of range
Error processing row 257: list index out of range
Error processing row 268: list index out of range
Error processing row 308: list index out of range
Error processing row 317: list index out of range
Error processing row 318: list index out of range
Error processing row 325: list index out of range
Error processing row 334: list index out of range
E

In [159]:
ODd_Gaode.head(2)

Unnamed: 0,name,sum_pop,geometry_x,cen,Popid,longitude_x,latitude_x,area,Cityparkid,count,...,polygonStr,geometry_y,length,lng_x_gcj02,lat_x_gcj02,lng_y_gcj02,lat_y_gcj02,geometry,distance,cost
0,,11243.644833,"POLYGON ((114.1055943 22.6167258, 114.1048299 ...",POINT (114.10326302954545 22.615397546071705),5,114.103263,22.615398,1115556.0,68,8,...,"POLYGON ((114.120325 22.589701, 114.120698 22....",POINT (114.11701567821436 22.583009820555464),3868.193121,114.108387,22.612724,114.12212,22.580325,POINT (114.10838711259221 22.612724084760575),6011,2178
1,,875.631427,"POLYGON ((114.1852426 22.5946077, 114.1854894 ...",POINT (114.18960131902347 22.5944335931523),6,114.189601,22.594434,281453.4,70,2,...,"POLYGON ((114.165136 22.597571, 114.164187 22....",POINT (114.16907378966637 22.599423892219875),2179.159673,114.194532,22.591634,114.17406,22.596663,POINT (114.1945319286058 22.59163428654367),2974,1927


In [160]:
len(ODd_Gaode)

4452

In [174]:
print(ODd_Gaode.isnull().sum())

name           909
sum_pop          0
geometry_x       0
cen              0
Popid            0
longitude_x      0
latitude_x       0
area             0
Cityparkid       0
count            0
avg_area         0
longitude_y      0
latitude_y       0
polygonStr       0
geometry_y       0
length           0
lng_x_gcj02      0
lat_x_gcj02      0
lng_y_gcj02      0
lat_y_gcj02      0
geometry         0
distance       388
cost           388
dtype: int64


In [136]:
# 6 检查问题行
exception_df = filtered_df.loc[exception_rows]

len(exception_df)

400

In [141]:
# Calculate Gaode map for exception_df travel time and travel distance again
exception_df_re,exception = fetch_GaodeMap_publictrans(exception_df)

Error processing row 24: list index out of range
Error processing row 45: list index out of range
Error processing row 53: list index out of range
Error processing row 67: list index out of range
Error processing row 72: list index out of range
Error processing row 77: list index out of range
Error processing row 130: list index out of range
Error processing row 133: list index out of range
Error processing row 142: list index out of range
Error processing row 170: list index out of range
Error processing row 213: list index out of range
Error processing row 244: list index out of range
Error processing row 257: list index out of range
Error processing row 268: list index out of range
Error processing row 308: list index out of range
Error processing row 317: list index out of range
Error processing row 318: list index out of range
Error processing row 325: list index out of range
Error processing row 334: list index out of range
Error processing row 335: list index out of range
Error 

In [145]:
exception_df_rere = exception_df_re.loc[exception]

In [164]:
print(len(exception_df_re))
print(len(exception_df_rere))

400
388


In [151]:
# 提取 cost 列不是空值的行
df_with_cost = exception_df_re[exception_df_re['cost'].notna()]

# 提取 cost 列是空值的行
df_without_cost = exception_df_re[exception_df_re['cost'].isna()]


In [157]:
ODd_Gaode['distance'] = ODd_Gaode['distance'].combine_first(df_with_cost['distance'])
ODd_Gaode['cost'] = ODd_Gaode['cost'].combine_first(df_with_cost['cost'])

In [158]:
print(ODe_Gaode.isnull().sum())

name            1295
sum_pop            0
geometry_x         0
cen                0
Popid              0
longitude_x        0
latitude_x         0
area               0
Natureparkid       0
count              0
avg_area           0
longitude_y        0
latitude_y         0
polygonStr         0
geometry_y         0
length             0
lng_x_gcj02        0
lat_x_gcj02        0
lng_y_gcj02        0
lat_y_gcj02        0
distance          19
cost              19
dtype: int64


In [167]:
df_without_cost.describe()

Unnamed: 0,sum_pop,Popid,longitude_x,latitude_x,area,Cityparkid,count,avg_area,longitude_y,latitude_y,length,lng_x_gcj02,lat_x_gcj02,lng_y_gcj02,lat_y_gcj02
count,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0,388.0
mean,908.403881,983.608247,114.053802,22.58376,1269591.0,45.139175,9.350515,130900.065686,114.054033,22.583764,435.510677,114.05878,22.580915,114.059011,22.58092
std,1234.888444,506.833684,0.14633,0.078474,1397440.0,23.09001,9.176187,83080.360272,0.14632,0.078483,212.748585,0.146348,0.078521,0.146338,0.078531
min,13.176903,24.0,113.81392,22.482827,108397.6,1.0,1.0,21781.119661,113.813122,22.481475,41.173448,113.818979,22.479772,113.818184,22.47842
25%,259.183532,560.75,113.927715,22.533582,209673.4,30.75,3.0,72981.489295,113.926146,22.535059,284.207271,113.932575,22.530858,113.931007,22.532175
50%,530.693013,1005.5,114.045492,22.554366,511880.2,43.0,6.0,102376.03507,114.044323,22.554464,395.926374,114.050588,22.551643,114.049418,22.551752
75%,1027.273759,1416.0,114.126741,22.602409,2142145.0,63.0,11.25,178512.06162,114.126116,22.603175,536.797827,114.131828,22.599569,114.131204,22.600184
max,9749.345845,2055.0,114.398138,22.808439,5042844.0,84.0,36.0,336189.58237,114.403277,22.812154,1238.813146,114.403092,22.80548,114.408231,22.809197


In [166]:
# 调用高德路径规划api-步行，计算路径距离distance（米），耗时cost（秒）
# 路径规划1.0
import json
from urllib import request

def fetch_GaodeMap_walking(df):
    urlbase = 'https://restapi.amap.com/v3/direction/walking?&key=225cfe7d506a6037debd6a9f4d5aa583&origin={0},{1}&destination={2},{3}'
    distances = []
    costs = []
    exceptions = []  # 用来收集引发异常的行索引

    for i, row in df.iterrows():
        x1 = row['lng_x_gcj02']
        y1 = row['lat_x_gcj02']
        x2 = row['lng_y_gcj02']
        y2 = row['lat_y_gcj02']
        url = urlbase.format(x1, y1, x2, y2)

        try:
            html = request.urlopen(url, timeout=15).read()
            js = json.loads(html)
            distance = js['route']['paths'][0]['distance']
            cost = js['route']['paths'][0]['duration']
        except Exception as e:
            print(f"Error processing row {i}: {e}")
            distance = None
            cost = None
            exceptions.append(i)  # 将引发异常的行索引添加到列表中

        distances.append(distance)
        costs.append(cost)

    df['distance'] = distances
    df['cost'] = costs

    return df, exceptions

In [168]:
# 距离过近的OD，使用步行替代公交计算交通耗时
exception_rerere,exception = fetch_GaodeMap_walking(df_without_cost)

In [175]:
ODd_Gaode['distance'] = ODd_Gaode['distance'].combine_first(exception_rerere['distance'])
ODd_Gaode['cost'] = ODd_Gaode['cost'].combine_first(exception_rerere['cost'])

In [176]:
print(ODd_Gaode.isnull().sum())

name           909
sum_pop          0
geometry_x       0
cen              0
Popid            0
longitude_x      0
latitude_x       0
area             0
Cityparkid       0
count            0
avg_area         0
longitude_y      0
latitude_y       0
polygonStr       0
geometry_y       0
length           0
lng_x_gcj02      0
lat_x_gcj02      0
lng_y_gcj02      0
lat_y_gcj02      0
geometry         0
distance         0
cost             0
dtype: int64


In [178]:
# 7 转换为数
ODd_Gaode['cost'] = ODd_Gaode['cost'].astype(float)
ODd_Gaode['distance'] = ODd_Gaode['distance'].astype(float)

In [179]:
ODd_Gaode.describe()

Unnamed: 0,sum_pop,Popid,longitude_x,latitude_x,area,Cityparkid,count,avg_area,longitude_y,latitude_y,length,lng_x_gcj02,lat_x_gcj02,lng_y_gcj02,lat_y_gcj02,distance,cost
count,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0,4452.0
mean,1037.425363,1045.087152,114.032797,22.572366,892123.2,45.062893,6.925651,128029.984941,114.033933,22.573319,2218.003209,114.037791,22.569523,114.038931,22.570479,3527.288859,1714.09434
std,1392.212175,515.269052,0.115613,0.064531,1086565.0,20.766336,8.265674,73897.344815,0.114675,0.063569,1586.302151,0.115651,0.064553,0.114713,0.063592,2196.732519,579.456448
min,13.176903,5.0,113.773104,22.481635,108397.6,1.0,1.0,21781.119661,113.769368,22.481475,41.173448,113.778255,22.478591,113.774524,22.47842,56.0,45.0
25%,300.485102,675.75,113.928275,22.533937,163767.2,31.0,2.0,72981.489295,113.930027,22.539624,1074.744839,113.933136,22.530896,113.934893,22.536907,2028.0,1408.75
50%,610.304947,1112.0,114.038799,22.553466,340011.5,48.0,4.0,103389.64193,114.043994,22.552333,1880.145814,114.043885,22.550686,114.049089,22.549625,3154.0,1727.0
75%,1179.378672,1470.25,114.094166,22.580345,1261656.0,61.0,8.0,170101.961624,114.095008,22.58301,2924.874114,114.099292,22.577529,114.100134,22.580325,4571.25,2056.0
max,26058.146784,2079.0,114.429785,22.808439,5042844.0,84.0,36.0,371885.971015,114.426345,22.812154,10957.148822,114.434702,22.80548,114.431268,22.809197,17291.0,4025.0


In [180]:
# 存储
ODd_Gaode.to_csv('tmp/ODd_Gaode.csv',index=False,encoding='utf-8')

In [210]:
# 8 以30min，即1800s为阈值，计算Ga2SFCA,得到每个popcell的可达性
f_ODd_Gaode = ODd_Gaode[ODd_Gaode['cost'] <= 1800]

f_ODd_Gaode.reset_index(drop = True, inplace = True)

print(len(f_ODd_Gaode))
print(len(f_ODd_Gaode['Popid'].unique()))


2519
1051


In [211]:
# 9 定义函数
def Ga_1800s(dij):
    e=math.exp(1)
    g=(e**(-0.5*(dij/1800)**2)-e**(-0.5))/(1-e**(-0.5)) # dij是供给和需求之间的长度（时间cost or 距离distance），1800s是阈值（时长or距离）
    return g

def Get_Rj(x):
    x=x.reset_index()
    Sj=x['area'][0]
    
    dt=0
    for i in range(len(x)):
        vl=x['sum_pop'][i]*Ga_1800s(x['cost'][i])
        dt=dt+vl
    return Sj/dt

def Get_Ai(x):
    x=x.reset_index()
    
    dt=0
    for i in range(len(x)):
        vl=x['Rj'][i]*Ga_1800s(x['cost'][i])
        dt=dt+vl
    return dt

In [212]:
print(len(f_ODd_Gaode))
print(len(f_ODd_Gaode['Popid'].unique()))

2519
1051


In [213]:
# 10 step1 计算每个公园的Rj
# 公园面积 ➗ 该公园搜索阈以内的人口数

park_s4 = f_ODd_Gaode.groupby(by='Cityparkid').apply(Get_Rj).reset_index()
park_s4 = park_s4.rename(columns={0: 'Rj'})

f_ODd_Gaode = pd.merge(f_ODd_Gaode,park_s4 [['Cityparkid', 'Rj']], on='Cityparkid', how='left')


In [215]:
# 11 step2 计算每个pop cell的Ai
# 每个供给点popcell,搜索阈以内的Rj,即park_v1
pop_s4 = f_ODd_Gaode.groupby(by='Popid').apply(Get_Ai).reset_index()
pop_s4 = pop_s4.rename(columns={0: 'Ai'})

f_ODd_Gaode = pd.merge(f_ODd_Gaode,pop_s4[['Popid', 'Ai']], on='Popid', how='left')

In [216]:
print(len(f_ODd_Gaode))
print(len(f_ODd_Gaode['Popid'].unique()))

2519
1051


In [219]:
f_ODd_Gaode.columns

Index(['name', 'sum_pop', 'geometry_x', 'cen', 'Popid', 'longitude_x',
       'latitude_x', 'area', 'Cityparkid', 'count', 'avg_area', 'longitude_y',
       'latitude_y', 'polygonStr', 'geometry_y', 'length', 'lng_x_gcj02',
       'lat_x_gcj02', 'lng_y_gcj02', 'lat_y_gcj02', 'geometry', 'distance',
       'cost', 'Rj', 'Ai'],
      dtype='object')

In [224]:
# 12 简化
access_socre_d = f_ODd_Gaode[['Popid', 'sum_pop', 'Ai']]

In [225]:
access_socre_d.drop_duplicates(inplace=True)
len(access_socre_d)

1051

In [226]:
#13 存储
result_d = pop_resid.merge(access_socre_d[['Popid', 'Ai']], on='Popid', how='left')
result_d = result_d[['sum_pop', 'geometry', 'Popid', 'Ai']]

In [229]:
result_d.fillna(0, inplace=True)

In [232]:
result_d.to_file('tmp/result_d.geojson')

### e) Public transport——Natural park——Travel time thresholds =60min , Use the bus to reach the circle polygon filter

In [257]:
# # 1 连接pop和park
# pop_resid['index'] = 1
# nature_park['index'] = 1
# OD_e = pd.merge(pop_resid, nature_park, on='index').drop('index', axis=1)
# len(OD_e)

106024

In [258]:
# # 2 根据OD左边计算直线距离，得到新列length
# OD_e['length'] = OD_e[['longitude_x', 'latitude_x','longitude_y', 'latitude_y',]].\
# apply(lambda x:haversine(x[0],x[1],x[2],x[3]),axis=1)

In [259]:
# # 3 筛选lenth<=30km的OD对
# f_OD_e = OD_e[(OD_e['length'] <= 30000)]
# # 只保留同一个公园直线距离最近的入口
# idx = f_OD_e.groupby(['Popid', 'Natureparkid'])['length'].idxmin()
# f_OD_e = f_OD_e.loc[idx]
# len(f_OD_e)

10736

In [260]:
# len(f_OD_e['Popid'].unique())

1828

In [249]:
# # 4 转换WGS84坐标为GCJ02，便于下一步高德api计算
# new_columns = f_OD_e.apply(transform_coordinates, axis=1)
# # 合并原始 DataFrame 和新列
# f_OD_e_GCJ02 = pd.concat([f_OD_e, new_columns], axis=1)
# len(f_OD_e_GCJ02)

10736

In [240]:
# # 导入手动获取的，以为park entrance为起点，在高德公交达到圈界面获取的35-45min公交等时圈
# geo_nature_capture = gpd.read_file('tmp/geo_nature_capture.geojson')

In [241]:
# # 将相同Cityparkid的公园等时圈多边形合并为1个

# geo_nature_capture['geometry'] = geo_nature_capture.geometry.buffer(0)

# # 假设你的GeoDataFrame名称是geo_city_capture
# dissolved_nature_capture = geo_nature_capture.dissolve(by='Natureparkid')

# # 重置索引以便Cityparkid的公园等时圈多边形合并为1个成为一个正常的列而不是索引
# dissolved_nature_capture.reset_index(inplace=True)

# # 打印结果以确认变化
# len(dissolved_nature_capture)


9

In [250]:
# # 筛选在对应公园的公交等时圈中的居住点

# import geopandas as gpd
# from shapely.geometry import Point

# # 将f_OD_e_GCJ02转换为GeoDataFrame
# geometry = [Point(xy) for xy in zip(f_OD_e_GCJ02.lng_x_gcj02, f_OD_e_GCJ02.lat_x_gcj02)]
# f_OD_e_gdf = gpd.GeoDataFrame(f_OD_e_GCJ02, geometry=geometry)


In [251]:
# # 设置geo_nature_capture的index为Natureparkid，以便后续快速索引
# dissolved_nature_capture = dissolved_nature_capture.set_index('Natureparkid')

In [252]:
# # 筛选点在对应多边形内的行
# filtered_rows_e = []

# for idx, row in f_OD_e_gdf.iterrows():
#     naturepark_id = row['Natureparkid']
#     if naturepark_id in dissolved_nature_capture.index:
#         polygon = dissolved_nature_capture.loc[naturepark_id, 'geometry']
#         if row['geometry'].within(polygon):
#             filtered_rows_e.append(row)

# # 创建新的DataFrame
# filtered_df_e = pd.DataFrame(filtered_rows_e).reset_index(drop=True)

# # 结果为filtered_df
# print(filtered_df_e)

       name       sum_pop                                         geometry_x  \
0      None   7276.136558  POLYGON ((114.4683446 22.6020676, 114.4676535 ...   
1      None   7276.136558  POLYGON ((114.4683446 22.6020676, 114.4676535 ...   
2      None   1217.979429  POLYGON ((114.4939942 22.5376144, 114.4929416 ...   
3      None  11243.644833  POLYGON ((114.1055943 22.6167258, 114.1048299 ...   
4      ????    480.767980  POLYGON ((114.1330173 22.6159002, 114.1339508 ...   
..      ...           ...                                                ...   
632  ??????    294.324127  POLYGON ((113.9913625 22.6733559, 113.9900255 ...   
633     ???    169.534223  POLYGON ((113.9948493 22.6677921, 113.9952047 ...   
634     ???     96.565294  POLYGON ((113.9404252 22.6761329, 113.9404043 ...   
635    ????   1589.763618  POLYGON ((113.9549605 22.6860643, 113.95518 22...   
636    ????    149.804736  POLYGON ((113.9861396 22.673476, 113.9861186 2...   

                                       

In [253]:
# print(len(pop_resid['Popid'].unique()))

1828


In [254]:
# print(len(filtered_df_e['Popid'].unique()))

560


In [255]:
# len(filtered_df_e)

637

In [256]:
# # 5 使用api计算高德地图时间距离
# start_time = time.time()

# ODe_Gaode, exception_rows = fetch_GaodeMap_publictrans(filtered_df_e)
# print("Exception rows:", exception_rows)

# end_time = time.time()
# execution_time = (end_time - start_time)/60
# print("Run time: ", execution_time, "mins")

Error processing row 7: list index out of range
Error processing row 9: list index out of range
Error processing row 11: list index out of range
Error processing row 59: list index out of range
Error processing row 60: list index out of range
Error processing row 85: list index out of range
Error processing row 87: list index out of range
Error processing row 110: list index out of range
Error processing row 111: list index out of range
Error processing row 112: list index out of range
Error processing row 116: list index out of range
Error processing row 120: list index out of range
Error processing row 135: list index out of range
Error processing row 136: list index out of range
Error processing row 141: list index out of range
Error processing row 149: list index out of range
Error processing row 150: list index out of range
Error processing row 151: list index out of range
Error processing row 152: list index out of range
Error processing row 158: list index out of range
Error pro

In [262]:
# len(ODe_Gaode)

637

In [265]:
# nan_cost_df = ODe_Gaode[ODe_Gaode['cost'].isna()]
# len(nan_cost_df)

80

In [270]:
# # 调用高德路径规划api-步行，计算路径距离distance（米），耗时cost（秒）
# # 路径规划1.0
# import json
# from urllib import request

# def fetch_GaodeMap_walking(df):
#     urlbase = 'https://restapi.amap.com/v3/direction/walking?&key=225cfe7d506a6037debd6a9f4d5aa583&origin={0},{1}&destination={2},{3}'
#     distances = []
#     costs = []
#     exceptions = []  # 用来收集引发异常的行索引

#     for i, row in df.iterrows():
#         x1 = row['lng_x_gcj02']
#         y1 = row['lat_x_gcj02']
#         x2 = row['lng_y_gcj02']
#         y2 = row['lat_y_gcj02']
#         url = urlbase.format(x1, y1, x2, y2)

#         try:
#             html = request.urlopen(url, timeout=15).read()
#             js = json.loads(html)
#             distance = js['route']['paths'][0]['distance']
#             cost = js['route']['paths'][0]['duration']
#         except Exception as e:
#             print(f"Error processing row {i}: {e}")
#             distance = None
#             cost = None
#             exceptions.append(i)  # 将引发异常的行索引添加到列表中

#         distances.append(distance)
#         costs.append(cost)

#     df['distance'] = distances
#     df['cost'] = costs

#     return df, exceptions

In [272]:
# # 对nan行重新计算
# exception_df_re,exception = fetch_GaodeMap_walking(nan_cost_df)

In [273]:
# exception

[]

In [276]:
# ODe_Gaode['distance'] = ODe_Gaode['distance'].combine_first(exception_df_re['distance'])
# ODe_Gaode['cost'] = ODe_Gaode['cost'].combine_first(exception_df_re['cost'])

In [277]:
# print(ODe_Gaode.isnull().sum())

name            152
sum_pop           0
geometry_x        0
cen               0
Popid             0
longitude_x       0
latitude_x        0
area              0
Natureparkid      0
count             0
avg_area          0
longitude_y       0
latitude_y        0
polygonStr        0
geometry_y        0
length            0
lng_x_gcj02       0
lat_x_gcj02       0
lng_y_gcj02       0
lat_y_gcj02       0
geometry          0
distance          0
cost              0
dtype: int64


In [278]:
# # 7 转换为数
# ODe_Gaode['cost'] = ODe_Gaode['cost'].astype(float)
# ODe_Gaode['distance'] = ODe_Gaode['distance'].astype(float)

In [279]:
# ODe_Gaode.describe()

Unnamed: 0,sum_pop,Popid,longitude_x,latitude_x,area,Natureparkid,count,avg_area,longitude_y,latitude_y,length,lng_x_gcj02,lat_x_gcj02,lng_y_gcj02,lat_y_gcj02,distance,cost
count,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0,637.0
mean,1093.314082,1164.153846,114.020148,22.582287,15819910.0,5.00157,12.472527,939862.5,114.019598,22.583713,2658.810684,114.025118,22.579412,114.024561,22.58083,4834.270016,2347.400314
std,1508.346618,477.720658,0.121925,0.059438,15014050.0,2.217355,9.697044,662021.9,0.123049,0.055962,1950.619265,0.121912,0.05947,0.123025,0.056004,3408.50847,1727.234906
min,10.682643,1.0,113.804838,22.492294,311679.0,1.0,1.0,311679.0,113.823613,22.503005,76.32774,113.80992,22.489243,113.828646,22.499974,80.0,64.0
25%,298.173479,797.0,113.9484,22.53798,467504.7,3.0,1.0,376955.3,113.952972,22.539463,1148.028475,113.95329,22.535151,113.957865,22.536549,2415.0,1641.0
50%,580.240654,1153.0,113.997629,22.560335,22902020.0,6.0,17.0,1041001.0,113.984552,22.572901,2169.375122,114.002621,22.557492,113.989508,22.570171,4157.0,2019.0
75%,1183.28355,1530.0,114.048959,22.623779,22902020.0,6.0,22.0,1041001.0,114.048522,22.626122,3581.362324,114.05406,22.621025,114.053626,22.623278,6022.0,2497.0
max,11519.27283,2080.0,114.574451,22.745957,54929250.0,9.0,22.0,3661950.0,114.569193,22.727077,11443.674812,114.578948,22.74313,114.573699,22.724254,18613.0,12032.0


In [280]:
# # 存储
# ODe_Gaode.to_csv('tmp/ODe_Gaode.csv',index=False,encoding='utf-8')

In [281]:
# # 8 以30min，即1800s为阈值，计算Ga2SFCA,得到每个popcell的可达性
# f_ODe_Gaode = ODe_Gaode[ODe_Gaode['cost'] <= 1800]

# f_ODe_Gaode.reset_index(drop = True, inplace = True)

# print(len(f_ODe_Gaode))
# print(len(f_ODe_Gaode['Popid'].unique()))

231
230


In [282]:
# # 9 定义函数
# def Ga_1800s(dij):
#     e=math.exp(1)
#     g=(e**(-0.5*(dij/1800)**2)-e**(-0.5))/(1-e**(-0.5)) # dij是供给和需求之间的长度（时间cost or 距离distance），1800s是阈值（时长or距离）
#     return g

# def Get_Rj(x):
#     x=x.reset_index()
#     Sj=x['area'][0]
    
#     dt=0
#     for i in range(len(x)):
#         vl=x['sum_pop'][i]*Ga_1800s(x['cost'][i])
#         dt=dt+vl
#     return Sj/dt

# def Get_Ai(x):
#     x=x.reset_index()
    
#     dt=0
#     for i in range(len(x)):
#         vl=x['Rj'][i]*Ga_1800s(x['cost'][i])
#         dt=dt+vl
#     return dt

In [None]:
# # 10 step1 计算每个公园的Rj
# # 公园面积 ➗ 该公园搜索阈以内的人口数

# park_s5 = f_ODe_Gaode.groupby(by='Natureparkid').apply(Get_Rj).reset_index()
# park_s5 = park_s5.rename(columns={0: 'Rj'})

# f_ODe_Gaode = pd.merge(f_ODe_Gaode,park_s5[['Natureparkid', 'Rj']], on='Natureparkid', how='left')

# # 11 step2 计算每个pop cell的Ai
# # 每个供给点popcell,搜索阈以内的Rj,即park_v1
# pop_s5 = f_ODe_Gaode.groupby(by='Popid').apply(Get_Ai).reset_index()
# pop_s5 = pop_s5.rename(columns={0: 'Ai'})

# f_ODe_Gaode = pd.merge(f_ODe_Gaode,pop_s5[['Popid', 'Ai']], on='Popid', how='left')

# print(len(f_ODe_Gaode))
# print(len(f_ODe_Gaode['Popid'].unique()))

231
230


In [None]:
# #13 存储
# result_e = pop_resid.merge(access_socre_e[['Popid', 'Ai']], on='Popid', how='left')
# result_e = result_e[['sum_pop', 'geometry', 'Popid', 'Ai']]

# result_e.fillna(0, inplace=True)

# result_e.to_file('tmp/result_e.geojson')

### e) Public transport——Natural park——Travel time thresholds =60min

In [284]:
# 1 连接pop和park
pop_resid['index'] = 1
nature_park['index'] = 1
OD_e = pd.merge(pop_resid, nature_park, on='index').drop('index', axis=1)
len(OD_e)

106024

In [285]:
# 2 根据OD左边计算直线距离，得到新列length
OD_e['length'] = OD_e[['longitude_x', 'latitude_x','longitude_y', 'latitude_y',]].\
apply(lambda x:haversine(x[0],x[1],x[2],x[3]),axis=1)

In [286]:
# 3 筛选lenth<=20km的OD对
f_OD_e = OD_e[(OD_e['length'] <= 20000)]
# 只保留同一个公园直线距离最近的入口
idx = f_OD_e.groupby(['Popid', 'Natureparkid'])['length'].idxmin()
f_OD_e = f_OD_e.loc[idx]
len(f_OD_e)

7749

In [287]:
len(f_OD_e['Popid'].unique())

1828

In [288]:
# 4 转换WGS84坐标为GCJ02，便于下一步高德api计算
new_columns = f_OD_e.apply(transform_coordinates, axis=1)
# 合并原始 DataFrame 和新列
f_OD_e_GCJ02 = pd.concat([f_OD_e, new_columns], axis=1)
len(f_OD_e_GCJ02)

7749

In [289]:
# 5 使用api计算高德地图时间距离
start_time = time.time()

ODe_Gaode, exception_rows = fetch_GaodeMap_publictrans(f_OD_e_GCJ02)
print("Exception rows:", exception_rows)

end_time = time.time()
execution_time = (end_time - start_time)/60
print("Run time: ", execution_time, "mins")

Error processing row 3042: list index out of range
Error processing row 5739: list index out of range
Error processing row 5854: list index out of range
Error processing row 8757: list index out of range
Error processing row 8931: list index out of range
Error processing row 16007: list index out of range
Error processing row 16181: list index out of range
Error processing row 30739: list index out of range
Error processing row 30797: list index out of range
Error processing row 34601: list index out of range
Error processing row 34621: list index out of range
Error processing row 35206: list index out of range
Error processing row 35322: list index out of range
Error processing row 36988: list index out of range
Error processing row 37046: list index out of range
Error processing row 37104: list index out of range
Error processing row 37162: list index out of range
Error processing row 37220: list index out of range
Error processing row 38677: list index out of range
Error processing 

In [290]:
print(ODe_Gaode.isnull().sum())
print(len(ODe_Gaode['Popid'].unique()))

name            2238
sum_pop            0
geometry_x         0
cen                0
Popid              0
longitude_x        0
latitude_x         0
area               0
Natureparkid       0
count              0
avg_area           0
longitude_y        0
latitude_y         0
polygonStr         0
geometry_y         0
length             0
lng_x_gcj02        0
lat_x_gcj02        0
lng_y_gcj02        0
lat_y_gcj02        0
distance         104
cost             104
dtype: int64
1828


In [291]:
# 6 检查问题行
exception_df = f_OD_e_GCJ02.loc[exception_rows]

len(exception_df)

104

In [294]:
# Calculate Gaode map for exception_df travel time and travel distance again
exception_df_re,exception = fetch_GaodeMap_publictrans(exception_df)

Error processing row 3042: list index out of range
Error processing row 5739: list index out of range
Error processing row 5854: list index out of range
Error processing row 8757: list index out of range
Error processing row 8931: list index out of range
Error processing row 16007: list index out of range
Error processing row 16181: list index out of range
Error processing row 30739: list index out of range
Error processing row 30797: list index out of range
Error processing row 34601: list index out of range
Error processing row 34621: list index out of range
Error processing row 35206: list index out of range
Error processing row 35322: list index out of range
Error processing row 36988: list index out of range
Error processing row 37046: list index out of range
Error processing row 37104: list index out of range
Error processing row 37162: list index out of range
Error processing row 37220: list index out of range
Error processing row 38677: list index out of range
Error processing 

In [299]:
print(len(exception_df_re))
print(exception_df_re.isnull().sum())

104
name            15
sum_pop          0
geometry_x       0
cen              0
Popid            0
longitude_x      0
latitude_x       0
area             0
Natureparkid     0
count            0
avg_area         0
longitude_y      0
latitude_y       0
polygonStr       0
geometry_y       0
length           0
lng_x_gcj02      0
lat_x_gcj02      0
lng_y_gcj02      0
lat_y_gcj02      0
distance        99
cost            99
dtype: int64


In [300]:
nan_distance_df = exception_df_re[exception_df_re['distance'].isna()]

In [302]:
# 调用高德路径规划api-步行，计算路径距离distance（米），耗时cost（秒）
# 路径规划1.0
import json
from urllib import request

def fetch_GaodeMap_walking(df):
    urlbase = 'https://restapi.amap.com/v3/direction/walking?&key=225cfe7d506a6037debd6a9f4d5aa583&origin={0},{1}&destination={2},{3}'
    distances = []
    costs = []
    exceptions = []  # 用来收集引发异常的行索引

    for i, row in df.iterrows():
        x1 = row['lng_x_gcj02']
        y1 = row['lat_x_gcj02']
        x2 = row['lng_y_gcj02']
        y2 = row['lat_y_gcj02']
        url = urlbase.format(x1, y1, x2, y2)

        try:
            html = request.urlopen(url, timeout=15).read()
            js = json.loads(html)
            distance = js['route']['paths'][0]['distance']
            cost = js['route']['paths'][0]['duration']
        except Exception as e:
            print(f"Error processing row {i}: {e}")
            distance = None
            cost = None
            exceptions.append(i)  # 将引发异常的行索引添加到列表中

        distances.append(distance)
        costs.append(cost)

    df['distance'] = distances
    df['cost'] = costs

    return df, exceptions

In [303]:
# 距离过近的OD，使用步行替代公交计算交通耗时
exception_nan_distance_df,exception = fetch_GaodeMap_walking(nan_distance_df)

In [304]:
exception

[]

In [306]:
nan_distance_df['distance'] = nan_distance_df['distance'].combine_first(exception_nan_distance_df['distance'])
nan_distance_df['cost'] = nan_distance_df['cost'].combine_first(exception_nan_distance_df['cost'])

In [309]:
exception_df_re['distance'] = exception_df_re['distance'].combine_first(nan_distance_df['distance'])
exception_df_re['cost'] = exception_df_re['cost'].combine_first(nan_distance_df['cost'])

In [314]:
ODe_Gaode['distance'] = ODe_Gaode['distance'].combine_first(exception_df_re['distance'])
ODe_Gaode['cost'] = ODe_Gaode['cost'].combine_first(exception_df_re['cost'])

In [315]:
print(ODe_Gaode.isnull().sum())

name            2238
sum_pop            0
geometry_x         0
cen                0
Popid              0
longitude_x        0
latitude_x         0
area               0
Natureparkid       0
count              0
avg_area           0
longitude_y        0
latitude_y         0
polygonStr         0
geometry_y         0
length             0
lng_x_gcj02        0
lat_x_gcj02        0
lng_y_gcj02        0
lat_y_gcj02        0
distance           0
cost               0
dtype: int64


In [316]:
print(ODe_Gaode['Popid'].nunique())

1828


In [317]:
# 7 转换为数
ODe_Gaode['cost'] = ODe_Gaode['cost'].astype(float)
ODe_Gaode['distance'] = ODe_Gaode['distance'].astype(float)

In [318]:
ODe_Gaode.describe()

Unnamed: 0,sum_pop,Popid,longitude_x,latitude_x,area,Natureparkid,count,avg_area,longitude_y,latitude_y,length,lng_x_gcj02,lat_x_gcj02,lng_y_gcj02,lat_y_gcj02,distance,cost
count,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0,7749.0
mean,1094.797044,1105.459414,114.03508,22.59487,15946330.0,4.618015,10.842302,1016590.0,114.017385,22.599318,10324.411441,114.040068,22.592023,114.022349,22.596436,16241.344044,4137.833656
std,1729.088717,575.467112,0.132602,0.069812,16329770.0,2.541583,9.01388,685619.0,0.135146,0.058752,5119.689296,0.132613,0.06985,0.135143,0.058789,8017.484053,1624.038813
min,3.078762,1.0,113.773104,22.474726,311679.0,1.0,1.0,311679.0,113.823613,22.503005,76.32774,113.778255,22.471811,113.828646,22.499974,80.0,64.0
25%,280.235872,696.0,113.929621,22.542556,467504.7,2.0,1.0,467504.7,113.932237,22.539463,6345.055311,113.934483,22.539505,113.93711,22.536549,10188.0,3029.0
50%,583.720867,1142.0,114.028315,22.573057,5026383.0,5.0,6.0,837730.6,113.984552,22.606356,10013.537858,114.033377,22.570284,113.989508,22.603488,16130.0,3990.0
75%,1186.549446,1596.0,114.084853,22.637903,27593440.0,6.0,20.0,1623143.0,114.049479,22.632964,14510.270454,114.089982,22.635208,114.054584,22.630292,21474.0,4880.0
max,26058.146784,2081.0,114.585661,22.808439,54929250.0,9.0,22.0,3661950.0,114.569193,22.727077,19992.857824,114.590156,22.80548,114.573699,22.724254,70134.0,32942.0


In [319]:
# 存储
ODe_Gaode.to_csv('tmp/ODe_Gaode.csv',index=False,encoding='utf-8')

In [373]:
# 8 以60min，即3600s为阈值，计算Ga2SFCA,得到每个popcell的可达性
f_ODe_Gaode = ODe_Gaode[ODe_Gaode['cost'] <= 3600]

f_ODe_Gaode.reset_index(drop = True, inplace = True)

print(len(f_ODe_Gaode))
print(len(f_ODe_Gaode['Popid'].unique()))

2971
1415


In [374]:
# 9 定义函数
def Ga_3600s(dij):
    e=math.exp(1)
    g=(e**(-0.5*(dij/3600)**2)-e**(-0.5))/(1-e**(-0.5)) # dij是供给和需求之间的长度（时间cost or 距离distance），3600s是阈值（时长or距离）
    return g

def Get_Rj(x):
    x=x.reset_index()
    Sj=x['area'][0]
    
    dt=0
    for i in range(len(x)):
        vl=x['sum_pop'][i]*Ga_3600s(x['cost'][i])
        dt=dt+vl
    return Sj/dt

def Get_Ai(x):
    x=x.reset_index()
    
    dt=0
    for i in range(len(x)):
        vl=x['Rj'][i]*Ga_3600s(x['cost'][i])
        dt=dt+vl
    return dt

In [375]:
# 10 step1 计算每个公园的Rj
# 公园面积 ➗ 该公园搜索阈以内的人口数

park_s5 = f_ODe_Gaode.groupby(by='Natureparkid').apply(Get_Rj).reset_index()
park_s5 = park_s5.rename(columns={0: 'Rj'})

f_ODe_Gaode = pd.merge(f_ODe_Gaode,park_s5[['Natureparkid', 'Rj']], on='Natureparkid', how='left')


In [380]:
park_s5

Unnamed: 0,Natureparkid,Rj
0,1,2.7814
1,2,367.531753
2,3,302.161815
3,4,49457.721801
4,5,0.923271
5,6,60.13739
6,7,632.59961
7,8,21.087174
8,9,35.46336


In [376]:
f_ODe_Gaode.columns

Index(['name', 'sum_pop', 'geometry_x', 'cen', 'Popid', 'longitude_x',
       'latitude_x', 'area', 'Natureparkid', 'count', 'avg_area',
       'longitude_y', 'latitude_y', 'polygonStr', 'geometry_y', 'length',
       'lng_x_gcj02', 'lat_x_gcj02', 'lng_y_gcj02', 'lat_y_gcj02', 'distance',
       'cost', 'Rj'],
      dtype='object')

In [378]:
# 11 step2 计算每个pop cell的Ai
# 每个供给点popcell,搜索阈以内的Rj,即park_v1
pop_s5 = f_ODe_Gaode.groupby(by='Popid').apply(Get_Ai).reset_index()
pop_s5 = pop_s5.rename(columns={0: 'Ai'})

f_ODe_Gaode = pd.merge(f_ODe_Gaode,pop_s5[['Popid', 'Ai']], on='Popid', how='left')

In [379]:
pop_s5

Unnamed: 0,Popid,Ai
0,1,555.716015
1,2,27598.516217
2,4,0.956835
3,5,23.576301
4,8,5.102206
...,...,...
1410,2076,97.332734
1411,2077,172.896879
1412,2078,79.143581
1413,2079,304.562810


In [348]:
print(len(f_ODe_Gaode))
print(len(f_ODe_Gaode['Popid'].unique()))

2971
1415


In [349]:
# 12 简化
access_socre_e = f_ODe_Gaode[['Popid', 'sum_pop', 'geometry_x', 'Ai']]
access_socre_e = access_socre_e.rename(columns={'geometry_x':'geometry'})
access_socre_e.drop_duplicates(inplace=True)
len(access_socre_e)

1415

In [350]:
print(len(access_socre_e['Popid'].unique()))

1415


In [351]:
print(len(access_socre_e))

1415


In [352]:
#13 存储
result_e = pop_resid.merge(access_socre_e[['Popid', 'Ai']], on='Popid', how='left')
result_e = result_e[['sum_pop', 'geometry', 'Popid', 'Ai']]

In [353]:
result_e.fillna(0, inplace=True)

In [354]:
result_e.to_file('tmp/result_e.geojson')