This notebook checks the starting points used for the service-area analysis (park access points and census centroids) to make sure each of them lies on a road of the road network.

In [9]:
import geopandas as gpd
import os
from shapely.strtree import STRtree
import fiona

In [25]:
# Path to the project GeoPackage. It is built from separate components so that
# os.path.join inserts the right separator on every OS (the previous single
# string hard-coded Windows back-slashes).
data = os.path.join("..", "data", "final", "Tokyo_UGS_accessibility.gpkg")

# Layers read from the GeoPackage.
ugs = gpd.read_file(data, layer='merged_ugs')
roads = gpd.read_file(data, layer='road_network')
census_centroids = gpd.read_file(data, layer='census_centroids')
accesses = gpd.read_file(data, layer='access_points_merged_parks')

# Spatial indexes over the road geometries.
roads_tree = STRtree(roads["geometry"].values)  # Shapely STR-tree, used for the nearest-road queries
road_sindex = roads.sindex  # GeoPandas' own spatial index of the same layer


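Fix the accesses dataframe: give every access point a fresh sequential `id_access` and keep the same columns as the old `cleaned_park_accesses` layer.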


In [26]:
# Load the earlier accesses layer and list its columns; the same column set
# is used as the reference schema for the new accesses table.
old_accesses = gpd.read_file(data, layer="cleaned_park_accesses")
old_accesses.columns

Index(['area', 'CODE5', 'NAME1', 'NAME2', 'park_id', 'id_access', 'geometry'], dtype='object')

In [27]:
# Number the access points 1..N, then keep only the columns of the old layer.
accesses = accesses.assign(id_access=range(1, len(accesses) + 1))
accesses = accesses.loc[
    :, ['area', 'CODE5', 'NAME1', 'NAME2', 'park_id', 'id_access', 'geometry']
].copy()


In [30]:
def distance_to_nearest_road(point, spatial_index, search_radius=50):
    """Return the distance from ``point`` to the nearest indexed geometry.

    Candidate geometries are looked up by querying ``spatial_index`` with
    ``point`` buffered by ``search_radius``; the smallest ``point.distance``
    over those candidates is returned.

    Parameters
    ----------
    point : geometry or None
        Geometry exposing ``buffer(radius)`` and ``distance(other)``.
    spatial_index : spatial index
        Object whose ``query(geometry)`` returns integer positions into its
        ``geometries`` attribute (e.g. a Shapely 2.x ``STRtree``). The
        geometries are read from the index itself, so the result is always
        consistent with whatever layer the index was built from.
    search_radius : float, default 50
        Buffer distance, in the units of the layer's CRS, used to select
        candidates.

    Returns
    -------
    float
        Distance to the closest candidate, or ``float('inf')`` when the query
        returns no candidate or ``point`` is ``None``.

    Notes
    -----
    With Shapely's default (predicate-less) query, candidates are matched on
    bounding boxes, so the returned distance may be somewhat larger than
    ``search_radius``.
    """
    no_road = float('inf')

    # A missing geometry cannot be associated with any road.
    if point is None:
        return no_road

    nearby_indices = spatial_index.query(point.buffer(search_radius))
    indexed_geometries = spatial_index.geometries

    # Single pass: each candidate distance is computed exactly once.
    return min(
        (point.distance(indexed_geometries[i]) for i in nearby_indices),
        default=no_road,
    )

In [31]:
# Distance from every start point (park accesses and census centroids)
# to its nearest road, looked up through the road STR-tree.
accesses["distance_to_road"] = accesses["geometry"].apply(
    distance_to_nearest_road, args=(roads_tree,)
)
census_centroids["distance_to_road"] = census_centroids["geometry"].apply(
    distance_to_nearest_road, args=(roads_tree,)
)

# Points more than 1 CRS unit away from any road are treated as "not on a road".
far_accesses = accesses.loc[accesses["distance_to_road"] > 1]
far_census = census_centroids.loc[census_centroids["distance_to_road"] > 1]

In [32]:
# Report how many start points of each kind are off the road network.
print(f"There are {far_accesses.shape[0]} access points that are not on roads")
print(f"There are {far_census.shape[0]} census centroids that are not on roads")

There are 183 access points that are not on roads
There are 0 census centroids that are not on roads


Census centroids are not an issue. Next, let's check whether any park would be left with zero access points once the off-road accesses are removed.

In [33]:
# Ids of the parks that own at least one access point lying off the road network.
# NOTE: the parks layer has changed since the first run, so counts quoted from
# earlier runs (e.g. "68 parks") are no longer reliable.
far_accesses["park_id"].unique()

array([  60,  287,  533,  535,  547,  807,  837,  905,  981, 1241, 1248,
       1285, 1574, 1655, 2350, 2627, 2651, 2655, 2705, 2928, 3267, 3421,
       3694, 3828, 3951, 3986, 4057, 4217, 4250, 4308, 4478, 4512, 4513,
       4679, 4727, 4749, 4757, 4921, 4963, 4971, 4976, 4980, 4989, 4993,
       4996, 5036, 5157, 5196, 5197, 5198, 5199, 5229, 5242, 5300, 5313,
       5340, 5341, 5349, 5469, 5474, 5475, 5480, 5493, 5496, 5736, 5747,
       5749, 5750, 5771, 5800, 6019, 6023, 6024, 6038, 6040, 6100, 6123,
       6184, 6207, 6314, 6341, 6345, 6367, 6385, 6481, 6482, 6498, 6587,
       6592, 6605, 6625, 6741, 6827, 6834, 6981, 7049, 7051, 7164, 7280,
       7375, 7516, 7605, 7734, 7758, 7766, 7768, 7924, 8005, 8278, 8318])

In [34]:
# Goal: find the parks whose accesses are ALL in the "far" set.
# Every access carries a park_id and an id_access.

# Ids of the parks that have at least one remote access.
parks_remote_access = far_accesses['park_id'].unique().tolist()

# park_id -> number of remote accesses of that park.
# (The stand-alone groupby that used to precede this line was computed and
# immediately discarded, so it has been dropped.)
park_remote_accesses_counts = far_accesses.groupby("park_id")["id_access"].count().to_dict()

# park_id -> total number of accesses of that park.
park_accesses_counts = accesses.groupby("park_id")['id_access'].count().to_dict()


In [35]:
# A park is "remote" when every one of its accesses is a remote access,
# i.e. no non-remote access is left after subtracting the remote ones.
remote_parks = [
    remote_park_id
    for remote_park_id, remote_count in park_remote_accesses_counts.items()
    if park_accesses_counts[remote_park_id] - remote_count == 0
]


In [43]:
# Parks whose accesses are all off the road network.
parks_no_access = ugs[ugs["park_id"].isin(remote_parks)]

# Name the driver explicitly (as the GeoPackage export in this notebook does)
# instead of relying on it being inferred from the file extension.
parks_no_access.to_file('remote_parks.geojson', driver="GeoJSON")

# Now the 'remote' parks can be visualized in QGIS. After checking them I decided to just discard the remote park accesses.


In [36]:
# Keep every access within 1 CRS unit of a road. `<=` makes this the exact
# complement of the `> 1` filter that defines far_accesses, so an access at a
# distance of exactly 1 is not dropped from both sets. The copy detaches the
# result from `accesses`.
final_accesses = accesses[accesses['distance_to_road'] <= 1].copy()

In [37]:
# Summary of how many accesses were kept and how many were discarded.
print(f"Total number of accesses: {len(accesses)}")
print(f"Final number of accesses: {len(final_accesses)}")
print(f"Remote accesses: {len(far_accesses)}")

Total number of accesses: 18014
Final number of accesses: 17831
Remote accesses: 183


In [38]:
# Sanity check: the rows removed from `accesses` must be exactly the remote ones.
assert len(accesses) - len(final_accesses) == len(far_accesses)

In [41]:
# Write the cleaned accesses as a layer of the project GeoPackage.
final_accesses.to_file(
    data,
    layer="cleaned_merged_park_accesses",
    driver="GPKG",
    index=False,
)

In [42]:
# Integrity checks on the exported table: access ids and column names must not repeat.
assert not final_accesses["id_access"].duplicated().any(), "Duplicate id_access values found!"
assert not final_accesses.columns.duplicated().any(), "Column names are not unique!"

In [67]:
#final_accesses = final_accesses.rename(columns=lambda x: x.replace(" ", "_").replace("-", "_"))

In [43]:
# Display the cleaned accesses table, including the computed distance_to_road column.
final_accesses

Unnamed: 0,area,CODE5,NAME1,NAME2,park_id,id_access,geometry,distance_to_road
0,73307.720609,1.310104e+10,千代田区,九段南二丁目,1,1,POINT (372258.809 3942811.644),2.261011e-10
1,73307.720609,1.310104e+10,千代田区,九段南二丁目,1,2,POINT (372887.24 3942774.412),1.475196e-10
2,73307.720609,1.310104e+10,千代田区,九段南二丁目,1,3,POINT (372880.569 3942774.265),5.373042e-11
3,73307.720609,1.310104e+10,千代田区,九段南二丁目,1,4,POINT (372878.985 3942775.678),2.284227e-11
4,73307.720609,1.310104e+10,千代田区,九段南二丁目,1,5,POINT (372967.947 3942756.225),1.222850e-10
...,...,...,...,...,...,...,...,...
18009,69.856182,1.310104e+10,千代田区,九段南二丁目,8422,18010,POINT (399899.479 3956182.21),0.000000e+00
18010,640.066502,1.310104e+10,千代田区,九段南二丁目,8423,18011,POINT (399297.971 3956263.123),3.355028e-11
18011,443.747043,1.310104e+10,千代田区,九段南二丁目,8424,18012,POINT (399474.511 3956279.15),2.802807e-11
18012,399.711129,1.310104e+10,千代田区,九段南二丁目,8425,18013,POINT (399642.024 3956268.691),1.620954e-10
