This notebook checks the starting points of the service areas (park access points and census centroids) to make sure each one is associated with a road.

In [2]:
import geopandas as gpd
import os
from shapely.strtree import STRtree

In [8]:
# Path to the project GeoPackage. Built from separate components so that
# os.path.join inserts the right separator on every OS (the previous single
# backslash-joined string only resolved on Windows).
data = os.path.join("..", "data", "final", "Tokyo_UGS_accessibility.gpkg")

# Layers read from the GeoPackage
ugs = gpd.read_file(data, layer="ugs")
roads = gpd.read_file(data, layer="road_network")
census_centroids = gpd.read_file(data, layer="census_centroids")
accesses = gpd.read_file(data, layer="access_points")

# Spatial indexes over the road geometries
roads_tree = STRtree(roads["geometry"].values)  # shapely STRtree, passed to distance_to_nearest_road below
road_sindex = roads.sindex  # geopandas spatial index; not referenced by the cells visible in this notebook

In [5]:
def distance_to_nearest_road(point, spatial_index, road_geometries=None, search_radius=50):
    """Return the distance from ``point`` to the closest road found near it.

    Candidate roads are looked up by querying ``spatial_index`` with a buffer
    of ``search_radius`` around ``point``; the smallest ``point.distance(road)``
    among those candidates is returned.

    Parameters
    ----------
    point : geometry
        Object exposing ``buffer(radius)`` and ``distance(other)``.
    spatial_index : index
        Object whose ``query(geometry)`` returns the candidates for that
        geometry. Assumes the result is a sequence of integer positions into
        ``road_geometries`` (as shapely 2.x ``STRtree.query`` returns) -- TODO
        confirm for the installed shapely version.
    road_geometries : optional
        Positionally indexable (``.iloc``) collection of road geometries the
        index was built from. Defaults to the notebook-level
        ``roads["geometry"]``, which is what the function always used before.
    search_radius : number, default 50
        Buffer radius used for the candidate query (presumably in the units of
        the layer CRS -- verify).

    Returns
    -------
    float
        Distance to the nearest candidate road, or ``float('inf')`` when the
        query returns no candidates.
    """
    if road_geometries is None:
        # Backward-compatible default: fall back to the global road layer.
        road_geometries = roads["geometry"]

    candidate_positions = spatial_index.query(point.buffer(search_radius))
    if len(candidate_positions) == 0:
        return float('inf')  # Infinite distance if no nearby roads found

    candidate_roads = road_geometries.iloc[candidate_positions]
    # One pass over the candidates; no need to locate the nearest geometry
    # first and then measure the distance to it a second time.
    return min(point.distance(road) for road in candidate_roads)

In [None]:
# Measure the distance to the nearest road for every access point and every census centroid.
# The extra keyword argument is forwarded by `apply` to distance_to_nearest_road.
accesses["distance_to_road"] = accesses["geometry"].apply(
    distance_to_nearest_road, spatial_index=roads_tree
)
census_centroids["distance_to_road"] = census_centroids["geometry"].apply(
    distance_to_nearest_road, spatial_index=roads_tree
)

# Points whose distance to the nearest road is greater than 1 are treated as "far"
far_accesses = accesses.loc[accesses["distance_to_road"] > 1]
far_census = census_centroids.loc[census_centroids["distance_to_road"] > 1]

In [None]:
# Report how many points of each kind were flagged as far from a road
print(f"There are {far_accesses.shape[0]} access points that are not on roads")
print(f"There are {far_census.shape[0]} census centroids that are not on roads")

Census centroids are not an issue. Next, let's check whether any parks are left with zero access points on a road.

In [11]:
# Read the park polygons ('ugs' layer) from the GeoPackage.
# NOTE(review): the data loading cell above already loads this layer into `ugs`.
ugs = gpd.read_file(
    data,
    layer="ugs",
)

In [None]:
# Distinct parks that own at least one access that is not on a road
# (68 parks when this notebook was last run)
far_accesses["park_id"].unique()

In [None]:
# Goal: find the parks whose accesses are ALL "far" accesses.
# Each access row carries a park_id and an id_access.

# Ids of the parks that have at least one remote access
parks_remote_access = far_accesses['park_id'].unique().tolist()

# Number of remote accesses per park: {park_id: count of non-null id_access}.
# Computed once; the earlier bare groupby expression was discarded
# (it was not the last line of the cell, so it was never displayed).
park_remote_accesses_counts = far_accesses.groupby("park_id")["id_access"].count().to_dict()

# Total number of accesses per park, in the same {park_id: count} form
park_accesses_counts = accesses.groupby("park_id")['id_access'].count().to_dict()


In [35]:
# A park is "remote" when every one of its accesses is a remote access,
# i.e. its remote-access count equals its total access count.
remote_parks = [
    remote_park_id
    for remote_park_id, remote_count in park_remote_accesses_counts.items()
    if park_accesses_counts[remote_park_id] == remote_count
]


In [43]:
# Park polygons whose accesses are all remote, exported for visual inspection
parks_no_access = ugs.loc[ugs["park_id"].isin(remote_parks)]
parks_no_access.to_file('remote_parks.geojson')

# The 'remote' parks can now be visualized in QGIS.
# After checking them, the decision was to simply discard the remote park accesses.


In [None]:
# Keep every access whose distance to the nearest road is at most 1.
# `<=` makes this the exact complement of `far_accesses` (distance > 1);
# with `<`, an access at exactly 1 fell into neither set and the
# count check further down would fail.
final_accesses = accesses[accesses['distance_to_road'] <= 1]

In [None]:
# Row counts before filtering, after filtering, and of the discarded remote accesses
print(f"Total number of accesses: {len(accesses)}")
print(f"Final number of accesses: {len(final_accesses)}")
print(f"Remote accesses: {len(far_accesses)}")

In [55]:
# Sanity check: the kept accesses plus the remote accesses must add up to the full set
assert len(final_accesses) + len(far_accesses) == len(accesses)

In [None]:
# Export the final accesses as the "cleaned_park_accesses" layer.
# NOTE(review): this writes to "Tokyo_UGS_accessibility.gpkg" relative to the
# working directory, not to the `data` path the layers were read from -- confirm
# this is intended. The last cell of the notebook writes the trimmed
# `cleaned_accesses` to `data` under the same layer name.
final_accesses.to_file(
    "Tokyo_UGS_accessibility.gpkg",
    layer="cleaned_park_accesses",
    driver="GPKG",
    index=False,
)

In [66]:
# Validate the table before trimming it: access ids and column names must both be unique
access_ids_unique = final_accesses["id_access"].is_unique
column_names_unique = final_accesses.columns.is_unique

assert access_ids_unique, "Duplicate id_access values found!"
assert column_names_unique, "Column names are not unique!"

In [67]:
# Normalise column names: spaces and hyphens both become underscores
final_accesses = final_accesses.rename(
    columns={
        column_name: column_name.replace(" ", "_").replace("-", "_")
        for column_name in final_accesses.columns
    }
)

In [None]:
# Columns retained in the exported layer
columns_to_keep = [
    'area',
    'CODE5',
    'NAME1',
    'NAME2',
    'park_id',
    'id_access',
    'geometry',
]
cleaned_accesses = final_accesses[columns_to_keep]

# Display the trimmed table
cleaned_accesses


In [85]:
# Write the trimmed accesses into the project GeoPackage as the "cleaned_park_accesses" layer
cleaned_accesses.to_file(
    data,
    layer="cleaned_park_accesses",
    driver="GPKG",
    index=False,
)