In [None]:
import geopandas as gpd
import pandas as pd
import structlog
import yaml
from shapely import from_wkt


def create_lookup(essential_amenities: dict[str, list[str]]) -> dict[str, str]:
    """Invert a ``category -> entries`` mapping into ``entry -> category``.

    Args:
        essential_amenities: Maps each category name to the list of entries
            (OSM category strings, judging by the parameter naming) that
            belong to it.

    Returns:
        A dict mapping every listed entry to its category. When an entry is
        listed under several categories, the category iterated last wins.
    """
    return {
        osm_entry: category
        for category, osm_entries in essential_amenities.items()
        for osm_entry in osm_entries
    }


# Read the category -> entries mapping; safe_load is used so the YAML is
# parsed into plain Python objects only.
with open("../data/essential_amenities.yaml") as yaml_file:
    essential_amenities = yaml.safe_load(yaml_file)

# Logger used for progress messages in later cells.
logger = structlog.get_logger()

# Reverse mapping (entry -> category) used to categorise the amenities.
lookup = create_lookup(essential_amenities)

In [5]:
# City name interpolated into the ../output/{CITY}/... paths used by later cells.
CITY = "budapest"

In [6]:
# Load the filtered amenities and prepare them for the spatial joins:
#   * keep the original "category" column as "osm_category",
#   * derive "category" through `lookup` and drop rows without a match,
#   * discard columns that are not needed further on,
#   * parse the WKT text in "geometry" into shapely geometries.
amenities = (
    gpd.read_file(
        f"../output/{CITY}/amenities/amenities_filtered.wkt.csv", engine="pyogrio"
    )
    .rename(columns={"category": "osm_category"})
    .assign(category=lambda frame: frame["osm_category"].map(lookup))
    .dropna(subset=["category"])
    .drop(columns=["osm_type", "amenity_type", "amenity_subtype"])
    .assign(geometry=lambda frame: frame["geometry"].apply(from_wkt))
)
# Declare the parsed coordinates as EPSG:4326.
amenities = gpd.GeoDataFrame(amenities, geometry="geometry", crs=4326)
amenities.head(3)

Unnamed: 0,osm_id,osm_category,name,geometry_type,geometry,category
8,30761500,amenity:pharmacy,Valentin Gyógyszertár,Point,POINT (17.74846 46.88412),drugstores
9,30761501,amenity:restaurant,Fék Étterem,Point,POINT (17.74914 46.88292),restaurants
12,31929662,amenity:restaurant,Kisherceg vendéglő,Point,POINT (19.22771 47.52502),restaurants


In [7]:
# Load the isochrone polygons for the selected city and preview them.
isochrones_path = f"../output/{CITY}/isochrones.geojson"
isochrones = gpd.read_file(isochrones_path, engine="pyogrio")
isochrones.head(3)

Unnamed: 0,stop_id,costing,range,geometry
0,8951,walk,5,"POLYGON ((19.21868 47.43322, 19.21568 47.43326..."
1,8951,walk,10,"POLYGON ((19.22068 47.43672, 19.21385 47.43585..."
2,8951,walk,15,"POLYGON ((19.22068 47.43934, 19.21968 47.43985..."


In [8]:
# Spatially join isochrones with amenities using geopandas' default join
# settings: one output row per (isochrone, matching amenity) pair.
m = gpd.sjoin(isochrones, amenities)

In [None]:
# Count the joined amenities per stop and category for every
# (costing, range) isochrone and write one wide table to CSV.
#
# Everything is aggregated in a single groupby instead of concatenating one
# pivot per (costing, range) combination:
#   * growing a frame with pd.concat inside a loop copies the accumulated
#     data on every iteration;
#   * a category that was absent from one combination came back from the
#     concat as NaN, which turned the count columns into floats with holes.
# unstack(fill_value=0) yields integer counts with 0 for missing categories.
# Rows are now ordered by costing, range, stop_id.
id_columns = ["costing", "range", "stop_id"]
category_counts = (
    m.groupby([*id_columns, "category"])["osm_id"]
    .count()
    .unstack("category", fill_value=0)
    .reset_index()
)

# Keep the previous column layout: stop_id, one column per category, then
# costing and range.
category_columns = [
    column for column in category_counts.columns if column not in id_columns
]
result = category_counts[["stop_id", *category_columns, "costing", "range"]]

result.to_csv(f"../output/{CITY}/amenity_counts_in_accessibility.csv", index=False)

In [12]:
# Read the stop metrics file and parse both WKT columns into shapely
# geometries.
stops = pd.read_csv(f"../output/{CITY}/stop_metrics_15m.csv.gz")
for wkt_column in ("concave", "convex"):
    stops[wkt_column] = stops[wkt_column].apply(from_wkt)

# "concave" becomes the active geometry; it is declared as EPSG:23700 and
# reprojected to EPSG:4326. Only the active geometry column is reprojected.
stops = gpd.GeoDataFrame(stops, geometry="concave", crs=23700).to_crs(4326)

FileNotFoundError: [Errno 2] No such file or directory: '../output/budapest/stop_metrics_15m.csv.gz'

In [19]:
# Amenity counts per stop, category and time step. The CSV acts as a cache:
# it is read when present and (re)built otherwise.
try:
    ac_bkk = pd.read_csv("../output/amenity_counts_in_bkk_accessibility_polygons.csv")
except FileNotFoundError:
    # runs ~5m
    # Collect the per-time-step frames in a list and concatenate once at the
    # end; concatenating inside the loop copies the accumulated frame on every
    # iteration.
    # NOTE(review): the 72 time steps are hard-coded; presumably this matches
    # the values of the "time" column in `stops` - confirm.
    per_time_counts = []
    for time_step in range(72):
        logger.info(time_step)
        joined = stops.query(f"time == {time_step}").sjoin(amenities)
        counts = (
            joined.groupby(["stop_id", "category"])["osm_id"]
            .count()
            .reset_index()
            .rename(columns={"osm_id": "count"})
        )
        counts["time"] = time_step
        per_time_counts.append(counts)
    ac_bkk = pd.concat(per_time_counts)
    ac_bkk.to_csv(
        "../output/amenity_counts_in_bkk_accessibility_polygons.csv", index=False
    )

[2m2024-07-19 16:54:40[0m [[32m[1minfo     [0m] [1m0                             [0m
[2m2024-07-19 16:54:43[0m [[32m[1minfo     [0m] [1m1                             [0m
[2m2024-07-19 16:54:45[0m [[32m[1minfo     [0m] [1m2                             [0m
[2m2024-07-19 16:54:46[0m [[32m[1minfo     [0m] [1m3                             [0m
[2m2024-07-19 16:54:48[0m [[32m[1minfo     [0m] [1m4                             [0m
[2m2024-07-19 16:54:54[0m [[32m[1minfo     [0m] [1m5                             [0m
[2m2024-07-19 16:54:58[0m [[32m[1minfo     [0m] [1m6                             [0m
[2m2024-07-19 16:55:02[0m [[32m[1minfo     [0m] [1m7                             [0m
[2m2024-07-19 16:55:06[0m [[32m[1minfo     [0m] [1m8                             [0m
[2m2024-07-19 16:55:09[0m [[32m[1minfo     [0m] [1m9                             [0m
[2m2024-07-19 16:55:13[0m [[32m[1minfo     [0m] [1m10                     

In [26]:
# Keep only the rows with time == 40; the then-constant "time" column is dropped.
ac_bkk_hour40 = ac_bkk.loc[ac_bkk["time"] == 40].drop(columns="time").copy()

In [27]:
# Display the long-format counts (stop_id, category, count) for time == 40.
ac_bkk_hour40

Unnamed: 0,stop_id,category,count
0,003105,cultural_institutions,95
1,003105,drugstores,95
2,003105,groceries,232
3,003105,healthcare,60
4,003105,parks,389
...,...,...,...
18622,F04837,parks,35
18623,F04837,religious_organizations,10
18624,F04837,restaurants,63
18625,F04837,schools,16


In [63]:
# Reshape the long counts into one row per stop_id with one integer column
# per category; stop/category pairs without a value become 0.
bkk_amenity_count = (
    ac_bkk_hour40.pivot_table(index=["stop_id"], columns=["category"], values="count")
    .fillna(0)
    .map(int)
    .reset_index()
)
# Prefix every column name, including stop_id, with "bkk_".
bkk_amenity_count.columns = [
    "bkk_" + str(column) for column in bkk_amenity_count.columns
]

In [64]:
# Display the wide per-stop table with the "bkk_"-prefixed columns.
bkk_amenity_count

Unnamed: 0,bkk_stop_id,bkk_cultural_institutions,bkk_drugstores,bkk_groceries,bkk_healthcare,bkk_parks,bkk_religious_organizations,bkk_restaurants,bkk_schools,bkk_services
0,003105,95,95,232,60,389,63,826,115,447
1,007817,0,0,3,0,1,0,1,0,0
2,007878,41,25,53,26,119,24,221,59,102
3,007879,32,40,95,10,193,27,218,64,118
4,007890,328,231,589,133,628,160,2402,280,870
...,...,...,...,...,...,...,...,...,...,...
2105,F04828,0,1,2,2,15,6,5,5,2
2106,F04829,5,13,27,7,43,10,77,17,39
2107,F04831,0,0,1,1,12,2,4,7,1
2108,F04834,0,5,6,0,8,0,12,0,18


In [65]:
# Write the wide per-stop counts to CSV without the index column.
hour40_output_path = "../output/amenity_counts_in_bkk_accessibility_hour40.csv"
bkk_amenity_count.to_csv(hour40_output_path, index=False)