In [5]:
import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd
import yaml
from shapely import Point, Polygon

# osmnx configuration: reuse cached responses for repeated requests and keep
# osmnx log messages out of the console.
ox.settings.use_cache = True
ox.settings.log_console = False

# Per-city CRS lookup; indexed further down as crs[CITY].
# The encoding is pinned because open() otherwise falls back to the platform
# locale, which makes parsing of the YAML file machine-dependent.
with open("../data/crs.yaml", "r", encoding="utf-8") as fp:
    crs = yaml.safe_load(fp)

In [2]:
# City to process; used to build the input/output paths and to look up the CRS.
CITY = "budapest"
# Centrality column of the stops table whose maximum is looked up further down.
# possible values: Eigenvector Centrality, Degree Centrality, Closeness Centrality, Betweenness Centrality
CENTRALITY = "Betweenness Centrality"

In [3]:
stops = pd.read_csv(f"../data/stops/{CITY}/stops_with_centrality.csv", engine="pyarrow")
# Build every point geometry in one vectorized call (x = longitude, y = latitude)
# instead of constructing a Point per row, then reproject from EPSG:4326 to the
# CRS configured for the city. Chaining to_crs avoids mutating the frame in place.
stops = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs=4326,
).to_crs(crs[CITY])
stops.head(3)

Unnamed: 0,Node,Eigenvector Centrality,Degree Centrality,Closeness Centrality,Betweenness Centrality,stop_id,clust,stop_lat,stop_lon,stop_name,geometry
0,1.0,3.632075e-15,0.000437,0.033463,0.0,044369,1.0,47.175605,18.941796,Ráckeve,POINT (641991.147 203505.079)
1,2.0,1.878265e-14,0.001311,0.034621,0.00262,CSF04632,2.0,47.227924,18.946067,Szigetszentmárton-Szigetújfalu,POINT (642322.523 209320.816)
2,4.0,3.773179e-15,0.000874,0.033464,0.000874,,,,,,POINT EMPTY


In [7]:
def read_boundary(city: str) -> Polygon:
    """Load the stored boundary geometry of a city.

    Reads ``../output/{city}/boundary.geojson``, declares its CRS as EPSG:4326
    and returns the geometry stored under index label 0.
    """
    path = f"../output/{city}/boundary.geojson"
    boundary_gdf = gpd.read_file(path)
    boundary_gdf = boundary_gdf.set_crs(4326)
    return boundary_gdf.geometry.loc[0]


def determine_city_centroid_by_landuse(boundary: Polygon, crs: int) -> Point:
    """Return the centroid of the residential land-use areas inside ``boundary``.

    The features tagged ``landuse=residential`` are fetched with osmnx for the
    given polygon, reduced to plain ``Polygon`` geometries, reprojected to
    ``crs`` and merged; the centroid of the merged shape is returned, so it is
    expressed in ``crs``.

    NOTE(review): features whose geometry type is not exactly "Polygon"
    (e.g. MultiPolygon) are dropped by the filter -- confirm this is intended.
    """
    tags = {
        "landuse": [
            "residential",
            # "retail",
            # "industrial",
        ],
    }
    features = ox.features_from_polygon(boundary, tags=tags)
    is_polygon = features["geometry"].geom_type == "Polygon"
    projected = features[is_polygon].to_crs(crs)
    return projected.union_all().centroid

In [14]:
# Residential land-use centroid of the configured city, in the city's CRS.
landuse_centroid = determine_city_centroid_by_landuse(
    boundary=read_boundary(CITY),
    crs=crs[CITY],
)

In [None]:
# gpd.GeoDataFrame(geometry=[landuse_centroid], crs=crs[CITY]).to_crs(4326)

Unnamed: 0,geometry
0,POINT (19.08903 47.49537)


In [43]:
# Show the stops whose betweenness centrality exceeds the hard-coded threshold.
stops.loc[stops["Betweenness Centrality"].gt(0.3446914)]

Unnamed: 0,Node,Eigenvector Centrality,Degree Centrality,Closeness Centrality,Betweenness Centrality,stop_id,clust,stop_lat,stop_lon,geometry
901,393.0,0.000287,0.013953,0.118924,0.350904,2521334,393,51.918393,4.480926,POINT (811688.056 24181429.978)
902,393.0,0.000287,0.013953,0.118924,0.350904,2521335,393,51.918393,4.480926,POINT (811688.056 24181429.978)
903,393.0,0.000287,0.013953,0.118924,0.350904,2522294,393,51.918347,4.480708,POINT (811702.16 24181437.454)
904,393.0,0.000287,0.013953,0.118924,0.350904,2522550,393,51.918404,4.481143,POINT (811673.401 24181426.387)
905,393.0,0.000287,0.013953,0.118924,0.350904,2522551,393,51.918404,4.481143,POINT (811673.401 24181426.387)
906,393.0,0.000287,0.013953,0.118924,0.350904,2522689,393,51.918875,4.480465,POINT (811728.13 24181381.681)


In [11]:
# based on: https://stackoverflow.com/a/38022636/4737417
def distance_matrix(data: pd.DataFrame):
    """Return the pairwise distances between the geometries of ``data``.

    Entry ``[i][j]`` holds the distance from the ``geometry`` of row ``i`` to
    the ``geometry`` of row ``j``, rounded to the nearest whole number. Rows
    keep the order in which the frame is iterated.
    """
    # Collect the geometries once; every pair is then evaluated from this list.
    geometries = [row.geometry for row in data.itertuples()]
    rows = [
        [np.round(origin.distance(target)) for target in geometries]
        for origin in geometries
    ]
    return np.array(rows)


def medoid_index(distamce_matrix: np.ndarray) -> int:
    """Return the position of the medoid within a pairwise distance matrix.

    The medoid is the element whose summed distance to all elements is the
    smallest; when several elements tie, the lowest position wins.

    Args:
        distamce_matrix: Matrix of pairwise distances. The misspelled
            parameter name is kept so existing keyword calls keep working.

    Returns:
        Zero-based position of the column with the smallest total.
    """
    column_totals = distamce_matrix.sum(axis=0)
    return int(np.argmin(column_totals))


def calculate_medoid(data: pd.DataFrame, id_column: str = "stop_id") -> "int | str":
    """Return the identifier of the medoid row of ``data``.

    The pairwise distance matrix of the rows' geometries is computed and the
    row with the smallest summed distance to all rows is selected.

    Args:
        data: Frame with a ``geometry`` column and the identifier column.
        id_column: Column whose value is returned for the medoid row.

    Returns:
        The ``id_column`` value of the medoid row. Its type follows the
        column, so it is not necessarily an integer.
    """
    dmx = distance_matrix(data)
    # Reuse the shared helper instead of repeating the argmin logic here.
    return data[id_column].tolist()[medoid_index(dmx)]

In [45]:
# dmx = distance_matrix(stops[stops["Betweenness Centrality"] > 0.3446914])
# medoid_index(dmx)

In [12]:
# Select the stops that reach the maximum of the configured centrality measure.
# The filter uses CENTRALITY as well, so the maximum and the comparison always
# refer to the same column when the setting is changed.
maxc = stops[CENTRALITY].max()
top_stops = stops[stops[CENTRALITY] == maxc]

# Medoid: geometry of the selected stop with the smallest summed distance to the
# other selected stops. Centroid: centroid of the union of the selected stops.
medoid_id = calculate_medoid(top_stops)
medoid = stops[stops["stop_id"] == medoid_id]["geometry"].tolist()[0]
centroid = top_stops.union_all().centroid

In [47]:
# stops["distance_from_city_centroid"] = stops["geometry"].apply(
#     lambda x: np.round(x.distance(boundary.centroid) / 1000, 3)
# )

In [15]:
# Distance from every stop to each reference point, divided by 1000 and rounded
# to 3 decimals (kilometres, assuming the city's CRS uses metres -- TODO confirm).
# GeoSeries.distance evaluates the whole column at once, replacing the per-row
# lambdas, and the mapping removes the three copy-pasted statements.
reference_points = {
    "distance_from_largest_betweenness_medoid": medoid,
    "distance_from_largest_betweenness_centroid": centroid,
    "distance_from_landuse_centroid": landuse_centroid,
}
for column, reference_point in reference_points.items():
    stops[column] = (stops["geometry"].distance(reference_point) / 1000).round(3)

In [20]:
# Columns written to the distance table.
export_columns = [
    "stop_id",
    # "distance_from_city_centroid",
    "distance_from_largest_betweenness_centroid",
    "distance_from_largest_betweenness_medoid",
    "distance_from_landuse_centroid",
]
m = stops[export_columns]

# Rows without a stop_id are left out of the exported file.
output_path = f"../output/{CITY}/distance.csv"
m.dropna(subset=["stop_id"]).to_csv(output_path, index=False)