In [None]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd
import yaml
from shapely import Point

# Let osmnx cache HTTP responses locally and keep its log output off the console.
ox.settings.use_cache = True
ox.settings.log_console = False

# Load the CRS lookup; it is indexed by city key further down (crs[CITY]).
# The encoding is pinned so parsing does not depend on the platform default.
with open("../data/crs.yaml", "r", encoding="utf-8") as fp:
    crs = yaml.safe_load(fp)

In [None]:
# Key of the city to process: used to build the input/output paths and to look up crs[CITY].
CITY = "budapest"
# NOTE(review): not referenced in the visible cells; the stops CSV path is built
# from CITY with a fixed file name instead. Confirm whether this is still needed.
STOPS_FILENAME = "Budapest_stops_with_centrality.csv"
# Name of the centrality column of the stops table to work with.
# possible values: Eigenvector Centrality, Degree Centrality, Closeness Centrality, Betweenness Centrality
CENTRALITY = "Betweenness Centrality"

In [None]:
# Read the per-stop centrality table for the selected city.
stops = pd.read_csv(f"../data/stops/{CITY}/stops_with_centrality.csv", engine="pyarrow")
# Build the point geometries in one vectorised call (x = longitude, y = latitude)
# instead of constructing a Point per row, then reproject from EPSG:4326 to the
# CRS configured for the city.
stops = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs=4326,
).to_crs(crs[CITY])
stops.head(3)

Unnamed: 0,Node,Eigenvector Centrality,Degree Centrality,Closeness Centrality,Betweenness Centrality,stop_id,clust,stop_lat,stop_lon,geometry
0,1.0,3.632075e-15,0.000437,0.033463,0.0,044369,1,47.175605,18.941796,POINT (641991.147 203505.079)
1,1.0,3.632075e-15,0.000437,0.033463,0.0,088453,1,47.174793,18.941817,POINT (641992.615 203414.809)
2,2.0,1.878265e-14,0.001311,0.034621,0.00262,CSF04632,2,47.227924,18.946067,POINT (642322.523 209320.816)


In [None]:
# Read the city boundary, declare it as EPSG:4326 and reproject it to the
# CRS configured for the city.
boundary = (
    gpd.read_file(f"../data/geojson/{CITY}/boundary.geojson")
    .set_crs(4326)
    .to_crs(crs[CITY])
)

In [None]:
# boundary_poly = boundary.to_crs(4326).geometry[0]
# landuse = ox.features_from_polygon(
#     boundary_poly, tags={"landuse": ["residential"]}
#     # boundary_poly, tags={"landuse": ["residential", "retail", "industrial"]}
# )
# landuse = landuse[landuse["geometry"].geom_type == "Polygon"].copy()
# landuse.to_crs(CRS, inplace=True)

In [29]:
# Stops that attain the highest value of the configured centrality measure.
# Derived from the data rather than a hand-copied threshold, so the cell keeps
# working when CITY or CENTRALITY changes.
stops[stops[CENTRALITY] == stops[CENTRALITY].max()]

Unnamed: 0,Node,Eigenvector Centrality,Degree Centrality,Closeness Centrality,Betweenness Centrality,stop_id,clust,stop_lat,stop_lon,geometry
4578,1865.0,0.004166,0.004806,0.090722,0.344691,036947,1865,47.489173,19.06284,POINT (651160.099 238359.249)
4579,1865.0,0.004166,0.004806,0.090722,0.344691,056227,1865,47.488903,19.061022,POINT (651023.107 238329.208)
4580,1865.0,0.004166,0.004806,0.090722,0.344691,056228,1865,47.489245,19.061938,POINT (651092.127 238367.242)
4581,1865.0,0.004166,0.004806,0.090722,0.344691,062759,1865,47.48858,19.064087,POINT (651254.08 238293.34)
4582,1865.0,0.004166,0.004806,0.090722,0.344691,CSF01289,1865,47.489585,19.061629,POINT (651068.836 238405.037)
4583,1865.0,0.004166,0.004806,0.090722,0.344691,F01005,1865,47.489219,19.061248,POINT (651040.132 238364.342)
4584,1865.0,0.004166,0.004806,0.090722,0.344691,F01006,1865,47.489461,19.061486,POINT (651058.062 238391.25)
4585,1865.0,0.004166,0.004806,0.090722,0.344691,F01007,1865,47.489074,19.062694,POINT (651149.099 238348.241)
4586,1865.0,0.004166,0.004806,0.090722,0.344691,F01010,1865,47.49001,19.061978,POINT (651095.126 238452.291)
4587,1865.0,0.004166,0.004806,0.090722,0.344691,F01011,1865,47.490109,19.060783,POINT (651005.076 238463.283)


In [75]:
# based on: https://stackoverflow.com/a/38022636/4737417
def distance_matrix(data: pd.DataFrame):
    """Build the square matrix of pairwise distances between the rows of ``data``.

    Args:
        data: Table with a ``geometry`` column whose values provide a
            ``distance(other)`` method.

    Returns:
        A NumPy array in which entry ``[i, j]`` is the distance from row ``i``
        to row ``j``, rounded to the nearest whole number.
    """
    # Pull the geometries out once, in row order, and reuse them for both axes.
    geometries = [row.geometry for row in data.itertuples()]
    rows = [
        [np.round(origin.distance(target)) for target in geometries]
        for origin in geometries
    ]
    return np.array(rows)


def medoid_index(distamce_matrix: np.array) -> int:
    """Return the position of the medoid in a pairwise distance matrix.

    The medoid is the column with the smallest distance total; when several
    columns tie, the first one is returned.
    """
    column_totals = distamce_matrix.sum(axis=0)
    return int(column_totals.argmin())


def calculate_medoid(data: pd.DataFrame, id_column: str = "stop_id") -> object:
    """Return the identifier of the medoid among the rows of ``data``.

    Args:
        data: Table with a ``geometry`` column (consumed by ``distance_matrix``)
            and an identifier column.
        id_column: Name of the column holding the identifier to return.

    Returns:
        The ``id_column`` value of the row whose total distance to all rows is
        smallest. Its type is whatever the column holds, not necessarily an int.
    """
    dmx = distance_matrix(data)
    # Use the shared helper so the medoid rule is defined in one place only.
    i = medoid_index(dmx)
    return data[id_column].tolist()[i]

In [None]:
# dmx = distance_matrix(stops[stops["Betweenness Centrality"] > 0.3446914])
# medoid_index(dmx)

4

In [76]:
# Select the stops that attain the largest value of the configured centrality
# measure. The same column is used for the maximum and for the filter, so
# changing CENTRALITY cannot compare one measure's maximum to another's values.
maxc = stops[CENTRALITY].max()
medoid_id = calculate_medoid(stops[stops[CENTRALITY] == maxc])
# Geometry of the medoid stop; later cells measure distances to it.
medoid = stops.loc[stops["stop_id"] == medoid_id, "geometry"].iloc[0]

In [None]:
# Take the boundary centroid as a single geometry instead of handing the whole
# centroid series to every per-row distance call.
# NOTE(review): assumes the boundary file holds one feature; only the first
# row's centroid is used. Confirm against the data.
city_centroid = boundary.centroid.iloc[0]
# Vectorised distance to the centroid, divided by 1000 and rounded to 3 decimals
# (presumably metres to kilometres; depends on the units of the city CRS).
stops["distance_from_city_centroid"] = (
    stops["geometry"].distance(city_centroid).div(1000).round(3)
)

In [None]:
# Vectorised distance from every stop to the medoid geometry, divided by 1000
# and rounded to 3 decimals (presumably metres to kilometres; depends on the
# units of the city CRS).
stops["distance_from_largest_betweenness_medoid"] = (
    stops["geometry"].distance(medoid).div(1000).round(3)
)

In [None]:
# Keep only the stop identifier and the two distance columns for export.
m = stops[
    [
        "stop_id",
        "distance_from_city_centroid",
        "distance_from_largest_betweenness_medoid",
    ]
]
# to_csv does not create missing directories, so make sure the per-city
# output folder exists before writing.
output_dir = Path(f"../output/{CITY}")
output_dir.mkdir(parents=True, exist_ok=True)
m.to_csv(output_dir / "distance.csv")