In [1]:
import geopandas as gpd
import numpy as np
import osmnx as ox
import pandas as pd
import yaml
from shapely import Point

# osmnx session settings: keep console logging switched off and enable its cache.
ox.settings.log_console = False
ox.settings.use_cache = True

# Parse the CRS lookup from YAML; later cells index it by city name (crs[CITY]).
with open("../data/crs.yaml", mode="r") as crs_file:
    crs = yaml.safe_load(crs_file)

In [2]:
# City to process: used to build the input/output file paths and to look up the
# projected CRS in the `crs` mapping.
CITY = "madrid"
# Name of the centrality column whose maximum is looked up when locating the
# reference stops.
# possible values: Eigenvector Centrality, Degree Centrality, Closeness Centrality, Betweenness Centrality
CENTRALITY = "Betweenness Centrality"

In [3]:
# Read the per-stop centrality table for the selected city.
stops = pd.read_csv(f"../data/stops/{CITY}/stops_with_centrality.csv", engine="pyarrow")
# Build all point geometries in one vectorised call (x = longitude, y = latitude)
# instead of constructing a shapely Point per row through DataFrame.apply(axis=1).
stops = gpd.GeoDataFrame(
    stops,
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs=4326,
)
# Reproject from EPSG:4326 to the CRS configured for this city. The result is
# reassigned rather than using inplace=True, so the cell has no in-place mutation.
stops = stops.to_crs(crs[CITY])
stops.head(3)

Unnamed: 0,Node,Eigenvector Centrality,Degree Centrality,Closeness Centrality,Betweenness Centrality,stop_id,clust,stop_lat,stop_lon,geometry
0,3.0,3e-06,0.001158,0.061367,0.006125,5130,3,40.5073,-3.68606,POINT (600111.334 656263.682)
1,3.0,3e-06,0.001158,0.061367,0.006125,5166,3,40.50767,-3.68597,POINT (600118.953 656304.721)
2,690.0,1.4e-05,0.002703,0.065333,0.01607,3881,690,40.5132,-3.67918,POINT (600693.765 656918.117)


In [4]:
# boundary = gpd.read_file(f"../data/geojson/{CITY}/boundary.geojson").set_crs(4326)
# boundary.to_crs(crs[CITY], inplace=True)

In [5]:
# boundary_poly = boundary.to_crs(4326).geometry[0]
# landuse = ox.features_from_polygon(
#     boundary_poly, tags={"landuse": ["residential"]}
#     # boundary_poly, tags={"landuse": ["residential", "retail", "industrial"]}
# )
# landuse = landuse[landuse["geometry"].geom_type == "Polygon"].copy()
# landuse.to_crs(CRS, inplace=True)

In [6]:
# Show the stops whose betweenness centrality is above the 0.3446914 cut-off.
stops.loc[stops["Betweenness Centrality"].gt(0.3446914)]

Unnamed: 0,Node,Eigenvector Centrality,Degree Centrality,Closeness Centrality,Betweenness Centrality,stop_id,clust,stop_lat,stop_lon,geometry
547,2659.0,0.149872,0.002703,0.086726,0.446911,153,2659,40.42114,-3.66951,POINT (601514.446 646707.448)
548,2659.0,0.149872,0.002703,0.086726,0.446911,5620,2659,40.42124,-3.66828,POINT (601618.713 646718.561)
933,2743.0,0.014338,0.004247,0.083524,0.379517,1292,2743,40.44998,-3.60783,POINT (606740.308 649909.046)
934,2743.0,0.014338,0.004247,0.083524,0.379517,1293,2743,40.45007,-3.60792,POINT (606732.673 649919.022)
935,2743.0,0.014338,0.004247,0.083524,0.379517,2956,2743,40.44906,-3.60818,POINT (606710.741 649806.979)
936,2743.0,0.014338,0.004247,0.083524,0.379517,3533,2743,40.44854,-3.60961,POINT (606589.618 649749.197)
937,2743.0,0.014338,0.004247,0.083524,0.379517,4592,2743,40.44901,-3.60963,POINT (606587.878 649801.325)
938,2743.0,0.014338,0.004247,0.083524,0.379517,5617,2743,40.44944,-3.6097,POINT (606581.905 649849.013)
939,2743.0,0.014338,0.004247,0.083524,0.379517,5713,2743,40.44891,-3.6098,POINT (606573.482 649790.221)
940,2743.0,0.014338,0.004247,0.083524,0.379517,5714,2743,40.44908,-3.60971,POINT (606581.092 649809.083)


In [7]:
# based on: https://stackoverflow.com/a/38022636/4737417
def distance_matrix(data: pd.DataFrame):
    """Return the pairwise distances between the ``geometry`` values of ``data``.

    Entry ``[r, c]`` is ``geometry.distance`` measured from row ``r`` to row
    ``c`` and rounded to zero decimals with ``np.round``. Rows and columns
    follow the row order of ``data``.

    Args:
        data: Frame with a ``geometry`` column whose values expose a
            ``distance(other)`` method.

    Returns:
        A NumPy array built from one list of distances per row of ``data``.
    """
    # Materialise the rows once so the same tuples serve as both origins and targets.
    records = list(data.itertuples())
    return np.array(
        [
            [np.round(origin.geometry.distance(target.geometry)) for target in records]
            for origin in records
        ]
    )


def medoid_index(distamce_matrix: np.array) -> int:
    """Return the position of the column with the smallest total distance.

    Args:
        distamce_matrix: Array of pairwise distances; it is summed along
            axis 0, giving one total per column.

    Returns:
        The index of the smallest column total as a plain ``int``
        (``np.argmin`` picks the first one when several are equal).
    """
    column_totals = distamce_matrix.sum(axis=0)
    best_column = np.argmin(column_totals)
    return int(best_column)


def calculate_medoid(data: pd.DataFrame, id_column: str = "stop_id") -> int:
    """Return the ``id_column`` value of the medoid row of ``data``.

    The medoid is the row whose column total in the pairwise distance matrix
    (as produced by ``distance_matrix``) is the smallest.

    Args:
        data: Frame accepted by ``distance_matrix`` that also contains
            ``id_column``.
        id_column: Column holding the identifier to return.

    Returns:
        The identifier found at the medoid's row position.
    """
    pairwise = distance_matrix(data)
    totals = pairwise.sum(axis=0)
    winner = int(np.argmin(totals))
    identifiers = data[id_column].tolist()
    return identifiers[winner]

In [8]:
# dmx = distance_matrix(stops[stops["Betweenness Centrality"] > 0.3446914])
# medoid_index(dmx)

In [11]:
# Select the stops that hold the maximum of the configured centrality measure.
# The mask uses CENTRALITY, the same column the maximum is taken from. Comparing
# that maximum against a hard-coded column would select nothing as soon as
# CENTRALITY is switched to another measure.
maxc = stops[CENTRALITY].max()
top_stops = stops[stops[CENTRALITY] == maxc]

# Medoid: geometry of the selected stop with the smallest summed distance to the
# other selected stops (see calculate_medoid).
medoid_id = calculate_medoid(top_stops)
medoid = stops.loc[stops["stop_id"] == medoid_id, "geometry"].iloc[0]

# Centroid of the union of the selected stops' geometries.
centroid = top_stops.union_all().centroid

In [10]:
# stops["distance_from_city_centroid"] = stops["geometry"].apply(
#     lambda x: np.round(x.distance(boundary.centroid) / 1000, 3)
# )

In [12]:
# Distance from every stop to the medoid and to the centroid computed above,
# divided by 1000 and rounded to 3 decimals (metres -> kilometres, assuming the
# projected CRS uses metres -- TODO confirm against crs.yaml).
# GeoSeries.distance evaluates the whole geometry column at once, replacing the
# per-row Python lambda previously run through Series.apply.
stops["distance_from_largest_betweenness_medoid"] = (
    stops["geometry"].distance(medoid).div(1000).round(3)
)
stops["distance_from_largest_betweenness_centroid"] = (
    stops["geometry"].distance(centroid).div(1000).round(3)
)

In [14]:
# Keep only the stop identifier and the two distance columns, then write them
# to the city's output folder without the index.
m = stops.loc[
    :,
    [
        "stop_id",
        # "distance_from_city_centroid",
        "distance_from_largest_betweenness_centroid",
        "distance_from_largest_betweenness_medoid",
    ],
]
m.to_csv(path_or_buf=f"../output/{CITY}/distance.csv", index=False)