In [None]:
import pickle
from multiprocessing import Pool

import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
import shapely
from shapely import Point, Polygon


def compute_ellipticity(points: np.ndarray) -> float:
    """
    Compute ellipticity of a set of points.

    The ellipticity is ``1 - b / a`` where ``a`` and ``b`` are the square roots
    of the largest and second-largest eigenvalues of the covariance matrix of
    the coordinates (for 2-D input: the spread along the major and minor
    principal axes).

    Parameters:
    - points (array-like): Array of shape (n, 2) representing (x, y) coordinates of points.
      Any sequence accepted by ``np.asarray`` (e.g. a list of tuples) works.

    Returns:
    - ellipticity (float): 0 when the spread is equal along both axes, 1 when
      the points are collinear. NaN when the value is undefined, i.e. fewer
      than two points are given or all points coincide.
    """
    coords = np.asarray(points, dtype=float)

    # A covariance matrix needs at least two observations (rows).
    if coords.ndim < 2 or coords.shape[0] < 2:
        return float("nan")

    # Covariance of the coordinates; columns are the variables.
    cov_matrix = np.cov(coords, rowvar=False)

    # The covariance matrix is symmetric, so eigvalsh applies; it returns the
    # eigenvalues in ascending order and skips the unused eigenvectors.
    eigenvalues = np.linalg.eigvalsh(cov_matrix)

    # Round-off can push a zero eigenvalue slightly below zero (collinear
    # points), which would turn the square root into NaN; clamp at zero.
    major_variance = max(eigenvalues[-1], 0.0)
    minor_variance = max(eigenvalues[-2], 0.0)

    # All points coincide: there is no major axis to compare against.
    if major_variance == 0.0:
        return float("nan")

    # Major and minor axis lengths are square roots of eigenvalues
    major_axis_length = np.sqrt(major_variance)
    minor_axis_length = np.sqrt(minor_variance)

    return float(1.0 - (minor_axis_length / major_axis_length))


def ellipticity(points: list[Point], threshold: int = 10, decimals: int = 4) -> float:
    """
    Rounded ellipticity of a collection of point geometries.

    Parameters:
    - points: objects exposing ``.x`` and ``.y`` coordinates.
    - threshold: minimum number of points required to compute a value.
    - decimals: number of decimal places the result is rounded to.

    Returns:
    - The result of ``compute_ellipticity`` rounded to ``decimals`` places, or
      ``None`` when fewer than ``threshold`` points are supplied.
    """
    coordinates = [(point.x, point.y) for point in points]
    if len(coordinates) >= threshold:
        return np.round(compute_ellipticity(coordinates), decimals)
    return None


def determine_stop_geometries(
    stops: gpd.GeoDataFrame,
    subgraphs: dict,
    time_marker: int = 39,
    suffix: str = "",
    concaveness_ratio: float = 0.2,
    # include_key: bool = True,
    include_empty: bool = False,
) -> pd.DataFrame:
    """
    Build, for every stop, the convex and concave hull of its accessible stops
    together with their ellipticity.

    For each row of ``stops`` the entry ``"{stop_id}_network_{time_marker}"`` is
    looked up in ``subgraphs`` (an empty ``nx.Graph`` is used when the key is
    missing). The items obtained by iterating that entry are matched against
    the ``stop_id`` column of ``stops`` to select the accessible stops.

    The convex hull is unambiguous; the concave hull depends on
    ``concaveness_ratio``, which is forwarded to ``shapely.concave_hull``.

    Parameters:
    - stops: stop table with a ``stop_id`` column and point geometries.
    - subgraphs: mapping from ``"{stop_id}_network_{time_marker}"`` to an
      iterable of stop ids (presumably networkx graphs whose nodes are stop
      ids - confirm against the pickle contents).
    - time_marker: index used in the lookup key.
    - suffix: string appended to every output column name except ``stop_id``.
    - concaveness_ratio: ``ratio`` argument of ``shapely.concave_hull``.
    - include_empty: when True, stops without accessible stops get a
      placeholder row (empty polygons, zero areas, zero ellipticity) instead
      of being skipped.

    Returns:
    - DataFrame with columns ``stop_id``, ``convex``, ``convex_area``,
      ``concave``, ``concave_area`` and ``ellipticity`` (the latter five with
      ``suffix`` appended). Areas are the hull areas divided by 1e6 and
      rounded to 3 decimals (presumably m^2 -> km^2, which assumes a metric
      CRS - confirm).
    """
    metric_names = ["convex", "convex_area", "concave", "concave_area", "ellipticity"]
    columns = ["stop_id"] + [name + suffix for name in metric_names]

    rows = []
    for stop in stops.itertuples():
        network = subgraphs.get(f"{stop.stop_id}_network_{time_marker}", nx.Graph())
        reachable_ids = list(network)
        reachable = stops[stops["stop_id"].isin(reachable_ids)].copy()

        if len(reachable) == 0:
            # Nothing accessible: optionally emit a placeholder, then move on.
            if include_empty:
                rows.append([stop.stop_id, Polygon(), 0, Polygon(), 0, 0])
            continue

        # Merge all accessible stop geometries into one geometry for the hulls.
        merged = reachable.union_all()
        convex = shapely.convex_hull(merged)
        concave = shapely.concave_hull(merged, ratio=concaveness_ratio)
        shape_ellipticity = ellipticity(reachable.geometry.tolist())

        rows.append(
            [
                stop.stop_id,
                convex,
                round(convex.area / 1e6, 3),
                concave,
                round(concave.area / 1e6, 3),
                shape_ellipticity,
            ]
        )

    return pd.DataFrame.from_records(rows, columns=columns)
    # if include_key:
    #     result["time"] = time_marker
    # return result

In [40]:
# Alternative (currently inactive) input configuration, kept for reference:
# CITY = "budapest"
# STOPS_FILENAME = "stops_gtfs_15min.csv"
# DICT_FILENAME = "subgraph_dict_10minute.pkl"
# SUFFIX = ""
# Active configuration. CITY selects the sub-directory under ../data/stops and
# ../output; the two file names below are read from ../data/stops/{CITY}/.
CITY = "budapest"
STOPS_FILENAME = "Budapest_stops_with_centrality.csv"
DICT_FILENAME = "Budapest_10min_walbetclus.pkl"

In [None]:
# Load the dictionary of accessibility subgraphs; it is later queried with keys
# of the form "{stop_id}_network_{time_marker}".
# NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
with open(f"../data/stops/{CITY}/{DICT_FILENAME}", "rb") as fp:
    subgraphs = pickle.load(fp)

In [None]:
# Read the stop table and turn the longitude/latitude columns into point geometries.
stops = pd.read_csv(f"../data/stops/{CITY}/{STOPS_FILENAME}", engine="pyarrow")
stops = gpd.GeoDataFrame(
    stops,
    # Vectorised construction instead of a row-wise apply.
    geometry=gpd.points_from_xy(stops["stop_lon"], stops["stop_lat"]),
    crs=4326,
)
# Reproject to EPSG:23700 so that coordinates are in metres: the hull areas are
# later divided by 1e6 and the ellipticity is computed on raw x/y values, both
# of which are distorted or meaningless on longitude/latitude degrees.
stops = stops.to_crs(23700)
stops.head(3)

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,stop_code,location_type,parent_station,wheelchair_boarding,stop_direction,clust,max_distance,quant,geometry
0,008951,Béke tér,47.430019,19.215675,,,008951,2.0,-64.0,11,0.0,1,POINT (662691.105 231796.227)
1,F03807,Szent László utca,47.441814,19.207651,,,F03807,2.0,30.0,12,0.0,1,POINT (662083.11 233106.253)
2,F03851,Selmecbánya utca,47.438409,19.20479,,,F03851,2.0,29.0,13,0.0,1,POINT (661868.082 232727.272)


In [None]:
# stops_accessibility = determine_stop_geometries(stops, subgraphs, suffix=SUFFIX)
# stops_accessibility.head(3)
# stops_accessibility.to_csv(f"../output/{CITY}/stop_geometries.csv", index=False)

In [None]:
# try:
#     full = pd.read_csv(f"../output/{CITY}/stop_geometries.csv", engine="pyarrow")
# except FileNotFoundError:
#     # runs 12m for 10-minute only
#     full = pd.DataFrame()
#     for i in range(72):
#         sa = determine_stop_geometries(stops, subgraphs, time_marker=i)
#         sa["time"] = i
#         full = pd.concat([full, sa])
#     full.to_csv(f"../output/{CITY}/stop_geometries.csv", index=False)
#     full.drop(["convex", "concave"], axis=1).to_csv(
#         f"../output/{CITY}/stop_metrics.csv", index=False
#     )

In [None]:
def kernel(i: int) -> pd.DataFrame:
    """
    Compute the stop geometries for time marker ``i`` and tag them with it.

    Uses the notebook-level ``stops`` and ``subgraphs`` objects, keeps stops
    without accessible stops (``include_empty=True``) and adds a ``time``
    column holding ``i``.
    """
    return determine_stop_geometries(
        stops, subgraphs, time_marker=i, include_empty=True
    ).assign(time=i)


# Evaluate time markers 0..71 in four worker processes.
# NOTE(review): this relies on the workers being able to see `kernel`, `stops`
# and `subgraphs` from the notebook namespace; presumably fine with the "fork"
# start method, but verify on platforms that default to "spawn".
with Pool(4) as p:
    partials = p.map(kernel, range(72))

# Concatenate once instead of growing the frame inside a loop (quadratic copying).
full = pd.concat(partials)

# Full result including the hull geometries.
full.to_csv(f"../output/{CITY}/stop_geometries.csv", index=False)
# Metrics only: same table without the two geometry columns.
full.drop(["convex", "concave"], axis=1).to_csv(
    f"../output/{CITY}/stop_metrics.csv", index=False
)

In [34]:
# Load the isochrones for the selected city from GeoJSON; the table is later
# filtered on its `costing` and `range` columns.
isochrones = gpd.read_file(f"../output/{CITY}/isochrones.geojson")

In [None]:
def determine_stop_geometries_from_walk(
    stops: gpd.GeoDataFrame,
    isochrones: gpd.GeoDataFrame,
    include_empty: bool = False,
    time_marker: int = 39,
) -> pd.DataFrame:
    """
    Compute the ellipticity of the stops accessible from every stop.

    For each row of ``stops`` the entry ``"{stop_id}_network_{time_marker}"`` is
    looked up in the notebook-level ``subgraphs`` dictionary (an empty
    ``nx.Graph`` is used when the key is missing); the stops whose ``stop_id``
    appears in that entry are passed to ``ellipticity``.

    Parameters:
    - stops: stop table with a ``stop_id`` column and point geometries.
    - isochrones: isochrone table with ``costing`` and ``range`` columns.
      NOTE(review): not used by the computation yet, see the note in the body.
    - include_empty: when True, stops without accessible stops get a
      placeholder row with ellipticity 0 instead of being skipped.
    - time_marker: index used in the lookup key. Previously this name was
      referenced without being defined, which raised ``NameError``.

    Returns:
    - DataFrame with the columns ``stop_id`` and ``ellipticity``.
    """
    # NOTE(review): the previous body built the union of the 5-minute walking
    # isochrones (costing == 'walk' & range == 5) and discarded the result.
    # That no-op was removed; TODO decide how the walking isochrones should
    # restrict or define the accessible stops.
    records = []
    for row in stops.itertuples():
        accessible_ids = list(
            subgraphs.get(f"{row.stop_id}_network_{time_marker}", nx.Graph())
        )
        accessible_stops = stops[stops["stop_id"].isin(accessible_ids)]
        if len(accessible_stops) == 0:
            if include_empty:
                # The placeholder must match the two output columns.
                records.append([row.stop_id, 0])
            continue

        records.append(
            [row.stop_id, ellipticity(accessible_stops.geometry.tolist())]
        )

    return pd.DataFrame.from_records(records, columns=["stop_id", "ellipticity"])