In [None]:
import json

import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
import shapely
import structlog
from common import load_crs, load_stops
from shapely import Point, Polygon


def compute_ellipticity(points: np.ndarray) -> float:
    """
    Compute ellipticity of a set of points.

    The ellipticity is defined as ``1 - b / a``, where ``a`` and ``b`` are the
    square roots of the largest and second-largest eigenvalues of the covariance
    matrix of the coordinates (for 2-D input these are the only two eigenvalues).
    A value of 0 means the spread is the same along both principal axes, a value
    of 1 means the points are collinear.

    Parameters:
    - points (numpy array): Array-like of shape (n, 2) representing (x, y) coordinates of points.

    Returns:
    - ellipticity (float): Ellipticity value, or NaN when it is undefined (fewer
      than two points, or all points coincide so there is no major axis).
    """
    coords = np.asarray(points, dtype=float)

    # A covariance matrix needs at least two observations.
    if coords.shape[0] < 2:
        return float("nan")

    # Calculate the covariance matrix of the points (one row per observation)
    cov_matrix = np.cov(coords, rowvar=False)

    # Only the eigenvalues are needed; eigvalsh returns them in ascending order.
    eigenvalues = np.linalg.eigvalsh(cov_matrix)

    # Round-off can make a zero eigenvalue of a degenerate (e.g. collinear) point
    # set slightly negative, which would turn the square root below into NaN.
    eigenvalues = np.clip(eigenvalues, 0.0, None)

    # Major and minor axis lengths are square roots of the two largest eigenvalues
    major_axis_length = np.sqrt(eigenvalues[-1])
    minor_axis_length = np.sqrt(eigenvalues[-2])

    # All points coincide: there is no axis to compare against.
    if major_axis_length == 0.0:
        return float("nan")

    return float(1.0 - (minor_axis_length / major_axis_length))


def ellipticity(
    points: list[Point], threshold: int = 10, decimals: int = 4
) -> "float | None":
    """
    Rounded ellipticity of a list of points, or ``None`` for too few points.

    Parameters:
    - points: Objects exposing ``x`` and ``y`` attributes (e.g. shapely Points).
    - threshold: Minimum number of points required to compute a value.
    - decimals: Number of decimals the result is rounded to.

    Returns:
    - The value of ``compute_ellipticity`` rounded to ``decimals`` places, or
      ``None`` when fewer than ``threshold`` points are supplied.
    """
    coords = [(point.x, point.y) for point in points]

    # A covariance matrix needs at least two observations, so never go below 2
    # even when the caller passes a smaller threshold.
    if len(coords) < max(threshold, 2):
        return None

    return np.round(compute_ellipticity(coords), decimals)


def determine_stop_geometries(
    stops: gpd.GeoDataFrame,
    subgraphs: dict,
    time_marker: int = 39,
    suffix: str = "",
    concaveness_ratio: float = 0.2,
    include_empty: bool = False,
) -> pd.DataFrame:
    """
    Build hull polygons and an ellipticity score for the network reachable from each stop.

    For every stop the graph stored in ``subgraphs`` under the key
    ``"{stop_id}_network_{time_marker}"`` is looked up (a missing key counts as an
    empty network). The stops whose ``stop_id`` is a node of that graph are merged,
    and their convex hull, concave hull and ellipticity are recorded.

    The convex hull is unique; the concave hull depends on ``concaveness_ratio``.

    Parameters:
    - stops: Stop table with a ``stop_id`` column and point geometries.
    - subgraphs: Mapping from network key to an iterable of stop IDs (e.g. a graph).
    - time_marker: Number used in the network key.
    - suffix: Text appended to every output column name except ``stop_id``.
    - concaveness_ratio: ``ratio`` argument passed to ``shapely.concave_hull``.
    - include_empty: When True, stops without any reachable stop get a row with
      empty polygons and zeros; otherwise they are left out.

    Returns:
    - DataFrame with ``stop_id``, both hulls, both hull areas divided by 1e6 and
      rounded to 3 decimals, and the ellipticity.
    """
    base_names = ("convex", "convex_area", "concave", "concave_area", "ellipticity")
    columns = ["stop_id", *(name + suffix for name in base_names)]

    rows = []
    for stop in stops.itertuples():
        network = subgraphs.get(f"{stop.stop_id}_network_{time_marker}", nx.Graph())
        reachable = stops[stops["stop_id"].isin(list(network))].copy()

        if len(reachable) > 0:
            merged = reachable.union_all()
            convex = shapely.convex_hull(merged)
            concave = shapely.concave_hull(merged, ratio=concaveness_ratio)
            score = ellipticity(reachable.geometry.tolist())
            rows.append(
                [
                    stop.stop_id,
                    convex,
                    round(convex.area / 1e6, 3),
                    concave,
                    round(concave.area / 1e6, 3),
                    score,
                ]
            )
        elif include_empty:
            # Nothing reachable: placeholder row with empty shapes and zero values.
            rows.append([stop.stop_id, Polygon(), 0, Polygon(), 0, 0])

    return pd.DataFrame.from_records(rows, columns=columns)


def determine_stop_geometries_from_walk(
    stops: gpd.GeoDataFrame,
    isochrones: gpd.GeoDataFrame,
    accessible_stops,
    crs: int = 23700,
    ellipticity_threshold: int = 2,
) -> gpd.GeoDataFrame:
    """
    Merge the walking isochrones of the stops accessible from each stop.

    For every stop that is a key of ``accessible_stops`` the isochrones with
    ``costing == "walk"`` and ``range == 5`` belonging to its accessible stops are
    unioned. The union is stored in the CRS of ``isochrones``; its area is
    measured after reprojecting to ``crs``.

    Parameters:
    - stops: Stop table with a ``stop_id`` column and point geometries.
    - isochrones: Isochrone polygons with ``stop_id``, ``costing`` and ``range``
      columns and a defined CRS.
    - accessible_stops: Mapping from stop ID to a collection of accessible stop IDs.
    - crs: CRS used for the area computation (presumably a metric projection,
      given that the area is divided by 1e6 -- confirm against ``load_crs``).
    - ellipticity_threshold: Minimum number of accessible stops needed to
      compute the ellipticity.

    Returns:
    - GeoDataFrame in the CRS of ``isochrones`` with the columns ``stop_id``,
      ``geometry``, ``area`` (projected area / 1e6, rounded to 3 decimals),
      ``ellipticity`` and ``number_of_accessible_stops``.
    """
    # The costing/range filter and the reprojection do not depend on the stop,
    # so do them once instead of once per stop. Reprojection is row-wise, hence
    # selecting rows afterwards yields the same geometries as before.
    walk_isochrones = isochrones[
        (isochrones["costing"] == "walk") & (isochrones["range"] == 5)
    ]
    walk_isochrones_projected = walk_isochrones.to_crs(crs)

    records = []
    for row in stops.itertuples():
        # Stops without an accessibility entry are skipped.
        if row.stop_id not in accessible_stops:
            continue
        reachable_ids = accessible_stops[row.stop_id]
        accessible = stops[stops["stop_id"].isin(reachable_ids)]

        el = ellipticity(accessible.geometry.tolist(), threshold=ellipticity_threshold)

        # Positional mask, valid for both frames because they share row order.
        in_reach = walk_isochrones["stop_id"].isin(reachable_ids).to_numpy()
        accessible_area = walk_isochrones[in_reach].union_all()
        accessible_area_crs = walk_isochrones_projected[in_reach].union_all()

        records.append(
            [
                row.stop_id,
                accessible_area,
                round(accessible_area_crs.area / 1e6, 3),
                el,
                len(accessible),
            ]
        )
    df = pd.DataFrame.from_records(
        records,
        columns=[
            "stop_id",
            "geometry",
            "area",
            "ellipticity",
            "number_of_accessible_stops",
        ],
    )
    # The stored geometries were never reprojected, so label them with the CRS
    # of the input rather than assuming EPSG:4326.
    return gpd.GeoDataFrame(df, crs=isochrones.crs)

In [None]:
# CRS lookup; indexed by city name further down (crs[CITY]).
crs = load_crs()

# Notebook-wide structlog logger.
logger = structlog.get_logger()

In [35]:
# City to process; used to build the input and output paths and to pick the CRS.
CITY = "madrid"
# Passed as the minimum number of accessible stops needed to compute an ellipticity.
ELLIPTICITY_THRESHOLD = 5

In [36]:
# Load the mapping from a stop ID to the stop IDs accessible from it.
# JSON is UTF-8 encoded, so pin the encoding instead of relying on the
# platform's locale default.
with open(f"../data/stops/{CITY}/accessible_stops.json", "r", encoding="utf-8") as fp:
    accessible_stops = json.load(fp)

# For the listed cities the keys are turned into integers, going through float
# first (presumably because they are serialised like "123.0" -- verify against
# the file that produces them).
if CITY in ["paris"]:
    accessible_stops = {int(float(k)): v for k, v in accessible_stops.items()}

In [37]:
# Unique stop IDs that occur in at least one accessibility list.
all_stops = {
    stop_id for reachable in accessible_stops.values() for stop_id in reachable
}
len(all_stops)

4826

In [None]:
# Read the isochrone table (stop IDs kept as strings), parse the WKT column
# into shapely geometries and wrap the result as a GeoDataFrame in EPSG:4326.
isochrones = gpd.GeoDataFrame(
    pd.read_csv(f"../output/{CITY}/isochrones.csv", dtype={"stop_id": str}).assign(
        geometry=lambda table: table["geometry"].apply(shapely.from_wkt)
    ),
    geometry="geometry",
    crs=4326,
)

stops = load_stops(CITY)

In [41]:
# Restrict the input to walking isochrones with range 5, then merge them per stop.
walk_mask = (isochrones["costing"] == "walk") & (isochrones["range"] == 5)
sgfw = determine_stop_geometries_from_walk(
    stops=stops,
    isochrones=isochrones[walk_mask],
    accessible_stops=accessible_stops,
    crs=crs[CITY],
    ellipticity_threshold=ELLIPTICITY_THRESHOLD,
)

# Export the result both as a CSV table and as GeoJSON.
sgfw.to_csv(f"../output/{CITY}/stop_geometries_from_walk.csv", index=False)
sgfw.to_file(f"../output/{CITY}/stop_geometries_from_walk.geojson")

In [42]:
# Display the merged walking-isochrone geometries computed above.
sgfw

Unnamed: 0,stop_id,geometry,area,ellipticity,number_of_accessible_stops
0,5130,"MULTIPOLYGON (((-3.69881 40.50731, -3.69878 40...",1.977,0.8803,11
1,5166,"MULTIPOLYGON (((-3.69881 40.50731, -3.69878 40...",1.977,0.8803,11
2,3881,"MULTIPOLYGON (((-3.68241 40.50059, -3.68501 40...",1.984,0.5615,15
3,3882,"MULTIPOLYGON (((-3.68241 40.50059, -3.68501 40...",1.984,0.5615,15
4,1026,"POLYGON ((-3.70619 40.50018, -3.70751 40.49997...",1.717,0.7955,13
...,...,...,...,...,...
4821,5299,"POLYGON ((-3.55165 40.39363, -3.55291 40.39331...",1.235,0.7705,5
4822,51003,"POLYGON ((-3.55193 40.39877, -3.54974 40.39805...",1.813,0.3185,7
4823,50007,"POLYGON ((-3.55782 40.3994, -3.56102 40.3984, ...",0.857,,4
4824,51007,"POLYGON ((-3.55258 40.39949, -3.55186 40.39886...",1.212,0.7041,5


In [43]:
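# Disabled sanity-check plot for a single stop and its accessible stops.
# To re-enable it, uncomment the lines below and import matplotlib.pyplot as plt
# (it is not imported in the import cell at the top).
# stop = "009461"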
# # stop = "009749"
# fig, ax = plt.subplots()
# sgfw[sgfw["stop_id"] == stop].plot(ax=ax, fc="#afdfff", ec="#00aaff")
# stops[
#     (stops["stop_id"].isin(accessible_stops[stop])) & (stops["stop_id"] != stop)
# ].plot(ax=ax, color="#2d2d2d", markersize=15, zorder=5)
# stops[stops["stop_id"] == stop].plot(ax=ax, color="red", markersize=20, zorder=10)