In [1]:
import geopandas as gpd
import yaml
from map_stop_area_to_statistic_unit import (
    aggregate_to_stop_area,
    merge_stop_area_with_statistics,
)

# Parse the shared CRS configuration file into `crs`.
# safe_load is used, so only plain YAML data types are constructed.
with open("../../data/crs.yaml", mode="r") as crs_file:
    crs = yaml.safe_load(crs_file)

In [2]:
# Run configuration: these three values select which input/output folders the cells below use.
CITY = "madrid"  # city folder name under ../../output/ and ../../data/statistics/
VERSION = "20250415"  # output version folder under ../../output/{CITY}/ (looks like a YYYYMMDD stamp — TODO confirm)
YEAR = 2020  # year folder of the gini/income statistics GeoJSON files

In [3]:
# Folder holding the pipeline outputs for the configured city and version.
path = f"../../output/{CITY}/{VERSION}/"

# Stop geometries: per the original pipeline note, each stop id comes with its
# multimodal area (5 min walk + 10 min BKK), the ellipticity and size of that
# shape, and multipolygons describing the 5 min walk from each such station.
multimodal = gpd.read_file(f"{path}/stop_geometries_from_walk.geojson")
# Stop ids are handled as strings from here on.
multimodal["stop_id"] = multimodal["stop_id"].astype(str)

# Isochrones for all costing modes / ranges; only the 15-minute walking ones
# are kept, after which the two filter columns carry no information.
walking = gpd.read_file(f"{path}/isochrones.geojson")
is_walk_15 = (walking["costing"] == "walk") & (walking["range"] == 15)
walking15 = walking.loc[is_walk_15].drop(columns=["costing", "range"])

# Statistics layers for the configured city/year, without the
# year/municipality/province columns.
gini = gpd.read_file(f"../../data/statistics/{CITY}/{YEAR}/gini.geojson").drop(
    columns=["year", "municipality", "province"]
)
income = gpd.read_file(f"../../data/statistics/{CITY}/{YEAR}/income.geojson").drop(
    columns=["year", "municipality", "province"]
)

# Reproject both stop-area layers into the CRS of the gini layer so that
# they share one coordinate system with the statistics geometries.
walking15 = walking15.to_crs(gini.crs)
multimodal = multimodal.to_crs(gini.crs)

In [None]:
# Relate every 15-minute walking area to the statistic units (keyed by
# `tract_code`) and their `gini` value via the project helper, then aggregate
# the result per stop area.
walk15_to_tract = merge_stop_area_with_statistics(
    walking15, gini, stat_unit_id="tract_code", stat_column="gini"
)
walk15_gini = aggregate_to_stop_area(walk15_to_tract, ["gini"]).rename(
    columns={"gini": "walk15_gini"}
)
# Keep six decimals, matching the precision of the (disabled) CSV export below.
walk15_gini["walk15_gini"] = walk15_gini["walk15_gini"].round(6)
# Optional per-metric export, currently disabled:
# walk15_gini.to_csv(f"{path}/walk15_stop_gini.csv", index=False, float_format="%0.06f")

In [None]:
# Same procedure as for the walking areas, applied to the multimodal stop
# areas: relate them to the `tract_code` statistic units carrying `gini`,
# then aggregate per stop area.
multimodal_to_tract = merge_stop_area_with_statistics(
    multimodal, gini, stat_unit_id="tract_code", stat_column="gini"
)
multimodal_gini = aggregate_to_stop_area(multimodal_to_tract, ["gini"]).rename(
    columns={"gini": "multimodal_gini"}
)
# Keep six decimals, matching the precision of the (disabled) CSV export below.
multimodal_gini["multimodal_gini"] = multimodal_gini["multimodal_gini"].round(6)
# Optional per-metric export, currently disabled:
# multimodal_gini.to_csv(
#     f"{path}/multimodal_stop_gini.csv",
#     index=False,
#     float_format="%0.06f",
# )

In [6]:
# import pandas as pd
# ref = pd.read_csv("../../output/madrid/socioecon_merged2.csv")

In [7]:
# rw = ref[["stop_id", "weighted_gini_walk"]].merge(walk15_gini, on="stop_id")
# rw["weighted_gini_walk"].corr(rw["walk15_gini"])

In [8]:
# rm = ref[["stop_id", "weighted_gini_multi"]].merge(multimodal_gini, on="stop_id")
# rm["weighted_gini_multi"].corr(rm["multimodal_gini"])

In [None]:
# The income layer stores the statistic as `net_income_hh`; expose it under
# the output column name before handing it to the project helper.
income_walk_stats = income.rename(columns={"net_income_hh": "income_walk"})
walk15_inc_to_tract = merge_stop_area_with_statistics(
    walking15,
    income_walk_stats,
    stat_unit_id="tract_code",
    stat_column="income_walk",
)
# Aggregate per stop area and round the income to zero decimals.
walk15_inc = aggregate_to_stop_area(walk15_inc_to_tract, ["income_walk"])
walk15_inc["income_walk"] = walk15_inc["income_walk"].round()
# Optional per-metric export, currently disabled:
# walk15_inc.to_csv(
#     f"{path}/walk15_stop_income.csv",
#     index=False,
# )

In [None]:
# The income layer stores the statistic as `net_income_hh`; expose it under
# the output column name before handing it to the project helper.
income_multimodal_stats = income.rename(columns={"net_income_hh": "income_multimodal"})
multimodal_inc_to_tract = merge_stop_area_with_statistics(
    multimodal,
    income_multimodal_stats,
    stat_unit_id="tract_code",
    stat_column="income_multimodal",
)
# Aggregate per stop area and round the income to zero decimals.
multimodal_inc = aggregate_to_stop_area(multimodal_inc_to_tract, ["income_multimodal"])
multimodal_inc["income_multimodal"] = multimodal_inc["income_multimodal"].round()
# Optional per-metric export, currently disabled:
# multimodal_inc.to_csv(
#     f"{path}/multimodal_stop_income.csv",
#     index=False,
# )

In [12]:
# Combine the four per-stop indicators into a single table keyed by `stop_id`.
# `merge` defaults to an inner join, so only stops present in all four
# tables end up in the result.
stop_socioecon = (
    walk15_gini.merge(multimodal_gini, on="stop_id")
    .merge(walk15_inc[["stop_id", "income_walk"]], on="stop_id")
    .merge(multimodal_inc[["stop_id", "income_multimodal"]], on="stop_id")
)
# index=False: after the merges the frame only has a positional index, which
# would otherwise be written as an unnamed first column. This also matches
# the other exports in this notebook, which all pass index=False.
stop_socioecon.to_csv(f"{path}/stop_socioecon.csv", index=False)