In [15]:
import polars as pl
import geopandas as gpd
from utils.loader_local import LoaderLocal


In [16]:
# Read data
def read_etat_des_ascenceurs():
    df = pl.read_csv('/home/onyxia/work/hackathon_mobilites_2025/data/raw/etat-des-ascenseurs.csv', 
    separator=";",
    truncate_ragged_lines=True)
    return df

In [17]:
df_ascensceurs = read_etat_des_ascenceurs()
df_ascensceurs.head()

zdcid,zdcxepsg2154,zdcyepsg2154,liftid,liftreason,liftstatus,privateelevatorid,liftsituation,liftdirection,liftstateupdate,liftstate,zdcname,liftmode,centroidzdc
i64,i64,i64,i64,str,str,str,str,str,str,i64,str,str,str
71485,645032,6865544,318,,"""available""","""PMR 1 AQ-DEFESP1""","""Surface <> Salle d'accès <> Qu…","""La Défense - Grande Arche""","""2025-11-13T14:10""",1,"""Esplanade de la Défense""","""Metro""","""48.88756136779482, 2.250392485…"
483315,649868,6856158,1036,,"""available""","""ASC 2 - EQ11-A005-000000308""","""Couloir""","""Porte de Clignancourt""","""2025-11-13T14:10""",1,"""Bagneux - Lucie Aubrac""","""Metro""","""48.80354814483759, 2.317444747…"
70441,649888,6856847,1041,,"""available""","""ASC 2 - EQ11-A005-000000306""","""Couloir""","""Porte de Clignancourt""","""2025-11-13T14:10""",1,"""Barbara""","""Metro""","""48.80974606530683, 2.317635973…"
70441,649888,6856847,1043,,"""available""","""ASC 6 - EQ11-A005-000000303""","""Salle d'accès""","""Toutes directions""","""2025-11-13T14:10""",1,"""Barbara""","""Metro""","""48.80974606530683, 2.317635973…"
70586,650128,6857831,749,,"""available""","""PBS2 (FRANCELIFT)""","""À l'avant du quai M4""","""Porte de Clignancourt""","""2025-11-13T14:10""",1,"""Mairie de Montrouge""","""Metro""","""48.8186139988321, 2.3207885883…"


In [28]:
ref_table_finale_path = "/home/onyxia/work/hackathon_mobilites_2025/data/enrich/final_table.gpq"

stations = LoaderLocal.loader_geoparquet(ref_table_finale_path)
stations.count()

geo_point_2d                     590
id_ref_zdc                       590
nom_zda                          590
station_clean                    590
res_com                          590
mode                             590
exploitant                       590
geometry                         590
ligne                            128
station                          590
facilite_acces_code              590
facilite_acces                   590
nombre_facilite_acces_station    590
id_zdc                           490
total_validation                 490
total_validation_amethyste       490
pct_amethyste                    490
dtype: int64

In [32]:
stations_pl.filter(pl.col('id_ref_zdc')=='71485.0')

geo_point_2d,id_ref_zdc,nom_zda,station_clean,res_com,mode,exploitant,ligne,station,facilite_acces_code,facilite_acces,nombre_facilite_acces_station,id_zdc,total_validation,total_validation_amethyste,pct_amethyste
str,str,str,str,str,str,str,str,str,str,str,i64,str,f64,f64,f64
"""48.88813847761218, 2.249792769…","""71485.0""","""Esplanade de La Défense""","""esplanadedeladéfense""","""METRO 1""","""METRO""","""RATP""",,"""Esplanade de La Défense""","""green""","""très facile d'accès (ascenseur…",1,"""71485.0""",137108230.0,19586890.0,14.2857


In [37]:
stations_geometry = stations.geometry.copy()

stations_pl = pl.from_pandas(stations.drop(columns="geometry"))

df_ascensceurs = df_ascensceurs.with_columns(
    pl.col("zdcid").cast(pl.Utf8)
)

print(df_ascensceurs.select('zdcid').head())

stations_pl = stations_pl.with_columns(
    pl.col("id_ref_zdc")
    .cast(pl.Float64, strict=False)   # 1) cast to float
    .cast(pl.Int64,   strict=False)   # 2) cast to int
    .cast(pl.Utf8,    strict=False)   # 3) cast to string
)
print(stations_pl.select('id_ref_zdc').head())

joined_pl = stations_pl.join(
    df_ascensceurs,
    left_on="id_ref_zdc",
    right_on="zdcid",
    how="left"
)
joined_gdf = gpd.GeoDataFrame(joined_pl.to_pandas(), geometry=stations_geometry)

shape: (5, 1)
┌────────┐
│ zdcid  │
│ ---    │
│ str    │
╞════════╡
│ 71485  │
│ 483315 │
│ 70441  │
│ 70441  │
│ 70586  │
└────────┘
shape: (5, 1)
┌────────────┐
│ id_ref_zdc │
│ ---        │
│ str        │
╞════════════╡
│ 71432      │
│ 73669      │
│ 72491      │
│ 71030      │
│ 71632      │
└────────────┘


In [None]:
joined_pl.

In [38]:
match_col = "zdcname"   # typically the join key is good

matched_count = joined_pl.filter(
    pl.col(match_col).is_not_null()
).count()

not_matched_count = joined_pl.filter(
    pl.col(match_col).is_null()
).count()

total = joined_pl.count()

matched_count, not_matched_count, total

(shape: (1, 29)
 ┌────────────┬────────────┬─────────┬────────────┬───┬───────────┬─────────┬──────────┬────────────┐
 │ geo_point_ ┆ id_ref_zdc ┆ nom_zda ┆ station_cl ┆ … ┆ liftstate ┆ zdcname ┆ liftmode ┆ centroidzd │
 │ 2d         ┆ ---        ┆ ---     ┆ ean        ┆   ┆ ---       ┆ ---     ┆ ---      ┆ c          │
 │ ---        ┆ u32        ┆ u32     ┆ ---        ┆   ┆ u32       ┆ u32     ┆ u32      ┆ ---        │
 │ u32        ┆            ┆         ┆ u32        ┆   ┆           ┆         ┆          ┆ u32        │
 ╞════════════╪════════════╪═════════╪════════════╪═══╪═══════════╪═════════╪══════════╪════════════╡
 │ 678        ┆ 678        ┆ 678     ┆ 678        ┆ … ┆ 675       ┆ 678     ┆ 678      ┆ 678        │
 └────────────┴────────────┴─────────┴────────────┴───┴───────────┴─────────┴──────────┴────────────┘,
 shape: (1, 29)
 ┌────────────┬────────────┬─────────┬────────────┬───┬───────────┬─────────┬──────────┬────────────┐
 │ geo_point_ ┆ id_ref_zdc ┆ nom_zda ┆ station_cl