In [9]:
import geopandas as gpd
import pandas as pd
import altair as alt
import json

In [33]:
# 1. LOAD & PREP THE NAME DATA
csv_path = "Names_hints/dpt2020.csv"

# Read the year and department codes as text at parse time. Casting after the
# fact (read_csv(...).astype(str)) is too late: a code such as '01' that was
# already parsed as an integer comes back as '1'. Keeping the years as text
# also keeps the later comparison against the 'XXXX' placeholder well-defined.
df = (
    pd.read_csv(csv_path, sep=";", dtype={"annais": str, "dpt": str})
      # drop the '_PRENOMS_RARES' rows (presumably the pooled rare-names bucket)
      .query("preusuel != '_PRENOMS_RARES'")
)

# One row per (year, department, name): the counts are summed over every
# other column (the original note describes this as aggregating across sexes).
df_agg = (
    df.groupby(["annais", "dpt", "preusuel"], as_index=False)["nombre"]
      .sum()
)

# Total per (year, department) → denominator for the percentages.
# The totals are computed after the '_PRENOMS_RARES' rows were removed, so
# `pct` is a share of the retained names only.
tot = (
    df_agg.groupby(["annais", "dpt"], as_index=False)["nombre"]
          .sum()
          .rename(columns={"nombre": "total"})
)
df_agg = (
    df_agg.merge(tot, on=["annais", "dpt"])
          .assign(pct=lambda d: d["nombre"] / d["total"] * 100)
)
# keep top-5 names per (year, dept) and pivot wide
def top5(group, n=5):
    """Return the ``n`` rows of ``group`` with the largest ``nombre``, ranked.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to rank; must contain a ``nombre`` column.
    n : int, default 5
        Maximum number of rows to keep. The default reproduces the original
        top-5 behaviour.

    Returns
    -------
    pandas.DataFrame
        A new frame holding at most ``n`` rows in descending ``nombre`` order,
        with an added 1-based ``rank`` column. ``group`` is left unmodified.
    """
    top = group.nlargest(n, "nombre")
    # `assign` builds a new frame instead of writing a column into the
    # `nlargest` result in place.
    return top.assign(rank=range(1, len(top) + 1))

# Rank the names inside every (year, department) group. Iterating over the
# groups hands `top5` complete rows, grouping columns included; relying on
# `groupby.apply` to pass those columns is deprecated since pandas 2.2.
df_top5 = pd.concat(
    [top5(group) for _, group in df_agg.groupby(["annais", "dpt"])]
)

# One row per (year, department), one column per (field, rank)
wide = (
    df_top5
      .pivot(index=["annais", "dpt"], columns="rank", values=["preusuel", "pct"])
      .reset_index()
)

# flatten MultiIndex columns → annais, dpt, top1_name, … top5_name, top1_pct, … top5_pct
# Names are derived from the (field, rank) pairs that actually exist rather
# than from a fixed-length list, so a missing rank cannot cause a length
# mismatch or shift the labels.
field_suffix = {"preusuel": "name", "pct": "pct"}
wide.columns = [
    f"top{rank}_{field_suffix[field]}" if field in field_suffix else field
    for field, rank in wide.columns
]

# Pivoting text and numeric values together can leave the percentage columns
# as object dtype; make them explicitly numeric.
pct_columns = [col for col in wide.columns if col.endswith("_pct")]
wide = wide.astype({col: float for col in pct_columns})

df_wide_filtered = wide                         # table used in transform_lookup

# 2. LOAD DEPARTMENT GEOMETRY
geo_path = "Names_hints/departements-version-simplifiee.geojson"
# GeoJSON is UTF-8 by specification; state it explicitly so the file is not
# decoded with whatever the platform's default encoding happens to be.
with open(geo_path, encoding="utf-8") as f:
    fr_geojson = json.load(f)                   # GeoJSON stays as dict for Altair

# 3. BUILD THE ALTAIR CHOROPLETH WITH A YEAR SLIDER
# Filter out 'XXXX' values before calculating year range
year_data = wide[wide.annais != 'XXXX']

# Derive the range from the values that really are numbers, so any other
# non-numeric placeholder is ignored instead of breaking int().
numeric_years = pd.to_numeric(year_data["annais"], errors="coerce").dropna()
year_min, year_max = int(numeric_years.min()), int(numeric_years.max())

# The parameter name is used as an identifier on the Vega side, so it is kept
# ASCII-only (the chart cell further down does the same); the accented text is
# only the label shown next to the slider.
year_sel = alt.param(
    name="selected_year",
    value=year_max,
    bind=alt.binding_range(min=year_min, max=year_max, step=1, name="Année")
)


  .apply(top5)


In [39]:
# Choropleth of the most given first name per department, driven by a year slider.
#
# The per-(year, department) table is the chart's primary data: it is filtered
# down to the selected year first, and only then is each remaining row given
# its department shape through a lookup. Looking the table up *from* the shapes
# on `dpt` alone cannot work, because the table has one row per year for every
# department and a lookup returns a single match per key.

# Altair refuses to embed more than 5000 rows by default; the year × department
# table is expected to exceed that, so lift the limit for this notebook.
alt.data_transformers.disable_max_rows()

# Flatten each GeoJSON feature to {code, type, geometry} so the lookup key is a
# plain top-level field.
geo_features = [
    {
        "code": feature["properties"]["code"],
        "type": feature["type"],
        "geometry": feature["geometry"],
    }
    for feature in fr_geojson["features"]
]
known_codes = {feature["code"] for feature in geo_features}

# Rows that can be drawn: numeric years only (drops placeholders such as
# 'XXXX') and departments that have a shape in the GeoJSON file.
year_data = (
    df_wide_filtered
      .assign(
          # the slider value is a number, so the year column must be one too
          annais=lambda d: pd.to_numeric(d["annais"], errors="coerce"),
          # restore a leading zero if the code was ever parsed as an integer
          dpt=lambda d: d["dpt"].astype(str).str.zfill(2),
      )
      .dropna(subset=["annais"])
      .astype({"annais": int})
      .loc[lambda d: d["dpt"].isin(known_codes)]
)
year_min, year_max = int(year_data["annais"].min()), int(year_data["annais"].max())

# ASCII-only parameter name, distinct from Vega's built-in year() expression
# function; the accented label is only what is displayed next to the slider.
YEAR_PARAM = "selected_year"
year_sel = alt.param(
    name=YEAR_PARAM,
    value=year_max,
    bind=alt.binding_range(min=year_min, max=year_max, step=1, name="Année")
)

# One (name, share) pair of tooltip rows per rank. Every row gets its own
# title: tooltip entries are keyed by title, so five rows all titled "%"
# would collide.
rank_labels = ["1er", "2ᵉ", "3ᵉ", "4ᵉ", "5ᵉ"]
tooltips = []
for rank, label in enumerate(rank_labels, start=1):
    tooltips.append(alt.Tooltip(f"top{rank}_name:N", title=f"{label} prénom"))
    tooltips.append(alt.Tooltip(f"top{rank}_pct:Q", title=f"{label} – %", format=".2f"))

chart = (
    alt.Chart(year_data)
       .mark_geoshape(stroke="black", strokeWidth=0.2)
       .encode(
            color=alt.Color(
                "top1_name:N",
                title="Prénom le + donné",
                scale=alt.Scale(scheme="category20")
            ),
            tooltip=tooltips
       )
       .add_params(year_sel)
       # keep only the rows of the year chosen on the slider …
       .transform_filter(f"datum.annais == {YEAR_PARAM}")
       # … then attach the department shape to each of them
       .transform_lookup(
            lookup="dpt",
            from_=alt.LookupData(
                data=alt.InlineData(values=geo_features),
                key="code",
                fields=["type", "geometry"]
            )
       )
       .project("mercator")
       .properties(width=650, height=800)
)

In [40]:
# Bare last expression of the cell: renders the choropleth as the cell output.
chart