## Example treatment: filtering, aggregating and mapping a first name

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path
from loguru import logger

# Move to the root directory of the project, but only when we are not already
# there: an unconditional chdir to the parent climbs one level higher on every
# re-run of this cell and breaks the relative "src" / "data" paths used by the
# following cells. The presence of the "src" package marks the project root.
if not (Path.cwd() / "src").is_dir():
    os.chdir(Path.cwd().parent)
logger.info("Current working directory : {}", Path.cwd())

[32m2026-02-04 17:08:16.887[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mCurrent working directory : c:\Users\alexa\Documents\sources\dashboard-prenoms[0m


In [3]:
import geopandas as gpd
import pandas as pd

from src.filter_aggregate.functions import (
    aggregate_df_name_by_year,
    merge_df_code_reg_dep,
    filter_and_complete_data,
    aggregate_df_by_sexe,
)
from src.visualisation.plot import (
    plot_ts_name_over_year,
    plot_map_static_matplotlib,
)

In [4]:
# Read the cleaned first-name table. The department, year and sex columns are
# all loaded as strings instead of letting pandas infer a numeric dtype.
data = pd.read_csv(
    "data/output/prenom_clean.csv",
    dtype=dict.fromkeys(("dept", "year", "sex"), str),
)

In [5]:
# get unique names to list
unique_names = data["name"].unique().tolist()
print(unique_names)

['_PRENOMS_RARES', 'AADIL', 'AAHIL', 'AARON', 'AARONN', 'AARUSH', 'AATHIRAN', 'AAYAN', 'AB', 'AB-DEL', 'ABASS', 'ABBAS', 'ABBES', 'ABD', 'ABD-ALLAH', 'ABD-EL', 'ABDALAH', 'ABDALLAH', 'ABDARRAHMAN', 'ABDE', 'ABDEL', 'ABDEL-AZIZ', 'ABDEL-HAKIM', 'ABDEL-HALIM', 'ABDEL-JALIL', 'ABDEL-KADER', 'ABDEL-KARIM', 'ABDEL-KRIM', 'ABDEL-MALEK', 'ABDEL-MALIK', 'ABDEL-NASSER', 'ABDEL-RAHIM', 'ABDEL-RAHMAN', 'ABDEL-RAHMANE', 'ABDELAH', 'ABDELAKIM', 'ABDELALI', 'ABDELATIF', 'ABDELAZIZ', 'ABDELAZZIZ', 'ABDELDJALIL', 'ABDELGHANI', 'ABDELHADI', 'ABDELHAFID', 'ABDELHAK', 'ABDELHAKIM', 'ABDELHALIM', 'ABDELHAMID', 'ABDELILAH', 'ABDELILLAH', 'ABDELJALIL', 'ABDELKADER', 'ABDELKARIM', 'ABDELKRIM', 'ABDELLA', 'ABDELLAH', 'ABDELLATIF', 'ABDELLAZIZ', 'ABDELMADJID', 'ABDELMAGID', 'ABDELMAJID', 'ABDELMALEK', 'ABDELMALIK', 'ABDELNACER', 'ABDELNASSER', 'ABDELNOUR', 'ABDELOUAHAB', 'ABDELOUHAB', 'ABDELRAHIM', 'ABDELRAHMAN', 'ABDELRAHMANE', 'ABDELRAHMEN', 'ABDELRANI', 'ABDELSAMAD', 'ABDELWAHAB', 'ABDENBI', 'ABDENNOUR', 'A

In [16]:
# Total count per first name, most frequent first. The "_PRENOMS_RARES"
# placeholder rows are excluded before grouping.
(
    data.loc[data["name"] != "_PRENOMS_RARES"]
    .groupby("name", as_index=False)
    .agg({"count": "sum"})
    .sort_values("count", ascending=False)
)

Unnamed: 0,name,count
9698,MARIE,2255641
6704,JEAN,1911549
12426,PIERRE,892291
10688,MICHEL,818084
989,ANDRÉ,709736
...,...,...
16188,ÉMERIC,3
83,ABDELSAMAD,3
81,ABDELRAHMEN,3
16145,ÉLIAM,3


In [18]:
# Total count per value of the "sex" column, kept as a regular column rather
# than as the index.
data.groupby("sex", as_index=False)["count"].sum()

Unnamed: 0,sex,count
0,1,40347191
1,2,38698338


In [6]:
# First name studied in the rest of the notebook; it is passed to
# filter_and_complete_data and plot_ts_name_over_year below.
# NOTE(review): names in the data are upper-case and some carry accents
# (e.g. "ANDRÉ"), so an accented spelling is presumably a separate entry — verify.
prenom = "RAPHAEL"

In [None]:
# Run the project helper on the full table for the chosen first name and
# display the result.
# NOTE(review): presumably keeps only rows for `prenom` and fills in missing
# combinations — confirm in src.filter_aggregate.functions.
df_filtered = filter_and_complete_data(data, prenom)
df_filtered

In [None]:
# Aggregate the filtered rows with the project's by-sex helper and display
# the result (output columns are defined in src.filter_aggregate.functions).
agg_sex = aggregate_df_by_sexe(df_filtered)
agg_sex

In [None]:
# Aggregate the filtered rows with the project's name-by-year helper; the
# result feeds the time-series plot in the next cell.
agg_year = aggregate_df_name_by_year(df_filtered)
agg_year

In [None]:
# Plot the yearly aggregate for the chosen first name (time series helper
# from src.visualisation.plot).
plot_ts_name_over_year(agg_year, prenom)

In [None]:
# Read the region/department geo table from parquet and combine it with the
# filtered first-name rows; the merged frame is what the map plots consume.
# geopandas (gpd) is imported in the notebook's import cell with the other
# third-party packages, not here.
# NOTE(review): the join keys are chosen inside merge_df_code_reg_dep —
# confirm them in src.filter_aggregate.functions.
df_merge = merge_df_code_reg_dep(
    df_filtered,
    gpd.read_parquet("data/geojson/output/region_departement.parquet"),
)
df_merge

In [None]:
# Spot-check the merged rows for year "2010" and department code "2B"
# (both compared as strings).
df_merge.loc[df_merge["year"].eq("2010") & df_merge["code_dep"].eq("2B")]

In [None]:
# Static matplotlib map of the merged data for "2010".
# NOTE(review): "dep" presumably selects department-level granularity —
# confirm in src.visualisation.plot.
plot_map_static_matplotlib(df_merge, "2010", "dep")

In [None]:
# Static matplotlib map of the merged data for "2010".
# NOTE(review): "reg" presumably selects region-level granularity —
# confirm in src.visualisation.plot.
plot_map_static_matplotlib(df_merge, "2010", "reg")