In [31]:
import polars as pl
import json
import altair as alt
import geopolars as gpl

In [9]:
# Load the raw dataset from the project-relative data directory.
# Later cells read its "name", "dpt" and "sexe" columns.
df = pl.read_csv("data/dataset.csv")

In [10]:
# Reusable boolean expression: true for rows whose name equals the literal
# "_PRENOMS_RARES" (presumably the bucket for rare first names; verify against
# the dataset documentation).
is_rare = pl.col("name").eq("_PRENOMS_RARES")

In [14]:
# The 20 names that occur in the most rows of `df`, ignoring rows matched by
# `is_rare`. Only the "name" column is kept so the frame can serve as a join key.
top_names = (
    df.filter(~is_rare)
    .get_column("name")
    .value_counts()
    .sort("count", descending=True)
    .head(20)
    .select("name")
)

In [18]:
# Row count per (name, dpt, sexe) combination, restricted to the top names:
# the inner join against `top_names` acts as a filter on "name".
# Rows containing a null in any column are dropped from the result.
name_dpt_sexe = (
    df.join(top_names, on="name", how="inner")
    .group_by(["name", "dpt", "sexe"])
    .len()
    .drop_nulls()
)
name_dpt_sexe

name,dpt,sexe,len
str,i64,str,u32
"""CAMILLE""",47,"""M""",47
"""CLAUDE""",974,"""F""",35
"""HÉLÈNE""",79,"""F""",99
"""CAMILLE""",20,"""F""",57
"""PIERRE""",82,"""M""",114
…,…,…,…
"""CAMILLE""",972,"""M""",76
"""LOUIS""",68,"""M""",112
"""PAUL""",30,"""M""",117
"""CLAUDE""",17,"""F""",48


In [19]:
# Bubble chart: one circle per row of `name_dpt_sexe`, positioned by department
# and name, sized by the row count and coloured by sex.
alt.Chart(name_dpt_sexe).mark_circle().encode(
    x="dpt:N",
    y="name:N",
    size="len",
    color="sexe:N",
)

In [23]:
# Keep only department codes 970 through 979, both bounds included.
dom_tom = name_dpt_sexe.filter(
    (pl.col("dpt") >= 970) & (pl.col("dpt") <= 979)
)

In [24]:
# One bar-chart panel per department in `dom_tom`: names on the y axis (ordered
# by the x value), row counts on the x axis, bars coloured by sex.
alt.Chart(dom_tom).mark_bar().encode(
    x="len:Q",
    y=alt.Y("name:N", sort="x"),
    color="sexe:N",
    column="dpt",
)

In [26]:
# Heatmap of row counts per (department, name) cell.
#
# `name_dpt_sexe` holds one row per (name, dpt, sexe), so a name given to both
# sexes contributes two rows to the same cell. Encoding the raw `len` would draw
# two overlapping marks and show only one of the values, so the counts are
# summed per cell instead. `mark_rect` is the mark intended for a grid of two
# discrete axes.
alt.Chart(name_dpt_sexe).mark_rect().encode(
    alt.X("dpt:N"),
    alt.Y("name:N"),
    alt.Color("sum(len):Q"),
)

In [49]:
# Remote GeoJSON file used as the source of department outlines.
url_geojson = "https://france-geojson.gregoiredavid.fr/repo/departements.geojson"

# URL-backed Altair data source; the rows are taken from the file's top-level
# "features" property rather than from the document root.
geodata = alt.Data(
    url=url_geojson,
    format=alt.DataFormat(property="features"),
)
geodata

Data({
  format: DataFormat({
    property: 'features'
  }),
  url: 'https://france-geojson.gregoiredavid.fr/repo/departements.geojson'
})

In [55]:
# Preview of the boundary data: every feature is drawn as a geoshape and
# coloured by its `properties.code` value.
alt.Chart(geodata).mark_geoshape().encode(
    alt.Color("properties.code:N"),
)

In [56]:
# Per-department point coordinates; the map cell below looks up the "lon" and
# "lat" columns of this table by its "dpt" column.
centers = pl.read_csv("data/dpt_positions.csv")

In [57]:
# Base chart shared by both map layers: rows for the name "MARIE" in departments
# numbered <= 100, enriched with the department outline and a point location.
data = (
    alt.Chart(name_dpt_sexe)
    .transform_filter(alt.datum.dpt <= 100)
    .transform_filter(alt.datum.name == "MARIE")
    # `dpt` is an integer column, whereas the GeoJSON `properties.code` is
    # expected to be a zero-padded string ("01", "02", ...; confirm against the
    # file). Looking up the raw integer would leave departments 1-9 without a
    # shape, so a padded string key is derived for the join.
    .transform_calculate(dpt_code="pad(toString(datum.dpt), 2, '0', 'left')")
    # Attach the whole matching GeoJSON feature under the "geo" field.
    .transform_lookup(
        lookup="dpt_code",
        from_=alt.LookupData(geodata, "properties.code"),
        as_="geo",
    )
    # Attach the "lon"/"lat" columns of `centers` for the matching department.
    .transform_lookup(
        lookup="dpt",
        from_=alt.LookupData(centers, "dpt", ["lon", "lat"]),
    )
)

# Background layer: department outlines coloured by row count.
# NOTE(review): if a department has rows for both sexes, two outlines overlap
# and only one `len` value is visible - consider aggregating per department.
bg = data.mark_geoshape().encode(color="len:Q", shape="geo:G")

# Foreground layer: one symbol per row at the department's coordinates.
# A point mark is used because circle marks have a fixed shape, which made the
# `shape="sexe:N"` encoding a no-op; `filled=True` keeps the solid look.
fg = data.mark_point(filled=True).encode(
    longitude="lon:Q",
    latitude="lat:Q",
    shape="sexe:N",
)

bg + fg