In [1]:
import altair as alt
import geopandas as gpd
import pandas as pd
import toolz

def custom(data):
    return toolz.curried.pipe(data, alt.to_json(filename='altdata/{prefix}-{hash}.{extension}') )
alt.data_transformers.register('custom', custom)
alt.data_transformers.enable('custom')

DataTransformerRegistry.enable('custom')

In [2]:
# Load the shapefile
shp_data = gpd.read_file('departements-version-simplifiee.geojson')
shp_data = shp_data[shp_data['nom'] != 'NaN']

shp_data['centroid_x'] = shp_data.geometry.centroid.x
shp_data['centroid_y'] = shp_data.geometry.centroid.y

# Create an Altair chart from the geoshape object
chart = alt.Chart(shp_data).mark_geoshape(
  fill='grey', stroke='white', strokeWidth=0.5
).properties(
  width = 700,
  height = 700
)

# Display the chart
chart


  shp_data['centroid_x'] = shp_data.geometry.centroid.x

  shp_data['centroid_y'] = shp_data.geometry.centroid.y


In [3]:
names = pd.read_csv("dpt2020.csv", sep=";")
names.drop(names[names.preusuel == '_PRENOMS_RARES'].index, inplace=True)
names.drop(names[names.dpt == 'XX'].index, inplace=True)
names.drop(names[names.dpt == 'Nan'].index, inplace=True)

names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
2799395,2,LAURENE,1997,71,3
3618953,2,SYLVIE,1947,25,3
3509933,2,SANA,2013,14,3
1836872,2,AMEL,1988,93,14
2864868,2,LINDA,1973,86,11


In [4]:
# Select most given names

ids = names.groupby(['dpt','annais','sexe'])['nombre'].idxmax()
most_given_names = names.loc[ids]
most_given_names.sample(10)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
1591415,1,THOMAS,2001,78,233
1249339,1,NATHAN,2011,30,63
3050587,2,MARIE,1913,83,313
3051787,2,MARIE,1926,61,147
3010977,2,MANON,1994,16,43
3620334,2,SYLVIE,1963,1,144
2782872,2,LAURA,1992,20,63
927399,1,JULIEN,1984,36,63
1249278,1,NATHAN,2010,68,94
2640887,2,JADE,2015,973,15


In [5]:
# Merge with geoshape
global_gdf = shp_data.merge(most_given_names, how='right', left_on='code', right_on='dpt')



global_gdf.sample(50)

Unnamed: 0,code,nom,geometry,centroid_x,centroid_y,sexe,preusuel,annais,dpt,nombre
8042,34.0,Hérault,"POLYGON ((3.35836 43.91383, 3.42445 43.91160, ...",3.368095,43.579246,1,JEAN,1928,34,316
19908,83.0,Var,"MULTIPOLYGON (((6.43480 43.01554, 6.45520 43.0...",6.244529,43.442624,1,JEAN,1932,83,243
19197,80.0,Somme,"POLYGON ((1.38155 50.06577, 1.45388 50.11033, ...",2.276052,49.958245,2,NICOLE,1939,80,176
3801,16.0,Charente,"POLYGON ((-0.10294 45.96966, -0.04143 45.99348...",0.202366,45.718699,2,AURÉLIE,1985,16,71
7435,31.0,Haute-Garonne,"POLYGON ((0.95398 43.78737, 0.97780 43.78644, ...",1.173295,43.358874,2,JULIE,1987,31,173
7096,30.0,Gard,"POLYGON ((3.37365 44.17076, 3.43083 44.14800, ...",4.180297,43.993572,1,JEAN,1939,30,321
9939,42.0,Loire,"POLYGON ((3.89953 46.27591, 3.90940 46.25773, ...",4.16529,45.727626,2,MARIE,1908,42,1040
16232,68.0,Haut-Rhin,"POLYGON ((7.19828 48.31047, 7.24173 48.30243, ...",7.273442,47.859262,1,JOSEPH,1909,68,396
7400,31.0,Haute-Garonne,"POLYGON ((0.95398 43.78737, 0.97780 43.78644, ...",1.173295,43.358874,1,LAURENT,1970,31,327
15160,63.0,Puy-de-Dôme,"POLYGON ((2.56538 46.14303, 2.64069 46.11848, ...",3.140421,45.725961,1,SÉBASTIEN,1978,63,197


In [9]:
year = 1989
subset = global_gdf[global_gdf['annais'] == str(year)]

# alt.Chart(global_gdf).mark_geoshape(stroke='white').encode(
#     color=alt.Max('nombre'),
# ).properties(width=800, height=600)

# Separate subsets for boys and girls
subset_girls = subset[subset['sexe'] == 2]
subset_boys = subset[subset['sexe'] == 1]
subset_girls.sample(10)


# Merge boys and girls data to create tooltip information
tooltip_data = subset_girls[['code', 'preusuel']].merge(
    subset_boys[['code', 'preusuel']],
    on='code',
    suffixes=('_girl', '_boy')
)

# Merge tooltip data back with shape data
merged_data = shp_data.merge(tooltip_data, how='left', on='code')

merged_data.sample(10)

# Create Altair chart
chart = alt.Chart(merged_data).mark_geoshape(stroke='white').encode(
    tooltip=[alt.Tooltip('nom:N', title='Region'),
             alt.Tooltip('preusuel_girl:N', title='Most Given Name (Girl)'),
             alt.Tooltip('preusuel_boy:N', title='Most Given Name (Boy)')],
    color=alt.value('lightgrey')
).properties(
    width=800,
    height=600
)

seuil_minimal = 0.25

# Filtrer les départements par taille
filtered_data = merged_data[merged_data['geometry'].area > seuil_minimal]

# Labels pour les prénoms des filles (filtre appliqué)
text_girl = alt.Chart(filtered_data).mark_text(
    align='center',
    baseline='middle',
    fontSize=10,
    color='red'
).encode(
    longitude='centroid_x:Q',
    latitude='centroid_y:Q',
    text='preusuel_girl:N'
)

# Labels pour les prénoms des garçons (filtre appliqué)
text_boy = alt.Chart(filtered_data).mark_text(
    align='center',
    baseline='middle',
    dy=10,  # Décalage vertical
    fontSize=10,
    color='blue'
).encode(
    longitude='centroid_x:Q',
    latitude='centroid_y:Q',
    text='preusuel_boy:N'
)

# Display the chart
chart + text_girl + text_boy


  filtered_data = merged_data[merged_data['geometry'].area > seuil_minimal]


In [10]:
# Create Altair chart
chartG = alt.Chart(merged_data).mark_geoshape(stroke='white').encode(
    tooltip=[alt.Tooltip('nom:N', title='Region'),
             alt.Tooltip('preusuel_girl:N', title='Most Given Name (Girl)')],
    color=alt.Color('preusuel_girl:N')
).properties(
    width=800,
    height=600
)

chartB = alt.Chart(merged_data).mark_geoshape(stroke='white').encode(
    tooltip=[alt.Tooltip('nom:N', title='Region'),
             alt.Tooltip('preusuel_boy:N', title='Most Given Name (Boy)')],
    color=alt.Color('preusuel_boy:N')
).properties(
    width=800,
    height=600
)

seuil_minimal = 0.25

# Filtrer les départements par taille
filtered_data = merged_data[merged_data['geometry'].area > seuil_minimal]

# Labels pour les prénoms des filles (filtre appliqué)
text_girl = alt.Chart(filtered_data).mark_text(
    align='center',
    baseline='middle',
    fontSize=10,
    color='black'
).encode(
    longitude='centroid_x:Q',
    latitude='centroid_y:Q',
    text='preusuel_girl:N'
)

text_boy = alt.Chart(filtered_data).mark_text(
    align='center',
    baseline='middle',
    dy=10,  # Décalage vertical
    fontSize=10,
    color='black'
).encode(
    longitude='centroid_x:Q',
    latitude='centroid_y:Q',
    text='preusuel_boy:N'
)

# Display the chart
(chartG + text_girl) & (chartB + text_boy)


  filtered_data = merged_data[merged_data['geometry'].area > seuil_minimal]
