In [2]:
import os

import altair as alt
import geopandas as gpd
import pandas as pd
import toolz

def custom(data):
    """Altair data transformer that stores chart data in a JSON file.

    The dataset is handed to ``alt.to_json`` with a filename template that
    points into the ``altdata/`` folder.

    Parameters
    ----------
    data
        The dataset Altair passes to the active data transformer.

    Returns
    -------
    The value produced by ``alt.to_json`` for ``data``.
    """
    # The JSON file is written into altdata/; create the folder up front so
    # the first chart render does not fail on a fresh checkout.
    os.makedirs('altdata', exist_ok=True)
    return toolz.curried.pipe(
        data,
        alt.to_json(filename='altdata/{prefix}-{hash}.{extension}'),
    )


# Register the transformer under the name 'custom' and make it the active one.
# (A former extra call on the bare name `DataTransformerRegistry` was removed:
# that name is never imported, so it raised NameError, and the transformer is
# already enabled by the line below.)
alt.data_transformers.register('custom', custom)
alt.data_transformers.enable('custom')

In [17]:
# Load the department outlines (a GeoJSON file) into a GeoDataFrame
shp_data = gpd.read_file('departements-version-simplifiee.geojson')

# Keep only rows with a usable name. Comparing against the string 'NaN' alone
# only removes that literal text and lets genuinely missing values through,
# so missing values are excluded explicitly as well.
has_name = shp_data['nom'].notna() & (shp_data['nom'] != 'NaN')
shp_data = shp_data[has_name]

# Create an Altair chart from the geoshape object
chart = (
    alt.Chart(shp_data)
    .mark_geoshape(fill='grey', stroke='white', strokeWidth=0.5)
    .properties(width=700, height=700)
)

# Display the chart
chart

In [25]:
# Read `dpt` and `annais` as text: both columns are compared against strings
# further down (placeholder values here, str(year) and the geojson `code`
# later), so their dtype must not depend on type inference.
names = pd.read_csv(
    "dpt2020.csv",
    sep=";",
    dtype={"dpt": str, "annais": str},
)

# Build one mask instead of several in-place drops. Rows are removed when the
# name is the '_PRENOMS_RARES' placeholder, or when `dpt` is 'XX', the literal
# text 'Nan', or actually missing (the string comparison alone never matches
# real missing values).
is_usable = (
    (names["preusuel"] != "_PRENOMS_RARES")
    & (names["dpt"] != "XX")
    & (names["dpt"] != "Nan")
    & names["dpt"].notna()
)
# .copy() gives later cells an independent frame (original index labels kept).
names = names[is_usable].copy()

names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
990638,1,LEONARD,2003,49,4
1536796,1,STEPHAN,1970,75,24
2128104,2,CHARLOTTE,1940,22,3
407907,1,EDEN,2016,47,10
1983003,2,AURÉLIE,1974,12,5


In [26]:
# Select most given names:
# for each (dpt, annais, sexe) group, find the index label of the row with
# the largest `nombre`, then pull exactly those rows out of `names`.
group_keys = ['dpt', 'annais', 'sexe']
ids = names.groupby(group_keys)['nombre'].idxmax()

most_given_names = names.loc[ids]
most_given_names.sample(10)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
775614,1,JEAN,1952,42,535
950268,1,KEVIN,1991,63,147
774179,1,JEAN,1937,17,341
1200883,1,MICHEL,1954,39,131
1178694,1,MAXIME,2000,15,29
2047211,2,CAMILLE,1993,12,30
3054544,2,MARIE,1955,972,268
1341540,1,PHILIPPE,1967,65,97
779824,1,JEAN,1999,974,197
773260,1,JEAN,1927,38,353


In [36]:
# Merge with geoshape: attach the outline whose `code` equals the row's `dpt`.
# The right join keeps every row of `most_given_names`, including those whose
# `dpt` has no matching `code` (their shape columns are left empty).
global_gdf = shp_data.merge(
    most_given_names,
    left_on='code',
    right_on='dpt',
    how='right',
)

global_gdf.sample(50)

Unnamed: 0,code,nom,geometry,sexe,preusuel,annais,dpt,nombre
927,4.0,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.69209 44.18648, ...",2,CAMILLE,2000,4,18
14721,61.0,Orne,"POLYGON ((-0.84094 48.75222, -0.81927 48.75413...",2,LÉA,2000,61,58
22570,,,,1,JOSEPH,1909,972,54
3773,16.0,Charente,"POLYGON ((-0.10294 45.96966, -0.04143 45.99348...",2,NATHALIE,1971,16,153
17269,72.0,Sarthe,"POLYGON ((-0.05453 48.38200, -0.04463 48.37976...",2,MONIQUE,1943,72,240
1252,6.0,Alpes-Maritimes,"POLYGON ((6.88743 44.36105, 6.92257 44.35073, ...",1,JEAN,1921,6,256
10044,42.0,Loire,"POLYGON ((3.89953 46.27591, 3.90940 46.25773, ...",1,PHILIPPE,1961,42,328
7578,32.0,Gers,"POLYGON ((0.07605 43.98314, 0.14096 43.99468, ...",1,JEAN,1938,32,122
14177,59.0,Nord,"MULTIPOLYGON (((3.04040 50.15971, 3.06301 50.1...",2,NATHALIE,1970,59,1559
706,3.0,Allier,"POLYGON ((3.03207 46.79491, 3.04907 46.75808, ...",1,ENZO,2011,3,43


In [67]:
year = 1989
# Compare as text so the filter works whether `annais` holds strings or
# integers.
subset = global_gdf[global_gdf['annais'].astype(str) == str(year)]

# Separate subsets for girls (sexe == 2) and boys (sexe == 1)
subset_girls = subset[subset['sexe'] == 2]
subset_boys = subset[subset['sexe'] == 1]

# Merge boys and girls data to create tooltip information.
# Rows without a `code` (no matching outline) are dropped first: merge pairs
# null keys with each other, which would only add meaningless combinations
# that the left join below discards anyway.
tooltip_data = subset_girls.dropna(subset=['code'])[['code', 'preusuel']].merge(
    subset_boys.dropna(subset=['code'])[['code', 'preusuel']],
    on='code',
    suffixes=('_girl', '_boy')
)

# Merge tooltip data back with shape data; the left join keeps every outline,
# even when no name row exists for it in the selected year.
merged_data = shp_data.merge(tooltip_data, how='left', on='code')

# Create Altair chart
chart = alt.Chart(merged_data).mark_geoshape(stroke='white').encode(
    tooltip=[alt.Tooltip('nom:N', title='Region'),
             alt.Tooltip('preusuel_girl:N', title='Most Given Name (Girl)'),
             alt.Tooltip('preusuel_boy:N', title='Most Given Name (Boy)')],
    color=alt.value('lightgrey')
).properties(
    width=800,
    height=600
)

# Display the chart
chart