In [1]:
import altair as alt
import pandas as pd
import geopandas as gpd # Requires geopandas -- e.g.: conda install -c conda-forge geopandas
alt.data_transformers.enable('json') # Let Altair/Vega-Lite work with large data sets

# `pass` is a statement, so the cell ends without a trailing expression for
# Jupyter to echo (presumably added to hide the object returned by enable()).
pass

In [2]:
# Load the first-name counts and discard the placeholder rows in one pass:
# rows whose name is the '_PRENOMS_RARES' bucket and rows whose département
# code is 'XX'. Filtering with a boolean mask (instead of two in-place drops)
# builds `names` in a single expression and keeps the original index labels.
names = (
    pd.read_csv("dpt2020.csv", sep=";")
    .loc[lambda df: (df["preusuel"] != "_PRENOMS_RARES") & (df["dpt"] != "XX")]
)

# Random preview; no seed is set, so the rows shown differ between runs.
names.sample(5)

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
1386428,1,RAYANE,1997,69,38
1760038,2,AGATHE,2015,53,10
1935675,2,ANNIE,1961,65,18
2138000,2,CHLOÉ,2002,2,58
2534859,2,GINETTE,1945,89,16


In [3]:
# Read the simplified département outlines into a GeoDataFrame.
depts = gpd.read_file('departements-version-simplifiee.geojson')

# Random five-row preview (unseeded, so it changes from run to run).
depts.sample(n=5)

Unnamed: 0,code,nom,geometry
52,52,Haute-Marne,"POLYGON ((4.67018 48.53189, 4.71801 48.54199, ..."
64,64,Pyrénées-Atlantiques,"POLYGON ((-0.24284 43.58498, -0.21061 43.59324..."
86,86,Vienne,"POLYGON ((-0.10212 47.06480, -0.09806 47.09135..."
58,58,Nièvre,"POLYGON ((2.87463 47.52042, 2.84890 47.53754, ..."
79,79,Deux-Sèvres,"POLYGON ((-0.89196 46.97582, -0.85592 46.97908..."


### Choix des prénoms populaires

In [4]:
# Yearly totals: add up the per-département counts for each (name, sex, year).
group_name = (
    names
    .groupby(['preusuel', 'sexe', 'annais'], as_index=False)['nombre']
    .sum()
)

# Highest single-year total ever reached by each (name, sex) pair.
max_name = (
    group_name
    .groupby(['preusuel', 'sexe'], as_index=False)['nombre']
    .max()
)

# For sex code 1 and then sex code 2: filter first, then sort by peak size and
# keep the two largest peaks.
max_name_1, max_name_2 = (
    max_name.loc[max_name['sexe'] == sex_code]
    .sort_values(by='nombre', ascending=False)
    .head(2)
    for sex_code in (1, 2)
)

# Stack both selections (sex code 1 rows first) and display them.
max_name_12 = pd.concat([max_name_1, max_name_2])
max_name_12

Unnamed: 0,preusuel,sexe,nombre
6685,JEAN,1,53584
10703,MICHEL,1,32582
9694,MARIE,2,52150
11380,NATHALIE,2,31410


In [5]:
# Recover the year of each peak by matching (name, sex, count) back against the
# yearly totals. A name that reached the same peak count in several years
# yields one row per such year.
merge_name = max_name_12.merge(
    group_name,
    how='left',
    on=['preusuel', 'sexe', 'nombre'],
)

In [6]:
# Show the selected names together with the year (annais) of their peak count.
merge_name

Unnamed: 0,preusuel,sexe,nombre,annais
0,JEAN,1,53584,1946
1,MICHEL,1,32582,1947
2,MARIE,2,52150,1901
3,NATHALIE,2,31410,1966


In [7]:
# Yearly totals per (name, year, sex); this is the data behind the hover curves.
# Same aggregation as group_name, but grouped in name -> year -> sex order.
group_name2 = (
    names
    .groupby(['preusuel', 'annais', 'sexe'], as_index=False)['nombre']
    .sum()
)

In [8]:
# Interactive chart: one bar per selected name at its peak year; hovering a bar
# or its label reveals that name's full yearly curve.
hover_filter = alt.selection_point(on='mouseover', fields=['preusuel'], empty=False)

# Single colour encoding shared by every layer so bars, labels and curves agree.
sex_color = alt.Color('sexe:N', scale=alt.Scale(domain=[1, 2], range=['blue', 'red']))

# `interpolate` only applies to line/area marks, so it is not set on the bars.
bar_names = alt.Chart(merge_name).mark_bar().encode(
    x=alt.X('annais:T', scale=alt.Scale(domain=['1900', '2020'])),
    y='nombre',
    color=sex_color,
    tooltip=['preusuel', 'nombre']
).add_params(hover_filter)

# Text labels on top of bars. Built from bar_names, so the labels already carry
# its encodings and the hover selection; adding the parameter a second time
# would only register a duplicate.
text = bar_names.mark_text(
    align='center',
    baseline='bottom',
    dy=-5  # Adjust vertical position
).encode(
    text='preusuel:N',
)

# Yearly curve of the hovered name. With empty=False the filter keeps no rows
# until something is hovered, so a separate opacity toggle is unnecessary.
line = alt.Chart(group_name2).mark_line(interpolate='monotone').encode(
    x='annais:T',
    y='nombre',
    color=sex_color,
).transform_filter(hover_filter)

final_chart = bar_names + text + line
final_chart = final_chart.properties(width=800, height=400).resolve_scale(y='shared', x='shared')
final_chart

In [9]:
# Inspect every raw record for the first name FRIDA.
names.loc[names['preusuel'].eq('FRIDA')]

Unnamed: 0,sexe,preusuel,annais,dpt,nombre
2486502,2,FRIDA,1900,57,3
2486503,2,FRIDA,1900,67,7
2486504,2,FRIDA,1900,68,8
2486505,2,FRIDA,1901,67,8
2486506,2,FRIDA,1901,68,8
...,...,...,...,...,...
2486605,2,FRIDA,2018,69,3
2486606,2,FRIDA,2018,75,8
2486607,2,FRIDA,2019,13,4
2486608,2,FRIDA,2019,67,4


### Choix des prénoms non-populaires

In [10]:
# Restrict the data to a hand-picked list of first names.
unpopular_names = names[
    names['preusuel'].isin([
        'ELISABETH', 'FRIDA', 'JOHN', 'PABLO', 'NELSON',
        'ELVIS', 'ADOLPHE', 'SERENA', 'EVA', 'ROSA',
    ])
]

# Yearly totals for those names, summed over départements.
group_unpopular = (
    unpopular_names
    .groupby(['preusuel', 'sexe', 'annais'], as_index=False)['nombre']
    .sum()
)

# Peak yearly total per (name, sex), ordered from the largest peak down.
max_unpopular = (
    group_unpopular
    .groupby(['preusuel', 'sexe'], as_index=False)['nombre']
    .max()
    .sort_values(by='nombre', ascending=False)
)
max_unpopular

Unnamed: 0,preusuel,sexe,nombre
3,ELISABETH,2,4439
5,EVA,2,3330
9,PABLO,1,942
0,ADOLPHE,1,560
10,ROSA,2,442
7,JOHN,1,394
11,SERENA,2,177
8,NELSON,1,175
4,ELVIS,1,97
6,FRIDA,2,37


In [11]:
# Look up the year of each peak by matching (name, sex, count) against the
# yearly totals; a peak count reached in several years gives one row per year.
merge_unpopular = max_unpopular.merge(
    group_unpopular,
    how='left',
    on=['preusuel', 'sexe', 'nombre'],
)
merge_unpopular

Unnamed: 0,preusuel,sexe,nombre,annais
0,ELISABETH,2,4439,1952
1,EVA,2,3330,2003
2,PABLO,1,942,2020
3,ADOLPHE,1,560,1907
4,ROSA,2,442,1901
5,JOHN,1,394,1982
6,SERENA,2,177,2012
7,NELSON,1,175,2014
8,ELVIS,1,97,1978
9,FRIDA,2,37,1909


In [12]:
# Yearly totals per (name, year, sex) for the hand-picked names; feeds the
# hover curves of the chart below.
group_unpopular2 = (
    unpopular_names
    .groupby(['preusuel', 'annais', 'sexe'], as_index=False)['nombre']
    .sum()
)

In [13]:
# Interactive chart for the hand-picked names: one bar per name at its peak
# year; hovering a bar or its label reveals that name's full yearly curve.
hover_filter = alt.selection_point(on='mouseover', fields=['preusuel'], empty=False)

# Single colour encoding shared by every layer so bars, labels and curves agree.
sex_color = alt.Color('sexe:N', scale=alt.Scale(domain=[1, 2], range=['blue', 'red']))

# `interpolate` only applies to line/area marks, so it is not set on the bars.
bar_names = alt.Chart(merge_unpopular).mark_bar().encode(
    x=alt.X('annais:T', scale=alt.Scale(domain=['1900', '2020'])),
    y='nombre',
    color=sex_color,
    tooltip=['preusuel', 'nombre', 'annais:T']
).add_params(hover_filter)

# Text labels on top of bars. Built from bar_names, so the labels already carry
# its encodings and the hover selection; adding the parameter a second time
# would only register a duplicate.
text = bar_names.mark_text(
    align='center',
    baseline='bottom',
    dy=-5  # Adjust vertical position
).encode(
    text='preusuel:N',
)

# Yearly curve of the hovered name. 'monotone' is used rather than 'natural'
# because a natural cubic spline can overshoot between points (dipping below
# zero on sparse counts); it also matches the curve style of the first chart.
# With empty=False the filter keeps no rows until something is hovered, so a
# separate opacity toggle is unnecessary.
line = alt.Chart(group_unpopular2).mark_line(interpolate='monotone').encode(
    x='annais:T',
    y='nombre',
    color=sex_color,
).transform_filter(hover_filter)

final_chart = bar_names + text + line
final_chart = final_chart.properties(width=800, height=400).resolve_scale(y='shared', x='shared')
final_chart