In [1]:
import altair as alt
import pandas as pd
import geopandas as gpd # Requires geopandas -- e.g.: conda install -c conda-forge geopandas
import toolz

import ipywidgets as widgets
from ipywidgets import interactive, VBox, HBox
from IPython.display import display, clear_output

import os

def custom(data):
    """Altair data transformer that stores chart data in external JSON files.

    Instead of embedding the data in the chart specification, the data is
    written to ``altdata/{prefix}-{hash}.{extension}`` and the chart refers
    to that file.

    Parameters
    ----------
    data
        The chart data handed over by Altair.

    Returns
    -------
    Whatever ``alt.to_json`` returns for the written file.
    """
    # The JSON writer opens the target path directly and does not create
    # missing parent directories, so make sure 'altdata/' exists first.
    os.makedirs('altdata', exist_ok=True)
    return toolz.curried.pipe(data, alt.to_json(filename='altdata/{prefix}-{hash}.{extension}'))
# Register the function above under the name 'custom', then select it as the
# active Altair data transformer.
alt.data_transformers.register('custom', custom)
alt.data_transformers.enable('custom')

# No-op final statement; presumably here so the cell does not echo the value
# returned by enable() -- TODO confirm.
pass

In [2]:
# Load the per-department first-name counts and clean them in a single chain:
#   1. discard the '_PRENOMS_RARES' aggregate rows and the 'XX' department rows,
#   2. cast the year and count columns to integers,
#   3. recode the sex column from 1/2 to 'M'/'F',
#   4. rename the year column to 'annee'.
df = (
    pd.read_csv("dpt2020.csv", sep=";")
    .loc[lambda d: (d.preusuel != '_PRENOMS_RARES') & (d.dpt != 'XX')]
    .astype({'annais': int, 'nombre': int})
    .assign(sexe=lambda d: d['sexe'].replace({1: 'M', 2: 'F'}))
    .rename(columns={"annais": "annee"})
)

In [34]:
def _top_name_shares(df_aggregated, sex, number_of_names):
    """Return the shares and names of the most frequent names for one sex.

    Parameters
    ----------
    df_aggregated : pandas.DataFrame
        Frame with columns 'preusuel', 'sexe' and 'nombre' (summed counts).
    sex : str
        Value of the 'sexe' column to keep ('M' or 'F').
    number_of_names : int
        Number of ranks to return.

    Returns
    -------
    tuple of (list, list)
        ``(shares, names)``, both of length ``number_of_names``. Each share
        is the name's count divided by the total count for that sex. Ranks
        with no name are padded with NaN shares and empty-string names.
    """
    df_sex = df_aggregated[df_aggregated['sexe'] == sex]
    total = df_sex['nombre'].sum()

    # nlargest already returns the rows in descending order of 'nombre',
    # so no additional sort is needed.
    top = df_sex.nlargest(number_of_names, 'nombre')

    shares = (top['nombre'] / total).tolist()
    names = top['preusuel'].tolist()

    # Pad with NaN rather than 0: the chart uses a log scale, on which zero
    # cannot be represented, whereas missing values are simply not drawn.
    missing = number_of_names - len(names)
    return shares + [float('nan')] * missing, names + [''] * missing


def createHistogram(starting_year, ending_year, number_of_names=300):
    """Build an overlapping step-area chart of name popularity by rank.

    For the years between ``starting_year`` and ``ending_year`` (inclusive),
    the counts in the global ``df`` are summed per name and sex. For each
    sex, the ``number_of_names`` most frequent names are ranked, and each
    name's share of all births of that sex is plotted against its rank on a
    logarithmic y axis.

    Parameters
    ----------
    starting_year : int
        First year of the range. If it is greater than ``ending_year`` the
        two bounds are swapped.
    ending_year : int
        Last year of the range.
    number_of_names : int, optional
        Number of ranks shown per sex (default 300).

    Returns
    -------
    alt.Chart
        The chart, with a tooltip giving the boy and girl name at each rank.
    """
    if starting_year > ending_year:
        starting_year, ending_year = ending_year, starting_year

    df_filtered = df[df['annee'].between(starting_year, ending_year)]

    # Aggregate the counts per name and sex over the selected years.
    df_aggregated = df_filtered.groupby(['preusuel', 'sexe'], as_index=False)['nombre'].sum()

    boys_list, boys_names = _top_name_shares(df_aggregated, 'M', number_of_names)
    girls_list, girls_names = _top_name_shares(df_aggregated, 'F', number_of_names)

    # One row per rank, boys and girls side by side.
    df_overlap = pd.DataFrame({
        'Rank': list(range(1, number_of_names + 1)),
        'Boys': boys_list,
        'Girls': girls_list,
        'Boys_Names': boys_names,
        'Girls_Names': girls_names
    })

    # Fold the two share columns into one so both series share the same
    # x/y encoding and are drawn as overlapping, unstacked areas.
    chart = alt.Chart(df_overlap).transform_fold(
        ['Boys', 'Girls'],
        as_=['Columns', 'Values']
    ).mark_area(
        opacity=0.5,
        interpolate='step'
    ).encode(
        alt.X('Rank:Q', title='Rank'),
        alt.Y('Values:Q', stack=None, scale=alt.Scale(type='log'), title='Percentage compared to all boys or girls', axis=alt.Axis(format='%')),
        color=alt.Color('Columns:N', scale=alt.Scale(domain=['Boys', 'Girls'], range=['blue', 'pink'])),
        tooltip=[
            alt.Tooltip('Rank:Q', title='Rank'),
            alt.Tooltip('Boys_Names:N', title='Boy Name'),
            alt.Tooltip('Girls_Names:N', title='Girl Name'),
            alt.Tooltip('Boys:Q', title='Boys', format='%'),
            alt.Tooltip('Girls:Q', title='Girls', format='%')
        ]
    ).properties(
        width='container',
        title=f'Distribution of baby names from year {starting_year} to {ending_year}'
    )

    return chart

In [35]:
# Interactive sliders for choosing the start and end years
start_year_slider = widgets.IntSlider(value=2000, min=1900, max=2020, step=1, description='Start Year')
end_year_slider = widgets.IntSlider(value=2010, min=1900, max=2020, step=1, description='End Year')

# Container widget that the chart is rendered into
output = widgets.Output()


def update_chart(starting_year, ending_year):
    """Replace the content of the ``output`` widget with a fresh chart.

    The previous content is cleared (deferred until new output arrives) and
    the chart built by ``createHistogram`` for the given years is displayed.
    """
    with output:
        clear_output(wait=True)
        display(createHistogram(starting_year, ending_year))

# Set up the interaction: bind update_chart's arguments to the two sliders.
# NOTE(review): interactive_plot is never displayed in this cell; it appears
# to be created only so that slider changes trigger update_chart -- confirm.
interactive_plot = interactive(update_chart, starting_year=start_year_slider, ending_year=end_year_slider)

# Layout built from the existing sliders, to avoid duplicating the widgets
layout = VBox([HBox([start_year_slider, end_year_slider]), output])

# Display
display(layout)

# Initialize the chart with the sliders' default values
update_chart(start_year_slider.value, end_year_slider.value)

VBox(children=(HBox(children=(IntSlider(value=2000, description='Start Year', max=2020, min=1900), IntSlider(v…