In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the first-name counts. The columns used below are:
#   sexe (1/2), preusuel (first name), annais (birth year),
#   dpt (département code) and nombre (count).
# 'dpt' is read as text on purpose: it is compared with string codes just
# below, and letting pandas infer the dtype could yield numeric (or mixed)
# values for which those comparisons silently never match.
df = pd.read_csv('dpt2020.csv', sep=';', dtype={'dpt': str})

# Rows to discard: the 'XX' placeholder département, codes 971-974
# (presumably the overseas départements - confirm against the data
# documentation) and the '_PRENOMS_RARES' bucket, which is not a real name.
_excluded_dpt = ['XX', '971', '972', '973', '974']
_keep = ~df['dpt'].isin(_excluded_dpt) & (df['preusuel'] != '_PRENOMS_RARES')

# .copy() makes the filtered frame independent of the raw one, so the column
# assignments below do not trigger SettingWithCopyWarning.
df = df.loc[_keep].copy()

# Recode sex from the numeric codes to the letters used by the charts.
df['sexe'] = df['sexe'].map(str).replace({'1': 'M', '2': 'F'})

# Years must be integers for range filtering and decade binning.
df['annais'] = df['annais'].astype(int)

df.head()

In [None]:
# Total count per first name over every year, sex and département.
df_group = df.groupby('preusuel')['nombre'].sum().reset_index()

# Names given more than 500 times overall; rarer names are dropped.
names = df_group.loc[df_group['nombre'].gt(500), 'preusuel'].to_numpy()

df = df.loc[df['preusuel'].isin(names)]

In [None]:
# Collapse the département dimension: one row per (sex, name, birth year)
# holding the summed count.
df = (
    df.groupby(['sexe', 'preusuel', 'annais'])['nombre']
    .sum()
    .reset_index()
)

In [None]:
# Bounds of the year range slider.
min_year = df['annais'].min()
max_year = df['annais'].max()

# Slider tick labels: one mark every 10 years, starting at the first year.
marks_year = {year: str(year) for year in range(min_year, max_year + 1, 10)}

In [None]:
def df_filter_partial_name(df, partial_name):
    """Keep the rows whose 'preusuel' value starts with `partial_name`.

    The comparison is a literal, case-sensitive prefix match. Rows whose
    'preusuel' is missing never match: without ``na=False`` a missing value
    would put NaN into the boolean mask and make the row selection raise.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a text column named 'preusuel'.
    partial_name : str
        Prefix to look for. An empty string matches every non-missing name.

    Returns
    -------
    pandas.DataFrame
        The matching rows of `df`, with their original index.
    """
    matches = df['preusuel'].str.startswith(partial_name, na=False)
    return df[matches]

In [None]:
def df_filter_genders(df, genders):
    """Return the rows of `df` whose 'sexe' value is one of `genders`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'sexe' column.
    genders : list-like
        Accepted 'sexe' values (for example ``['M']`` or ``['M', 'F']``).

    Returns
    -------
    pandas.DataFrame
        The selected rows, with their original index.
    """
    wanted = df['sexe'].isin(genders)
    return df.loc[wanted]

In [None]:
def df_filter_years(df, year_start=None, year_end=None):
    """Restrict `df` to the rows whose 'annais' lies in an inclusive range.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric 'annais' column.
    year_start : int, optional
        Lowest year to keep; ``None`` leaves the range open below.
    year_end : int, optional
        Highest year to keep; ``None`` leaves the range open above.

    Returns
    -------
    pandas.DataFrame
        The filtered rows. When both bounds are ``None`` the input frame
        itself is returned.
    """
    bounded_below = df if year_start is None else df.loc[df['annais'] >= year_start]

    if year_end is None:
        return bounded_below
    return bounded_below.loc[bounded_below['annais'] <= year_end]

In [None]:
def get_genders(genders):
    """Translate a gender option label into the list of 'sexe' codes.

    ``'Boys'`` gives ``['M']``, ``'Girls'`` gives ``['F']`` and any other
    value gives both codes, ``['M', 'F']``. A new list is returned on every
    call.
    """
    if genders == 'Boys':
        return ['M']
    if genders == 'Girls':
        return ['F']
    return ['M', 'F']

In [None]:
from dash import Dash, html, dcc, Input, Output

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout: a row of three controls (name filter, colouring option, year
# range) followed by the two charts side by side. The component ids are the
# ones referenced by the callbacks.
# Style dictionaries use camelCased keys ('verticalAlign'), which is the form
# Dash documents for the `style` property.
# NOTE(review): 'padding': '0 20' has no unit on the second value, so browsers
# may ignore it; it is left as is because adding 'px' would change the layout.
app.layout = html.Div([

    # Free-text prefix filter on the first name.
    html.Div([
        dcc.Input(
            type='text',
            value=None,
            placeholder='Filter names',
            id='input_name'
        )
    ], style={'width': '24%', 'display': 'inline-block', 'verticalAlign': 'top'}),

    # Switch between the per-sex stacked bars and the bars coloured by count.
    html.Div([
        dcc.RadioItems(
            options=['Show gender ratio', 'Show total number'],
            value='Show gender ratio',
            id='option-color'
        )
    ], style={'width': '24%', 'display': 'inline-block', 'padding': '0 20'}),

    # Birth-year range, initially spanning every year in the data.
    html.Div([
        dcc.RangeSlider(
            min=min_year, max=max_year, step=10,
            value=[min_year, max_year],
            marks=marks_year,
            id='year-slider'
        )
    ], style={'width': '49%', 'display': 'inline-block'}),

    # Bar chart; its selection drives the second chart.
    html.Div([
        dcc.Graph(
            id='bar'
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

    # Evolution over time.
    html.Div([
        dcc.Graph(
            id='line'
        )
    ], style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),

])

# ---------------------------------------------------------------------------
# Helpers shared by the two figure callbacks. Sharing them guarantees that the
# line chart ranks names exactly like the bar chart does.
# ---------------------------------------------------------------------------

# Count columns obtained by pivoting on 'sexe'. Listing them explicitly lets
# the pivot always expose both, even when the filtered data holds one sex only.
_SEXES = ['M', 'F']
# Number of names kept in the ranking.
_TOP_N = 10
_NO_MARGIN = {"r": 0, "t": 0, "l": 0, "b": 0}


def _empty_figure():
    """Return a blank figure, used when the current filters match no data."""
    fig = go.Figure()
    fig.update_layout(margin=_NO_MARGIN)
    return fig


def _filter_by_prefix(frame, input_name):
    """Apply the name text filter; a missing or empty input keeps every name."""
    if not input_name:
        return frame
    return df_filter_partial_name(frame, partial_name=input_name.upper())


def _to_decade(years):
    """Round years to the nearest decade, with years ending in 5 rounded up.

    Integer arithmetic is used instead of the built-in ``round`` because the
    latter rounds halves to the nearest even value (1945 -> 1940 but
    1955 -> 1960), which makes the decade bins uneven.
    """
    return (years + 5) // 10 * 10


def _counts_by_sex(frame, index):
    """Pivot a non-empty `frame` to summed 'M' and 'F' counts per `index` value.

    `index` is a column name or a list of column names. Both count columns
    are always present in the result; a sex with no row for a given index
    value is NaN. The index levels come back as ordinary columns.
    """
    table = frame.pivot_table(index=index, columns='sexe', values='nombre', aggfunc='sum')
    return table.reindex(columns=_SEXES).rename_axis(columns=None).reset_index()


def _with_balance_ratio(counts):
    """Add a 'ratio' column: 1 - |F - M| / max(M, F).

    The value is 1 when both sexes have the same count and tends to 0 as one
    sex dominates. It is NaN when one of the two counts is missing.
    """
    larger = counts[_SEXES].max(axis=1)
    return counts.assign(ratio=1 - (counts['F'] - counts['M']).abs() / larger)


def _rank_by_balance(frame, pair_years):
    """Return the `_TOP_N` names with the highest 'ratio' inside the year range.

    The result has the columns 'preusuel', 'M', 'F' and 'ratio', sorted by
    decreasing ratio (names with a NaN ratio come last). It is empty when no
    row of `frame` falls inside the range.
    """
    in_range = df_filter_years(frame, year_start=pair_years[0], year_end=pair_years[1])
    if in_range.empty:
        return pd.DataFrame(columns=['preusuel', *_SEXES, 'ratio'])

    ranked = _with_balance_ratio(_counts_by_sex(in_range, 'preusuel'))
    return ranked.sort_values(by='ratio', ascending=False).head(_TOP_N)


def _selected_name(selectedData):
    """Extract the name of the first selected bar, or None without selection.

    Handles both ``None`` and a selection whose 'points' list is empty.
    """
    points = (selectedData or {}).get('points') or []
    if not points:
        return None
    return points[0].get('x')


def _single_name_area(history):
    """Stacked area chart of the per-decade 'M' and 'F' counts of one name."""
    history = history.assign(annais=_to_decade(history['annais']))
    by_decade = _counts_by_sex(history, 'annais').fillna(0)

    return px.area(by_decade, x='annais',
                   y=_SEXES,
                   color_discrete_sequence=['darkblue', 'gold'])


def _top_names_lines(frame, top_names):
    """Line chart of the per-decade balance ratio, one line per top name.

    Every year of `frame` is used, not only the range that served to rank the
    names. A decade in which a name is missing for one sex is drawn at 0.
    """
    history = frame[frame['preusuel'].isin(top_names)]
    history = history.assign(annais=_to_decade(history['annais']))

    ratios = _with_balance_ratio(_counts_by_sex(history, ['preusuel', 'annais']))
    trend = ratios.pivot(index='annais', columns='preusuel', values='ratio').fillna(0)

    return px.line(trend, x=trend.index,
                   y=trend.columns,
                   color_discrete_sequence=px.colors.qualitative.Dark24,
                   range_y=[0, 1])


@app.callback(
    Output('bar', 'figure'),
    Input('input_name', 'value'),
    Input('year-slider', 'value'),
    Input('option-color', 'value'))
def update_bar(input_name, pair_years, option_color):
    """Build the bar chart of the most gender-balanced names.

    Parameters
    ----------
    input_name : str or None
        Prefix typed in the name filter (matched in upper case).
    pair_years : list
        ``[first_year, last_year]`` from the range slider, both inclusive.
    option_color : str
        'Show gender ratio' stacks each bar into a male and a female part
        whose heights sum to the ratio; any other value draws a single bar
        per name coloured by the total count.

    Returns
    -------
    plotly.graph_objects.Figure
        The bar chart, or a blank figure when the filters match nothing.
    """
    top = _rank_by_balance(_filter_by_prefix(df, input_name), pair_years)
    if top.empty:
        return _empty_figure()

    top = top.assign(nombre=top['F'] + top['M'])

    if option_color == 'Show gender ratio':
        # Split the ratio between the sexes in proportion to their counts.
        top = top.assign(**{
            'ratio (M)': top['ratio'] * (top['M'] / top['nombre']),
            'ratio (F)': top['ratio'] * (top['F'] / top['nombre']),
        })
        fig = px.bar(top, x='preusuel',
                     y=['ratio (M)', 'ratio (F)'],
                     color_discrete_sequence=['darkblue', 'gold'],
                     range_y=[0, 1])

    else:
        fig = px.bar(top, x='preusuel',
                     y='ratio',
                     color='nombre',
                     range_y=[0, 1])

    fig.update_layout(margin=_NO_MARGIN,
                      xaxis={'categoryorder': 'total descending'},
                      # 'select' is what feeds selectedData to update_line.
                      clickmode='event+select'
    )

    return fig


@app.callback(
    Output('line', 'figure'),
    Input('input_name', 'value'),
    Input('year-slider', 'value'),
    Input('bar', 'selectedData'))
def update_line(input_name, pair_years, selectedData):
    """Build the evolution chart shown next to the bar chart.

    When a bar is selected and its name is still present after the name
    filter, the chart is a stacked area of that name's male and female counts
    per decade. Otherwise it shows, for the same names as the bar chart, the
    balance ratio per decade.

    Parameters
    ----------
    input_name : str or None
        Prefix typed in the name filter (matched in upper case).
    pair_years : list
        ``[first_year, last_year]`` from the range slider; only used to pick
        the names, the curves themselves cover every year.
    selectedData : dict or None
        Selection payload of the bar chart.

    Returns
    -------
    plotly.graph_objects.Figure
        The area or line chart, or a blank figure when the filters match
        nothing.
    """
    filtered = _filter_by_prefix(df, input_name)

    name = _selected_name(selectedData)
    if name is not None and filtered['preusuel'].eq(name).any():
        fig = _single_name_area(filtered[filtered['preusuel'] == name])

    else:
        # No usable selection (none, empty, or stale after a filter change):
        # fall back to the names currently ranked in the bar chart.
        top = _rank_by_balance(filtered, pair_years)
        if top.empty:
            return _empty_figure()
        fig = _top_names_lines(filtered, top['preusuel'])

    fig.update_layout(margin=_NO_MARGIN)

    return fig

# Start the Dash development server; the call blocks while the server runs.
# Newer Dash releases expose the launcher as `run` and deprecate
# `run_server`, so `run` is preferred and `run_server` is only used on
# installs that do not have it yet.
if hasattr(app, 'run'):
    app.run()
else:
    app.run_server()