# Équipe 7
## Notebook de Gabriel
### Processing des données

In [1]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np


In [2]:
def load_distinction_data(path="../Src/Assets/Data/distinctions_people.csv", bad_rows=(1176,)):
    '''
    Load the distinctions awarded to various people over the years.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file to read. Defaults to the project data file.
    bad_rows : iterable of int, optional
        Row labels to discard before the dates are parsed. Defaults to
        row 1176, which is flagged as erroneous in the source file.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the 'personLabel' and 'distinctionLabel'
        columns are renamed to 'person' and 'distinction', and 'date'
        holds the year as int32. Row labels are not reset, so discarded
        rows leave gaps in the index.

    Raises
    ------
    KeyError
        If a label listed in ``bad_rows`` does not exist in the file.
    '''
    raw = pd.read_csv(path)

    return (
        raw
        # Discard the rows known to be malformed
        .drop(index=list(bad_rows))
        # The raw 'person' / 'distinction' columns only hold https links
        .drop(columns=['person', 'distinction'])
        # The human-readable labels take over the shorter column names
        .rename(columns={'personLabel': 'person', 'distinctionLabel': 'distinction'})
        # Keep only the first four characters of the date (the year) as an integer
        .assign(date=lambda frame: frame["date"].str[:4].astype('int32'))
    )


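Les deux principales structures de données utilisées par la suite sont `df_distinc` (une ligne par distinction : personne, distinction, année) et `df_count` (le nombre de distinctions par personne).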

In [3]:
# Load the cleaned distinctions table and preview its first rows
df_distinc = load_distinction_data()
df_distinc.head()


Unnamed: 0,person,distinction,date
0,Robert Marcel Lepage,Prix René-Jodoin,2018
1,Jean-Marie Drot,commandeur des Arts et des Lettres,2013
2,Jean-Marie Drot,concours général,1946
3,François Damiens,chevalier des Arts et des Lettres,2011
4,Nicole Garcia,César de la meilleure actrice dans un second rôle,1980


In [4]:
# Minimum number of distinctions a person needs to be kept (inclusive bound)
MIN_DISTINCTIONS = 5

# Count the distinctions of each person (non-null 'date' entries per name)
distinctions_per_person = df_distinc.groupby("person")["date"].count()

# Only consider people with at least MIN_DISTINCTIONS distinctions
df_count = distinctions_per_person.loc[distinctions_per_person >= MIN_DISTINCTIONS]
df_count.head()

person
Alain Corneau        6
Alan Arkin          12
Alanis Obomsawin    18
Albert Uderzo        8
Alec Baldwin        15
Name: date, dtype: int64

### Dash jupyter app

In [5]:
from jupyter_dash import JupyterDash
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output, State


In [6]:
def compute_distinc_histogram(df_count, min_count, max_count):
    '''
    Compute the height of each person in the scatter plot (custom histogram).

    The height does not encode data per se; it only separates people who
    have the same number of distinctions so that they stack on top of each
    other instead of overlapping.

    Parameters
    ----------
    df_count : pandas.Series
        Number of distinctions, indexed by person. Counts are assumed to be
        integer-valued.
    min_count, max_count : int
        Inclusive bounds of the counts that get stacked.

    Returns
    -------
    pandas.Series
        Float heights sharing the index of ``df_count``. Within one count
        value, heights are 0, 1, 2, ... following the order of the index.
        People whose count lies outside [min_count, max_count] get 0.
    '''
    # Rank of each person among those sharing the same count, in index order
    stack_rank = df_count.groupby(df_count).cumcount().astype(float)

    # Counts outside the requested range keep a height of 0
    in_range = df_count.between(min_count, max_count)
    return stack_rank.where(in_range, 0.0)
        

In [7]:
def get_histogram(df_count):
    '''
    Build the scatter plot (custom histogram) showing how the numbers of
    distinctions are distributed.

    Each person is drawn as a square marker placed at x = number of
    distinctions, and stacked vertically using the heights returned by
    compute_distinc_histogram. The person's name is attached as the marker
    text and shown in the hover box.
    '''
    lowest, highest = df_count.min(), df_count.max()
    heights = compute_distinc_histogram(df_count, lowest, highest)

    # Hover box: person name and count; <extra></extra> empties the secondary box
    hover_text = (
        '<br><b>Personne </b>: %{text}<br>'
        '<b>Distinctions </b> : %{x}  <extra></extra>'
    )

    markers = go.Scatter(
        x=df_count,
        y=heights,
        text=df_count.index,
        mode='markers',
        marker=dict(
            symbol="square",
            size=10,
            color="#B3DEC1",
            line=dict(width=2, color="black"),
        ),
        hovertemplate=hover_text,
        showlegend=False,
    )
    layout = go.Layout(
        title=dict(text="Distribution des nombres de distinctions", x=0.5),
        width=1000,
        height=700,
    )

    fig = go.Figure(data=markers, layout=layout)

    # One-unit margin around the data on both axes, one tick per count value
    fig.update_xaxes(range=[lowest - 1, highest + 1], dtick=1, title="Nombre de distinctions")
    fig.update_yaxes(range=[-1, heights.max() + 1])
    return fig


In [8]:
def get_empty_table(df_distinc):
    '''
    Build a table figure that has a header but no cells.

    The header titles are every column of df_distinc except the first one.
    '''
    column_titles = list(df_distinc.columns[1:])
    header_style = dict(values=column_titles,
                        fill_color="#B3DEC1",
                        align='center')

    table = go.Table(columnwidth=[600, 100], header=header_style)
    frame = go.Layout(width=700, height=700)

    return go.Figure(data=[table], layout=frame)

In [9]:
def get_filled_table(df_distinc, name):
    '''
    Build a table figure listing the distinctions of one person.

    Rows of df_distinc whose 'person' equals name are kept and sorted by
    ascending 'date'. The header titles are every column of df_distinc
    except the first one; the cells show 'distinction' and 'date'.
    '''
    is_selected = df_distinc["person"] == name
    awards = df_distinc[is_selected].sort_values("date", ascending=True)

    header_style = dict(values=list(df_distinc.columns[1:]),
                        fill_color="#B3DEC1",
                        align='center')
    cell_style = dict(values=[awards["distinction"], awards["date"]],
                      fill_color='#E5FCF5',
                      align=['left', 'center'])

    table = go.Table(columnwidth=[600, 100],
                     header=header_style,
                     cells=cell_style)
    frame = go.Layout(title_text=f"Distinctions de {name}",
                      title_x=0.5,
                      width=700,
                      height=700)

    return go.Figure(data=[table], layout=frame)

In [10]:
# External CSS stylesheet passed to the Dash app
external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css']
# JupyterDash application; its layout and callback are attached in the following cells
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)


In [11]:
# Options shared by both figures: scroll zoom, tips, axis drag handles,
# double-click and the mode bar are all turned off.
_static_graph_config = dict(
    scrollZoom=False,
    showTips=False,
    showAxisDragHandles=False,
    doubleClick=False,
    displayModeBar=False,
)

# Left panel: the custom histogram of distinction counts
_histogram_panel = dcc.Graph(
    id='histogram',
    className='graph',
    figure=get_histogram(df_count),
    style={'display': 'inline-block'},
    config=dict(_static_graph_config),
)

# Right panel: the table, empty until a callback replaces its figure
_table_panel = dcc.Graph(
    id='table',
    className='graph',
    figure=get_empty_table(df_distinc),
    style={'display': 'inline-block'},
    config=dict(_static_graph_config),
)

# Page structure: a title header followed by the two panels side by side
app.layout = html.Div(
    className='content',
    children=[
        html.Header(children=[html.H1('Graphique des distinctions')]),
        html.Main(
            className='viz-container',
            children=[_histogram_panel, _table_panel],
        ),
    ],
)


In [12]:
@app.callback(
    Output('table', 'figure'),
    [Input('histogram', 'clickData')]
)
def histogram_clicked(click_data):
    '''
    Refresh the table figure whenever the histogram's clickData changes.

    With no click data, the empty table is returned. Otherwise the table is
    filled for the person whose name is stored in the 'text' field of the
    first clicked point.
    '''
    if click_data is not None:
        selected_person = click_data['points'][0]['text']
        return get_filled_table(df_distinc, selected_person)

    return get_empty_table(df_distinc)

In [13]:
# Start the Dash server on port 8050 with debug mode enabled
app.run_server(port="8050", debug=True)

Dash app running on http://127.0.0.1:8050/
