# Équipe 7
## Notebook de Jean-Christophe

In [13]:
import numpy as np
import pandas as pd
import datetime

import json

import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output, State
from jupyter_dash import JupyterDash

import plotly.graph_objects as go
import plotly.express as px

## Treemap
### Tests - With Gapminder data

In [14]:
# Using fake other data
df = px.data.gapminder()
print("columns : ",df.columns)
print("years : ", df.year.values)

# Add a constant column in order to have a single root node
df["planet"] = "Earth" 

columns :  Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num'],
      dtype='object')
years :  [1952 1957 1962 ... 1997 2002 2007]


In [15]:
# USING A NON DASH DROPDOWN MENU, i.e. one built-in the figure, 
# DOES NOT WORK BECAUSE ONE NEEDS TO 
# REQUERY THE DATA TO CHANGE THE TRACE FOR EACH YEAR.
# THEREFORE WE WILL USE DASH DROPDOWN MENU WITH CALLBACKS TO UPDATE THE DATA TO DISPLAY.

# Create a list of continents
continents = list(set(df['continents'].values))

# Crete dropdown menu - menu items
buttons = []
for continent in continents:
    # Create a temp_df for the year, i.e. the trace
    query_str = "continent==" + continent
    temp_df = df.query(query_str)
    
    for year in np.arange(start=year_min, stop=year_max+1, step=5):

        # Create a temp_df for the year, i.e. the trace
        query_str = "year==" + str(year)
        temp_df = temp_df.query(query_str)[['continent', 'country', 'pop']]

        # Create a figure with px
        temp_fig = px.treemap(temp_df, path=['continent', 'country'], values='pop')

        # Add the trace of the temp_fig to the fig
        fig.add_trace(
            go.Treemap(
                branchvalues="total",
                ids=temp_fig.data[0].ids,
                labels=temp_fig.data[0].labels,
                parents=temp_fig.data[0].parents,
                values=temp_fig.data[0].values,
                visible=False,
            )
        )

# Crete dropdown menu - menu
updatemenus=[dict(
    buttons=buttons,
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=0.1,
    xanchor="left",
    y=1.1,
    yanchor="top",
)]

# Add drop down menu
fig.update_layout(
    updatemenus=updatemenu,
)

KeyError: 'continents'

In [16]:
# Trying different ways to built the marks for the range slider
year_max = df['year'].max()
year_min = df['year'].min()
years = np.arange(start=year_min, stop=year_max+1, step=5)
print(years)
print(type(years))

marks=({year: '{}'.format(year) for i, year in enumerate(years)},)
print(marks)
print(type(marks))

marks={i: '{}'.format(10 ** i) for i in range(4)},
print(marks)
print(type(marks))

marks={str(year): str(year) for year in years}
print(marks)
print(type(marks))

[1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007]
<class 'numpy.ndarray'>
({1952: '1952', 1957: '1957', 1962: '1962', 1967: '1967', 1972: '1972', 1977: '1977', 1982: '1982', 1987: '1987', 1992: '1992', 1997: '1997', 2002: '2002', 2007: '2007'},)
<class 'tuple'>
({0: '1', 1: '10', 2: '100', 3: '1000'},)
<class 'tuple'>
{'1952': '1952', '1957': '1957', '1962': '1962', '1967': '1967', '1972': '1972', '1977': '1977', '1982': '1982', '1987': '1987', '1992': '1992', '1997': '1997', '2002': '2002', '2007': '2007'}
<class 'dict'>


In [17]:
# Create a range slider

# Using fake other data
df = px.data.gapminder()

# Extract year min and max
year_max = df['year'].max()
year_min = df['year'].min()
years = np.arange(start=year_min, stop=year_max+1, step=5)

rs = dcc.RangeSlider(
    id='range-slider',
    min=year_min,
    max=year_max,
    step=5,
    value=[year_min, year_max],
    pushable=False,
    allowCross=True,
    dots=False,
    updatemode='mouseup',
    marks={str(year): str(year) for year in years},
)  
    
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
nom_de_la_page = "Notes sur la librairie Dash de Plotly"
app = JupyterDash(nom_de_la_page, external_stylesheets=external_stylesheets)
app.layout = html.Div([
    rs,
    html.Div(id='output-range-slider')
])
@app.callback(
    dash.dependencies.Output('output-range-slider', 'children'),
    [dash.dependencies.Input('range-slider', 'value')])
def update_output(value):
    return 'You have selected "{}"'.format(value)

app.run_server(mode='inline', port=8010)

In [18]:
# Create a dropdown menu

# Using fake other data
df = px.data.gapminder()

# Create a list of continents
continents = list(set(df['continent'].values))

# Create le menu options
options=[{'label': "All", 'value': "All"}]
for continent in continents:
    option = {'label': continent, 'value': continent}
    options.append(option)
    
ddm = dcc.Dropdown(
        id='dropdown-menu',
        options=options,
        value='All'
    )

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
nom_de_la_page = "Notes sur la librairie Dash de Plotly"
app = JupyterDash(nom_de_la_page, external_stylesheets=external_stylesheets)
app.layout = html.Div([
    ddm,
    html.Div(id='output-dropdown-menu')
])
@app.callback(
    dash.dependencies.Output('output-dropdown-menu', 'children'),
    [dash.dependencies.Input('dropdown-menu', 'value')])
def update_output(value):
    return 'You have selected "{}"'.format(value)

app.run_server(mode='inline', port=8010)

### Test - With real data

In [19]:
relative_path = "../Src/Assets/Data/"
file_name = "_film_genres.csv"
full_path = relative_path + file_name
df_film_genres = pd.read_csv(
    filepath_or_buffer=full_path,
    sep=',',
    header=0,
    index_col=None,
    parse_dates=True,
)

relative_path = "../Src/Assets/Data/"
file_name = "_film_pays.csv"
full_path = relative_path + file_name
df_film_pays = pd.read_csv(
    filepath_or_buffer=full_path,
    sep=',',
    header=0,
    index_col=None,
    parse_dates=True,
)

relative_path = "../Src/Assets/Data/"
file_name = "_film_langue.csv"
full_path = relative_path + file_name
df_film_langues = pd.read_csv(
    filepath_or_buffer=full_path,
    sep=',',
    header=0,
    index_col=None,
    parse_dates=True,
)

relative_path = "../Src/Assets/Data/"
file_name = "pays_vsParContinent_vsFrancais.csv"
full_path = relative_path + file_name
df_pays = pd.read_csv(
    filepath_or_buffer=full_path,
    sep=',',
    header=0,
    index_col=None,
    parse_dates=True,
)

temp = pd.merge(
    left=df_film_pays,
    right=df_pays,
    how="left",
    left_on="pays",
    right_on="pays",
    sort=True,
    suffixes=("", ""),
)

temp = pd.merge(
    left=temp,
    right=df_film_langues,
    how="left",
    left_on="filmoId",
    right_on="filmoId",
    sort=True,
    suffixes=("", ""),
)

temp = pd.merge(
    left=temp,
    right=df_film_genres,
    how="left",
    left_on="filmoId",
    right_on="filmoId",
    sort=True,
    suffixes=("", ""),
)

In [20]:
temp = temp[["anneeSortie", "continent", "pays", "capitale", "langue", "filmoId", "titreOriginal", "genres_categorized"]]
temp = temp.rename(columns={"anneeSortie": "annee", "filmoId": "id", "titreOriginal": "titre", "genres_categorized": "genre"})
treemap_df = temp.dropna(subset=['genre'])

In [21]:
#display(treemap_df[treemap_df.isna()])

In [22]:
temp = treemap_df.dropna(subset=['annee'])
temp = temp.astype({'annee': 'int'})
temp = temp.astype({'genre': 'str'})
temp['genre'] = temp['genre'].apply(str.capitalize)
#display(temp)

### Preprocession

In [23]:
def clean_year_column(df):
    temp = df.dropna(subset=['annee'])
    temp = temp.astype({'annee': 'int'})
    temp = temp[temp['annee']<=datetime.datetime.now().year]
    return temp

In [24]:
def clean_genre_column(df):
    temp = df.dropna(subset=['genre'])
    temp = temp.astype({'genre': 'str'})
    temp['genre'] = temp['genre'].apply(str.capitalize)
    
    return temp

In [25]:
def clean_title_column(df):
    temp = df.dropna(subset=['titre'])
    temp = temp.astype({'titre': 'str'})
    temp['titre'] = temp['titre'].apply(str.capitalize)
    
    return temp

In [26]:
def load_data_for_treemap():
    relative_path = "../Src/Assets/Data/"
    file_name = "_film_genres.csv"
    full_path = relative_path + file_name
    df_film_genres = pd.read_csv(
        filepath_or_buffer=full_path,
        sep=',',
        header=0,
        index_col=None,
        parse_dates=True,
    )

    relative_path = "../Src/Assets/Data/"
    file_name = "_film_pays.csv"
    full_path = relative_path + file_name
    df_film_pays = pd.read_csv(
        filepath_or_buffer=full_path,
        sep=',',
        header=0,
        index_col=None,
        parse_dates=True,
    )

    relative_path = "../Src/Assets/Data/"
    file_name = "_film_langue.csv"
    full_path = relative_path + file_name
    df_film_langues = pd.read_csv(
        filepath_or_buffer=full_path,
        sep=',',
        header=0,
        index_col=None,
        parse_dates=True,
    )

    relative_path = "../Src/Assets/Data/"
    file_name = "pays_vsParContinent_vsFrancais.csv"
    full_path = relative_path + file_name
    df_pays = pd.read_csv(
        filepath_or_buffer=full_path,
        sep=',',
        header=0,
        index_col=None,
        parse_dates=True,
    )

    temp = pd.merge(
        left=df_film_pays,
        right=df_pays,
        how="left",
        left_on="pays",
        right_on="pays",
        sort=True,
        suffixes=("", ""),
    )

    temp = pd.merge(
        left=temp,
        right=df_film_langues,
        how="left",
        left_on="filmoId",
        right_on="filmoId",
        sort=True,
        suffixes=("", ""),
    )

    temp = pd.merge(
        left=temp,
        right=df_film_genres,
        how="left",
        left_on="filmoId",
        right_on="filmoId",
        sort=True,
        suffixes=("", ""),
    )

    temp = temp[["anneeSortie", "continent", "pays", "capitale", "langue", "filmoId", "titreOriginal", "genres_categorized"]]
    temp = temp.rename(columns={"anneeSortie": "annee", "filmoId": "id", "titreOriginal": "titre", "genres_categorized": "genre"})

    return treemap_df

In [27]:
def get_min_and_max_year(df):
    year_max = df['annee'].max()
    year_min = df['annee'].min()
    
    return year_min, year_max

In [28]:
def get_list_of_genres(df):
    genres = list(set(df['genre'].values))
    
    return genres

In [29]:
def add_constant_col(df, col_name, const_name):
    temp_df = df.copy()
    temp_df[col_name] = const_name
    
    return temp_df

In [30]:
def query_data_for_treemap(df, genre, from_year, to_year):
    
    # Extract data corresponding to the right genre
    if genre == "All":
        temp_df = df
    else:
        query_str = "genre=='" + genre + "'"
        temp_df = df.query(query_str)
    
    # Extract data corresponding to the right years
    query_str = "annee>=" + str(from_year)
    temp_df = temp_df.query(query_str)
    query_str = "annee<=" + str(to_year)
    temp_df = temp_df.query(query_str)
        
    # Aggregate agnostic to years
    temp_df = (temp_df
        .groupby(['planete', 'continent', 'pays'])
        .agg(nombreDeFilms=pd.NamedAgg(column='genre', aggfunc='count'))
        .reset_index()
    )
    return temp_df

In [31]:
def create_treemap_dropdown_menu(genres):
    # Create options
    options=[{'label': "All", 'value': "All"}]
    for genre in genres:
        option = {'label': genre, 'value': genre}
        options.append(option)

    # Create menu
    ddm = dcc.Dropdown(
            id='treemap-dropdown-menu',
            options=options,
            value='All',
        )
    
    return ddm

In [32]:
def create_treemap_title(year_min_displayed, year_max_displayed, genre_displayed):
    
    if genre_displayed=="All":
        title_str = (
            'Nombre de films de tous genres produits entre les années ' + 
            str(year_min_displayed) + 
            ' et ' + 
            str(year_max_displayed) + 
            '.'
        )
    else: 
        title_str = (
            'Nombre de films de genre ' + 
            genre_displayed + 
            ' produit entre les années ' + 
            str(year_min_displayed) + 
            ' et ' + 
            str(year_max_displayed) + 
            '.'
        )

    title = html.Header( 
        children=[
            html.H2(id='treemap-title', children=title_str)
        ]
    )
    
    return title

In [33]:
def create_treemap_range_slider(year_min, year_max):
    
    # Create list of years
    first_decade = 10 - (year_min%10) + year_min
    last_decade = year_max - (year_max%10)
    years = np.arange(start=first_decade, stop=last_decade+1, step=10)

    rs = dcc.RangeSlider(
        id='treemap-range-slider',
        min=year_min,
        max=year_max,
        value=[year_min, year_max],
        pushable=False,
        allowCross=True,
        dots=False,
        updatemode='mouseup',
        marks={str(year): str(year) for year in years},
    )    
    
    return rs

In [34]:
def init_treemap_fig(df, year_min, year_max):
    # Get data
    temp_df = query_data_for_treemap(
        df=df, 
        genre='All', 
        from_year=year_min, 
        to_year=year_max
    )

    # Create a figure with px
    fig = px.treemap(
        temp_df, 
        path=['planete', 'continent', 'pays'], 
        values='nombreDeFilms',
    )

    return fig

In [35]:
def callback_treemap(df, slider_min_year, slider_max_year, ddm_genre):
    
    # Create new title
    treemap_title = create_treemap_title(
        year_min_displayed=slider_min_year, 
        year_max_displayed=slider_max_year, 
        genre_displayed=ddm_genre
    )
    
    # Get data
    temp_df = query_data_for_treemap(
        df=df, 
        genre=ddm_genre, 
        from_year=slider_min_year, 
        to_year=slider_max_year
    )

    # Create a figure with px
    treemap_fig = px.treemap(
        temp_df, 
        path=['planete', 'continent', 'pays'], 
        values='nombreDeFilms',
    )

    return treemap_title, treemap_fig

In [36]:
def create_treemap_graph(treemap_fig):
    treemap_graph = dcc.Graph(
        figure=treemap_fig, 
        id='treemap-graph',
        config=dict(
            showTips=False,
            showAxisDragHandles=False,
            displayModeBar=False
        )
    )

    return treemap_graph

In [37]:
def create_treemap(treemap_title, treemap_rs, treemap_ddm, treemap_graph):
    
    treemap = html.Div([
        treemap_title,
        treemap_ddm,
        treemap_graph,
        treemap_rs
    ])

    return treemap

In [38]:
my_df = load_data_for_treemap()

my_df = clean_year_column(my_df)
my_df = clean_genre_column(my_df)
my_df = clean_title_column(my_df)
my_df = add_constant_col(my_df, "planete", "Terre")

year_min, year_max = get_min_and_max_year(df=my_df)
genres = get_list_of_genres(df=my_df)

treemap_fig = init_treemap_fig(df=my_df, year_min=year_min, year_max=year_max)

treemap_title = create_treemap_title(
    year_min_displayed=year_min, 
    year_max_displayed=year_max, 
    genre_displayed="All"
)
treemap_graph = create_treemap_graph(treemap_fig=treemap_fig)
treemap_ddm = create_treemap_dropdown_menu(genres=genres)
treemap_rs = create_treemap_range_slider(year_min=year_min, year_max=year_max)


external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
nom_de_la_page = "Projet INF8808 - Cinemathèque québécoise"
app = JupyterDash(nom_de_la_page, external_stylesheets=external_stylesheets)
app.layout = html.Div([
    create_treemap(
        treemap_title=treemap_title,
        treemap_rs=treemap_rs,
        treemap_ddm=treemap_ddm,
        treemap_graph=treemap_graph
    )
])
@app.callback([Output('treemap-title', 'children'),
               Output('treemap-graph', 'figure')],
              [Input('treemap-dropdown-menu', 'value'),
               Input('treemap-range-slider', 'value')])
def update_treemap(ddm_value, rs_value):     
    treemap_title, treemap_fig = callback_treemap(
        df=my_df,
        slider_min_year=rs_value[0], 
        slider_max_year=rs_value[1], 
        ddm_genre=ddm_value
    )
       
    return treemap_title, treemap_fig

#app.run_server(mode='inline', port=8010)
app.run_server(mode='jupyterlab', port = 8010, dev_tools_ui=True, #debug=True, 
               dev_tools_hot_reload =True, threaded=True)

### Extra tools

In [40]:
fig.show()

In [None]:
fig.full_figure_for_development(warn=False).show('json')