In [2]:
import pandas as pd
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Location of the cleaned energy dataset, relative to the current working directory.
path_energy = os.path.join("energy-cleaned-dataset.csv")
df_energy = pd.read_csv(path_energy)

# Plotly template name intended for the `template=` argument of the figures.
# Built-in Plotly template names use an underscore ("plotly_dark"); the former
# value "plotly-dark" is not a registered template and would be rejected.
THEME = "plotly_dark"

In [4]:
def df_energy_query(area_type: str, area_name: str) -> pd.DataFrame:
    """
    Return the rows of df_energy whose ``area_type`` column equals ``area_name``.

    Every graph function goes through this helper so that the selection logic
    lives in a single place.

    :param area_type: name of the df_energy column to filter on (e.g. "Continent")
    :param area_name: value to keep in that column (e.g. "Europe")
    :return: a pandas dataframe which is a sub-part of df_energy
    :raises KeyError: if ``area_type`` is not a column of df_energy
    """
    # A boolean mask is used rather than a string-built ``query`` expression:
    # interpolating the arguments into the expression breaks as soon as the
    # name contains a quote (e.g. "Cote d'Ivoire") or the column name contains
    # a space.
    return df_energy.loc[df_energy[area_type] == area_name]

In [30]:
def graph_lines_percentage(area_type: str, area_name: str) -> go.Figure:
    """
    Build a line chart of the evolution, in percent, of the indicators with 2000 as reference.

    The rows selected by df_energy_query are expressed as a percentage change
    relative to the year-2000 values, then summed per year and drawn as one
    line per indicator.

    :param area_type: column of df_energy used for the selection (e.g. "Continent")
    :param area_name: value to select in that column (e.g. "Europe")
    :return: the plotly line figure
    :raises ValueError: if the selection contains no row
    :raises AssertionError: if one of the columns to normalize is not numeric
    """
    df_normalized = df_energy_query(area_type, area_name)
    if df_normalized.empty:
        raise ValueError(f"No data found for {area_type} == {area_name!r}")

    # Zeros are replaced by ones so that the division by the reference values
    # below never divides by zero.
    df_normalized = df_normalized.replace(0, 1)

    # Exclude the non-numeric columns and the ones that are already expressed in %.
    cols_to_normalize = list(df_normalized.columns.difference(
        ['Country', 'Year', 'Continent', 'Region', 'iso3', 'Access to Electricity (%)', 'Low-Carbon Electricity (%)',
         'Renewables (% Equivalent Primary Energy)']))

    # Every remaining column must be numeric for the arithmetic below. The
    # previous check compared a count against a hard-coded 9 with `!=`, so it
    # only passed when at least one column contained a missing value.
    all_numeric = all(pd.api.types.is_numeric_dtype(df_normalized[col]) for col in cols_to_normalize)
    assert all_numeric, "<-- All values are not numeric -->"

    # Build a mask that keeps only the rows of year 2000 (the reference year).
    mask_2000 = (df_normalized['Year'] == 2000)
    # Spread each year-2000 row forward ('pad') onto the rows that follow it, so
    # that every row is aligned with a reference row.
    # NOTE(review): this assumes the rows are ordered so that each country's
    # 2000 row comes right before its later years — TODO confirm.
    df_normalized_2000 = df_normalized.loc[mask_2000, cols_to_normalize].reindex(
        df_normalized.index, method='pad', fill_value=1)

    # Percentage change relative to the reference row.
    df_normalized[cols_to_normalize] = (
        (df_normalized[cols_to_normalize] - df_normalized_2000) / df_normalized_2000
    ) * 100

    # Only the normalized columns are aggregated: summing the text columns
    # (Country, Region, ...) is useless for the plot.
    # NOTE(review): when the selection holds several rows per year, their
    # percentages are added together; confirm that a sum (rather than a mean)
    # is the intended aggregation.
    df_line = df_normalized.groupby("Year")[cols_to_normalize].sum()

    lines = px.line(df_line, x=df_line.index, y=cols_to_normalize,
                    markers=True, log_y=False,
                    title="Evolution in % with 2000 as reference",
                    # template=THEME,
                    )

    # Shortened names shown in the legend, for a more compact rendering.
    legend_new = {
        'CO2 Emissions (kt by country)': 'CO2 Emissions',
        'Electricity from Fossil Fuels (TWh)': 'Electricity Fossil',
        'Electricity from Nuclear (TWh)': 'Electricity Nuclear',
        'Electricity from Renewables (TWh)': 'Electricity Renewables',
        'GDP Growth': 'GDP Growth',
        'GDP per Capita': 'GDP per Capita',
        'Human Development Index': 'HDI',
        'Primary Energy Consumption per Capita (kWh/person)': 'Primary Energy',
        'Renewable Electricity Capacity per Capita': 'Renewable Electricity'
    }

    # Fall back to the original column name when no short name is defined,
    # instead of failing with a KeyError.
    lines.for_each_trace(lambda t: t.update(name=legend_new.get(t.name, t.name)))

    lines.update_layout(
        legend=dict(
            orientation="h",
            entrywidth=70,
            yanchor="bottom",
            y=-0.5,
            xanchor="right",
            x=1
        ),
    )
    return lines

# Display the percentage-evolution chart for the rows whose Continent is "Europe".
graph_lines_percentage(area_type="Continent", area_name="Europe")

In [118]:
def graph_histo_hdi(area_type: str, area_name: str, reference_year: int = 2020) -> go.Figure:
    """
    Build an animated histogram of the Human Development Index for a given area.

    One animation frame is produced per year, and a box plot is drawn as the
    marginal distribution. Rows whose year equals ``reference_year`` are flagged
    in a boolean 'reference' column that drives both the colour and the pattern
    shape of the bars.

    :param area_type: column used for the selection, e.g. "Country", "Continent", "Region", "iso3"
    :param area_name: value to select, e.g. "France", "Europe", "Western Europe", "FRA"
    :param reference_year: year to flag, expected in [2000, 2020]
    :return: the plotly histogram figure
    """
    kept_columns = ['Year', 'Country', 'Continent', 'Region', 'Human Development Index']
    area_rows = df_energy_query(area_type, area_name)[kept_columns]

    # Boolean feature: True on the rows that belong to the reference year.
    is_reference_year = area_rows['Year'] == reference_year
    hdi_frame = area_rows.assign(reference=is_reference_year)

    figure = px.histogram(
        hdi_frame,
        x="Human Development Index",
        color='reference',
        pattern_shape='reference',
        marginal="box",  # other possible values: "rug", "violin"
        animation_frame="Year",
        text_auto=True,
        opacity=1,
        title=f"Histogram Human Development Index per year for {area_name}",
        # template=THEME,
    )

    figure.update_layout(showlegend=False, transition={'duration': 10000})

    return figure

# Display the HDI histogram for the rows whose Continent is "Europe" (default reference year).
graph_histo_hdi(area_type="Continent", area_name="Europe")