In [79]:
import pandas as pd
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [80]:
path_energy = os.path.join("dataset", "energy-cleaned-dataset.csv")
df_energy = pd.read_csv(path_energy)

In [81]:
THEME = "plotly_dark"
type = "Country"
name = "Afghanistan"
global na_info

In [82]:
def df_energy_query(area_type: str = type, area_name: str = name) -> pd.DataFrame : 
	"""
	The function df_energy_query return a filtered dataframe of df_energy.
	It useful because the computation is done only one time.
	:param area_type: 
	:param area_name: 
	:return: 
	"""
	return df_energy.query(f"{area_type} == '{area_name}'")

In [83]:
def get_na_info(df_filtered : pd.DataFrame = df_energy_query(), area_type = type, area_name = name) : 
    # compute the values for the indicator
    NB_OF_NAN = df_filtered.isna().sum().sum()
    NB_OF_NAN_GLOBAL = df_energy.isna().sum().sum()
    NB_MEAN_NAN = (NB_OF_NAN_GLOBAL / len(df_energy[area_type].unique()))
    # compare the nb of nan for the given area_name to the mean of nan of other area_type
    
    return NB_OF_NAN, NB_OF_NAN_GLOBAL, NB_MEAN_NAN

na_info = get_na_info()

In [84]:
def heatmap_missing_values(area_type: str = type, area_name: str = name) -> go.Figure:
    """
    This function creat a complexe multi-figure within a heatmap of missing values for the given area_name for all features and an indicator trace.
    :param area_type: ["Entity", "Continent", "Region", "iso3"]
    :param area_name: ["France", "Europe", "Western Europe", "FRA", ...]
    :return: a figure object of the plotly lib (heatmap + indicator)
    """
    
    # The set of var to observe except the one used on other axis
    col = df_energy.columns.tolist()
    for x in ['Year', 'Country', 'Continent', 'Region', 'iso3']: col.remove(x)

    df_heat = df_energy_query(area_type, area_name)[col]  # make the df with for the given area_name
    df_heat_na = df_heat.isna()
    df_heat_na.replace({True: 1, False: 0},
                       inplace=True)  # We change the True to 1 because plotly can not interpret them

    

    df_heat_na['Year'] = df_energy['Year']  # add the year column to the df
    transposed_df = df_heat_na.groupby('Year').sum().T

    # testing after the upper calculation to be sure that the nb of missing val and shape is plausible
    assert transposed_df.sum().sum() == df_energy.query(f"{area_type} == '{area_name}'").isna().sum().sum(), "<-- The sum of nan is  not the same -->"
    assert transposed_df.shape[0] == len(col), "<-- The number of columns is not the same -->"
    assert transposed_df.shape[1] == len(df_energy['Year'].unique().tolist()), "<-- The number of rows is not the same -->"

    heatmap = go.Figure()
    heatmap.add_trace(go.Heatmap(
                z=transposed_df,
                x=df_energy['Year'].unique().tolist(),
                y=col,
    
                # Styling
                colorscale='Viridis',
                colorbar=dict(
                    title="nombre",
                    titleside="top"
                )
            )
        )

    return heatmap
    
heatmap_missing_values()

In [85]:
NB_OF_NAN, NB_OF_NAN_GLOBAL, NB_MEAN_NAN = na_info

indicator_na = go.Figure()


indicator_na.add_trace(go.Indicator(
        mode="number+delta",  # 'delta' mean the % btw the value and a ref
        value=NB_OF_NAN,  # nb of nan for the current country looked
        delta={'reference': int(NB_MEAN_NAN), 'relative': True, 'valueformat': '.2f', "suffix": "%"},
        # styling
        title={"text": f"NN of nan for {name}<br>"
                       "<span style='font-size:0.8em;color:gray'>"
                       f"compare to the mean of {type}</span>"
               }
    )
)
indicator_na.update_layout(
    height=250,
)
indicator_na.show()

In [86]:
NB_OF_NAN, NB_OF_NAN_GLOBAL, NB_MEAN_NAN = na_info

indicator_na = go.Figure()

indicator_na.add_trace(go.Indicator(
    mode="number", 
    value=NB_OF_NAN_GLOBAL, 
    title={"text": f"NN of nan for {...}"}
)
)
indicator_na.update_layout(
    height=250,
)
indicator_na.show()

In [120]:
# Creat a line graph with the df_energy. 
# The line graph value for the first year is 0 and then the evolution of the %
# % evolution = ((current year - first year) / first year) * 100

df_energy_line = df_energy_query(area_name="Afghanistan")

df_2000 = df_energy_line.query("Year == 2000")
df_2000.replace({np.nan: 1, 0: 1}, inplace=True)  # to avoid division by 0
df_energy_line.replace({np.nan: 1, 0: 1}, inplace=True) 


for col in df_2000.keys()[1:] : 
    for val in df_2000[col]: 
        print((val - df_2000[col])/df_2000[col])
        print()

0    0.0
Name: Year, dtype: float64

0    0.0
Name: Access to Electricity (%), dtype: float64

0    0.0
Name: Renewable Electricity Capacity per Capita, dtype: float64

0    0.0
Name: Electricity from Fossil Fuels (TWh), dtype: float64

0    0.0
Name: Electricity from Nuclear (TWh), dtype: float64

0    0.0
Name: Electricity from Renewables (TWh), dtype: float64

0    0.0
Name: Low-Carbon Electricity (%), dtype: float64

0    0.0
Name: Primary Energy Consumption per Capita (kWh/person), dtype: float64

0    0.0
Name: CO2 Emissions (kt by country), dtype: float64

0    0.0
Name: Renewables (% Equivalent Primary Energy), dtype: float64

0    0.0
Name: GDP Growth, dtype: float64

0    0.0
Name: GDP per Capita, dtype: float64




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



TypeError: unsupported operand type(s) for -: 'str' and 'str'