In [None]:
import pandas as pd
import geopandas as gpd
import os
import seaborn as sn
import numpy as np
#bimport folium
import json
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
# Input file locations (relative to the notebook's working directory) and the
# names of the non-numeric columns of the energy dataset.
DATASET_DIR = "dataset"
path_energy = os.path.join(DATASET_DIR, "energy-cleaned-dataset.csv")
path_geodata = os.path.join(DATASET_DIR, "world-administrative-boundaries.geojson")
categorical_columns = [
    "Entity",
    "Continent",
    "Region",
    "iso3",
]

In [None]:
# Notebook-wide settings: the country inspected in the per-country views and
# the Plotly theme applied to the figures.
COUNTRY = "Afghanistan"
template = "plotly_dark"

# Load the cleaned energy dataset from the CSV located at path_energy.
df_energy = pd.read_csv(path_energy)

In [None]:

# Number of missing values per year, summed over every column and every row of that year.
data_na = df_energy.isna()
na_per_year = (
    data_na.sum(axis=1)                  # NaN count of each row
    .groupby(df_energy["Year"]).sum()    # added up per year in a single pass
    .rename("Missing values")
    .reset_index()
)
nb_na = na_per_year["Missing values"].tolist()

# Plot from the small summary frame so that the axes carry real column names
# instead of the generic "x" / "y" labels.
fig_na = px.scatter(
    na_per_year,
    x="Year",
    y="Missing values",
    template=template,
    range_y=[0, 400],
)
fig_na.show()

In [None]:
def getNaNumber(variable, df=None):
    """Classify countries by how much data they have for one column.

    Parameters
    ----------
    variable : str
        Name of the column to inspect.
    df : pandas.DataFrame, optional
        Frame holding an "Entity" column and the `variable` column.
        Defaults to the notebook-level `df_energy`.

    Returns
    -------
    tuple of int
        ``(no_data, incomplete_data, valide_data)``: the number of countries
        whose values for `variable` are all missing, partly missing, and
        never missing, respectively.
    """
    if df is None:
        df = df_energy

    # One pass over the frame: NaN count and row count per country.
    per_country = df[variable].isna().groupby(df["Entity"])
    na_count = per_country.sum()
    row_count = per_country.size()

    # "No data" means every row of the country is NaN. Comparing with the
    # country's own row count (rather than a fixed 21) keeps the result
    # correct when countries do not all cover the same number of years.
    no_data = int((na_count == row_count).sum())
    valide_data = int((na_count == 0).sum())
    incomplete_data = len(na_count) - valide_data - no_data
    return no_data, incomplete_data, valide_data

# Build one summary row per column of df_energy: how many countries have
# no data, incomplete data, or complete data for that column.
values = [[], [], [], []]
for variable in df_energy.keys():
    # Descriptive names; in particular avoid `id`, which would shadow the
    # built-in id() for every later cell of the notebook.
    n_no_data, n_incomplete, n_valid = getNaNumber(variable)
    values[0].append(variable)
    values[1].append(n_no_data)
    values[2].append(n_incomplete)
    values[3].append(n_valid)

dic = dict(zip(["Variable", "No data", "Incomplete data", "Valide data"], values))

# Columns with the most "no data" countries come first.
df_na = pd.DataFrame(dic).sort_values("No data", ascending=False)

# Pie chart of the three availability categories for the first row of df_na.
slice_colors = {"No data":"rgb(255,0,0)", "Incomplete data": "orange", "Valide data": "green"}
for _, row in df_na.head(1).iterrows():
    categories = row.index[1:]    # every label except "Variable"
    counts = row.values[1:]       # the matching country counts
    fig = px.pie(
        row,
        names=categories,
        values=counts,
        title=row["Variable"],
        template=template,
        color_discrete_map=slice_colors,
    )
    fig.show()

### Heatmap of missing values : 
The goal is to build a two-panel figure: a heatmap showing where values are missing (NaN) for a given country or region, next to an indicator giving the total number of missing values for easier reading.

We create the dataset needed for the heatmap:
* restricted to a single country
* with only the required columns: all of them except 'Year' (shown on the other axis) and 'Entity'.
* 

In [None]:
# Names of the columns shown as heatmap rows: every column of df_energy
# except the two identifier columns.
col = list(df_energy.columns)
for excluded in ("Year", "Entity"):
    col.remove(excluded)

In [None]:
# NaN mask of the selected country, restricted to the columns listed in `col`.
# Boolean indexing is used instead of an f-string query so that a country name
# containing a quote (e.g. "Cote d'Ivoire") cannot break the expression.
df_heat = df_energy.loc[df_energy["Entity"] == COUNTRY, col].isna()
NB_OF_NAN = df_heat.sum().sum()     # reference NaN count for the selected country
RATIO_NAN = 1 # = int(df_energy.isna().sum().sum()/df_energy.size)  # global NaN reference
# TODO: is 1 really the right reference? Also adapt the shape (21 != 19)
df_heat = df_heat.astype(int)       # 0/1 instead of False/True, because plotly cannot interpret booleans

We build the subplot figure with the help of:
1. [mixed sub-plot](https://plotly.com/python/mixed-subplots/) documentation
2. [specs of subplot](https://plotly.com/python/subplots/#subplots-types) documentation

In [None]:
# Two-panel figure: the missing-value heatmap on the left (cartesian axes)
# and a numeric indicator on the right (domain-type subplot).
heatmap = make_subplots(
    rows=1, cols=2,     # 2 columns because there are 2 graphs
    column_widths=[0.9, 0.2],
    specs=[[{"type": "xy"}, {"type": "domain"}]],
    subplot_titles=("Heatmap des valeurs manquantes par année et par variable", "")
)

# Years of the rows actually present in df_heat (i.e. of the selected country),
# looked up through the index df_heat shares with df_energy.
heat_years = df_energy.loc[df_heat.index, "Year"]

# The heatmap
heatmap.add_trace(
    go.Heatmap(
        # go.Heatmap expects z[i][j] with i running along y and j along x.
        # df_heat has one row per year and one column per variable, so it is
        # transposed to put variables on y and years on x.
        z=df_heat[col].T.to_numpy(),
        x=heat_years,
        y=col,

        # Styling
        colorscale='Viridis',
        colorbar=dict(
            # nested title.side replaces the deprecated `titleside` attribute
            title=dict(text="nombre", side="top")
        )
    ),
    # placement of the graph in 1st position
    row=1, col=1
)


# The indicator
heatmap.add_trace(
    go.Indicator(
        mode="number+delta",    # 'delta' shows the change between the value and a reference
        value=int(NB_OF_NAN),   # number of NaN for the country currently inspected
        # relative=True displays the delta as a fraction of the reference
        delta={'reference': RATIO_NAN, 'relative': True},
        # TODO: if several countries can be displayed, the reference must change
        # so that the comparison stays proportional

        # styling
        title={"text": "Nb isnan<br>"
                       "<span style='font-size:0.8em;color:gray'>"
                       "% comparé à la moyenne</span>"
        }
    ),
    # placement of the graph in 2nd position
    row=1, col=2
)

# styling
heatmap.update_layout(
    template=template,  # same theme as the other figures of the notebook
    title_text=f"Observation des valeurs manquantes pour : '{COUNTRY}'",
)

heatmap.show()

### Heatmap of Correlation :

In [None]:
# Preview the first five rows of the dataset.
df_energy.head(n=5)

In [None]:

# Pairwise correlation between the columns that remain once the categorical
# ones are removed, drawn as a seaborn heatmap.
non_categorical = df_energy.drop(categorical_columns, axis="columns")
cor = non_categorical.corr()
sn.heatmap(data=cor)

### Easy map with go.Choropleth()

In [None]:
# Parameters of the map: the year displayed and the column used for the colour.
year = 2012
column_name = "Human Development Index"

# Rows of the selected year only.
filtered_df = df_energy.loc[df_energy['Year'] == year]

# The colour bounds come from the whole dataset (all years), not only from
# the selected year.
z_low = df_energy[column_name].min()
z_high = df_energy[column_name].max()

trace = go.Choropleth(
    locationmode='country names',       # countries are matched by name
    locations=filtered_df['Entity'],
    z=filtered_df[column_name],
    zmin=z_low,
    zmax=z_high,
    colorscale='viridis',
    colorbar=dict(title=column_name),
)

fig = go.Figure(data=[trace])

fig.update_layout(
    title=dict(
        text=f'{column_name} Map with slider',
        font=dict(size=24),
        x=0.5,                          # centred title
    ),
    height=800,                         # figure size in pixels
    width=1000,
    font=dict(family='Arial', size=12),
    margin=dict(t=80, l=50, r=50, b=50),
)

fig.show()

In [None]:
# Read the world administrative boundaries GeoJSON located at path_geodata with geopandas.
geodata = gpd.read_file(path_geodata)

In [None]:
# fig = px.choropleth(df_energy, geojson=geodata, locations='iso3',
#                     locationmode="ISO-3",
#                     color='Human Development Index',
#                     color_continuous_scale="Viridis",
#                     # range_color=(0, 1),
#                     animation_frame = "Year"
#                     )
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# fig.show()