This notebook checks, with the help of some KPIs from chronix2grid, the data generated by the script "generate_all" that will be used for the WCCI 2022 competition.

Goal:
<img src="images/target_em.png" width="30%" height="30%">

In [None]:
import numpy as np
import os
import pandas as pd
import grid2op
from grid2op.Chronics import ChangeNothing
from chronix2grid.kpi.Generator_parameter_checker import EnergyMix_AprioriChecker
import plotly
import plotly.graph_objects as go
from grid2op.PlotGrid import NUKE_COLOR, THERMAL_COLOR, WIND_COLOR, SOLAR_COLOR, HYDRO_COLOR
import json
import copy

# for pandas interactive plots
import cufflinks as cf
# NOTE(review): go_offline() is immediately followed by set_config_file(offline=False, ...).
# Presumably the first call switches the current session to offline plotting while the second
# one only writes the cufflinks configuration file -- confirm the two settings do not conflict.
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)


In [None]:
# --- grid description -------------------------------------------------------
# The environment is created with the `ChangeNothing` chronics class; the cells
# below only read its generator description (`gen_type`, `name_gen`, `gen_pmax`).
env_name = "../example/custom/input/generation/case118_l2rpn_wcci_benjamin"
env = grid2op.make(env_name, chronics_class=ChangeNothing)

# --- generated data ---------------------------------------------------------
# Root folder holding one sub-folder per generated chronic, named
# "<start date>_<scenario id>", and the number of scenario ids to look for.
path_full_gen = "../example/custom/output/fixed_chronics_complete"
n_scenarios = 1

# Start dates of the generated chronics: every 7th day from 2050-01-03 up to and
# including 2050-12-26 (52 dates), formatted as "YYYY-MM-DD" strings.
# NOTE: despite its name, `li_months` holds weekly (not monthly) start dates.
li_months = (
    pd.date_range(start="2050-01-03", end="2050-12-26", freq="7D")
    .strftime("%Y-%m-%d")
    .tolist()
)

In [None]:
# Read the generated load and production time series of every
# (scenario id, start date) pair found on disk and stack them into two long
# data frames, `loads_p` and `prods_p`.
loads_chunks = []
prods_chunks = []
missing_dirs = []  # expected chronic folders that were not found
for scen_id in range(n_scenarios):
    for start_date in li_months:
        path_data_generated = os.path.join(path_full_gen, f"{start_date}_{scen_id}")
        if not os.path.exists(path_data_generated):
            # best effort: a missing chronic is skipped, but remembered so that it can be reported
            missing_dirs.append(path_data_generated)
            continue
        # when I read the data I remove the first and last rows because they are redundant
        # (data start at 23h55 and end at 00h00)
        loads_chunks.append(
            pd.read_csv(os.path.join(path_data_generated, "load_p.csv.bz2"), sep=";").iloc[1:-1]
        )
        prods_chunks.append(
            pd.read_csv(os.path.join(path_data_generated, "prod_p.csv.bz2"), sep=";").iloc[1:-1]
        )

if not loads_chunks:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate"
    raise FileNotFoundError(
        f"No generated chronics found in \"{path_full_gen}\" "
        f"({len(missing_dirs)} expected folder(s) missing). "
        "Check `path_full_gen`, `li_months` and `n_scenarios`."
    )
if missing_dirs:
    # every KPI below is computed on the loaded chronics only: make the gap visible
    print(f"WARNING: {len(missing_dirs)} chronic folder(s) not found and skipped, "
          f"first one: \"{missing_dirs[0]}\"")

loads_p = pd.concat(loads_chunks, ignore_index=True)
prods_p = pd.concat(prods_chunks, ignore_index=True)

# I Check the overall distribution (energy mix)

In [None]:
# Total production per generator type, summed over all loaded time steps and
# over all generators of that type (columns are selected with a mask on env.gen_type).
labels2 = ['solar', 'wind', 'hydro', "nuclear", "thermal"]
values2 = [prods_p.iloc[:, env.gen_type == gen_kind].sum().sum() for gen_kind in labels2]

# Text shown on each slice: the summed 5-minute values divided by 12 and rounded
# to the nearest thousand (assumes the data are in MW, so that /12 gives MWh -- TODO confirm).
slice_texts = [f"{round(el / 12., -3):,.0f} MWh" for el in values2]
slice_colors = [SOLAR_COLOR, WIND_COLOR, HYDRO_COLOR, NUKE_COLOR, THERMAL_COLOR]

energy_mix_pie = go.Pie(labels=labels2,
                        values=values2,
                        marker_colors=slice_colors,
                        text=slice_texts)
fig = go.Figure(data=[energy_mix_pie])
fig.update_layout(title=f"Share of energy produced depending on energy type")
fig.show()

# II Check the distribution per generation type

## II A Wind

In [None]:
# French reference data: one tab-separated, ISO-8859-1 encoded eCO2mix archive per
# year, for 2012 to 2020 included. `french_dfs[k]` is the frame of `all_year_ref[k]`.
french_ref_data = "ref_french_data"
all_year_ref = list(range(2012, 2021))
french_dfs = []
for year_ in all_year_ref:
    archive_name = f"eCO2mix_RTE_Annuel-Definitif_{year_}.zip"
    temp_df = pd.read_csv(
        os.path.join(french_ref_data, archive_name),
        sep="\t",
        encoding="ISO-8859-1",
        index_col=False,
    )
    french_dfs.append(temp_df)

In [None]:
# Folder of the renewable ninja reference patterns shipped with chronix2grid
# (also used further down to read the solar pattern).
from chronix2grid.getting_started.example.input.kpi.case118_l2rpn_neurips_1x.France.renewable_ninja import renewable_ninja_pattern_path

# Reference wind pattern (";"-separated csv).
wind_pattern_file = os.path.join(renewable_ninja_pattern_path, "wind.csv")
nrel_wind_data = pd.read_csv(wind_pattern_file, sep=";")

In [None]:
# Names of the generators whose type is "wind" (boolean mask on env.gen_type applied to env.name_gen)
gen_wind_name = env.name_gen[env.gen_type == "wind"]

In [None]:
# Production of the wind generators only (columns selected with a mask on env.gen_type).
total_wind = prods_p.iloc[:, env.gen_type == "wind"]

# The reference data are hourly or half-hourly whereas the generated data have a
# 5 minute resolution, so the generated data are aggregated first:
#   1. rolling sum over a window of n steps (n=12 for 1h, n=6 for 30 mins),
#   2. keep one row every n rows, starting at row n: row 0 is skipped because
#      its window is incomplete and its rolling sum is therefore NaN,
#   3. renumber the remaining rows from 0.
wind_rolling_1h = total_wind.rolling(12).sum()
total_wind_hourly = wind_rolling_1h.iloc[12::12].reset_index(drop=True)

wind_rolling_30m = total_wind.rolling(6).sum()
total_wind_30m = wind_rolling_30m.iloc[6::6].reset_index(drop=True)

In [None]:
# Histogram of the wind production, each series being divided by its own maximum:
# two French reference years against the generated data (30 min aggregation).
fig = go.Figure()
# `french_dfs` is aligned with `all_year_ref`, so the last two years must be paired
# with the last two frames (indexing `french_dfs` with the position inside
# `all_year_ref[-2:]` would plot the first two years under the labels of the last two).
for year_, ref_df in zip(all_year_ref[-2:], french_dfs[-2:]):
    # float copy: the division below never touches the reference frame
    tmp_ = ref_df["Eolien"].dropna().to_numpy(dtype=float)
    tmp_ = tmp_ / tmp_.max()
    fig.add_trace(go.Histogram(x=tmp_, name=f'{year_}'))
# sum of all generated wind generators
tmp_ = 1.0 * total_wind_30m.sum(axis=1)
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (sum)"))
# first generated wind generator only
tmp_ = 1.0 * total_wind_30m[gen_wind_name[0]]
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (one)"))
fig.update_layout(
    title=f"Distribution of the wind energy (ref = France)"
)
fig.show()

In [None]:
# Histogram of the step-to-step variation (np.diff) of the wind production, each
# series being divided by its largest increase: two French reference years against
# the generated data (30 min aggregation).
fig = go.Figure()
# `french_dfs` is aligned with `all_year_ref`, so the last two years must be paired
# with the last two frames (indexing `french_dfs` with the position inside
# `all_year_ref[-2:]` would plot the first two years under the labels of the last two).
for year_, ref_df in zip(all_year_ref[-2:], french_dfs[-2:]):
    tmp_ = np.diff(ref_df["Eolien"].dropna().to_numpy(dtype=float))
    tmp_ = tmp_ / tmp_.max()
    fig.add_trace(go.Histogram(x=tmp_, name=f'{year_}'))
# sum of all generated wind generators
tmp_ = 1.0 * np.diff(total_wind_30m.sum(axis=1).values)
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (sum)"))
# first generated wind generator only
tmp_ = 1.0 * np.diff(total_wind_30m[gen_wind_name[0]])
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (one)"))
fig.update_layout(
    title=f"Distribution of the variation of wind energy (ref = France)"
)
fig.show()

In [None]:
# Histogram of the wind production, each series being divided by its own maximum:
# renewable ninja reference against the generated data (hourly aggregation).
fig = go.Figure()
# sum over all columns of the reference / all generated wind generators
tmp_ = nrel_wind_data.sum(axis=1)
tmp_ = tmp_ / tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name=f'renewable ninja data (sum)'))
tmp_ = 1.0 * total_wind_hourly.sum(axis=1)
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (sum)"))


# first column of the reference / first generated wind generator
tmp_ = nrel_wind_data.iloc[:, 0]
# Out-of-place division on purpose: `tmp_ /= ...` on a column selected from
# `nrel_wind_data` may, depending on the pandas version, write the normalised values
# back into the reference frame and silently change what later cells read.
tmp_ = tmp_ / tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name=f'renewable ninja data (one)'))
tmp_ = 1.0 * total_wind_hourly[gen_wind_name[0]]
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (one)"))
fig.update_layout(
    title=f"Distribution of the wind energy (ref = renewable ninja)"
)
fig.show()

In [None]:
# Histogram of the step-to-step variation (np.diff) of the wind production:
# renewable ninja reference against the generated data (hourly aggregation).
fig = go.Figure()

# (variation series, legend entry) pairs, in plotting order
variation_traces = [
    (np.diff(nrel_wind_data.sum(axis=1)), f'renewable ninja data (sum)'),
    (1.0 * np.diff(total_wind_hourly.sum(axis=1)), "generated (sum)"),
    (np.diff(nrel_wind_data.iloc[:, 0]), f'renewable ninja data (one)'),
    (1.0 * np.diff(total_wind_hourly[gen_wind_name[0]]), "generated (one)"),
]
for tmp_, trace_name in variation_traces:
    # each series is divided by its own largest increase; np.diff returned a new
    # array, so the in-place division does not touch the source data
    tmp_ /= tmp_.max()
    fig.add_trace(go.Histogram(x=tmp_, name=trace_name))

fig.update_layout(
    title=f"Distribution of the variation of wind energy (ref = renewable ninja)"
)
fig.show()

## II B Solar

To best view these plots, we strongly encourage you to zoom in to remove the huge impact of the "0" values (there is no solar production at night, no kidding...).

In [None]:
# Reference solar pattern (";"-separated csv) read from the renewable ninja pattern folder;
# relies on `renewable_ninja_pattern_path` imported in the wind section above.
nrel_solar_data = pd.read_csv(os.path.join(renewable_ninja_pattern_path, "solar.csv"), sep=";")

In [None]:
# Names and production of the solar generators only (mask on env.gen_type).
is_solar_gen = env.gen_type == "solar"
gen_solar_name = env.name_gen[is_solar_gen]
total_solar = prods_p.iloc[:, is_solar_gen]

# The reference data are hourly or half-hourly whereas the generated data have a
# 5 minute resolution, so the generated data are aggregated first:
#   1. rolling sum over a window of n steps (n=12 for 1h, n=6 for 30 mins),
#   2. keep one row every n rows, starting at row n: row 0 is skipped because
#      its window is incomplete and its rolling sum is therefore NaN,
#   3. renumber the remaining rows from 0.
solar_rolling_1h = total_solar.rolling(12).sum()
total_solar_hourly = solar_rolling_1h.iloc[12::12].reset_index(drop=True)

solar_rolling_30m = total_solar.rolling(6).sum()
total_solar_30m = solar_rolling_30m.iloc[6::6].reset_index(drop=True)

In [None]:
# Histogram of the solar production, each series being divided by its own maximum:
# two French reference years against the generated data (30 min aggregation).
fig = go.Figure()
# `french_dfs` is aligned with `all_year_ref`, so the last two years must be paired
# with the last two frames (indexing `french_dfs` with the position inside
# `all_year_ref[-2:]` would plot the first two years under the labels of the last two).
for year_, ref_df in zip(all_year_ref[-2:], french_dfs[-2:]):
    # float copy: the division below never touches the reference frame
    tmp_ = ref_df["Solaire"].dropna().to_numpy(dtype=float)
    tmp_ = tmp_ / tmp_.max()
    fig.add_trace(go.Histogram(x=tmp_, name=f'{year_}'))
# sum of all generated solar generators
tmp_ = 1.0 * total_solar_30m.sum(axis=1)
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (sum)"))
# first generated solar generator only
tmp_ = 1.0 * total_solar_30m[gen_solar_name[0]]
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (one)"))
fig.update_layout(
    title=f"Distribution of the solar energy (ref = France)"
)
fig.show()

In [None]:
# Histogram of the step-to-step variation (np.diff) of the solar production, each
# series being divided by its largest increase: two French reference years against
# the generated data (30 min aggregation).
fig = go.Figure()
# `french_dfs` is aligned with `all_year_ref`, so the last two years must be paired
# with the last two frames (indexing `french_dfs` with the position inside
# `all_year_ref[-2:]` would plot the first two years under the labels of the last two).
for year_, ref_df in zip(all_year_ref[-2:], french_dfs[-2:]):
    tmp_ = np.diff(ref_df["Solaire"].dropna().to_numpy(dtype=float))
    tmp_ = tmp_ / tmp_.max()
    fig.add_trace(go.Histogram(x=tmp_, name=f'{year_}'))
# sum of all generated solar generators
tmp_ = 1.0 * np.diff(total_solar_30m.sum(axis=1).values)
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (sum)"))
# first generated solar generator only
tmp_ = 1.0 * np.diff(total_solar_30m[gen_solar_name[0]])
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (one)"))
fig.update_layout(
    title=f"Distribution of the variation of solar energy (ref = France)"
)
fig.show()

In [None]:
# Histogram of the solar production, each series being divided by its own maximum:
# renewable ninja reference against the generated data (hourly aggregation).
fig = go.Figure()
# sum over all columns of the reference / all generated solar generators
tmp_ = nrel_solar_data.sum(axis=1)
tmp_ = tmp_ / tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name=f'renewable ninja data (sum)'))
tmp_ = 1.0 * total_solar_hourly.sum(axis=1)
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (sum)"))


# first column of the reference / first generated solar generator
tmp_ = nrel_solar_data.iloc[:, 0]
# Out-of-place division on purpose: `tmp_ /= ...` on a column selected from
# `nrel_solar_data` may, depending on the pandas version, write the normalised values
# back into the reference frame and silently change what later cells read from it.
tmp_ = tmp_ / tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name=f'renewable ninja data (one)'))
tmp_ = 1.0 * total_solar_hourly[gen_solar_name[0]]
tmp_ /= tmp_.max()
fig.add_trace(go.Histogram(x=tmp_, name="generated (one)"))
fig.update_layout(
    title=f"Distribution of the solar energy (ref = renewable ninja)"
)
fig.show()

In [None]:
# Histogram of the step-to-step variation (np.diff) of the solar production:
# renewable ninja reference against the generated data (hourly aggregation).
fig = go.Figure()

# (variation series, legend entry) pairs, in plotting order
variation_traces = [
    (np.diff(nrel_solar_data.sum(axis=1)), f'renewable ninja data (sum)'),
    (1.0 * np.diff(total_solar_hourly.sum(axis=1)), "generated (sum)"),
    (np.diff(nrel_solar_data.iloc[:, 0]), f'renewable ninja data (one)'),
    (1.0 * np.diff(total_solar_hourly[gen_solar_name[0]]), "generated (one)"),
]
for tmp_, trace_name in variation_traces:
    # each series is divided by its own largest increase; np.diff returned a new
    # array, so the in-place division does not touch the source data
    tmp_ /= tmp_.max()
    fig.add_trace(go.Histogram(x=tmp_, name=trace_name))

fig.update_layout(
    title=f"Distribution of the variation of solar energy (ref = renewable ninja)"
)
fig.show()

In [None]:
# Hourly time series: first column of the renewable ninja solar pattern against two
# generated solar generators (positions 0 and 5 among the solar generators).
solar_pmax = env.gen_pmax[env.gen_type == "solar"]

fig = go.Figure()
fig.add_trace(go.Scatter(y=nrel_solar_data.iloc[:, 0], name="renewable ninja (one)"))
for gen_pos in (0, 5):
    # hourly sum of twelve 5-minute values, divided by 12 * pmax of that generator
    scaled_prod = total_solar_hourly.iloc[:, gen_pos] / (solar_pmax[gen_pos] * 12.)
    fig.add_trace(go.Scatter(y=scaled_prod, name=f"generated ({gen_pos})"))
fig.update_layout(title=f"Generation horaire solaire (un generateur)")
fig

In [None]:
# 5-minute time series of the first generated solar generator, divided by its pmax.
first_solar_pmax = env.gen_pmax[env.gen_type == "solar"][0]
first_solar_scaled = total_solar.iloc[:, 0] / first_solar_pmax

fig = go.Figure()
fig.add_trace(go.Scatter(y=first_solar_scaled, name="generated (one)"))
fig.update_layout(title=f"Generation du solaire (5 mins) pour les donnees generees (1 generateur)")
fig

In [None]:
# Time series of three columns (positions 0, 5 and 12) of the renewable ninja solar pattern.
fig = go.Figure()
for col_pos in (0, 5, 12):
    fig.add_trace(go.Scatter(y=nrel_solar_data.iloc[:, col_pos],
                             name=f"renewable ninja ({col_pos})"))
fig.update_layout(title=f"Generation du solaire (1h) pour 3 generateurs de renewable ninja")
fig

In [None]:
# Hourly time series of the total solar production: renewable ninja reference
# against the generated data.
fig = go.Figure()
# reference: row-wise sum divided by the number of columns (i.e. the row-wise mean)
fig.add_trace(go.Scatter(y=nrel_solar_data.sum(axis=1) / nrel_solar_data.shape[1], name="renewable ninja (sum)"))
# generated: hourly sums of all solar generators divided by 12 * total solar pmax.
# The trace is the sum over all generators, hence the "(sum)" legend entry
# (it used to be mislabelled "generated (one)").
fig.add_trace(go.Scatter(y=total_solar_hourly.sum(axis=1) / (np.sum(env.gen_pmax[env.gen_type == "solar"]) * 12.),
                         name="generated (sum)"))
fig.update_layout(
    title=f"Generation du solaire (1h) totale"
)
fig

## II C Nuclear

## II D Hydro

# III Spatial analysis

This part is dedicated to the analysis of the spatial correlation, mainly between loads, solar and wind generators.

## III A) Wind

In [None]:
# Pairwise correlation between the generated wind generators at 5 minute resolution.
wind_corr_5mins = total_wind.corr()
wind_5mins_labels = wind_corr_5mins.columns
wind_5mins_heatmap = go.Heatmap(z=wind_corr_5mins, x=wind_5mins_labels, y=wind_5mins_labels)

fig = go.Figure(data=wind_5mins_heatmap)
fig.update_layout(title=f"Heatmap of spatial correlation for the wind generated data (5 mins)")
fig.show()

In [None]:
# Pairwise correlation between the generated wind generators after hourly aggregation.
wind_corr_hourly = total_wind_hourly.corr()
wind_hourly_labels = wind_corr_hourly.columns
wind_hourly_heatmap = go.Heatmap(z=wind_corr_hourly, x=wind_hourly_labels, y=wind_hourly_labels)

fig = go.Figure(data=wind_hourly_heatmap)
fig.update_layout(title=f"Heatmap of spatial correlation for the wind generated data (hourly)")
fig.show()

In [None]:
# Pairwise correlation between the columns of the renewable ninja wind pattern.
nrel_wind_data_corr = nrel_wind_data.corr()
ninja_wind_labels = nrel_wind_data_corr.columns
ninja_wind_heatmap = go.Heatmap(z=nrel_wind_data_corr, x=ninja_wind_labels, y=ninja_wind_labels)

fig = go.Figure(data=ninja_wind_heatmap)
fig.update_layout(title=f"Heatmap of spatial correlation for the wind data (renewable ninja - hourly)")
fig.show()

## III B) Solar

In [None]:
# Pairwise correlation between the generated solar generators at 5 minute resolution.
solar_corr_5mins = total_solar.corr()
solar_5mins_labels = solar_corr_5mins.columns
solar_5mins_heatmap = go.Heatmap(z=solar_corr_5mins, x=solar_5mins_labels, y=solar_5mins_labels)

fig = go.Figure(data=solar_5mins_heatmap)
fig.update_layout(title=f"Heatmap of spatial correlation for the solar generated data (5 mins)")
fig.show()

In [None]:
# Pairwise correlation between the columns of the renewable ninja solar pattern.
nrel_solar_data_corr = nrel_solar_data.corr()
ninja_solar_labels = nrel_solar_data_corr.columns
ninja_solar_heatmap = go.Heatmap(z=nrel_solar_data_corr, x=ninja_solar_labels, y=ninja_solar_labels)

fig = go.Figure(data=ninja_solar_heatmap)
fig.update_layout(title=f"Heatmap of spatial correlation for the solar data (renewable ninja - hourly)")
fig.show()

## III C) Loads

In [None]:
# Pairwise correlation between the generated loads at 5 minute resolution.
load_data_corr = loads_p.corr()
load_labels = load_data_corr.columns
load_heatmap = go.Heatmap(z=load_data_corr, x=load_labels, y=load_labels)

fig = go.Figure(data=load_heatmap)
fig.update_layout(title=f"Heatmap of spatial correlation for the load generated data (5 mins resolution)")
fig.show()

In [None]:
# Display the generated time series of a single load.
# NOTE(review): the column name is hard-coded -- confirm "load_112_93" exists in the generated load_p files.
loads_p["load_112_93"]