# This notebooks generate the data that will help the calibration of an environment and its analysis

In this notebook, you will load a grid environment with existing chronics and extract data on its electrical flows. This will allow you to study the flows, their thermal limits and the resulting congestions, either to calibrate the thermal limits or to study the level of difficulty of the environment.

As an output of this notebook, you will get a large dataframe of flows and other grid state information over all the scenarios that were run (with the do-nothing agent, and possibly other basic agents). If thermal limits already exist for your environment, you can afterwards generate a summary dataframe of the observed congestions.


<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#This-notebooks-generate-the-data-that-will-help-the-calibration-of-an-environment-and-its-analysis" data-toc-modified-id="This-notebooks-generate-the-data-that-will-help-the-calibration-of-an-environment-and-its-analysis-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>This notebooks generate the data that will help the calibration of an environment and its analysis</a></span><ul class="toc-item"><li><span><a href="#Config-parameters-for-data-generation" data-toc-modified-id="Config-parameters-for-data-generation-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Config parameters for data generation</a></span></li><li><span><a href="#Load-the-environment" data-toc-modified-id="Load-the-environment-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Load the environment</a></span></li><li><span><a href="#Generate-the-data" data-toc-modified-id="Generate-the-data-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Generate the data</a></span><ul class="toc-item"><li><span><a href="#Run-basic-agents-on-scenarios-and-get-flow-results-in-dataframe" data-toc-modified-id="Run-basic-agents-on-scenarios-and-get-flow-results-in-dataframe-1.3.1"><span class="toc-item-num">1.3.1&nbsp;&nbsp;</span>Run basic agents on scenarios and get flow results in dataframe</a></span></li><li><span><a href="#saving-the-data-in-a-single-dataframe" data-toc-modified-id="saving-the-data-in-a-single-dataframe-1.3.2"><span class="toc-item-num">1.3.2&nbsp;&nbsp;</span>saving the data in a single dataframe</a></span><ul class="toc-item"><li><span><a href="#check-Energy-mix" data-toc-modified-id="check-Energy-mix-1.3.2.1"><span class="toc-item-num">1.3.2.1&nbsp;&nbsp;</span>check Energy mix</a></span></li></ul></li><li><span><a href="#Bonus:-build-specific-dataframe-on-overloads-based-on-flow-results-&amp;-a-lighter-df-analysis" 
data-toc-modified-id="Bonus:-build-specific-dataframe-on-overloads-based-on-flow-results-&amp;-a-lighter-df-analysis-1.3.3"><span class="toc-item-num">1.3.3&nbsp;&nbsp;</span>Bonus: build specific dataframe on overloads based on flow results &amp; a lighter df analysis</a></span><ul class="toc-item"><li><span><a href="#summary-overload-dataframe" data-toc-modified-id="summary-overload-dataframe-1.3.3.1"><span class="toc-item-num">1.3.3.1&nbsp;&nbsp;</span>summary overload dataframe</a></span></li><li><span><a href="#ligther-df_analysis" data-toc-modified-id="ligther-df_analysis-1.3.3.2"><span class="toc-item-num">1.3.3.2&nbsp;&nbsp;</span>ligther df_analysis</a></span></li></ul></li></ul></li></ul></li></ul></div>

In [None]:
import os
import re
import numpy as np
import pandas as pd
import grid2op
import cufflinks as cf
from grid2op.PlotGrid import PlotMatplot
import pyarrow #necessary for saving dataframe in feather format
from grid2op.Parameters import Parameters
from grid2op.Chronics import Multifolder, GridStateFromFileWithForecasts
from lightsim2grid import LightSimBackend
from tqdm.notebook import tqdm
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
#from grid2op.PlotGrid import PlotMatplot
from grid2op.PlotGrid import NUKE_COLOR, THERMAL_COLOR, HYDRO_COLOR, SOLAR_COLOR, WIND_COLOR
import matplotlib.pyplot as matplt
import seaborn as sns
%matplotlib inline

from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)

import ipywidgets as widgets
from ipywidgets import Layout

## Config parameters for data generation

In [None]:
# Parameters cell (tagged `parameters`): papermill can override these values
# when the notebook is executed from the command line.

# --- environment selection ---
env_name = "l2rpn_neurips_2020_track2_small"  # previously tried: 'l2rpn_idf_2023_v0'
env_mix_name = "l2rpn_neurips_2020_track2_x3"

# --- folders ---
input_folder = "Inputs"  # where to load some data
output_folder = "Outputs"  # where to save generated dataframes

# --- run settings ---
n_scenarios_to_look_at = 120  # number of scenarios to run (2 is handy for a quick trial)
random_seed = 0
nb_process = 10  # nb of cores to use when running scenarios

# --- agents ---
action_file_name = "l2rpn_wcci_actions.json"  # file (inside input_folder) with the unitary actions to run
only_do_nothing_agent = False  # True: only run the do nothing agent, no other unitary action agent


In [None]:
# Create the output folder if needed. exist_ok=True replaces the separate
# existence check, which could fail if the folder appeared in between.
os.makedirs(output_folder, exist_ok=True)

## Load the environment

In [None]:
# Location of the environment on disk, and of the chronics of the selected mix
# inside it.
# NOTE(review): this is an absolute, machine-specific location; a relative
# layout ("../generation_donnees/{env_name}") was used previously.
path_env = f"/home/marotant/data_grid2op/{env_name}"
path_chronix = f"{path_env}/{env_mix_name}/chronics/"

In [None]:
# Environment parameters: NO_OVERFLOW_DISCONNECTION is switched on for the runs
# (presumably so that overloaded lines stay connected and their flows remain
# observable -- TODO confirm against the grid2op Parameters documentation).
params = Parameters()
params.NO_OVERFLOW_DISCONNECTION = True

# Build the reference environment from the local folders, using the LightSim
# backend and reading the chronics as a Multifolder of
# GridStateFromFileWithForecasts.
env_ref = grid2op.make(
    path_env,
    chronics_path=path_chronix,
    param=params,
    backend=LightSimBackend(),
    chronics_class=Multifolder,
    data_feeding_kwargs={"gridvalueClass": GridStateFromFileWithForecasts},
)

# env_ref is indexed by mix name; env_ref_ is the environment of the selected mix.
env_ref_ = env_ref[env_mix_name]

In [None]:
# Build the list of actions to test: the empty action (do nothing) always comes
# first, followed by the unitary actions read from the action file unless only
# the do nothing agent is requested.
from utils.OneChangeAgent import load_actions

actions = [{}]  # do nothing
if not only_do_nothing_agent:
    actions.extend(load_actions(os.path.join(input_folder, action_file_name)))
actions

## Generate the data

### Run basic agents on scenarios and get flow results in dataframe

In [None]:
# Run one OneChangeThenOnlyReconnect agent per action over the scenarios and
# store each agent's results in its own file, so that a computation is not lost
# if the kernel dies and is not redone when the cell is run again.
from grid2op.Runner import Runner
from utils.Runner import Runner_Calibration
from utils.OneChangeAgent import OneChangeThenOnlyReconnect, load_actions, set_agent_name
from utils.Create_Run_Dataframe import create_run_df,get_size
import time

# observation attributes handed to the runner and to create_run_df
obs_var_to_keep = ["a_or", "load_p", "gen_p", "day_of_week", "hour_of_day", "month"]

env_output_folder = os.path.join(output_folder, env_mix_name)
if not os.path.exists(env_output_folder):
    os.makedirs(env_output_folder)


def _chunk_folder(index):
    # Folder number `index` in which per-agent result files are written.
    return os.path.join(env_output_folder, 'FlowDataset_' + str(index))


def _agent_file(folder, name):
    # Result file of agent `name` inside `folder`.
    return os.path.join(folder, 'FlowDataset_' + name + '.parket')


agent_names = []

# Results are spread over numbered folders: once the current folder grows past
# max_folder_size_gb, the next agents are written to a new one.
counter_folders = 0
max_folder_size_gb = 7

current_env_output_folder = _chunk_folder(counter_folders)
os.makedirs(current_env_output_folder, exist_ok=True)

df_analysis = pd.DataFrame()

for i, act in enumerate(actions):
    action = env_ref_.action_space(act)
    agent_name = set_agent_name(action, agent_names)
    agent_names.append(agent_name)
    print("agent number: "+str(i))
    print(agent_name)
    print(action)

    df_file_path = _agent_file(current_env_output_folder, agent_name)

    # Not in the current folder and that folder is full: move on to the next
    # folder before deciding whether the run is still needed.
    if not os.path.exists(df_file_path) and get_size(current_env_output_folder) > max_folder_size_gb*10**9:
        counter_folders += 1
        current_env_output_folder = _chunk_folder(counter_folders)
        os.makedirs(current_env_output_folder, exist_ok=True)
        df_file_path = _agent_file(current_env_output_folder, agent_name)

    # Already computed during a previous execution: nothing to do for this agent.
    if os.path.exists(df_file_path):
        print("file exist: "+df_file_path)
        continue

    # Otherwise run the agent on the scenarios and save its dataframe.
    oneAction_agent_class = OneChangeThenOnlyReconnect.gen_next(action)
    runner_ref = Runner_Calibration(obs_var_to_keep, **env_ref_.get_params_for_runner(), agentClass=oneAction_agent_class)
    res_ref = runner_ref.run(nb_episode=n_scenarios_to_look_at, nb_process=nb_process, add_detailed_output=True)

    agent_df = create_run_df(env_ref, env_ref_.env_name, res_ref, agent_name, available_obs=obs_var_to_keep)
    print("saving agent results")
    agent_df.to_parquet(df_file_path, compression='brotli')


We saved one dataframe per agent as soon as it was computed, in chunk folders, so that the computation is not lost if the kernel dies.

We used the Parquet file format (the files carry the `.parket` extension) so that the files can easily be merged when reloading them.

We then create one merged file per chunk folder.

And finally one full dataframe file with all the results.

Proceeding step by step makes it possible to run this with limited RAM.

In [None]:
# Header line printed before the folder tree shown by this cell.
print("here are the folders and files created")
def list_files(startpath):
    """Print the tree of folders and files found under ``startpath``.

    Each folder is printed as ``name/`` and indented by four spaces per level
    below ``startpath``; the files of a folder are printed one level deeper
    than the folder itself.

    Parameters
    ----------
    startpath : str
        Root folder of the tree to print. A trailing path separator is
        accepted and does not change the output.
    """
    # Normalise first: with a trailing separator the root would otherwise be
    # printed as "/" and its sub-folders would lose one level of indentation.
    startpath = os.path.normpath(startpath)
    for root, dirs, files in os.walk(startpath):
        # Depth is computed from the path relative to the root rather than by
        # removing the root as a substring, which can match elsewhere in the path.
        relative_root = os.path.relpath(root, startpath)
        level = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1
        indent = ' ' * 4 * level
        print('{}{}/'.format(indent, os.path.basename(root)))
        subindent = ' ' * 4 * (level + 1)
        for f in files:
            print('{}{}'.format(subindent, f))
          
# Show everything found under the output folder of the selected environment mix.
list_files(env_output_folder)

### saving the data in a single dataframe
Creating one merged dataframe file per chunk folder `FlowDataset_i`

In [None]:
# For every chunk folder found in the environment output folder, call
# save_light_df_file on it (plain files at that level are skipped).
from utils.Create_Run_Dataframe import save_light_df_file

for directory in os.listdir(env_output_folder):
    candidate_folder = os.path.join(env_output_folder, directory)
    if not os.path.isdir(candidate_folder):
        continue
    print(directory)
    dir_output_folder = candidate_folder
    save_light_df_file(dir_output_folder)




Creating one fully merged dataframe from all the `FlowDataset_i.file` files

In [None]:
# Build df_analysis, the dataframe with the results of all agents: reload it if
# the merged file already exists, otherwise rebuild it from the feather files
# found in the environment output folder.
df_analysis_file_path = os.path.join(env_output_folder, 'FlowDataset.file')

# Feather files that this notebook itself writes in the same folder and that
# are derived from df_analysis: they must never be merged back into it.
derived_file_names = {
    'FlowDataset_light.file',
    'Overloads_info_Agents.file',
    'Overloads_info_Best_Agent.file',
}

if os.path.isfile(df_analysis_file_path):
    print("loading the file from: "+df_analysis_file_path)
    df_analysis = pd.read_feather(df_analysis_file_path)
    if only_do_nothing_agent:
        # reset_index returns a new frame, which avoids modifying a slice in place
        df_analysis = df_analysis[df_analysis.agent == "do_nothing"].reset_index(drop=True)
else:
    chunk_frames = []
    # sorted(): os.listdir gives no ordering guarantee, sorting keeps the row
    # order of the merged dataframe reproducible from one execution to the next
    for path in sorted(os.listdir(env_output_folder)):
        full_path = os.path.join(env_output_folder, path)
        if os.path.isfile(full_path) and path.endswith(".file") and path not in derived_file_names:
            print(path)
            chunk_frames.append(pd.read_feather(full_path))

    # A single concat replaces the repeated DataFrame.append calls
    # (DataFrame.append was removed in pandas 2.0 and copied the data each time).
    if chunk_frames:
        df_analysis = pd.concat(chunk_frames, ignore_index=True)
    else:
        df_analysis = pd.DataFrame()

df_analysis.head(10)

In [None]:

# Write the merged dataframe to disk once; an existing file is left untouched.
if not os.path.isfile(df_analysis_file_path):
    print("saving file in: "+df_analysis_file_path)
    df_analysis.to_feather(df_analysis_file_path)
else:
    print("this file already exist: "+df_analysis_file_path)

#### check Energy mix

In [None]:
# Total of each energy source column over the whole dataframe, shown as a pie
# chart. The columns are cast to float32 before summing (presumably because
# they are stored in a narrower dtype -- TODO confirm).
energy_sources = ["hydro", "wind", "solar", "nuclear", "thermal"]
mix_df = df_analysis[energy_sources].astype("float32").sum()
mix_df.plot.pie(autopct='%1.0f%%')

### Bonus: build specific dataframe on overloads based on flow results & a lighter df analysis
This part only applies if base thermal limits already exist for the environment.

#### summary overload dataframe

In [None]:
from utils.Create_Run_Dataframe import fast_pd_concat,get_overload_info_df

# Line names and thermal limits are both taken from the environment of the
# selected mix, so that position i of one always matches position i of the other.
lines_name = env_ref_.name_line
thermal_limits = env_ref_.get_thermal_limit()

# Indices of the lines whose flow reaches their thermal limit at least once.
# This is done column by column on purpose: comparing all the line columns at
# once was found to be too memory costly.
indicesLineOverloaded = [
    i
    for i, (line_name, limit) in enumerate(zip(lines_name, thermal_limits))
    if (df_analysis[line_name] >= limit).any()
]



In [None]:
# Overload summary for all agents: computed once with get_overload_info_df and
# saved, then simply reloaded on later executions.
overload_file_path = os.path.join(env_output_folder, 'Overloads_info_Agents.file')

if not os.path.isfile(overload_file_path):
    overloads_info = get_overload_info_df(df_analysis, lines_name, thermal_limits, indicesLineOverloaded, verbose=True)
    print("saving file in: "+overload_file_path)
    overloads_info.to_feather(overload_file_path)
else:
    print("loading the file from: "+overload_file_path)
    overloads_info = pd.read_feather(overload_file_path)
    if only_do_nothing_agent:
        # keep only the rows of the do nothing agent
        do_nothing_rows = overloads_info.agent == "do_nothing"
        overloads_info = overloads_info[do_nothing_rows].reset_index(drop=True)

overloads_info.head(10)

#### ligther df_analysis
Given that we know the number of overloads at each time step for each agent, we take the best of all agents at each time step to identify what could have been the best configuration, with the fewest overloads. The result is a "best_agent". We can then keep only the "do_nothing" agent and the "best_agent" for a more straightforward analysis.


In [None]:
def get_overload_df(df_analysis, line_names, thermal_limits, plot=True):
    """Count, for every row of ``df_analysis``, how many lines are overloaded.

    A line is counted as overloaded on a row when its value in ``df_analysis``
    is greater than or equal to its thermal limit.

    Parameters
    ----------
    df_analysis : pandas.DataFrame
        Must contain one column per name in ``line_names`` plus the columns
        "scenario", "agent" and "datetimes".
    line_names : sequence of str
        Names of the line columns to check.
    thermal_limits : sequence of numbers
        Thermal limit of each line, in the same order as ``line_names``.
    plot : bool, default True
        When True, draw a bar chart of the total number of overloads per agent.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df_analysis``, with the columns "nb_total" (number of
        overloaded lines on that row), "scenario", "agent" and "datetimes".
    """
    # Accumulate in a plain array, one line at a time, so that only one boolean
    # column is held in memory at once and no index alignment is involved.
    nb_total = np.zeros(df_analysis.shape[0])
    for i, l in enumerate(line_names):
        print("computing for line " + l)
        nb_total += (df_analysis[l] >= thermal_limits[i]).to_numpy()

    # Reuse the index of df_analysis: the result can then be used to select
    # rows of df_analysis by label, whatever that index is.
    nb_overloads_df = pd.DataFrame({'nb_total': nb_total}, index=df_analysis.index)
    for column in ("scenario", "agent", "datetimes"):
        nb_overloads_df[column] = df_analysis[column]

    if plot:
        # Only nb_total is summed (the other columns are not counts), and the
        # tick rotation is passed to the plot itself so it applies to this chart.
        overloads_per_agent = nb_overloads_df.groupby("agent")[["nb_total"]].sum()
        overloads_per_agent.plot(kind="bar", rot=90,
                                 title="number of overloads per agent over all scenarios")

    return nb_overloads_df

In [None]:
# Line names and thermal limits are both read from the environment of the
# selected mix so that they are guaranteed to be in the same order.
# NOTE(review): this rebinds the notebook-level `thermal_limits` to float16
# values, so later cells that read `thermal_limits` get these rounded limits --
# confirm this is intended (presumably it matches the dtype of the flow columns).
thermal_limits = env_ref_.get_thermal_limit().astype('float16')
line_names = env_ref_.name_line

Overloads_df = get_overload_df(df_analysis, line_names, thermal_limits)

In [None]:
# For every (scenario, datetimes) pair keep the row with the fewest overloads
# among all agents. A stable sort is used so that, when several agents are
# tied, the one kept does not change from one execution to the next.
best_df = (
    Overloads_df
    .sort_values('nb_total', ascending=True, kind='mergesort')
    .drop_duplicates(['scenario', 'datetimes'])
)

# Explicit copy: the selection is relabelled below and must not be a view of df_analysis.
df_analysis_light = df_analysis.loc[best_df.index, :].copy()
df_analysis_light["agent"] = "best_agent"

# Light dataframe = best_agent rows followed by the do_nothing rows.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df_analysis_light = pd.concat(
    [df_analysis_light, df_analysis[df_analysis.agent == "do_nothing"]],
    ignore_index=True,
)

df_analysis_light_file_path = os.path.join(env_output_folder, 'FlowDataset_light.file')
df_analysis_light.to_feather(df_analysis_light_file_path)

In [None]:
# Share of the do_nothing overloads that disappears with the best agent, in percent.
do_nothing_overloads = Overloads_df.loc[Overloads_df.agent == "do_nothing", "nb_total"].sum()
best_agent_overloads = best_df["nb_total"].sum()

if do_nothing_overloads > 0:
    # Round after scaling to percent: rounding the ratio first and multiplying
    # by 100 afterwards can leave values such as 28.999999999999996.
    percentage_reduced = np.round(100 * (1 - best_agent_overloads / do_nothing_overloads))
else:
    # No overload at all with do_nothing: nothing can be reduced, and the ratio
    # would otherwise be a division by zero.
    percentage_reduced = 0.0

print("the percentage of overload decrease with considered actions is: "+str(percentage_reduced))

In [None]:
# Overload summary restricted to the light dataframe (best_agent + do_nothing):
# computed once with get_overload_info_df and saved, then reloaded afterwards.
overload_best_agent_file_path = os.path.join(env_output_folder, 'Overloads_info_Best_Agent.file')

if os.path.isfile(overload_best_agent_file_path):
    print("loading the file from: "+overload_best_agent_file_path)
    # Reload the best-agent file itself; reading the all-agents overload file
    # here would silently return the wrong summary.
    overloads_info_best_agent = pd.read_feather(overload_best_agent_file_path)
    if only_do_nothing_agent:
        overloads_info_best_agent = overloads_info_best_agent[overloads_info_best_agent.agent == "do_nothing"].reset_index(drop=True)

else:
    overloads_info_best_agent = get_overload_info_df(df_analysis_light, lines_name, thermal_limits, indicesLineOverloaded, verbose=True)
    print("saving file in: "+overload_best_agent_file_path)
    overloads_info_best_agent.to_feather(overload_best_agent_file_path)

overloads_info_best_agent.head(10)