In [3]:
def fpath(algo,prefix=''):
    """Derive the names used to store and log one agent/algorithm class.

    Parameters
    ----------
    algo : class
        Any object exposing ``__name__`` (an algorithm or agent class).
    prefix : str, optional
        Experiment tag. It becomes the sub-directory (under the current
        directory) holding the model and the leading part of the
        tensorboard log name.

    Returns
    -------
    tuple of (str, str, str)
        ``(model_name, model_path, tb_log_name)`` where ``model_path`` is
        ``"./<prefix>/<model_name>"`` and ``tb_log_name`` is
        ``"<prefix>_<model_name>"``.
    """
    model_name = algo.__name__
    # Plain string joins (not os.path.join): the separators are always "/" and "_".
    model_path = "/".join((".", prefix, model_name))
    tb_log_name = "_".join((prefix, model_name))
    return model_name, model_path, tb_log_name

In [1]:
import os
import grid2op
from grid2op.Runner import Runner
from agent_wrapper import Grid2opAgentWrapper
# from env_wrapper import Grid2opEnvWrapper
from env_wrapper_custom import Grid2opEnvWrapper
from stable_baselines3.common.env_util import make_vec_env

from grid2op.Reward import RedispReward, L2RPNReward, EpisodeDurationReward


# Run configuration: the grid2op environment to use and the tag under which
# this experiment's models and results are stored.
env_name = "l2rpn_case14_sandbox"
prefix='L2RPNReward'

# Resolve the grid2op data folder from the current user's home directory
# rather than a hard-coded "C:/Users/<name>/..." path, so the notebook is not
# tied to one machine (and so that expanduser actually has a "~" to expand).
# NOTE(review): assumes grid2op stores its data in "~/data_grid2op" (its
# documented default) - adjust if the data directory was relocated.
base_path = os.path.expanduser("~/data_grid2op")

# Folders expected for the train / validation / test split of `env_name`.
train_dir = os.path.join(base_path, f"{env_name}_train")
val_dir = os.path.join(base_path, f"{env_name}_val")
test_dir = os.path.join(base_path, f"{env_name}_test")

def directories_exist(train_path, val_path, test_path):
    """Return True only if the train, validation and test paths all exist.

    The paths are checked in that order and checking stops at the first
    missing one.
    """
    required_paths = (train_path, val_path, test_path)
    return all(os.path.exists(candidate) for candidate in required_paths)

# Build the base environment with the reward used for this experiment.
env = grid2op.make(env_name, reward_class=L2RPNReward)

# Arguments of the random split: 10% of the data for validation, 10% for test;
# the add_for_* values are the suffixes passed for each generated environment.
split_kwargs = {
    "pct_val": 10,
    "pct_test": 10,
    "add_for_train": "train",
    "add_for_val": "val",
    "add_for_test": "test",
}

# The split is only performed when at least one of the three folders is missing.
if not directories_exist(train_dir, val_dir, test_dir):
    print("Directories do not exist. Proceeding with train-val-test split.")
    try:
        nm_env_train, nm_env_val, nm_env_test = env.train_val_split_random(**split_kwargs)
        print(f"Training environment created: {nm_env_train}")
        print(f"Validation environment created: {nm_env_val}")
        print(f"Test environment created: {nm_env_test}")
    except OSError as err:
        # Best effort: report the failure and let the notebook continue.
        print(f"An error occurred during splitting: {err}")
else:
    print("Directories already exist. Skipping train-val-test split.")


Directories already exist. Skipping train-val-test split.


In [2]:
# Settings passed to Grid2opEnvWrapper. Commented-out entries are optional
# settings left disabled, with example values kept for reference.
env_config = {
    # "backend_cls": ,
    # "backend_options": {},
    # Reuse the notebook-level `env_name` so the wrapper always targets the
    # same environment as the train/val/test split folders.
    "env_name": env_name, # e.g. "l2rpn_case14_sandbox", "l2rpn_neurips_2020_track1_small"
    "env_is_test": False,
    # "obs_attr_to_keep": ["gen_p", "p_or" ,"load_p", "rho", "line_status"], # "gen_q", "gen_v",
    "act_type": "discrete", # "discrete" "box" "multi_discrete"
    # "act_attr_to_keep": ["change_line_status", "set_line_status_simple", "set_bus"], # set_line_status
    "reward_class": L2RPNReward, # EpisodeDurationReward,
    "data_set": "train", # which split to use: train/val/test
}

In [None]:
from grid2op.Runner import Runner
from grid2op.Agent import DoNothingAgent, TopologyGreedy, PowerLineSwitch, RandomAgent
from agent_wrapper import Grid2opAgentWrapper

# `algorithms` (the algorithm classes whose trained models are evaluated) is
# not defined in this cell: it must already exist in the kernel. This bare
# reference makes the cell fail right away, before any environment is built,
# if it does not.
algorithms

# Create the test environment: same settings as training, but on the test split
test_env_config = env_config.copy()
test_env_config["data_set"] = "test"
test_env = Grid2opEnvWrapper(test_env_config)

# Testing parameters
nb_episode_test = 50
seeds_test_env = tuple(range(nb_episode_test))  # Seeds for the environment
seeds_test_agent = tuple(range(nb_episode_test))  # Seeds for the agent

# Evaluation settings shared by every agent, so trained and baseline agents
# are guaranteed to be run with the same episodes count and seeds.
run_kwargs = dict(
    nb_episode=nb_episode_test,
    env_seeds=seeds_test_env,
    agent_seeds=seeds_test_agent,
    # episode_id=ts_ep_test,
    add_detailed_output=True,
)

# Output of runner.run for each agent, keyed by agent name. Without this the
# results of all but the last agent would be lost when `res` is reassigned.
test_results = {}

# Trained agents
for algo in algorithms:
    # Model location derived from the algorithm class and the experiment tag
    model_name, model_path, _ =  fpath(algo,prefix)

    # Load the trained model
    sb3_algo_to_test = algo.load(model_path+'/'+model_name, env=test_env)

    # Convert to grid2op agent
    my_agent = Grid2opAgentWrapper(test_env, sb3_algo_to_test)

    runner = Runner(**test_env._g2op_env.get_params_for_runner(),
                    agentClass=None,
                    agentInstance=my_agent)

    # Episodes of trained agents are saved under "<model_path>/test"
    res = runner.run(path_save=model_path+"/test", **run_kwargs)
    test_results[model_name] = res

# Baseline agents:
baseline_agents= [RandomAgent,DoNothingAgent]
for baseline_agent in baseline_agents :

    model_name, model_path, _ =  fpath(baseline_agent,prefix)

    runner = Runner(**test_env._g2op_env.get_params_for_runner(),
                agentClass=baseline_agent)

    # Episodes of baseline agents are saved directly under "<model_path>"
    res = runner.run(path_save=model_path, **run_kwargs)
    test_results[model_name] = res

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob


def _episode_returns(pattern):
    """Return the NaN-ignoring reward sum of every episode matching ``pattern``.

    One matching file is one episode. Files are read in sorted path order so
    that the i-th value refers to the same episode folder name for every
    agent, which is required to subtract the results of two agents.
    """
    returns = []
    for reward_file in sorted(glob.glob(pattern)):
        # Context manager closes the archive once its 'data' entry is summed.
        with np.load(reward_file) as archive:
            returns.append(np.nansum(archive['data']))
    return np.array(returns)


fig, ax = plt.subplots(2,1,sharex=True,sharey=True)
# plt.title('Comparison of Trained agents and Baselines')
ax[1].set_xlabel('Episode #')
ax[0].set_ylabel('Cumulative reward')

# Per-episode cumulative reward of each agent, keyed by agent name
cum_rewards = dict()

# Trained agents: episodes were saved under "<model_path>/test" (solid lines)
for algo in algorithms:
    model_name, model_path, _ =  fpath(algo,prefix)
    cum_rewards[model_name] = _episode_returns(model_path+'/test/*/rewards*')
    ax[0].plot(np.arange(len(cum_rewards[model_name])), cum_rewards[model_name],
               '-',label=model_name)

# Baseline agents: episodes were saved directly under "<model_path>" (dashed lines)
for baseline_agent in baseline_agents :
    model_name, model_path, _ =  fpath(baseline_agent,prefix)
    cum_rewards[model_name] = _episode_returns(model_path+'/*/rewards*')
    ax[0].plot(np.arange(len(cum_rewards[model_name])), cum_rewards[model_name],
               '--',label=model_name)
ax[0].legend()

# Lower panel: per-episode advantage of the trained A2C agent over doing nothing.
# The x-axis is built from the plotted array itself rather than from a variable
# left over from the loops above.
reward_gap = cum_rewards['A2C'] - cum_rewards['DoNothingAgent']
ax[1].plot(np.arange(len(reward_gap)), reward_gap)
ax[1].set_ylabel('A2C - DoNothingAgent')

plt.tight_layout()