# Importing packages

In [170]:
import os

import grid2op
from grid2op.Converter import IdToAct
import gymnasium
import ray
from grid2op.gym_compat import GymEnv, ScalerAttrConverter
from ray.rllib.algorithms import ppo  # import the type of agents
from ray import tune, train
from typing import Any, OrderedDict


# Global settings

In [171]:
ENV_NAME = "rte_case5_example"
LIBRARY_DIRECTORY = "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/grid2op/data/"
NB_STEP_TRAIN = 10
RHO_THRESHOLD = 0.95

# Only run first time to set-up

In [172]:
if not os.path.exists(LIBRARY_DIRECTORY + ENV_NAME + "_train"):
    # env = grid2op.make(ENV_NAME, test=True)
    env = grid2op.make(LIBRARY_DIRECTORY + ENV_NAME, reward_class=grid2op.Reward.L2RPNReward)

    # extract 5% of the "chronics" to be used in the validation environment, 5% for testing,
    # 80% for training
    nm_env_train, nm_env_val, nm_env_test = env.train_val_split_random(
        pct_val=5.0, pct_test=5.0, add_for_test="test"
    )
    # and now you can use the training set only to train your agent:
    print(f"The name of the training environment is {nm_env_train}")
    print(f"The name of the validation environment is {nm_env_val}")
    print(f"The name of the test environment is {nm_env_test}")



# Define environment

In [173]:
env = grid2op.make(LIBRARY_DIRECTORY + ENV_NAME, reward_class=grid2op.Reward.L2RPNReward)
obs = env.reset()

# POSSIBLE_SUBSTATION_ACTIONS = IdToAct.get_all_unitary_topologies_change(env.action_space, 0)
# POSSIBLE_SUBSTATION_ACTIONS += IdToAct.get_all_unitary_topologies_change(env.action_space, 2)
# POSSIBLE_SUBSTATION_ACTIONS += IdToAct.get_all_unitary_topologies_change(env.action_space, 3)
POSSIBLE_SUBSTATION_ACTIONS = IdToAct.get_all_unitary_topologies_set(env.action_space, 0)
POSSIBLE_SUBSTATION_ACTIONS += IdToAct.get_all_unitary_topologies_set(env.action_space, 2)
POSSIBLE_SUBSTATION_ACTIONS += IdToAct.get_all_unitary_topologies_set(env.action_space, 3)
# reduce further TODO

print(len(POSSIBLE_SUBSTATION_ACTIONS))
print(POSSIBLE_SUBSTATION_ACTIONS)
for sub in POSSIBLE_SUBSTATION_ACTIONS:
    print(sub.to_json())

class CustomDiscreteActions(gymnasium.spaces.Discrete):
    """
    Class that customizes the action space.

    Example usage:
    
    import grid2op
    from grid2op.Converter import IdToAct
    
    env = grid2op.make("rte_case14_realistic")
    
    all_actions = # a list of of desired actions
    converter = IdToAct(env.action_space) 
    converter.init_converter(all_actions=all_actions) 


    env.action_space = ChooseDiscreteActions(converter=converter)


    """
    def __init__(self, converter, do_nothing):
    # def __init__(self, converter, do_nothing, action_space):
        self.do_nothing = do_nothing
        # self.env = env
        # self.action_space = action_space
        self.converter = converter
        super().__init__(n=converter.n)
    def from_gym(self, gym_action):
        if gym_action == -1:
            # return self.env.action_space({})
            # return self.action_space({})
            return self.do_nothing
        else:
            print(self.converter.convert_act(gym_action))
            return self.converter.convert_act(gym_action)


52
[<grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x290be7f70>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29e703640>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29e702320>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29e7007c0>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29d4179d0>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29d416230>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x290039060>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x28eafed40>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29050dfc0>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29050c160>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x29050ce80>, <grid2op.Space.GridObjects.TopologyAction_rte_case5_example object at 0x

In [174]:
# MyEnv class, and train a Proximal Policy Optimisation based agent
class MyEnv(gymnasium.Env):
    """Encapsulate Grid2Op environment and set action/observation space."""

    def __init__(self, env_config : dict[str,Any]):
        # 1. create the grid2op environment
        if not "env_name" in env_config:
            raise RuntimeError(
                "The configuration for RLLIB should provide the env name"
            )
        nm_env:str = env_config["env_name"]
        del env_config["env_name"]
        self.env_glop = grid2op.make(nm_env, **env_config)

        # 2. create the gym environment
        self.env_gym = GymEnv(self.env_glop)
        obs_gym = self.env_gym.reset()

        # 3. customize action and observation space space to only change bus 
        # create converter
        converter = IdToAct(self.env_glop.action_space)
        converter.init_converter(all_actions=POSSIBLE_SUBSTATION_ACTIONS)

        # set gym action space to discrete
        # self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop.action_space())        
        # self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop)        
        # self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop.action_space({}), self.env_glop.action_space)        
        # self.env_gym.action_space = gym.spaces.Discrete(converter.n)        
        self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop.action_space())        
        
        # customize observation space
        ob_space:dict[str,Any] = self.env_gym.observation_space
        ob_space = ob_space.keep_only_attr(
            ["rho", "gen_p", "load_p", "topo_vect", "p_or", "p_ex", "timestep_overflow"]
        )

        self.env_gym.observation_space = ob_space

        # 4. specific to rllib
        self.action_space = gym.spaces.Discrete(converter.n)
        self.observation_space = self.env_gym.observation_space
        
        self.last_rho = 0 # below threshold TODO

    def reset(self, seed: int = None, options: dict[str, Any] = None) -> tuple[OrderedDict[str,Any], dict[str, str]]:
        obs, info = self.env_gym.reset()
        self.last_rho = max(obs["rho"])
        return obs, info

    def step(self, action):
        obs: tuple[OrderedDict[str,Any], dict[str, str]]
        # for the first action or whenever the lines are not near overloading, do nothing
        if self.last_rho < RHO_THRESHOLD:
            action = -1
        
        obs, reward, done, truncated, info = self.env_gym.step(action)
        self.last_rho = max(obs["rho"])
        return obs, reward, done, truncated, info
    
    def get_grid2op_env(self):
        return self.env_glop
    
env = MyEnv({"env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train"})
env.step(-1)


(OrderedDict([('gen_p', array([ 0.3     , 27.177578], dtype=float32)),
              ('load_p', array([7.8, 8.3, 8.1], dtype=float32)),
              ('p_ex',
               array([ 11.366332 ,   2.720053 ,  -0.0811479,  -4.1699634, -15.022329 ,
                       -6.103853 ,  -6.103853 ,  -3.9300373], dtype=float32)),
              ('p_or',
               array([-9.700301  , -2.3244371 ,  0.27054456,  4.2541933 , 15.811247  ,
                       6.1511383 ,  6.1511383 ,  3.988854  ], dtype=float32)),
              ('rho',
               array([0.76219594, 0.9349968 , 0.7559517 , 0.6063585 , 0.7606089 ,
                      0.38162327, 0.38162327, 0.59309185], dtype=float32)),
              ('timestep_overflow',
               array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)),
              ('topo_vect',
               array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                     dtype=int32))]),
 4.387135028839111,
 False,
 False,
 {'disc_lines': arra

# Train agent

In [175]:
config = ppo.PPOConfig()
config = config.training(gamma=0.95, lr=0.003, vf_loss_coeff=0.5, entropy_coeff=0.01, clip_param=0.2, lambda_=0.95, sgd_minibatch_size=4, train_batch_size=32)
config = config.environment(env=MyEnv, env_config={
        "env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train", "reward_class":grid2op.Reward.L2RPNReward})

if NB_STEP_TRAIN:
    try:
        analysis = tune.run(
            ppo.PPO,
            config=config.to_dict(),
            stop={"timesteps_total": 10000},  
            checkpoint_config=train.CheckpointConfig(checkpoint_frequency=1000, checkpoint_at_end=True),
            verbose=1,
            local_dir="/Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results"
        )
    finally:
        # shutdown ray
        ray.shutdown()

0,1
Current time:,2023-11-15 15:47:23
Running for:,00:00:06.02
Memory:,12.6/16.0 GiB

Trial name,# failures,error file
PPO_MyEnv_df780_00000,1,/Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results/PPO_2023-11-15_15-47-17/PPO_MyEnv_df780_00000_0_2023-11-15_15-47-17/error.txt

Trial name,status,loc
PPO_MyEnv_df780_00000,ERROR,


2023-11-15 15:47:23,599	ERROR tune_controller.py:1502 -- Trial task failed for trial PPO_MyEnv_df780_00000
Traceback (most recent call last):
  File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/ray/_private/worker.py", line 2549, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::PPO.__init__

[2m[36m(RolloutWorker pid=76117)[0m 33
[2m[36m(RolloutWorker pid=76117)[0m (OrderedDict([('gen_p', array([10.3     , 15.971496], dtype=float32)), ('load_p', array([8.5, 8.2, 8.7], dtype=float32)), ('p_ex', array([  5.589767  ,   0.25839484,  -1.562665  ,  -5.468073  ,
[2m[36m(RolloutWorker pid=76117)[0m        -10.167201  ,  -4.941659  ,  -4.941659  ,  -3.231927  ],
[2m[36m(RolloutWorker pid=76117)[0m       dtype=float32)), ('p_or', array([-5.156298  , -0.15775152,  1.6134641 ,  5.5005856 , 10.381728  ,
[2m[36m(RolloutWorker pid=76117)[0m         4.9544034 ,  4.9544034 ,  3.2459826 ], dtype=float32)), ('rho', array([0.38249025, 0.45199147, 0.35916278, 0.36414132, 0.39959523,
[2m[36m(RolloutWorker pid=76117)[0m        0.20312877, 0.20312877, 0.31144747], dtype=float32)), ('timestep_overflow', array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)), ('topo_vect', array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[2m[36m(RolloutWorker pid=76117)[0m       dt



TuneError: ('Trials did not complete', [PPO_MyEnv_df780_00000])

In [None]:
type(env.action_space({}))

grid2op.Space.GridObjects.TopologyAction_rte_case5_example