In [1]:
"""
Trains PPO baseline agent.
"""
from typing import Any, OrderedDict
import gymnasium
import numpy as np
import ray
from ray.rllib.algorithms import ppo  # import the type of agents
from ray import tune, train
import grid2op
from grid2op import Reward
from grid2op.gym_compat import GymEnv
from mahrl.grid2op_env import utils

ENV_NAME = "rte_case5_example"
LIBRARY_DIRECTORY = "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/grid2op/data/"
NB_STEP_TRAIN = 10
RHO_THRESHOLD = 0.95
CHANGEABLE_SUBSTATIONS = [0, 2, 3]


class CustomizedGrid2OpEnvironment(gymnasium.Env):
    """Encapsulate Grid2Op environment and set action/observation space."""

    def __init__(self, env_config: dict[str, Any]):
        # 1. create the grid2op environment
        if not "env_name" in env_config:
            raise RuntimeError(
                "The configuration for RLLIB should provide the env name"
            )
        nm_env = env_config.pop("env_name", None)
        self.env_glop = grid2op.make(nm_env, **env_config)

        # 1.a. Setting up custom action space
        possible_substation_actions = utils.get_possible_topologies(
            self.env_glop, CHANGEABLE_SUBSTATIONS
        )

        # 2. create the gym environment
        self.env_gym = GymEnv(self.env_glop)
        _, _ = self.env_gym.reset()

        # 3. customize action and observation space space to only change bus
        # create converter
        converter = utils.setup_converter(self.env_glop, possible_substation_actions)

        # set gym action space to discrete
        self.env_gym.action_space = utils.CustomDiscreteActions(
            converter, self.env_glop.action_space()
        )

        # customize observation space
        ob_space = self.env_gym.observation_space
        ob_space = ob_space.keep_only_attr(
            ["rho", "gen_p", "load_p", "topo_vect", "p_or", "p_ex", "timestep_overflow"]
        )

        self.env_gym.observation_space = ob_space

        # 4. specific to rllib
        # self.action_space = gym.spaces.Discrete(converter.n)
        self.action_space = self.env_gym.action_space
        self.observation_space = self.env_gym.observation_space

        self.last_rho = 0  # below threshold TODO

    def reset(self) -> tuple[OrderedDict[str, Any], dict[str, str]]:
        obs, info = self.env_gym.reset()
        self.last_rho = np.max(obs.rho)
        return obs, info

    def step(self, action):
        obs: tuple[OrderedDict[str, Any], dict[str, str]]
        # for the first action or whenever the lines are not near overloading, do nothing
        if self.last_rho < RHO_THRESHOLD:
            action = -1

        obs, reward, done, truncated, info = self.env_gym.step(action)
        self.last_rho = np.max(obs.rho)
        return obs, reward, done, truncated, info


utils.make_train_test_val_split(
    LIBRARY_DIRECTORY, ENV_NAME, 5.0, 5.0, Reward.L2RPNReward
)
env = CustomizedGrid2OpEnvironment(
    {"env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train"}
)
config = ppo.PPOConfig()
config = config.training(
    gamma=0.95,
    lr=0.003,
    vf_loss_coeff=0.5,
    entropy_coeff=0.01,
    clip_param=0.2,
    lambda_=0.95,
    sgd_minibatch_size=4,
    train_batch_size=32,
)
config = config.environment(
    env=CustomizedGrid2OpEnvironment,
    env_config={
        "env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train",
        "reward_class": Reward.L2RPNReward,
    },
)

if NB_STEP_TRAIN:
    try:
        analysis = tune.run(
            ppo.PPO,
            config=config.to_dict(),
            stop={"timesteps_total": 10000},
            checkpoint_config=train.CheckpointConfig(
                checkpoint_frequency=1000, checkpoint_at_end=True
            ),
            verbose=1,
            local_dir="/Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results",
        )
    finally:
        # shutdown ray
        ray.shutdown()


0,1
Current time:,2023-11-16 13:34:20
Running for:,00:00:05.82
Memory:,12.8/16.0 GiB

Trial name,# failures,error file
PPO_CustomizedGrid2OpEnvironment_738a7_00000,1,/Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results/PPO_2023-11-16_13-34-14/PPO_CustomizedGrid2OpEnvironment_738a7_00000_0_2023-11-16_13-34-14/error.txt

Trial name,status,loc
PPO_CustomizedGrid2OpEnvironment_738a7_00000,ERROR,


[2m[36m(RolloutWorker pid=85186)[0m   logger.deprecation(
[2m[36m(RolloutWorker pid=85185)[0m Your environment ({}) does not abide to the new gymnasium-style API!
[2m[36m(RolloutWorker pid=85185)[0m From Ray 2.3 on, RLlib only supports the new (gym>=0.26 or gymnasium) Env APIs.
[2m[36m(RolloutWorker pid=85185)[0m {}
[2m[36m(RolloutWorker pid=85185)[0m Learn more about the most important changes here:
[2m[36m(RolloutWorker pid=85185)[0m https://github.com/openai/gym and here: https://github.com/Farama-Foundation/Gymnasium
[2m[36m(RolloutWorker pid=85185)[0m 
[2m[36m(RolloutWorker pid=85185)[0m In order to fix this problem, do the following:
[2m[36m(RolloutWorker pid=85185)[0m 
[2m[36m(RolloutWorker pid=85185)[0m 1) Run `pip install gymnasium` on your command line.
[2m[36m(RolloutWorker pid=85185)[0m 2) Change all your import statements in your code from
[2m[36m(RolloutWorker pid=85185)[0m    `import gym` -> `import gymnasium as gym` OR
[2m[36m(Rollo

TuneError: ('Trials did not complete', [PPO_CustomizedGrid2OpEnvironment_738a7_00000])

# Importing packages

In [1]:
import os

import grid2op
import gymnasium
import ray
from grid2op.gym_compat import GymEnv
from ray.rllib.algorithms import ppo  # import the type of agents
from ray import tune, train
from typing import Any, OrderedDict
from mahrl.grid2op_env import utils

  from .autonotebook import tqdm as notebook_tqdm
2023-11-16 13:24:43,242	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2023-11-16 13:24:43,928	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


# Global settings

In [2]:
ENV_NAME = "rte_case5_example"
LIBRARY_DIRECTORY = "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/grid2op/data/"
NB_STEP_TRAIN = 10
RHO_THRESHOLD = 0.95
CHANGEABLE_SUBSTATIONS = [0, 2, 3]

# Only run first time to set-up

In [3]:
utils.make_train_test_val_split(LIBRARY_DIRECTORY, ENV_NAME, 5.0, 5.0, grid2op.Reward.L2RPNReward)

# Define environment

In [8]:
# MyEnv class, and train a Proximal Policy Optimisation based agent
class MyEnv(gymnasium.Env):
    """Encapsulate Grid2Op environment and set action/observation space."""

    def __init__(self, env_config : dict[str,Any]):
        # 1. create the grid2op environment
        if not "env_name" in env_config:
            raise RuntimeError(
                "The configuration for RLLIB should provide the env name"
            )
        nm_env:str = env_config["env_name"]
        del env_config["env_name"]
        self.env_glop = grid2op.make(nm_env, **env_config)

        # 1.a. Setting up custom action space
        possible_substation_actions = utils.get_possible_topologies(self.env_glop, CHANGEABLE_SUBSTATIONS)

        # 2. create the gym environment
        self.env_gym = GymEnv(self.env_glop)
        obs_gym, info = self.env_gym.reset()

        # 3. customize action and observation space space to only change bus 
        # create converter
        converter = utils.setup_converter(self.env_glop, possible_substation_actions)

        # set gym action space to discrete
        self.env_gym.action_space = utils.CustomDiscreteActions(converter, self.env_glop.action_space())        
        # self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop)        
        # self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop.action_space({}), self.env_glop.action_space)        
        # self.env_gym.action_space = gym.spaces.Discrete(converter.n)        
        # TODO ACTIVATE
        # self.env_gym.action_space = CustomDiscreteActions(converter)        
        # self.env_gym.action_space = CustomDiscreteActions(converter, self.env_glop.action_space())        
        # TODO REMOVE
        # self.env_gym.action_space = self.env_gym.action_space.ignore_attr(
        #     "set_bus"
        # ).ignore_attr("set_line_status")
        # self.env_gym.action_space = self.env_gym.action_space.reencode_space(
        #     "change_bus", MultiToTupleConverter()
        # )
        # self.env_gym.action_space = self.env_gym.action_space.reencode_space(
        #     "change_line_status", MultiToTupleConverter()
        # )
        # TODO REMOVE

        # customize observation space
        ob_space:dict[str,Any] = self.env_gym.observation_space
        ob_space = ob_space.keep_only_attr(
            ["rho", "gen_p", "load_p", "topo_vect", "p_or", "p_ex", "timestep_overflow"]
        )

        self.env_gym.observation_space = ob_space

        # 4. specific to rllib
        # self.action_space = gym.spaces.Discrete(converter.n)
        self.action_space = self.env_gym.action_space
        self.observation_space = self.env_gym.observation_space

        print(self.action_space)
        
        d = {k: v for k, v in self.env_gym.observation_space.spaces.items()}
        self.observation_space = gymnasium.spaces.Dict(d)
        # TODO REMOVE
        # a = {"set_bus" : self.env_gym.action_space}
        # self.action_space = gym.spaces.Dict(a)
        print(self.action_space)

        self.last_rho = 0 # below threshold TODO

    def reset(self, seed: int = None, options: dict[str, Any] = None) -> tuple[OrderedDict[str,Any], dict[str, str]]:
        obs, info = self.env_gym.reset()
        self.last_rho = max(obs["rho"])
        return obs, info

    def step(self, action):
        obs: tuple[OrderedDict[str,Any], dict[str, str]]
        # for the first action or whenever the lines are not near overloading, do nothing
        if self.last_rho < RHO_THRESHOLD:
            action = -1
            # action = {}
        
        obs, reward, done, truncated, info = self.env_gym.step(action)
        print(obs)
        print(type(obs))
        print(obs["rho"])
        self.last_rho = max(obs["rho"])
        return obs, reward, done, truncated, info
    
    def get_grid2op_env(self):
        return self.env_glop
    
env = MyEnv({"env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train"})
# env.step({})
env.step(1)


Exception ignored in: <function __AuxGymEnv.__del__ at 0x2808200d0>
Traceback (most recent call last):
  File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/grid2op/gym_compat/gymenv.py", line 220, in __del__
    self.close()
  File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/grid2op/gym_compat/gymenv.py", line 187, in close
    self.action_space.close()
AttributeError: 'CustomDiscreteActions' object has no attribute 'close'


Discrete(52)
Discrete(52)
OrderedDict([('gen_p', array([10.3     , 15.971496], dtype=float32)), ('load_p', array([8.5, 8.2, 8.7], dtype=float32)), ('p_ex', array([  5.589767  ,   0.25839484,  -1.562665  ,  -5.468073  ,
       -10.167201  ,  -4.941659  ,  -4.941659  ,  -3.231927  ],
      dtype=float32)), ('p_or', array([-5.156298  , -0.15775152,  1.6134641 ,  5.5005856 , 10.381728  ,
        4.9544034 ,  4.9544034 ,  3.2459826 ], dtype=float32)), ('rho', array([0.38249025, 0.45199147, 0.35916278, 0.36414132, 0.39959523,
       0.20312877, 0.20312877, 0.31144747], dtype=float32)), ('timestep_overflow', array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)), ('topo_vect', array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
      dtype=int32))])
<class 'collections.OrderedDict'>
[0.38249025 0.45199147 0.35916278 0.36414132 0.39959523 0.20312877
 0.20312877 0.31144747]


(OrderedDict([('gen_p', array([10.3     , 15.971496], dtype=float32)),
              ('load_p', array([8.5, 8.2, 8.7], dtype=float32)),
              ('p_ex',
               array([  5.589767  ,   0.25839484,  -1.562665  ,  -5.468073  ,
                      -10.167201  ,  -4.941659  ,  -4.941659  ,  -3.231927  ],
                     dtype=float32)),
              ('p_or',
               array([-5.156298  , -0.15775152,  1.6134641 ,  5.5005856 , 10.381728  ,
                       4.9544034 ,  4.9544034 ,  3.2459826 ], dtype=float32)),
              ('rho',
               array([0.38249025, 0.45199147, 0.35916278, 0.36414132, 0.39959523,
                      0.20312877, 0.20312877, 0.31144747], dtype=float32)),
              ('timestep_overflow',
               array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)),
              ('topo_vect',
               array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                     dtype=int32))]),
 7.0493998527526855,
 False

# Train agent

In [5]:
config = ppo.PPOConfig()
config = config.training(gamma=0.95, lr=0.003, vf_loss_coeff=0.5, entropy_coeff=0.01, clip_param=0.2, lambda_=0.95, sgd_minibatch_size=4, train_batch_size=32)
config = config.environment(env=MyEnv, env_config={
        "env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train", "reward_class":grid2op.Reward.L2RPNReward})

if NB_STEP_TRAIN:
    try:
        analysis = tune.run(
            ppo.PPO,
            config=config.to_dict(),
            stop={"timesteps_total": 10000},  
            checkpoint_config=train.CheckpointConfig(checkpoint_frequency=1000, checkpoint_at_end=True),
            verbose=1,
            local_dir="/Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results"
        )
    finally:
        # shutdown ray
        ray.shutdown()

0,1
Current time:,2023-11-16 13:24:51
Running for:,00:00:03.65
Memory:,12.7/16.0 GiB

Trial name,status,loc
PPO_MyEnv_221e5_00000,PENDING,


2023-11-16 13:24:54,431	INFO tune.py:1143 -- Total run time: 6.10 seconds (3.65 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- PPO_MyEnv_221e5_00000: FileNotFoundError('Could not fetch metrics for PPO_MyEnv_221e5_00000: both result.json and progress.csv were not found at /Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results/PPO_2023-11-16_13-24-48/PPO_MyEnv_221e5_00000_0_2023-11-16_13-24-48')


[2m[36m(RolloutWorker pid=84687)[0m Discrete(52)
[2m[36m(RolloutWorker pid=84687)[0m Discrete(52)
[2m[36m(RolloutWorker pid=84688)[0m Discrete(52)[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m


