# Importing packages

In [203]:
import os

import grid2op
from grid2op.Converter import IdToAct
import gymnasium as gym
import ray
from grid2op.gym_compat import GymEnv, ScalerAttrConverter
from ray.rllib.algorithms import ppo  # import the type of agents
from ray import tune, train
from typing import Any, OrderedDict


# Global settings

In [204]:
ENV_NAME = "rte_case5_example"
LIBRARY_DIRECTORY = "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/grid2op/data/"
NB_STEP_TRAIN = 10
RHO_THRESHOLD = 0.95

# Only run first time to set-up

In [205]:
if not os.path.exists(LIBRARY_DIRECTORY + ENV_NAME + "_train"):
    # env = grid2op.make(ENV_NAME, test=True)
    env = grid2op.make(LIBRARY_DIRECTORY + ENV_NAME, reward_class=grid2op.Reward.L2RPNReward)

    # extract 5% of the "chronics" to be used in the validation environment, 5% for testing,
    # 80% for training
    nm_env_train, nm_env_val, nm_env_test = env.train_val_split_random(
        pct_val=5.0, pct_test=5.0, add_for_test="test"
    )
    # and now you can use the training set only to train your agent:
    print(f"The name of the training environment is {nm_env_train}")
    print(f"The name of the validation environment is {nm_env_val}")
    print(f"The name of the test environment is {nm_env_test}")



# Define environment

In [206]:
env = grid2op.make(LIBRARY_DIRECTORY + ENV_NAME, reward_class=grid2op.Reward.L2RPNReward)
obs = env.reset()

POSSIBLE_SUBSTATION_ACTIONS = IdToAct.get_all_unitary_topologies_change(env.action_space, 0)
POSSIBLE_SUBSTATION_ACTIONS += IdToAct.get_all_unitary_topologies_change(env.action_space, 2)
POSSIBLE_SUBSTATION_ACTIONS += IdToAct.get_all_unitary_topologies_change(env.action_space, 3)



In [208]:
# MyEnv class, and train a Proximal Policy Optimisation based agent
class MyEnv(gym.Env):
    """Encapsulate Grid2Op environment and set action/observation space."""

    def __init__(self, env_config : dict[str,Any]):
        # 1. create the grid2op environment
        if not "env_name" in env_config:
            raise RuntimeError(
                "The configuration for RLLIB should provide the env name"
            )
        nm_env:str = env_config["env_name"]
        del env_config["env_name"]
        self.env_glop = grid2op.make(nm_env, **env_config)

        # 2. create the gym environment
        self.env_gym = GymEnv(self.env_glop)
        obs_gym = self.env_gym.reset()

        # 3. customize action and observation space space to only change bus 
        # set gym action space to discrete
        self.env_gym.action_space = gym.spaces.Discrete(len(POSSIBLE_SUBSTATION_ACTIONS))

        # create converter
        self.converter = IdToAct(self.env_gym.action_space)
        self.converter.init_converter(all_actions=POSSIBLE_SUBSTATION_ACTIONS)

        # customize observation space
        ob_space:dict[str,Any] = self.env_gym.observation_space
        ob_space = ob_space.keep_only_attr(
            ["rho", "gen_p", "load_p", "topo_vect", "p_or", "p_ex", "timestep_overflow"]
        )
        ob_space = ob_space.reencode_space(
            "gen_p", ScalerAttrConverter(substract=0.0, divide=self.env_glop.gen_pmax)
        )
        ob_space = ob_space.reencode_space(
            "load_p",
            ScalerAttrConverter(
                substract=obs_gym[0]["load_p"], divide=0.5 * obs_gym[0]["load_p"]
            ),
        )
        ob_space = ob_space.reencode_space(
            "p_or", ScalerAttrConverter(substract=0.0, divide=0.5 * obs_gym[0]["p_or"])
        )
        ob_space = ob_space.reencode_space(
            "p_ex", ScalerAttrConverter(substract=0.0, divide=0.5 * obs_gym[0]["p_ex"])
        )

        self.env_gym.observation_space = ob_space

        # 4. specific to rllib
        self.action_space = self.env_gym.action_space
        self.observation_space = self.env_gym.observation_space

        # 4. build the action space and observation space directly from the spaces class.
        d:dict[str,Any] = {k: v for k, v in self.env_gym.observation_space.spaces.items()}
        self.observation_space = gym.spaces.Dict(d)
        a:dict[str,Any] = {k: v for k, v in self.env_gym.action_space.items()}
        self.action_space = gym.spaces.Dict(a)
        
        self.last_rho = None

    def reset(self, seed: int = None, options: dict[str, Any] = None) -> tuple[OrderedDict[str,Any], dict[str, str]]:
        obs: tuple[OrderedDict[str,Any], dict[str, str]] = self.env_gym.reset()
        return obs

    def step(self, action):
        obs: tuple[OrderedDict[str,Any], dict[str, str]]

        # for the first action or whenever the lines are not near overloading, do nothing
        if self.last_rho == None or self.last_rho < RHO_THRESHOLD:
            action = {}
        else:
            action = self.converter.convert_act(action)
        
        obs, reward, done, truncated, info = self.env_gym.step(action)
        self.last_rho = max(obs["rho"])
        return obs, reward, done, truncated, info
    
    def get_env(self):
        return self.env_glop
    
env = MyEnv({"env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train"})
# env.step({})


AttributeError: 'GymnasiumActionSpace' object has no attribute 'legal_action'

# Train agent

In [None]:
config = ppo.PPOConfig()
config = config.training(gamma=0.95, lr=0.003, vf_loss_coeff=0.5, entropy_coeff=0.01, clip_param=0.2, lambda_=0.95, sgd_minibatch_size=4, train_batch_size=32)
config = config.environment(env=MyEnv, env_config={
        "env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train", "reward_class":grid2op.Reward.L2RPNReward})

if NB_STEP_TRAIN:
    try:
        analysis = tune.run(
            ppo.PPO,
            config=config.to_dict(),
            stop={"timesteps_total": 10000},  
            checkpoint_config=train.CheckpointConfig(checkpoint_frequency=1000, checkpoint_at_end=True),
            verbose=1,
            local_dir="/Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results"
        )
    finally:
        # shutdown ray
        ray.shutdown()

0,1
Current time:,2023-11-10 11:00:44
Running for:,00:00:01.69
Memory:,13.1/16.0 GiB

Trial name,status,loc
PPO_MyEnv_0256f_00000,PENDING,


2023-11-10 11:00:49,107	INFO tune.py:1143 -- Total run time: 6.59 seconds (1.69 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- PPO_MyEnv_0256f_00000: FileNotFoundError('Could not fetch metrics for PPO_MyEnv_0256f_00000: both result.json and progress.csv were not found at /Users/barberademol/Documents/GitHub/mahrl_grid2op/notebooks/results/PPO_2023-11-10_11-00-42/PPO_MyEnv_0256f_00000_0_2023-11-10_11-00-42')


[2m[36m(RolloutWorker pid=55005)[0m Dict('change_bus': Tuple(Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2)), 'change_line_status': MultiBinary(8))
[2m[36m(RolloutWorker pid=55005)[0m OrderedDict([('change_bus', (1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0)), ('change_line_status', array([0, 1, 0, 0, 0, 1, 0, 1], dtype=int8))])


[2m[36m(PPO pid=55002)[0m 2023-11-10 11:00:49,099	ERROR actor_manager.py:500 -- Ray error, taking actor 1 out of service. The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=55004, ip=127.0.0.1, actor_id=3d5ca7e8c822c0eac1f7d63101000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x13ad1da50>)
[2m[36m(PPO pid=55002)[0m   File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 525, in __init__
[2m[36m(PPO pid=55002)[0m     self._update_policy_map(policy_dict=self.policy_dict)
[2m[36m(PPO pid=55002)[0m   File "/Users/barberademol/Documents/GitHub/mahrl_grid2op/venv_mahrl/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1727, in _update_policy_map
[2m[36m(PPO pid=55002)[0m     self._build_policy_map(
[2m[36m(PPO pid=55002)[0m   File "/Users/barberademol/Documents/GitHub/mahrl_grid2

[2m[36m(RolloutWorker pid=55004)[0m Dict('change_bus': Tuple(Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2), Discrete(2)), 'change_line_status': MultiBinary(8))[32m [repeated 3x across cluster][0m
[2m[36m(RolloutWorker pid=55004)[0m OrderedDict([('change_bus', (1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1)), ('change_line_status', array([1, 0, 0, 0, 1, 0, 1, 1], dtype=int8))])
