# Importing packages

In [15]:
import os
from typing import Any, Optional

import grid2op
import gymnasium as gym
import ray
from grid2op.gym_compat import GymEnv, MultiToTupleConverter, ScalerAttrConverter
from ray import tune
from ray.rllib.algorithms import ppo  # import the type of agents


# Global settings

In [2]:
# Name of the Grid2Op example environment used throughout the notebook.
ENV_NAME = "rte_case5_example"
# The example environments live in the "data" folder of the installed grid2op package,
# so derive the folder from the package location instead of hard-coding a user-specific
# absolute path. The trailing separator is kept on purpose: the rest of the notebook
# builds environment paths by plain string concatenation.
LIBRARY_DIRECTORY = os.path.join(os.path.dirname(grid2op.__file__), "data", "")
# Any truthy value enables the training cell; set to 0 to skip training.
NB_STEP_TRAIN = 10

# One-time set-up (only needs to run the first time)

In [5]:
# Split the environment into train/validation/test sets only once: the presence of the
# "<ENV_NAME>_train" folder is used as the marker that the split was already done.
if not os.path.exists(LIBRARY_DIRECTORY + ENV_NAME + "_train"):
    env = grid2op.make(LIBRARY_DIRECTORY + ENV_NAME)
    try:
        # extract 10% of the "chronics" to be used in the validation environment,
        # 10% for testing, 80% for training
        nm_env_train, nm_env_val, nm_env_test = env.train_val_split_random(
            pct_val=10.0, pct_test=10.0, add_for_test="test"
        )
    finally:
        # The full environment is only needed to perform the split; release it even
        # if the split fails.
        env.close()

    # and now you can use the training set only to train your agent:
    print(f"The name of the training environment is {nm_env_train}")
    print(f"The name of the validation environment is {nm_env_val}")
    print(f"The name of the test environment is {nm_env_test}")



The name of the training environment is rte_case5_example_train
The name of the validation environment is rte_case5_example_val
The name of the test environment is rte_case5_example_test


# Speeding up

In [6]:
# TODO: The grid2op documentation is full of details on how to "optimize" the number of steps
# you can do per second. This number can rise from a few dozen per second to around a thousand
# per second with proper care. We strongly encourage you to leverage all the possibilities,
# which include (but are not limited to):
# - using "lightsim2grid" as a backend for a 10-15x speed up in the "env.step(...)" function
# - using "MultifolderWithCache"/"env.chronics_handler.set_chunk(...)" for faster "env.reset(...)"
#   see https://grid2op.readthedocs.io/en/latest/environment.html#optimize-the-data-pipeline
# - using "SingleEnvMultiProcess" for parallel computation

# Define environment

In [11]:
# MyEnv class: the environment wrapper handed to RLlib to train the PPO agent
class MyEnv(gym.Env):
    """Encapsulate a Grid2Op environment and set its action/observation spaces.

    The wrapper builds a Grid2Op environment, converts it with ``GymEnv`` and then
    restricts and re-encodes the spaces:

    * actions: ``set_bus`` and ``set_line_status`` are ignored; ``change_bus`` and
      ``change_line_status`` are re-encoded with ``MultiToTupleConverter``;
    * observations: only ``rho``, ``gen_p``, ``load_p``, ``topo_vect`` and
      ``actual_dispatch`` are kept; ``gen_p`` and ``actual_dispatch`` are divided by
      ``gen_pmax``, and ``load_p`` is centred on the load of the first observation and
      divided by half of it.

    Args:
        env_config: mapping that must contain ``"env_name"`` (name or path given to
            ``grid2op.make``); every other entry is forwarded to ``grid2op.make`` as a
            keyword argument. The mapping is not modified.

    Raises:
        RuntimeError: if ``"env_name"`` is missing from ``env_config``.
    """

    def __init__(self, env_config):
        # 1. create the grid2op environment
        if "env_name" not in env_config:
            raise RuntimeError(
                "The configuration for RLLIB should provide the env name"
            )
        # Work on a copy: removing "env_name" from the caller's mapping would make any
        # later environment built from the same configuration fail the check above.
        make_kwargs = dict(env_config)
        nm_env: str = make_kwargs.pop("env_name")
        self.env_glop = grid2op.make(nm_env, **make_kwargs)

        # 2. create the gym environment
        self.env_gym = GymEnv(self.env_glop)
        # reset() returns a tuple whose first element is the observation; it is only
        # used below as the reference point for scaling "load_p".
        first_obs = self.env_gym.reset()[0]

        # 3. customize the spaces
        # action space: drop the "set_*" actions, re-encode the "change_*" ones
        act_space = self.env_gym.action_space
        act_space = act_space.ignore_attr("set_bus").ignore_attr("set_line_status")
        act_space = act_space.reencode_space("change_bus", MultiToTupleConverter())
        act_space = act_space.reencode_space(
            "change_line_status", MultiToTupleConverter()
        )
        self.env_gym.action_space = act_space

        # observation space: keep a subset of attributes and rescale the power values
        ob_space = self.env_gym.observation_space
        ob_space = ob_space.keep_only_attr(
            ["rho", "gen_p", "load_p", "topo_vect", "actual_dispatch"]
        )
        ob_space = ob_space.reencode_space(
            "actual_dispatch",
            ScalerAttrConverter(substract=0.0, divide=self.env_glop.gen_pmax),
        )
        ob_space = ob_space.reencode_space(
            "gen_p", ScalerAttrConverter(substract=0.0, divide=self.env_glop.gen_pmax)
        )
        ob_space = ob_space.reencode_space(
            "load_p",
            ScalerAttrConverter(
                substract=first_obs["load_p"], divide=0.5 * first_obs["load_p"]
            ),
        )
        self.env_gym.observation_space = ob_space

        # 4. specific to rllib: to avoid other types of issues, build the action space
        # and observation space directly from the gymnasium spaces class.
        self.observation_space = gym.spaces.Dict(
            dict(self.env_gym.observation_space.spaces.items())
        )
        self.action_space = gym.spaces.Dict(
            dict(self.env_gym.action_space.spaces.items())
        )

    def reset(
        self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None
    ):
        """Reset the wrapped environment and return what its ``reset`` returns.

        ``seed`` and ``options`` are forwarded to the wrapped environment so that a
        caller-provided seed is honoured instead of being silently dropped.
        """
        return self.env_gym.reset(seed=seed, options=options)

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, terminated, truncated, info)``."""
        obs, reward, terminated, truncated, info = self.env_gym.step(action)
        return obs, reward, terminated, truncated, info

    def get_env(self):
        """Return the wrapped ``GymEnv`` instance."""
        return self.env_gym

    def close(self):
        """Close the wrapped environment, if it was created."""
        # __init__ may have failed before env_gym was assigned.
        wrapped = getattr(self, "env_gym", None)
        if wrapped is not None:
            wrapped.close()

# Train agent

In [33]:
# Store the Tune results in a "results" folder resolved from the current working
# directory instead of a hard-coded, user-specific absolute path.
# NOTE(review): this matches the previous location only when the kernel's working
# directory is the notebook's folder - confirm for your set-up.
RESULTS_DIRECTORY = os.path.abspath("results")

# PPO / RLlib configuration: 4 rollout workers with one environment each, training on
# the "_train" split of the environment.
config = {
    "num_workers": 4,
    "num_envs_per_worker": 1,
    "env": MyEnv,
    "env_config": {
        "env_name": LIBRARY_DIRECTORY + ENV_NAME + "_train"},

    "framework": "torch",
    # "model": {
    #     "vf_share_layers": True,
    # },
    "lr": 0.0005,
    "gamma": 0.99,
}


# NB_STEP_TRAIN only acts as an on/off switch here; the amount of training is set by
# the "timesteps_total" stopping criterion below.
if NB_STEP_TRAIN:
    try:
        analysis = tune.run(
            ppo.PPO,
            config=config,
            stop={"timesteps_total": 10000},  # Adjust the stopping criterion
            verbose=1,
            local_dir=RESULTS_DIRECTORY,
        )
    finally:
        # shutdown ray, whether or not training succeeded
        ray.shutdown()

0,1
Current time:,2023-11-06 11:44:30
Running for:,00:01:08.86
Memory:,13.2/16.0 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_MyEnv_4df63_00000,TERMINATED,127.0.0.1:22953,3,62.149,12000,2447.95,12901.5,472.923,430.083




[2m[36m(RolloutWorker pid=22957)[0m OrderedDict([('change_bus', (0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1)), ('change_line_status', (1, 0, 1, 0, 0, 0, 0, 1))])
[2m[36m(RolloutWorker pid=22957)[0m (OrderedDict([('actual_dispatch', array([0., 0.], dtype=float32)), ('gen_p', array([0.71999997, 0.58678216], dtype=float32)), ('load_p', array([ 0.02531643, -0.1975309 ,  0.02439034], dtype=float32)), ('rho', array([0.45425078, 0.54077774, 0.4275628 , 0.39348567, 0.46876583,
[2m[36m(RolloutWorker pid=22957)[0m        0.23648427, 0.23648427, 0.36574695], dtype=float32)), ('topo_vect', array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[2m[36m(RolloutWorker pid=22957)[0m       dtype=int32))]), 6.699284076690674, False, False, {'disc_lines': array([-1, -1, -1, -1, -1, -1, -1, -1], dtype=int32), 'is_illegal': True, 'is_ambiguous': False, 'is_dispatching_illegal': False, 'is_illegal_reco': False, 'reason_alarm_illegal': None, 'reason_alert_illega

[2m[36m(PPO pid=22953)[0m Install gputil for GPU system monitoring.
2023-11-06 11:44:30,763	INFO tune.py:1143 -- Total run time: 69.25 seconds (68.85 seconds for the tuning loop).


[2m[36m(RolloutWorker pid=22954)[0m OrderedDict([('change_bus', (1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1)), ('change_line_status', (0, 0, 0, 0, 0, 1, 0, 1))])[32m [repeated 3x across cluster][0m
[2m[36m(RolloutWorker pid=22954)[0m        0.34983304, 0.34983304, 0.54493695], dtype=float32)), ('topo_vect', array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],[32m [repeated 2x across cluster][0m
[2m[36m(RolloutWorker pid=22954)[0m       dtype=int32))]), 5.016216278076172, False, False, {'disc_lines': array([-1, -1, -1, -1, -1, -1, -1, -1], dtype=int32), 'is_illegal': True, 'is_ambiguous': False, 'is_dispatching_illegal': False, 'is_illegal_reco': False, 'reason_alarm_illegal': None, 'reason_alert_illegal': None, 'opponent_attack_line': None, 'opponent_attack_sub': None, 'opponent_attack_duration': 0, 'exception': [Grid2OpException IllegalAction IllegalAction('More than 1 line status affected by the action: [5 7]')], 'rewards': {}})[32m 

In [34]:
# Show the trials of the Tune run. `analysis` is only assigned by the training cell
# when NB_STEP_TRAIN is truthy, so that cell must have run first.
print(analysis.trials)

[PPO_MyEnv_4df63_00000]
