In [1]:
from grid2op import make
from grid2op.Action.PlayableAction import PlayableAction
from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv
import numpy as np
from grid2op.multi_agent.multi_agentExceptions import *

from lightsim2grid import LightSimBackend
# Backend class used for the power-flow computations.
bk_cls = LightSimBackend

# Partition handed to MultiAgentEnv: agent name -> list of integer ids it controls
# (presumably substation ids of the 14-substation grid -- TODO confirm).
action_domains = {
    'agent_0': list(range(0, 5)),
    'agent_1': list(range(5, 14)),
}

# Alternative environment kept for reference: "educ_case14_storage"
env_name = "l2rpn_case14_sandbox"
env = make(
    env_name,
    test=True,
    backend=bk_cls(),
    action_class=PlayableAction,
    _add_to_name="_test_ma",
)

# Wrap the centralised environment without copying it (copy_env=False).
ma_env = MultiAgentEnv(env, action_domains, copy_env=False)

# Seed the wrapper, then fetch the initial multi-agent observation.
ma_env.seed(0)
obs = ma_env.reset()



In [5]:
# Reset the centralised environment (this rebinds the notebook-level `obs`)
# and flatten the resulting observation into a numpy vector.
obs = env.reset()
o = np.array(obs.to_vect())
# Last expression of the cell: displays the shape of the flattened observation.
o.shape

(467,)

In [13]:
# Scratch check: persist a plain Python dict through np.save.
# A dict is not a numeric array, so numpy has to store it as a pickled object.
test = {1 : 1}
np.save('test.npy', arr=test)

In [28]:
# Reload the file written by the np.save cell. allow_pickle=True is needed to
# read back a pickled object; only use it on files you trust, since unpickling
# can execute arbitrary code.
# NOTE: `test` is rebound here from the original dict to the loaded numpy array.
test = np.load('test.npy', allow_pickle=True)
# `test[None]` adds a leading axis and `[0]` takes the single stored element;
# the cell displays the type of that element.
type(test[None][0])

dict

In [10]:
from typing import Dict
from grid2op.multi_agent.ma_typing import MAAgents
from grid2op.Environment.BaseEnv import BaseEnv
from grid2op.Agent.baseAgent import BaseAgent
from grid2op.multi_agent.multiAgentEnv import MultiAgentEnv


def _run_simple_actor(
    env : BaseEnv,
    actor : BaseAgent,
    nb_episodes : int,
) -> dict:
    
    T = np.zeros(nb_episodes, dtype = int)
    obs = env.reset()
    t = 0
    
    rewards_history = []
    mean_rewards_history = np.zeros(nb_episodes)
    std_rewards_history = np.zeros(nb_episodes)
    cumulative_reward = np.zeros(nb_episodes)
    
    info_history = [[] for _ in range(nb_episodes)]
    
    obs_history = [[] for _ in range(nb_episodes)]
    
    done_history = [[] for _ in range(nb_episodes)]
    
    actions_history = [[] for _ in range(nb_episodes)]
    
    reward = 0
    
    for episode in range(nb_episodes):
        while True:
            t += 1
            action = actor.act(observation = obs, reward = reward)
            obs, reward, done, info = env.step(action)
            
            #obs._obs_env = None

            rewards_history.append(reward)
            info_history[episode].append(info.copy())
            obs_history[episode].append(obs)
            done_history[episode].append(done)
            actions_history[episode].append(action)
            

            if done:
                mean_rewards_history[episode] = np.mean(rewards_history)
                std_rewards_history[episode] = np.std(rewards_history)
                cumulative_reward[episode] = np.sum(rewards_history)
                obs = env.reset()
                T[episode] = t
                t = 0
                break
            
    return {
        'mean_rewards' : mean_rewards_history,
        'std_rewards' : std_rewards_history,
        'episode_len' : T,
        'info_history' : info_history,
        'obs_history' : obs_history,
        'done_history' : done_history,
        'actions' : actions_history,
        'cumulative_reward' : cumulative_reward
         
        # TODO cum reward done
        # TODO local actions
    }

def _run_ma_actors(
    ma_env : MultiAgentEnv,
    actors : MAAgents,
    nb_episodes : int,
) -> dict:
    
    T = np.zeros(nb_episodes, dtype = int)
    obs = ma_env.reset()
    t = 0
    
    rewards_history = []
    mean_rewards_history = np.zeros(nb_episodes)
    std_rewards_history = np.zeros(nb_episodes)
    cumulative_reward = np.zeros(nb_episodes)
    
    info_history = [[] for _ in range(nb_episodes)]
    
    obs_history = [[] for _ in range(nb_episodes)]
    
    done_history = [[] for _ in range(nb_episodes)]
    
    actions_history = [[] for _ in range(nb_episodes)]
    
    r = 0
    
    for episode in range(nb_episodes):
        while True:
            t += 1
            actions = {
                agent : actors[agent].act(observation = obs[agent], reward = r)
                for agent in ma_env.agents
            }
            obs, reward, dones, info = ma_env.step(actions)

            r = reward[ma_env.agents[0]]
            rewards_history.append(r)
            info_history[episode].append(info[ma_env.agents[0]].copy())
            
            for agent in ma_env.agents:
                # TODO pourquoi ce problème ?
                obs[agent]._obs_env = None
                
            obs_history[episode].append(obs[ma_env.agents[0]])
            done_history[episode].append(dones[ma_env.agents[0]])
            actions_history[episode].append(ma_env.global_action)
                

            if dones[ma_env.agents[0]]:
                mean_rewards_history[episode] = np.mean(rewards_history)
                std_rewards_history[episode] =  np.std(rewards_history)
                cumulative_reward[episode] = np.sum(rewards_history)
                
                rewards_history = []
                
                obs = ma_env.reset()
                T[episode] = t
                t = 0
                break
            
    return {
        'mean_rewards' : mean_rewards_history,
        'std_rewards' : std_rewards_history,
        'episode_len' : T,
        'info_history' : info_history,
        'obs_history' : obs_history,
        'done_history' : done_history,
        'actions' : actions_history,
        'cumulative_reward' : cumulative_reward
    }

    
def compare_simple_and_multi(
    env : BaseEnv,
    simple_actor : BaseAgent,
    ma_actors : MAAgents,
    episodes : int = 2,
    seed = 0,
    chronics_id = 0,
    save_path = "./",
    copy_env = True
    ):
    """Run a centralised actor and a set of multi-agent actors on the same grid.

    ``env`` is wrapped into a ``MultiAgentEnv`` using the notebook-level
    ``action_domains`` (it is read as a global, not passed in). The wrapper is
    seeded with ``seed`` and its centralised environment is pointed at
    ``chronics_id``. ``simple_actor`` is then run for ``episodes`` episodes on
    the centralised environment, followed by ``ma_actors`` for ``episodes``
    episodes on the wrapper.

    ``save_path`` is accepted but currently unused (saving results is a TODO).

    Returns
    -------
    tuple
        ``(results_simple, results_ma)``: the dicts returned by
        ``_run_simple_actor`` and ``_run_ma_actors`` respectively.
    """
    wrapped_env = MultiAgentEnv(env, action_domains, copy_env=copy_env)
    wrapped_env.seed(seed)

    centralised_env = wrapped_env._cent_env
    centralised_env.set_id(chronics_id)

    # The centralised run goes first, then the multi-agent run.
    simple_outcome = _run_simple_actor(centralised_env, simple_actor, episodes)
    ma_outcome = _run_ma_actors(wrapped_env, ma_actors, episodes)

    # TODO: save the results (save_path)

    return simple_outcome, ma_outcome

## Replay with 2 random agents

In [11]:
from grid2op.Agent import RandomAgent
from grid2op.Converter.IdToAct import IdToAct

# Centralised baseline: a single random agent acting on the full action space.
simple_actor = RandomAgent(env.action_space)
episodes = 5

# One random agent per multi-agent domain, each drawing from its own local
# action space through a local IdToAct converter.
ma_actors = {}
for agent_nm in ma_env.agents:
    local_action_space = ma_env.action_spaces[agent_nm]
    IdToActThis = local_action_space.make_local(IdToAct)
    #assert IdToActThis.agent_name == agent_nm
    ma_actors[agent_nm] = RandomAgent(
        local_action_space,
        action_space_converter=IdToActThis,
    )

# TODO: use more episodes
results_simple, results_ma = compare_simple_and_multi(
    env=env,
    simple_actor=simple_actor,
    ma_actors=ma_actors,
    episodes=episodes,
    copy_env=False,
)



In [12]:
from grid2op.Agent.fromActionsListAgent import FromActionsListAgent

# Show every global action recorded during the first multi-agent episode.
for action in results_ma['actions'][0]:
    print(action)

# Build one replay agent per recorded episode: each one replays, on the
# centralised action space, the global actions stored for that episode.
replays = []
for episode in range(episodes):
    recorded_actions = results_ma['actions'][episode]
    replay_agent = FromActionsListAgent(env.action_space, recorded_actions)
    replays.append(replay_agent)

This action will:
	 - NOT change anything to the injections
	 - NOT perform any redispatching action
	 - NOT modify any storage capacity
	 - NOT perform any curtailment
	 - NOT force any line status
	 - NOT switch any line status
	 - NOT switch anything in the topology
	 - Set the bus of the following element(s):
	 	 - Assign bus 2 to line (extremity) id 3 [on substation 3]
	 	 - Assign bus 1 to line (extremity) id 5 [on substation 3]
	 	 - Assign bus 2 to line (origin) id 6 [on substation 3]
	 	 - Assign bus 1 to line (origin) id 15 [on substation 3]
	 	 - Assign bus 1 to line (origin) id 16 [on substation 3]
	 	 - Assign bus 2 to load id 2 [on substation 3]
	 	 - Assign bus 2 to line (origin) id 10 [on substation 8]
	 	 - Assign bus 1 to line (origin) id 11 [on substation 8]
	 	 - Assign bus 1 to line (extremity) id 16 [on substation 8]
	 	 - Assign bus 2 to line (origin) id 19 [on substation 8]
	 	 - Assign bus 1 to load id 5 [on substation 8]
This action will:
	 - NOT change anythi

In [18]:
from grid2op.Runner import Runner
import os 
import shutil
from tqdm.notebook import tqdm

# Replay the recorded multi-agent actions with the grid2op Runner and collect
# the cumulative reward of each replay in `replay_cum_rewards`.

# Rebind the notebook-level `env` to the centralised environment held by `ma_env`.
env = ma_env._cent_env 

# Root output directory for the runs, and the step cap passed to the runner.
path_agents = "study_agent_getting_started"
max_iter = 10_000

# Start from an empty output directory: wipe it (errors ignored), then
# recreate it if it is not there.
shutil.rmtree(os.path.abspath(path_agents), ignore_errors=True)
if not os.path.exists(path_agents):
    os.mkdir(path_agents)

# Sub-directory where the runner saves the replay agent's results.
path_agent = os.path.join(path_agents, "ReplayAgent")
# The parent directory was just wiped above, so this normally has nothing to remove.
shutil.rmtree(os.path.abspath(path_agent), ignore_errors=True)

# Seed the environment, select chronics 0 and reset before building the runners.
env.seed(0)
env.set_id(0)
env.reset()

# One cumulative reward per replayed episode, in the order of `replays`.
replay_cum_rewards = []

for i, replay in enumerate(replays):

    # Runner built from the environment's own parameters, driving the
    # already-instantiated replay agent (hence agentClass=None).
    runner = Runner(**env.get_params_for_runner(),
                    agentClass=None,
                    agentInstance=replay
                    )
    # Run a single episode per replay agent: seed 0, episode id = index of the replay.
    res = runner.run(path_save=path_agent,
                     nb_episode=1, 
                     max_iter=max_iter,
                     env_seeds=[0],
                     episode_id=[i],
                     pbar=tqdm)
    print("The results for the evaluated agent are:")
    # Each result is unpacked into five fields; the first one is ignored here.
    for _, chron_id, cum_reward, nb_time_step, max_ts in res:
        replay_cum_rewards.append(cum_reward)
        msg_tmp = "\tFor chronics with id {}\n".format(chron_id)
        msg_tmp += "\t\t - cumulative reward: {:.6f}\n".format(cum_reward)
        msg_tmp += "\t\t - number of time steps completed: {:.0f} / {:.0f}".format(nb_time_step, max_ts)
        print(msg_tmp)

episode:   0%|          | 0/1 [00:00<?, ?it/s]

episode:   0%|          | 0/575 [00:00<?, ?it/s]

The results for the evaluated agent are:
	For chronics with id 0000
		 - cumulative reward: 53.410580
		 - number of time steps completed: 2 / 575


episode:   0%|          | 0/1 [00:00<?, ?it/s]

episode:   0%|          | 0/575 [00:00<?, ?it/s]

The results for the evaluated agent are:
	For chronics with id 0001
		 - cumulative reward: 54.976692
		 - number of time steps completed: 2 / 575


episode:   0%|          | 0/1 [00:00<?, ?it/s]

episode:   0%|          | 0/575 [00:00<?, ?it/s]

The results for the evaluated agent are:
	For chronics with id 0002
		 - cumulative reward: -10.000000
		 - number of time steps completed: 1 / 575


episode:   0%|          | 0/1 [00:00<?, ?it/s]

episode:   0%|          | 0/575 [00:00<?, ?it/s]

The results for the evaluated agent are:
	For chronics with id 0000
		 - cumulative reward: 117.280548
		 - number of time steps completed: 3 / 575


episode:   0%|          | 0/1 [00:00<?, ?it/s]

episode:   0%|          | 0/575 [00:00<?, ?it/s]

The results for the evaluated agent are:
	For chronics with id 0001
		 - cumulative reward: 119.353088
		 - number of time steps completed: 3 / 575


In [20]:
# Replaying the recorded global actions must reproduce the multi-agent returns.
# The two sides are accumulated differently (np.sum over the step rewards vs.
# the Runner's own accumulation), so compare with a small relative tolerance
# instead of exact float equality. assert_allclose also fails with a readable
# report if the two sequences do not have the same length.
np.testing.assert_allclose(
    results_ma['cumulative_reward'],
    replay_cum_rewards,
    rtol=1e-5,
    err_msg="replayed cumulative rewards differ from the multi-agent run",
)