In [1]:
import os
import sys
import grid2op
res = None
try:
    from jyquickhelper import add_notebook_menu
    res = add_notebook_menu
except NoduleNotFoundError:
     print("Impossible to automatically add a menu / table of content to this notebook.\nYou can download \"jyquickhelper\" package with: \n\"pip install jyquickhelper\"")
res

<function jyquickhelper.helper_in_notebook.add_notebook_menu(menu_id='my_id_menu_nb', raw=False, format='html', header=None, first_level=2, last_level=4, keep_item=None)>

In [2]:
#通过env.step()可获得对环境的观察，这里默认使用rte_case14_realistic环境
env = grid2op.make(test=True)
#为了执行一个时间步，需要一个动作，env.action_space()用于执行动作，这里的动作是do nothing，动作后可获得观察、奖励等信息
do_nothing_act = env.action_space({})
obs,reward,done,info = env.step(do_nothing_act)



In [6]:
#Grid2op包含不同种类的观察。例如有些观察可以是不完全的数据或者噪声数据。CompleteObservation给出电网全状态的观察，不带噪声，是默认的观察类型
#一个观察包含了日历数据(例如一个观察的时间戳)
obs.year, obs.month, obs.day, obs.hour_of_day, obs.minute_of_hour, obs.day_of_week

(2019, 1, 1, 0, 10, 1)

In [3]:
#电网的基本信息
print("Number of generators of the powergrid: {}".format(obs.n_gen))
print("Number of loads of the powergrid: {}".format(obs.n_load))
print("Number of powerline of the powergrid: {}".format(obs.n_line))
print("Number of elements connected to each substations in the powergrid: {}".format(obs.sub_info))
print("Total number of elements: {}".format(obs.dim_topo))

Number of generators of the powergrid: 5
Number of loads of the powergrid: 11
Number of powerline of the powergrid: 20
Number of elements connected to each substations in the powergrid: [3 6 4 6 5 6 3 2 5 3 3 3 4 3]
Total number of elements: 56


In [6]:
#他还有发电机的信息(每个发电机可以看作是三维空间的一个点)
print("Generators active production: {}".format(obs.prod_p))
print("Generators reactive production: {}".format(obs.prod_q))
print("Generators voltage setpoint : {}".format(obs.prod_v))

Generators active production: [81.6    81.1    12.9     0.     77.7201]
Generators reactive production: [ 21.790668  70.214264  48.05804   24.508774 -16.541656]
Generators voltage setpoint : [142.1      142.1       22.        13.200001 142.1     ]


In [18]:
#负载信息(负载同样可以看作是三维空间的一个点)
print("Loads active consumption: {}".format(obs.load_p))
print("Loads reactive consumption: {}".format(obs.load_q))
print("Loads voltage (voltage magnitude of the bus to which it is connected) : {}".format(obs.load_v))

Loads active consumption: [25.4 84.8 45.   6.8 12.7 28.8  9.5  3.4  5.6 11.9 15.4]
Loads reactive consumption: [17.8 59.6 30.8  4.6  8.7 19.7  6.6  2.5  3.9  8.4 10.9]
Loads voltage (voltage magnitude of the bus to which it is connected) : [142.1      142.1      138.70158  139.39479   22.        21.092138
  21.08566   21.453533  21.569204  21.430758  20.69996 ]


In [4]:
#输电线可以看成是一个8维空间中的点，包括有功潮流、无功潮流、电压幅值、电流潮流，每个值包含线路初始端和末端两个值。
#比如，一条传输线连接两个端点A和B，则该线路的有功潮流有两个独立的值：从A到B的潮流(origin)和从B到A的潮流(extremity)。
print("Origin active flow: {}".format(obs.p_or))
print("Origin reactive flow: {}".format(obs.q_or))
print("Origin current flow: {}".format(obs.a_or))
print("Origin voltage (voltage magnitude to the bus to which the origin end is connected): {}".format(obs.v_or))
print("Extremity active flow: {}".format(obs.p_ex))
print("Extremity reactive flow: {}".format(obs.q_ex))
print("Extremity current flow: {}".format(obs.a_ex))
print("Extremity voltage (voltage magnitude to the bus to which the origin end is connected): {}".format(obs.v_ex))

Origin active flow: [ 3.9922398e+01  3.7797703e+01  2.1780769e+01  4.0161697e+01
  3.3859901e+01  1.7860813e+01 -2.8052214e+01  9.7739801e+00
  7.7287230e+00  1.8051615e+01  3.3593693e+00  7.7858996e+00
 -6.1440678e+00  2.0364611e+00  7.8760457e+00  2.5437183e+01
  1.4508085e+01  3.5354317e+01 -2.2204460e-14 -2.5437183e+01]
Origin reactive flow: [-15.334058    -1.2075976   -7.0024953    0.663526    -0.383117
   7.329237    -2.9436882   10.462834     5.576318    14.927625
  -0.85521334   4.0310593   -7.464343     1.4842943    7.410275
 -15.625636    -2.7032974   -5.641245   -23.634314    -5.573329  ]
Origin current flow: [173.75766  153.64987   92.956    163.19867  137.5811    78.4405
 117.409485 375.74683  250.10808  614.7267    94.88822  239.99174
 264.71527   67.45319  291.3341   124.26482   61.42983  148.28416
 918.84076  712.8031  ]
Origin voltage (voltage magnitude to the bus to which the origin end is connected): [142.1      142.1      142.1      142.1      142.1      142.1
 138.

In [9]:
#另外一个输电线特征是ρ比率rho，就是每条输电线的电流潮流和温升极限之间的比值
obs.rho

array([0.45143566, 0.3991941 , 0.24462105, 0.42947018, 0.87631273,
       0.20642236, 0.30897233, 0.34864962, 0.54149985, 0.7985534 ,
       0.3521812 , 0.62351686, 0.34830958, 0.1775084 , 0.38333434,
       0.32284945, 0.26599896, 0.86817706, 0.27006915, 0.20950978],
      dtype=float32)

In [11]:
#每条输电线处于过载状态的时间步个数
obs.timestep_overflow

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [12]:
#每个输电线的状态：True代表连接，False代表断开
obs.line_status

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [13]:
#每个元素(发电机、负载、输电线的一端)的拓扑向量连接在变电站的哪个bus上：1=bus 1，2=bus2，-1=断开
#在grid2op中，每个物体可以被连接或断开在变电站的bus 1或者bus 2上

obs.topo_vect  # the topology vector the each element (generator, load, each end of a powerline) to which the object
# is connected: 1 = bus 1, 2 = bus 2

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [5]:
#一个观察也可以转换成numpy array，这样有助于与其他机器学习库的交互或者存储，但是可读性变差
#这个函数会把上面所有提到的属性堆叠在一个numpy.float64向量中
vector_representation_of_observation = obs.to_vect()
vector_representation_of_observation

array([ 2.01900000e+03,  1.00000000e+00,  1.00000000e+00,  0.00000000e+00,
        1.00000000e+01,  1.00000000e+00,  8.15999985e+01,  8.10999985e+01,
        1.28999996e+01,  0.00000000e+00,  7.77201004e+01,  2.17906685e+01,
        7.02142639e+01,  4.80580406e+01,  2.45087738e+01, -1.65416565e+01,
        1.42100006e+02,  1.42100006e+02,  2.20000000e+01,  1.32000008e+01,
        1.42100006e+02,  2.53999996e+01,  8.48000031e+01,  4.50000000e+01,
        6.80000019e+00,  1.26999998e+01,  2.87999992e+01,  9.50000000e+00,
        3.40000010e+00,  5.59999990e+00,  1.18999996e+01,  1.53999996e+01,
        1.77999992e+01,  5.95999985e+01,  3.07999992e+01,  4.59999990e+00,
        8.69999981e+00,  1.97000008e+01,  6.59999990e+00,  2.50000000e+00,
        3.90000010e+00,  8.39999962e+00,  1.08999996e+01,  1.42100006e+02,
        1.42100006e+02,  1.38701584e+02,  1.39394791e+02,  2.20000000e+01,
        2.10921383e+01,  2.10856609e+01,  2.14535332e+01,  2.15692043e+01,
        2.14307575e+01,  

In [16]:
#一个观察也可以被复制
obs2 = obs.copy()

In [17]:
#或者重制
obs2.reset()
print(obs2.prod_p)

[nan nan nan nan nan]


In [19]:
#或者从一个向量中加载
obs2.from_vect(vector_representation_of_observation)
obs2.prod_p

array([81.6   , 81.1   , 12.9   ,  0.    , 77.7201], dtype=float32)

In [20]:
#还可判断两个观察值是否相等
obs == obs2

True

In [6]:
#还有一类观察值，可以将拓扑变成矩阵，拓扑矩阵有两种格式
#第一种式connectivity matrix，如果mat[i,j]=1，则表明矩阵中的元素连接在一个bus上，或者是一个输电线的两端
obs.connectivity_matrix()

array([[0., 1., 1., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 1.],
       [0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 1., 1., 0.]], dtype=float32)

In [8]:
#第二种式bus connectivity matrix，如果mat[i,j]=1，则表明矩阵中的至少有一个输电线连接了bus i和j
obs.bus_connectivity_matrix()

array([[1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
       [1., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0.],
       [0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1.]],
      dtype=float32)

In [9]:
#与大部分的RL问题相反，这个框架增加了“仿真”功能去仿真可能的动作对电网的影响。这有助于计算RL设定的roll-out
#仿真的方法基于预测数据(预测数据属于GridStateFromFileWithForecasts类)
#simulate功能，给电网运行一个预设的动作，通过simulator来仿真潮流，返回预期的observation、reward、error和其他信息
#simulate功能是grid2op与pypownet最主要的不同，在pypowent，仿真直接在environment中进行，可使agent直接访问environment未来的数据，这样
#可能会突破RL框架，因为agent只需要知道environment目前的状态。
#而在grid2op，目前仍然还是从environment目前的状态进行仿真。
do_nothing_act = env.action_space()
obs_sim,reward_sim,is_done_sim,info_sim = obs.simulate(do_nothing_act)

In [10]:
obs.prod_p

array([81.6   , 81.1   , 12.9   ,  0.    , 77.7201], dtype=float32)

In [11]:
obs_sim.prod_p

array([81.5   , 79.7   , 12.9   ,  0.    , 79.5781], dtype=float32)

In [7]:
#这一部分是根据observation来创建agent
#所有的agent必须来自grid2op.Agent类。Agents最主要的功能是act
#总体来说，Agent会根据接收的reward和observation来选择下一步的动作，gri2op中有些预定义好的agents
#Agent会选择下面4个动作之一：1 do nothing 2断开具有higher relative flows的输电线 3 重连断开的输电线 4 断开具有lower relative flows的输电线
from grid2op.Agent import BaseAgent
import numpy as np
import pdb
class MyAgent(BaseAgent):
    def __init__(self,action_space):
        #python required method to code
        BaseAgent.__init__(self,action_space)
        self.do_nothing = self.action_space({})
        self.print_next = False
    
    def act(self,observation,reward,done=False):
        i_max = np.argmax(observation.rho)
        new_status_max = np.zeros(observation.rho.shape)
        new_status_max[i_max] = -1
        act_max = self.action_space({"set_line_status": new_status_max})
        
        i_min = np.argmin(observation.rho)
        new_status_min = np.zeros(observation.rho.shape)
        if observation.rho[i_min] > 0:
            #代表所有的输电线处于连接状态，而i尝试断开这根线
            new_status_min[i_min] = -1
            act_min = self.action_space({"set_line_status":new_status_min})
        else:
            #至少有有一条输电线断开，i尝试重新连接它
            new_status_min[i_min] = 1
            act_min = self.action_space({"set_line_status":new_status_min,"set_bus":{"line_or_id":[(i_min,1)],"line_ex_id":[(i_min,1)]}})
            
        _, reward_sim_dn, *_ = observation.simulate(self.do_nothing)
        _, reward_sim_max, *_ = observation.simulate(act_max)
        _, reward_sim_min, *_ = observation.simulate(act_min)
        
        if reward_sim_dn >= reward_sim_max and reward_sim_dn >= reward_sim_min:
            self.print_next = False
            res = self.do_nothing
        elif reward_sim_max >= reward_sim_min:
            self.print_next = True
            res = act_max
            print(res)
        else:
            self.print_next = True
            res = act_min
            print(res)
        return res

In [8]:
#现在将这个Agent和Donothin agent在3个episodes上进行比较，通过使用L2RPNReward会使比较更有趣
from grid2op.Runner import Runner
from grid2op.Agent import DoNothingAgent
from grid2op.Reward import L2RPNReward
from grid2op.Chronics import GridStateFromFileWithForecasts

max_iter = 10#为了让计算更快，我们只考虑50个时间步，而不是287个
runner = Runner(**env.get_params_for_runner(),agentClass=DoNothingAgent)
res = runner.run(nb_episode=1,max_iter=max_iter)

print("The results for DoNothing agent are:")
for _,chron_name,cum_reward,nb_time_step,max_ts in res:
    msg_tmp = "\tFor chronics with id {}\n".format(cum_reward)
    msg_tmp += "\t\t - cumulative reward:{:.6f}\n".format(cum_reward)
    msg_tmp += "\t\t - number of time steps completed:{:.0f}/{:.0f}".format(nb_time_step,max_ts)
    print(msg_tmp)

The results for DoNothing agent are:
	For chronics with id 11076.7685546875
		 - cumulative reward:11076.768555
		 - number of time steps completed:10/10


In [26]:
#从结果看出两个agent得分相同，但是如果不限制episode长度是10的话，两个agent的得分将不同，关闭episode限制就是将max_iter=-1
runner = Runner(**env.get_params_for_runner(),agentClass=MyAgent)
res = runner.run(nb_episode=1,max_iter=max_iter)

print("The results for the custom agent are:")
for _,chron_name,cum_reward,nb_time_step,max_ts in res:
    msg_tmp = "\tFor chronics with id {}\n".format(cum_reward)
    msg_tmp += "\t\t - cumulative reward:{:.6f}\n".format(cum_reward)
    msg_tmp += "\t\t - number of time steps completed:{:.0f}/{:.0f}".format(nb_time_step,max_ts)
    print(msg_tmp)

The results for the custom agent are:
	For chronics with id 11076.7685546875
		 - cumulative reward:11076.768555
		 - number of time steps completed:10/10


In [9]:
from grid2op.Agent import PowerLineSwitch
runner = Runner(**env.get_params_for_runner(),agentClass=PowerLineSwitch)
res = runner.run(nb_episode=1,max_iter=max_iter)

print("The result for the PowerLineSwitch agent are:")
for _,chron_name,cum_reward,nb_time_step,max_ts in res:
    msg_tmp = "\tFor chronics with id {}\n".format(chron_name)
    msg_tmp += "\t\ - cumulative reward:{:.06f}\n".format(cum_reward)
    msg_tmp += "\t\t - number of time steps completed:{:.0f}/{:.0f}".format(nb_time_step,max_ts)
    print(msg_tmp)

The result for the PowerLineSwitch agent are:
	For chronics with id 000
	\ - cumulative reward:11076.778320
		 - number of time steps completed:10/10
