In [1]:
import os
import time
from collections import defaultdict
from copy import deepcopy

import grid2op
import networkx as nx
import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from grid2op.Agent import BaseAgent, DeltaRedispatchRandomAgent, TopologyGreedy

In [2]:
def sparse_to_tuple(sparse_mx):
    """Turn a SciPy sparse matrix into a ``(coords, values, shape)`` triple.

    ``coords`` is an ``(nnz, 2)`` array of ``(row, col)`` positions, ``values``
    holds the matching entries and ``shape`` is the matrix shape.

    If ``sparse_mx`` is a list, every element is converted and written back
    into that same list (the caller's list is modified in place) and the list
    itself is returned. Otherwise the single converted triple is returned.
    """
    def _as_triplet(matrix):
        # Work on COO data because it exposes explicit row/col index arrays.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        positions = np.vstack([coo.row, coo.col]).T
        return positions, coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for idx, matrix in enumerate(sparse_mx):
        sparse_mx[idx] = _as_triplet(matrix)
    return sparse_mx

def normalize_adj(adj):
    """Scale an adjacency matrix by the inverse square root of its row sums.

    With ``D`` the diagonal matrix of row sums of ``adj``, the value returned
    is ``(adj . D^-0.5)^T . D^-0.5`` as a COO sparse matrix; for a symmetric
    ``adj`` this equals ``D^-0.5 . adj . D^-0.5``. Rows whose sum is zero get a
    scaling factor of 0 instead of infinity.
    """
    coo = sp.coo_matrix(adj)
    degrees = np.array(coo.sum(1))
    inv_sqrt_deg = np.power(degrees, -0.5).flatten()
    # 0 ** -0.5 is inf; such rows are zeroed out rather than propagated.
    inv_sqrt_deg = np.where(np.isinf(inv_sqrt_deg), 0., inv_sqrt_deg)
    scaling = sp.diags(inv_sqrt_deg)
    column_scaled = coo.dot(scaling)
    return column_scaled.transpose().dot(scaling).tocoo()

def preprocess_adj(adj):
    """Add self-loops to ``adj``, normalize it, and return it in tuple form.

    Input: adjacency matrix A.
    Output: ``normalize_adj(A + I)`` converted by ``sparse_to_tuple`` into
    (element positions, element values, matrix size).
    """
    node_count = adj.shape[0]
    with_self_loops = adj + sp.eye(node_count)  # (A + I)
    return sparse_to_tuple(normalize_adj(with_self_loops))

def preprocess_features(features):
    """Row-normalize a feature matrix and convert it to tuple representation.

    Each row of ``features`` is divided by that row's sum (``D^-1 . X``).
    Rows whose sum is zero are left as all zeros instead of becoming inf.

    Args:
        features: SciPy sparse matrix of shape [n_nodes, n_features].

    Returns:
        The normalized matrix as produced by ``sparse_to_tuple``:
        (element positions, element values, matrix size).

    NOTE(review): only infinite reciprocals are zeroed; a row whose sum is NaN
    still yields NaN entries.
    """
    rowsum = np.array(features.sum(1))  # per-row sum, shape [n_nodes, 1]
    if not np.issubdtype(rowsum.dtype, np.inexact):
        # NumPy refuses integer ** negative integer, so integer/bool feature
        # matrices must be promoted before taking the reciprocal.
        rowsum = rowsum.astype(float)
    with np.errstate(divide='ignore'):
        # Division by zero is expected for empty rows and handled just below.
        r_inv = np.power(rowsum, -1).flatten()  # 1 / rowsum, shape [n_nodes]
    r_inv[np.isinf(r_inv)] = 0.  # empty rows get a zero scaling factor
    r_mat_inv = sp.diags(r_inv)  # sparse diagonal matrix [n_nodes, n_nodes]
    features = r_mat_inv.dot(features)  # D^-1 [n_nodes, n_nodes] times X [n_nodes, n_features]
    return sparse_to_tuple(features)  # [coordinates, data, shape]


In [3]:
def MatrixA(env, cur_obs):
    """Build the (unweighted) bus adjacency of the grid as a dict of lists.

    A node index is ``substation_id + env.n_sub * (bus - 1)``, computed from
    the per-line substation ids and bus numbers found on ``cur_obs``. Two
    nodes are adjacent when a line has one end on each of them and both ends
    report a bus number greater than 0.

    Args:
        env: object exposing ``n_sub`` and ``backend._grid.bus.index``; the
            length of that index is the number of nodes.
        cur_obs: observation exposing ``line_or_bus``, ``line_ex_bus``,
            ``line_or_to_subid`` and ``line_ex_to_subid``.

    Returns:
        ``defaultdict(list)`` with one key per node ``0 .. n_nodes - 1`` mapped
        to the list of its distinct neighbours (order not significant).
    """
    n_sub = env.n_sub
    n_nodes = len(env.backend._grid.bus.index)

    # One pass over the lines instead of rescanning every line for every node.
    neighbours = [set() for _ in range(n_nodes)]
    line_ends = zip(cur_obs.line_or_to_subid, cur_obs.line_or_bus,
                    cur_obs.line_ex_to_subid, cur_obs.line_ex_bus)
    for or_sub, or_bus, ex_sub, ex_bus in line_ends:
        if or_bus <= 0 or ex_bus <= 0:
            # An end without a positive bus number contributes no edge.
            continue
        or_node = or_sub + n_sub * (or_bus - 1)
        ex_node = ex_sub + n_sub * (ex_bus - 1)
        # Only nodes present in the bus table get an entry of their own.
        if 0 <= or_node < n_nodes:
            neighbours[or_node].add(ex_node)
        if 0 <= ex_node < n_nodes:
            neighbours[ex_node].add(or_node)

    con_dict = defaultdict(list)
    for node, adjacent in enumerate(neighbours):
        con_dict[node] = list(adjacent)
    return con_dict

def MatrixX(env):
    """Return the node feature matrix X as (element positions, element values, matrix size).

    The features are the raw values of the backend's ``res_bus`` table
    (per the original note: bus V, theta, P and Q), row-normalized by
    ``preprocess_features``.
    """
    bus_results = env.backend._grid.res_bus.values
    return preprocess_features(sp.csr_matrix(bus_results))

def GenerateObs(features, support):
    """Concatenate two sparse tensors side by side as one dense tensor.

    Input: ``features`` and ``support`` in sparse format. Each one is
    reordered, densified, and the two dense results are joined along axis 1
    (features first, then support).
    """
    dense_parts = [
        tf.sparse.to_dense(tf.sparse.reorder(sparse_tensor))
        for sparse_tensor in (features, support)
    ]
    return tf.concat(dense_parts, axis=1)




In [4]:
# The dataset lives in the ``data/l2rpn_case14_sandbox`` folder of the installed
# grid2op package; deriving the location from the package itself keeps the
# notebook runnable outside the original author's machine.
# NOTE(review): assumes the installed grid2op ships this folder - confirm.
DATA_PATH = os.path.join(os.path.dirname(grid2op.__file__), 'data', 'l2rpn_case14_sandbox')
SCENARIO_PATH = os.path.join(DATA_PATH, 'chronics')
env = grid2op.make(dataset=DATA_PATH, chronics_path=SCENARIO_PATH)
# Candidate actions handed to the agent: every unitary topology change.
all_actions = env.action_space.get_all_unitary_topologies_change(env.action_space)


In [5]:
class DQNAgent(BaseAgent):
    """Agent that idles while the grid is lightly loaded and otherwise asks a
    pre-trained DQN which topology action to play.

    While the highest line loading (``rho``) is below ``RHO_THRESHOLD`` the
    agent plays a do-nothing action, possibly augmented with the reconnection
    of one disconnected line. Above the threshold the network input is built
    from the grid adjacency and bus features, and the action with the largest
    predicted value is selected from ``self.actions``.
    """

    # Checkpoint loaded when no ``model_path`` is given (original location).
    DEFAULT_MODEL_PATH = '/Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt/t7fc/DQN_2301'
    # Maximum line loading under which the network is not consulted.
    RHO_THRESHOLD = 0.999

    def __init__(self, env, action_space, model_path=None):
        """
        Args:
            env: environment used to create do-nothing actions and to read the
                backend state when building the network input.
            action_space: indexable collection of candidate actions; the
                network output index selects one of them.
            model_path: optional Keras model location; defaults to
                ``DEFAULT_MODEL_PATH``.
        """
        super(DQNAgent, self).__init__(action_space=action_space)
        self.env = env
        self.actions = action_space
        self.dqn_model = tf.keras.models.load_model(
            self.DEFAULT_MODEL_PATH if model_path is None else model_path
        )

    @staticmethod
    def _reconnect_vector(obs, line):
        """Return a ``set_line_status`` vector that reconnects only ``line``."""
        status = np.zeros_like(obs.rho, dtype=int)
        status[line] = 1
        return status

    def find_best_line_to_reconnect(self, obs, original_action):
        """Add to ``original_action`` the line reconnection that lowers the
        simulated maximum ``rho`` the most, if there is one.

        The returned object is ``original_action`` itself, i.e. the two
        actions merged together; it is updated in place when a beneficial
        reconnection is found and returned untouched otherwise.
        """
        disconnected_lines = np.flatnonzero(np.logical_not(obs.line_status))
        if disconnected_lines.size == 0:
            return original_action
        cooldown = obs.time_before_cooldown_line
        if (cooldown[disconnected_lines] > 0).all():
            return original_action

        baseline_obs, _, _, _ = obs.simulate(original_action)
        min_rho = baseline_obs.rho.max()
        line_to_reconnect = -1
        # Honour an ``is_legal`` hook if a subclass or mixin supplies one; this
        # class does not define it.
        legality_hook = getattr(self, 'is_legal', None)
        for line in disconnected_lines:
            if cooldown[line] != 0:
                continue  # still in cooldown, cannot be reconnected yet
            reconnect_action = deepcopy(original_action)
            reconnect_action.update({'set_line_status': self._reconnect_vector(obs, line)})
            if legality_hook is not None and not legality_hook(reconnect_action, obs):
                continue
            sim_obs, _, _, sim_info = obs.simulate(reconnect_action)
            # NOTE(review): relies on the simulate info dict flagging rejected
            # actions under 'is_illegal' / 'is_ambiguous' - confirm against the
            # installed grid2op version.
            if sim_info.get('is_illegal', False) or sim_info.get('is_ambiguous', False):
                continue
            candidate_rho = sim_obs.rho.max()
            if candidate_rho < min_rho:
                line_to_reconnect = line
                min_rho = candidate_rho

        if line_to_reconnect != -1:
            original_action.update(
                {'set_line_status': self._reconnect_vector(obs, line_to_reconnect)}
            )
        return original_action

    def act(self, observation, reward=None, done=False):
        """Choose the action to play for ``observation``.

        ``reward`` and ``done`` are accepted so the agent can be called as
        ``act(observation, reward, done)``; neither is used.
        """
        if observation.rho.max() < self.RHO_THRESHOLD:
            do_nothing = self.env.action_space()
            # Returns the do-nothing action merged with a possible reconnection.
            return self.find_best_line_to_reconnect(observation, do_nothing)

        # Adjacency matrix A, normalized by preprocess_adj.
        adjacency = nx.adjacency_matrix(
            nx.from_dict_of_lists(MatrixA(self.env, observation))
        )
        support = tf.cast(tf.SparseTensor(*preprocess_adj(adjacency)), dtype=tf.double)
        # Feature matrix X, already preprocessed by MatrixX.
        features = tf.SparseTensor(*MatrixX(self.env))
        # Flatten to a single-row batch, whatever the grid size.
        net_input = tf.reshape(GenerateObs(features, support), (1, -1))
        q_values = self.dqn_model.predict(net_input)
        action_idx = int(np.argmax(q_values))
        return self.actions[action_idx]

In [6]:
# Build the agent from the environment and the candidate topology actions.
DQNA = DQNAgent(env=env, action_space=all_actions)



In [7]:
# Run the agent for 30 episodes, recording for each one the number of steps,
# the cumulative reward and the average reward per step, and time the whole run.
steps, tt_reward, av_reward = [], [], []
old_t = time.time()
for i in range(30):
    print(i)
    # Per-episode bookkeeping.
    done = False
    time_step = 0
    cum_reward = 0.0
    obs = env.reset()
    reward = env.reward_range[0]
    max_iter = 8064  # upper bound on the number of steps per episode
    while True:
        act = DQNA.act(observation=obs)  # action chosen by the DQN agent
        obs, reward, done, info = env.step(act)  # apply it to the power grid
        cum_reward = cum_reward + reward
        time_step = time_step + 1
        if done or time_step >= max_iter:
            break
    steps.append(time_step)
    tt_reward.append(cum_reward)
    av_reward.append(cum_reward / time_step)
ttt = time.time() - old_t

0
1
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


In [9]:
# Number of environment steps completed in each evaluation episode.
steps

[575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575]

In [11]:
# Cumulative reward collected in each evaluation episode.
tt_reward

[446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203]

[446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203,
 446810.93225097656,
 461150.29162597656,
 458757.86993408203]

In [12]:
# Per-episode average reward (cumulative reward divided by steps).
av_reward

[777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035,
 777.0624908712635,
 802.0005071756115,
 797.8397737984035]

In [13]:
# Mean of the per-episode average rewards, returned as a scalar tf.Tensor.
tf.reduce_mean(av_reward)

<tf.Tensor: shape=(), dtype=float64, numpy=792.3009239484261>

In [9]:
# Number of environment steps completed in each evaluation episode.
steps

[575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575]

In [10]:
# Cumulative reward collected in each evaluation episode.
tt_reward

[446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203,
 446810.93225097656,
 461156.9514770508,
 458757.86993408203]

In [11]:
# Per-episode average reward (cumulative reward divided by steps).
av_reward

[777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035,
 777.0624908712635,
 802.0120895253057,
 797.8397737984035]

In [15]:
# Wall-clock seconds spent in the evaluation loop.
ttt

168.8276867866516

In [12]:
# Wall-clock seconds spent in the evaluation loop.
ttt


158.4662139415741

In [13]:
# Mean of the per-episode average rewards, returned as a scalar tf.Tensor.
tf.reduce_mean(av_reward)



<tf.Tensor: shape=(), dtype=float64, numpy=792.3047847316576>