In [1]:
import random
import re
import tensorflow as tf
import os
import sys
import grid2op
import numpy as np
import shutil
from grid2op.Agent import AgentWithConverter, BaseAgent
from collections import deque
import time
import  scipy.sparse as sp
import networkx as nx
from collections import defaultdict
from layers2 import *
import copy
tf.autograph.set_verbosity(0)

In [38]:
def sparse_to_tuple(sparse_mx):
    """
    Convert a scipy sparse matrix (or a list of them) to tuple form.

    Each matrix becomes ``(coords, values, shape)``: ``coords`` is an
    ``(nnz, 2)`` array of (row, col) indices, ``values`` holds the matching
    stored entries and ``shape`` is the matrix shape.

    When a list is passed, its elements are replaced in place and the very
    same list object is returned.
    """
    def _as_triplet(matrix):
        # COO format exposes the row / col / data arrays directly.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        indices = np.vstack((coo.row, coo.col)).transpose()
        return indices, coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx

def MatrixX(env):
    '''Build the node feature matrix X in sparse-tuple form.

    Reads the per-bus result table (``res_bus``) from the backend grid, takes
    a deep copy of its values so the preprocessing works on its own array,
    and passes that copy through ``preprocess_features``.

    According to the original note, the features currently selected are the
    bus V, theta, P and Q.

    Returns: (element coordinates, element values, matrix size).
    '''
    bus_results = env.backend._grid.res_bus.values
    feature_matrix = copy.deepcopy(bus_results)
    return preprocess_features(feature_matrix)

'''
原版特征预处理，采用row-normalize，现已弃用
def preprocess_features(features):
    """
    Row-normalize feature matrix and convert to tuple representation
    输入：features，scipy稀疏矩阵格式
    输出：对特征矩阵归一化，每行除以行之和。格式为稀疏矩阵格式：元素位置，元素值，矩阵size
    """
    rowsum = np.array(features.sum(1)) # get sum of each row, [节点数, 1]，每个节点一个sum
    r_inv = np.power(rowsum, -1).flatten() # 1/rowsum, [节点数]
    r_inv[np.isinf(r_inv)] = 0. # zero inf data
    r_mat_inv = sp.diags(r_inv) # sparse diagonal matrix, [节点数, 节点数]，对角矩阵，元素为r_inv中的元素，以稀疏矩阵格式存储
    features = r_mat_inv.dot(features) # D^-1:[节点数, 节点数] 乘以 X:[节点数, 特征数]
    return sparse_to_tuple(features) # [coordinates, data, shape]
'''
def preprocess_features(features):
    """
    Column-wise normalisation of the feature matrix.

    Args:
        features: dense N x D array-like (N nodes, D features), not sparse.
            The input is copied; the caller's object is never modified.

    Returns:
        The normalised matrix in sparse-tuple form:
        (element coordinates, element values, matrix size).

    Raises:
        ValueError: if ``features`` is not two-dimensional.

    Normalisation rules:
      * columns 0 and 1 are min-max scaled: (x - min) / (max - min);
      * every other column is only divided by its range, x / (max - min),
        so the sign of the values is kept;
      * a constant column has a zero range; it is divided by 1 instead of 0
        so it cannot inject NaN/inf into the model input;
      * NaN entries are ignored when computing min/max and stay NaN.
    """
    feats = np.array(features, dtype=float)  # float copy: no in-place edits, no int truncation
    if feats.ndim != 2:
        raise ValueError("features must be a 2-D (N x D) array")
    if feats.shape[0] == 0:
        # Nothing to normalise; keep the (0, D) shape.
        return sparse_to_tuple(sp.csr_matrix(feats))

    col_min = np.nanmin(feats, axis=0)
    col_max = np.nanmax(feats, axis=0)
    span = col_max - col_min
    span[span == 0] = 1.0  # guard against division by zero on constant columns

    # Only the first two columns are shifted by their minimum.
    offset = np.zeros_like(col_min)
    offset[:2] = col_min[:2]

    normalized = (feats - offset) / span
    return sparse_to_tuple(sp.csr_matrix(normalized))  # [coordinates, data, shape]

In [52]:
def Get_Conn(env, cur_obs):
    '''Return which nodes are connected to which, as a dict (no weights).

    A node index is ``substation_id + env.n_sub * (bus - 1)``, so busbar 1 of
    substation ``s`` is node ``s`` and busbar 2 is node ``s + env.n_sub``.
    A line contributes an edge only when both of its ends sit on a busbar
    with a positive bus number.

    Args:
        env: environment; ``env.n_sub`` and ``env.backend._grid.bus.index``
            are read.
        cur_obs: observation providing ``line_or_bus``, ``line_ex_bus``,
            ``line_or_to_subid`` and ``line_ex_to_subid``.

    Returns:
        defaultdict(list) with one key per node ``0 .. len(bus.index) - 1``
        (in that order); each value is the sorted list of distinct
        neighbouring node indices as plain Python ints.
    '''
    n_sub = env.n_sub
    n_nodes = len(env.backend._grid.bus.index)

    # One pass over the lines instead of rescanning every line for every node.
    neighbours = [set() for _ in range(n_nodes)]
    line_ends = zip(cur_obs.line_or_to_subid, cur_obs.line_or_bus,
                    cur_obs.line_ex_to_subid, cur_obs.line_ex_bus)
    for or_sub, or_bus, ex_sub, ex_bus in line_ends:
        if or_bus <= 0 or ex_bus <= 0:
            # At least one end is not attached to a busbar: no edge.
            continue
        or_node = int(or_sub + n_sub * (or_bus - 1))
        ex_node = int(ex_sub + n_sub * (ex_bus - 1))
        # Only nodes that exist as keys collect neighbours (as in the
        # original per-node scan); the neighbour itself is not range-checked.
        if or_node < n_nodes:
            neighbours[or_node].add(ex_node)
        if ex_node < n_nodes:
            neighbours[ex_node].add(or_node)

    con_dict = defaultdict(list)
    for node in range(n_nodes):
        con_dict[node] = sorted(neighbours[node])
    return con_dict

def MatrixA(Conn, add_weight=False, weight=None):
    '''
    Build the adjacency matrix A of the node graph.

    Args:
        Conn: defaultdict describing the connectivity - each key is a node and
            its value is the list of all nodes connected to that node.
        add_weight: False (default) returns the unweighted graph. True is
            reserved for a weighted graph, which is not implemented yet.
        weight: edge weights for the weighted variant (currently unused).

    Returns:
        The adjacency matrix produced by ``nx.adjacency_matrix``.

    Raises:
        NotImplementedError: if ``add_weight`` is truthy.
    '''
    if add_weight:
        # TODO: implement the weighted graph. Fail loudly here: falling off
        # the end of the function used to hand ``None`` to the caller.
        raise NotImplementedError("weighted adjacency matrix is not implemented yet")
    return nx.adjacency_matrix(nx.from_dict_of_lists(Conn))

def Normalize_A(adj):
    """Normalise an adjacency matrix with self-loops added.

    Input: adjacency matrix A.
    Computes ``D^-0.5 (A + I)^T D^-0.5`` where D holds the row sums of
    ``A + I`` (for a symmetric A this is ``D^-0.5 (A + I) D^-0.5``).
    Infinite entries of ``D^-0.5`` (zero row sum) are replaced by 0.

    Returns the result in sparse-tuple form:
    (element coordinates, element values, matrix size).
    """
    identity = sp.eye(adj.shape[0])
    adj_self = sp.coo_matrix(adj + identity)            # (A + I)
    degree = np.array(adj_self.sum(1))                  # row sums, i.e. D
    inv_sqrt_degree = np.power(degree, -0.5).flatten()  # diagonal of D^-0.5
    inv_sqrt_degree[np.isinf(inv_sqrt_degree)] = 0.
    scale = sp.diags(inv_sqrt_degree)                   # D^-0.5 as a sparse diagonal matrix
    right_scaled = adj_self.dot(scale)
    normalized = right_scaled.transpose().dot(scale)
    return sparse_to_tuple(normalized.tocoo())

In [37]:
def GenerateObs(features, support):
    '''Build the dense observation matrix fed to the model.

    Inputs: ``features`` (X) and ``support`` (the normalised adjacency), both
    as tf.SparseTensor. Each one is reordered, converted to a dense tensor,
    and the two dense tensors are concatenated along axis 1 with the
    features first.
    '''
    dense_parts = [
        tf.sparse.to_dense(tf.sparse.reorder(sparse_input))
        for sparse_input in (features, support)
    ]
    return tf.concat(dense_parts, axis=1)

In [4]:
class GC_DQN_Model(tf.keras.Model):
    """Graph-convolutional Q-network: two GraphConvolution layers followed by
    three dense layers.

    The input is the matrix built by ``GenerateObs``: the node features with
    the ``num_nodes`` support columns appended on axis 1.

    NOTE(review): the output layer uses a softmax activation, so the values
    used as Q-values are confined to [0, 1] and sum to 1 - confirm this is
    intended before relying on them as return estimates.
    """

    def __init__(self, input_dim, act_dim, num_features_non_zero, num_nodes=28, **kwargs):
        '''
        Args:
            input_dim: length of a node feature vector, i.e. D.
            act_dim: output dimension, the size of the action space.
            num_features_non_zero: forwarded unchanged to both
                GraphConvolution layers as ``num_features_nonzero``.
            num_nodes: number of graph nodes, which is also the number of
                support columns appended to the input. Defaults to 28, the
                value that used to be hard-coded.
            **kwargs: forwarded to ``tf.keras.Model`` (e.g. ``name``).
        '''
        super().__init__(**kwargs)
        self.dense_cells = 720
        self.GConv_cells = 16
        self.input_dim = input_dim
        self.act_dim = act_dim
        self.num_nodes = num_nodes
        self.dropout = 0.2

        self.GConv1 = GraphConvolution(input_dim=input_dim,
                                       output_dim=self.GConv_cells,
                                       num_features_nonzero=num_features_non_zero,
                                       activation=tf.nn.relu,
                                       dropout=self.dropout,
                                       is_sparse_inputs=False)
        self.GConv2 = GraphConvolution(input_dim=self.GConv_cells,
                                       output_dim=self.GConv_cells,
                                       num_features_nonzero=num_features_non_zero,
                                       activation=tf.nn.relu,
                                       dropout=self.dropout)
        self.dense1 = tf.keras.layers.Dense(units=self.dense_cells, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=self.dense_cells, activation=tf.nn.relu)
        self.dense4 = tf.keras.layers.Dense(units=self.act_dim, activation=tf.nn.softmax)

    def call(self, inputs, training=None):
        '''
        Forward pass.

        Args:
            inputs: 2-D tensor whose last ``num_nodes`` columns are the
                support matrix and whose leading columns are the features.
            training: forwarded to the GraphConvolution layers.

        Returns:
            Tensor of shape [1, act_dim].
        '''
        n_nodes = self.num_nodes
        spt = inputs[:, -n_nodes:]
        # Each GraphConvolution layer receives its features with the support
        # columns appended (presumably it splits them internally - TODO
        # confirm against layers2).
        x = self.GConv1(inputs=inputs, training=training)
        x = self.GConv2(inputs=tf.concat([x, spt], axis=1), training=training)
        x = tf.concat([x, spt], axis=1)
        # Flatten all nodes into a single row for the dense head.
        x = tf.reshape(x, [1, n_nodes * (n_nodes + self.GConv_cells)])
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense4(x)
        return x

    def predict(self, inputs):
        '''Return the index (Python int) of the highest-valued action.

        Note: this replaces ``tf.keras.Model.predict`` with a different
        signature and return type.
        '''
        q_values = self(inputs)
        # argmax over a [1, act_dim] tensor has shape [1]; take the single
        # element explicitly rather than calling int() on a 1-element tensor.
        return int(tf.argmax(q_values, axis=-1)[0])

In [5]:
def array2action(env, array):
    """Turn a vector into an action using ``env.action_space.from_vect``."""
    return env.action_space.from_vect(array)

def get_model_res(obs_batch, batch_size):
    '''Run the global ``model`` on a batch of observations.

    The model is called once per observation (``obs_batch[0]`` up to
    ``obs_batch[batch_size - 1]``) and the outputs are concatenated along
    axis 0.

    Args:
        obs_batch: indexable collection of observations.
        batch_size: number of leading observations to evaluate.

    Returns:
        The concatenated model outputs.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    '''
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    # Collect first and concatenate once instead of re-concatenating the
    # growing result on every iteration.
    outputs = [model(obs_batch[i]) for i in range(batch_size)]
    return tf.concat(outputs, 0)

In [6]:
# --- Training hyperparameters ---
learning_rate = 1e-4             # identical value to the former literal 10e-5
initial_epsilon = 0.2            # exploration rate at the start of exploration
final_epsilon = 0.01             # exploration rate once exploration has finished
num_episodes = 1000              # total number of training episodes
batch_size = 64
gamma = 0.90                     # discount factor
num_exploration_episodes = np.floor(num_episodes/5)  # a float, used in the epsilon schedule

# --- Dataset locations (absolute paths on the author's machine) ---
DATA_PATH = '/Users/yuzhao/miniforge3/envs/env_rl/lib/python3.8/site-packages/grid2op/data/l2rpn_case14_sandbox'
SCENARIO_PATH = '/Users/yuzhao/miniforge3/envs/env_rl/lib/python3.8/site-packages/grid2op/data/l2rpn_case14_sandbox/chronics'
# Alternative dataset:
# DATA_PATH = '/Users/yuzhao/data_grid2op/l2rpn_icaps_2021_small'
# SCENARIO_PATH = '/Users/yuzhao/data_grid2op/l2rpn_icaps_2021_small/chronics'

In [7]:
# Training script: epsilon-greedy Q-learning with a replay buffer, using the
# graph-convolutional model on the [features | support] observation matrix.
if __name__ == '__main__':
    # One-off setup: an initial environment is built only to size the model
    # (feature width and number of unitary topology-change actions).
    env = grid2op.make(dataset=DATA_PATH, chronics_path=SCENARIO_PATH)
    env.backend.runpf()
    features = MatrixX(env)
    all_actions = env.action_space.get_all_unitary_topologies_change(env.action_space)
    model = GC_DQN_Model(input_dim=features[2][1],act_dim=len(all_actions),num_features_non_zero=features[1].shape)
    summary_writer = tf.summary.create_file_writer('./tensorboardGCN/')
    checkpoint = tf.train.Checkpoint(myModel=model)  # NOTE(review): not used again in this cell
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    replay_buffer = deque(maxlen=10000)
    epsilon = initial_epsilon
    stp = 0   # global step counter across all episodes, used as the TensorBoard step
    loss = 0
    for episode in range(num_episodes):
        print('====================================')
        cur_time = time.time()
        epsilon = max(initial_epsilon * (num_exploration_episodes - episode) / num_exploration_episodes,final_epsilon)# linearly decayed exploration rate, floored at final_epsilon
        # NOTE(review): the environment is rebuilt at the start of every episode.
        env = grid2op.make(dataset=DATA_PATH, chronics_path=SCENARIO_PATH)
        # The next two lines keep only part of the chronics and discard the rest; see the official documentation.
        env.chronics_handler.set_filter(lambda path: re.match(".*00[0-9].*", path) is not None)
        kept = env.chronics_handler.reset()
        env.chronics_handler.shuffle()
        # Iterate over the kept chronics: one scenario run per kept chronic.
        for chronic in range(len(kept)):
            SCN_STEP = 0   # steps survived in the current scenario
            env.reset()
            dst_step = 0
            print('Scenario 为 [%s]' % (env.chronics_handler.get_name()))
            env.fast_forward_chronics(dst_step)
            env.backend.runpf()
            obs, done = env.get_obs(), False

            # Build the adjacency matrix A and its normalised form (see Normalize_A).
            MatA = MatrixA(Get_Conn(env,obs))
            support = Normalize_A(MatA)
            support = tf.cast(tf.SparseTensor(*support),dtype=tf.double)
            # Build the feature matrix X (already preprocessed by MatrixX).
            features = MatrixX(env)
            features = tf.SparseTensor(*features)
            obss = GenerateObs(features,support)

            while not done:
                stp+=1
                SCN_STEP+=1
                # Epsilon-greedy action selection.
                if random.random() < epsilon:
                    action = random.choice(all_actions)
                    action_idx = all_actions.index(action)
                else:
                    action_idx = model.predict(obss)
                    action = all_actions[int(action_idx)]
                obs, reward, done, _ = env.step(action)

                # Rebuild the adjacency matrix A and its normalised form for the next state.
                MatA_nxt = MatrixA(Get_Conn(env,obs))
                support_nxt = Normalize_A(MatA_nxt)
                support_nxt = tf.cast(tf.SparseTensor(*support_nxt),dtype=tf.double)
                # Rebuild the preprocessed feature matrix X for the next state.
                features_nxt = MatrixX(env)
                features_nxt = tf.SparseTensor(*features_nxt)
                obss_nxt = GenerateObs(features_nxt,support_nxt)

                # reward = -10 if done else reward
                with summary_writer.as_default():
                    tf.summary.scalar("reward", reward, step=stp)
                replay_buffer.append((obss, action_idx, reward, obss_nxt, 1 if done else 0)) #push the transition into the replay buffer
                obss = obss_nxt

                if done:
                    # The scenario ended: log its length and skip the training
                    # step for this final transition.
                    print("episode: %4d, epsilon %.4f"%(episode, epsilon))
                    # print(loss)
                    print(SCN_STEP)
                    with summary_writer.as_default():
                        tf.summary.scalar("RUN_STEPS", SCN_STEP, step=stp)
                    break

                if len(replay_buffer) >= batch_size:
                    # Sample one random mini-batch from the replay buffer.
                    mini_batch = random.sample(replay_buffer, batch_size)
                    obs_batch, action_idx_batch, reward_batch, next_obs_batch, done_batch = [], [], [], [], []
                    for experience in mini_batch:
                        s, a, r, s_p, dd = experience
                        obs_batch.append(s)
                        action_idx_batch.append(a)
                        reward_batch.append(r)
                        next_obs_batch.append(s_p)
                        done_batch.append(dd)
                    obs_batch = np.array(obs_batch)
                    action_idx_batch = np.array(action_idx_batch)
                    reward_batch = np.array(reward_batch)
                    next_obs_batch = np.array(next_obs_batch)
                    done_batch = np.array(done_batch)
                    # obs_batch, action_idx_batch, reward_batch, next_obs_batch, done_batch = \
                    # map(np.array, zip(*random.sample(replay_buffer, batch_size)))
                    # q_value = model(next_obs_batch)
                    # q_value = tf.squeeze(q_value)
                    # Target: y = r + gamma * max_a Q(s', a) * (1 - done), with Q(s', .)
                    # evaluated by the same `model` that is being trained.
                    q_value = tf.squeeze(get_model_res(next_obs_batch, batch_size))
                    y = reward_batch + (gamma * tf.reduce_max(q_value, axis=1)) * (1-done_batch)

                    with tf.GradientTape() as tape:
                        # Mean squared error between y and the model output of the action actually taken
                        # (selected with a one-hot mask).
                        loss = tf.keras.losses.mean_squared_error(y_true=y,y_pred=tf.reduce_sum(tf.squeeze(get_model_res(obs_batch, batch_size)) * tf.one_hot(action_idx_batch, depth=len(all_actions)), axis=1))
                        # loss = tf.keras.losses.mean_squared_error(y_true=y,y_pred=tf.reduce_sum(tf.squeeze(model(obs_batch)) * tf.one_hot(action_idx_batch, depth=len(all_actions)), axis=1))
                    grads = tape.gradient(loss, model.variables)
                    with summary_writer.as_default():
                        tf.summary.scalar("loss", loss, step=stp)
                    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
        # Save the model at episodes 200, 300, ... (multiples of 100 above 100).
        # The directory name is the literal prefix 'GCDQN_1' followed by
        # episode + 1, e.g. 'GCDQN_1201' for episode 200.
        if episode % 100 == 0 and episode > 100:
            print(time.time()-cur_time)
            file_path = '/Users/yuzhao/Desktop/StudyPool/pythonProject/GCDQNckpt/GCDQN_1'+str(episode+1)
            model.save(filepath=file_path)



Scenario 为 [0002]
episode:    0, epsilon 0.2000
1
Scenario 为 [0000]
episode:    0, epsilon 0.2000
1
Scenario 为 [0001]
episode:    0, epsilon 0.2000
1
Scenario 为 [0001]
episode:    1, epsilon 0.1990
1
Scenario 为 [0002]
episode:    1, epsilon 0.1990
1
Scenario 为 [0000]
episode:    1, epsilon 0.1990
3
Scenario 为 [0002]
episode:    2, epsilon 0.1980
1
Scenario 为 [0001]
episode:    2, epsilon 0.1980
1
Scenario 为 [0000]
episode:    2, epsilon 0.1980
1
Scenario 为 [0001]
episode:    3, epsilon 0.1970
1
Scenario 为 [0002]
episode:    3, epsilon 0.1970
1
Scenario 为 [0000]
episode:    3, epsilon 0.1970
1
Scenario 为 [0002]
episode:    4, epsilon 0.1960
1
Scenario 为 [0000]
episode:    4, epsilon 0.1960
3
Scenario 为 [0001]
episode:    4, epsilon 0.1960
1
Scenario 为 [0002]
episode:    5, epsilon 0.1950
2
Scenario 为 [0000]
episode:    5, epsilon 0.1950
1
Scenario 为 [0001]
episode:    5, epsilon 0.1950
1
Scenario 为 [0001]
episode:    6, epsilon 0.1940
1
Scenario 为 [0002]
episode:    6, epsilon 0.1940
1





FOR DEVS: If you are overwriting _tracking_metadata in your class, this property has been used to save metadata in the SavedModel. The metadta field will be deprecated soon, so please move the metadata to a different file.
INFO:tensorflow:Assets written to: /Users/yuzhao/Desktop/StudyPool/pythonProject/GCDQNckpt/GCDQN_1201/assets
Scenario 为 [0000]
episode:  201, epsilon 0.0100
157
Scenario 为 [0002]
episode:  201, epsilon 0.0100
276
Scenario 为 [0001]
episode:  201, epsilon 0.0100
373
Scenario 为 [0001]
episode:  202, epsilon 0.0100
32
Scenario 为 [0000]
episode:  202, epsilon 0.0100
13
Scenario 为 [0002]
episode:  202, epsilon 0.0100
37
Scenario 为 [0002]
episode:  203, epsilon 0.0100
133
Scenario 为 [0000]
episode:  203, epsilon 0.0100
232
Scenario 为 [0001]
episode:  203, epsilon 0.0100
145
Scenario 为 [0001]
episode:  204, epsilon 0.0100
169
Scenario 为 [0002]
episode:  204, epsilon 0.0100
209
Scenario 为 [0000]
episode:  204, epsilon 0.0100
61
Scenario 为 [0000]
episode:  205, epsilon 0.0100

KeyboardInterrupt: 

In [20]:
# Manual save of the in-memory model. Relies on `model` and `episode` still
# being present in the kernel from the training cell; the path suffix is
# episode + 1 appended to the literal prefix 'GCDQN_1'.
file_path = '/Users/yuzhao/Desktop/StudyPool/pythonProject/GCDQNckpt/GCDQN_1'+str(episode+1)
model.save(filepath=file_path)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unsupported operand type(s) for -: 'NoneType' and 'int'





FOR DEVS: If you are overwriting _tracking_metadata in your class, this property has been used to save metadata in the SavedModel. The metadta field will be deprecated soon, so please move the metadata to a different file.
INFO:tensorflow:Assets written to: /Users/yuzhao/Desktop/StudyPool/pythonProject/GCDQNckpt/GCDQN_1281/assets


In [6]:
# Load a previously saved model from disk; this rebinds the global `model`
# that later cells (agent construction, evaluation) use.
fileP = '/Users/yuzhao/Desktop/StudyPool/pythonProject/GCDQNckpt/t6/GCDQN_1301'
model = tf.keras.models.load_model(fileP)




In [9]:
model.summary()

Model: "gc_dqn__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
graph_convolution (GraphConv multiple                  64        
_________________________________________________________________
graph_convolution_1 (GraphCo multiple                  256       
_________________________________________________________________
reshape (Reshape)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  323280    
_________________________________________________________________
dense_1 (Dense)              multiple                  519120    
_________________________________________________________________
dense_2 (Dense)              multiple                  135548    
Total params: 978,268
Trainable params: 978,268
Non-trainable params: 0
_______________________________________________

In [18]:
class GCDQNAgent(BaseAgent):
    """Evaluation agent combining a simple rule with the trained model.

    While the highest line loading (``rho``) stays below ``rho_threshold`` the
    agent does nothing except possibly reconnect a disconnected line;
    otherwise it asks the model for a topology action.
    """

    def __init__(self, env, action_space, model, rho_threshold=0.999):
        """
        Args:
            env: environment used for the do-nothing action and for building
                the model input.
            action_space: indexable collection of candidate actions; the
                model output index selects from it.
            model: object whose ``predict(obs_matrix)`` returns an action index.
            rho_threshold: max-rho value below which the model is not
                consulted. Defaults to 0.999.
        """
        super(GCDQNAgent, self).__init__(action_space=action_space)
        self.env = env
        self.actions = action_space
        self.dqn_model = model
        self.rho_threshold = rho_threshold

    def find_best_line_to_reconnect(self, obs, original_action):
        """Return ``original_action``, possibly extended with one line reconnection.

        Every disconnected line whose cooldown is over is tried on top of a
        copy of ``original_action`` via ``obs.simulate``. The line giving the
        lowest simulated maximum ``rho`` - if that is lower than simulating
        ``original_action`` alone - is added to ``original_action`` (which is
        updated in place) before it is returned, so the result combines both
        actions.
        """
        disconnected_lines = np.where(obs.line_status == False)[0]
        if not len(disconnected_lines):
            return original_action
        if (obs.time_before_cooldown_line[disconnected_lines] > 0).all():
            # Every disconnected line is still in cooldown.
            return original_action
        o, _, _, _ = obs.simulate(original_action)
        min_rho = o.rho.max()
        line_to_reconnect = -1
        for line in disconnected_lines:
            if not obs.time_before_cooldown_line[line]:
                reconnect_array = np.zeros_like(obs.rho)
                reconnect_array[line] = 1
                # Work on a copy so candidates do not accumulate on the
                # original action. Only the `copy` module is imported here,
                # so the qualified name is required.
                reconnect_action = copy.deepcopy(original_action)
                reconnect_action.update({'set_line_status': reconnect_array})
                # NOTE(review): `is_legal` is not defined in this class as
                # visible here; presumably provided by the base class or
                # elsewhere - TODO confirm.
                if not self.is_legal(reconnect_action, obs):
                    continue
                o, _, _, _ = obs.simulate(reconnect_action)
                if o.rho.max() < min_rho:
                    line_to_reconnect = line
                    min_rho = o.rho.max()
        if line_to_reconnect != -1:
            reconnect_array = np.zeros_like(obs.rho)
            reconnect_array[line_to_reconnect] = 1
            original_action.update({'set_line_status': reconnect_array})
        return original_action

    def act(self, observation, done=False):
        """Choose the action for ``observation``.

        Below the rho threshold: a do-nothing action, optionally combined
        with a line reconnection. At or above it: the action whose index the
        model predicts from the [features | support] matrix.
        """
        if observation.rho.max() < self.rho_threshold:
            a = self.env.action_space()
            return self.find_best_line_to_reconnect(observation, a)  # may combine two actions

        # Use the agent's own environment rather than a notebook-level global.
        # Adjacency matrix A and its normalised form.
        MatA = MatrixA(Get_Conn(self.env, observation))
        support = Normalize_A(MatA)
        support = tf.cast(tf.SparseTensor(*support), dtype=tf.double)
        # Preprocessed feature matrix X, read from the environment backend.
        features = MatrixX(self.env)
        features = tf.SparseTensor(*features)
        obss = GenerateObs(features, support)
        action_idx = self.dqn_model.predict(obss)
        return self.actions[int(action_idx)]

In [19]:
# Build the evaluation agent from the globals `env`, `all_actions` and `model`
# left in the kernel by earlier cells.
GCDQNA = GCDQNAgent(env, all_actions, model)

In [20]:
# Evaluate the agent on 30 runs, recording for each run the number of steps,
# the total reward and the average reward per step.
gsteps, gtt_reward, gav_reward = [], [], []
old_t = time.time()
for i in range(30):
    print(i)
    done, time_step, cum_reward = False, int(0), 0.  # fresh per-run counters
    obs = env.reset()  # reset the environment
    reward = env.reward_range[0]
    max_iter = 8064  # upper bound on the number of steps per run
    while not done and time_step < max_iter:
        act = GCDQNA.act(observation=obs)  # let the agent choose an action
        obs, reward, done, info = env.step(act)  # apply it to the power grid
        cum_reward += reward
        time_step += 1
    gsteps.append(time_step)
    gtt_reward.append(cum_reward)
    gav_reward.append(cum_reward / time_step)
gttt = time.time() - old_t  # wall-clock time of the whole evaluation



0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


In [22]:
gsteps

[575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575,
 575]

In [23]:
gtt_reward


[446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078,
 446810.93225097656,
 458757.86993408203,
 461157.0147705078]

In [24]:
gav_reward

[777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832,
 777.0624908712635,
 797.8397737984035,
 802.0121996008832]

In [25]:
tf.reduce_mean(gav_reward)


<tf.Tensor: shape=(), dtype=float64, numpy=792.3048214235166>

In [26]:
gttt

160.52981519699097

In [7]:
# Create a fresh environment (rebinding the global `env`) and run one power
# flow on its backend.
env = grid2op.make(dataset=DATA_PATH, chronics_path=SCENARIO_PATH)
env.backend.runpf()

(True, None)

In [64]:
# Scratch cell: build one model input by hand to inspect it.
# NOTE(review): `obs` is whatever observation an earlier cell left in the
# kernel; it is not created in this cell.
MatA = MatrixA(Get_Conn(env,obs))
support = Normalize_A(MatA)
support = tf.cast(tf.SparseTensor(*support),dtype=tf.double)

features = MatrixX(env)
features = tf.SparseTensor(*features)
obss = GenerateObs(features,support)

In [67]:
obss

<tf.Tensor: shape=(28, 32), dtype=float64, numpy=
array([[ 0.23990993,  1.        , -0.60247853,  0.24435313,  0.33333333,
         0.25819889,  0.        ,  0.        ,  0.25819889,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ],
       [ 0.23990993,  0.84248596, -0.44439079, -0.03211336,  0.25819889,
         0.2       ,  0.25819889,  0.18257419,  0.2       ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ],
    

In [36]:
# Scratch cell: build a small weighted graph and display its adjacency matrix.
G = nx.Graph()  # or DiGraph, MultiGraph, MultiDiGraph, etc
G.add_weighted_edges_from([(0, 1, 3.0), (1, 2, 7.5)])
m = nx.adjacency_matrix(G)
m

<networkx.classes.graph.Graph at 0x14dd28520>