In [1]:
import os
import random
import re
import shutil
import sys
import time
from collections import defaultdict
from collections import deque
from copy import deepcopy

import grid2op
import networkx as nx
import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from grid2op.Agent import AgentWithConverter, BaseAgent
tf.autograph.set_verbosity(0)

In [2]:
def sparse_to_tuple(sparse_mx):
    """
    Convert a scipy sparse matrix, or a list of them, to tuple representation.

    Each matrix becomes ``(coords, values, shape)`` where ``coords`` is an
    ``[nnz, 2]`` array of (row, col) pairs. A list argument is converted
    element by element *in place* and that same list object is returned;
    any other argument is converted and returned as a single tuple.
    """
    def _as_triplet(matrix):
        # COO format exposes the row / col / data arrays read below.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        coords = np.vstack((coo.row, coo.col)).transpose()
        return coords, coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx

def normalize_adj(adj):
    """Normalize an adjacency matrix by the inverse square root of its row sums.

    With D the diagonal matrix of row sums of ``adj``, the value returned is
    ``D^-0.5 * adj^T * D^-0.5`` in COO format. For a symmetric ``adj`` this is
    the usual symmetric normalization ``D^-0.5 * A * D^-0.5``. Rows whose sum
    is zero get a scaling factor of 0 instead of infinity.
    """
    coo = sp.coo_matrix(adj)
    degrees = np.array(coo.sum(1))                      # D, one entry per row
    inv_sqrt_deg = np.power(degrees, -0.5).flatten()    # diagonal of D^-0.5
    inv_sqrt_deg[np.isinf(inv_sqrt_deg)] = 0.           # zero-degree rows
    scaling = sp.diags(inv_sqrt_deg)                    # D^-0.5 as a sparse diagonal
    column_scaled = coo.dot(scaling)                    # A * D^-0.5
    return column_scaled.transpose().dot(scaling).tocoo()

def preprocess_adj(adj):
    """Add self-loops to an adjacency matrix, normalize it and return tuple form.

    Input: adjacency matrix A.
    Returns: ``normalize_adj(A + I)`` converted by ``sparse_to_tuple``, i.e.
    the sparse representation (element coordinates, element values, matrix shape).
    """
    identity = sp.eye(adj.shape[0])
    with_self_loops = adj + identity  # the (A + I) matrix
    return sparse_to_tuple(normalize_adj(with_self_loops))

def preprocess_features(features):
    """
    Row-normalize a feature matrix and convert it to tuple representation.

    Args:
        features: scipy sparse matrix of shape [n_nodes, n_features].

    Returns:
        ``(coords, values, shape)`` as produced by ``sparse_to_tuple`` for the
        matrix in which every row of ``features`` has been multiplied by
        ``1 / row_sum``. Rows whose sum is zero are multiplied by 0.
    """
    # Cast to float before inverting: numpy refuses to raise an integer array
    # to a negative integer power, so integer-valued features raised ValueError.
    rowsum = np.asarray(features.sum(1), dtype=float)  # [n_nodes, 1], one sum per node
    # Zero row sums yield inf here on purpose; they are zeroed on the next line,
    # so the divide-by-zero warning carries no information.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -1).flatten()  # 1 / rowsum, [n_nodes]
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)  # sparse diagonal matrix D^-1, [n_nodes, n_nodes]
    features = r_mat_inv.dot(features)  # D^-1 [n_nodes, n_nodes] times X [n_nodes, n_features]
    return sparse_to_tuple(features)  # (coordinates, data, shape)


In [3]:
def MatrixA(env, cur_obs):
    '''Build the adjacency list of the bus-level grid graph (unweighted graph only).

    A line end is mapped to the node id ``substation_id + env.n_sub * (bus - 1)``.
    The graph has one node per row of ``env.backend._grid.bus``.

    Args:
        env: environment providing ``n_sub`` and ``backend._grid.bus``.
        cur_obs: observation providing ``line_or_bus``, ``line_ex_bus``,
            ``line_or_to_subid`` and ``line_ex_to_subid``.

    Returns:
        defaultdict(list) mapping every node id ``0 .. n_nodes - 1`` to the list
        of distinct node ids it is linked to by a line. A line contributes a
        neighbour to a node only when the opposite end has a bus number > 0;
        nodes without such lines map to an empty list.
    '''
    line_ex_buses = cur_obs.line_ex_bus
    line_or_buses = cur_obs.line_or_bus
    line_ex_subid = cur_obs.line_ex_to_subid
    line_or_subid = cur_obs.line_or_to_subid
    n_sub = env.n_sub
    n_nodes = len(env.backend._grid.bus.index)

    # One pass over the lines instead of rescanning every line for every node.
    neighbours = [set() for _ in range(n_nodes)]
    for j in range(len(line_or_subid)):
        or_node = line_or_subid[j] + n_sub * (line_or_buses[j] - 1)
        ex_node = line_ex_subid[j] + n_sub * (line_ex_buses[j] - 1)
        # An end with bus <= 0 produces a negative node id, which is never a
        # valid key; the range checks below make that explicit.
        if 0 <= or_node < n_nodes and line_ex_buses[j] > 0:
            neighbours[or_node].add(ex_node)
        if 0 <= ex_node < n_nodes and line_or_buses[j] > 0:
            neighbours[ex_node].add(or_node)

    con_dict = defaultdict(list)
    for i, linked in enumerate(neighbours):
        con_dict[i] = list(linked)
    return con_dict

def MatrixX(env):
    '''Return the feature matrix X as (element coordinates, element values, matrix shape).

    The features are the columns of the backend's ``res_bus`` table, passed
    through ``preprocess_features``. Per the original author's note these are
    currently the V, theta, P and Q of each bus.'''
    bus_results = sp.csr_matrix(env.backend._grid.res_bus.values)
    return preprocess_features(bus_results)

def GenerateObs(features, support):
    '''Concatenate sparse `features` and `support` into a single dense tensor.

    Both inputs are sparse tensors. Each one is reordered, converted to
    dense, and the two dense tensors are joined along axis 1 with the
    features first.'''
    dense_parts = [
        tf.sparse.to_dense(tf.sparse.reorder(sparse_part))
        for sparse_part in (features, support)
    ]
    return tf.concat(dense_parts, axis=1)

In [4]:
def array2action(env, array):
    """Return the action that ``env.action_space.from_vect`` builds from `array`."""
    return env.action_space.from_vect(array)

def get_model_res(obs_batch, batch_size):
    '''Run the network on a batch of observations, one observation at a time.

    Uses the module-level ``model`` (it must exist before this is called).

    Args:
        obs_batch: indexable collection of model inputs; only the entries
            ``0 .. batch_size - 1`` are read.
        batch_size: number of observations to evaluate; must be positive.

    Returns:
        The per-observation model outputs concatenated along axis 0.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    '''
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))
    # Collect first and concatenate once: concatenating inside the loop
    # re-copies the accumulated result on every iteration.
    outputs = [model(obs_batch[i]) for i in range(batch_size)]
    return tf.concat(outputs, 0)

In [9]:
class DQN_Model(tf.keras.Model):
    """Fully connected Q-network: four ReLU hidden layers and a linear Q-value head."""

    def __init__(self, act_dim):
        '''act_dim: length of the action space (number of Q-values produced).'''
        super().__init__()
        self.dense_cells = 420  # width of every hidden layer

        self.act_dim = act_dim
        self.dense1 = tf.keras.layers.Dense(units=self.dense_cells, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=self.dense_cells, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(units=self.dense_cells, activation=tf.nn.relu)
        self.dense4 = tf.keras.layers.Dense(units=self.dense_cells, activation=tf.nn.relu)
        # Linear head. A softmax here would force the outputs into [0, 1] and
        # make them sum to 1, so they could never regress onto the
        # `reward + gamma * max Q` targets used for training. Softmax is
        # monotonic, so the argmax taken in `predict` is the same either way.
        self.dense5 = tf.keras.layers.Dense(units=self.act_dim, activation=None)

    def call(self, inputs):
        """Return the Q-values for `inputs`, one per action along the last axis."""
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        x = self.dense5(x)
        return x

    def predict(self, inputs):
        """Return the index of the highest Q-value for a single observation.

        NOTE(review): this overrides ``tf.keras.Model.predict`` with a different
        contract (it returns a Python int, not an array of outputs). The
        ``int(...)`` conversion only succeeds when the argmax holds exactly one
        element, i.e. when `inputs` contains a single observation.
        """
        q_values = self(inputs)
        return int(tf.argmax(q_values, axis=-1))


In [10]:
# Training hyper-parameters and data locations for the DQN run.
learning_rate = 1e-4              # Adam learning rate (identical value to the former `10e-5` literal)
initial_epsilon = 0.2             # exploration rate at the start of exploration
final_epsilon = 0.01              # exploration rate once exploration has ended
num_episodes = 302                # total number of training episodes
batch_size = 64                   # number of transitions sampled per gradient step
gamma = 0.90                      # discount factor
num_exploration_episodes = 200    # episodes over which epsilon is annealed linearly

# Dataset locations. The defaults are the original author's local install;
# set GRID2OP_DATA_PATH / GRID2OP_SCENARIO_PATH to run on another machine.
DATA_PATH = os.environ.get(
    'GRID2OP_DATA_PATH',
    '/Users/yuzhao/miniforge3/envs/env_rl/lib/python3.8/site-packages/grid2op/data/l2rpn_case14_sandbox',
)
SCENARIO_PATH = os.environ.get(
    'GRID2OP_SCENARIO_PATH',
    os.path.join(DATA_PATH, 'chronics'),
)


In [13]:
def _build_model_input(env, obs):
    """Flatten the current grid state into the single-row tensor fed to the DQN.

    Combines the row-normalized bus features (``MatrixX``) with the normalized
    adjacency matrix including self-loops (``MatrixA`` + ``preprocess_adj``)
    through ``GenerateObs`` and reshapes the result to ``(1, n)``.
    """
    adjacency = nx.adjacency_matrix(nx.from_dict_of_lists(MatrixA(env, obs)))
    support = tf.cast(tf.SparseTensor(*preprocess_adj(adjacency)), dtype=tf.double)
    features = tf.SparseTensor(*MatrixX(env))
    # (1, -1) instead of a hard-coded (1, 32*28): same shape on the 14-bus
    # sandbox, but not tied to that grid size.
    return tf.reshape(GenerateObs(features, support), (1, -1))


if __name__ == '__main__':
    # This first environment is only used to enumerate the action set; a fresh
    # environment is created at the start of every episode below.
    env = grid2op.make(dataset=DATA_PATH, chronics_path=SCENARIO_PATH)
    all_actions = env.action_space.get_all_unitary_topologies_change(env.action_space)
    n_actions = len(all_actions)
    model = DQN_Model(act_dim=n_actions)
    summary_writer = tf.summary.create_file_writer('./tensorboard/')
    checkpoint = tf.train.Checkpoint(myModel=model)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    replay_buffer = deque(maxlen=10000)
    epsilon = initial_epsilon
    stp = 0  # global step counter shared by every TensorBoard scalar
    loss = 0
    for episode in range(num_episodes):
        print('====================================')
        # Linear decay of the exploration rate, floored at final_epsilon.
        epsilon = max(initial_epsilon * (num_exploration_episodes - episode) / num_exploration_episodes, final_epsilon)
        # NOTE(review): the environment is rebuilt every episode; reusing a
        # single environment may be possible but was not verified here.
        env = grid2op.make(dataset=DATA_PATH, chronics_path=SCENARIO_PATH)
        # Keep only the chronics whose path matches the filter and discard the
        # others (see the official Grid2Op documentation).
        env.chronics_handler.set_filter(lambda path: re.match(".*00[0-9].*", path) is not None)
        kept = env.chronics_handler.reset()
        env.chronics_handler.shuffle()
        # Iterate over the retained chronics.
        for chronic in range(len(kept)):
            SCN_STEP = 0  # steps survived in the current scenario
            env.reset()
            dst_step = 0
            print('Scenario 为 [%s]' % (env.chronics_handler.get_name()))
            env.fast_forward_chronics(dst_step)
            env.backend.runpf()
            obs, done = env.get_obs(), False
            obss = _build_model_input(env, obs)

            while not done:
                stp += 1
                SCN_STEP += 1
                if random.random() < epsilon:
                    # Draw the index directly: picking an action and then
                    # calling all_actions.index(action) scans the whole action
                    # list with action equality on every exploration step.
                    action_idx = random.randrange(n_actions)
                else:
                    action_idx = int(model.predict(obss))
                action = all_actions[action_idx]
                obs, reward, done, _ = env.step(action)

                obss_nxt = _build_model_input(env, obs)

                with summary_writer.as_default():
                    tf.summary.scalar("reward", reward, step=stp)
                # Store the transition in the experience replay buffer.
                replay_buffer.append((obss, action_idx, reward, obss_nxt, 1 if done else 0))
                obss = obss_nxt

                if done:
                    print("episode: %4d, epsilon %.4f"%(episode, epsilon))
                    print(SCN_STEP)
                    with summary_writer.as_default():
                        tf.summary.scalar("RUN_STEPS",SCN_STEP,step=stp)
                    break

                if len(replay_buffer) >= batch_size:
                    # Sample one random mini-batch from the replay buffer.
                    mini_batch = random.sample(replay_buffer, batch_size)
                    obs_batch, action_idx_batch, reward_batch, next_obs_batch, done_batch = [
                        np.array(list(column)) for column in zip(*mini_batch)
                    ]
                    # Bootstrapped target, computed outside the tape so no
                    # gradient flows through it; (1 - done) drops the
                    # bootstrap term on terminal transitions.
                    q_value = tf.squeeze(get_model_res(next_obs_batch, batch_size))
                    y = reward_batch + (gamma * tf.reduce_max(q_value, axis=1)) * (1-done_batch)

                    with tf.GradientTape() as tape:
                        q_pred = tf.squeeze(get_model_res(obs_batch, batch_size))
                        # Keep only the Q-value of the action actually taken.
                        chosen_q = tf.reduce_sum(q_pred * tf.one_hot(action_idx_batch, depth=n_actions), axis=1)
                        loss = tf.keras.losses.mean_squared_error(y_true=y, y_pred=chosen_q)
                    grads = tape.gradient(loss, model.trainable_variables)
                    with summary_writer.as_default():
                        tf.summary.scalar("loss", loss, step=stp)
                    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
        # Save a snapshot every 100 episodes, skipping episodes 0 and 100.
        if episode % 100 == 0 and episode>100:
            file_path = '/Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt/DQN_2'+str(episode+1)
            model.save(filepath=file_path)


Scenario 为 [0000]
episode:    0, epsilon 0.2000
1
Scenario 为 [0002]
episode:    0, epsilon 0.2000
1
Scenario 为 [0001]
episode:    0, epsilon 0.2000
1
Scenario 为 [0000]
episode:    1, epsilon 0.1990
1
Scenario 为 [0001]
episode:    1, epsilon 0.1990
1
Scenario 为 [0002]
episode:    1, epsilon 0.1990
1
Scenario 为 [0000]
episode:    2, epsilon 0.1980
1
Scenario 为 [0001]
episode:    2, epsilon 0.1980
1
Scenario 为 [0002]
episode:    2, epsilon 0.1980
1
Scenario 为 [0001]
episode:    3, epsilon 0.1970
1
Scenario 为 [0000]
episode:    3, epsilon 0.1970
1
Scenario 为 [0002]
episode:    3, epsilon 0.1970
1
Scenario 为 [0002]
episode:    4, epsilon 0.1960
1
Scenario 为 [0000]
episode:    4, epsilon 0.1960
1
Scenario 为 [0001]
episode:    4, epsilon 0.1960
1
Scenario 为 [0000]
episode:    5, epsilon 0.1950
1
Scenario 为 [0001]
episode:    5, epsilon 0.1950
2
Scenario 为 [0002]
episode:    5, epsilon 0.1950
1
Scenario 为 [0002]
episode:    6, epsilon 0.1940
1
Scenario 为 [0000]
episode:    6, epsilon 0.1940
1





FOR DEVS: If you are overwriting _tracking_metadata in your class, this property has been used to save metadata in the SavedModel. The metadta field will be deprecated soon, so please move the metadata to a different file.
INFO:tensorflow:Assets written to: /Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt/DQN_2201/assets
Scenario 为 [0002]
episode:  201, epsilon 0.0100
19
Scenario 为 [0001]
episode:  201, epsilon 0.0100
114
Scenario 为 [0000]
episode:  201, epsilon 0.0100
6
Scenario 为 [0000]
episode:  202, epsilon 0.0100
121
Scenario 为 [0001]
episode:  202, epsilon 0.0100
40
Scenario 为 [0002]
episode:  202, epsilon 0.0100
71
Scenario 为 [0000]
episode:  203, epsilon 0.0100
68
Scenario 为 [0002]
episode:  203, epsilon 0.0100
192
Scenario 为 [0001]
episode:  203, epsilon 0.0100
50
Scenario 为 [0002]
episode:  204, epsilon 0.0100
82
Scenario 为 [0001]
episode:  204, epsilon 0.0100
208
Scenario 为 [0000]
episode:  204, epsilon 0.0100
136
Scenario 为 [0002]
episode:  205, epsilon 0.0100
131
Sce




FOR DEVS: If you are overwriting _tracking_metadata in your class, this property has been used to save metadata in the SavedModel. The metadta field will be deprecated soon, so please move the metadata to a different file.
INFO:tensorflow:Assets written to: /Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt/DQN_2301/assets
Scenario 为 [0002]
episode:  301, epsilon 0.0100
93
Scenario 为 [0001]
episode:  301, epsilon 0.0100
132
Scenario 为 [0000]
episode:  301, epsilon 0.0100
6


In [None]:
class MyAgent(BaseAgent):
    """Agent that does nothing while the grid is safe and asks the DQN otherwise.

    The network was trained on the feature vector built from the environment
    backend (``MatrixA`` / ``MatrixX`` / ``GenerateObs``), so the agent needs
    the environment it is acting in to rebuild that input.
    """

    def __init__(self, action_space, model, env=None, actions=None):
        """
        Args:
            action_space: the environment's action space.
            model: trained Q-network, callable on a ``(1, n)`` input. When
                ``None``, a model is loaded from the original hard-coded path.
            env: environment whose backend reflects the observation passed to
                ``act``; required as soon as the DQN has to be queried.
            actions: list of candidate actions, indexed by the network output.
                Defaults to every unitary topology change of `action_space`.
                NOTE(review): this list must be the one the model was trained
                with (same length and order) - confirm against the training run.
        """
        # BaseAgent takes only the action space; `action_space_converter`
        # belongs to AgentWithConverter and was an undefined name here.
        super(MyAgent, self).__init__(action_space=action_space)
        self.env = env
        if actions is None:
            actions = action_space.get_all_unitary_topologies_change(action_space)
        self.actions = actions
        if model is None:
            model = tf.keras.models.load_model('/Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt')
        self.dqn_model = model

    def find_best_line_to_reconnect(self, obs, original_action):
        """Return `original_action`, possibly extended with one line reconnection.

        Every disconnected line that is out of cooldown is simulated as
        reconnected on top of `original_action`; the reconnection giving the
        lowest maximum ``rho`` (if lower than `original_action` alone) is
        merged into `original_action`, which is updated in place and returned.
        """
        disconnected_lines = np.where(obs.line_status == False)[0]
        if not len(disconnected_lines):
            return original_action
        if (obs.time_before_cooldown_line[disconnected_lines] > 0).all():
            return original_action
        o, _, _, _ = obs.simulate(original_action)
        min_rho = o.rho.max()
        line_to_reconnect = -1
        for line in disconnected_lines:
            if not obs.time_before_cooldown_line[line]:
                reconnect_array = np.zeros_like(obs.rho)
                reconnect_array[line] = 1
                reconnect_action = deepcopy(original_action)
                reconnect_action.update({'set_line_status': reconnect_array})
                o, _, sim_done, sim_info = obs.simulate(reconnect_action)
                # The original called an undefined `self.is_legal`; legality is
                # read from the simulation's info flags instead. A simulation
                # that ends the episode is skipped as well.
                if sim_done or sim_info.get('is_illegal', False) or sim_info.get('is_ambiguous', False):
                    continue
                if o.rho.max() < min_rho:
                    line_to_reconnect = line
                    min_rho = o.rho.max()
        if line_to_reconnect != -1:
            reconnect_array = np.zeros_like(obs.rho)
            reconnect_array[line_to_reconnect] = 1
            original_action.update({'set_line_status': reconnect_array})
        return original_action

    def act(self, observation, reward, done=False):
        """Return the do-nothing action below 0.999 max load, else the DQN's choice.

        Raises:
            RuntimeError: if the DQN must be queried but no `env` was given.
        """
        if observation.rho.max() < 0.999:
            # Grid is safe: do nothing.
            return self.action_space()
        if self.env is None:
            raise RuntimeError("MyAgent needs `env` to build the DQN input; pass env=... to the constructor")
        # Same input pipeline as the training loop.
        adjacency = nx.adjacency_matrix(nx.from_dict_of_lists(MatrixA(self.env, observation)))
        support = tf.cast(tf.SparseTensor(*preprocess_adj(adjacency)), dtype=tf.double)
        features = tf.SparseTensor(*MatrixX(self.env))
        model_input = tf.reshape(GenerateObs(features, support), (1, -1))
        # Call the model directly: a model restored with load_model exposes the
        # stock Keras `predict`, which returns outputs rather than an index.
        q_values = self.dqn_model(model_input)
        action_idx = int(tf.argmax(q_values, axis=-1)[0])
        return self.actions[action_idx]

In [10]:
# Scratch check: type of the per-sample maximum Q-value used in the TD target.
# NOTE(review): depends on `q_value` left in the kernel by the training cell.
type(tf.reduce_max(q_value,axis=1))


tensorflow.python.framework.ops.EagerTensor

In [12]:
# Scratch check: shape of the flattened observation.
# NOTE(review): depends on `obss` left in the kernel by the training cell.
tf.reshape(obss, (32*28,)).shape

TensorShape([896])

In [7]:
# Scratch check: shape of the batched Q-values.
# NOTE(review): depends on `q_value` left in the kernel by the training cell.
q_value.shape

TensorShape([64, 66918])

In [9]:
# Print the layer / parameter summary of whichever `model` is currently in the kernel.
model.summary()

Model: "dqn__model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  2046000   
_________________________________________________________________
dense_1 (Dense)              multiple                  2251500   
_________________________________________________________________
dense_2 (Dense)              multiple                  2251500   
_________________________________________________________________
dense_3 (Dense)              multiple                  100443918 
Total params: 106,992,918
Trainable params: 106,992,918
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Manual save of the current model under the training loop's naming scheme.
# NOTE(review): relies on `episode` from the training cell; on a kernel where
# that cell has not run this raises the NameError recorded below.
file_path = '/Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt/DQN_2'+str(episode+1)
model.save(filepath=file_path)


NameError: name 'episode' is not defined

In [2]:
# Load a previously saved model from disk, replacing `model` in the kernel.
fileP = '/Users/yuzhao/Desktop/StudyPool/pythonProject/DQNckpt/t6/DQN_2271'
model = tf.keras.models.load_model(fileP)




In [12]:
# Print the layer / parameter summary of whichever `model` is currently in the kernel.
model.summary()

Model: "dqn__model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              multiple                  376740    
_________________________________________________________________
dense_6 (Dense)              multiple                  176820    
_________________________________________________________________
dense_7 (Dense)              multiple                  176820    
_________________________________________________________________
dense_8 (Dense)              multiple                  176820    
_________________________________________________________________
dense_9 (Dense)              multiple                  79148     
Total params: 986,348
Trainable params: 986,348
Non-trainable params: 0
_________________________________________________________________
