## MAPDP论文网络复现

- [paired-context-embedding](#paired-context-embedding)
- [context-encoding](#context-encoding)
- [cooperative-multi-agent-decoders](#cooperative-multi-agent-decoders)

In [50]:
!pip install torch numpy



In [51]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim

print('torch版本: ', torch.__version__)

# 检测GPU，cuda、mps、cpu
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(device)

torch版本:  2.4.0
mps


### Paired Context Embedding

Embeding是NLP领域的一个概念，比如我们经常会对每一个文本token（单词）做词嵌入，因为深度学习模型的输入是数值张量，词嵌入本质上就是把文本变成向量。本文借鉴了词嵌入的概念，对MAPDP问题的输入节点信息做词嵌入，因为取件节点和接收节点是成对的，而且智能体在选择成对节点时是有时序约束的，为了让模型学习到这种节点（状态）之间的依赖关系，故做嵌入操作。

In [52]:
NODE_NUM = 7 # 所有节点数
N = 3 # 节点对数

import numpy as np

# 设置随机种子以确保结果可重复
np.random.seed(0)

def init_env(n=7):
    """初始化节点信息"""
    # 生成6个随机坐标，范围在0到10之间
    coordinates = np.random.uniform(low=0, high=10, size=(n, 2))

    # 生成6个随机容量，范围在0到100之间
    capacities = np.random.randint(low=0, high=101, size=n)

    # 将坐标和容量组合成一个矩阵
    matrix = np.hstack((coordinates, capacities[:, np.newaxis]))

    return torch.tensor(matrix ,dtype=torch.float32).to(device)

node_information = init_env(NODE_NUM)
print(node_information)

tensor([[ 5.4881,  7.1519, 77.0000],
        [ 6.0276,  5.4488, 72.0000],
        [ 4.2365,  6.4589,  9.0000],
        [ 4.3759,  8.9177, 20.0000],
        [ 9.6366,  3.8344, 80.0000],
        [ 7.9173,  5.2889, 69.0000],
        [ 5.6804,  9.2560, 79.0000]], device='mps:0')


In [53]:
class PairedContextEmbedding(nn.Module):
    """
    PairedContextEmbeding层
    """
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(N, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(256, 128)
    
    def forward(self, input):
        # input是节点信息向量，[7, 3]
        x = self.layer1(input)
        # 分段Embeding
        # i=0
        h0_1 = self.layer2(x[0].unsqueeze(0))
        # i∈[1, N]
        h0_2 = self.layer3(torch.cat((x[1:N+1], x[N+1:]), dim=1))
        # i∈[N+1, 2N]
        h0_3 = self.layer2(x[N+1:])
        h0 = torch.vstack((h0_1, h0_2, h0_3))
        return h0


model1 = PairedContextEmbedding().to(device)
print(model1)
h0 = model1(node_information)
print(h0.shape)
print(h0[0, :10])

PairedContextEmbedding(
  (layer1): Linear(in_features=3, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=128, bias=True)
  (layer3): Linear(in_features=256, out_features=128, bias=True)
)
torch.Size([7, 128])
tensor([  2.8656,   5.9786,  -4.8060,   6.5953,   0.0620,   1.8579,   4.8849,
         -1.5698, -12.6397,  14.8208], device='mps:0',
       grad_fn=<SliceBackward0>)


### Context Encoding

对节点信息做完嵌入操作后，参考Transformer的encoder部分对嵌入向量做encoding。

In [54]:
class ContextEncoding(nn.Module):
    """
    ContextEncoding层
    """
    def __init__(self, attention_nums=6):
        super().__init__()
        self.attention_nums = attention_nums
        self.mha = nn.MultiheadAttention(embed_dim=128, num_heads=8)
        self.batch_normalization = nn.BatchNorm1d(num_features=128)
        self.feed_forward = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 128)
        )

    def forward(self, h):
        # h0是执行完嵌入层后的向量 [7,128]
        for _ in range(self.attention_nums):
            ha, _ = self.mha(h, h, h)
            hb = self.batch_normalization(h+ha)
            h = self.batch_normalization(hb+self.feed_forward(hb))
        return h

model2 = ContextEncoding().to(device)
h = model2(h0)
print(h.shape)
h_hat = h.mean(dim=0)
print(h_hat.shape)

torch.Size([7, 128])
torch.Size([128])


### Cooperative Multi-Agent Decoders

每个智能体的decode网络其实就是actor网络，基于当前状态的观测，选择下一步要到达的节点 $v_{I^t_k}$。

In [55]:
AGENT_NUM = 2 # 智能体数

class CooperativeDecoder(nn.Module):
    def __init__(self, k, D=10, has_comm=False):
        super().__init__()
        self.k = k
        self.D = D
        self.has_comm = has_comm
        self.mha = nn.MultiheadAttention(embed_dim=128, num_heads=8)
        self.W_C = nn.Linear(1, 128)
        self.W_Q = nn.Linear(128, 128)
        self.W_K = nn.Linear(128, 128)
        self.tanh = nn.Tanh()
        self.output = nn.Linear(3, 1)
        

    def forward(self, state, h, h_hat, node_visit):
        # state是所有智能体当前状态的矩阵，状态(h, C)，h表示隐藏态输出，C表示剩余容量
        h_c = torch.cat([h_hat.unsqueeze(0), h[state[self.k][0]].unsqueeze(0), self.W_C(torch.tensor([state[self.k][1]], dtype=torch.float32, device=device).unsqueeze(0))])
        if self.has_comm:
            comm = torch.cat([h[state[k]][0], self.W_C(h[state[k]][1])] for k in range(AGENT_NUM))
            h_c = torch.cat([h_c, comm])
        g, _ = self.mha(h_c, h_c, h_c)
        Q = self.W_Q(g)
        K = self.W_K(h)
        u = self.D * self.tanh(Q @ K.transpose(-1, -2) / np.sqrt(K.shape[0]))
        u = self.output(u.transpose(-1, -2)).transpose(-1, -2)
        # node_mask = -torch.inf * node_visit.unsqueeze(0).repeat(u.shape[0], 1)
        node_mask = node_visit # 访问过为0，未访问过1
        return F.softmax(node_mask*u, dim=1) # 选择下一个节点的概率值

model3 = CooperativeDecoder(k=0).to(device)
state = torch.tensor([
    (np.random.randint(0, NODE_NUM), np.random.randint(0, 10)) for _ in range(AGENT_NUM)
]).to(device)
node_visit = torch.randint(0, 2, (NODE_NUM,)).to(device)
action = model3(state, h, h_hat, node_visit)
print(action)

tensor([[0.0779, 0.5702, 0.0779, 0.0779, 0.0779, 0.0403, 0.0779]],
       device='mps:0', grad_fn=<SoftmaxBackward0>)
