In [None]:
import torch
import torch.nn as nn


class Encoder(nn.Module):
    """Embed a source token sequence and run it through a single-layer LSTM.

    Args:
        input_size: Size of the source vocabulary (rows of the embedding table).
        hidden_size: Width of both the token embeddings and the LSTM state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)  # token embedding layer
        self.lstm = nn.LSTM(hidden_size, hidden_size)  # LSTM layer (default batch_first=False)

    def forward(self, input):
        """Encode a batch of source token ids.

        Args:
            input: Integer tensor of token ids. Because the LSTM is built with
                the default ``batch_first=False``, a 2-D input is read as
                ``(seq_len, batch)``.

        Returns:
            A tuple ``(output, hn, cn)``: the LSTM outputs for every time step
            and the final hidden and cell states.
        """
        # Keep every axis of the embedding output. Squeezing axis 1 here would
        # silently drop the batch axis whenever the batch size is 1, turning
        # the LSTM input into an unbatched sequence and yielding 2-D states
        # that a decoder fed with 3-D (batched) input cannot consume.
        embedded = self.embedding(input)
        output, (hn, cn) = self.lstm(embedded)
        return output, hn, cn


class Decoder(nn.Module):
    """Embed target tokens, advance an LSTM and project to vocabulary logits.

    Args:
        output_size: Size of the target vocabulary; also the width of the
            logits produced by the final linear layer.
        hidden_size: Width of both the token embeddings and the LSTM state.
    """

    def __init__(self, output_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)  # token embedding layer
        self.lstm = nn.LSTM(hidden_size, hidden_size)  # LSTM layer (default batch_first=False)
        self.linear = nn.Linear(hidden_size, output_size)  # fully connected projection to logits

    def forward(self, input, h0, c0):
        """Run the decoder LSTM starting from the given state.

        Args:
            input: Integer tensor of target token ids. With the default
                ``batch_first=False`` a 2-D input is read as
                ``(seq_len, batch)``.
            h0: Initial hidden state handed to the LSTM.
            c0: Initial cell state handed to the LSTM.

        Returns:
            A tuple ``(output, hn, cn)``: the logits over the target
            vocabulary for every time step and the updated hidden and cell
            states.
        """
        # No debug printing here: this method is called once per decoding
        # step, so any print would flood stdout during training.
        embedded = self.embedding(input)
        output, (hn, cn) = self.lstm(embedded, (h0, c0))
        output = self.linear(output)
        return output, hn, cn


class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes one step at a time.

    Args:
        encoder: Module whose call returns ``(output, hidden, cell)`` for a
            source batch.
        decoder: Module called as ``decoder(tokens, hidden, cell)`` that
            returns ``(logits, hidden, cell)`` and exposes a ``linear`` layer
            whose ``out_features`` is the target vocabulary size.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        """Decode ``trg`` step by step, conditioned on the encoded ``src``.

        Args:
            src: Source token ids, indexed as ``(src_len, batch)``.
            trg: Target token ids, indexed as ``(trg_len, batch)``; row 0 is
                used as the first decoder input (the <SOS> token).
            teacher_forcing_ratio: Probability, drawn once per time step, of
                feeding the ground-truth token instead of the decoder's own
                greedy prediction as the next input.

        Returns:
            Tensor of shape ``(trg_len, batch, trg_vocab_size)`` holding the
            decoder logits. Row 0 is left as zeros because decoding starts at
            step 1.
        """
        batch_size = src.size(1)  # batch size
        trg_len = trg.size(0)  # target sequence length
        trg_vocab_size = self.decoder.linear.out_features  # target vocabulary size

        # Allocate the result on the same device as the inputs so it can be
        # combined with them afterwards without an explicit transfer.
        outputs = torch.zeros(
            trg_len, batch_size, trg_vocab_size, device=src.device
        )

        # Only the final encoder state is used to initialise the decoder.
        _, hidden, cell = self.encoder(src)

        step_input = trg[0]  # first target token of every sequence (<SOS>)
        for t in range(1, trg_len):
            # The decoder expects a leading time axis, hence unsqueeze(0).
            step_logits, hidden, cell = self.decoder(
                step_input.unsqueeze(0), hidden, cell
            )
            step_logits = step_logits.squeeze(0)  # (batch, trg_vocab_size)
            outputs[t] = step_logits  # store along the time axis

            # Teacher forcing: feed the ground-truth token as the next input
            # instead of the model's own prediction.
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            # The greedy prediction is the arg-max over the vocabulary axis.
            step_input = trg[t] if teacher_force else step_logits.argmax(dim=-1)
        return outputs


# Toy vocabulary sizes (source, target) and hidden width, assumed for the demo.
input_size, output_size, hidden_size = 5, 6, 256
encoder = Encoder(input_size, hidden_size)
decoder = Decoder(output_size, hidden_size)
model = Seq2Seq(encoder, decoder)

# Each inner list is one sequence. The model indexes its inputs as
# (seq_len, batch), so the batch-major literals are transposed; otherwise the
# sequence length would be mistaken for the batch size and the encoder and
# decoder would disagree on it (5 vs 6).
src = torch.tensor([[0, 2, 3, 4, 1], [0, 2, 3, 4, 1]]).t()  # (src_len=5, batch=2)
trg = torch.tensor([[0, 2, 3, 4, 5, 1], [0, 2, 3, 4, 5, 1]]).t()  # (trg_len=6, batch=2)
output = model(src, trg)

In [None]:
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.attn = nn.Linear(hidden_size, hidden_size)