In [1]:
import torch
from d2l import torch as d2l
from torch import nn


In [2]:

#! 给定一个源语言的句子，自动翻译成目标语言
#! 编码器是一个RNN，读取输入句子，可以是双向的，将最后时刻的隐藏状态输入解码器进行输出
 

0
1
2
3
4


In [2]:
#* 自定义一个编码器
class Seq2SeqEncoder(d2l.Encoder):
    """GRU-based encoder for sequence-to-sequence learning.

    Token indices are embedded and fed through a (possibly multi-layer)
    GRU configured with ``batch_first=True``.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, dropout=0, **kwargs):
        """Build the embedding table and the GRU.

        Args:
            vocab_size: number of rows in the embedding table.
            embed_size: width of each embedding vector (GRU input size).
            num_hiddens: GRU hidden size.
            num_layers: number of stacked GRU layers.
            dropout: dropout value passed to ``nn.GRU``.
            **kwargs: forwarded unchanged to ``d2l.Encoder.__init__``.
        """
        super().__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.GRU(
            input_size=embed_size,
            hidden_size=num_hiddens,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

    def forward(self, X, *args):
        """Encode a batch of token indices.

        Args:
            X: index tensor of shape [bs, sl].

        Returns:
            ``(output, state)`` with shapes [bs, sl, num_hiddens] and
            [num_layers, bs, num_hiddens].
        """
        # [bs, sl] -> [bs, sl, embed_size]
        embedded = self.embedding(X)
        # nn.GRU already returns the (output, final hidden state) pair.
        return self.rnn(embedded)



In [3]:

#! Instantiate an encoder and inspect the shape of its output
encoder = Seq2SeqEncoder(vocab_size=10, embed_size=8, num_hiddens=16, num_layers=2)
encoder.eval()  # switch the module to evaluation mode
X = torch.zeros(4, 7, dtype=torch.long)  # dummy batch: [bs=4, sl=7]
output, state = encoder(X)
output.shape

torch.Size([4, 7, 16])

In [7]:

#* 自定义一个解码器
class Seq2SeqDecoder(d2l.Decoder):
    """GRU-based decoder for sequence-to-sequence learning.

    The decoder owns its embedding layer (it is not shared with the
    encoder). At every time step the embedded token is concatenated with a
    context vector taken from the last layer of the incoming hidden state,
    and the GRU is initialised with that same hidden state.
    """

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, dropout=0, **kwargs):
        """Build the embedding table, the GRU and the output projection.

        Args:
            vocab_size: size of the target vocabulary (embedding rows and
                output logits).
            embed_size: width of each embedding vector.
            num_hiddens: GRU hidden size; must equal the hidden size of the
                state handed to ``forward``.
            num_layers: number of stacked GRU layers.
            dropout: dropout value passed to ``nn.GRU``.
            **kwargs: forwarded to ``d2l.Decoder.__init__``.
        """
        # Forward **kwargs to the base class, consistent with Seq2SeqEncoder
        # (they used to be accepted and then silently dropped).
        super().__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # The GRU input is the embedding concatenated with the context vector.
        self.rnn = nn.GRU(
            embed_size + num_hiddens,
            num_hiddens,
            num_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.dense = nn.Linear(num_hiddens, vocab_size)

    def init_state(self, enc_outputs, *args):
        """Return the encoder's final hidden state, i.e. ``enc_outputs[1]``.

        Shape: [num_layers, bs, num_hiddens].
        """
        return enc_outputs[1]

    def forward(self, X, state, *args):
        """Decode a batch of target token indices.

        Args:
            X: index tensor of shape [bs, sl].
            state: hidden state of shape [num_layers, bs, num_hiddens].

        Returns:
            ``(output, state)`` where ``output`` holds the vocabulary logits
            of shape [bs, sl, vocab_size] and ``state`` is the updated
            hidden state of shape [num_layers, bs, num_hiddens].
        """
        # [bs, sl] -> [bs, sl, embed_size]
        X = self.embedding(X)
        # Top-layer hidden state repeated along the time axis:
        # [bs, num_hiddens] -> [bs, 1, num_hiddens] -> [bs, sl, num_hiddens]
        context = state[-1].unsqueeze(1).tile(1, X.shape[1], 1)
        X_and_context = torch.cat((X, context), 2)
        # Pass `state` as the initial hidden state; without it the GRU starts
        # from zeros and the encoder's state is never used to initialise it.
        output, state = self.rnn(X_and_context, state)
        output = self.dense(output)
        return output, state


In [8]:

#! Instantiate a decoder and run it starting from the encoder's final state
decoder = Seq2SeqDecoder(vocab_size=10, embed_size=8, num_hiddens=16, num_layers=2)
decoder.eval()  # switch the module to evaluation mode
state = decoder.init_state(encoder(X))
output, state = decoder(X, state)
(output.shape, state.shape)

(torch.Size([4, 7, 10]), torch.Size([2, 4, 16]))

In [23]:

#! Mask irrelevant entries by setting them to zero
X = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
# Keep the first 1 item of row 0 and the first 2 items of row 1.
d2l.sequence_mask(X, torch.tensor([1, 2]))


tensor([[1, 0, 0],
        [4, 5, 0]])

In [18]:

#! Mask out irrelevant predictions by extending the softmax cross-entropy loss


tensor([[-0.1947,  0.6278, -1.7384],
        [-0.0411, -0.2879,  0.3877]])

In [19]:
# NOTE(review): this cell only references the loss class; it neither instantiates
# nor calls it, so the tensor displayed below presumably comes from an earlier
# version of the cell — confirm and re-run.
d2l.MaskedSoftmaxCELoss

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [32]:
# A 3x4 tensor filled with ones.
a = torch.ones(3, 4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [36]:
# Sequence-style input for CrossEntropyLoss: the class axis is dim 1.
pred = torch.ones(3, 10, 4)  #! 10 classes here
labels = torch.ones(3, 4, dtype=torch.long)
cri = nn.CrossEntropyLoss()
# Uniform logits over 10 classes, so the loss equals ln(10).
cri(pred, labels)

tensor(2.3026)

In [28]:
# Plain classification case: 2 samples, 3 classes, one class index per sample.
pred = torch.randn(2, 3)
label = torch.tensor([1, 2])
crit = nn.CrossEntropyLoss()
crit(pred, label)

tensor(2.1078)

In [30]:
# Add a trailing axis of length 2 to the logits: [2, 3] -> [2, 3, 2].
pred = pred.unsqueeze(2).tile(1, 1, 2)
# For an input of shape (N, C, d1) CrossEntropyLoss expects a target of shape
# (N, d1), but `label` is still 1-D here, so the call below is rejected.
# Catch and print the error so the notebook still survives Restart & Run All.
try:
    crit(pred, label)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1