In [44]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

# 指定数据类型
dtype = torch.FloatTensor
# S: 解码后语句的开始start
# E: 解码后语句的结束end
# P: Symbol that will fill in blank sequence if current batch data size is short than time steps
sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']

# 数据预处理
# 1.构造词表
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)  # vocab list

# 参数
n_hidden = 128

n_class

11

In [17]:
# 2.构造数据批次
def make_batch(sentences):
    input_batch  = [np.eye(n_class)[[word_dict[n] for n in sentences[0].split()]]]
    output_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[1].split()]]]
    target_batch = [np.eye(n_class)[[word_dict[n] for n in sentences[2].split()]]]

    # 返回张量
    return torch.Tensor(input_batch), torch.Tensor(output_batch), torch.Tensor(target_batch)


# 每个批次的形状（batch_size, time_step, input_dim）
make_batch(sentences)[2].shape

torch.Size([1, 5, 11])

In [58]:
"""构造模型"""
class Attention(nn.Module):
    def __init__(self):
        super(Attention,self).__init__()
        self.encoder_cell = nn.RNN(input_size=n_class,
                                   hidden_size=n_hidden,#128
                                   dropout=0.5)
        self.decoder_cell = nn.RNN(input_size=n_class,
                                   hidden_size=n_hidden,
                                   dropout=0.5)
        # attention线性层
        self.attn = nn.Linear(n_hidden, n_hidden)
        self.out = nn.Linear(n_hidden*2, n_class)
        
    def forward(self, enc_inputs, hidden, dec_inputs):
        # 把批改为（time_step, batch_size, input_dim）
        enc_inputs = enc_inputs.transpose(0,1)
        # 把批改为（time_step, batch_size, intput_dim）
        dec_inputs = dec_inputs.transpose(0,1)

        # 把输入和隐含数据输入给编码器
        enc_outputs, enc_hidden = self.encoder_cell(enc_inputs, hidden)

        trained_attn = []
        # 把编码器得到的最后一个hidden_state拿出来作为解码器的hidden
        hidden = enc_hidden
        # 把解码器的输入个数作为步长
        n_step = len(dec_inputs)
        model = torch.empty([n_step, 1, n_class])

        for i in range(n_step):
            dec_output, hidden = self.decoder_cell(dec_inputs[i].unsqueeze(0),
                                                   hidden)
#             print(hidden.shape)
            attn_weights = self.get_att_weight(dec_output, enc_outputs)
            trained_attn.append(attn_weights.squeeze().data.numpy())

            # 矩阵乘法bmm[1,1,n_step]x[1,n_step,n_hidden]=[1,1,n_hidden]
            context = attn_weights.bmm(enc_outputs.transpose(0,1))
            # [batch_size=1, n_hidden]
            dec_output = dec_output.squeeze(0)
            context = context.squeeze(1)
            model[i] = self.out(torch.cat((dec_output, context),1))

        # make model shape
        return model.transpose(0,1).squeeze(0), trained_attn

    def get_att_weight(self, dec_output, enc_outputs):  # get attention weight one 'dec_output' with 'enc_outputs'
        n_step = len(enc_outputs)
        attn_scores = torch.zeros(n_step)  # attn_scores : [n_step]

        for i in range(n_step):
            attn_scores[i] = self.get_att_score(dec_output, enc_outputs[i])

        # Normalize scores to weights in range 0 to 1
        return F.softmax(attn_scores).view(1, 1, -1)

    def get_att_score(self,dec_output, enc_outputs):
        score = self.attn(enc_outputs)
        return torch.dot(dec_output.view(-1), score.view(-1))
        
    

In [59]:
"""训练环境配置"""
# 数据批处理
input_batch, output_batch, target_batch = make_batch(sentences)

# 初始化hidden
hidden = torch.zeros(1,1,n_hidden)

# 创建模型
model = Attention()
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 开始训练
for epoch in range(2000):
    # 梯度清零
    optimizer.zero_grad()
    # 计算结果
    output, _ = model(input_batch, hidden, output_batch)
    # 计算损失
    loss = loss_func(output, target_batch.squeeze(0))
    if (epoch + 1) % 400 == 0:
        print('Epoch:','%04d' % (epoch + 1), 'Cost =', '{:.6f}'.format(loss))
    # 误差反向传播
    loss.backward()
    # 优化器更新参数
    optimizer.step()




RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target'

In [21]:
a = torch.randn(3,4,5)
b = a.transpose(0,1)
len(b),b,a

(4, tensor([[[ 0.4431, -2.3067,  0.7377,  0.2033,  1.6705],
          [-2.5375,  0.7294, -0.1644,  0.6650,  0.3895],
          [-0.0613, -1.5592, -2.0909,  0.2348,  0.4698]],
 
         [[-0.0326, -0.1047,  0.1092, -0.0431, -2.1317],
          [ 0.4181, -0.7801, -1.5827,  1.2612,  0.4409],
          [-1.0802, -0.4284, -1.1630, -2.2613,  1.2779]],
 
         [[ 1.0326,  0.8520, -0.0111, -1.3376,  1.2378],
          [-1.2324, -0.3075, -0.7242, -0.8695,  0.6091],
          [-0.1361,  0.7926,  1.9040, -0.4173,  1.2122]],
 
         [[ 0.3052, -1.4391,  0.8925, -0.3845, -1.3879],
          [-0.2115,  0.1124,  1.6471, -1.0548,  0.0244],
          [ 1.4515, -1.6221,  0.8290, -0.6666, -1.6496]]]), tensor([[[ 0.4431, -2.3067,  0.7377,  0.2033,  1.6705],
          [-0.0326, -0.1047,  0.1092, -0.0431, -2.1317],
          [ 1.0326,  0.8520, -0.0111, -1.3376,  1.2378],
          [ 0.3052, -1.4391,  0.8925, -0.3845, -1.3879]],
 
         [[-2.5375,  0.7294, -0.1644,  0.6650,  0.3895],
          [ 0.