In [2]:
from __future__ import unicode_literals, print_function, division
import string
import re
import random

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [4]:
# from models import GRUEncoder, AttnDecoder

In [5]:
from dataset import DigitSeqDataset, DigitsSeq

In [6]:
# from util import *

In [7]:
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)
    
import time
import math


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [8]:
with open('./dataset/task8_train_input.csv', 'r') as f:
    source_data = [''.join(line.split(',')).strip('\n').split('0')[0] for line in f.readlines()]
    

with open('./dataset/task8_train_output.csv', 'r') as f:
    target_data = [''.join(line.split(',')).strip('\n').split('0')[0] for line in f.readlines()]

In [9]:
def extract_character_vocab(data):
    """
    :param data:
    :return: 字符映射表
    """
    special_words = ['<PAD>','<UNK>','<GO>','<EOS>']
    set_words = list(set([character for line in data for character in line]))
    int_to_vocab = {idx:word for idx,word in enumerate(special_words + set_words)}
    vocab_to_int = {word:idx for idx,word in int_to_vocab.items()}

    return int_to_vocab,vocab_to_int

In [10]:
# 得到输入和输出的字符映射表

source_int_to_letter,source_letter_to_int = extract_character_vocab(source_data+target_data)

target_int_to_letter,target_letter_to_int = extract_character_vocab(source_data+target_data)

# 将每一行转换成字符id的list
source_int = [[source_letter_to_int.get(letter,source_letter_to_int['<UNK>'])
               for letter in line] for line in source_data]

# 在 output sequence 后添加 <EOS> tag
target_int = [[target_letter_to_int.get(letter, target_letter_to_int['<UNK>'])
               for letter in line] + [target_letter_to_int['<EOS>']] for line in target_data]

In [11]:
source_int_to_letter, target_int_to_letter

({0: '<PAD>',
  1: '<UNK>',
  2: '<GO>',
  3: '<EOS>',
  4: '3',
  5: '5',
  6: '7',
  7: '1',
  8: '6',
  9: '4',
  10: '8',
  11: '2',
  12: '9'},
 {0: '<PAD>',
  1: '<UNK>',
  2: '<GO>',
  3: '<EOS>',
  4: '3',
  5: '5',
  6: '7',
  7: '1',
  8: '6',
  9: '4',
  10: '8',
  11: '2',
  12: '9'})

In [12]:
MAX_LENGTH = 24

In [21]:
class GRUEncoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(GRUEncoder, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        
        # here I use GRU instead
        # 注意，这里GRU的 hide layer 维度和embeding维度一样，但并不是必须的
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        # (seq_len, batch, input_size)这是RNN的输入数据格式，这里只有1个时间步，但是为什么batch也是1？
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)


class GRUDecoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(GRUDecoder, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        
        # 这里要做分类输出
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

# 原始的attention使用了双向RNN，这里代码中做了简化，只用了单向RNN。
class AttnDecoder(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # attention 
        attn_weights = F.softmax(
            # 连接输入的词向量和上一步的hide state并建立bp训练，他们决定了attention权重
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        
        # 施加权重到所有的语义向量上
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        # 加了attention的语义向量和输入的词向量共同作为输入，此处对应解码方式三+attention
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        # 输出分类结果
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)

In [40]:
teacher_forcing_ratio = 0.5

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    
#     print(input_length) #21
    #print(" ")
    
    target_length = target_tensor.size(0)
    
    # 这里 encoder_outputs 记录的就是编码到每一个单词产生的语义向量，比如10个英语单词的句子就应该有10个语义向量
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    # 一个个单词 feed encoder
    for ei in range(input_length):
        
        # 获取到每一步 Encoder 的输出
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        
        # 记录改单词处的语义向量
        encoder_outputs[ei] = encoder_output[0, 0]

        
    # Decoder 输入，需要 <GO>
    # decoder_input = torch.tensor([[SOS_token]], device=device)
    # SOS表示句子开始
    decoder_input = torch.tensor([[target_letter_to_int.get('<GO>')]], device=device)

    # decoder的 hide state就是 encoder 最后一步的语义向量
    decoder_hidden = encoder_hidden

    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    # use_teacher_forcing = True

    if use_teacher_forcing:
        # Teacher forcing: Feed the target as the next input
        # 利用已知的上一步真实的单词去预测下一个单词
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            
            # 计算 loss
            loss += criterion(decoder_output, target_tensor[di])
            
            decoder_input = target_tensor[di]  # Teacher forcing

    else:
        # Without teacher forcing: use its own predictions as the next input
        # 利用自己上一步预测的单词作为输入预测下一个单词
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            
            # topk返回的前k最大值及其索引
            topv, topi = decoder_output.topk(1)
            
#             print('Decoder Output: ', decoder_output)
#             print('Decoder Output Shape: ',decoder_output.shape)
            
#             print('Decoder Output topk(1): ', decoder_output.topk(1))
#             print('topv: ',topv)
#             print('topi shape: ',topi.shape)
            
            decoder_input = topi.squeeze().detach()  # detach from history as input
#             print('decoder_input: ',decoder_input)
#             print(decoder_input.item())
            
            loss += criterion(decoder_output, target_tensor[di])
            
            #if decoder_input.item() == EOS_token:
            # 遇到EOS，表示翻译句子停止了
            if decoder_input.item() == target_letter_to_int.get('<EOS>'):
                break

    loss.backward()

    encoder_optimizer.step()
    
    decoder_optimizer.step()

    return loss.item() / target_length

In [41]:
source_target_pair = [list(line) for line in zip([list(i) for i in source_data[:10]], [list(i) for i in target_data[:10]])]

In [42]:
# source_int

In [43]:
seq_pairs = [list(line) for line in zip(source_int,target_int)]

In [47]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    
    start = time.time()
    
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    
    training_pairs = [random.choice(seq_pairs) for i in range(n_iters)]
    
    criterion = nn.NLLLoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        print(training_pair[0])
        input_tensor = torch.from_numpy(np.array(training_pair[0]).reshape(-1,1))
        target_tensor = torch.from_numpy(np.array(training_pair[1]).reshape(-1,1))
        print(input_tensor)
        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            
            print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [48]:
target_letter_to_int

{'1': 7,
 '2': 11,
 '3': 4,
 '4': 9,
 '5': 5,
 '6': 8,
 '7': 6,
 '8': 10,
 '9': 12,
 '<EOS>': 3,
 '<GO>': 2,
 '<PAD>': 0,
 '<UNK>': 1}

In [49]:
hidden_size = 64

# inputs, outputs, seq_pairs 
encoder = GRUEncoder(len(target_letter_to_int), hidden_size)

attn_decoder = AttnDecoder(hidden_size, len(target_letter_to_int), dropout_p=0.1)

# all 32000
trainIters(encoder, attn_decoder, 30000, print_every=500)

[11, 7, 6, 9, 7, 9, 4, 9, 12, 4, 10]
tensor([[11],
        [ 7],
        [ 6],
        [ 9],
        [ 7],
        [ 9],
        [ 4],
        [ 9],
        [12],
        [ 4],
        [10]])
[9, 8, 11, 6, 7, 10, 12, 6, 6, 5, 8, 7, 4, 11]
tensor([[ 9],
        [ 8],
        [11],
        [ 6],
        [ 7],
        [10],
        [12],
        [ 6],
        [ 6],
        [ 5],
        [ 8],
        [ 7],
        [ 4],
        [11]])
[10, 6, 10, 5, 12, 6, 11, 5, 11, 7, 9, 7, 10, 5, 5, 7, 11, 6]
tensor([[10],
        [ 6],
        [10],
        [ 5],
        [12],
        [ 6],
        [11],
        [ 5],
        [11],
        [ 7],
        [ 9],
        [ 7],
        [10],
        [ 5],
        [ 5],
        [ 7],
        [11],
        [ 6]])
[4, 12, 12, 11, 6, 12]
tensor([[ 4],
        [12],
        [12],
        [11],
        [ 6],
        [12]])
[6, 6, 9, 10, 4, 8]
tensor([[ 6],
        [ 6],
        [ 9],
        [10],
        [ 4],
        [ 8]])
[4, 6, 5, 6, 5, 10, 9, 11, 5]
tensor

[8, 12, 5, 5]
tensor([[ 8],
        [12],
        [ 5],
        [ 5]])
[12, 9, 5, 4, 12]
tensor([[12],
        [ 9],
        [ 5],
        [ 4],
        [12]])
[10, 4, 5, 10, 10, 8, 10, 7]
tensor([[10],
        [ 4],
        [ 5],
        [10],
        [10],
        [ 8],
        [10],
        [ 7]])
[4, 12, 11, 9, 6, 5, 10, 4, 6, 12, 4]
tensor([[ 4],
        [12],
        [11],
        [ 9],
        [ 6],
        [ 5],
        [10],
        [ 4],
        [ 6],
        [12],
        [ 4]])
[10, 7, 6, 11]
tensor([[10],
        [ 7],
        [ 6],
        [11]])
[10, 8, 6, 6, 7, 8, 8, 12, 6, 5, 12, 7, 6, 10]
tensor([[10],
        [ 8],
        [ 6],
        [ 6],
        [ 7],
        [ 8],
        [ 8],
        [12],
        [ 6],
        [ 5],
        [12],
        [ 7],
        [ 6],
        [10]])
[8, 10]
tensor([[ 8],
        [10]])
[12, 4, 4, 8]
tensor([[12],
        [ 4],
        [ 4],
        [ 8]])
[7, 12, 4, 12, 12, 8, 6, 8, 9, 11, 11, 12, 7, 5, 7]
tensor([[ 7],
        [12],
 

tensor([[10],
        [ 4],
        [ 6],
        [ 8],
        [ 6],
        [ 7],
        [ 8],
        [11],
        [11],
        [ 6],
        [10],
        [ 4],
        [ 9],
        [ 5],
        [ 7],
        [ 9],
        [ 6]])
[10, 5, 5]
tensor([[10],
        [ 5],
        [ 5]])
[7, 12, 5, 8, 6, 12, 12]
tensor([[ 7],
        [12],
        [ 5],
        [ 8],
        [ 6],
        [12],
        [12]])
[7, 6, 9, 4, 7, 8]
tensor([[7],
        [6],
        [9],
        [4],
        [7],
        [8]])
[7, 12, 6, 4]
tensor([[ 7],
        [12],
        [ 6],
        [ 4]])
[8, 7, 5, 10, 11, 8, 12]
tensor([[ 8],
        [ 7],
        [ 5],
        [10],
        [11],
        [ 8],
        [12]])
[7, 7]
tensor([[7],
        [7]])
[10, 8]
tensor([[10],
        [ 8]])
[6, 11, 8, 7, 9]
tensor([[ 6],
        [11],
        [ 8],
        [ 7],
        [ 9]])
[7, 7, 8, 11, 7, 9, 9, 12, 8, 9, 8, 8]
tensor([[ 7],
        [ 7],
        [ 8],
        [11],
        [ 7],
        [ 9],
        

[9, 8, 11, 8, 4, 8]
tensor([[ 9],
        [ 8],
        [11],
        [ 8],
        [ 4],
        [ 8]])
[12, 10, 9, 11, 12, 4, 5, 12, 7]
tensor([[12],
        [10],
        [ 9],
        [11],
        [12],
        [ 4],
        [ 5],
        [12],
        [ 7]])
[4, 9, 11, 7, 12, 8, 5, 11, 7, 6, 4, 12]
tensor([[ 4],
        [ 9],
        [11],
        [ 7],
        [12],
        [ 8],
        [ 5],
        [11],
        [ 7],
        [ 6],
        [ 4],
        [12]])
[7, 6, 12, 8, 5, 11, 8, 4, 11, 4, 12, 10, 9, 7, 7, 10, 4, 5, 10, 12]
tensor([[ 7],
        [ 6],
        [12],
        [ 8],
        [ 5],
        [11],
        [ 8],
        [ 4],
        [11],
        [ 4],
        [12],
        [10],
        [ 9],
        [ 7],
        [ 7],
        [10],
        [ 4],
        [ 5],
        [10],
        [12]])
[11, 4, 9, 7, 6, 7, 8, 7, 8, 6, 11, 6, 11, 12, 7, 10, 5, 11]
tensor([[11],
        [ 4],
        [ 9],
        [ 7],
        [ 6],
        [ 7],
        [ 8],
        [ 7],
  

[9, 12, 12, 7, 4, 10, 4, 12, 9, 5, 11, 10, 9, 6, 4, 10, 12, 4]
tensor([[ 9],
        [12],
        [12],
        [ 7],
        [ 4],
        [10],
        [ 4],
        [12],
        [ 9],
        [ 5],
        [11],
        [10],
        [ 9],
        [ 6],
        [ 4],
        [10],
        [12],
        [ 4]])
[7, 11, 12, 4, 5, 7, 9, 5, 7, 11, 6, 8, 12, 6, 5]
tensor([[ 7],
        [11],
        [12],
        [ 4],
        [ 5],
        [ 7],
        [ 9],
        [ 5],
        [ 7],
        [11],
        [ 6],
        [ 8],
        [12],
        [ 6],
        [ 5]])
[5, 4, 11, 7, 9, 6, 11]
tensor([[ 5],
        [ 4],
        [11],
        [ 7],
        [ 9],
        [ 6],
        [11]])
[4, 8, 7, 10, 4, 12, 11, 12, 11, 9, 6, 5, 9, 8, 7]
tensor([[ 4],
        [ 8],
        [ 7],
        [10],
        [ 4],
        [12],
        [11],
        [12],
        [11],
        [ 9],
        [ 6],
        [ 5],
        [ 9],
        [ 8],
        [ 7]])
[6, 9, 10, 5, 8, 7, 11, 5, 9, 6, 4, 12

tensor([[ 6],
        [ 4],
        [ 8],
        [ 4],
        [ 5],
        [ 8],
        [ 6],
        [12],
        [10],
        [ 8],
        [ 9],
        [ 6],
        [ 4],
        [ 7],
        [ 7],
        [ 5],
        [ 8],
        [ 5],
        [ 8]])
[6, 11, 4, 10, 4]
tensor([[ 6],
        [11],
        [ 4],
        [10],
        [ 4]])
[9, 6, 10, 11, 4, 6, 7, 5, 4, 7, 8, 5, 5, 10, 6, 11, 8, 6]
tensor([[ 9],
        [ 6],
        [10],
        [11],
        [ 4],
        [ 6],
        [ 7],
        [ 5],
        [ 4],
        [ 7],
        [ 8],
        [ 5],
        [ 5],
        [10],
        [ 6],
        [11],
        [ 8],
        [ 6]])
[12, 4, 12, 10]
tensor([[12],
        [ 4],
        [12],
        [10]])
[9, 4, 8, 4]
tensor([[9],
        [4],
        [8],
        [4]])
[11, 12, 6, 5]
tensor([[11],
        [12],
        [ 6],
        [ 5]])
[12, 6, 10, 9, 8, 11, 7, 9, 5, 12, 5, 9, 8, 9]
tensor([[12],
        [ 6],
        [10],
        [ 9],
        [ 8],
    

[11, 9, 5, 7, 4, 9, 6, 5, 9, 6, 9, 8, 5, 10, 10, 12, 9, 9]
tensor([[11],
        [ 9],
        [ 5],
        [ 7],
        [ 4],
        [ 9],
        [ 6],
        [ 5],
        [ 9],
        [ 6],
        [ 9],
        [ 8],
        [ 5],
        [10],
        [10],
        [12],
        [ 9],
        [ 9]])
[9, 7, 11, 8, 7, 9, 12, 5, 6, 7, 8, 6, 11, 9, 11, 5, 5]
tensor([[ 9],
        [ 7],
        [11],
        [ 8],
        [ 7],
        [ 9],
        [12],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 6],
        [11],
        [ 9],
        [11],
        [ 5],
        [ 5]])
[10, 9, 8, 5, 6, 8, 11, 12]
tensor([[10],
        [ 9],
        [ 8],
        [ 5],
        [ 6],
        [ 8],
        [11],
        [12]])
[4, 5, 5, 5, 6, 5, 4, 4, 10, 6, 5, 8, 11, 7, 12]
tensor([[ 4],
        [ 5],
        [ 5],
        [ 5],
        [ 6],
        [ 5],
        [ 4],
        [ 4],
        [10],
        [ 6],
        [ 5],
        [ 8],
        [11],
        [ 7],
       

tensor([[10],
        [ 9],
        [ 9],
        [ 7],
        [ 6],
        [ 7],
        [ 6],
        [12],
        [ 9],
        [ 5],
        [10],
        [ 8]])
[10, 7, 8, 11, 11]
tensor([[10],
        [ 7],
        [ 8],
        [11],
        [11]])
[11, 12, 7, 10, 10, 11, 4, 5, 12, 6, 9, 9, 10, 7, 11, 7, 4, 5, 11]
tensor([[11],
        [12],
        [ 7],
        [10],
        [10],
        [11],
        [ 4],
        [ 5],
        [12],
        [ 6],
        [ 9],
        [ 9],
        [10],
        [ 7],
        [11],
        [ 7],
        [ 4],
        [ 5],
        [11]])
[7, 12, 8, 10]
tensor([[ 7],
        [12],
        [ 8],
        [10]])
[7, 7, 5, 5, 4, 8, 11, 12, 10, 9, 11, 8, 8, 7, 4, 9, 11, 10, 9, 5]
tensor([[ 7],
        [ 7],
        [ 5],
        [ 5],
        [ 4],
        [ 8],
        [11],
        [12],
        [10],
        [ 9],
        [11],
        [ 8],
        [ 8],
        [ 7],
        [ 4],
        [ 9],
        [11],
        [10],
        [ 9],
  

[6, 10, 4, 5, 12, 7, 9, 8, 9, 12, 8, 10, 12, 5, 8]
tensor([[ 6],
        [10],
        [ 4],
        [ 5],
        [12],
        [ 7],
        [ 9],
        [ 8],
        [ 9],
        [12],
        [ 8],
        [10],
        [12],
        [ 5],
        [ 8]])
[5, 8]
tensor([[5],
        [8]])
[11, 6, 10, 4, 6, 7, 12]
tensor([[11],
        [ 6],
        [10],
        [ 4],
        [ 6],
        [ 7],
        [12]])
[8, 11, 10, 5, 4, 9, 12, 5, 4, 5, 12, 4, 11, 10, 11, 11]
tensor([[ 8],
        [11],
        [10],
        [ 5],
        [ 4],
        [ 9],
        [12],
        [ 5],
        [ 4],
        [ 5],
        [12],
        [ 4],
        [11],
        [10],
        [11],
        [11]])
[9, 8, 12, 4, 8, 10, 8, 11, 8, 4, 5, 4, 12, 5]
tensor([[ 9],
        [ 8],
        [12],
        [ 4],
        [ 8],
        [10],
        [ 8],
        [11],
        [ 8],
        [ 4],
        [ 5],
        [ 4],
        [12],
        [ 5]])
[4, 9, 10, 10]
tensor([[ 4],
        [ 9],
        [10

tensor([[ 7],
        [ 8],
        [10],
        [ 5],
        [ 5]])
[9, 10, 10, 10, 8, 12, 10, 9, 7, 8, 7, 10, 9, 9, 9, 4]
tensor([[ 9],
        [10],
        [10],
        [10],
        [ 8],
        [12],
        [10],
        [ 9],
        [ 7],
        [ 8],
        [ 7],
        [10],
        [ 9],
        [ 9],
        [ 9],
        [ 4]])
[12, 6, 10, 8, 8]
tensor([[12],
        [ 6],
        [10],
        [ 8],
        [ 8]])
[8, 4]
tensor([[8],
        [4]])
[11, 9, 7, 10, 5, 7]
tensor([[11],
        [ 9],
        [ 7],
        [10],
        [ 5],
        [ 7]])
[12, 11, 7, 12, 9, 8, 10, 10, 4, 5, 8, 5, 7, 8, 4, 8, 8, 8, 5, 11]
tensor([[12],
        [11],
        [ 7],
        [12],
        [ 9],
        [ 8],
        [10],
        [10],
        [ 4],
        [ 5],
        [ 8],
        [ 5],
        [ 7],
        [ 8],
        [ 4],
        [ 8],
        [ 8],
        [ 8],
        [ 5],
        [11]])
[5, 11, 11]
tensor([[ 5],
        [11],
        [11]])
[9, 10, 9, 4, 10,

[7, 10, 8, 8, 4, 8, 7, 12, 6, 4, 8, 8, 8, 9, 12]
tensor([[ 7],
        [10],
        [ 8],
        [ 8],
        [ 4],
        [ 8],
        [ 7],
        [12],
        [ 6],
        [ 4],
        [ 8],
        [ 8],
        [ 8],
        [ 9],
        [12]])
[5, 12, 9, 6, 4, 12, 11, 4, 6, 8, 7, 6, 12, 11, 9]
tensor([[ 5],
        [12],
        [ 9],
        [ 6],
        [ 4],
        [12],
        [11],
        [ 4],
        [ 6],
        [ 8],
        [ 7],
        [ 6],
        [12],
        [11],
        [ 9]])
[12, 7, 5, 10, 5, 10, 9, 4, 7, 12]
tensor([[12],
        [ 7],
        [ 5],
        [10],
        [ 5],
        [10],
        [ 9],
        [ 4],
        [ 7],
        [12]])
[8, 10, 6, 8, 12, 11, 6, 9]
tensor([[ 8],
        [10],
        [ 6],
        [ 8],
        [12],
        [11],
        [ 6],
        [ 9]])
[6, 6, 7, 12, 8, 10, 8]
tensor([[ 6],
        [ 6],
        [ 7],
        [12],
        [ 8],
        [10],
        [ 8]])
[6, 6]
tensor([[6],
        [6]])
[6, 

[10, 12, 12, 5, 5, 7, 7, 11, 10, 12, 11, 12, 10, 10, 9, 5, 10]
tensor([[10],
        [12],
        [12],
        [ 5],
        [ 5],
        [ 7],
        [ 7],
        [11],
        [10],
        [12],
        [11],
        [12],
        [10],
        [10],
        [ 9],
        [ 5],
        [10]])
[8, 8, 8, 10]
tensor([[ 8],
        [ 8],
        [ 8],
        [10]])
[6, 4, 8, 8, 4, 4, 5, 7, 10, 9, 7, 10, 11, 12, 7, 6, 8, 9, 5, 11]
tensor([[ 6],
        [ 4],
        [ 8],
        [ 8],
        [ 4],
        [ 4],
        [ 5],
        [ 7],
        [10],
        [ 9],
        [ 7],
        [10],
        [11],
        [12],
        [ 7],
        [ 6],
        [ 8],
        [ 9],
        [ 5],
        [11]])
[7, 11, 7, 8, 10, 5, 8, 8]
tensor([[ 7],
        [11],
        [ 7],
        [ 8],
        [10],
        [ 5],
        [ 8],
        [ 8]])
[11, 4, 10, 8, 4, 8, 8, 10]
tensor([[11],
        [ 4],
        [10],
        [ 8],
        [ 4],
        [ 8],
        [ 8],
        [10]])

KeyboardInterrupt: 

In [None]:
hidden_size = 8

# inputs, outputs, seq_pairs 
encoder = GRUEncoder(len(target_letter_to_int), hidden_size)

attn_decoder = AttnDecoder(hidden_size, len(target_letter_to_int), dropout_p=0.1)

# all 32000
trainIters(encoder, attn_decoder, 30000, print_every=500)

In [None]:
len(target_letter_to_int)

In [None]:
def evaluate(encoder, decoder, input_sequence, max_length=MAX_LENGTH):
    # To run the model, pass in a  vector
    # Here we don't need to train, so the code is wrapped in torch.no_grad()
    with torch.no_grad():
        
        input_tensor = input_sequence
        
        input_length = input_tensor.size()[0]
        
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            
            encoder_outputs[ei] += encoder_output[0, 0]
            
        # SOS
        decoder_input = torch.tensor([[target_letter_to_int.get('<GO>')]], device=device)  

        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            
            
            if topi.item() == target_letter_to_int.get('<EOS>'):
                decoded_words.append('<EOS>')
                break
            else:
                # 序号 to digit
                decoded_words.append(source_int_to_letter[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]

In [30]:
def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(source_target_pair)
        
        print('>', pair[0])
        
        # 之前插入了 EOS, 去除最后一个字符
        print('=', pair[1]) 
        
        # 原始序列映射
        digit_id_sequence = [source_letter_to_int.get(str(letter), source_letter_to_int['<UNK>']) for letter in pair[0]]
        print("~", digit_id_sequence)
        
        # 转换为 tensor
        input_tensor = torch.tensor(digit_id_sequence).view(-1,1)
        # print(input_tensor)
        
        output_words, attentions = evaluate(encoder, decoder, input_tensor)
        
        output_sentence = ', '.join(output_words[:-1])
        
        print('< [', output_sentence,']',sep='')
        print('')

In [31]:
evaluateRandomly(encoder, attn_decoder)

> ['3', '2', '3', '9', '6', '1', '9', '3', '5', '8', '7', '7', '6', '1', '2', '8', '4', '3']
= ['3', '4', '8', '2', '1', '6', '7', '7', '8', '5', '3', '9', '1', '6', '9', '3', '2', '3']
~ [4, 11, 4, 12, 8, 7, 12, 4, 5, 10, 6, 6, 8, 7, 11, 10, 9, 4]
< [3, 4, 6, 8, 1, 1, 1, 1, 1, 1, 6, 6, 6]

> ['1', '4', '2', '9', '3', '9', '5', '8']
= ['8', '5', '9', '3', '9', '2', '4', '1']
~ [7, 9, 11, 12, 4, 12, 5, 10]
< [8, 5, 3, 1, 1, 1, 1, 1]

> ['8', '9', '4', '6', '1', '9', '3', '9', '3', '2', '3', '8', '9', '7', '5']
= ['5', '7', '9', '8', '3', '2', '3', '9', '3', '9', '1', '6', '4', '9', '8']
~ [10, 12, 9, 8, 7, 12, 4, 12, 4, 11, 4, 10, 12, 6, 5]
< [5, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

> ['3', '9', '4', '1', '9', '4', '6', '8', '9', '8', '3', '6', '1', '1', '2', '1', '4']
= ['4', '1', '2', '1', '1', '6', '3', '8', '9', '8', '6', '4', '9', '1', '4', '9', '3']
~ [4, 12, 9, 7, 12, 9, 8, 10, 12, 10, 4, 8, 7, 7, 11, 7, 9]
< [4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

> ['9', '5', '3', '2'

In [None]:
with open('./dataset/task8_test_input.csv', 'r') as f:
    test_input_data = [''.join(line.split(',')).strip('\n').split('0')[0] for line in f.readlines()]

# 将每一行转换成字符id的list
digit_id_sequence = [[source_letter_to_int.get(letter,source_letter_to_int['<UNK>'])
               for letter in line] for line in test_input_data]

In [None]:
digit_id_sequence[:5]

In [None]:
def predict(encoder, decoder, test_sequence, n=len(digit_id_sequence)):
    for i in range(n):
        pair = random.choice(seq_pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

In [None]:
len(digit_id_sequence)

In [None]:
digit_id_sequence[0]

In [None]:
for i in range(10,20):
    
    seq = [int(i) for i in test_input_data[i]]
    
    digit_id_sequence = [source_letter_to_int.get(str(letter), source_letter_to_int['<UNK>']) for letter in seq]

    input_tensor = torch.tensor([int(i) for i in digit_id_sequence]).view(-1,1)

    print('>', seq)

    output_words, attentions = evaluate(encoder, attn_decoder, input_tensor)
        
    output_sentence = ', '.join(output_words[:-1])
        
    print('< [', output_sentence, ']',sep='')
        
    print('\n')

可以看到，由于参数设置原因，效果不是很好。另外的 TensorFlow 实现换了一套参数后，效果有了明显的提升。

In [None]:
def pad_sentence_batch(sentence_batch, pad_int, max_sequence_length):
    '''
    descirption: 对batch中的序列进行补全，保证 batch 中的每行都有相同的 sequence_length
    
    params：
    - sentence batch
    - pad_int: <PAD> 对应索引号
    '''
    # max_sentence = max([len(sentence) for sentence in sentence_batch])
    # return [sentence + [pad_int] * (max_sentence - len(sentence)) for sentence in sentence_batch]
    
    return [sentence + [pad_int] * (max_length - len(sentence)) for sentence in sentence_batch]


def get_batches(targets, sources, batch_size, source_pad_int, target_pad_int):

    for batch_i in range(0,len(sources)//batch_size):
        start_i = batch_i * batch_size
        sources_batch = sources[start_i : start_i + batch_size]
        targets_batch = targets[start_i : start_i + batch_size]

        pad_sources_batch = np.array(pad_sentence_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_sentence_batch(targets_batch, target_pad_int))

        targets_lengths = []
        for target in targets_batch:
            targets_lengths.append(len(target))

        source_lengths = []
        for source in sources_batch:
            source_lengths.append(len(source))

        yield pad_targets_batch, pad_sources_batch, targets_lengths, source_lengths

In [None]:
# Train
train_source = source_int[batch_size:]
train_target = target_int[batch_size:]

# 留出一个batch进行验证
valid_source = source_int[:batch_size]
valid_target = target_int[:batch_size]

(valid_targets_batch, valid_sources_batch, valid_targets_lengths, valid_sources_lengths) = next(get_batches(valid_target, valid_source, batch_size,
                           source_letter_to_int['<PAD>'],
                           target_letter_to_int['<PAD>']))

In [None]:
a = torch.linspace(0,15,6).view(2,3)
print("a:",a)

In [None]:
print("a.sort(2):\n",a.sort(dim=0))  # 在某个维度上排序

In [None]:
print("a.sort(2):\n",a.sort(dim=1))  # 在某个维度上排序
print("a.topk(2):\n",a.topk(2,dim=1))  # 在某个维度上寻找top-k
print("t.max(a):\n",t.max(a))  # 不输入dim的话就是普通的max
print("t.max(a,dim=1):\n",t.max(a,dim=1))  # 输入dim的话就会集成argmax的功能

In [None]:
x = torch.tensor([[5.5, 3],[1,2]]) #构建指定元素的矩阵

In [None]:
x

In [None]:
x.max(0)

In [None]:
topv, topi = x.topk(1)

In [None]:
topv

In [None]:
topi

In [None]:
topi.squeeze()[0].item()

In [None]:
v = topi.squeeze().detach()

In [None]:
v.item()

In [None]:
x = torch.tensor([[1]]) #构建指定元素的矩阵

In [None]:
x.squeeze().shape

In [None]:
output = torch.tensor([[-2.5267, -2.4569, -2.2682, -2.9591, -2.5931, -2.8346, -2.4738, -2.4122,
                    -2.6883, -2.5823, -2.4317, -2.8451, -2.5012]])

In [None]:
topv, topi = output.topk(1)

In [None]:
topv, topi 

In [None]:
topi.squeeze().detach()