# seq2seq-Test beam search

In [3]:
import tensorflow as tf
def config_gpu():
    """Turn on on-demand GPU memory growth for every visible physical GPU.

    Prints one summary line with the physical and logical GPU counts. If
    TensorFlow raises ``RuntimeError`` (memory growth must be set before the
    GPUs are initialised), the error is printed instead of propagated.
    Does nothing when no GPU is visible.
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if not gpus:
        return
    try:
        # Memory growth has to be configured on every GPU before the runtime
        # initialises the devices.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Listing logical devices initialises the runtime, so it must only
        # happen once all GPUs are configured; doing it inside the loop made
        # set_memory_growth fail for every GPU after the first.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

In [4]:
config_gpu()

1. nvidia驱动   nvidia-smi
2. CUDA

![](https://img-blog.csdn.net/20180414103300419)

解码是seq2seq模型的常见问题，常用方法有贪心搜索`（Greedy Search）`和集束搜索`（Beam Search）`。

Decoder根据Encoder的中间语义编码向量c和`<s>`标签得到第一个输出的概率分布`[0.1,0.1,0.3,0.4,0.1]`，选择概率最大的`0.4`，即`moi`。

根据隐向量h1和moi得到第二个输出的概率分布`[0.1,0.1,0.1,0.1,0.6]`，选择概率最大的`0.6`，即`suis`。

以此类推，直到遇到`</s>`标签，得到最终的序列`moi suis étudiant`.

# 集束搜索

上面的贪心搜索只选择了概率最大的一个，而集束搜索则选择了概率最大的前k个。这个k值也叫做集束宽度（Beam Width）。

还是以上面的例子作为说明，k值等于2，则集束搜索的过程如下图：

![image.png](https://img-blog.csdn.net/20180414113522371?watermark/2/text/aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2d1b2xpbmRvbmdnbGQ=/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70)

得到第一个输出的概率分布`[0.1,0.1,0.3,0.4,0.1]`，选择概率最大的前两个，`0.3`和`0.4`，即`Je`和`moi`。

然后`Je`和`moi`分别作为`Decoder`的输入，得到两个概率分布，然后再选择概率和最大的前两个序列，`0.3+0.8`和`0.4+0.6`，即`Je suis`和`moi suis`。

以此类推，最终可以得到两个序列，即`Je suis étudiant`和`moi suis étudiant`，很明显前者的概率和最大，为`2.2`，所以这个序列是最终得到的结果。（这里用概率相加只是为了便于说明；实际实现中序列得分是各步对数概率之和，即各步概率的乘积，下文代码中的`log_probs`即是如此。）

集束搜索本质上也是贪心的思想，只不过它考虑了更多的候选搜索空间，因此可以得到更多的翻译结果。

集束搜索可以认为是维特比算法的贪心形式。在维特比算法中，由于利用动态规划，当字典较大时效率很低；而集束搜索使用beam size参数来限制每一步保留下来的候选词数量。集束搜索是在测试阶段为了获得更好的准确性而采取的一种策略，在训练阶段无需使用。


预测的时候,假设词表大小为3,内容为a,b,c。 beam size是2, decoder解码的时候

1:生成第1个词的时候,选择概率最大的2个词,假设为a,c,那么当前的2个序列就是a和c。

2:生成第2个词的时候,我们将当前序列a和c,分别与词表中的所有词进行组合,得到新的6个序列 aa ab ac ca cb cc,计算每个序列的得分并选择得分最高2个序列,作为新的当前序列,假如为aa、cb。

3:后面会不断重复这个过程,直到遇到结束符或者达到最大长度为止。最终输出得分最高的2个序列

In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append('/Users/ianxiao/Code/DeepLearning/class04')
from utils.wv_loader import Vocab
from utils.data_loader import load_dataset
from utils.config import *
import numpy as np
from utils.gpu_utils import config_gpu
from seq2seq_tf2.seq2seq_model import Seq2Seq

Building prefix dict from the default dictionary ...
2020-07-14 21:22:32,033 : DEBUG : Building prefix dict from the default dictionary ...
Loading model from cache /var/folders/4m/kfgtcp1x5gbbvg4f44kcwwgw0000gn/T/jieba.cache
2020-07-14 21:22:32,034 : DEBUG : Loading model from cache /var/folders/4m/kfgtcp1x5gbbvg4f44kcwwgw0000gn/T/jieba.cache
Loading model cost 0.740 seconds.
2020-07-14 21:22:32,774 : DEBUG : Loading model cost 0.740 seconds.
Prefix dict has been built successfully.
2020-07-14 21:22:32,776 : DEBUG : Prefix dict has been built successfully.


# 1. GPU设置

In [7]:
config_gpu()

# 2. 加载数据 

## 2.1 加载vocab

In [8]:
vocab = Vocab(vocab_file=vocab_path)

In [9]:
vocab.count

31819

## 2.2 基本参数设置

In [10]:
# Hyper-parameters shared by the model constructor, the data loader and beam search.
params = {}

# Model dimensions; vocab_size is taken from the loaded vocabulary.
params["vocab_size"] = vocab.count
params["embedding_dim"] = 300
params["enc_units"] = 512
params["attn_units"] = 512
params["dec_units"] = 512

# Sequence-length limits and training settings; max_enc_len is passed to
# load_test_dataset, and batch_size/epochs/max_enc_len/embedding_dim are also
# used to build the result file name.
params["max_enc_len"] = 200
params["max_dec_len"] = 41
params["epochs"] = 5
params["batch_size"] = 32 
# Beam-search settings: beam width, the minimum step count before a finished
# hypothesis is accepted, and the hard limit on decoding steps.
params["beam_size"]=3
params['min_dec_steps']=4
params['max_dec_steps']=50

## 2.3 加载数据集

In [11]:
train_X,train_Y,test_X = load_dataset()

In [12]:
len(train_X), len(train_Y), len(test_X)

(82873, 82873, 20000)

# 3. 载入训练好的模型

In [13]:
model = Seq2Seq(params)

In [14]:
from utils.config import seq2seq_checkpoint_dir,seq2seq_checkpoint_prefix

In [15]:
seq2seq_checkpoint_dir

'/Users/ianxiao/Code/DeepLearning/class04/data/checkpoints/training_checkpoints_seq2seq'

In [16]:
ckpt = tf.train.Checkpoint(Seq2Seq=model)
ckpt_manager = tf.train.CheckpointManager(ckpt, seq2seq_checkpoint_dir, max_to_keep=5)

In [17]:
# Restore the newest checkpoint if one exists; otherwise say so explicitly,
# because decoding with an unrestored model would silently produce garbage.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Restored from {}".format(ckpt_manager.latest_checkpoint))
else:
    print("No checkpoint found in {}; the model weights were NOT restored".format(seq2seq_checkpoint_dir))

Restored from /Users/ianxiao/Code/DeepLearning/class04/data/checkpoints/training_checkpoints_seq2seq/ckpt-3


# 4. 保存中间信息的类 

用于beam search解码过程

In [18]:
class Hypothesis:
    """A partial output sequence tracked during beam-search decoding.

    Bundles the decoded token ids with their log-probabilities, the decoder
    hidden state reached after the last token, and the attention
    distributions collected along the way.
    """

    def __init__(self, tokens, log_probs, hidden, attn_dists):
        # Token ids decoded so far, from step 0 up to the current step.
        self.tokens = tokens
        # Log-probabilities, expected to stay aligned with `tokens`.
        self.log_probs = log_probs
        # Decoder hidden state after decoding the most recent token.
        self.hidden = hidden
        # Attention distributions gathered for the decoded tokens.
        self.attn_dists = attn_dists
        # Human-readable text; filled in by the caller once decoding is done.
        self.abstract = ""

    def extend(self, token, log_prob, hidden, attn_dist):
        """Return a new Hypothesis that is this one plus one more decoded token.

        The lists of the current hypothesis are copied, not mutated, so several
        children can be branched off the same parent.
        """
        grown_tokens = self.tokens + [token]
        grown_log_probs = self.log_probs + [log_prob]
        grown_attn_dists = self.attn_dists + [attn_dist]
        return Hypothesis(grown_tokens, grown_log_probs, hidden, grown_attn_dists)

    @property
    def latest_token(self):
        """The most recently decoded token id."""
        last_position = len(self.tokens) - 1
        return self.tokens[last_position]

    @property
    def tot_log_prob(self):
        """Sum of all stored log-probabilities."""
        return sum(self.log_probs)

    @property
    def avg_log_prob(self):
        """Total log-probability divided by the number of tokens."""
        token_count = len(self.tokens)
        return self.tot_log_prob / token_count

# 5. 单次搜索

## 5.1 构造输入

In [19]:
params["beam_size"]

3

In [20]:
row = test_X[:1]
print(row)
row.shape

[[    2  1080   280    27     8     5    70    22    61    62     4  1365
    302     4   758   192   231    85   116    17    77     7   215  3719
   1365    35   758  3094    77   231   116    17     4   280    27    18
   1314   281   873   173   587   535   285     7  1397   207    62 10334
     27     4   572  9753    27     4    27   390  1761    27   634  1870
      4   623  4513  4513   491     4    35   116    18    17    77     7
      5    88     6     3     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0     0     0     0     0     0     0
      0     0     0     0     0     0 

(1, 200)

In [21]:
beam_search_data = tf.convert_to_tensor([row for i in range(params["beam_size"])])

In [22]:
beam_search_data.shape

TensorShape([3, 1, 200])

In [23]:
# Drop only the singleton middle axis: (beam, 1, len) -> (beam, len).
# A bare tf.squeeze would also drop the beam axis when beam_size == 1.
beam_search_data = tf.squeeze(beam_search_data, axis=1)

In [24]:
enc_inp=beam_search_data

In [25]:
enc_inp.shape

TensorShape([3, 200])

In [26]:
enc_hidden = tf.zeros((params["beam_size"], params['enc_units']))

In [27]:
enc_output, enc_hidden = model.encoder(enc_inp, enc_hidden)

In [28]:
enc_output.shape, enc_hidden.shape

(TensorShape([3, 200, 512]), TensorShape([3, 512]))

## 5.2 初始化一个Hypothesis类对象列表

**hyps列表中的每个对象 都包含 用于解码的三种信息 当前输入（tokens\[-1\]）、上一步隐状态（hidden）、注意力权重分布(attn_dists)**

In [29]:
hyps = [Hypothesis(tokens=[vocab.START_DECODING_INDEX],
                   log_probs=[0.0],
                   hidden=enc_hidden[0],
                   attn_dists=[],
                   ) for i in range(params['beam_size'])]

In [30]:
len(hyps)

3

In [31]:
hyps[0].tokens, hyps[1].tokens

([2], [2])

**获取最新tokens**

In [32]:
latest_tokens = [h.latest_token for h in hyps]

In [33]:
latest_tokens

[2, 2, 2]

**隐藏层状态**

In [34]:
hiddens = [h.hidden for h in hyps]

In [35]:
len(hiddens)

3

## 5.3 单步运行decode

In [36]:
import numpy as np
def log_softmax(x, axis=None):
    """Numerically stable log-softmax.

    Args:
        x: array-like of logits.
        axis: axis to normalise over. The default ``None`` normalises over
            the whole array, which is the original behaviour and the right
            choice for a single 1-D logit vector. Pass ``axis=-1`` for a
            batch of logit rows so that each row is normalised on its own.

    Returns:
        Array of the same shape as ``x`` containing ``x - logsumexp(x)``
        along ``axis``.
    """
    x = np.asarray(x)
    # Subtracting the maximum keeps exp() from overflowing without changing
    # the result, because log-softmax is invariant to a constant shift.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    log_normaliser = np.log(np.sum(np.exp(shifted), axis=axis, keepdims=True))
    return shifted - log_normaliser

In [37]:
logits = np.array([123, 456, 789], dtype=np.float32)
print(log_softmax(logits))

[-666. -333.    0.]


In [38]:
logits = tf.convert_to_tensor(logits)
print(tf.nn.log_softmax(logits))

tf.Tensor([-666. -333.    0.], shape=(3,), dtype=float32)


In [39]:
def decoder_onestep(enc_output,dec_input,dec_hidden):
    """Run the decoder for one time step and pick the top ``beam_size`` tokens per row.

    Relies on the notebook-level ``model`` and ``params``.

    Args:
        enc_output: encoder outputs passed through to the decoder.
        dec_input: token ids fed to the decoder at this step.
        dec_hidden: decoder hidden state from the previous step.

    Returns:
        Tuple ``(preds, dec_hidden, context_vector, attention_weights,
        top_k_log_probs, top_k_ids)`` where ``preds`` is the softmax over the
        decoder output and the two ``top_k_*`` tensors have one row per input
        row and ``params["beam_size"]`` columns.
    """
    # One decoder step. The raw output is treated as unnormalised scores,
    # as the original code did by applying softmax to it.
    logits, dec_hidden, context_vector, attention_weights = model.call_decoder_onestep(dec_input, dec_hidden, enc_output)
    preds = tf.nn.softmax(logits, axis=-1)
    # log_softmax avoids the -inf that log(softmax(x)) yields when a
    # probability underflows to zero.
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    # Flatten to (rows, vocab) explicitly. A bare tf.squeeze would also drop
    # the row axis when beam_size == 1 and break the [i, j] indexing later on.
    log_probs = tf.reshape(log_probs, [-1, tf.shape(log_probs)[-1]])
    top_k_log_probs, top_k_ids = tf.nn.top_k(log_probs, k=params["beam_size"])
    return preds,dec_hidden,context_vector,attention_weights,top_k_log_probs,top_k_ids

In [40]:
len(latest_tokens), latest_tokens

(3, [2, 2, 2])

In [41]:
vocab.START_DECODING_INDEX

2

In [42]:
# 第一个decoder输入 开始标签
dec_input = tf.expand_dims(latest_tokens, 1)

In [43]:
# decoder 第一个隐藏层输入
dec_hidden = enc_hidden

In [44]:
# 单步运行
preds, dec_hidden, context_vector,attention_weights, top_k_log_probs, top_k_ids = decoder_onestep(enc_output,dec_input,dec_hidden)

In [45]:
preds

<tf.Tensor: shape=(3, 31819), dtype=float32, numpy=
array([[1.03091296e-08, 1.08978355e-08, 1.12608536e-08, ...,
        1.07896243e-08, 1.07373355e-08, 1.10080984e-08],
       [1.03091304e-08, 1.08978373e-08, 1.12608545e-08, ...,
        1.07896261e-08, 1.07373364e-08, 1.10081002e-08],
       [1.03091287e-08, 1.08978346e-08, 1.12608527e-08, ...,
        1.07896234e-08, 1.07373346e-08, 1.10080975e-08]], dtype=float32)>

In [46]:
top_k_log_probs

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-1.8029443, -2.9281745, -3.149047 ],
       [-1.8029442, -2.9281745, -3.149047 ],
       [-1.8029443, -2.9281747, -3.1490471]], dtype=float32)>

In [47]:
dec_hidden.shape, context_vector.shape, attention_weights.shape

(TensorShape([3, 512]), TensorShape([3, 512]), TensorShape([3, 200, 1]))

In [48]:
top_k_log_probs.shape, top_k_ids.shape

(TensorShape([3, 3]), TensorShape([3, 3]))

In [49]:
top_k_ids

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[  88,   35, 1306],
       [  88,   35, 1306],
       [  88,   35, 1306]], dtype=int32)>

In [50]:
# 第一轮
results = []  # list to hold the top beam_size hypothesises
steps = 0  # initial step

In [51]:
# Collects every candidate hypothesis produced at this step.
all_hyps = []
# Number of hypotheses to expand. At step 0 all hypotheses are identical
# copies of the start state, so expanding only the first avoids duplicates.
num_orig_hyps = 1 if steps == 0 else len(hyps)

# Expand each live hypothesis with its top candidates.
for i in range(num_orig_hyps):
    h, new_hidden, attn_dist = hyps[i], dec_hidden[i], attention_weights[i]
    # Branch into beam_size children, one per top-k token of row i.
    for j in range(params['beam_size']):
        # Build the child hypothesis for the j-th best token.
        new_hyp = h.extend(token = top_k_ids[i, j].numpy(),
                                       log_prob = top_k_log_probs[i, j],
                                       hidden = new_hidden,
                                       attn_dist = attn_dist)
        # Keep the candidate for ranking in the next cell.
        all_hyps.append(new_hyp)

In [52]:
all_hyps

[<__main__.Hypothesis at 0x139197290>,
 <__main__.Hypothesis at 0x139197310>,
 <__main__.Hypothesis at 0x1391972d0>]

vocab {a b c e f p}

token c b f 
1. `<start> c ` 
2. `<start> b`
3. `<start> f`

In [53]:
for elm_hyps in all_hyps:
    print(elm_hyps.tokens)
    print(elm_hyps.log_probs)

[2, 88]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-1.8029443>]
[2, 35]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-2.9281745>]
[2, 1306]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-3.149047>]


## 5.4 排序

In [54]:
# 重置
hyps = []

In [55]:
# 按照概率来排序
sorted_hyps = sorted(all_hyps, key=lambda h: h.avg_log_prob, reverse=True)

In [56]:
for elm_hyps in sorted_hyps:
    print(elm_hyps.tokens)
    print(elm_hyps.log_probs)

[2, 88]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-1.8029443>]
[2, 35]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-2.9281745>]
[2, 1306]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-3.149047>]


In [57]:
# Keep the best beam_size hypotheses (top 3 here) from the ranked candidates.
for h in sorted_hyps:
    if h.latest_token == vocab.STOP_DECODING_INDEX:
        # The hypothesis ended with the stop token: accept it as a result
        # only if enough steps were decoded; otherwise it is dropped.
        if steps >= params['min_dec_steps']:
            results.append(h)
    else:
        # Not finished yet: keep it alive for the next decoding step.
        hyps.append(h)
    
    # Stop as soon as either the live beam or the result set is full.
    if len(hyps) == params['beam_size'] or len(results) == params['beam_size']:
        break
# NOTE: this cell mutates `steps`, `hyps` and `results`, so re-running it
# without re-running the cells above changes the outcome.
steps += 1

In [58]:
for h in hyps:
    print(h.tokens)
    print(h.log_probs)

[2, 88]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-1.8029443>]
[2, 35]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-2.9281745>]
[2, 1306]
[0.0, <tf.Tensor: shape=(), dtype=float32, numpy=-3.149047>]


In [59]:
len(hyps),len(results)

(3, 0)

In [60]:
if len(results) == 0:
    results = hyps

## 5.5 结果

In [61]:
hyps_sorted = sorted(results, key=lambda h: h.avg_log_prob, reverse=True)

In [62]:
best_hyp = hyps_sorted[0] #平均log_prob最高的句子

In [63]:
best_hyp.abstract = " ".join([vocab.id2word[index] for index in best_hyp.tokens])

In [64]:
best_hyp.tokens

[2, 88]

In [65]:
best_hyp.abstract

'<START> 你好'

References

[1] https://www.tensorflow.org/tutorials/seq2seq

[2] https://blog.csdn.net/guolindonggld/article/details/79938567

# 6. beam search 方法整合

In [66]:
def batch_beam_decode(model,batch_data,vocab, params):
    """Beam-search decode one source sequence and return its best hypothesis.

    Args:
        model: object exposing ``encoder(inputs, hidden)`` and
            ``call_decoder_onestep(dec_input, dec_hidden, enc_output)``.
        batch_data: encoder input with ``params["beam_size"]`` rows. The
            callers in this notebook pass the same source sequence repeated
            once per beam slot, and the initial hypotheses all start from
            row 0 of the encoder state on that assumption.
        vocab: provides ``START_DECODING_INDEX``, ``STOP_DECODING_INDEX`` and
            the ``id2word`` lookup.
        params: dict read for ``beam_size``, ``enc_units``, ``min_dec_steps``
            and ``max_dec_steps``.

    Returns:
        The ``Hypothesis`` with the highest average log-probability. Its
        ``abstract`` attribute is set to the space-joined decoded words,
        start/stop markers included. Its ``log_probs`` are plain floats.
    """
    beam_size = params["beam_size"]
    # Every live hypothesis is expanded into 2 * beam_size candidates, so the
    # beam can still be filled after candidates that end too early are dropped.
    num_candidates = beam_size * 2

    def decoder_onestep(enc_output, dec_input, dec_hidden):
        """Run one decoder step and return the state plus top candidates as NumPy arrays."""
        logits, dec_hidden, _, attention_weights = model.call_decoder_onestep(dec_input, dec_hidden, enc_output)
        # log_softmax avoids the -inf that log(softmax(x)) yields when a
        # probability underflows to zero.
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        # Flatten to (rows, vocab) explicitly. A bare tf.squeeze would also
        # drop the row axis when beam_size == 1 and break [i, j] indexing.
        log_probs = tf.reshape(log_probs, [-1, tf.shape(log_probs)[-1]])
        top_k_log_probs, top_k_ids = tf.nn.top_k(log_probs, k=num_candidates)
        # Convert once per step instead of slicing tensors element by element.
        return dec_hidden, attention_weights, top_k_log_probs.numpy(), top_k_ids.numpy()

    # Encode the source once; the zero tensor is the encoder's initial state.
    enc_hidden = tf.zeros((beam_size, params['enc_units']))
    enc_output, enc_hidden = model.encoder(batch_data, enc_hidden)

    # Start with beam_size identical hypotheses holding only the start token.
    hyps = [Hypothesis(tokens=[vocab.START_DECODING_INDEX],
                       log_probs=[0.0],
                       hidden=enc_hidden[0],
                       attn_dists=[],
                       ) for _ in range(beam_size)]

    results = []  # finished hypotheses (ended with the stop token)
    steps = 0

    # Keep decoding until the step limit is hit or enough results are collected.
    while steps < params['max_dec_steps'] and len(results) < beam_size:
        # Feed each hypothesis' last token and hidden state as one batch.
        dec_input = tf.expand_dims([h.latest_token for h in hyps], 1)
        dec_hidden = tf.stack([h.hidden for h in hyps], axis=0)
        dec_hidden, attention_weights, top_k_log_probs, top_k_ids = decoder_onestep(enc_output, dec_input, dec_hidden)

        # At step 0 all hypotheses are identical, so expand only the first
        # one to avoid duplicate candidates.
        num_orig_hyps = 1 if steps == 0 else len(hyps)

        all_hyps = []
        for i in range(num_orig_hyps):
            h, new_hidden, attn_dist = hyps[i], dec_hidden[i], attention_weights[i]
            for j in range(num_candidates):
                all_hyps.append(h.extend(token=top_k_ids[i, j],
                                         log_prob=float(top_k_log_probs[i, j]),
                                         hidden=new_hidden,
                                         attn_dist=attn_dist))

        # Rebuild the beam from the best-scoring candidates.
        hyps = []
        for h in sorted(all_hyps, key=lambda h: h.avg_log_prob, reverse=True):
            if h.latest_token == vocab.STOP_DECODING_INDEX:
                # Finished: accept only if it is long enough, else drop it.
                if steps >= params['min_dec_steps']:
                    results.append(h)
            else:
                # Unfinished: keep it alive for the next step.
                hyps.append(h)

            # Stop once either the live beam or the result set is full.
            if len(hyps) == beam_size or len(results) == beam_size:
                break

        steps += 1

    # No hypothesis finished in time: fall back to the unfinished beam.
    if len(results) == 0:
        results = hyps

    # max() returns the first maximal element, the same one a stable
    # descending sort would place at index 0.
    best_hyp = max(results, key=lambda h: h.avg_log_prob)
    best_hyp.abstract = " ".join([vocab.id2word[index] for index in best_hyp.tokens])
    return best_hyp

## 6.1 读取模型

In [67]:
model = Seq2Seq(params)
ckpt = tf.train.Checkpoint(Seq2Seq=model)
ckpt_manager = tf.train.CheckpointManager(ckpt, seq2seq_checkpoint_dir, max_to_keep=5)
# Restore the newest checkpoint if one exists; otherwise say so explicitly,
# because decoding with an unrestored model would silently produce garbage.
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Restored from {}".format(ckpt_manager.latest_checkpoint))
else:
    print("No checkpoint found in {}; the model weights were NOT restored".format(seq2seq_checkpoint_dir))

Restored from /Users/ianxiao/Code/DeepLearning/class04/data/checkpoints/training_checkpoints_seq2seq/ckpt-3


## 6.2 构造数据

In [68]:
from utils.data_loader import load_test_dataset
def beam_batch_generator(beam_size, data_X):
    """Lazily yield one tensor per row of ``data_X`` holding ``beam_size`` copies of that row."""
    for sample in data_X:
        replicated = [sample] * beam_size
        yield tf.convert_to_tensor(replicated)

In [69]:
# 加载数据集
test_X = load_test_dataset(max_enc_len=params['max_enc_len'])
test_dataset = beam_batch_generator(params['beam_size'], test_X)

In [70]:
test_dataset

<generator object beam_batch_generator at 0x13d848650>

## 6.3 单条预测

In [71]:
row = test_X[4396:4397]

In [72]:
batch_data = tf.convert_to_tensor([row for i in range(params["beam_size"])])

In [73]:
# Drop only the singleton middle axis: (beam, 1, len) -> (beam, len).
# A bare tf.squeeze would also drop the beam axis when beam_size == 1.
batch_data = tf.squeeze(batch_data, axis=1)

In [74]:
batch_data # row 4396 of the test set, replicated beam_size times so beam search can decode it as one batch

<tf.Tensor: shape=(3, 200), dtype=int32, numpy=
array([[    2,   503,  1696,  3620,  3418,   701,     1, 22339,   261,
          928,  1718,     4,  1408, 15292,     8,  5676,  5171,     8,
         2319,  2104,    15,     7,    30,    16,     5, 16340,   182,
         2180,  4414,     1,  7296,  3088,     6,    41,     1,    27,
         3329,   422, 26936,  4490,   272,    56,   128,     1,   734,
        11599,     7,  5676,   924,     8,     5,  7428,     6,   591,
         5569,     6,     6,     5,    31,  1349,   900,     6,     6,
         7665,   229,    60,     5,    85,   205,    27,     1,    15,
            6,    12,     4,    80,    12,     5,    10,  8881,    12,
            6,     5,   218,  4527,  2404,     4,   101,  1336,  6621,
          358,     1,  1139,   182,     6,    80,    10,     1,     5,
          183,  1647,   921,    10, 16589,     5,    10,  2137,  1376,
            6,    12,     4,    30,     7,     5,   127,     9,   277,
            6,    12,     3, 

In [75]:
# 获得最好的语句
best_hyp=batch_beam_decode(model, batch_data,vocab, params)

In [94]:
best_hyp.abstract

'<START> 不是 假 机油 ， 假 假 机油 <STOP>'

In [103]:
predict = best_hyp.abstract

In [104]:
predict

'<START> 不是 假 机油 ， 假 假 机油 <STOP>'

In [105]:
predict = predict.replace('<START>','')
predict = predict.replace('<STOP>','')
predict = predict.strip()
predict

'不是 假 机油 ， 假 假 机油'

# 7. 结果预测及保存

In [77]:
test_X = load_test_dataset(max_enc_len=params['max_enc_len'])
test_dataset = beam_batch_generator(params['beam_size'], test_X)

In [78]:
from tqdm import tqdm
test_dataset_len = len(test_X)

In [79]:
test_dataset_len

20000

In [80]:
def beam_decode(model, dataset, dataset_len, vocab, params):
    """Decode every item of ``dataset`` with beam search and collect the texts.

    ``dataset_len`` is only used as the total of the progress bar. Returns a
    list with the ``abstract`` string of the best hypothesis for each item,
    in input order.
    """
    progress = tqdm(dataset, total=dataset_len)
    return [
        batch_beam_decode(model, item, vocab, params).abstract
        for item in progress
    ]

In [81]:
%%time
results = beam_decode(model, test_dataset, test_dataset_len, vocab, params)

100%|██████████| 20000/20000 [2:31:00<00:00,  2.21it/s]  

CPU times: user 5h 27min 38s, sys: 56min 18s, total: 6h 23min 56s
Wall time: 2h 31min





## 7.2 结果保存

**结果保存路径生成**

In [82]:
from utils.file_utils import get_result_filename
result_save_path = get_result_filename(params["batch_size"],params["epochs"] , params["max_enc_len"], params["embedding_dim"],commit='_4_1_submit_beam_search_seq2seq_code')


In [83]:
# 检测results中是否存在''
for idx,result in enumerate(results):
    if result=='':print(idx)

In [84]:
# 预测结果数据处理
def submit_proc(sentence):
    """Turn a decoded, space-joined token string into a submission answer.

    Removes the spaces between tokens and the ``<START>``/``<STOP>`` markers,
    then strips leading Chinese punctuation. If nothing is left, a fixed
    fallback answer is returned so the submission never has an empty cell.
    """
    sentence = sentence.replace(' ', '')
    sentence = sentence.replace('<START>','')
    sentence = sentence.replace('<STOP>','')
    sentence = sentence.strip()
    # Strip leading punctuation only after the markers are gone. Doing it
    # first (as before) was a no-op, because the text still began with
    # '<START>' at that point.
    sentence = sentence.lstrip('，！。')
    if sentence == '':
        sentence = '随时联系'
    return sentence

In [85]:
import pandas as pd
def save_predict_result(results, result_save_path):
    """Attach the predictions to the test set and write the submission CSV.

    Reads the CSV at the notebook-level ``test_data_path``, stores ``results``
    in a ``Prediction`` column, cleans each entry with ``submit_proc`` and
    writes only the ``QID`` and ``Prediction`` columns to
    ``result_save_path`` without the row index.
    """
    submission = (
        pd.read_csv(test_data_path)
        .assign(Prediction=results)
        .assign(Prediction=lambda frame: frame['Prediction'].apply(submit_proc))
    )
    # Only the question id and the cleaned prediction go into the output file.
    submission[['QID', 'Prediction']].to_csv(result_save_path, index=None, sep=',')

In [86]:
save_predict_result(results, result_save_path)

In [87]:
test_df = pd.read_csv(result_save_path)
test_df.head(5)

Unnamed: 0,QID,Prediction
0,Q1,START>你好！描述，该车发动机烧机油，建议及时进行维修。
1,Q2,START>抛光处理一下！亲
2,Q3,START>气囊问题，价格不贵，单独更换。
3,Q4,START>分析检查排气管漏气。
4,Q5,START>你好！描述，这种情况可能轮胎动平衡问题，建议做个四轮动平衡


In [106]:
test_df['Prediction'] = test_df['Prediction'].apply(lambda x: x.replace('START>', ''))

In [107]:
test_df.head(5)

Unnamed: 0,QID,Prediction
0,Q1,你好！描述，该车发动机烧机油，建议及时进行维修。
1,Q2,抛光处理一下！亲
2,Q3,气囊问题，价格不贵，单独更换。
3,Q4,分析检查排气管漏气。
4,Q5,你好！描述，这种情况可能轮胎动平衡问题，建议做个四轮动平衡


In [108]:
test_df.to_csv(result_save_path, index=None, sep=',')