From 4f35e0a18ade83ee46718a3a8b4f6f0790f9da58 Mon Sep 17 00:00:00 2001
From: Liyuan Liu
Date: Thu, 16 Dec 2021 12:47:58 -0600
Subject: [PATCH] Update utils.py

https://github.com/LiyuanLucasLiu/LM-LSTM-CRF/issues/71
---
 model/utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/model/utils.py b/model/utils.py
index 05018e9..23da958 100644
--- a/model/utils.py
+++ b/model/utils.py
@@ -527,7 +527,9 @@ def construct_bucket_vb_wc(word_features, forw_features, fea_len, input_labels,
     Construct bucket by thresholds for viterbi decode, word-level and char-level
     """
     # construct corpus for language model pre-training
-    forw_corpus = [pad_char_feature] + list(reduce(lambda x, y: x + [pad_char_feature] + y, forw_features)) + [pad_char_feature]
+    forw_corpus = [pad_char_feature]
+    for forw_feature in forw_features:
+        forw_corpus.extend(forw_feature + [pad_char_feature])
     back_corpus = forw_corpus[::-1]
     # two way construct, first build the bucket, then calculate padding length, then do the padding
     buckets = [[[], [], [], [], [], [], [], []] for ind in range(len(thresholds))]