Skip to content

Commit

Permalink
fix embedding tokenize error
Browse files Browse the repository at this point in the history
add sequence length check in the sequence labeling model
  • Loading branch information
BrikerMan committed Feb 28, 2019
1 parent 3ffb9ce commit 9fc6c2e
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 1 addition & 1 deletion kashgari/embeddings/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def tokenize(self,
def tokenize_sentence(text: TextSeqType) -> TokenSeqType:
tokens = [self.token2idx.get(token, self.token2idx[k.UNK]) for token in text]
if add_bos_eos:
tokens = [self.token2idx[k.BOS]] + tokens + [self.token2idx[k.BOS]]
tokens = [self.token2idx[k.BOS]] + tokens + [self.token2idx[k.EOS]]
return tokens

if is_list:
Expand Down
2 changes: 2 additions & 0 deletions kashgari/tasks/seq_labeling/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def build_token2id_label2id_dict(self,
y_train: List[List[str]],
x_validate: List[List[str]] = None,
y_validate: List[List[str]] = None):
for index in range(len(x_train)):
assert len(x_train[index]) == len(y_train[index])
x_data = x_train
y_data = y_train
if x_validate:
Expand Down

0 comments on commit 9fc6c2e

Please sign in to comment.