Skip to content

Commit

Permalink
fix(Vocabulary): fix the bug in checking whether a vocabulary has UNK/PAD tokens.
Browse files Browse the repository at this point in the history
  • Loading branch information
JT-Ushio committed Jan 9, 2019
1 parent 5e98651 commit 0280198
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions antu/io/vocabulary.py
Expand Up @@ -107,12 +107,12 @@ def extend_from_pretrained_vocab(

cnt = 0
# Handle unknown token
if not no_unk_namespace and vocab_name not in no_unk_namespace:
if vocab_name not in no_unk_namespace:
self.vocab[vocab_name][self._UNK_token] = cnt
cnt += 1

# Handle padding token
if not no_pad_namespace and vocab_name not in no_pad_namespace:
if vocab_name not in no_pad_namespace:
self.vocab[vocab_name][self._PAD_token] = cnt
cnt += 1

Expand Down Expand Up @@ -156,12 +156,12 @@ def extend_from_counter(
self.vocab[vocab_name] = bidict()
cnt = 0
# Handle unknown token
if not no_unk_namespace and vocab_name not in no_unk_namespace:
if vocab_name not in no_unk_namespace:
self.vocab[vocab_name][self._UNK_token] = cnt
cnt += 1

# Handle padding token
if not no_pad_namespace and vocab_name not in no_pad_namespace:
if vocab_name not in no_pad_namespace:
self.vocab[vocab_name][self._PAD_token] = cnt
cnt += 1

Expand Down Expand Up @@ -247,7 +247,7 @@ def get_vocab_size(self, namespace: str) -> int:
-------
Vocabulary size : ``int``
"""
return self.vocab[vocab_name]
return len(self.vocab[vocab_name])



Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -11,7 +11,7 @@ def read_file(fname):

setup(
name='antu',
version='0.0.1',
version='0.0.3',
author='AntNLP',
author_email='taoji.cs@gmail.com',
description='Universal data IO and neural network modules in NLP tasks',
Expand Down

0 comments on commit 0280198

Please sign in to comment.