Upgrade Roberta tokenizer (#1821)
* update roberta

* update roberta tokenizer

* update roberta tokenizer

* update

* update
yingyibiao committed Mar 28, 2022
1 parent 46cbe60 commit 3351ab0
Showing 6 changed files with 269 additions and 494 deletions.
3 changes: 2 additions & 1 deletion paddlenlp/transformers/auto/tokenizer.py
@@ -48,7 +48,8 @@
         ("MBartTokenizer", "mbart"),
         ("MPNetTokenizer", "mpnet"),
         ("NeZhaTokenizer", "nezha"),
-        ("RobertaTokenizer", "roberta"),
+        ("RobertaChineseTokenizer", "roberta"),
+        ("RobertaBPETokenizer", "roberta"),
         ("RoFormerTokenizer", "roformer"),
         ("ReformerTokenizer", "reformer"),
         ("SqueezeBertTokenizer", "squeezebert"),
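This hunk splits the single RobertaTokenizer entry in the AutoTokenizer registry into two classes, both keyed to the "roberta" model type: RobertaChineseTokenizer (presumably the old BERT-style behavior for Chinese checkpoints) and RobertaBPETokenizer (presumably byte-level BPE for English checkpoints). A minimal usage sketch — the checkpoint names are illustrative, and the actual class chosen depends on each checkpoint's tokenizer configuration:

    from paddlenlp.transformers import AutoTokenizer

    # A Chinese RoBERTa checkpoint should resolve to RobertaChineseTokenizer ...
    tok_zh = AutoTokenizer.from_pretrained("roberta-wwm-ext")
    # ... while an English RoBERTa checkpoint should resolve to RobertaBPETokenizer.
    tok_en = AutoTokenizer.from_pretrained("roberta-en-base")
    print(type(tok_zh).__name__, type(tok_en).__name__)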
4 changes: 3 additions & 1 deletion paddlenlp/transformers/bert/modeling.py
@@ -499,7 +499,9 @@ def forward(self,
         else:
             if attention_mask.ndim == 2:
                 # attention_mask [batch_size, sequence_length] -> [batch_size, 1, 1, sequence_length]
-                attention_mask = attention_mask.unsqueeze(axis=[1, 2])
+                attention_mask = attention_mask.unsqueeze(
+                    axis=[1, 2]).astype(paddle.get_default_dtype())
+                attention_mask = (1.0 - attention_mask) * -1e4

         embedding_output = self.embeddings(
             input_ids=input_ids,
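The new mask handling turns a 0/1 padding mask into an additive attention bias: real tokens map to 0.0 and padded positions to -1e4, so softmax drives their attention weight toward zero. A minimal sketch of the transformation in isolation (tensor values are illustrative):

    import paddle

    # 1 marks a real token, 0 marks padding; shape [batch_size, seq_len].
    attention_mask = paddle.to_tensor([[1, 1, 1, 0]])

    # [batch_size, seq_len] -> [batch_size, 1, 1, seq_len], cast to the
    # default float dtype so the arithmetic below is well defined.
    attention_mask = attention_mask.unsqueeze(
        axis=[1, 2]).astype(paddle.get_default_dtype())

    # Kept positions become 0.0, padded positions become -10000.0; added to
    # the raw attention scores, this pushes padded positions to ~0 probability.
    attention_mask = (1.0 - attention_mask) * -1e4
    print(attention_mask)  # [[[[0., 0., 0., -10000.]]]]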
(Diffs for the remaining 4 changed files did not load and are not shown here.)
