diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py
index e86e67f67fd33d..8dc57874571a63 100755
--- a/src/transformers/models/bart/modeling_bart.py
+++ b/src/transformers/models/bart/modeling_bart.py
@@ -67,7 +67,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
@@ -786,9 +787,11 @@ def forward(
 
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
+
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
@@ -1023,9 +1026,11 @@ def forward(
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {attn_mask.size()[0]}."
+                    )
+
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py
index 33681397338014..73997067579fa1 100755
--- a/src/transformers/models/bert/modeling_bert.py
+++ b/src/transformers/models/bert/modeling_bert.py
@@ -1369,7 +1369,9 @@ def prepare_inputs_for_generation(self, input_ids, attention_mask=None, **model_
         effective_batch_size = input_shape[0]
 
         # add a dummy token
-        assert self.config.pad_token_id is not None, "The PAD token should be defined for generation"
+        if self.config.pad_token_id is None:
+            raise ValueError("The PAD token should be defined for generation")
+
         attention_mask = torch.cat([attention_mask, attention_mask.new_zeros((attention_mask.shape[0], 1))], dim=-1)
         dummy_token = torch.full(
             (effective_batch_size, 1), self.config.pad_token_id, dtype=torch.long, device=input_ids.device
diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py
index 3f152a40477a26..ceb32829cb271e 100755
--- a/src/transformers/models/blenderbot/modeling_blenderbot.py
+++ b/src/transformers/models/blenderbot/modeling_blenderbot.py
@@ -69,7 +69,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
index aeaa5f2a07b75c..a1009ae11fc338 100755
--- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
@@ -66,7 +66,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py
index daa907ba82e2e2..b9e5dcbe7ba4ba 100755
--- a/src/transformers/models/m2m_100/modeling_m2m_100.py
+++ b/src/transformers/models/m2m_100/modeling_m2m_100.py
@@ -64,7 +64,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py
index 5d071cba07538c..a0ac87ede93a76 100755
--- a/src/transformers/models/marian/modeling_marian.py
+++ b/src/transformers/models/marian/modeling_marian.py
@@ -67,7 +67,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py
index 429002ad410880..00956ac7560ba6 100755
--- a/src/transformers/models/pegasus/modeling_pegasus.py
+++ b/src/transformers/models/pegasus/modeling_pegasus.py
@@ -66,7 +66,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py
index 5f7653c07f4ce2..35a2efab30af42 100755
--- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py
@@ -63,7 +63,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
 
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
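Every hunk in this patch applies the same transformation: an `assert` guarding user-supplied configuration becomes an explicit `if`/`raise ValueError`. The practical difference is that asserts are stripped when Python runs with optimizations (`python -O`), and `AssertionError` misleadingly signals an internal invariant failure rather than bad input. Below is a minimal sketch of the pattern outside any transformers file; `shift_tokens_right_sketch` is a simplified stand-in written for illustration, not the library's implementation.

import torch


def shift_tokens_right_sketch(input_ids: torch.Tensor, pad_token_id: int, decoder_start_token_id: int) -> torch.Tensor:
    # An `assert pad_token_id is not None` here would vanish under
    # `python -O`; an explicit raise keeps the check active everywhere
    # and surfaces a ValueError that callers can reasonably catch.
    if pad_token_id is None:
        raise ValueError("self.model.config.pad_token_id has to be defined.")
    shifted = input_ids.new_zeros(input_ids.shape)
    shifted[:, 1:] = input_ids[:, :-1].clone()
    shifted[:, 0] = decoder_start_token_id
    # replace possible -100 label-masking values with the pad token
    shifted.masked_fill_(shifted == -100, pad_token_id)
    return shifted

Raising `ValueError` also matches how these checks read to a caller: the failure is a configuration problem (an undefined pad token, a head mask sized for the wrong number of layers), not a bug inside the model code.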