Sourcery refactored master branch #1

base: master
```diff
@@ -81,13 +81,13 @@ def load_tf_weights_in_bert(model, tf_checkpoint_path):
         )
         raise
     tf_path = os.path.abspath(tf_checkpoint_path)
-    print("Converting TensorFlow checkpoint from {}".format(tf_path))
+    print(f"Converting TensorFlow checkpoint from {tf_path}")
     # Load weights from TF model
     init_vars = tf.train.list_variables(tf_path)
     names = []
     arrays = []
     for name, shape in init_vars:
-        print("Loading TF weight {} with shape {}".format(name, shape))
+        print(f"Loading TF weight {name} with shape {shape}")
         array = tf.train.load_variable(tf_path, name)
         names.append(name)
         arrays.append(array)
```
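This hunk only swaps `str.format` for an f-string; the output is identical. A minimal standalone sketch of the equivalence (the path is made up for illustration):

```python
tf_path = "/tmp/bert/model.ckpt"  # hypothetical path, illustration only

# Both lines print exactly the same text.
print("Converting TensorFlow checkpoint from {}".format(tf_path))
print(f"Converting TensorFlow checkpoint from {tf_path}")
```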
```diff
@@ -97,20 +97,22 @@ def load_tf_weights_in_bert(model, tf_checkpoint_path):
         # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculated m and v
         # which are not required for using pretrained model
         if any(n in ["adam_v", "adam_m"] for n in name):
-            print("Skipping {}".format("/".join(name)))
+            print(f'Skipping {"/".join(name)}')
             continue
         pointer = model
         for m_name in name:
             if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                 l = re.split(r'_(\d+)', m_name)
             else:
                 l = [m_name]
-            if l[0] == 'kernel' or l[0] == 'gamma':
+            if (
+                l[0] in ['kernel', 'gamma']
+                or l[0] not in ['output_bias', 'beta']
+                and l[0] == 'output_weights'
+            ):
                 pointer = getattr(pointer, 'weight')
-            elif l[0] == 'output_bias' or l[0] == 'beta':
+            elif l[0] in ['output_bias', 'beta']:
                 pointer = getattr(pointer, 'bias')
-            elif l[0] == 'output_weights':
-                pointer = getattr(pointer, 'weight')
             else:
                 pointer = getattr(pointer, l[0])
             if len(l) >= 2:
```
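The merged conditional above looks suspicious at first glance, but it preserves behavior: `and` binds tighter than `or` in Python, so the condition parses as `A or (B and C)`, and `B` (`l[0] not in ['output_bias', 'beta']`) is automatically true whenever `C` (`l[0] == 'output_weights'`) is. A quick brute-force equivalence check (illustrative only, not part of the PR):

```python
def original(tok):
    # Branch structure before the refactor.
    if tok == 'kernel' or tok == 'gamma':
        return 'weight'
    elif tok == 'output_bias' or tok == 'beta':
        return 'bias'
    elif tok == 'output_weights':
        return 'weight'
    else:
        return 'attr'

def refactored(tok):
    # Branch structure after merge-duplicate-blocks / merge-comparisons.
    if (
        tok in ['kernel', 'gamma']
        or tok not in ['output_bias', 'beta']
        and tok == 'output_weights'
    ):
        return 'weight'
    elif tok in ['output_bias', 'beta']:
        return 'bias'
    else:
        return 'attr'

for tok in ['kernel', 'gamma', 'output_bias', 'beta', 'output_weights', 'dense']:
    assert original(tok) == refactored(tok), tok
print("equivalent for all tested tokens")
```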
```diff
@@ -125,7 +127,7 @@ def load_tf_weights_in_bert(model, tf_checkpoint_path):
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
-        print("Initialize PyTorch weight {}".format(name))
+        print(f"Initialize PyTorch weight {name}")
         pointer.data = torch.from_numpy(array)
     return model
```
```diff
@@ -207,8 +209,7 @@ def forward(self, input):
         return self.act_fn(F.linear(input, self.weight, self.bias))

     def extra_repr(self):
-        return 'in_features={}, out_features={}, bias={}'.format(
-            self.in_features, self.out_features, self.bias is not None)
+        return f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}'
```
Comment on lines -210 to +212 (Author): Function `extra_repr` refactored.
```diff
@@ -294,8 +295,7 @@ def __repr__(self):

     def to_dict(self):
         """Serializes this instance to a Python dictionary."""
-        output = copy.deepcopy(self.__dict__)
-        return output
+        return copy.deepcopy(self.__dict__)

     def to_json_string(self):
         """Serializes this instance to a JSON string."""
```
Comment on lines -297 to +298 (Author): Function `to_dict` refactored.
```diff
@@ -450,8 +450,7 @@ def __init__(self, config):

     def forward(self, input_tensor, attention_mask):
         self_output = self.self(input_tensor, attention_mask)
-        attention_output = self.output(self_output, input_tensor)
-        return attention_output
+        return self.output(self_output, input_tensor)
```
Comment (Author): Function `forward` refactored.
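The next several hunks all apply the same pattern: a local variable that is assigned and immediately returned is folded into the `return`. Presumably this is Sourcery's `inline-immediately-returned-variable` rule (an inference; the per-function comments here are truncated). The generic shape:

```python
def scale_before(x):
    result = x * 2  # temporary name that adds no information
    return result

def scale_after(x):
    return x * 2  # same behavior, one line shorter

assert scale_before(21) == scale_after(21) == 42
```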
```diff
@@ -490,8 +489,7 @@ def __init__(self, config):
     def forward(self, hidden_states, attention_mask):
         attention_output = self.attention(hidden_states, attention_mask)
         intermediate_output = self.intermediate(attention_output)
-        layer_output = self.output(intermediate_output, attention_output)
-        return layer_output
+        return self.output(intermediate_output, attention_output)
```
Comment on lines -493 to +492 (Author): Function `forward` refactored.
```diff
@@ -606,8 +604,7 @@ def forward(self, hidden_states):
         # We "pool" the model by simply taking the hidden state corresponding
         # to the first token.
         first_token_tensor = hidden_states[:, 0]
-        pooled_output = self.dense_act(first_token_tensor)
-        return pooled_output
+        return self.dense_act(first_token_tensor)
```
Comment on lines -609 to +607 (Author): Function `forward` refactored.
```diff
@@ -641,8 +638,8 @@ def __init__(self, config, bert_model_embedding_weights):
     def forward(self, hidden_states):
         hidden_states = self.transform(hidden_states)
         torch.cuda.nvtx.range_push(
-            "decoder input.size() = {}, weight.size() = {}".format(
-                hidden_states.size(), self.decoder.weight.size()))
+            f"decoder input.size() = {hidden_states.size()}, weight.size() = {self.decoder.weight.size()}"
+        )
         hidden_states = self.decoder(hidden_states) + self.bias
         torch.cuda.nvtx.range_pop()
         return hidden_states
```

Comment on lines -644 to +642 (Author): Function `forward` refactored.
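Worth noting: with either `str.format` or the f-string, the NVTX label (including both `.size()` calls) is built on every forward pass, profiler or not. A hedged sketch of gating that cost behind a flag (`PROFILE` is hypothetical, not part of this code):

```python
import torch

PROFILE = False  # hypothetical switch, not part of the PR

def decode(decoder, hidden_states, bias):
    if PROFILE:
        # Label is only formatted (and .size() only called) when profiling.
        torch.cuda.nvtx.range_push(
            f"decoder input.size() = {hidden_states.size()}, "
            f"weight.size() = {decoder.weight.size()}"
        )
    out = decoder(hidden_states) + bias
    if PROFILE:
        torch.cuda.nvtx.range_pop()  # keep push/pop symmetric
    return out
```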
```diff
@@ -655,8 +652,7 @@ def __init__(self, config, bert_model_embedding_weights):
             bert_model_embedding_weights)

     def forward(self, sequence_output):
-        prediction_scores = self.predictions(sequence_output)
-        return prediction_scores
+        return self.predictions(sequence_output)
```
Comment on lines -658 to +655 (Author): Function `forward` refactored.
```diff
@@ -665,8 +661,7 @@ def __init__(self, config):
         self.seq_relationship = nn.Linear(config.hidden_size, 2)

     def forward(self, pooled_output):
-        seq_relationship_score = self.seq_relationship(pooled_output)
-        return seq_relationship_score
+        return self.seq_relationship(pooled_output)
```
Comment on lines -668 to +664 (Author): Function `forward` refactored.
```diff
@@ -690,10 +685,8 @@ def __init__(self, config, *inputs, **kwargs):
         super(BertPreTrainedModel, self).__init__()
         if not isinstance(config, BertConfig):
             raise ValueError(
-                "Parameter config in `{}(config)` should be an instance of class `BertConfig`. "
-                "To create a model from a Google pretrained model use "
-                "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
-                    self.__class__.__name__, self.__class__.__name__))
+                f"Parameter config in `{self.__class__.__name__}(config)` should be an instance of class `BertConfig`. To create a model from a Google pretrained model use `model = {self.__class__.__name__}.from_pretrained(PRETRAINED_MODEL_NAME)`"
+            )
         self.config = config

     def init_bert_weights(self, module):
```
Comment on lines -693 to +689 (Author): Function `__init__` refactored.
```diff
@@ -834,15 +827,15 @@ def load(module, prefix=''):
                 s.startswith('bert.') for s in state_dict.keys()):
             start_prefix = 'bert.'
         load(model, prefix=start_prefix)
-        if len(missing_keys) > 0:
+        if missing_keys:
             logger.info(
                 "Weights of {} not initialized from pretrained model: {}".
                 format(model.__class__.__name__, missing_keys))
-        if len(unexpected_keys) > 0:
+        if unexpected_keys:
             logger.info(
                 "Weights from pretrained model not used in {}: {}".format(
                     model.__class__.__name__, unexpected_keys))
-        if len(error_msgs) > 0:
+        if error_msgs:
             raise RuntimeError(
                 'Error(s) in loading state_dict for {}:\n\t{}'.format(
                     model.__class__.__name__, "\n\t".join(error_msgs)))
```

Comment on lines -837 to +838 (Author): Function `from_pretrained` refactored.
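The `len(x) > 0` → `if x:` rewrites rely on standard truthiness: `missing_keys`, `unexpected_keys`, and `error_msgs` are plain lists, and an empty list is falsy. A minimal demonstration (the key name is made up):

```python
missing_keys = []
assert not missing_keys  # empty list is falsy

missing_keys.append("bert.embeddings.word_embeddings.weight")  # hypothetical key
assert missing_keys      # non-empty list is truthy
```

One caveat: `if x:` also treats `None` as falsy, where `len(x) > 0` would have raised; for these local lists that difference cannot occur.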
```diff
@@ -1016,20 +1009,15 @@ def forward(self, batch, log=True):
         prediction_scores, seq_relationship_score = self.cls(
             sequence_output, pooled_output)

-        if masked_lm_labels is not None and next_sentence_label is not None:
-            loss_fct = CrossEntropyLoss(ignore_index=-1)
-            masked_lm_loss = loss_fct(
-                prediction_scores.view(-1, self.config.vocab_size),
-                masked_lm_labels.view(-1))
-            next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2),
-                                          next_sentence_label.view(-1))
-            #print("loss is {} {}".format(masked_lm_loss, next_sentence_loss))
-            total_loss = masked_lm_loss + next_sentence_loss
-            # if log:
-            #     self.log_summary_writer(logs={'train_loss': total_loss.item()})
-            return total_loss
-        else:
+        if masked_lm_labels is None or next_sentence_label is None:
             return prediction_scores, seq_relationship_score
+        loss_fct = CrossEntropyLoss(ignore_index=-1)
+        masked_lm_loss = loss_fct(
+            prediction_scores.view(-1, self.config.vocab_size),
+            masked_lm_labels.view(-1))
+        next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2),
+                                      next_sentence_label.view(-1))
+        return masked_lm_loss + next_sentence_loss
```
Comment on lines -1019 to +1020 (Author): Function `forward` refactored. This removes the following comments (why?): `#print("loss is {} {}".format(masked_lm_loss, next_sentence_loss))` and the commented-out `log_summary_writer` lines.
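The loss-head rewrites below all follow the same guard-clause shape: test the no-labels case first, return early, and keep the loss computation unindented. A toy before/after (a stand-in squared-error loss, not the model code):

```python
def loss_or_logits_before(logits, labels):
    if labels is not None:
        loss = (logits - labels) ** 2  # stand-in for CrossEntropyLoss
        return loss
    else:
        return logits

def loss_or_logits_after(logits, labels):
    if labels is None:  # guard clause: simple case first
        return logits
    return (logits - labels) ** 2

assert loss_or_logits_after(3.0, None) == 3.0
assert loss_or_logits_after(3.0, 1.0) == 4.0
```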
```diff
@@ -1089,10 +1077,10 @@ def forward(self,

         if masked_lm_labels is not None:
             loss_fct = CrossEntropyLoss(ignore_index=-1)
-            masked_lm_loss = loss_fct(
+            return loss_fct(
                 prediction_scores.view(-1, self.config.vocab_size),
-                masked_lm_labels.view(-1))
-            return masked_lm_loss
+                masked_lm_labels.view(-1),
+            )
         else:
             return prediction_scores
```

Comment on lines -1092 to +1083 (Author): Function `forward` refactored.
```diff
@@ -1152,13 +1140,12 @@ def forward(self,
                                       output_all_encoded_layers=False)
         seq_relationship_score = self.cls(pooled_output)

-        if next_sentence_label is not None:
-            loss_fct = CrossEntropyLoss(ignore_index=-1)
-            next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2),
-                                          next_sentence_label.view(-1))
-            return next_sentence_loss
-        else:
+        if next_sentence_label is None:
             return seq_relationship_score
+        loss_fct = CrossEntropyLoss(ignore_index=-1)
+        return loss_fct(
+            seq_relationship_score.view(-1, 2), next_sentence_label.view(-1)
+        )
```
Comment on lines -1155 to +1148 (Author): Function `forward` refactored.
```diff
@@ -1222,8 +1209,7 @@ def forward(self,

         if labels is not None:
             loss_fct = CrossEntropyLoss()
-            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
-            return loss
+            return loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
         else:
             return logits
```

Comment on lines -1225 to +1212 (Author): Function `forward` refactored.
```diff
@@ -1291,12 +1277,10 @@ def forward(self,
         logits = self.classifier(pooled_output)
         reshaped_logits = logits.view(-1, self.num_choices)

-        if labels is not None:
-            loss_fct = CrossEntropyLoss()
-            loss = loss_fct(reshaped_logits, labels)
-            return loss
-        else:
+        if labels is None:
             return reshaped_logits
+        loss_fct = CrossEntropyLoss()
+        return loss_fct(reshaped_logits, labels)
```
Comment on lines -1294 to +1283 (Author): Function `forward` refactored.
```diff
@@ -1358,20 +1342,15 @@ def forward(self,
         sequence_output = self.dropout(sequence_output)
         logits = self.classifier(sequence_output)

-        if labels is not None:
-            loss_fct = CrossEntropyLoss()
-            # Only keep active parts of the loss
-            if attention_mask is not None:
-                active_loss = attention_mask.view(-1) == 1
-                active_logits = logits.view(-1, self.num_labels)[active_loss]
-                active_labels = labels.view(-1)[active_loss]
-                loss = loss_fct(active_logits, active_labels)
-            else:
-                loss = loss_fct(logits.view(-1, self.num_labels),
-                                labels.view(-1))
-            return loss
-        else:
+        if labels is None:
             return logits
+        loss_fct = CrossEntropyLoss()
+        if attention_mask is None:
+            return loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+        active_loss = attention_mask.view(-1) == 1
+        active_logits = logits.view(-1, self.num_labels)[active_loss]
+        active_labels = labels.view(-1)[active_loss]
+        return loss_fct(active_logits, active_labels)
```
```diff
@@ -1439,21 +1418,19 @@ def forward(self,
         start_logits = start_logits.squeeze(-1)
         end_logits = end_logits.squeeze(-1)

-        if start_positions is not None and end_positions is not None:
-            # If we are on multi-GPU, split add a dimension
-            if len(start_positions.size()) > 1:
-                start_positions = start_positions.squeeze(-1)
-            if len(end_positions.size()) > 1:
-                end_positions = end_positions.squeeze(-1)
-            # sometimes the start/end positions are outside our model inputs, we ignore these terms
-            ignored_index = start_logits.size(1)
-            start_positions.clamp_(0, ignored_index)
-            end_positions.clamp_(0, ignored_index)
-
-            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
-            start_loss = loss_fct(start_logits, start_positions)
-            end_loss = loss_fct(end_logits, end_positions)
-            total_loss = (start_loss + end_loss) / 2
-            return total_loss
-        else:
-            return start_logits, end_logits
+        if start_positions is None or end_positions is None:
+            return start_logits, end_logits
+        # If we are on multi-GPU, split add a dimension
+        if len(start_positions.size()) > 1:
+            start_positions = start_positions.squeeze(-1)
+        if len(end_positions.size()) > 1:
+            end_positions = end_positions.squeeze(-1)
+        # sometimes the start/end positions are outside our model inputs, we ignore these terms
+        ignored_index = start_logits.size(1)
+        start_positions.clamp_(0, ignored_index)
+        end_positions.clamp_(0, ignored_index)
+
+        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
+        start_loss = loss_fct(start_logits, start_positions)
+        end_loss = loss_fct(end_logits, end_positions)
+        return (start_loss + end_loss) / 2
```
Comment (Author): Function `load_tf_weights_in_bert` refactored with the following changes:
- Replace call to format with f-string (`use-fstring-for-formatting`)
- Merge duplicate blocks in conditional (`merge-duplicate-blocks`)
- Replace multiple comparisons of same variable with `in` operator [×2] (`merge-comparisons`)
- Remove redundant conditional (`remove-redundant-if`)
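For reference, the `merge-comparisons` rule has this shape (a standalone illustration, not code from the diff):

```python
token = "gamma"

# Before: repeated comparisons of the same variable.
if token == "kernel" or token == "gamma":
    kind = "weight"

# After: a single membership test.
if token in ("kernel", "gamma"):
    kind = "weight"

print(kind)  # -> weight
```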