Fix trainer.global_steps in WandB logging #4366

Merged
merged 1 commit into from Jun 14, 2022
1 change: 1 addition & 0 deletions nemo/collections/asr/models/classification_models.py
@@ -481,6 +481,7 @@ def training_step(self, batch, batch_nb):

self.log('train_loss', loss_value)
self.log('learning_rate', self._optimizer.param_groups[0]['lr'])
self.log('global_step', self.trainer.global_step)

self._accuracy(logits=logits, labels=labels)
topk_scores = self._accuracy.compute()
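The change above logs `self.trainer.global_step` as an explicit scalar, so WandB records the step as its own metric next to the loss and learning rate. A minimal sketch of the same pattern, assuming a generic PyTorch Lightning module (`ToyClassifier` and its fields are illustrative stand-ins, not NeMo code):

```python
import torch
import pytorch_lightning as pl


class ToyClassifier(pl.LightningModule):
    """Illustrative stand-in for an ASR model; not part of NeMo."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(16, 4)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss_value = torch.nn.functional.cross_entropy(self.layer(x), y)
        # Same pattern as the diff above: log each scalar, including the
        # trainer's global step, so the logger records it as a metric.
        self.log('train_loss', loss_value)
        self.log('global_step', float(self.trainer.global_step))
        return loss_value

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)
```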
6 changes: 5 additions & 1 deletion nemo/collections/asr/models/ctc_models.py
@@ -494,7 +494,11 @@ def training_step(self, batch, batch_nb):
log_probs=log_probs, targets=transcript, input_lengths=encoded_len, target_lengths=transcript_len
)

tensorboard_logs = {'train_loss': loss_value, 'learning_rate': self._optimizer.param_groups[0]['lr']}
tensorboard_logs = {
    'train_loss': loss_value,
    'learning_rate': self._optimizer.param_groups[0]['lr'],
    'global_step': self.trainer.global_step,
}

if hasattr(self, '_trainer') and self._trainer is not None:
log_every_n_steps = self._trainer.log_every_n_steps
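Here the new `'global_step'` key is added to the `tensorboard_logs` dict rather than logged directly. As a rough illustration (how NeMo actually routes this dict to its loggers is outside this diff), a dict of scalars like this can be emitted in a single call with Lightning's `log_dict`; the class below is a made-up stand-in, not NeMo code:

```python
import torch
import pytorch_lightning as pl


class ToyDictLoggingModel(pl.LightningModule):
    """Made-up example of dict-style logging; not part of NeMo."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(16, 4)

    def training_step(self, batch, batch_nb):
        x, y = batch
        loss_value = torch.nn.functional.cross_entropy(self.layer(x), y)
        tensorboard_logs = {
            'train_loss': loss_value,
            # Read the lr from the trainer's configured optimizer.
            'learning_rate': self.trainer.optimizers[0].param_groups[0]['lr'],
            'global_step': float(self.trainer.global_step),
        }
        # One call logs every entry; 'global_step' then shows up in WandB
        # like any other scalar metric.
        self.log_dict(tensorboard_logs)
        return loss_value

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)
```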
1 change: 1 addition & 0 deletions nemo/collections/asr/models/label_models.py
@@ -277,6 +277,7 @@ def training_step(self, batch, batch_idx):

self.log('loss', loss)
self.log('learning_rate', self._optimizer.param_groups[0]['lr'])
self.log('global_step', self.trainer.global_step)

self._accuracy(logits=logits, labels=labels)
top_k = self._accuracy.compute()
6 changes: 5 additions & 1 deletion nemo/collections/asr/models/rnnt_models.py
@@ -687,7 +687,11 @@ def training_step(self, batch, batch_nb):
log_probs=joint, targets=transcript, input_lengths=encoded_len, target_lengths=target_length
)

tensorboard_logs = {'train_loss': loss_value, 'learning_rate': self._optimizer.param_groups[0]['lr']}
tensorboard_logs = {
    'train_loss': loss_value,
    'learning_rate': self._optimizer.param_groups[0]['lr'],
    'global_step': self.trainer.global_step,
}

if (sample_id + 1) % log_every_n_steps == 0:
self.wer.update(encoded, encoded_len, transcript, transcript_len)
5 changes: 4 additions & 1 deletion nemo/collections/asr/models/ssl_models.py
@@ -469,7 +469,10 @@ def training_step(self, batch, batch_nb):
spectrograms, spec_masks, encoded, encoded_len, targets, target_lengths
)

tensorboard_logs = {'learning_rate': self._optimizer.param_groups[0]['lr']}
tensorboard_logs = {
    'learning_rate': self._optimizer.param_groups[0]['lr'],
    'global_step': self.trainer.global_step,
}

Collaborator: Do we know why loss value is missing here?

Collaborator Author: Probably attached below

for loss_name, loss_val in loss_val_dict.items():
tensorboard_logs['train_' + loss_name] = loss_val
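Regarding the question in the review thread above: the individual loss terms are appended to `tensorboard_logs` from `loss_val_dict` right after the dict is built, which is presumably why no single `train_loss` entry appears in it. A small self-contained illustration of that append pattern (the keys and values are made up, not taken from NeMo):

```python
# Made-up loss dict standing in for NeMo's loss_val_dict.
loss_val_dict = {'contrastive_loss': 0.42, 'mlm_loss': 1.30}

tensorboard_logs = {
    'learning_rate': 1e-3,
    'global_step': 120.0,
}

# Same pattern as the diff above: each loss is added with a 'train_' prefix.
for loss_name, loss_val in loss_val_dict.items():
    tensorboard_logs['train_' + loss_name] = loss_val

print(tensorboard_logs)
# {'learning_rate': 0.001, 'global_step': 120.0,
#  'train_contrastive_loss': 0.42, 'train_mlm_loss': 1.3}
```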