This repository has been archived by the owner on Nov 22, 2022. It is now read-only.

Back out "Fix gradients logging"
Summary: Original commit changeset: 21719b0e0627

Reviewed By: anchit

Differential Revision: D22574604

fbshipit-source-id: 82e34853142e728b3e28809953deb199fb36bd40
arbabu123 authored and facebook-github-bot committed Jul 16, 2020
1 parent 281ab67 commit 05af797
Showing 6 changed files with 28 additions and 59 deletions.
49 changes: 23 additions & 26 deletions pytext/metric_reporters/channel.py
@@ -5,9 +5,7 @@
 import traceback
 from typing import Tuple

-import numpy as np
 import torch
-from numpy import linalg as LA
 from pytext.common.constants import Stage
 from pytext.utils.file_io import PathManager
 from torch.utils.tensorboard import SummaryWriter
@@ -204,8 +202,6 @@ def report(
         meta,
         model,
         optimizer,
-        log_gradient,
-        gradients,
         *args,
     ):
         """
@@ -270,32 +266,33 @@ def stage2prefix(stage: Stage):
                 self.summary_writer.add_scalar(
                     f"optimizer.lr.param_group.{idx}", param_group["lr"], epoch
                 )
-        if log_gradient and gradients:
-            for key in gradients:
-                if len(gradients[key]):
-                    sum_gradient = sum(gradients[key])
-                    avg_gradient = sum_gradient / len(gradients[key])
-                    grad_norms = np.array([LA.norm(g) for g in gradients[key]])
-                    self.log_vector(key + "_avg_gradients", avg_gradient, epoch)
-                    self.log_vector(key + "_sum_gradients", sum_gradient, epoch)
-                    self.log_vector(key + "_l2norm_gradients", grad_norms, epoch)
-
         for key, val in model.named_parameters():
             if val is not None and len(val) > 0 and not (val == 0).all():
                 limit = 9.9e19
+                grad = val.grad
                 val = torch.clamp(val.float(), -limit, limit)
-                self.log_vector(key, val, epoch)
-
-    def log_vector(self, key, val, epoch):
-        if len(val) > 0 and not (val == 0).all():
-            try:
-                self.summary_writer.add_histogram(key, val, epoch)
-            except Exception:
-                print(
-                    f"WARNING: Param {key} " "cannot be sent to Tensorboard",
-                    file=sys.stderr,
-                )
-                traceback.print_exc(file=sys.stderr)
+                try:
+                    self.summary_writer.add_histogram(key, val, epoch)
+                except Exception:
+                    print(
+                        f"WARNING: Param {key} cannot be sent to Tensorboard",
+                        file=sys.stderr,
+                    )
+                    traceback.print_exc(file=sys.stderr)
+
+                if grad is not None and len(grad) > 0 and not (grad == 0).all():
+                    grad = torch.clamp(grad.float(), -limit, limit)
+                    try:
+                        self.summary_writer.add_histogram(
+                            key + "_gradients", grad, epoch
+                        )
+                    except Exception:
+                        print(
+                            f"WARNING: Grad for param {key} "
+                            "cannot be sent to Tensorboard",
+                            file=sys.stderr,
+                        )
+                        traceback.print_exc(file=sys.stderr)

     def add_texts(self, tag, metrics):
         """
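For reference, the per-parameter histogram logging that this commit restores in TensorBoardChannel.report() can be sketched as a standalone helper. The sketch below is illustrative only: writer, model, and epoch are generic stand-ins rather than PyText objects, and the helper name log_param_histograms is invented for this example.

import sys
import traceback

import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter


def log_param_histograms(writer: SummaryWriter, model: nn.Module, epoch: int) -> None:
    # Clamp extreme values so TensorBoard histograms stay finite.
    limit = 9.9e19
    for key, val in model.named_parameters():
        if val is not None and len(val) > 0 and not (val == 0).all():
            grad = val.grad
            val = torch.clamp(val.float(), -limit, limit)
            try:
                writer.add_histogram(key, val, epoch)
            except Exception:
                print(f"WARNING: Param {key} cannot be sent to Tensorboard", file=sys.stderr)
                traceback.print_exc(file=sys.stderr)
            # Log the gradient histogram alongside the parameter values.
            if grad is not None and len(grad) > 0 and not (grad == 0).all():
                grad = torch.clamp(grad.float(), -limit, limit)
                try:
                    writer.add_histogram(key + "_gradients", grad, epoch)
                except Exception:
                    print(f"WARNING: Grad for param {key} cannot be sent to Tensorboard", file=sys.stderr)
                    traceback.print_exc(file=sys.stderr)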
20 changes: 1 addition & 19 deletions pytext/metric_reporters/metric_reporter.py
@@ -45,16 +45,14 @@ class MetricReporter(Component):
     __COMPONENT_TYPE__ = ComponentType.METRIC_REPORTER

     lower_is_better: bool = False
-    log_gradient: bool = False

     class Config(ConfigBase):
         output_path: str = "/tmp/test_out.txt"
         pep_format: bool = False
         #: Useful for KD training, column names that used by student but not teacher.
         student_column_names: List[str] = []

-    def __init__(self, channels, log_gradient=False, pep_format=False) -> None:
-        self.log_gradient = log_gradient
+    def __init__(self, channels, pep_format=False) -> None:
         self._reset()
         self.channels = channels
         self.pep_format = pep_format
@@ -68,7 +66,6 @@ def _reset(self):
         self.all_scores: List = []
         self.n_batches = 0
         self.batch_size: List = []
-        self.all_gradients: Dict[str, List[List]] = {}

     def _reset_realtime(self):
         self.realtime_meters: Dict = {}
@@ -114,16 +111,6 @@ def add_batch_stats(
             self.realtime_meters["tps"].update(context[DatasetFieldName.NUM_TOKENS])
             self.realtime_meters["ups"].update(1)

-    def add_gradients(self, model):
-        if self.log_gradient:
-            for key, value in model.named_parameters():
-                grad = value.grad
-                if grad is not None and len(grad) > 0 and not (grad == 0).all():
-                    if key in self.all_gradients:
-                        self.all_gradients[key].append(grad.cpu().numpy())
-                    else:
-                        self.all_gradients[key] = [grad.cpu().numpy()]
-
     def aggregate_preds(self, batch_preds, batch_context=None):
         self.aggregate_data(self.all_preds, batch_preds)

@@ -267,8 +254,6 @@ def report_metric(
                         self.get_meta(),
                         model,
                         optimizer,
-                        self.log_gradient,
-                        self.get_gradients(),
                     )

         if reset:
@@ -322,9 +307,6 @@ def compare_metric(self, new_metric, old_metric):
             return False
         return (new < old) == self.lower_is_better

-    def get_gradients(self):
-        return self.all_gradients
-

 class PureLossMetricReporter(MetricReporter):
     lower_is_better = True
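For context, the gradient-collection hook removed from MetricReporter above amounts to the following standalone pattern: after each backward pass, every parameter's gradient is appended (as a numpy array) to a per-parameter list so it can be summed, averaged, and norm-logged at report time. The helper name collect_gradients and the explicit dict argument are illustrative, not part of PyText after this commit.

from typing import Dict, List

import numpy as np
from torch import nn


def collect_gradients(model: nn.Module, all_gradients: Dict[str, List[np.ndarray]]) -> None:
    # Append the current gradient of every non-trivial parameter, keyed by name.
    for key, value in model.named_parameters():
        grad = value.grad
        if grad is not None and len(grad) > 0 and not (grad == 0).all():
            all_gradients.setdefault(key, []).append(grad.cpu().numpy())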
4 changes: 2 additions & 2 deletions pytext/metric_reporters/seq2seq_compositional.py
@@ -67,8 +67,8 @@ def gen_content(self, metrics, loss, preds, targets, scores, context):


 class Seq2SeqCompositionalMetricReporter(Seq2SeqMetricReporter):
-    def __init__(self, channels, log_gradient, tensorizers, accept_flat_intents_slots):
-        super().__init__(channels, log_gradient, tensorizers)
+    def __init__(self, channels, tensorizers, accept_flat_intents_slots):
+        super().__init__(channels, tensorizers)
         self.accept_flat_intents_slots = accept_flat_intents_slots

     class Config(MetricReporter.Config):
4 changes: 2 additions & 2 deletions pytext/metric_reporters/seq2seq_metric_reporter.py
@@ -43,8 +43,8 @@ class Seq2SeqMetricReporter(MetricReporter):
     class Config(MetricReporter.Config):
         pass

-    def __init__(self, channels, log_gradient, tensorizers):
-        super().__init__(channels, log_gradient)
+    def __init__(self, channels, tensorizers):
+        super().__init__(channels)
         self.tensorizers = tensorizers

     def _reset(self):
4 changes: 0 additions & 4 deletions pytext/metric_reporters/tests/tensorboard_test.py
@@ -47,8 +47,6 @@ def test_report_metrics_with_nan(self):
             meta={},
             model=model,
             optimizer=optimizer,
-            log_gradient=False,
-            gradients={},
         )

     def test_report_metrics_to_others(self):
@@ -73,6 +71,4 @@ def test_report_metrics_to_others(self):
             meta={},
             model=model,
             optimizer=optimizer,
-            log_gradient=False,
-            gradients={},
         )
6 changes: 0 additions & 6 deletions pytext/trainers/trainer.py
@@ -619,9 +619,6 @@ def run_step(
                     )
         # update gradients after len(samples) forward & backward
         self.optimizer_step(state)
-        with timing.time("add gradients"):
-            if report_metric and state.stage == Stage.TRAIN:
-                metric_reporter.add_gradients(state.model)
         self.sparsification_step(state)


@@ -676,9 +673,6 @@ def run_step(
                         metric_reporter.report_realtime_metric(state.stage)
         # update gradients after #len(samples) forward & backward
         self.optimizer_step(state)
-        with timing.time("add gradients"):
-            if report_metric and state.stage == Stage.TRAIN:
-                metric_reporter.add_gradients(state.model)
         self.sparsification_step(state)

     def _prepare_scheduler(self, training_batches, scheduler=None):
