Fix parsing of version in TensorBoardLogger and CSVLogger (#18897)
(cherry picked from commit 98685c3)
awaelchli authored and lantiga committed Nov 6, 2023
1 parent 1bda28a commit 15d4361
Showing 8 changed files with 147 additions and 132 deletions.
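
The fix guards the integer conversion in `_get_next_version` so that directories whose names start with `version_` but do not end in a number (for example `version_nonumber`) are skipped instead of raising a `ValueError`. Below is a minimal standalone sketch of the guarded parsing; the helper name and directory names are hypothetical, and the real loggers list the log directory through their fsspec filesystem and filter for directories first.

# Sketch of the version-parsing guard introduced by this commit (hypothetical helper).
def next_version(dir_names):
    existing_versions = []
    for name in dir_names:
        if name.startswith("version_"):
            dir_ver = name.split("_")[1]
            if dir_ver.isdigit():  # skip e.g. "version_nonumber" instead of raising ValueError
                existing_versions.append(int(dir_ver))
    return max(existing_versions) + 1 if existing_versions else 0

print(next_version(["version_0", "version_1", "version_nonumber", "other"]))  # prints 2
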
src/lightning/fabric/loggers/csv_logs.py (4 changes: 3 additions & 1 deletion)
@@ -168,7 +168,9 @@ def _get_next_version(self) -> int:
             full_path = d["name"]
             name = os.path.basename(full_path)
             if _is_dir(self._fs, full_path) and name.startswith("version_"):
-                existing_versions.append(int(name.split("_")[1]))
+                dir_ver = name.split("_")[1]
+                if dir_ver.isdigit():
+                    existing_versions.append(int(dir_ver))

         if len(existing_versions) == 0:
             return 0
src/lightning/fabric/loggers/tensorboard.py (3 changes: 2 additions & 1 deletion)
@@ -304,7 +304,8 @@ def _get_next_version(self) -> int:
             bn = os.path.basename(d)
             if _is_dir(self._fs, d) and bn.startswith("version_"):
                 dir_ver = bn.split("_")[1].replace("/", "")
-                existing_versions.append(int(dir_ver))
+                if dir_ver.isdigit():
+                    existing_versions.append(int(dir_ver))
         if len(existing_versions) == 0:
             return 0

src/lightning/pytorch/CHANGELOG.md (3 changes: 3 additions & 0 deletions)
@@ -41,6 +41,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Refined the FSDP saving logic and error messaging when path exists ([#18884](https://github.com/Lightning-AI/lightning/pull/18884))


+- Fixed an issue parsing the version from folders that don't include a version number in `TensorBoardLogger` and `CSVLogger` ([#18897](https://github.com/Lightning-AI/lightning/issues/18897))
+
+
 ## [2.1.0] - 2023-10-11

 ### Added
src/lightning/pytorch/loggers/tensorboard.py (3 changes: 2 additions & 1 deletion)
@@ -243,7 +243,8 @@ def _get_next_version(self) -> int:
             bn = os.path.basename(d)
             if _is_dir(self._fs, d) and bn.startswith("version_"):
                 dir_ver = bn.split("_")[1].replace("/", "")
-                existing_versions.append(int(dir_ver))
+                if dir_ver.isdigit():
+                    existing_versions.append(int(dir_ver))
         if len(existing_versions) == 0:
             return 0

tests/tests_fabric/loggers/test_csv.py (43 changes: 23 additions & 20 deletions)
@@ -24,6 +24,9 @@ def test_automatic_versioning(tmp_path):
     """Verify that automatic versioning works."""
     (tmp_path / "exp" / "version_0").mkdir(parents=True)
     (tmp_path / "exp" / "version_1").mkdir()
+    (tmp_path / "exp" / "version_nonumber").mkdir()
+    (tmp_path / "exp" / "other").mkdir()
+
     logger = CSVLogger(root_dir=tmp_path, name="exp")
     assert logger.version == 2

@@ -37,43 +40,43 @@ def test_automatic_versioning_relative_root_dir(tmp_path, monkeypatch):
     assert logger.version == 2


-def test_manual_versioning(tmpdir):
+def test_manual_versioning(tmp_path):
     """Verify that manual versioning works."""
-    root_dir = tmpdir.mkdir("exp")
-    root_dir.mkdir("version_0")
-    root_dir.mkdir("version_1")
-    root_dir.mkdir("version_2")
+    root_dir = tmp_path / "exp"
+    (root_dir / "version_0").mkdir(parents=True)
+    (root_dir / "version_1").mkdir()
+    (root_dir / "version_2").mkdir()
     logger = CSVLogger(root_dir=root_dir, name="exp", version=1)
     assert logger.version == 1


-def test_named_version(tmpdir):
+def test_named_version(tmp_path):
     """Verify that manual versioning works for string versions, e.g. '2020-02-05-162402'."""
     exp_name = "exp"
-    tmpdir.mkdir(exp_name)
+    (tmp_path / exp_name).mkdir()
     expected_version = "2020-02-05-162402"

-    logger = CSVLogger(root_dir=tmpdir, name=exp_name, version=expected_version)
+    logger = CSVLogger(root_dir=tmp_path, name=exp_name, version=expected_version)
     logger.log_metrics({"a": 1, "b": 2})
     logger.save()
     assert logger.version == expected_version
-    assert os.listdir(tmpdir / exp_name) == [expected_version]
-    assert os.listdir(tmpdir / exp_name / expected_version)
+    assert os.listdir(tmp_path / exp_name) == [expected_version]
+    assert os.listdir(tmp_path / exp_name / expected_version)


 @pytest.mark.parametrize("name", ["", None])
-def test_no_name(tmpdir, name):
+def test_no_name(tmp_path, name):
     """Verify that None or empty name works."""
-    logger = CSVLogger(root_dir=tmpdir, name=name)
+    logger = CSVLogger(root_dir=tmp_path, name=name)
     logger.log_metrics({"a": 1})
     logger.save()
-    assert os.path.normpath(logger._root_dir) == tmpdir  # use os.path.normpath to handle trailing /
-    assert os.listdir(tmpdir / "version_0")
+    assert os.path.normpath(logger._root_dir) == str(tmp_path)  # use os.path.normpath to handle trailing /
+    assert os.listdir(tmp_path / "version_0")


 @pytest.mark.parametrize("step_idx", [10, None])
-def test_log_metrics(tmpdir, step_idx):
-    logger = CSVLogger(tmpdir)
+def test_log_metrics(tmp_path, step_idx):
+    logger = CSVLogger(tmp_path)
     metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)}
     logger.log_metrics(metrics, step_idx)
     logger.save()
@@ -85,14 +88,14 @@ def test_log_metrics(tmpdir, step_idx):
     assert all(n in lines[0] for n in metrics)


-def test_log_hyperparams(tmpdir):
-    logger = CSVLogger(tmpdir)
+def test_log_hyperparams(tmp_path):
+    logger = CSVLogger(tmp_path)
     with pytest.raises(NotImplementedError):
         logger.log_hyperparams({})


-def test_flush_n_steps(tmpdir):
-    logger = CSVLogger(tmpdir, flush_logs_every_n_steps=2)
+def test_flush_n_steps(tmp_path):
+    logger = CSVLogger(tmp_path, flush_logs_every_n_steps=2)
     metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)}
     logger.save = MagicMock()
     logger.log_metrics(metrics, step=0)
tests/tests_fabric/loggers/test_tensorboard.py (70 changes: 36 additions & 34 deletions)
@@ -27,55 +27,57 @@
 from tests_fabric.test_fabric import BoringModel


-def test_tensorboard_automatic_versioning(tmpdir):
+def test_tensorboard_automatic_versioning(tmp_path):
     """Verify that automatic versioning works."""
-    root_dir = tmpdir / "tb_versioning"
+    root_dir = tmp_path / "tb_versioning"
     root_dir.mkdir()
     (root_dir / "version_0").mkdir()
     (root_dir / "version_1").mkdir()
+    (root_dir / "version_nonumber").mkdir()
+    (root_dir / "other").mkdir()

-    logger = TensorBoardLogger(root_dir=tmpdir, name="tb_versioning")
+    logger = TensorBoardLogger(root_dir=tmp_path, name="tb_versioning")
     assert logger.version == 2


-def test_tensorboard_manual_versioning(tmpdir):
+def test_tensorboard_manual_versioning(tmp_path):
     """Verify that manual versioning works."""
-    root_dir = tmpdir / "tb_versioning"
+    root_dir = tmp_path / "tb_versioning"
     root_dir.mkdir()
     (root_dir / "version_0").mkdir()
     (root_dir / "version_1").mkdir()
     (root_dir / "version_2").mkdir()

-    logger = TensorBoardLogger(root_dir=tmpdir, name="tb_versioning", version=1)
+    logger = TensorBoardLogger(root_dir=tmp_path, name="tb_versioning", version=1)
     assert logger.version == 1


-def test_tensorboard_named_version(tmpdir):
+def test_tensorboard_named_version(tmp_path):
     """Verify that manual versioning works for string versions, e.g. '2020-02-05-162402'."""
     name = "tb_versioning"
-    (tmpdir / name).mkdir()
+    (tmp_path / name).mkdir()
     expected_version = "2020-02-05-162402"

-    logger = TensorBoardLogger(root_dir=tmpdir, name=name, version=expected_version)
+    logger = TensorBoardLogger(root_dir=tmp_path, name=name, version=expected_version)
     logger.log_hyperparams({"a": 1, "b": 2, 123: 3, 3.5: 4, 5j: 5})  # Force data to be written

     assert logger.version == expected_version
-    assert os.listdir(tmpdir / name) == [expected_version]
-    assert os.listdir(tmpdir / name / expected_version)
+    assert os.listdir(tmp_path / name) == [expected_version]
+    assert os.listdir(tmp_path / name / expected_version)


 @pytest.mark.parametrize("name", ["", None])
-def test_tensorboard_no_name(tmpdir, name):
+def test_tensorboard_no_name(tmp_path, name):
     """Verify that None or empty name works."""
-    logger = TensorBoardLogger(root_dir=tmpdir, name=name)
+    logger = TensorBoardLogger(root_dir=tmp_path, name=name)
     logger.log_hyperparams({"a": 1, "b": 2, 123: 3, 3.5: 4, 5j: 5})  # Force data to be written
-    assert os.path.normpath(logger.root_dir) == tmpdir  # use os.path.normpath to handle trailing /
-    assert os.listdir(tmpdir / "version_0")
+    assert os.path.normpath(logger.root_dir) == str(tmp_path)  # use os.path.normpath to handle trailing /
+    assert os.listdir(tmp_path / "version_0")


-def test_tensorboard_log_sub_dir(tmpdir):
+def test_tensorboard_log_sub_dir(tmp_path):
     # no sub_dir specified
-    root_dir = tmpdir / "logs"
+    root_dir = tmp_path / "logs"
     logger = TensorBoardLogger(root_dir, name="name", version="version")
     assert logger.log_dir == os.path.join(root_dir, "name", "version")

@@ -104,14 +106,14 @@ def test_tensorboard_expand_env_vars():


 @pytest.mark.parametrize("step_idx", [10, None])
-def test_tensorboard_log_metrics(tmpdir, step_idx):
-    logger = TensorBoardLogger(tmpdir)
+def test_tensorboard_log_metrics(tmp_path, step_idx):
+    logger = TensorBoardLogger(tmp_path)
     metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)}
     logger.log_metrics(metrics, step_idx)


-def test_tensorboard_log_hyperparams(tmpdir):
-    logger = TensorBoardLogger(tmpdir)
+def test_tensorboard_log_hyperparams(tmp_path):
+    logger = TensorBoardLogger(tmp_path)
     hparams = {
         "float": 0.3,
         "int": 1,
@@ -127,8 +129,8 @@ def test_tensorboard_log_hyperparams(tmpdir):
     logger.log_hyperparams(hparams)


-def test_tensorboard_log_hparams_and_metrics(tmpdir):
-    logger = TensorBoardLogger(tmpdir, default_hp_metric=False)
+def test_tensorboard_log_hparams_and_metrics(tmp_path):
+    logger = TensorBoardLogger(tmp_path, default_hp_metric=False)
     hparams = {
         "float": 0.3,
         "int": 1,
@@ -146,15 +148,15 @@ def test_tensorboard_log_hparams_and_metrics(tmpdir):


 @pytest.mark.parametrize("example_input_array", [None, torch.rand(2, 32)])
-def test_tensorboard_log_graph(tmpdir, example_input_array):
+def test_tensorboard_log_graph(tmp_path, example_input_array):
     """Test that log graph works with both model.example_input_array and if array is passed externally."""
     # TODO(fabric): Test both nn.Module and LightningModule
     # TODO(fabric): Assert _apply_batch_transfer_handler is calling the batch transfer hooks
     model = BoringModel()
     if example_input_array is not None:
         model.example_input_array = None

-    logger = TensorBoardLogger(tmpdir)
+    logger = TensorBoardLogger(tmp_path)
     logger._experiment = Mock()
     logger.log_graph(model, example_input_array)
     if example_input_array is not None:
@@ -169,11 +171,11 @@ def test_tensorboard_log_graph(tmpdir, example_input_array):


 @pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason=str(_TENSORBOARD_AVAILABLE))
-def test_tensorboard_log_graph_warning_no_example_input_array(tmpdir):
+def test_tensorboard_log_graph_warning_no_example_input_array(tmp_path):
     """Test that log graph throws warning if model.example_input_array is None."""
     model = BoringModel()
     model.example_input_array = None
-    logger = TensorBoardLogger(tmpdir, log_graph=True)
+    logger = TensorBoardLogger(tmp_path, log_graph=True)
     with pytest.warns(
         UserWarning,
         match="Could not log computational graph to TensorBoard: The `model.example_input_array` .* was not given",
@@ -187,22 +189,22 @@ def test_tensorboard_log_graph_warning_no_example_input_array(tmpdir):
         logger.log_graph(model)


-def test_tensorboard_finalize(monkeypatch, tmpdir):
+def test_tensorboard_finalize(monkeypatch, tmp_path):
     """Test that the SummaryWriter closes in finalize."""
     if _TENSORBOARD_AVAILABLE:
         import torch.utils.tensorboard as tb
     else:
         import tensorboardX as tb

     monkeypatch.setattr(tb, "SummaryWriter", Mock())
-    logger = TensorBoardLogger(root_dir=tmpdir)
+    logger = TensorBoardLogger(root_dir=tmp_path)
     assert logger._experiment is None
     logger.finalize("any")

     # no log calls, no experiment created -> nothing to flush
     logger.experiment.assert_not_called()

-    logger = TensorBoardLogger(root_dir=tmpdir)
+    logger = TensorBoardLogger(root_dir=tmp_path)
     logger.log_metrics({"flush_me": 11.1})  # trigger creation of an experiment
     logger.finalize("any")

@@ -212,10 +214,10 @@ def test_tensorboard_finalize(monkeypatch, tmpdir):


 @mock.patch("lightning.fabric.loggers.tensorboard.log")
-def test_tensorboard_with_symlink(log, tmpdir):
+def test_tensorboard_with_symlink(log, tmp_path):
     """Tests a specific failure case when tensorboard logger is used with empty name, symbolic link ``save_dir``, and
     relative paths."""
-    os.chdir(tmpdir)  # need to use relative paths
+    os.chdir(tmp_path)  # need to use relative paths
     source = os.path.join(".", "lightning_logs")
     dest = os.path.join(".", "sym_lightning_logs")

@@ -228,10 +230,10 @@ def test_tensorboard_with_symlink(log, tmpdir):
     log.warning.assert_not_called()


-def test_tensorboard_missing_folder_warning(tmpdir, caplog):
+def test_tensorboard_missing_folder_warning(tmp_path, caplog):
     """Verify that the logger throws a warning for invalid directory."""
     name = "fake_dir"
-    logger = TensorBoardLogger(root_dir=tmpdir, name=name)
+    logger = TensorBoardLogger(root_dir=tmp_path, name=name)

     with caplog.at_level(logging.WARNING):
         assert logger.version == 0