Skip to content

Commit

Permalink
Enable no op optimize (#19490)
Browse files Browse the repository at this point in the history
  • Loading branch information
tchaton committed Feb 16, 2024
1 parent 53ea76a commit bbc5488
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 4 deletions.
9 changes: 7 additions & 2 deletions src/lightning/data/processing/data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,12 @@ def _done(self, size: int, delete_cached_files: bool, output_dir: Dir) -> _Resul

size = sum([c["dim"] if c["dim"] is not None else c["chunk_size"] for c in config["chunks"]])
num_bytes = sum([c["chunk_bytes"] for c in config["chunks"]])
data_format = tree_unflatten(config["config"]["data_format"], treespec_loads(config["config"]["data_spec"]))
if config["config"] is not None:
data_format = tree_unflatten(
config["config"]["data_format"], treespec_loads(config["config"]["data_spec"])
)
else:
data_format = None
num_chunks = len(config["chunks"])

# The platform can't store more than 1024 entries.
Expand All @@ -735,7 +740,7 @@ def _done(self, size: int, delete_cached_files: bool, output_dir: Dir) -> _Resul
size=size,
num_bytes=num_bytes,
data_format=data_format,
compression=config["config"]["compression"],
compression=config["config"]["compression"] if config["config"] else None,
num_chunks=len(config["chunks"]),
num_bytes_per_chunk=num_bytes_per_chunk,
)
Expand Down
4 changes: 2 additions & 2 deletions src/lightning/data/processing/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import urllib
from contextlib import contextmanager
from subprocess import Popen
from subprocess import DEVNULL, Popen
from typing import Any, Callable, List, Optional, Tuple, Union

from lightning.data.constants import _IS_IN_STUDIO, _LIGHTNING_CLOUD_LATEST
Expand Down Expand Up @@ -134,7 +134,7 @@ def optimize_dns(enable: bool) -> None:
f"sudo /home/zeus/miniconda3/envs/cloudspace/bin/python"
f" -c 'from lightning.data.processing.utilities import _optimize_dns; _optimize_dns({enable})'"
)
Popen(cmd, shell=True).wait() # E501
Popen(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL).wait() # E501


def _optimize_dns(enable: bool) -> None:
Expand Down
16 changes: 16 additions & 0 deletions tests/tests_data/processing/test_data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,3 +1023,19 @@ def test_map_is_last(num_workers, expected, tmpdir):
)

assert sorted(os.listdir(tmpdir)) == expected


def no_op(index):
    """Ignore ``index`` and return ``None``.

    Used by ``test_empty_optimize`` as the function handed to ``optimize``
    so that no item is produced for any input.
    """
    return None


def test_empty_optimize(tmpdir):
    """Run ``optimize`` with a function that does nothing and check the output.

    ``no_op`` returns ``None`` for each of the ten inputs; after the call the
    output directory is expected to contain only ``index.json``.
    """
    optimize(
        no_op,
        list(range(10)),
        output_dir=str(tmpdir),
        chunk_bytes="64MB",
        num_workers=1,
    )

    # sorted() mirrors the listing assertion in test_map_is_last and keeps the
    # check independent of the arbitrary order os.listdir returns entries in.
    assert sorted(os.listdir(tmpdir)) == ["index.json"]

0 comments on commit bbc5488

Please sign in to comment.