From 939b3b77e536fc3ff4409e3e2a1606e7c9da72bb Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:07:29 +1200 Subject: [PATCH 1/8] Add numpy2-deprecation (NPY201) ruff rule Xref https://docs.astral.sh/ruff/rules/numpy2-deprecation --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 845f811de..61127e5f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ lint.extend-select = [ "SIM", # see: https://pypi.org/project/flake8-simplify "RET", # see: https://pypi.org/project/flake8-return "PT", # see: https://pypi.org/project/flake8-pytest-style + "NPY201", # see: https://docs.astral.sh/ruff/rules/numpy2-deprecation "RUF100" # yesqa ] lint.ignore = [ From 6e3e3512aa85e5be194432a219f62181557c31a4 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:14:25 +1200 Subject: [PATCH 2/8] Switch from np.sctypes.values() to np.core.sctypes.values() Technically np.core.sctypes is private, but this is the simplest one-line change. --- src/litdata/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/litdata/constants.py b/src/litdata/constants.py index 6caa75201..32dbed4b0 100644 --- a/src/litdata/constants.py +++ b/src/litdata/constants.py @@ -59,7 +59,7 @@ 19: torch.bool, } -_NUMPY_SCTYPES = [v for values in np.sctypes.values() for v in values] +_NUMPY_SCTYPES = [v for values in np.core.sctypes.values() for v in values] # All NumPy scalar types _NUMPY_DTYPES_MAPPING = {i: np.dtype(v) for i, v in enumerate(_NUMPY_SCTYPES)} _TIME_FORMAT = "%Y-%m-%d_%H-%M-%S.%fZ" From 1d1b3b21afdccf212c87b87997f62ab4b6ac4c88 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:17:45 +1200 Subject: [PATCH 3/8] Hardcode list of numpy sctypes values Remove usage of `np.core.sctypes.values()` which is private. --- src/litdata/constants.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/litdata/constants.py b/src/litdata/constants.py index 32dbed4b0..e50cc0330 100644 --- a/src/litdata/constants.py +++ b/src/litdata/constants.py @@ -59,7 +59,28 @@ 19: torch.bool, } -_NUMPY_SCTYPES = [v for values in np.core.sctypes.values() for v in values] # All NumPy scalar types +_NUMPY_SCTYPES = [ # All NumPy scalar types from np.core.sctypes.values() + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.float16, + np.float32, + np.float64, + np.longdouble, + np.complex64, + np.complex128, + np.clongdouble, + bool, + object, + bytes, + str, + np.void, +] _NUMPY_DTYPES_MAPPING = {i: np.dtype(v) for i, v in enumerate(_NUMPY_SCTYPES)} _TIME_FORMAT = "%Y-%m-%d_%H-%M-%S.%fZ" From e01434c04893e885f56795680601bfe0ef295e25 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:22:41 +1200 Subject: [PATCH 4/8] Remove upper pin on numpy 2.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 90f04087e..b4920f3ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ torch filelock -numpy < 2.0.0 +numpy boto3 requests From 945f9006396a8a4f78c0b77ced1a4999fe5ef7d5 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jul 2024 19:45:06 +1200 Subject: [PATCH 5/8] Debug error to see what np.core.sctypes looks like in Windows --- tests/streaming/test_serializer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/streaming/test_serializer.py b/tests/streaming/test_serializer.py index 746e41c08..623063247 100644 --- a/tests/streaming/test_serializer.py +++ b/tests/streaming/test_serializer.py @@ -207,7 +207,10 @@ def test_assert_no_header_numpy_serializer(): t = np.ones((10,)) assert serializer.can_serialize(t) data, name = serializer.serialize(t) - assert name == "no_header_numpy:10" + try: + assert name == "no_header_numpy:10" + except AssertionError: # debug what np.core.sctypes looks like on Windows + print(np.core.sctypes) assert serializer._dtype is None serializer.setup(name) assert serializer._dtype == np.dtype("float64") From 11c7b952f8080ddb9d5c4d1b1d3904a32f61c758 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 15 Jul 2024 20:37:13 +1200 Subject: [PATCH 6/8] Raise ValueError instead of print --- tests/streaming/test_serializer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/streaming/test_serializer.py b/tests/streaming/test_serializer.py index 623063247..03f7e30dc 100644 --- a/tests/streaming/test_serializer.py +++ b/tests/streaming/test_serializer.py @@ -209,8 +209,8 @@ def test_assert_no_header_numpy_serializer(): data, name = serializer.serialize(t) try: assert name == "no_header_numpy:10" - except AssertionError: # debug what np.core.sctypes looks like on Windows - print(np.core.sctypes) + except AssertionError as e: # debug what np.core.sctypes looks like on Windows + raise ValueError(np.core.sctypes) from e assert serializer._dtype is None serializer.setup(name) assert serializer._dtype == np.dtype("float64") From 1edbe00ac096cdb2faa7e38d2bd4f38f0714056c Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 16 Jul 2024 08:05:39 +0100 Subject: [PATCH 7/8] update --- tests/streaming/test_serializer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/streaming/test_serializer.py b/tests/streaming/test_serializer.py index 03f7e30dc..9a0d88ada 100644 --- a/tests/streaming/test_serializer.py +++ b/tests/streaming/test_serializer.py @@ -204,11 +204,11 @@ def test_assert_no_header_tensor_serializer(): def test_assert_no_header_numpy_serializer(): serializer = NoHeaderNumpySerializer() - t = np.ones((10,)) + t = np.ones((10,), dtype=np.float64) assert serializer.can_serialize(t) data, name = serializer.serialize(t) try: - assert name == "no_header_numpy:10" + assert name == "no_header_numpy:11" except AssertionError as e: # debug what np.core.sctypes looks like on Windows raise ValueError(np.core.sctypes) from e assert serializer._dtype is None From d97ddd37bfb770ae20ccea6252b37c03a26bc71d Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 16 Jul 2024 08:11:46 +0100 Subject: [PATCH 8/8] update --- src/litdata/constants.py | 5 ++--- tests/streaming/test_serializer.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/litdata/constants.py b/src/litdata/constants.py index e50cc0330..b001507bc 100644 --- a/src/litdata/constants.py +++ b/src/litdata/constants.py @@ -13,6 +13,7 @@ import os from pathlib import Path +from typing import Dict import numpy as np import torch @@ -71,17 +72,15 @@ np.float16, np.float32, np.float64, - np.longdouble, np.complex64, np.complex128, - np.clongdouble, bool, object, bytes, str, np.void, ] -_NUMPY_DTYPES_MAPPING = {i: np.dtype(v) for i, v in enumerate(_NUMPY_SCTYPES)} +_NUMPY_DTYPES_MAPPING: Dict[int, np.dtype] = {i: np.dtype(v) for i, v in enumerate(_NUMPY_SCTYPES)} _TIME_FORMAT = "%Y-%m-%d_%H-%M-%S.%fZ" _IS_IN_STUDIO = bool(os.getenv("LIGHTNING_CLOUD_PROJECT_ID", None)) and bool(os.getenv("LIGHTNING_CLUSTER_ID", None)) diff --git a/tests/streaming/test_serializer.py b/tests/streaming/test_serializer.py index 9a0d88ada..31789c833 100644 --- a/tests/streaming/test_serializer.py +++ b/tests/streaming/test_serializer.py @@ -208,7 +208,7 @@ def test_assert_no_header_numpy_serializer(): assert serializer.can_serialize(t) data, name = serializer.serialize(t) try: - assert name == "no_header_numpy:11" + assert name == "no_header_numpy:10" except AssertionError as e: # debug what np.core.sctypes looks like on Windows raise ValueError(np.core.sctypes) from e assert serializer._dtype is None