From b7e7896ca77ee49e1654c4dfb4d917d067f7dc2b Mon Sep 17 00:00:00 2001 From: rohitrango Date: Fri, 8 Aug 2025 11:35:15 -0700 Subject: [PATCH 1/4] kl logprob clamp Signed-off-by: rohitrango --- 3rdparty/NeMo-workspace/NeMo | 2 +- nemo_rl/algorithms/utils.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/3rdparty/NeMo-workspace/NeMo b/3rdparty/NeMo-workspace/NeMo index 8ddf438734..a632f41477 160000 --- a/3rdparty/NeMo-workspace/NeMo +++ b/3rdparty/NeMo-workspace/NeMo @@ -1 +1 @@ -Subproject commit 8ddf4387344c6423763ec9ee0c9a755cbb5d8d35 +Subproject commit a632f41477a5f9410cea9ee0c5052a79e07cce93 diff --git a/nemo_rl/algorithms/utils.py b/nemo_rl/algorithms/utils.py index 6d634e3ceb..1bf3e5ae0c 100644 --- a/nemo_rl/algorithms/utils.py +++ b/nemo_rl/algorithms/utils.py @@ -25,7 +25,7 @@ def calculate_kl_penalty_joschu2020( - logprobs_policy: torch.Tensor, logprobs_reference: torch.Tensor + logprobs_policy: torch.Tensor, logprobs_reference: torch.Tensor, clamp_value: Optional[float] = 20.0 ) -> torch.Tensor: """Calculates a per-token estimate of the KL Divergence between two log_probs. @@ -35,6 +35,8 @@ def calculate_kl_penalty_joschu2020( logprobs_reference: torch.Tensor (b, s) """ r = logprobs_reference - logprobs_policy + if clamp_value is not None: + r = r.clamp(min=-clamp_value, max=clamp_value) return torch.exp(r) - r - 1 From 465e6fffecb9dc816b6a8eafd7799dce601badf9 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 16 Sep 2025 21:48:26 -0700 Subject: [PATCH 2/4] chore: align Megatron-LM submodule pointer with origin/main --- 3rdparty/Megatron-LM-workspace/Megatron-LM | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/Megatron-LM-workspace/Megatron-LM b/3rdparty/Megatron-LM-workspace/Megatron-LM index 2ff0f099ff..383d1144c3 160000 --- a/3rdparty/Megatron-LM-workspace/Megatron-LM +++ b/3rdparty/Megatron-LM-workspace/Megatron-LM @@ -1 +1 @@ -Subproject commit 2ff0f099ffc30ffd152e3e29e921a1609d00855c +Subproject commit 383d1144c3b3f77096c63b7308402a0ea6ba47dd From 97ff7ea680c9aa3b51a9684207e3b40868d4f929 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 16 Sep 2025 21:49:12 -0700 Subject: [PATCH 3/4] Fix merge --- .../is_megatron_bridge_installed.py | 21 ---- .../Megatron-Bridge-workspace/pyproject.toml | 14 --- 3rdparty/Megatron-Bridge-workspace/setup.py | 116 ------------------ .../is_megatron_installed.py | 21 ---- 3rdparty/Megatron-LM-workspace/pyproject.toml | 15 --- 3rdparty/Megatron-LM-workspace/setup.py | 98 --------------- 3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB | 103 ---------------- 7 files changed, 388 deletions(-) delete mode 100644 3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py delete mode 100644 3rdparty/Megatron-Bridge-workspace/pyproject.toml delete mode 100644 3rdparty/Megatron-Bridge-workspace/setup.py delete mode 100644 3rdparty/Megatron-LM-workspace/is_megatron_installed.py delete mode 100644 3rdparty/Megatron-LM-workspace/pyproject.toml delete mode 100644 3rdparty/Megatron-LM-workspace/setup.py delete mode 100644 3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB diff --git a/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py deleted file mode 100644 index b2ae4cf651..0000000000 --- a/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -try: - from megatron.bridge import AutoBridge # noqa: F401 - - INSTALLED = True -except Exception: - INSTALLED = False - -print(f"Megatron Bridge {INSTALLED=}") diff --git a/3rdparty/Megatron-Bridge-workspace/pyproject.toml b/3rdparty/Megatron-Bridge-workspace/pyproject.toml deleted file mode 100644 index b76ae67595..0000000000 --- a/3rdparty/Megatron-Bridge-workspace/pyproject.toml +++ /dev/null @@ -1,14 +0,0 @@ -[build-system] -requires = [ - "setuptools>=61.0", - "wheel", -] -build-backend = "setuptools.build_meta" - -[project] -name = "megatron-bridge" -dynamic = ["dependencies", "version"] -authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -description = "Standalone packaging for the Megatron Bridge sub-module." -requires-python = ">=3.10" - diff --git a/3rdparty/Megatron-Bridge-workspace/setup.py b/3rdparty/Megatron-Bridge-workspace/setup.py deleted file mode 100644 index 06657bab31..0000000000 --- a/3rdparty/Megatron-Bridge-workspace/setup.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import tomllib - -import setuptools - -# Conditional packaging mirroring NeMo and Megatron-LM workspaces -final_packages = [] -final_package_dir = {} - -# If the submodule is present, expose `megatron.bridge` package from the checkout -bridge_src_dir = "Megatron-Bridge/src/megatron/bridge" -bridge_package_name = "megatron.bridge" - -CACHED_DEPENDENCIES = [ - "accelerate>=1.6.0", - "datasets", - "numpy<2", - "omegaconf>=2.3.0", - "packaging", - "tensorboard>=2.19.0", - "torch", - "transformers>=4.51.3", - "typing-extensions", - "rich", - "wandb>=0.19.10", - "six>=1.17.0", - "regex>=2024.11.6", - "pyyaml>=6.0.2", - "einops>=0.8.1", - "sentencepiece>=0.2.0", - "tiktoken>=0.9.0", - "tqdm>=4.67.1", - "hydra-core>1.3,<=1.3.2", - "megatron-core>=0.14.0a0,<0.15.0", - "nvidia-modelopt[torch,onnx]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'", - "nvidia-resiliency-ext>=0.4.0a0,<0.5.0; sys_platform != 'darwin'", - "transformer-engine[pytorch]>=2.5.0a0,<2.6.0; sys_platform != 'darwin'", - "filelock", -] - -# If the bridge source exists, compare cached dependencies with the submodule's pyproject -if os.path.exists(bridge_src_dir): - pyproject_path = os.path.join("Megatron-Bridge", "pyproject.toml") - if not os.path.exists(pyproject_path): - raise FileNotFoundError( - f"[megatron-bridge][setup] {pyproject_path} not found; skipping dependency consistency check." - ) - - with open(pyproject_path, "rb") as f: - data = tomllib.load(f) - project = data["project"] - deps_list = project["dependencies"] - submodule_deps = set(str(d).strip() for d in deps_list) - - missing_in_cached = submodule_deps - set(CACHED_DEPENDENCIES) - extra_in_cached = set(CACHED_DEPENDENCIES) - submodule_deps - - if missing_in_cached or extra_in_cached: - print( - "[megatron-bridge][setup] Dependency mismatch between Megatron-Bridge-workspace/Megatron-Bridge/pyproject.toml vs Megatron-Bridge-workspace/setup.py::CACHED_DEPENDENCIES.", - file=sys.stderr, - ) - if missing_in_cached: - print( - " - Present in Megatron-Bridge/pyproject.toml but missing from CACHED_DEPENDENCIES:", - file=sys.stderr, - ) - for dep in sorted(missing_in_cached): - print(f" * {dep}", file=sys.stderr) - if extra_in_cached: - print( - " - Present in CACHED_DEPENDENCIES but not in Megatron-Bridge/pyproject.toml:", - file=sys.stderr, - ) - for dep in sorted(extra_in_cached): - print(f" * {dep}", file=sys.stderr) - print( - " Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.", - file=sys.stderr, - ) - sys.exit(1) - else: - print( - "[megatron-bridge][setup] Dependency sets are consistent with the submodule pyproject.", - file=sys.stderr, - ) - -if os.path.exists(bridge_src_dir): - final_packages.append(bridge_package_name) - final_package_dir[bridge_package_name] = bridge_src_dir - -setuptools.setup( - name="megatron-bridge", - version="0.0.0", - description="Standalone packaging for the Megatron Bridge sub-module.", - author="NVIDIA", - author_email="nemo-toolkit@nvidia.com", - packages=final_packages, - package_dir=final_package_dir, - py_modules=["is_megatron_bridge_installed"], - install_requires=CACHED_DEPENDENCIES, -) diff --git a/3rdparty/Megatron-LM-workspace/is_megatron_installed.py b/3rdparty/Megatron-LM-workspace/is_megatron_installed.py deleted file mode 100644 index 9a88db404f..0000000000 --- a/3rdparty/Megatron-LM-workspace/is_megatron_installed.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -try: - from megatron.core import parallel_state # noqa: F401 - - INSTALLED = True -except ImportError: - INSTALLED = False - -print(f"Megatron {INSTALLED=}") diff --git a/3rdparty/Megatron-LM-workspace/pyproject.toml b/3rdparty/Megatron-LM-workspace/pyproject.toml deleted file mode 100644 index 77f09f838a..0000000000 --- a/3rdparty/Megatron-LM-workspace/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - -[build-system] -requires = [ - "setuptools", - "pybind11", -] -build-backend = "setuptools.build_meta" - -[project] -name = "megatron-core" -dynamic = ["dependencies", "version"] -description = "Megatron Core - a library for efficient and scalable training of transformer based models" -authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] \ No newline at end of file diff --git a/3rdparty/Megatron-LM-workspace/setup.py b/3rdparty/Megatron-LM-workspace/setup.py deleted file mode 100644 index f3492afcd5..0000000000 --- a/3rdparty/Megatron-LM-workspace/setup.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Setup for pip package.""" - -import os -import subprocess - -import setuptools -from setuptools import Extension - -############################################################################### -# Extension Making # -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # - -# --- Configuration Start --- -# These will be populated conditionally or with defaults -final_packages = [] -final_package_dir = {} -final_ext_modules = [] - -# --- megatron.core conditional section --- -# Directory for the megatron.core Python package source -megatron_core_python_package_source_dir = "Megatron-LM/megatron/core" -megatron_core_package_name = "megatron.core" - -# Path for the C++ extension's source file, relative to setup.py -# This path is taken from your original setup.py -megatron_core_cpp_extension_source_file = "megatron/core/datasets/helpers.cpp" - -# Check if the main directory for the megatron.core Python package exists -if os.path.exists(megatron_core_python_package_source_dir): - # Add Python package 'megatron.core' - final_packages.append(megatron_core_package_name) - final_package_dir[megatron_core_package_name] = ( - megatron_core_python_package_source_dir - ) - - # If the Python package is being added, then check if its C++ extension can also be added - # This requires the specific C++ source file to exist - if os.path.exists(megatron_core_cpp_extension_source_file): - megatron_extension = Extension( - "megatron.core.datasets.helpers_cpp", # Name of the extension - sources=[megatron_core_cpp_extension_source_file], # Path to C++ source - language="c++", - extra_compile_args=( - subprocess.check_output(["python3", "-m", "pybind11", "--includes"]) - .decode("utf-8") - .strip() - .split() - ) - + ["-O3", "-Wall", "-std=c++17"], - optional=True, # As in your original setup - ) - final_ext_modules.append(megatron_extension) -# --- End of megatron.core conditional section --- - -setuptools.setup( - name="megatron-core", - version="0.0.0", - packages=final_packages, - package_dir=final_package_dir, - py_modules=["is_megatron_installed"], - ext_modules=final_ext_modules, - # Add in any packaged data. - include_package_data=True, - install_requires=[ - # From requirements/pytorch_25.03/requirements.txt - "einops", - "flask-restful", - "nltk", - "pytest", - "pytest-cov", - "pytest_mock", - "pytest-random-order", - "sentencepiece", - "tiktoken", - "wrapt", - "zarr", - "wandb", - "tensorstore!=0.1.46,!=0.1.72", - "torch", - "nvidia-modelopt[torch]>=0.23.2; sys_platform != 'darwin'", - # From megatron/core/requirements.txt - "torch", # Repeated with ^ just to make it easy to map back to the original requirements.txt - "packaging", - ], -) diff --git a/3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB b/3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB deleted file mode 100644 index f672031455..0000000000 --- a/3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB +++ /dev/null @@ -1,103 +0,0 @@ -NeMo-RL includes the following third-party software/licensing: - -* matplotlib; versions 1.3.0 and later -- https://github.com/matplotlib/matplotlib - -License agreement for matplotlib versions 1.3.0 and later -========================================================= - -1. This LICENSE AGREEMENT is between the Matplotlib Development Team -("MDT"), and the Individual or Organization ("Licensee") accessing and -otherwise using matplotlib software in source or binary form and its -associated documentation. - -2. Subject to the terms and conditions of this License Agreement, MDT -hereby grants Licensee a nonexclusive, royalty-free, world-wide license -to reproduce, analyze, test, perform and/or display publicly, prepare -derivative works, distribute, and otherwise use matplotlib -alone or in any derivative version, provided, however, that MDT's -License Agreement and MDT's notice of copyright, i.e., "Copyright (c) -2012- Matplotlib Development Team; All Rights Reserved" are retained in -matplotlib alone or in any derivative version prepared by -Licensee. - -3. In the event Licensee prepares a derivative work that is based on or -incorporates matplotlib or any part thereof, and wants to -make the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to matplotlib . - -4. MDT is making matplotlib available to Licensee on an "AS -IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB -WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. - -5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB - FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR -LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING -MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF -THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between MDT and -Licensee. This License Agreement does not grant permission to use MDT -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using matplotlib , -Licensee agrees to be bound by the terms and conditions of this License -Agreement. - -License agreement for matplotlib versions prior to 1.3.0 -======================================================== - -1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the -Individual or Organization ("Licensee") accessing and otherwise using -matplotlib software in source or binary form and its associated -documentation. - -2. Subject to the terms and conditions of this License Agreement, JDH -hereby grants Licensee a nonexclusive, royalty-free, world-wide license -to reproduce, analyze, test, perform and/or display publicly, prepare -derivative works, distribute, and otherwise use matplotlib -alone or in any derivative version, provided, however, that JDH's -License Agreement and JDH's notice of copyright, i.e., "Copyright (c) -2002-2011 John D. Hunter; All Rights Reserved" are retained in -matplotlib alone or in any derivative version prepared by -Licensee. - -3. In the event Licensee prepares a derivative work that is based on or -incorporates matplotlib or any part thereof, and wants to -make the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to matplotlib. - -4. JDH is making matplotlib available to Licensee on an "AS -IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB -WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. - -5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB - FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR -LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING -MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF -THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between JDH and -Licensee. This License Agreement does not grant permission to use JDH -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using matplotlib, -Licensee agrees to be bound by the terms and conditions of this License -Agreement. \ No newline at end of file From 4a4e5de713e11e9a7e3b5b0e9c8f1ae6dd305ffe Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 16 Sep 2025 21:54:55 -0700 Subject: [PATCH 4/4] revert: restore third-party files to match origin/main (undo unintended deletions) --- .../is_megatron_bridge_installed.py | 21 ++++ .../Megatron-Bridge-workspace/pyproject.toml | 14 +++ 3rdparty/Megatron-Bridge-workspace/setup.py | 116 ++++++++++++++++++ .../is_megatron_installed.py | 21 ++++ 3rdparty/Megatron-LM-workspace/pyproject.toml | 15 +++ 3rdparty/Megatron-LM-workspace/setup.py | 98 +++++++++++++++ 3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB | 103 ++++++++++++++++ 7 files changed, 388 insertions(+) create mode 100644 3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py create mode 100644 3rdparty/Megatron-Bridge-workspace/pyproject.toml create mode 100644 3rdparty/Megatron-Bridge-workspace/setup.py create mode 100644 3rdparty/Megatron-LM-workspace/is_megatron_installed.py create mode 100644 3rdparty/Megatron-LM-workspace/pyproject.toml create mode 100644 3rdparty/Megatron-LM-workspace/setup.py create mode 100644 3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB diff --git a/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py new file mode 100644 index 0000000000..b2ae4cf651 --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + from megatron.bridge import AutoBridge # noqa: F401 + + INSTALLED = True +except Exception: + INSTALLED = False + +print(f"Megatron Bridge {INSTALLED=}") diff --git a/3rdparty/Megatron-Bridge-workspace/pyproject.toml b/3rdparty/Megatron-Bridge-workspace/pyproject.toml new file mode 100644 index 0000000000..b76ae67595 --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = [ + "setuptools>=61.0", + "wheel", +] +build-backend = "setuptools.build_meta" + +[project] +name = "megatron-bridge" +dynamic = ["dependencies", "version"] +authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] +description = "Standalone packaging for the Megatron Bridge sub-module." +requires-python = ">=3.10" + diff --git a/3rdparty/Megatron-Bridge-workspace/setup.py b/3rdparty/Megatron-Bridge-workspace/setup.py new file mode 100644 index 0000000000..06657bab31 --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/setup.py @@ -0,0 +1,116 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import tomllib + +import setuptools + +# Conditional packaging mirroring NeMo and Megatron-LM workspaces +final_packages = [] +final_package_dir = {} + +# If the submodule is present, expose `megatron.bridge` package from the checkout +bridge_src_dir = "Megatron-Bridge/src/megatron/bridge" +bridge_package_name = "megatron.bridge" + +CACHED_DEPENDENCIES = [ + "accelerate>=1.6.0", + "datasets", + "numpy<2", + "omegaconf>=2.3.0", + "packaging", + "tensorboard>=2.19.0", + "torch", + "transformers>=4.51.3", + "typing-extensions", + "rich", + "wandb>=0.19.10", + "six>=1.17.0", + "regex>=2024.11.6", + "pyyaml>=6.0.2", + "einops>=0.8.1", + "sentencepiece>=0.2.0", + "tiktoken>=0.9.0", + "tqdm>=4.67.1", + "hydra-core>1.3,<=1.3.2", + "megatron-core>=0.14.0a0,<0.15.0", + "nvidia-modelopt[torch,onnx]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'", + "nvidia-resiliency-ext>=0.4.0a0,<0.5.0; sys_platform != 'darwin'", + "transformer-engine[pytorch]>=2.5.0a0,<2.6.0; sys_platform != 'darwin'", + "filelock", +] + +# If the bridge source exists, compare cached dependencies with the submodule's pyproject +if os.path.exists(bridge_src_dir): + pyproject_path = os.path.join("Megatron-Bridge", "pyproject.toml") + if not os.path.exists(pyproject_path): + raise FileNotFoundError( + f"[megatron-bridge][setup] {pyproject_path} not found; skipping dependency consistency check." + ) + + with open(pyproject_path, "rb") as f: + data = tomllib.load(f) + project = data["project"] + deps_list = project["dependencies"] + submodule_deps = set(str(d).strip() for d in deps_list) + + missing_in_cached = submodule_deps - set(CACHED_DEPENDENCIES) + extra_in_cached = set(CACHED_DEPENDENCIES) - submodule_deps + + if missing_in_cached or extra_in_cached: + print( + "[megatron-bridge][setup] Dependency mismatch between Megatron-Bridge-workspace/Megatron-Bridge/pyproject.toml vs Megatron-Bridge-workspace/setup.py::CACHED_DEPENDENCIES.", + file=sys.stderr, + ) + if missing_in_cached: + print( + " - Present in Megatron-Bridge/pyproject.toml but missing from CACHED_DEPENDENCIES:", + file=sys.stderr, + ) + for dep in sorted(missing_in_cached): + print(f" * {dep}", file=sys.stderr) + if extra_in_cached: + print( + " - Present in CACHED_DEPENDENCIES but not in Megatron-Bridge/pyproject.toml:", + file=sys.stderr, + ) + for dep in sorted(extra_in_cached): + print(f" * {dep}", file=sys.stderr) + print( + " Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.", + file=sys.stderr, + ) + sys.exit(1) + else: + print( + "[megatron-bridge][setup] Dependency sets are consistent with the submodule pyproject.", + file=sys.stderr, + ) + +if os.path.exists(bridge_src_dir): + final_packages.append(bridge_package_name) + final_package_dir[bridge_package_name] = bridge_src_dir + +setuptools.setup( + name="megatron-bridge", + version="0.0.0", + description="Standalone packaging for the Megatron Bridge sub-module.", + author="NVIDIA", + author_email="nemo-toolkit@nvidia.com", + packages=final_packages, + package_dir=final_package_dir, + py_modules=["is_megatron_bridge_installed"], + install_requires=CACHED_DEPENDENCIES, +) diff --git a/3rdparty/Megatron-LM-workspace/is_megatron_installed.py b/3rdparty/Megatron-LM-workspace/is_megatron_installed.py new file mode 100644 index 0000000000..9a88db404f --- /dev/null +++ b/3rdparty/Megatron-LM-workspace/is_megatron_installed.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +try: + from megatron.core import parallel_state # noqa: F401 + + INSTALLED = True +except ImportError: + INSTALLED = False + +print(f"Megatron {INSTALLED=}") diff --git a/3rdparty/Megatron-LM-workspace/pyproject.toml b/3rdparty/Megatron-LM-workspace/pyproject.toml new file mode 100644 index 0000000000..77f09f838a --- /dev/null +++ b/3rdparty/Megatron-LM-workspace/pyproject.toml @@ -0,0 +1,15 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + +[build-system] +requires = [ + "setuptools", + "pybind11", +] +build-backend = "setuptools.build_meta" + +[project] +name = "megatron-core" +dynamic = ["dependencies", "version"] +description = "Megatron Core - a library for efficient and scalable training of transformer based models" +authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] +maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] \ No newline at end of file diff --git a/3rdparty/Megatron-LM-workspace/setup.py b/3rdparty/Megatron-LM-workspace/setup.py new file mode 100644 index 0000000000..f3492afcd5 --- /dev/null +++ b/3rdparty/Megatron-LM-workspace/setup.py @@ -0,0 +1,98 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Setup for pip package.""" + +import os +import subprocess + +import setuptools +from setuptools import Extension + +############################################################################### +# Extension Making # +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # + +# --- Configuration Start --- +# These will be populated conditionally or with defaults +final_packages = [] +final_package_dir = {} +final_ext_modules = [] + +# --- megatron.core conditional section --- +# Directory for the megatron.core Python package source +megatron_core_python_package_source_dir = "Megatron-LM/megatron/core" +megatron_core_package_name = "megatron.core" + +# Path for the C++ extension's source file, relative to setup.py +# This path is taken from your original setup.py +megatron_core_cpp_extension_source_file = "megatron/core/datasets/helpers.cpp" + +# Check if the main directory for the megatron.core Python package exists +if os.path.exists(megatron_core_python_package_source_dir): + # Add Python package 'megatron.core' + final_packages.append(megatron_core_package_name) + final_package_dir[megatron_core_package_name] = ( + megatron_core_python_package_source_dir + ) + + # If the Python package is being added, then check if its C++ extension can also be added + # This requires the specific C++ source file to exist + if os.path.exists(megatron_core_cpp_extension_source_file): + megatron_extension = Extension( + "megatron.core.datasets.helpers_cpp", # Name of the extension + sources=[megatron_core_cpp_extension_source_file], # Path to C++ source + language="c++", + extra_compile_args=( + subprocess.check_output(["python3", "-m", "pybind11", "--includes"]) + .decode("utf-8") + .strip() + .split() + ) + + ["-O3", "-Wall", "-std=c++17"], + optional=True, # As in your original setup + ) + final_ext_modules.append(megatron_extension) +# --- End of megatron.core conditional section --- + +setuptools.setup( + name="megatron-core", + version="0.0.0", + packages=final_packages, + package_dir=final_package_dir, + py_modules=["is_megatron_installed"], + ext_modules=final_ext_modules, + # Add in any packaged data. + include_package_data=True, + install_requires=[ + # From requirements/pytorch_25.03/requirements.txt + "einops", + "flask-restful", + "nltk", + "pytest", + "pytest-cov", + "pytest_mock", + "pytest-random-order", + "sentencepiece", + "tiktoken", + "wrapt", + "zarr", + "wandb", + "tensorstore!=0.1.46,!=0.1.72", + "torch", + "nvidia-modelopt[torch]>=0.23.2; sys_platform != 'darwin'", + # From megatron/core/requirements.txt + "torch", # Repeated with ^ just to make it easy to map back to the original requirements.txt + "packaging", + ], +) diff --git a/3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB b/3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB new file mode 100644 index 0000000000..f672031455 --- /dev/null +++ b/3rdparty/THIRD_PARTY_LICENSE_MATPLOTLIB @@ -0,0 +1,103 @@ +NeMo-RL includes the following third-party software/licensing: + +* matplotlib; versions 1.3.0 and later -- https://github.com/matplotlib/matplotlib + +License agreement for matplotlib versions 1.3.0 and later +========================================================= + +1. This LICENSE AGREEMENT is between the Matplotlib Development Team +("MDT"), and the Individual or Organization ("Licensee") accessing and +otherwise using matplotlib software in source or binary form and its +associated documentation. + +2. Subject to the terms and conditions of this License Agreement, MDT +hereby grants Licensee a nonexclusive, royalty-free, world-wide license +to reproduce, analyze, test, perform and/or display publicly, prepare +derivative works, distribute, and otherwise use matplotlib +alone or in any derivative version, provided, however, that MDT's +License Agreement and MDT's notice of copyright, i.e., "Copyright (c) +2012- Matplotlib Development Team; All Rights Reserved" are retained in +matplotlib alone or in any derivative version prepared by +Licensee. + +3. In the event Licensee prepares a derivative work that is based on or +incorporates matplotlib or any part thereof, and wants to +make the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to matplotlib . + +4. MDT is making matplotlib available to Licensee on an "AS +IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB +WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR +LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING +MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF +THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between MDT and +Licensee. This License Agreement does not grant permission to use MDT +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using matplotlib , +Licensee agrees to be bound by the terms and conditions of this License +Agreement. + +License agreement for matplotlib versions prior to 1.3.0 +======================================================== + +1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the +Individual or Organization ("Licensee") accessing and otherwise using +matplotlib software in source or binary form and its associated +documentation. + +2. Subject to the terms and conditions of this License Agreement, JDH +hereby grants Licensee a nonexclusive, royalty-free, world-wide license +to reproduce, analyze, test, perform and/or display publicly, prepare +derivative works, distribute, and otherwise use matplotlib +alone or in any derivative version, provided, however, that JDH's +License Agreement and JDH's notice of copyright, i.e., "Copyright (c) +2002-2011 John D. Hunter; All Rights Reserved" are retained in +matplotlib alone or in any derivative version prepared by +Licensee. + +3. In the event Licensee prepares a derivative work that is based on or +incorporates matplotlib or any part thereof, and wants to +make the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to matplotlib. + +4. JDH is making matplotlib available to Licensee on an "AS +IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB +WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR +LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING +MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF +THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between JDH and +Licensee. This License Agreement does not grant permission to use JDH +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using matplotlib, +Licensee agrees to be bound by the terms and conditions of this License +Agreement. \ No newline at end of file