From 89287e8a4f3406adbc1fb938c4a62e870a4f8f65 Mon Sep 17 00:00:00 2001
From: Juntao Wang
Date: Tue, 31 Mar 2026 16:48:26 +0300
Subject: [PATCH 1/2] Allow profiling ranks in string format with comma as separator

---
 src/cloudai/workloads/megatron_bridge/megatron_bridge.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/cloudai/workloads/megatron_bridge/megatron_bridge.py b/src/cloudai/workloads/megatron_bridge/megatron_bridge.py
index 48d6baa6a..d418534f6 100644
--- a/src/cloudai/workloads/megatron_bridge/megatron_bridge.py
+++ b/src/cloudai/workloads/megatron_bridge/megatron_bridge.py
@@ -113,7 +113,10 @@ class MegatronBridgeCmdArgs(CmdArgs):
     profiling_stop_step: Optional[int] = Field(default=None)
     record_memory_history: Optional[bool] = Field(default=None)
     profiling_gpu_metrics: Optional[bool] = Field(default=None)
-    profiling_ranks: Optional[Union[int, List[int]]] = Field(default=None)
+    profiling_ranks: Optional[Union[int, str, List[int]]] = Field(
+        default=None,
+        description="Rank ID, comma-separated rank IDs as a string (e.g. '0,4,8'), or list of rank IDs.",
+    )
     nsys_trace: Optional[Union[str, List[str]]] = Field(
         default=None,
         description="Comma-separated nsys trace events (e.g. cuda,nvtx).",

From 8054958f9dc2c79d951c8d75e41066adffdba397 Mon Sep 17 00:00:00 2001
From: Juntao Wang
Date: Wed, 1 Apr 2026 10:21:30 +0300
Subject: [PATCH 2/2] Add a test for string format of profiling ranks

---
 .../test_command_gen_strategy_slurm.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/workloads/megatron_bridge/test_command_gen_strategy_slurm.py b/tests/workloads/megatron_bridge/test_command_gen_strategy_slurm.py
index 8c2e96acd..7b37683d5 100644
--- a/tests/workloads/megatron_bridge/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/megatron_bridge/test_command_gen_strategy_slurm.py
@@ -439,3 +439,17 @@ def test_parse_srun_args_boolean_flags(self) -> None:
             "--exclusive --reservation my_reserv --overcommit"
         )
         assert result == ["exclusive", "reservation=my_reserv", "overcommit"]
+
+    def test_profiling_ranks_string_format(
+        self,
+        configured_slurm_system: SlurmSystem,
+        make_test_run: Callable[..., TestRun],
+    ) -> None:
+        tr = make_test_run(
+            cmd_args_overrides={"profiling_ranks": "0,1,2,3", "enable_nsys": True},
+            output_subdir="out_prof_str",
+        )
+        assert not tr.is_dse_job
+        cmd_gen = MegatronBridgeSlurmCommandGenStrategy(configured_slurm_system, tr)
+        wrapper_content = self._wrapper_content(cmd_gen)
+        assert "--profiling_ranks 0,1,2,3" in wrapper_content