Skip to content

Commit 147168c

Browse files
Merge pull request #3692 from AI-Hypercomputer:hengtaoguo-rl-ep
PiperOrigin-RevId: 901473973
2 parents a049f9a + 16c444f commit 147168c

2 files changed

Lines changed: 2 additions & 1 deletion

File tree

src/maxtext/configs/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ class VLLM(BaseModel):
1731 1731
hbm_utilization_vllm: float = Field(0.72, description="Target HBM utilization for vLLM.")
1732 1732
swap_space_vllm_gb: int = Field(2, description="Swap space in GB for vLLM.")
1733 1733
enable_dp_attention: bool = Field(False, description="Enable the attn_dp mesh axis in vLLM.")
1734 +
enable_expert_parallel: bool = Field(False, description="Enable expert parallelism in vLLM.")
1734 1735
async_scheduling: bool = Field(False, description="Enable asynchronous scheduling in vLLM.")
1735 1736
max_num_batched_tokens: Optional[int] = Field(None, description="Max number of batched tokens in vLLM.")
1736 1737
max_num_seqs: Optional[int] = Field(None, description="Max number of sequences in vLLM.")

src/maxtext/trainers/post_train/rl/train_rl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ def create_rl_components(
541 541
rollout_vllm_server_mode=trainer_config.rl.use_agentic_rollout,
542 542
rollout_vllm_kwargs={
543 543
"hf_overrides": trainer_config.vllm_hf_overrides,
544 -
"enable_expert_parallel": sampler_config.rollout_expert_parallelism > 1,
544 +
"enable_expert_parallel": sampler_config.enable_expert_parallel,
545 545
"enable_prefix_caching": True, # Enable prefix caching to speed up generation for long prompts
546 546
},
547 547
rollout_vllm_sampling_kwargs={

0 commit comments

Comments (0)