Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,14 @@ def __init__(
self.check()
self.print()

def _disable_sequence_parallel_moe_if_needed(self, mode_name):
    """
    Turn off sequence parallel MoE when it is requested together with cudagraph.

    Nothing happens unless both ``parallel_config.use_sequence_parallel_moe``
    and ``graph_opt_config.use_cudagraph`` are truthy. When they are, the
    former is reset to ``False`` and a warning naming ``mode_name`` is logged.

    Args:
        mode_name: Label of the running mode, interpolated into the warning text.
    """
    # Guard clause: bail out unless both features are enabled at once.
    if not self.parallel_config.use_sequence_parallel_moe or not self.graph_opt_config.use_cudagraph:
        return

    self.parallel_config.use_sequence_parallel_moe = False
    message = (
        f"Sequence parallel MoE does not support {mode_name} mode with cudagraph. "
        "Setting use_sequence_parallel_moe to False."
    )
    logger.warning(message)

def postprocess(self):
"""
calculate some parameters
Expand Down Expand Up @@ -1685,10 +1693,12 @@ def postprocess(self):
logger.info("Multi-modal models do not support prefix caching when using CUDAGraph!")

if self.scheduler_config.splitwise_role == "mixed":
self._disable_sequence_parallel_moe_if_needed("Mixed")
self.model_config.moe_phase = MoEPhase(phase="prefill")
elif self.scheduler_config.splitwise_role == "prefill":
self.model_config.moe_phase = MoEPhase(phase="prefill")
elif self.scheduler_config.splitwise_role == "decode":
self._disable_sequence_parallel_moe_if_needed("PD's decode node")
self.model_config.moe_phase = MoEPhase(phase="decode")
else:
raise NotImplementedError
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ opentelemetry-api>=1.24.0
opentelemetry-sdk>=1.24.0
opentelemetry-instrumentation-redis
opentelemetry-instrumentation-mysql
opentelemetry-distro 
opentelemetry-distro
opentelemetry-exporter-otlp
opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-logging
Expand Down
Loading