diff --git a/megatron/neox_arguments/arguments.py b/megatron/neox_arguments/arguments.py
index ff4f4bc21..d9a586eb4 100644
--- a/megatron/neox_arguments/arguments.py
+++ b/megatron/neox_arguments/arguments.py
@@ -1035,7 +1035,7 @@ def calculate_derived(self):
         # the sequential model without the PipelineModule wrapper to avoid the overhead it incurs
         self.update_value(
             "is_pipe_parallel",
-            self.pipe_parallel_size > 1 and self.moe_num_experts == 1,
+            self.pipe_parallel_size >= 1 and self.moe_num_experts == 1,
         )
         if self.moe_num_experts > 1:
             assert not (
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 501edf345..3ac92598a 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,6 +1,6 @@
-git+https://github.com/EleutherAI/DeeperSpeed.git@02e2ebf7dee6aaab3d89094ed470a4609763c742#egg=deepspeed
+deepspeed@git+https://github.com/EleutherAI/DeeperSpeed.git@02e2ebf7dee6aaab3d89094ed470a4609763c742#egg=deepspeed
 ftfy>=6.0.1
-git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836
+lm_dataformat@git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836
 huggingface_hub>=0.11.0
 jinja2==3.1.4
 lm_eval>=0.4.0,<=0.4.1