Merge branch 'checkpoint_normalization' into 'main'
Get normalization from the checkpoint when using checkpoint args.

See merge request ADLR/megatron-lm!787
jaredcasper committed Sep 13, 2023
2 parents 9465395 + a41f2d7 commit 52f1300
Showing 2 changed files with 1 addition and 1 deletion.
1 change: 0 additions & 1 deletion docs/llama2.md
@@ -86,7 +86,6 @@ If loading for either inference or finetuning, use the following arguments:
   --no-load-optim \
   --no-load-rng \
   --fp16 \
-  --DDP-impl local \
   --untie-embeddings-and-output-weights \
   --use-rotary-position-embeddings \
   --normalization RMSNorm \
1 change: 1 addition & 0 deletions megatron/checkpointing.py
@@ -482,6 +482,7 @@ def _set_arg(arg_name, old_arg_name=None, force=False):
     _set_arg('swiglu', force=True)
     _set_arg('untie_embeddings_and_output_weights', force=True)
     _set_arg('apply_layernorm_1p', force=True)
+    _set_arg('normalization', force=True)
     _set_arg('tokenizer_type')
     _set_arg('padded_vocab_size')
     if checkpoint_version < 3.0:
