Skip to content

Commit

Permalink
fix: remove useless params
Browse files: browse the repository at this point in the history
  • Loading branch information
xiaotaoliu committed Feb 26, 2024
1 parent 40518cb commit 90821a4
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions examples/pretrain_gpt_moe_demo.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/bin/bash

# Runs the "345M" parameter model

export CUDA_DEVICE_MAX_CONNECTIONS=1

Expand All @@ -13,9 +12,9 @@ NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))

CHECKPOINT_PATH=checkpoint
-VOCAB_FILE=/mnt/nanjing3cephfs/mm-base-plt2/dev-xtl/data/test/gpt2-vocab.json
-MERGE_FILE=/mnt/nanjing3cephfs/mm-base-plt2/dev-xtl/data/test/gpt2-merges.txt
-DATA_PATH=/mnt/nanjing3cephfs/mm-base-plt2/dev-xtl/temp-test/Megatron-LM4/train_data/my-gpt2_text_document
+VOCAB_FILE=/data/gpt2-vocab.json
+MERGE_FILE=/data/gpt2-merges.txt
+DATA_PATH=./train_data/my-gpt2_text_document


DISTRIBUTED_ARGS="
Expand All @@ -38,8 +37,8 @@ GPT_ARGS="
--seq-length 4096 \
--max-position-embeddings 4096 \
--bf16
---micro-batch-size 1 \
---global-batch-size 16 \
+--micro-batch-size 2 \
+--global-batch-size 32 \
--lr 3.4e-4 \
--train-iters 10000 \
--lr-decay-iters 8000 \
Expand Down Expand Up @@ -69,8 +68,9 @@ DATA_ARGS="
--split 949,50,1
"

+# --expert-model-parallel-size 2 \
MOE_ARGS="
---num-experts 2 \
+--num-experts 4 \
--moe-grouped-gemm \
--moe-router-topk 1 \
--moe-router-load-balancing-type aux_loss \
Expand All @@ -83,7 +83,7 @@ OUTPUT_ARGS="
--save-interval 10000 \
--eval-interval 1000 \
--eval-iters 10 \
---tensorboard-dir ./tensorboard/test_mcore_gpt_gemm \
+--tensorboard-dir ./tensorboard/test_mcore_moe \
--tensorboard-log-interval 1 \
"

Expand Down

0 comments on commit 90821a4

Please sign in to comment.