# Perform Retune runs

In [1]:
import os

# ---------------------------------------------------------------------------
# Run configuration shared by every cell below
# ---------------------------------------------------------------------------
GPU_DEVICES = "auto"
ENABLE_WANDB = True
WANDB_PREFIX = "Eagle-Retune"
DEEPSPEED_STRAT = "deepspeed_stage_2"
LEARNING_RATE = "5e-6"  # kept as a string; it is interpolated into the trainer command line

print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# Value exported as the WANDB_MODE environment variable by the training cells.
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# Checkpoint the retune starts from, and the microbatch size handed to the trainer
MODEL_PATH = "/workspace/main-models/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth"
MICROBATCH_SIZE = 8

# ---------------------------------------------------------------------------
# Directory layout
# ---------------------------------------------------------------------------
# NOTE: "__file__" is a plain string literal here (not the __file__ variable),
# so abspath() resolves it against the current working directory and dirname()
# then yields that working directory itself.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
# The project root sits three levels above the notebook directory ...
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../"))
# ... and the trainer code lives in its RWKV-v5 sub-directory.
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

ENABLE_WANDB: True
GPU_DEVICES: auto
NOTEBOOK_DIR: /workspace/picocreator/RWKV-infctx-trainer/notebook/major-runs/Eagle-2T-retune
TRAINER_DIR: /workspace/picocreator/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /workspace/picocreator/RWKV-infctx-trainer


In [2]:
# Experiment label for the 7B model trained with the "no-mask" retune config.
# It is reused below for the checkpoint directory and the logger run name.
EXPERIMENT_NAME="7B-Base-No-Mask"

# Launch the training run: this invokes `lightning_trainer.py fit` from
# TRAINER_DIR using retune-train-no-mask.yaml, starting from MODEL_PATH.
# lr_init and lr_final are both set to LEARNING_RATE, so the initial and final
# learning rates are the same value. Checkpoints are written under
# /checkpoint/retune/{EXPERIMENT_NAME}/.
# The {NAME} placeholders are filled in by IPython from the notebook's Python
# variables (set in the configuration cell) before the shell runs the command.
!cd "{TRAINER_DIR}" && \
    export RWKV_TORCH_COMPILE=1 && \
    export RWKV_NO_CUDA=0 && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/retune-train-no-mask.yaml" \
        --model.load_model="{MODEL_PATH}" \
        --model.lr_init={LEARNING_RATE} \
        --model.lr_final={LEARNING_RATE} \
        --data.skip_datapath_setup=True \
        --trainer.callbacks.init_args.dirpath="/checkpoint/retune/{EXPERIMENT_NAME}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - {EXPERIMENT_NAME} ({DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.target_batch_size=1024 \
        --trainer.microbatch_size={MICROBATCH_SIZE} \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-03-07 05:57:14,913] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-compile' with torch '2.1.1+cu121'
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/picocreator/RWKV-infctx-trainer/notebook/major-runs/Eagle-2T-retune/retune-train-no-mask.yaml', '--model.load_model=/workspace/main-models/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--model.lr_init=5e-6', '--model.lr_final=5e-6', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=/checkpoint/retune/7B-Base-No-Mask/', '--trainer.logger.init_args.name=Eagle-Retune - 7B-Base-No-Mask (deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.ta

In [3]:
# Let's export the model from the checkpoint.
# EXPERIMENT_NAME is re-declared so this cell can be run without re-running
# the training cell; CKPT_DIR is the checkpoint entry inside the experiment's
# checkpoint directory that gets exported.
EXPERIMENT_NAME="7B-Base-No-Mask"
CKPT_DIR="last.ckpt"

# Run export_checkpoint.py on the checkpoint to produce a single .pth file
# under /workspace/main-models/R4-retune/, then list that file to confirm it
# exists and show its size.
!cd "{TRAINER_DIR}" && \
    python export_checkpoint.py "/checkpoint/retune/{EXPERIMENT_NAME}/{CKPT_DIR}/" "/workspace/main-models/R4-retune/R4-{EXPERIMENT_NAME}.pth"
!cd "{TRAINER_DIR}" && ls -alh "/workspace/main-models/R4-retune/R4-{EXPERIMENT_NAME}.pth"

[2024-03-07 09:38:14,170] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Processing zero checkpoint '/checkpoint/retune/7B-Base-No-Mask/last.ckpt/checkpoint'
Detected checkpoint of type zero stage 2, world_size: 8
Parsing checkpoint created by deepspeed==0.12.6
Reconstructed fp32 state dict with 710 params 7518044160 elements
Saving bf16 state dict to /workspace/main-models/R4-retune/R4-7B-Base-No-Mask.pth
-rw-r--r-- 1 nobody root 15G Mar  7 09:39 /workspace/main-models/R4-retune/R4-7B-Base-No-Mask.pth


In [4]:
# Upload the exported R4-{EXPERIMENT_NAME}.pth file to the Hugging Face repo
# rwkv-x-dev/eagle-7b-experiment via huggingface-cli.
# NOTE(review): presumably requires an already-authenticated huggingface-cli
# session on this machine — confirm before running.
EXPERIMENT_NAME="7B-Base-No-Mask"
!cd "/workspace/main-models/R4-retune/" && \
    huggingface-cli upload rwkv-x-dev/eagle-7b-experiment "./R4-{EXPERIMENT_NAME}.pth"

Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.
R4-7B-Base-No-Mask.pth: 100%|██████████████| 15.0G/15.0G [06:28<00:00, 38.7MB/s]
https://huggingface.co/rwkv-x-dev/eagle-7b-experiment/blob/main/R4-7B-Base-No-Mask.pth


In [None]:
# Experiment label for the 7B model trained with the "with-mask" retune config.
# It is reused below for the checkpoint directory and the logger run name.
EXPERIMENT_NAME="7B-Base-With-Mask"

# Launch the training run: this invokes `lightning_trainer.py fit` from
# TRAINER_DIR using retune-train-with-mask.yaml, starting from MODEL_PATH.
# lr_init and lr_final are both set to LEARNING_RATE, so the initial and final
# learning rates are the same value. Checkpoints are written under
# /checkpoint/retune/{EXPERIMENT_NAME}/.
# The {NAME} placeholders are filled in by IPython from the notebook's Python
# variables (set in the configuration cell) before the shell runs the command.
!cd "{TRAINER_DIR}" && \
    export RWKV_TORCH_COMPILE=1 && \
    export RWKV_NO_CUDA=0 && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/retune-train-with-mask.yaml" \
        --model.load_model="{MODEL_PATH}" \
        --model.lr_init={LEARNING_RATE} \
        --model.lr_final={LEARNING_RATE} \
        --data.skip_datapath_setup=True \
        --trainer.callbacks.init_args.dirpath="/checkpoint/retune/{EXPERIMENT_NAME}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - {EXPERIMENT_NAME} ({DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.target_batch_size=1024 \
        --trainer.microbatch_size={MICROBATCH_SIZE} \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-03-07 09:46:36,561] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-compile' with torch '2.1.1+cu121'
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/workspace/picocreator/RWKV-infctx-trainer/notebook/major-runs/Eagle-2T-retune/retune-train-with-mask.yaml', '--model.load_model=/workspace/main-models/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--model.lr_init=5e-6', '--model.lr_final=5e-6', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=/checkpoint/retune/7B-Base-With-Mask/', '--trainer.logger.init_args.name=Eagle-Retune - 7B-Base-With-Mask (deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trai

In [None]:
# Let's export the model from the checkpoint.
# EXPERIMENT_NAME is re-declared so this cell can be run without re-running
# the training cell; CKPT_DIR is the checkpoint entry inside the experiment's
# checkpoint directory that gets exported.
EXPERIMENT_NAME="7B-Base-With-Mask"
CKPT_DIR="last.ckpt"

# Run export_checkpoint.py on the checkpoint to produce a single .pth file
# under /workspace/main-models/R4-retune/, then list that file to confirm it
# exists and show its size.
!cd "{TRAINER_DIR}" && \
    python export_checkpoint.py "/checkpoint/retune/{EXPERIMENT_NAME}/{CKPT_DIR}/" "/workspace/main-models/R4-retune/R4-{EXPERIMENT_NAME}.pth"
!cd "{TRAINER_DIR}" && ls -alh "/workspace/main-models/R4-retune/R4-{EXPERIMENT_NAME}.pth"

In [None]:
# Upload the exported R4-{EXPERIMENT_NAME}.pth file to the Hugging Face repo
# rwkv-x-dev/eagle-7b-experiment via huggingface-cli.
# NOTE(review): presumably requires an already-authenticated huggingface-cli
# session on this machine — confirm before running.
EXPERIMENT_NAME="7B-Base-With-Mask"
!cd "/workspace/main-models/R4-retune/" && \
    huggingface-cli upload rwkv-x-dev/eagle-7b-experiment "./R4-{EXPERIMENT_NAME}.pth"