# Perform validation runs

In [2]:
# Notebook-wide configuration for the validation runs.
# Every value defined here is interpolated into the shell commands of the
# later cells, so this cell has to be executed first.
import os

# --- Run settings -----------------------------------------------------------
# Value handed to --trainer.devices
GPU_DEVICES="auto"
# Toggles Weights & Biases logging (mapped to WANDB_MODE below)
ENABLE_WANDB=True
# Prefix of the logger run name
WANDB_PREFIX="Eagle-Base Validation"
# Value handed to --trainer.strategy
DEEPSPEED_STRAT="deepspeed_stage_2"

# Used in both the checkpoint dirpath and the logger run name
EXPERIMENT_NAME="Baseline Validation"
# Kept as a string because it is pasted verbatim into the command line
# (--model.lr_init / --model.lr_final)
LEARNING_RATE="1e-5"

print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# Exported as the WANDB_MODE environment variable by the validation cells
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# --- Model ------------------------------------------------------------------
# Default checkpoint to validate; the individual validation cells reassign
# MODEL_PATH to the checkpoint they target.
MODEL_PATH="/workspace/main-models/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth"

# --- Paths ------------------------------------------------------------------
# The kernel's working directory stands in for the notebook's own directory.
# This assumes the kernel was started from the folder that holds this notebook;
# the project root is then three levels up and the trainer lives in RWKV-v5/.
NOTEBOOK_DIR=os.getcwd()
PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../"))
TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

ENABLE_WANDB: True
GPU_DEVICES: auto
NOTEBOOK_DIR: /workspace/picocreator/RWKV-infctx-trainer/notebook/major-runs/Eagle-2T-calibration
TRAINER_DIR: /workspace/picocreator/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /workspace/picocreator/RWKV-infctx-trainer


In [3]:
# Checkpoint validated by this cell: the 7B model
# (file name: RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth).
MODEL_PATH="/workspace/main-models/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth"

# Perform the validation.
# The shell command below changes into TRAINER_DIR, exports RWKV_NO_CUDA=0 and
# WANDB_MODE, then runs `lightning_trainer.py validate` with the ref-3e-5.yaml
# config located in NOTEBOOK_DIR. Each {NAME} placeholder is filled in from the
# notebook's Python variable of that name before the command reaches the shell.
# The checkpoint dirpath and the logger name embed EXPERIMENT_NAME, which
# contains a space; both arguments are double-quoted so they stay one argument.
# NOTE(review): lr_init / lr_final are passed even though this is a `validate`
# run — presumably they have no effect here; confirm against the trainer.
!cd "{TRAINER_DIR}" && \
    export RWKV_NO_CUDA=0 && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py validate \
        -c "{NOTEBOOK_DIR}/ref-3e-5.yaml" \
        --model.load_model="{MODEL_PATH}" \
        --model.lr_init={LEARNING_RATE} \
        --model.lr_final={LEARNING_RATE} \
        --data.skip_datapath_setup=True \
        --trainer.callbacks.init_args.dirpath="/checkpoint/calibration/Eagle-2T/{EXPERIMENT_NAME}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - {EXPERIMENT_NAME} ({DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.target_batch_size=1024 \
        --trainer.microbatch_size=8 \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-02-04 09:23:43,108] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['validate', '-c', '/workspace/picocreator/RWKV-infctx-trainer/notebook/major-runs/Eagle-2T-calibration/ref-3e-5.yaml', '--model.load_model=/workspace/main-models/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--model.lr_init=1e-5', '--model.lr_final=1e-5', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=/checkpoint/calibration/Eagle-2T/Baseline Validation/', '--trainer.logger.init_args.name=Eagle-Base Validation - Baseline Validation (deepspeed_stage_2)', '--trainer.strategy=deepspee

In [6]:
# Checkpoint validated by this cell: the 1B5 model
# (file name: RWKV-5-World-1B5-v2-20231025-ctx4096.pth).
MODEL_PATH="/workspace/main-models/RWKV-5-World-1B5-v2-20231025-ctx4096.pth"

# Perform the validation.
# Same command as the 7B validation cell; only MODEL_PATH differs. It changes
# into TRAINER_DIR, exports RWKV_NO_CUDA=0 and WANDB_MODE, then runs
# `lightning_trainer.py validate` with the ref-3e-5.yaml config located in
# NOTEBOOK_DIR. Each {NAME} placeholder is filled in from the notebook's Python
# variable of that name before the command reaches the shell.
# NOTE(review): the output saved with this cell reports lr_init/lr_final of
# 8e-5 and the experiment name "8e-5-with-cuda", which does not match the
# current LEARNING_RATE / EXPERIMENT_NAME values — it looks like it was produced
# before the configuration was changed. Re-run after the configuration cell so
# the recorded output matches the settings shown in this notebook.
# NOTE(review): lr_init / lr_final are passed even though this is a `validate`
# run — presumably they have no effect here; confirm against the trainer.
!cd "{TRAINER_DIR}" && \
    export RWKV_NO_CUDA=0 && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py validate \
        -c "{NOTEBOOK_DIR}/ref-3e-5.yaml" \
        --model.load_model="{MODEL_PATH}" \
        --model.lr_init={LEARNING_RATE} \
        --model.lr_final={LEARNING_RATE} \
        --data.skip_datapath_setup=True \
        --trainer.callbacks.init_args.dirpath="/checkpoint/calibration/Eagle-2T/{EXPERIMENT_NAME}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - {EXPERIMENT_NAME} ({DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.target_batch_size=1024 \
        --trainer.microbatch_size=8 \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-02-04 09:12:09,440] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.1+cu121'
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['validate', '-c', '/workspace/picocreator/RWKV-infctx-trainer/notebook/major-runs/Eagle-2T-calibration/ref-3e-5.yaml', '--model.load_model=/workspace/main-models/RWKV-5-World-1B5-v2-20231025-ctx4096.pth', '--model.lr_init=8e-5', '--model.lr_final=8e-5', '--data.skip_datapath_setup=True', '--trainer.callbacks.init_args.dirpath=/checkpoint/calibration/Eagle-2T/8e-5-with-cuda/', '--trainer.logger.init_args.name=Eagle-Base Validation - 8e-5-with-cuda (deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--t