# Long Range Loss validation

This is based on the theory that, as the model improves over longer context lengths, the loss value approaches 1 at those longer context lengths.

In [1]:
# Notebook-wide settings, followed by the directory layout used by the later cells
import os

# Runtime / logging knobs
GPU_DEVICES = "auto"
ENABLE_WANDB = True
WANDB_PREFIX = "infctx-v5-LR-validation-loss"
DEEPSPEED_STRAT = "deepspeed_stage_2"

print(f"ENABLE_WANDB: {ENABLE_WANDB}")
print(f"GPU_DEVICES: {GPU_DEVICES}")

# Weights & Biases mode string derived from the ENABLE_WANDB toggle
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# Checkpoint under test: local file name and the URL it is fetched from
MODEL_NAME = "RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth"
MODEL_URL = "https://huggingface.co/RWKV/v5-Eagle-7B/resolve/main/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth?download=true"

# "__file__" is a plain string literal here (not the module attribute):
# abspath() resolves it against the current working directory, so taking its
# dirname yields the directory the notebook kernel is running in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
# The project root sits two levels above the notebook directory
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "..", ".."))
# The v5 trainer code lives directly under the project root
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "RWKV-v5"))

print(f"NOTEBOOK_DIR: {NOTEBOOK_DIR}")
print(f"TRAINER_DIR: {TRAINER_DIR}")
print(f"PROJECT_DIR: {PROJECT_DIR}")

ENABLE_WANDB: True
GPU_DEVICES: auto
NOTEBOOK_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation
TRAINER_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer


# Preload the model and the data

In [2]:
# Download the model checkpoint into the model/ directory under PROJECT_DIR,
# creating that directory first if it does not exist yet.
# wget is given -nc (no-clobber) together with -O, so a file that is already
# present under MODEL_NAME is not fetched again.
# NOTE(review): presumably a partially-downloaded file is skipped by -nc as
# well - confirm, and delete such a file manually before re-running.
!mkdir -p "{PROJECT_DIR}/model"
!cd "{PROJECT_DIR}/model" && \
    wget -O "{MODEL_NAME}" -nc "{MODEL_URL}"

--2024-02-04 11:41:09--  https://huggingface.co/RWKV/v5-Eagle-7B/resolve/main/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth?download=true
Resolving huggingface.co (huggingface.co)... 13.33.33.102, 13.33.33.55, 13.33.33.20, ...
Connecting to huggingface.co (huggingface.co)|13.33.33.102|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/d5/6f/d56f8718b68e0e1840ad1e209498db64132d773e8c85a1bf4f194501bc3cddcf/a88c7274184b211e5545c8f992f0b80d03c40a447980bbfcd0f6d5858982615a?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth%3B+filename%3D%22RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth%22%3B&Expires=1707276777&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNzI3Njc3N319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2Q1LzZmL2Q1NmY4NzE4YjY4ZTBlMTg0MGFkMWUyMDk0OThkYjY0MTMyZDc3M2U4Yzg1YTFiZjRmMTk

In [5]:
# Preload the required dataset for the enwiki 4096-context config.
# Runs preload_datapath.py from the trainer directory; presumably this is what
# lets the validation cells pass --data.skip_datapath_setup=True.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/config/enwiki_100k-world-4096.yaml"

Saving the dataset (4/4 shards): 100%|█| 79975/79975 [00:01<00:00, 74118.18 exam
Saving the dataset (1/1 shards): 100%|█| 808/808 [00:00<00:00, 58901.19 examples


In [4]:
# Preload the required dataset for the enwiki 16k-rechunk config.
# Runs preload_datapath.py from the trainer directory; presumably this is what
# lets the validation cells pass --data.skip_datapath_setup=True.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-rechunk.yaml"

Saving the dataset (4/4 shards): 100%|█| 19987/19987 [00:01<00:00, 18402.93 exam
Saving the dataset (1/1 shards): 100%|█| 202/202 [00:00<00:00, 13692.26 examples


# Validation loss for 4k / BS=1

In [17]:
# Validation run on the enwiki 4096-context config, with microbatch = 1.
# - WANDB_MODE is exported into the trainer's environment before it starts.
# - The checkpoint is loaded via a path relative to TRAINER_DIR (../model/).
# - Datapath setup is skipped; presumably it relies on the dataset having been
#   preloaded by preload_datapath.py beforehand.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py validate \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-4096.yaml" \
        --model.load_model="../model/{MODEL_NAME}" \
        --data.skip_datapath_setup=True \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - 7B - 4k loss validation (bs=1, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.microbatch_size=1 \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-02-04 12:42:57,250] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['validate', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_100k-world-4096.yaml', '--model.load_model=../model/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--data.skip_datapath_setup=True', '--trainer.logger.init_args.name=infctx-v5-LR-validation-loss - 7B - 4k loss validation (bs=1, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=1', '--model.ctx_len=4096', '--trainer.devices=auto'], arg

# Validation loss for 16k / BS=1

In [16]:
# Validation run on the enwiki 16k-rechunk config, with microbatch = 1.
# - WANDB_MODE is exported into the trainer's environment before it starts.
# - The checkpoint is loaded via a path relative to TRAINER_DIR (../model/).
# - Datapath setup is skipped; presumably it relies on the dataset having been
#   preloaded by preload_datapath.py beforehand.
# NOTE(review): --model.ctx_len stays at 4096 although the dataset config is
# the 16k rechunk - presumably intentional; confirm.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py validate \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-rechunk.yaml" \
        --model.load_model="../model/{MODEL_NAME}" \
        --data.skip_datapath_setup=True \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - 7B - 16k loss validation (bs=1, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.microbatch_size=1 \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-02-04 12:33:49,231] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['validate', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-rechunk.yaml', '--model.load_model=../model/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--data.skip_datapath_setup=True', '--trainer.logger.init_args.name=infctx-v5-LR-validation-loss - 7B - 16k loss validation (bs=1, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=1', '--model.ctx_len=4096', '--trainer.devices=aut

# Validation loss for 4k / BS=4

In [15]:
# Validation run on the enwiki 4096-context config, with microbatch = 4.
# - WANDB_MODE is exported into the trainer's environment before it starts.
# - The checkpoint is loaded via a path relative to TRAINER_DIR (../model/).
# - Datapath setup is skipped; presumably it relies on the dataset having been
#   preloaded by preload_datapath.py beforehand.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py validate \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-4096.yaml" \
        --model.load_model="../model/{MODEL_NAME}" \
        --data.skip_datapath_setup=True \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - 7B - 4k loss validation (bs=4, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.microbatch_size=4 \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-02-04 12:26:32,718] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['validate', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_100k-world-4096.yaml', '--model.load_model=../model/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--data.skip_datapath_setup=True', '--trainer.logger.init_args.name=infctx-v5-LR-validation-loss - 7B - 4k loss validation (bs=4, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=4', '--model.ctx_len=4096', '--trainer.devices=auto'], arg

# Validation loss for 16k / BS=4

In [14]:
# Validation run on the enwiki 16k-rechunk config, with microbatch = 4.
# - WANDB_MODE is exported into the trainer's environment before it starts.
# - The checkpoint is loaded via a path relative to TRAINER_DIR (../model/).
# - Datapath setup is skipped; presumably it relies on the dataset having been
#   preloaded by preload_datapath.py beforehand.
# NOTE(review): --model.ctx_len stays at 4096 although the dataset config is
# the 16k rechunk - presumably intentional; confirm.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py validate \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-16k-rechunk.yaml" \
        --model.load_model="../model/{MODEL_NAME}" \
        --data.skip_datapath_setup=True \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - 7B - 16k loss validation (bs=4, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.microbatch_size=4 \
        --model.ctx_len=4096 \
        --trainer.devices="{GPU_DEVICES}"

[2024-02-04 12:19:13,110] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['validate', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_100k-world-16k-rechunk.yaml', '--model.load_model=../model/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth', '--data.skip_datapath_setup=True', '--trainer.logger.init_args.name=infctx-v5-LR-validation-loss - 7B - 16k loss validation (bs=4, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=4', '--model.ctx_len=4096', '--trainer.devices=aut