# RWKV v5 minipile validation

**L12-D768 model**

## Preparing the init model and test dataset

In [1]:
import os

# ---- Run configuration: the knobs a reader is expected to tune ----
GPU_DEVICES = "1"
ENABLE_WANDB = True
WANDB_PREFIX = "infctx-v5-validation - MiniPile"
DEEPSPEED_STRAT = "deepspeed_stage_1"

# wandb mode string, exported as the WANDB_MODE env var by the training cell
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# ---- Notebook / project / trainer paths ----
# Note: "__file__" is a plain string here (notebooks have no real __file__),
# so abspath() resolves it against the current working directory and
# NOTEBOOK_DIR ends up being the directory the kernel was started in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
# The project root sits two levels above the notebook directory
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, os.pardir, os.pardir))
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "RWKV-v5"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

ENABLE_WANDB: True
GPU_DEVICES: auto
NOTEBOOK_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation
TRAINER_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer


In [7]:
# First, let's set up the working directories under PROJECT_DIR.
# `mkdir -p` creates missing parents and does not fail if the directory
# already exists, so this cell is safe to re-run.
# model/ receives the init model and dataset/ receives the MiniPile download
# (see the cells below); datapath/ and checkpoint/ are presumably consumed by
# the trainer config yaml -- TODO confirm against minipile-world-512.yaml.
!mkdir -p "{PROJECT_DIR}/model/"
!mkdir -p "{PROJECT_DIR}/dataset/"
!mkdir -p "{PROJECT_DIR}/datapath/"
!mkdir -p "{PROJECT_DIR}/checkpoint/"

In [8]:
# Download the MiniPile index (.idx) and data (.bin) files from the
# BlinkDL/minipile-tokenized dataset repo (rwkv_vocab_v20230424 variant)
# into {PROJECT_DIR}/dataset/.
# `--continue` asks wget to resume a partially downloaded file instead of
# starting over; `-O` fixes the local output filename.
!cd "{PROJECT_DIR}" && wget --continue -O dataset/minipile.idx https://huggingface.co/datasets/BlinkDL/minipile-tokenized/resolve/main/rwkv_vocab_v20230424/minipile.idx
!cd "{PROJECT_DIR}" && wget --continue -O dataset/minipile.bin https://huggingface.co/datasets/BlinkDL/minipile-tokenized/resolve/main/rwkv_vocab_v20230424/minipile.bin



--2023-12-18 07:39:15--  https://huggingface.co/datasets/BlinkDL/minipile-tokenized/resolve/main/rwkv_vocab_v20230424/minipile.idx
Resolving huggingface.co (huggingface.co)... 13.33.33.55, 13.33.33.110, 13.33.33.102, ...
Connecting to huggingface.co (huggingface.co)|13.33.33.55|:443... connected.
HTTP request sent, awaiting response... 

302 Found
Location: https://cdn-lfs-us-1.huggingface.co/repos/09/8d/098d39f30da901c320a0b91b647dbfcdb64742d734ad97ab2247383b7265662e/f526abddaa06d376443e69c9a6c0fcbe4302afc0cb1aed08faf3fb97fc5acd10?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27minipile.idx%3B+filename%3D%22minipile.idx%22%3B&Expires=1703115555&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwMzExNTU1NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzA5LzhkLzA5OGQzOWYzMGRhOTAxYzMyMGEwYjkxYjY0N2RiZmNkYjY0NzQyZDczNGFkOTdhYjIyNDczODNiNzI2NTY2MmUvZjUyNmFiZGRhYTA2ZDM3NjQ0M2U2OWM5YTZjMGZjYmU0MzAyYWZjMGNiMWFlZDA4ZmFmM2ZiOTdmYzVhY2QxMD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=UycA%7Eo%7EEWgjN6kwZtAZSB6k5Nz7B5MQttQCeTVw5OD5T1lTLfhRIX3aFxwLTAyMDMOnWK0KGcnWfha6OcBl9%7EGTSfu408xpCk-PyW0E9W45m5fvR5FqLWgR41zakLePM0Ssu0Wb2syrSKCFElocrwluDvNykuHhUQgdhN9hutXENfd6qC8LZmn68eo-PlqIh6ka8sFyfJa-Bteb3mT1SAPmW19if1jiwcWmtFrB-HrdVtrxrGf033Mki

In [6]:
# Let's initialize the L12-D768 model (12 layers, embedding size 768) with
# the init_model.py script from the trainer directory.
# The output path is relative to TRAINER_DIR, so the file lands in
# {PROJECT_DIR}/model/L12-D768-world-init.pth.
# NOTE(review): --skip-if-exists presumably makes re-runs a no-op when the
# .pth file is already present, and --safe-init selects the script's safe
# initialization mode -- confirm against init_model.py.
!cd "{TRAINER_DIR}" && python3 init_model.py \
    --n_layer 12 --n_embd 768 \
    --vocab_size world \
    --skip-if-exists --safe-init \
    ../model/L12-D768-world-init.pth

[2023-12-18 07:32:33,143] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.1'
---- Initializing model ----
No of layers: 12
Embedding size: 768
Output model path: ../model/L12-D768-world-init.pth
Vocab size: 65536
Emb scale: 0.0001
Note: this process takes a significant time (and ram) for large models
---- ----- ----
---
[RWKV.TimeMix] Compiling CUDA kernel with HEAD_SIZE=64
Using /home/picocreator/.cache/torch_extensions/py311_cu121 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/picocreator/.cache/torch_extensions/py311_cu121/wkv5/build.ninja...
Building extension module wkv5...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module wkv5...
[RWKV.TimeMix] CUDA kernel compiled & loaded globally
---
65536 768   -0.0001 em

In [25]:
# Preload the dataset described by config/minipile-world-512.yaml by running
# the trainer's preload_datapath.py script from TRAINER_DIR.
# NOTE(review): presumably this prepares the dataset once up front so the
# training cell does not have to redo it -- confirm against
# preload_datapath.py and the `data:` section of the yaml.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/config/minipile-world-512.yaml"

num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1.
Map (num_proc=16): 100%|█████| 1010499/1010499 [03:35<00:00, 4692.38 examples/s]
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1.
Map: 100%|████████████████████████████████| 1/1 [00:00<00:00, 432.54 examples/s]
Map (num_proc=16): 100%|████| 2928070/2928070 [01:24<00:00, 34652.87 examples/s]
Saving the dataset (19/19 shards): 100%|█| 2928070/2928070 [00:19<00:00, 152003.
Saving the dataset (1/1 shards): : 0 examples [00:00, ? examples/s]


In [28]:
# MiniPile training run.
# Runs `lightning_trainer.py fit` from TRAINER_DIR with the notebook's yaml
# config, then overrides a few settings from the config cell on the CLI:
#   - WANDB_MODE is exported as an environment variable for the trainer process
#   - the logger run name is built from WANDB_PREFIX and DEEPSPEED_STRAT
#   - trainer.strategy / trainer.devices come from DEEPSPEED_STRAT / GPU_DEVICES
#   - model.load_model points at the init model path (relative to TRAINER_DIR)
#     written by the init_model.py cell
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/minipile-world-512.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} (train-ctx=512, data-ctx=512, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}" \
        --model.load_model="../model/L12-D768-world-init.pth"
        

[2023-12-18 11:57:34,927] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.1'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/minipile-world-512.yaml', '--trainer.logger.init_args.name=infctx-v5-validation - MiniPile (train-ctx=512, data-ctx=512, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--model.load_model=../model/L12-D768-world-init.pth'], args=['fit', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/minipile-world-512.y