# Validation of RWKV v5 model inference code

In [1]:
# Update the RWKV pip package, found here : https://pypi.org/project/rwkv/
!python3 -m pip install --upgrade rwkv

Collecting rwkv
  Downloading rwkv-0.8.20-py3-none-any.whl.metadata (4.9 kB)
Downloading rwkv-0.8.20-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.8/400.8 kB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rwkv
Successfully installed rwkv-0.8.20


In [3]:
# Inference settings (device and numeric precision, going by the values)
INFERENCE_MODE = "cpu"
INFERENCE_TYPE = "fp32"

# Resolve the notebook, project and trainer directories
import os

# "__file__" is a plain string literal here (notebooks have no __file__), so
# this evaluates to the kernel's current working directory.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))

# NOTEBOOK_DIR is already absolute, so normalising the joined path is all that
# is needed to collapse the relative segments.
PROJECT_DIR = os.path.normpath(os.path.join(NOTEBOOK_DIR, "../../"))
TRAINER_DIR = os.path.normpath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

# Echo the resolved locations so the run log records where files are read from
print(f"NOTEBOOK_DIR: {NOTEBOOK_DIR}")
print(f"TRAINER_DIR: {TRAINER_DIR}")
print(f"PROJECT_DIR: {PROJECT_DIR}")

NOTEBOOK_DIR: /home/ubuntu/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation
TRAINER_DIR: /home/ubuntu/rwkv-proj/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /home/ubuntu/rwkv-proj/RWKV-infctx-trainer


In [4]:
# Create the model directory two levels above the current working directory
# (a no-op when it already exists, because of -p).
!mkdir -p ../../model/
# Download the RWKV-5 World 1B5 checkpoint from a pinned Hugging Face revision.
# -nc (no-clobber) skips the download when the file is already present.
!cd ../../model/ && wget -nc "https://huggingface.co/BlinkDL/rwkv-5-world/resolve/8eb0273bd6935fa310c57532637d93d055d72f05/RWKV-5-World-1B5-v2-20231025-ctx4096.pth"
# List the directory contents and print its absolute path, to confirm where the
# checkpoint was stored. Each `!` line runs in its own subshell, hence the repeated cd.
!cd ../../model/ && ls
!cd ../../model/ && pwd

--2023-11-05 05:40:27--  https://huggingface.co/BlinkDL/rwkv-5-world/resolve/8eb0273bd6935fa310c57532637d93d055d72f05/RWKV-5-World-1B5-v2-20231025-ctx4096.pth
Resolving huggingface.co (huggingface.co)... 52.85.151.66, 52.85.151.16, 52.85.151.31, ...
Connecting to huggingface.co (huggingface.co)|52.85.151.66|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/9b/0f/9b0f165daa456f007e672051275f10ff7862f8e2de07462884701e8f793c4518/5a89f56be7f82ab9dd0835af9a6838f788477471616c02f7b041e3aea0c57435?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-5-World-1B5-v2-20231025-ctx4096.pth%3B+filename%3D%22RWKV-5-World-1B5-v2-20231025-ctx4096.pth%22%3B&Expires=1699422027&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5OTQyMjAyN319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy85Yi8wZi85YjBmMTY1ZGFhNDU2ZjAwN2U2NzIwNTEyNzVmMTBmZjc4NjJmOGUyZGUwNzQ2Mjg4NDcwMW

## Reference code inference

In [14]:
import os
# Switches for the rwkv package; set before the rwkv import below
# (presumably read when rwkv.model is imported - the cell output echoes them).
os.environ['RWKV_JIT_ON'] = '1'
os.environ["RWKV_CUDA_ON"] = '0' # '1' to compile CUDA kernel (10x faster), requires c++ compiler & cuda libraries

import torch
from rwkv.model import RWKV
from rwkv.rwkv_tokenizer import TRIE_TOKENIZER

# Tokenizer setup
PROMPT_STR = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
WORLD_TOKENIZER = TRIE_TOKENIZER(os.path.join(TRAINER_DIR, 'src/dataflow/rwkv_vocab_v20230424.txt'))
PROMPT_TOKENS = WORLD_TOKENIZER.encode(PROMPT_STR)
# Total number of tokens to generate (the first one comes from the prompt pass)
LENGTH = 200

# Load the model.
# The strategy string is built from the INFERENCE_MODE / INFERENCE_TYPE settings
# of the config cell (default "cpu fp32") instead of being hard-coded, so that
# changing the config actually changes how the reference model is run.
MODEL = RWKV(
    model=os.path.join(PROJECT_DIR, "model/RWKV-5-World-1B5-v2-20231025-ctx4096.pth"),
    strategy=f"{INFERENCE_MODE} {INFERENCE_TYPE}",
)

# Build the baseline state: run the whole prompt once, starting from no state
LOGITS, STATE = MODEL.forward(PROMPT_TOKENS, None)
# Greedy decoding: always take the highest-scoring token, so the output is deterministic
TOKEN_ID = torch.argmax(LOGITS, dim=-1).item()

# Print the prompt prefix, followed by the first generated token
print("------")
print(PROMPT_STR, end='')
print(WORLD_TOKENIZER.decode([TOKEN_ID]), end='', flush=True)

# And get the next LENGTH-1 tokens (LENGTH in total), feeding each token back
# in together with the carried-over state
for i in range(LENGTH - 1):
    LOGITS, STATE = MODEL.forward([TOKEN_ID], STATE)
    TOKEN_ID = torch.argmax(LOGITS, dim=-1).item()

    # Stream each token as soon as it is decoded
    print(WORLD_TOKENIZER.decode([TOKEN_ID]), end='', flush=True)


RWKV_JIT_ON 1 RWKV_CUDA_ON 0 RESCALE_LAYER 0

Loading /home/ubuntu/rwkv-proj/RWKV-infctx-trainer/model/RWKV-5-World-1B5-v2-20231025-ctx4096.pth ...
Strategy: (total 24+1=25 layers)
* cpu [float32, float32], store 25 layers
0-cpu-float32-float32 1-cpu-float32-float32 2-cpu-float32-float32 3-cpu-float32-float32 4-cpu-float32-float32 5-cpu-float32-float32 6-cpu-float32-float32 7-cpu-float32-float32 8-cpu-float32-float32 9-cpu-float32-float32 10-cpu-float32-float32 11-cpu-float32-float32 12-cpu-float32-float32 13-cpu-float32-float32 14-cpu-float32-float32 15-cpu-float32-float32 16-cpu-float32-float32 17-cpu-float32-float32 18-cpu-float32-float32 19-cpu-float32-float32 20-cpu-float32-float32 21-cpu-float32-float32 22-cpu-float32-float32 23-cpu-float32-float32 24-cpu-float32-float32 
emb.weight                        f32      cpu  65536  2048 
blocks.0.ln1.weight               f32      cpu   2048       
blocks.0.ln1.bias                 f32      cpu   2048       
blocks.0.ln2.weight         

# Expected result

```

The researchers, who were led by Dr. David Doubilet, a photographer, were able to capture the dragons in their natural habitat.
The dragons were found in the remote valley of the Himalayas, in the Tibetan Plateau. The valley is located in the province of Qinghai, in the northwestern part of China.
The valley is home to a large population of Tibetan antelopes, which are known for their unique horns.
The researchers were able to capture the dragons in their natural habitat, using a camera mounted on a drone.
The footage was then edited and turned into a short film, which was released in 2016.
The footage was also used in a documentary about the dragons, which was released in 2017.
The footage was also used in a documentary about the dragons, which was released in 2017.
The footage was also used in a documentary about the dragons, which was released in 2017.
The footage was also used
```


# RWKV infctx trainer, in inference mode

The output below should match the expected result above (200 tokens).

In [13]:
# Run dragon_test.py from TRAINER_DIR against the downloaded checkpoint.
# The "../model/..." path is relative to TRAINER_DIR, i.e. it points into PROJECT_DIR/model.
# The second argument "ref" corresponds to the run labelled "REF RUN" in the script's output.
# NOTE(review): dragon_test.py is not shown here - confirm what "ref" selects.
!cd $TRAINER_DIR && python3 ./dragon_test.py "../model/RWKV-5-World-1B5-v2-20231025-ctx4096.pth" "ref"

[2023-11-05 06:23:52,737] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.0'
  return self.fget.__get__(instance, owner)()
  batch_tokens = torch.tensor(
--- DRAGON PROMPT (REF RUN) ---
In a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese.
The researchers, who were led by Dr. David Doubilet, a photographer, were able to capture the dragons in their natural habitat.
The dragons were found in the remote valley of the Himalayas, in the Tibetan Plateau. The valley is located in the province of Qinghai, in the northwestern part of China.
The valley is home to a large population of Tibetan antelopes, which are known for their unique horns.
The researchers were able to capture the dragons in their natural habitat, using a ca