# Validation of RWKV v5 model inference code

In [1]:
# Update the RWKV pip package, found here : https://pypi.org/project/rwkv/
!python3 -m pip install --upgrade rwkv

Collecting rwkv
  Using cached rwkv-0.8.16-py3-none-any.whl (400 kB)
Installing collected packages: rwkv
  Attempting uninstall: rwkv
    Found existing installation: rwkv 0.8.0
    Uninstalling rwkv-0.8.0:
      Successfully uninstalled rwkv-0.8.0
Successfully installed rwkv-0.8.16


In [2]:
import os

# Inference device and precision for this validation run
# (presumably consumed by later cells - verify against their usage)
INFERENCE_MODE = "cpu"
INFERENCE_TYPE = "fp32"

# Resolve the directories used by this notebook.
# A notebook has no real `__file__`, so everything is anchored on the kernel's
# current working directory, which is expected to be the notebook's folder.
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "..", ".."))
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "RWKV-v5"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

NOTEBOOK_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation
TRAINER_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer


In [3]:
# Download the RWKV-5-World-1B5 checkpoint into ../../model/ (relative to the
# notebook's working directory). The URL is pinned to a specific revision hash,
# and `wget -nc` (no-clobber) skips the download if the file already exists.
!mkdir -p ../../model/
!cd ../../model/ && wget -nc "https://huggingface.co/BlinkDL/rwkv-5-world/resolve/8eb0273bd6935fa310c57532637d93d055d72f05/RWKV-5-World-1B5-v2-20231025-ctx4096.pth"
# List the directory contents and print its path to confirm where the file landed
!cd ../../model/ && ls
!cd ../../model/ && pwd

--2023-10-28 14:23:27--  https://huggingface.co/BlinkDL/rwkv-5-world/resolve/8eb0273bd6935fa310c57532637d93d055d72f05/RWKV-5-World-1B5-v2-20231025-ctx4096.pth
Resolving huggingface.co (huggingface.co)... 13.33.33.110, 13.33.33.20, 13.33.33.102, ...
Connecting to huggingface.co (huggingface.co)|13.33.33.110|:443... connected.
HTTP request sent, awaiting response... 

302 Found
Location: https://cdn-lfs.huggingface.co/repos/9b/0f/9b0f165daa456f007e672051275f10ff7862f8e2de07462884701e8f793c4518/5a89f56be7f82ab9dd0835af9a6838f788477471616c02f7b041e3aea0c57435?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27RWKV-5-World-1B5-v2-20231025-ctx4096.pth%3B+filename%3D%22RWKV-5-World-1B5-v2-20231025-ctx4096.pth%22%3B&Expires=1698733407&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5ODczMzQwN319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy85Yi8wZi85YjBmMTY1ZGFhNDU2ZjAwN2U2NzIwNTEyNzVmMTBmZjc4NjJmOGUyZGUwNzQ2Mjg4NDcwMWU4Zjc5M2M0NTE4LzVhODlmNTZiZTdmODJhYjlkZDA4MzVhZjlhNjgzOGY3ODg0Nzc0NzE2MTZjMDJmN2IwNDFlM2FlYTBjNTc0MzU%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=iJ4lcyyXsH5t5vA8nu3adBBsWe62uiw%7EwAJltoniEDh0RzCAdHn5pA%7EzlLBBYzsulfRgoJRYWuREKiXgKhlwlPy3kghC9%7EbcNTxdtmjPw2HypuNqqFetGTQg5l3gXOZzWgI2Mx8xDzga7N9JGePVykFnURsGrheXcjdJU7AW5%7EPyBIkKXBqbzJ4p1lwj1GCZYnb9p

## Reference code inference

In [6]:
import os

# These switches are set before `rwkv.model` is imported (same order as the
# upstream example); the package echoes them when it loads.
os.environ['RWKV_JIT_ON'] = '1'
os.environ["RWKV_CUDA_ON"] = '0' # '1' to compile CUDA kernel (10x faster), requires c++ compiler & cuda libraries

from rwkv.model import RWKV
from rwkv.utils import PIPELINE, PIPELINE_ARGS

# Checkpoint fetched by the download cell earlier in this notebook
MODEL_PATH = os.path.join(PROJECT_DIR, "model/RWKV-5-World-1B5-v2-20231025-ctx4096.pth")
if not os.path.isfile(MODEL_PATH):
    # Fail early with a clear message rather than deep inside the model loader
    raise FileNotFoundError(f"Model checkpoint not found: {MODEL_PATH} (run the download cell first)")

# Build the strategy from the config cell (INFERENCE_MODE / INFERENCE_TYPE)
# instead of hard-coding it, so device and precision are set in one place.
# With the defaults this is 'cpu fp32'.
model = RWKV(model=MODEL_PATH, strategy=f"{INFERENCE_MODE} {INFERENCE_TYPE}")
pipeline = PIPELINE(model, "rwkv_vocab_v20230424") # Using the world tokenizer

# Prompt for the validation run; printed first so the generated text continues it
ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
print(ctx, end='')

def my_print(s):
    """Stream a generated text fragment to stdout without a trailing newline."""
    print(s, end='', flush=True)

# For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
# https://platform.openai.com/docs/api-reference/parameter-details
# (the penalty options are left disabled below)

# We use top_k = 1 to effectively always take the highest choice token
args = PIPELINE_ARGS(temperature = 0.5, top_p = 0.7, top_k = 1, # top_k = 0 then ignore
                    #  alpha_frequency = 0.25,
                    #  alpha_presence = 0.25,
                    #  alpha_decay = 0.996, # gradually decay the penalty
                     token_ban = [0], # ban the generation of some tokens
                     token_stop = [], # stop generation whenever you see any token here
                     chunk_len = 256) # split input into chunks to save VRAM (shorter -> slower)

# Generate 200 tokens, streaming each decoded fragment through my_print
pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
print('\n')

# Disabled example of calling the model directly, kept for reference:
# out, state = model.forward([187, 510, 1563, 310, 247], None)
# print(out.detach().cpu().numpy())                   # get logits
# out, state = model.forward([187, 510], None)
# out, state = model.forward([1563], state)           # RNN has state (use deepcopy to clone states)
# out, state = model.forward([310, 247], state)
# print(out.detach().cpu().numpy())                   # same result as above
# print('\n')

RWKV_JIT_ON 1 RWKV_CUDA_ON 0 RESCALE_LAYER 0

Loading /home/picocreator/rwkv-proj/RWKV-infctx-trainer/model/RWKV-5-World-1B5-v2-20231025-ctx4096.pth ...


Strategy: (total 24+1=25 layers)
* cpu [float32, float32], store 25 layers
0-cpu-float32-float32 1-cpu-float32-float32 2-cpu-float32-float32 3-cpu-float32-float32 4-cpu-float32-float32 5-cpu-float32-float32 6-cpu-float32-float32 7-cpu-float32-float32 8-cpu-float32-float32 9-cpu-float32-float32 10-cpu-float32-float32 11-cpu-float32-float32 12-cpu-float32-float32 13-cpu-float32-float32 14-cpu-float32-float32 15-cpu-float32-float32 16-cpu-float32-float32 17-cpu-float32-float32 18-cpu-float32-float32 19-cpu-float32-float32 20-cpu-float32-float32 21-cpu-float32-float32 22-cpu-float32-float32 23-cpu-float32-float32 24-cpu-float32-float32 
emb.weight                        f32      cpu  65536  2048 
blocks.0.ln1.weight               f32      cpu   2048       
blocks.0.ln1.bias                 f32      cpu   2048       
blocks.0.ln2.weight               f32      cpu   2048       
blocks.0.ln2.bias                 f32      cpu   2048       
blocks.0.att.time_mix_k           f32      cpu   2048 

# Expected result

```

The researchers, who were led by Dr. David Doubilet, a photographer and filmmaker, were able to capture the dragons in their natural habitat. The team was able to film the dragons for over two hours, and they were able to capture the dragons in their natural habitat.
The researchers were able to capture the dragons in their natural habitat. The team was able to film the dragons for over two hours, and they were able to capture the dragons in their natural habitat.
The researchers were able to capture the dragons in their natural habitat. The team was able to film the dragons for over two hours, and they were able to capture the dragons in their natural habitat.
The researchers were able to capture the dragons in their natural habitat. The team was able to film the dragons for over two hours, and they were able to capture the dragons in their natural habitat.
The researchers were able to capture the dragons in their natural habitat. The team was able to film the dragons for
```


# RWKV infctx trainer, in inference mode

The output should match the expected result above (200 tokens).

In [7]:
# Run the reference implementation
!cd $TRAINER_DIR && python3 ./dragon_test.py "../model/RWKV-5-World-1B5-v2-20231025-ctx4096.pth" "ref"

[2023-10-28 14:29:17,956] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1'
