# RWKV Token Shift - From an existing raven model
Because the weights overlap, what if we take an existing Raven model and finetune it into the tokenshift format?
What will happen?

**Note:** This project assumes you have the rwkv-infctx conda env set up

# Basic Setup

In [4]:
# Create the project-level folders used by the rest of this notebook
!mkdir -p ../../../../model/ ../../../../datapath/ ../../../../checkpoint/

# Download the base Raven 1B5 model into the model folder
# (wget -nc leaves an already-downloaded file untouched)
!cd ../../../../model/ && wget -nc https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v12-Eng98%25-Other2%25-20230520-ctx4096.pth

File ‘RWKV-4-Raven-1B5-v12-Eng98%-Other2%-20230520-ctx4096.pth’ already there; not retrieving.



In [5]:
import os

# --- Training / logging configuration ---------------------------------------
DEEPSPEED_STRAT = "deepspeed_stage_2_offload"
GPU_DEVICES = "auto"
ENABLE_WANDB = False
WANDB_PREFIX = "FrankinShift-1B5"

# wandb stays disabled unless ENABLE_WANDB is switched on above
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

for _label, _value in (
    ("DEEPSPEED_STRAT:", DEEPSPEED_STRAT),
    ("ENABLE_WANDB:", ENABLE_WANDB),
    ("GPU_DEVICES:", GPU_DEVICES),
):
    print(_label, _value)

# --- Notebook and project paths ---------------------------------------------
# The literal string "__file__" (not the variable) is resolved against the
# current working directory, so NOTEBOOK_DIR ends up being the directory the
# kernel is running in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../../"))
# Trainer and inference code are both read from the same folder
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v4wavenet/"))
INFERENCE_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v4wavenet/"))

for _label, _value in (
    ("NOTEBOOK_DIR:", NOTEBOOK_DIR),
    ("INFERENCE_DIR:", INFERENCE_DIR),
    ("TRAINER_DIR:", TRAINER_DIR),
    ("PROJECT_DIR:", PROJECT_DIR),
):
    print(_label, _value)

DEEPSPEED_STRAT: deepspeed_stage_2_offload
ENABLE_WANDB: False
GPU_DEVICES: auto
NOTEBOOK_DIR: /home/picocreator/rwkv-proj/picocreator-memory-experiment/notebook/experiment/tokenshift-exp/FrankinShift-1B5
INFERENCE_DIR: /home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet
TRAINER_DIR: /home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet
PROJECT_DIR: /home/picocreator/rwkv-proj/picocreator-memory-experiment


# Stage 1 : Foundation model training

In [6]:
# Let's preload the required dataset (enwiki_100k)
# Runs from TRAINER_DIR and hands this notebook's enwiki YAML config to the preload script.
# NOTE(review): the saved output of this cell mentions 'preload_dataset.py', not the
# 'preload_datapath.py' script invoked here - the output looks stale; re-run to confirm.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/FrankinShift-1B5-enwiki.yaml"

python3: can't open file '/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/preload_dataset.py': [Errno 2] No such file or directory


In [7]:
# Start the foundation model training
# Runs from TRAINER_DIR with this notebook's enwiki YAML config; WANDB_MODE, the
# DeepSpeed strategy and the GPU devices are taken from the configuration cell.
# NOTE(review): the saved output of this cell mentions 'new_train.py', not the
# 'lightning_trainer.py' script invoked here - the output looks stale; re-run to confirm.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/FrankinShift-1B5-enwiki.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Enwiki Retrain (train-ctx=4096, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}" 

python: can't open file '/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/new_train.py': [Errno 2] No such file or directory


In [8]:
# Let's export the model from the Stage 1 checkpoint, then list the exported file
# (both paths are relative to TRAINER_DIR, which the commands cd into first)
!cd "{TRAINER_DIR}" && \
    python export_checkpoint.py "../checkpoint/FrankinShift-1B5-enwiki/last.ckpt" "../model/FrankinShift-1B5-Stage1.pth"
!cd "{TRAINER_DIR}" && ls -alh "../model/FrankinShift-1B5-Stage1.pth"

[2023-08-02 17:07:10,627] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/export_checkpoint.py", line 623, in <module>
    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file)
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/export_checkpoint.py", line 537, in convert_zero_checkpoint_to_fp32_state_dict
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/export_checkpoint.py", line 516, in get_fp32_state_dict_from_zero_checkpoint
    raise ValueError(f"Unable to find 'latest' file at {latest_path}")
ValueError: Unable to find 'latest' file at ../checkpoint/FrankinShift-1B5-enwiki/last.ckpt

In [9]:
# Let's do a quick dragon prompt validation on the Stage 1 model
# (the model path is relative to INFERENCE_DIR, which the command cds into first)
!cd "{INFERENCE_DIR}" && python3 dragon_test.py "../model/FrankinShift-1B5-Stage1.pth" "cuda fp32"

[2023-08-02 17:07:14,144] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1'
[RWKV.model]: Preloading model from '../model/FrankinShift-1B5-Stage1.pth'
Traceback (most recent call last):
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/dragon_test.py", line 39, in <module>
    model = SimpleRWKV(MODEL_PATH, device=DEVICE)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 1211, in __init__
    self.model = RWKV(**model_config)
                 ^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 465, in __init__
    raise ValueError(f"load_model file '{load_model}' does not exist")
ValueError: load_model file '../model/FrankinShift-1B5-Stage1.pth' does not exist


In [10]:
# Let's do a quick memory test on the Stage 1 model
# (We don't expect this to work, as we have not finetuned for memory recall, but it's a baseline)
# The script path is relative to the notebook's working directory.
!python3 ../memory_script/eval_model_memory_guided.py "{PROJECT_DIR}/model/FrankinShift-1B5-Stage1.pth"

[2023-08-02 17:07:17,428] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1'
[RWKV.model]: Preloading model from '/home/picocreator/rwkv-proj/picocreator-memory-experiment/model/FrankinShift-1B5-Stage1.pth'
Traceback (most recent call last):
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/notebook/experiment/tokenshift-exp/FrankinShift-1B5/../memory_script/eval_model_memory_guided.py", line 41, in <module>
    model = SimpleRWKV(model_path, device="cuda")
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 1211, in __init__
    self.model = RWKV(**model_config)
                 ^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 465, in __init__
    raise ValueError(f"load_model file '{load_model}' does not exist")

# Stage 2 : Instruct Tuning

In [11]:
# Let's preload the required dataset
# Runs from TRAINER_DIR and hands this notebook's instruct YAML config to the preload script.
# NOTE(review): the saved output of this cell mentions 'preload_dataset.py', not the
# 'preload_datapath.py' script invoked here - the output looks stale; re-run to confirm.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/FrankinShift-1B5-instruct.yaml"

python3: can't open file '/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/preload_dataset.py': [Errno 2] No such file or directory


In [12]:
# Start the instruct finetuning
# Runs from TRAINER_DIR with this notebook's instruct YAML config; WANDB_MODE, the
# DeepSpeed strategy and the GPU devices are taken from the configuration cell.
# NOTE(review): the saved output of this cell mentions 'new_train.py', not the
# 'lightning_trainer.py' script invoked here - the output looks stale; re-run to confirm.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/FrankinShift-1B5-instruct.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Instruct Retrain (train-ctx=4096, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}"

python: can't open file '/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/new_train.py': [Errno 2] No such file or directory


In [13]:
# Let's export the model from the Stage 2 checkpoint, then list the exported file
# (both paths are relative to TRAINER_DIR, which the commands cd into first)
!cd "{TRAINER_DIR}" && \
    python export_checkpoint.py "../checkpoint/FrankinShift-1B5-instruct/last.ckpt" "../model/FrankinShift-1B5-Stage2.pth"
!cd "{TRAINER_DIR}" && ls -alh "../model/FrankinShift-1B5-Stage2.pth"

[2023-08-02 17:07:20,196] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/export_checkpoint.py", line 623, in <module>
    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, output_file)
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/export_checkpoint.py", line 537, in convert_zero_checkpoint_to_fp32_state_dict
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/export_checkpoint.py", line 516, in get_fp32_state_dict_from_zero_checkpoint
    raise ValueError(f"Unable to find 'latest' file at {latest_path}")
ValueError: Unable to find 'latest' file at ../checkpoint/FrankinShift-1B5-instruct/last.ckpt

In [14]:
# Let's do a quick dragon prompt validation on the Stage 2 model
# (the model path is relative to INFERENCE_DIR, which the command cds into first)
!cd "{INFERENCE_DIR}" && python3 dragon_test.py "../model/FrankinShift-1B5-Stage2.pth" "cuda fp32"

[2023-08-02 17:07:23,539] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1'
[RWKV.model]: Preloading model from '../model/FrankinShift-1B5-Stage2.pth'
Traceback (most recent call last):
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/dragon_test.py", line 39, in <module>
    model = SimpleRWKV(MODEL_PATH, device=DEVICE)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 1211, in __init__
    self.model = RWKV(**model_config)
                 ^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 465, in __init__
    raise ValueError(f"load_model file '{load_model}' does not exist")
ValueError: load_model file '../model/FrankinShift-1B5-Stage2.pth' does not exist


In [15]:
# Let's do a quick memory test on the Stage 2 model
# (We don't expect this to work, as we have not finetuned for memory recall, but it's a baseline)
# The script path is relative to the notebook's working directory.
!python3 ../memory_script/eval_model_memory_guided.py "{PROJECT_DIR}/model/FrankinShift-1B5-Stage2.pth"

[2023-08-02 17:07:26,836] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1'
[RWKV.model]: Preloading model from '/home/picocreator/rwkv-proj/picocreator-memory-experiment/model/FrankinShift-1B5-Stage2.pth'
Traceback (most recent call last):
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/notebook/experiment/tokenshift-exp/FrankinShift-1B5/../memory_script/eval_model_memory_guided.py", line 41, in <module>
    model = SimpleRWKV(model_path, device="cuda")
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 1211, in __init__
    self.model = RWKV(**model_config)
                 ^^^^^^^^^^^^^^^^^^^^
  File "/home/picocreator/rwkv-proj/picocreator-memory-experiment/RWKV-v4wavenet/src/model.py", line 465, in __init__
    raise ValueError(f"load_model file '{load_model}' does not exist")