# RWKV v5

Simple memory training for a small model

**Note:** This project assumes you have the rwkv-infctx conda env set up

# Basic Setup

In [None]:
# First, let's set up the various directories used by the later cells.
# The project root is three levels above this notebook's directory (the same
# "../../../" that PROJECT_DIR is derived from), so the models / datapath /
# checkpoint dirs are created there - that is where the download, training
# and export cells look for them.
!ls ../../../
!mkdir -p ../../../models/
!mkdir -p ../../../datapath/
!mkdir -p ../../../checkpoint/

In [2]:
# Additional dependencies for the eval scripts.
# %pip (rather than !pip3) installs into the environment of the running kernel,
# so the packages are importable from this notebook.
%pip install -q aiocsv aiofiles

In [3]:
import os

# --- Training / logging switches ---
DEEPSPEED_STRAT = "deepspeed_stage_1"
GPU_DEVICES = "auto"
ENABLE_WANDB = True

# --- Model shape to start with: layer count and embedding dim ---
LAYER_COUNT = 6
EMBED_DIM = 2048

# Embedding scale; the label form swaps "." for "_" so it can be used in filenames
EMBED_SCALE = 0.1
EMBED_SCALE_LABEL = "_".join(str(EMBED_SCALE).split("."))

# Run name shown in wandb, and the matching prefix for files on disk
WANDB_PREFIX = f"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}"
FILENAME_PREFIX = f"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}"

print("DEEPSPEED_STRAT:", DEEPSPEED_STRAT)
print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# wandb mode is exported to the trainer process via the WANDB_MODE env var
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# --- Paths ---
# A notebook has no __file__, so the notebook dir is the kernel's working dir
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
# The config files live next to the notebook
CONFIG_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "."))
# The project root sits three levels above the config dir
PROJECT_DIR = os.path.abspath(os.path.join(CONFIG_DIR, "..", "..", ".."))
# Trainer and inference code share the same RWKV-v5 directory
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "RWKV-v5"))
INFERENCE_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "RWKV-v5"))

# Name of the directory holding this notebook
DIR_NAME = os.path.basename(NOTEBOOK_DIR)

# Log names and dirs
print("DIR_NAME:", DIR_NAME)
print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("INFERENCE_DIR:", INFERENCE_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

DEEPSPEED_STRAT: deepspeed_stage_1
ENABLE_WANDB: True
GPU_DEVICES: auto
DIR_NAME: infctx-math-and-name
NOTEBOOK_DIR: /data/chris/rwkv-fork/RWKV-infctx-trainer/notebook/experiment/infctx-math-and-name
INFERENCE_DIR: /data/chris/rwkv-fork/RWKV-infctx-trainer/RWKV-v5
TRAINER_DIR: /data/chris/rwkv-fork/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /data/chris/rwkv-fork/RWKV-infctx-trainer


In [6]:
# Fetch the base model directly into <project>/models/
# (stop-gap until the HF sync issues are resolved).
# `wget -nc` skips the download when the file is already present.
!cd "{TRAINER_DIR}/../models/" && \
    wget -nc "https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth"

# Show what is now in the models directory
!cd "{TRAINER_DIR}/../models/" && ls -alh .

--2023-09-16 17:59:33--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/v5r3-L6-D2048-E0_1-enwiki-instruct.pth
Resolving huggingface.co (huggingface.co)... 108.138.246.71, 108.138.246.85, 108.138.246.67, ...
Connecting to huggingface.co (huggingface.co)|108.138.246.71|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/0a83bdbbf6d686bfa77529fc9bbde3a91fc8d182e1dc33ce8d18f2a0abbe2576?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5r3-L6-D2048-E0_1-enwiki-instruct.pth%3B+filename%3D%22v5r3-L6-D2048-E0_1-enwiki-instruct.pth%22%3B&Expires=1695171573&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5NTE3MTU3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZk

## Tune 2 : Context size (1024) 

In [7]:
%%script bash

########################################
# Build the jsonl dataset used for training
########################################

# Step up to the parent of the notebook dir; every path below is relative to it
cd "../"

DATASET_DIR="../dataset"

# Start from a clean dataset dir (only stale *.jsonl files are removed)
mkdir -p "${DATASET_DIR}"
rm -rf "${DATASET_DIR}"/*.jsonl

echo "## Generating math and name dataset ##"

# The training set is reduced for a lower word count, shifting the focus upwards
python3 infctx-math-and-name/generate_math_and_name_dataset.py \
    --out-file "${DATASET_DIR}/questions_numbers.jsonl" \
    --max-numbers 1024 \
    --num-samples 10000

# Only matters if generator invocations are ever run in the background
wait
echo "## Done ##"

ls -alh "${DATASET_DIR}/"

## Generating math and name dataset ##


## Done ##
total 25M
drwx------  2 christopherchou u-christopherchou   3 Sep 16 17:59 .
drwx------ 13 christopherchou u-christopherchou  14 Sep 16 17:59 ..
-rw-------  1 christopherchou u-christopherchou 55M Sep 16 17:59 questions_numbers.jsonl


In [8]:
# Start the finetune model training.
# The base weights are the file fetched by the download cell: it lives in
# <project>/models/ (plural) and is named "<prefix>-enwiki-instruct.pth", so
# load_model must point there rather than at "../model/<prefix>-mem-instruct.pth",
# which nothing in this notebook creates.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{CONFIG_DIR}/config-mem-template.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Mem-Tune ctx-1024 (train-ctx=1024, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}"  \
        --trainer.callbacks.init_args.dirpath="../checkpoint/{FILENAME_PREFIX}-mem-ctx-1024/" \
        --model.lr_init=5e-4 \
        --model.lr_final=4e-4 \
        --data.max_token_size=1024 \
        --model.ctx_len=1024 \
        --model.bptt_learning_range=1 \
        --model.load_model="../models/{FILENAME_PREFIX}-enwiki-instruct.pth"

[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu117'
[31m╭─[0m[31m────────────────────[0m[31m [0m[1;31mTraceback [0m[1;2;31m(most recent call last)[0m[31m [0m[31m─────────────────────[0m[31m─╮[0m
[31m│[0m [2;33m/data/chris/rwkv-fork/RWKV-infctx-trainer/RWKV-v5/[0m[1;33mlightning_trainer.py[0m:[94m278[0m   [31m│[0m
[31m│[0m in [92m<module>[0m                                                                  [31m│[0m
[31m│[0m                                                                              [31m│[0m
[31m│[0m   [2m275 [0m[2m│   [0m)                                                                  [31m│[0m
[31m│[0m   [2m276 [0m                                                                       [31m│[0m
[31m│[0m   [2m277 [0m[94mif[0m [91m__name__[0m == [33m"[0m[33m__main__[0m[33m"[0m:                                             [31m│[0m
[31m│[0m [31m❱ [0m278 [2m│   [0mcli_main()       

In [None]:
# Export the model from the checkpoint into <project>/models/ - the same
# directory the listing below checks and the downloaded base model lives in
# (the export previously wrote to "../model/", which the listing never saw).
!cd "{TRAINER_DIR}" && \
    python3 export_checkpoint.py \
        "../checkpoint/{FILENAME_PREFIX}-mem-ctx-1024/last.ckpt" \
        "../models/{FILENAME_PREFIX}-mem-ctx-1024.pth" "bf16"
!cd "{TRAINER_DIR}" && ls -alh "../models/{FILENAME_PREFIX}-mem-ctx-1024.pth"

In [None]:
# Quick memory test against the model exported above (read from <project>/models/)
# NOTE(review): confirm "../../memory_script/" resolves from this notebook's directory
!python3 ../../memory_script/eval_v5_memory_guided.py "{PROJECT_DIR}/models/{FILENAME_PREFIX}-mem-ctx-1024.pth"