# Example title of your finetune / training project

Give an overall description of your project here.

Modify the `training-config.yaml` according to your use case

Your Discord handle: @your-discord-handle

# Project setup and initialization

> !!! Change the project prefix to something suitable for your project
>
> !!! Change the model download code / model init code

In [None]:
import os

# Project prefix, used for wandb run names and output filenames.
# Follow the format "discordhandle"-"shortprojectname".
PROJECT_PREFIX = "userhandle-example-finetune-proj"

# Model version you are using: "v5" or "v4" respectively.
# It selects the RWKV-{MODEL_VERSION} directory used for training and inference.
MODEL_VERSION = "v4"

# Deepspeed strategy to use, you can leave this unchanged
DEEPSPEED_STRAT = "deepspeed_stage_1"
GPU_DEVICES = "auto"
ENABLE_WANDB = True

# Prefixes we will be using (kept as f-strings so a suffix can be added easily)
WANDB_PREFIX = f"{PROJECT_PREFIX}"
FILENAME_PREFIX = f"{PROJECT_PREFIX}"

print("DEEPSPEED_STRAT:", DEEPSPEED_STRAT)
print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# wandb mode string; the training cell exports it as the WANDB_MODE env variable
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# Compute the notebook directory and the various paths derived from it.
# NOTE: "__file__" is deliberately a quoted literal, not the module attribute:
# abspath() resolves it against the current working directory, so NOTEBOOK_DIR
# is the directory the notebook is being executed from.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
# The project root is three levels above the notebook directory
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../"))
# Trainer and inference code both live in the same RWKV-{MODEL_VERSION} directory
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, f"./RWKV-{MODEL_VERSION}/"))
INFERENCE_DIR = os.path.abspath(os.path.join(PROJECT_DIR, f"./RWKV-{MODEL_VERSION}/"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("INFERENCE_DIR:", INFERENCE_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

In [None]:
# Create the model, datapath and checkpoint directories under PROJECT_DIR
# (`mkdir -p` is a no-op for directories that already exist)
!mkdir -p \
    "{PROJECT_DIR}/model/" \
    "{PROJECT_DIR}/datapath/" \
    "{PROJECT_DIR}/checkpoint/"

In [None]:
#
# MODIFY TO EITHER INITIALIZE A NEW MODEL, OR FINE-TUNE AN EXISTING MODEL
# AND FINALIZE THE INIT MODEL NAME YOU ARE FINETUNING FROM
#

# Configure the initial model name you are using.
# This is the single source of truth for the filename: the download below
# fetches it, and the training cell loads it from the project's model directory.
INIT_MODEL_NAME="RWKV-4-World-1.5B-v1-fixed-20230612-ctx4096.pth"

# Download an existing model to finetune
# (-nc / --no-clobber: skip the download when the file is already present)
!cd "{PROJECT_DIR}/model/" && \
    wget -nc "https://huggingface.co/BlinkDL/rwkv-4-world/resolve/main/{INIT_MODEL_NAME}"

# # OR initialize a new model accordingly
# # (then comment out the download above and set INIT_MODEL_NAME to the new file name)
# !cd "{TRAINER_DIR}" && \
#     python3 ./init_model.py \
#         --n_layer 24 --n_embd 2048 \
#         --vocab_size neox --skip-if-exists \
#         "../model/L24-D2048-neox-init.pth"

In [None]:
# Preload the dataset, using the training-config.yaml that sits next to this notebook
!cd "{TRAINER_DIR}" && python3 preload_datapath.py "{NOTEBOOK_DIR}/training-config.yaml"

# Training the model!

In [None]:
# Start the training run from the trainer directory.
# WANDB_MODE is exported so the ENABLE_WANDB switch set in the first cell applies.
# The command-line flags override training-config.yaml with the run name,
# strategy, devices, checkpoint directory and initial model chosen in this notebook.
# `python3` is used to match the interpreter invoked by the other cells.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/training-config.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} training ({DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/RWKV-community-training/" \
        --model.load_model="../model/{INIT_MODEL_NAME}"

In [None]:
# Let's export the model from the last training checkpoint into the model directory
# (`python3` is used to match the interpreter invoked by the other cells)
!cd "{TRAINER_DIR}" && \
    python3 export_checkpoint.py "../checkpoint/RWKV-community-training/last.ckpt" "../model/{FILENAME_PREFIX}.pth"
# List the exported file to confirm it exists and show its size
!cd "{TRAINER_DIR}" && ls -alh "../model/{FILENAME_PREFIX}.pth"

In [None]:
# Quick validation: run the dragon prompt test against the exported model
!cd "{INFERENCE_DIR}" && python3 dragon_test.py "../model/{FILENAME_PREFIX}.pth" "cuda fp32"