# Selective Loss Run

A selective-loss training run with all experimental tweaks enabled.

In [None]:
# Run configuration: hardware/logging settings, model sizing and project paths.
# The values defined here are interpolated into the shell cells further down.
import os

# Devices passed to the trainer, and the Weights & Biases logging settings
GPU_DEVICES="auto"
ENABLE_WANDB=True
WANDB_PREFIX="infctx-v5-selective-loss"
DEEPSPEED_STRAT="deepspeed_stage_1"

print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# Value exported as the WANDB_MODE environment variable for the trainer process
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# The model sizing
MODEL_LAYERS=8
MODEL_DIMS=512

# Notebook and project paths.
# `__file__` is not defined inside a notebook kernel, so the notebook directory
# is taken to be the kernel's current working directory. Start the kernel from
# the notebook's own folder, otherwise every path below resolves incorrectly.
NOTEBOOK_DIR=os.getcwd()
# The project root is four directory levels above the notebook directory
PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../../"))
TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

In [None]:
# Initialize the model with init_model.py (run from TRAINER_DIR), using the
# layer count and embedding size from the config cell. The resulting file
# path is ../model/L<layers>-D<dims>-world-v5-init.pth, relative to TRAINER_DIR.
!cd "{TRAINER_DIR}" && \
    python3 init_model.py \
        --n_layer {MODEL_LAYERS} \
        --n_embd {MODEL_DIMS} \
        --vocab_size world \
        --skip-if-exists \
        --safe-init \
        ../model/L{MODEL_LAYERS}-D{MODEL_DIMS}-world-v5-init.pth

In [None]:
# Preload the required dataset by running preload_datapath.py (from TRAINER_DIR)
# against this notebook's training config.
!cd "{TRAINER_DIR}" && python3 preload_datapath.py \
    "{NOTEBOOK_DIR}/config/sloss-enwiki_100k-world-16k-packing.yaml"

# Multi-epoch training

In [None]:
# Run `lightning_trainer.py fit` from TRAINER_DIR with the selective-loss config,
# loading the init model file and applying the settings from the config cell.
# WANDB_MODE is set in the environment of the trainer process.
!cd "{TRAINER_DIR}" && \
    WANDB_MODE="{WANDB_MODE}" python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/sloss-enwiki_100k-world-16k-packing.yaml" \
        --model.load_model="../model/L{MODEL_LAYERS}-D{MODEL_DIMS}-world-v5-init.pth" \
        --data.skip_datapath_setup=True \
        --trainer.callbacks.init_args.dirpath="../checkpoint/selective-loss/baseline/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Selective Loss (trainsize=2k,packsize=16k) - ({DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.target_batch_size=128 \
        --trainer.microbatch_size=8 \
        --trainer.devices="{GPU_DEVICES}"