# Testing rechunking vs repacking training

Starting from scratch: configure the experiment, then initialise the base model that each training run loads.

In [2]:
import os

# ---- Run toggles ----------------------------------------------------------
# GPU_DEVICES is forwarded to the trainer as --trainer.devices.
# WANDB_PREFIX is prepended to the logger run name of every training run.
GPU_DEVICES = "auto"
ENABLE_WANDB = True
WANDB_PREFIX = "infctx-v6-LRX"

print(f"ENABLE_WANDB: {ENABLE_WANDB}")
print(f"GPU_DEVICES: {GPU_DEVICES}")

# Exported as the WANDB_MODE environment variable by the training cells.
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# ---- Paths ----------------------------------------------------------------
# "__file__" is a plain string here (notebooks have no __file__ variable):
# abspath() resolves it against the current working directory, so dirname()
# yields the directory the kernel is running in.
NOTEBOOK_DIR = os.path.dirname(os.path.abspath("__file__"))
# The project root sits three levels above the notebook directory.
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../../"))
# The trainer scripts live in RWKV-v6-LRX under the project root.
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v6-LRX/"))

# ---- Deepspeed and batch size ---------------------------------------------
# Passed to the trainer as --trainer.strategy and --trainer.microbatch_size.
DEEPSPEED_STAGE = "deepspeed_stage_2"
BATCH_SIZE = 4

print(
    f"NOTEBOOK_DIR: {NOTEBOOK_DIR}",
    f"TRAINER_DIR: {TRAINER_DIR}",
    f"PROJECT_DIR: {PROJECT_DIR}",
    sep="\n",
)

ENABLE_WANDB: True
GPU_DEVICES: auto
NOTEBOOK_DIR: /home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v6-layerNbits
TRAINER_DIR: /home/recursal/RWKV-infctx-trainer/RWKV-v6-LRX
PROJECT_DIR: /home/recursal/RWKV-infctx-trainer


In [3]:
# Init the model: run init_model.py from TRAINER_DIR to create the base weights
# (6 layers, embedding size 1024, "world" vocab) that the training runs load
# through --model.load_model.
# --skip-if-exists makes the script skip initialisation when the output file is
# already present, so re-running this cell keeps the existing file.
# The output path is relative to TRAINER_DIR because of the leading `cd`.
!cd "{TRAINER_DIR}" && \
    python3 ./init_model.py \
        --n_layer 6 --n_embd 1024 \
        --vocab_size world --skip-if-exists \
        "../model/L6-D1024-world-v6base-init.pth"

[2024-04-11 06:48:49,707] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
[RWKV] Layer Repeat Multiplier: 2
---- Initializing model ----
No of layers: 6
Embedding size: 1024
Output model path: ../model/L6-D1024-world-v6base-init.pth
Vocab size: 65536
Emb scale: 0.0001
Note: this process takes a significant time (and ram) for large models
---- ----- ----
Output model exists, skipping init_model


# Dataset preloading

In [4]:
# Let's preload the required dataset: run preload_datapath.py from TRAINER_DIR
# against the same rechunk config file that the training runs pass with -c.
# NOTE(review): presumably this builds and saves the dataset ahead of training so
# the runs can reuse it; confirm against preload_datapath.py.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/config/enwiki_100k-world-32k-rechunk.yaml"

Saving the dataset (4/4 shards): 100%|█| 10020/10020 [00:17<00:00, 573.82 exampl
Saving the dataset (1/1 shards): 100%|█████| 2/2 [00:00<00:00, 97.04 examples/s]


# LRX=1

In [5]:
# Configure the LRX experiment count: the layer repeat multiplier for this run.
# It is exported below as RWKV_LAYER_REPEAT_MULTIPLIER and is also embedded in
# the checkpoint directory and the logger run name, keeping each run separate.
LRX_COUNT=1

# Nuke python3 (for back to back run cleanup)
# NOTE(review): `killall -9` sends SIGKILL to every process named python3 that
# this user may signal, not only leftover trainer processes. Avoid running this
# on a shared machine, or while other python3 jobs of yours are active.
!killall -9 python3

# Run with the LRX setting
# The command runs from TRAINER_DIR, so the relative ../model and ../checkpoint
# paths resolve against it. WANDB_MODE and RWKV_LAYER_REPEAT_MULTIPLIER are
# exported in the same shell, so only this trainer invocation sees them.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    export RWKV_LAYER_REPEAT_MULTIPLIER="{LRX_COUNT}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-32k-rechunk.yaml" \
        --model.load_model="../model/L6-D1024-world-v6base-init.pth" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/v6-LRX-enwiki-100k-LRX-{LRX_COUNT}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Rechunk 32k (LRX={LRX_COUNT}, {DEEPSPEED_STAGE})" \
        --trainer.strategy="{DEEPSPEED_STAGE}" \
        --trainer.microbatch_size={BATCH_SIZE} \
        --trainer.devices="{GPU_DEVICES}"

[2024-04-11 06:49:24,412] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
[RWKV] Layer Repeat Multiplier: 1
/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v6-layerNbits/config/enwiki_100k-world-32k-rechunk.yaml', '--model.load_model=../model/L6-D1024-world-v6base-init.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/v6-LRX-enwiki-100k-LRX-1/', '--trainer.logger.init_args.name=infctx-v6-LRX - Rechunk 32k (LRX=1, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=4', '--trainer.devices=auto'],

# LRX=2

In [6]:
# Configure the LRX experiment count: the layer repeat multiplier for this run.
# It is exported below as RWKV_LAYER_REPEAT_MULTIPLIER and is also embedded in
# the checkpoint directory and the logger run name, keeping each run separate.
LRX_COUNT=2

# Nuke python3 (for back to back run cleanup)
# NOTE(review): `killall -9` sends SIGKILL to every process named python3 that
# this user may signal, not only leftover trainer processes. Avoid running this
# on a shared machine, or while other python3 jobs of yours are active.
!killall -9 python3

# Run with the LRX setting
# The command runs from TRAINER_DIR, so the relative ../model and ../checkpoint
# paths resolve against it. WANDB_MODE and RWKV_LAYER_REPEAT_MULTIPLIER are
# exported in the same shell, so only this trainer invocation sees them.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    export RWKV_LAYER_REPEAT_MULTIPLIER="{LRX_COUNT}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-32k-rechunk.yaml" \
        --model.load_model="../model/L6-D1024-world-v6base-init.pth" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/v6-LRX-enwiki-100k-LRX-{LRX_COUNT}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Rechunk 32k (LRX={LRX_COUNT}, {DEEPSPEED_STAGE})" \
        --trainer.strategy="{DEEPSPEED_STAGE}" \
        --trainer.microbatch_size={BATCH_SIZE} \
        --trainer.devices="{GPU_DEVICES}"

[2024-04-11 07:26:27,449] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
[RWKV] Layer Repeat Multiplier: 2
/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v6-layerNbits/config/enwiki_100k-world-32k-rechunk.yaml', '--model.load_model=../model/L6-D1024-world-v6base-init.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/v6-LRX-enwiki-100k-LRX-2/', '--trainer.logger.init_args.name=infctx-v6-LRX - Rechunk 32k (LRX=2, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=4', '--trainer.devices=auto'],

# LRX=4

In [7]:
# Configure the LRX experiment count: the layer repeat multiplier for this run.
# It is exported below as RWKV_LAYER_REPEAT_MULTIPLIER and is also embedded in
# the checkpoint directory and the logger run name, keeping each run separate.
LRX_COUNT=4

# Nuke python3 (for back to back run cleanup)
# NOTE(review): `killall -9` sends SIGKILL to every process named python3 that
# this user may signal, not only leftover trainer processes. Avoid running this
# on a shared machine, or while other python3 jobs of yours are active.
!killall -9 python3

# Run with the LRX setting
# The command runs from TRAINER_DIR, so the relative ../model and ../checkpoint
# paths resolve against it. WANDB_MODE and RWKV_LAYER_REPEAT_MULTIPLIER are
# exported in the same shell, so only this trainer invocation sees them.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    export RWKV_LAYER_REPEAT_MULTIPLIER="{LRX_COUNT}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/enwiki_100k-world-32k-rechunk.yaml" \
        --model.load_model="../model/L6-D1024-world-v6base-init.pth" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/v6-LRX-enwiki-100k-LRX-{LRX_COUNT}/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Rechunk 32k (LRX={LRX_COUNT}, {DEEPSPEED_STAGE})" \
        --trainer.strategy="{DEEPSPEED_STAGE}" \
        --trainer.microbatch_size={BATCH_SIZE} \
        --trainer.devices="{GPU_DEVICES}"

[2024-04-11 08:19:01,988] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV infctx using 'torch-jit' with torch '2.1.2'
[RWKV] Layer Repeat Multiplier: 4
/home/recursal/miniconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/recursal/RWKV-infctx-trainer/notebook/rwkv-x-exp/v6-layerNbits/config/enwiki_100k-world-32k-rechunk.yaml', '--model.load_model=../model/L6-D1024-world-v6base-init.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/v6-LRX-enwiki-100k-LRX-4/', '--trainer.logger.init_args.name=infctx-v6-LRX - Rechunk 32k (LRX=4, deepspeed_stage_2)', '--trainer.strategy=deepspeed_stage_2', '--trainer.microbatch_size=4', '--trainer.devices=auto'],