# Dataset microbatch testing

Test training runs across multiple microbatch settings (1, 2, and 8).

In [20]:
import os

# ---- Run configuration ----
# Devices handed to the trainer via --trainer.devices.
GPU_DEVICES = "auto"
# Toggle for Weights & Biases logging, and the prefix used for run names.
ENABLE_WANDB = False
WANDB_PREFIX = "infctx-v5-microbatch"

print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

# Exported as the WANDB_MODE environment variable by the training cells.
WANDB_MODE = "online" if ENABLE_WANDB else "disabled"

# ---- Paths ----
# The notebook directory is taken to be the kernel's current working directory;
# the project root sits two levels above it, and the trainer lives in RWKV-v5.
NOTEBOOK_DIR = os.getcwd()
PROJECT_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, "../../"))
TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

for label, path in (
    ("NOTEBOOK_DIR:", NOTEBOOK_DIR),
    ("TRAINER_DIR:", TRAINER_DIR),
    ("PROJECT_DIR:", PROJECT_DIR),
):
    print(label, path)

ENABLE_WANDB: False
GPU_DEVICES: auto
NOTEBOOK_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation
TRAINER_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /home/picocreator/rwkv-proj/RWKV-infctx-trainer


In [21]:
# Initialise the base model checkpoint (6 layers, 512 embedding size, "world" vocab)
# that the training cells below load via --model.load_model.
# The command runs from TRAINER_DIR, so the "../model/..." output path is relative to it.
# --skip-if-exists is passed, so an existing checkpoint is left in place on re-runs.
!cd "{TRAINER_DIR}" && \
    python3 ./init_model.py \
        --n_layer 6 --n_embd 512 \
        --vocab_size world --skip-if-exists \
        "../model/L6-D512-world-v5base-init.pth"

[2024-01-18 11:19:39,010] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.1'
---- Initializing model ----
No of layers: 6
Embedding size: 512
Output model path: ../model/L6-D512-world-v5base-init.pth
Vocab size: 65536
Emb scale: 0.0001
Note: this process takes a significant time (and ram) for large models
---- ----- ----
Model exists, skipping init_model


In [25]:
# Let's preload the required dataset.
# Uses the same config file (enwiki_10k-world-full.yaml) that the training cells pass via -c.
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{NOTEBOOK_DIR}/config/enwiki_10k-world-full.yaml"

Map (num_proc=16): 100%|█████████| 10000/10000 [00:01<00:00, 9575.14 examples/s]
Filter (num_proc=16): 100%|█████| 10000/10000 [00:00<00:00, 12203.75 examples/s]
Map (num_proc=16): 100%|██████████| 9892/9892 [00:00<00:00, 20646.21 examples/s]
Saving the dataset (1/1 shards): 100%|█| 9892/9892 [00:00<00:00, 241357.37 examp
Saving the dataset (1/1 shards): 100%|█| 100/100 [00:00<00:00, 28064.93 examples


# microbatch=1

Note: We intentionally test without rechunk, as rechunking has known edge-case issues.

In [29]:
# Training run with --trainer.microbatch_size=1 using the deepspeed_stage_1 strategy.
# WANDB_MODE is exported so the logger follows the ENABLE_WANDB switch from the config cell.
# NOTE(review): every training cell in this notebook uses the same checkpoint dirpath —
# confirm that later runs overwriting earlier checkpoints is intended.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/enwiki_10k-world-full.yaml" \
        --model.load_model="../model/L6-D512-world-v5base-init.pth" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/v5-enwiki-10k-full/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Microbatch 1 - (deepspeed_stage_1)" \
        --trainer.strategy="deepspeed_stage_1" \
        --trainer.microbatch_size=1 \
        --trainer.devices="{GPU_DEVICES}"

[2024-01-18 12:00:55,830] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.1'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_10k-world-full.yaml', '--model.load_model=../model/L6-D512-world-v5base-init.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-enwiki-10k-full/', '--trainer.logger.init_args.name=infctx-v5-microbatch - Microbatch 1 - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.microbatch_size=1', '--trainer.devices=auto'], args=['fit', '-c', '/home/picocrea

# microbatch=2

In [32]:
# Training run with --trainer.microbatch_size=2 using the deepspeed_stage_1 strategy.
# WANDB_MODE is exported so the logger follows the ENABLE_WANDB switch from the config cell.
# NOTE(review): every training cell in this notebook uses the same checkpoint dirpath —
# confirm that later runs overwriting earlier checkpoints is intended.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/enwiki_10k-world-full.yaml" \
        --model.load_model="../model/L6-D512-world-v5base-init.pth" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/v5-enwiki-10k-full/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Microbatch 2 - (deepspeed_stage_1)" \
        --trainer.strategy="deepspeed_stage_1" \
        --trainer.microbatch_size=2 \
        --trainer.devices="{GPU_DEVICES}"

[2024-01-18 12:11:50,734] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.1'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_10k-world-full.yaml', '--model.load_model=../model/L6-D512-world-v5base-init.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-enwiki-10k-full/', '--trainer.logger.init_args.name=infctx-v5-microbatch - Microbatch 2 - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.microbatch_size=2', '--trainer.devices=auto'], args=['fit', '-c', '/home/picocrea

# microbatch=8

In [33]:
# Training run with --trainer.microbatch_size=8 using the deepspeed_stage_1 strategy.
# The size and the logger run name are both set to 8 so that they agree with this
# section's "microbatch=8" heading and with the microbatch_size recorded in the cell output.
# WANDB_MODE is exported so the logger follows the ENABLE_WANDB switch from the config cell.
!cd "{TRAINER_DIR}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python3 lightning_trainer.py fit \
        -c "{NOTEBOOK_DIR}/config/enwiki_10k-world-full.yaml" \
        --model.load_model="../model/L6-D512-world-v5base-init.pth" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/v5-enwiki-10k-full/" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Microbatch 8 - (deepspeed_stage_1)" \
        --trainer.strategy="deepspeed_stage_1" \
        --trainer.microbatch_size=8 \
        --trainer.devices="{GPU_DEVICES}"

[2024-01-18 12:17:38,330] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.1.1'
/home/picocreator/anaconda3/envs/rwkv-infctx/lib/python3.11/site-packages/lightning/pytorch/cli.py:518: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/home/picocreator/rwkv-proj/RWKV-infctx-trainer/notebook/trainer-v5-validation/config/enwiki_10k-world-full.yaml', '--model.load_model=../model/L6-D512-world-v5base-init.pth', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-enwiki-10k-full/', '--trainer.logger.init_args.name=infctx-v5-microbatch - Microbatch 2 - (deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.microbatch_size=8', '--trainer.devices=auto'], args=['fit', '-c', '/home/picocrea