# Wave Field LLM — S3 Benchmark (100M params, 100M tokens)

**V4.3.7 SPECTRE-Wave** at S3 scale on Colab T4 (16GB).

| Scale | Params | Tokens | Dataset | embed/layers/heads | T4 time |
|-------|--------|--------|---------|-------------------|---------|
| S1 | 22M | 20M | WikiText-2 | 384/8/8 | ~25 min |
| S2 | 55M | 50M | WikiText-103 | 512/12/8 | ~2.5 hrs |
| **S3** | **100M** | **100M** | **WikiText-103** | **768/12/12** | **~3-4 hrs** |

**Strategy for free tier:**
- Checkpoints saved to **Google Drive** (survives disconnects)
- Auto-resume from last checkpoint on reconnect
- Wave model only (skip Standard to save time)
- Gradient checkpointing + fp16 AMP (T4 optimized)
- Eval every 5M tokens (~20 checkpoints total)

**If disconnected:** Re-run all cells. Training auto-resumes from the last checkpoint saved to Drive.

In [None]:
# Cell 1: Mount Google Drive for persistent checkpoints
import os

from google.colab import drive

# The mount must happen before any path under /content/drive is touched.
drive.mount('/content/drive')

# Persistent layout on Drive: one root with checkpoint and cache subfolders.
DRIVE_DIR = '/content/drive/MyDrive/wave-field-llm'
CKPT_DIR = os.path.join(DRIVE_DIR, 'checkpoints')
CACHE_DIR = os.path.join(DRIVE_DIR, 'cache')
for _persistent_dir in (CKPT_DIR, CACHE_DIR):
    os.makedirs(_persistent_dir, exist_ok=True)

print(f'Drive checkpoint dir: {CKPT_DIR}')
# Report what is already in the checkpoint folder (e.g. from an earlier session).
_existing_files = os.listdir(CKPT_DIR) if os.path.exists(CKPT_DIR) else []
print(f'Existing files: {_existing_files}')

In [None]:
# Cell 2: Clone repo + install deps
# Clone the repository; if the clone fails (for example because the directory
# already exists), fall back to `git pull` inside the existing checkout.
# Clone error output is discarded via 2>/dev/null.
# NOTE(review): if this cell is re-run while the working directory is already
# the checkout, the clone would presumably land in a nested wave-field-llm/
# directory — confirm before re-running without a runtime restart.
!git clone https://github.com/Pankh-AI/wave-field-llm.git 2>/dev/null || (cd wave-field-llm && git pull)
# Make the checkout the working directory; later cells use paths relative to it
# (results/..., benchmarks/..., tests/...).
%cd wave-field-llm
# Quiet install of the tokenizers and datasets packages.
!pip install -q tokenizers datasets

import re
import sys

# Put the current directory (the repo checkout) first on the import path so
# that `src` resolves to this checkout.
sys.path.insert(0, '.')
from src import __version__

# Minimum code version this notebook requires, as a comparable integer tuple.
_MIN_VERSION = (4, 3, 7)


def _version_tuple(version):
    """Return the leading dotted-numeric part of *version* as a tuple of ints.

    The result is padded with zeros to at least three components, so '4.4'
    becomes (4, 4, 0). Anything after the numeric prefix (e.g. '-dev',
    '+local') is ignored. Raises RuntimeError if no numeric prefix exists.
    """
    match = re.match(r'\d+(?:\.\d+)*', str(version).strip().lstrip('vV'))
    if match is None:
        raise RuntimeError(f'Cannot parse code version: {version!r}')
    parts = [int(piece) for piece in match.group().split('.')]
    parts += [0] * (3 - len(parts))
    return tuple(parts)


print(f'\nCode version: V{__version__}')
# Compare numerically: a plain string comparison orders '4.10.0' before
# '4.3.7' (character by character) and would reject newer releases.
if _version_tuple(__version__) < _MIN_VERSION:
    raise RuntimeError(f'Need V4.3.7+, got {__version__}')

In [None]:
# Cell 3: GPU check + VRAM estimation
import torch

print(f'PyTorch: {torch.__version__}')
cuda_ok = torch.cuda.is_available()
print(f'CUDA: {cuda_ok}')

# Nothing below can run without a CUDA device, so stop early with instructions.
if not cuda_ok:
    raise RuntimeError('No GPU! Go to Runtime > Change runtime type > T4 GPU')

gpu_name = torch.cuda.get_device_name()
vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f'GPU: {gpu_name}')
print(f'VRAM: {vram_gb:.1f} GB')

# Rough S3 budget (author's estimate): ~100M-param model (400MB) + AdamW (1.2GB)
# + activations with checkpointing (~3GB), i.e. ~5-6GB in total.
# Warn when the device reports less than 12 GB.
print(
    f'WARNING: Only {vram_gb:.1f} GB VRAM. S3 needs ~6GB. Might be tight.'
    if vram_gb < 12
    else f'VRAM OK: {vram_gb:.1f} GB (S3 needs ~6GB)'
)

# Per-GPU notes on the AMP mode and rough runtime, matched by substring of the
# device name in this order. This cell only reports them.
_gpu_notes = (
    ('T4', ('\nT4 detected: Using fp16 AMP + GradScaler',
            'Estimated time: ~3-4 hours (Wave only)')),
    ('A100', ('\nA100 detected: Will use bf16 AMP (no GradScaler needed)',
              'Estimated time: ~1.5-2 hours (Wave only)')),
)
for _tag, _lines in _gpu_notes:
    if _tag in gpu_name:
        for _line in _lines:
            print(_line)
        break
else:
    # No known GPU family matched: fall back to a generic estimate.
    print(f'\nGPU: {gpu_name}. Estimated ~3-5 hours.')

In [None]:
# Cell 4: Symlink Drive dirs so benchmark_scaling.py saves there
import os
import shutil


def _relink_to_drive(local_path, drive_path):
    """Make *local_path* a symlink pointing at *drive_path*.

    An existing symlink is removed and recreated. An existing real directory
    has each entry moved into *drive_path* unless an entry of the same name is
    already there, and is then deleted (together with anything not moved).
    """
    if os.path.islink(local_path):
        os.unlink(local_path)
    elif os.path.isdir(local_path):
        for entry in os.listdir(local_path):
            origin = os.path.join(local_path, entry)
            target = os.path.join(drive_path, entry)
            if not os.path.exists(target):
                shutil.move(origin, target)
        shutil.rmtree(local_path)
    os.symlink(drive_path, local_path)


# results/checkpoints -> Drive
local_ckpt = 'results/checkpoints'
os.makedirs('results', exist_ok=True)
_relink_to_drive(local_ckpt, CKPT_DIR)
print(f'Checkpoints symlinked: {local_ckpt} -> {CKPT_DIR}')

# results/cache -> Drive (tokenizer + token arrays, ~500MB)
local_cache = 'results/cache'
_relink_to_drive(local_cache, CACHE_DIR)
print(f'Cache symlinked: {local_cache} -> {CACHE_DIR}')

# The remaining results dirs stay local (non-critical, no persistence needed).
for _local_only in ('results/data', 'results/plots', 'results/monitor'):
    os.makedirs(_local_only, exist_ok=True)

print(f'\nDrive checkpoint files: {os.listdir(CKPT_DIR)}')

In [None]:
# Cell 5: Run S3 benchmark (Wave only)
#
# If disconnected: re-run all cells. RESUME=1 auto-detects the
# checkpoint on Drive and continues from where it left off.
#
import os
os.environ['SCALE'] = 'S3'              # S3: 100M params, 100M tokens
os.environ['MODEL'] = 'wave'            # Wave only (skip Standard, save ~3hrs)
os.environ['DATASET'] = '103'           # WikiText-103 (133M tokens, 1 epoch)
os.environ['RESUME'] = '1'              # Auto-resume from Drive checkpoint
os.environ['MONITOR'] = '1'             # Keep monitor on for diagnostics
# os.environ['BATCH_SIZE'] = '6'        # Uncomment if OOM (unlikely on T4 16GB)

!python benchmarks/benchmark_scaling.py

In [None]:
# Cell 6: Results analysis
import json
import os

# Prefer the S3-specific results file; otherwise fall back to the generic one.
results_path = next(
    (p for p in ('results/data/scaling_s3.json',) if os.path.exists(p)),
    'results/data/scaling_benchmark.json',
)

if not os.path.exists(results_path):
    # No results yet: report how far the resume checkpoint on Drive got.
    print('No results file found yet.')
    print('If training was interrupted, check checkpoint status:')
    ckpt_path = os.path.join(CKPT_DIR, 'spectre-wave_s3_resume.pt')
    if not os.path.exists(ckpt_path):
        print(f'  No checkpoint on Drive either. Training may not have started.')
    else:
        import torch
        # Loaded on CPU only to read the progress counters stored in it.
        ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
        print(f'\n  Checkpoint found on Drive!')
        print(f'  Step: {ckpt["step"]} / ~16,276')
        print(f'  Tokens: {ckpt["tokens_seen"]/1e6:.1f}M / 100M')
        print(f'  Best PPL: {ckpt["best_ppl"]:.2f}')
        print(f'  Best Acc: {ckpt["best_acc"]:.2f}%')
        print(f'  Progress: {ckpt["tokens_seen"]/100e6*100:.1f}%')
        print(f'\n  Re-run Cell 5 to continue training from this checkpoint.')
else:
    with open(results_path) as fh:
        data = json.load(fh)

    rule = '=' * 65
    print(rule)
    print('  S3 BENCHMARK RESULTS (100M params, 100M tokens)')
    print(f'  Dataset: {data["metadata"].get("dataset", "unknown")}')
    print(f'  GPU: {data["metadata"].get("gpu", "unknown")}')
    print(rule)

    # One summary per recorded run, followed by its eval curve when present.
    for run in data['results']:
        print(f"\n  {run['run_name']}")
        print(f"    PPL:    {run['best_ppl']:.2f}")
        print(f"    Acc:    {run['best_acc']:.2f}%")
        print(f"    Params: {run['params']:,}")
        print(f"    Speed:  {run['tokens_per_sec']:,} tok/s")
        print(f"    Time:   {run['total_time_s']:.0f}s ({run['total_time_s']/60:.0f} min)")
        print(f"    Epochs: {run.get('epochs', '?')}")

        history = run.get('curve', [])
        if not history:
            continue
        print(f"\n    Training curve ({len(history)} checkpoints):")
        print(f"    {'Tokens':>10} {'PPL':>10} {'Acc':>8}")
        for point in history:
            print(f"    {point['tokens_M']:>8.1f}M {point['ppl']:>10.2f} {point['acc']:>7.1f}%")

In [None]:
# Cell 7: Run causality test on trained S3 model
# Runs the repository's tests/test_causality.py script as a shell command.
# NOTE(review): no checkpoint path or scale is passed on the command line, so
# whether the script actually loads the trained S3 model cannot be confirmed
# from this notebook — verify against the script itself.
!python tests/test_causality.py

In [None]:
# Cell 8: Run verification suite on S3 checkpoint
# (Only run this after training completes or has enough checkpoints)
import os
ckpt_path = os.path.join(CKPT_DIR, 'spectre-wave_s3.pt')
if os.path.exists(ckpt_path):
    !python tests/verify_results.py --scale S3 --dataset 103 --skip-generation
else:
    print('No S3 checkpoint yet. Run Cell 5 first.')

In [None]:
# Cell 9: Copy results to Drive (in case session dies before download)
import os
import shutil

DRIVE_RESULTS = os.path.join(DRIVE_DIR, 'results_s3')
os.makedirs(DRIVE_RESULTS, exist_ok=True)

# Back up whichever of the JSON result files exist locally.
for name in ('scaling_s3.json', 'scaling_benchmark.json', 'verification_s3.json'):
    local_file = os.path.join('results/data', name)
    if not os.path.exists(local_file):
        continue
    shutil.copy2(local_file, DRIVE_RESULTS)
    print(f'Copied: {name}')

# Back up the monitor output, replacing any previous copy on Drive wholesale.
mon_dir = 'results/monitor/spectre-wave_s3'
if os.path.isdir(mon_dir):
    monitor_backup = os.path.join(DRIVE_RESULTS, 'monitor')
    if os.path.exists(monitor_backup):
        shutil.rmtree(monitor_backup)
    shutil.copytree(mon_dir, monitor_backup)
    print('Copied: monitor data')

print(f'\nAll results saved to: {DRIVE_RESULTS}')
print(f'Files: {os.listdir(DRIVE_RESULTS)}')

In [None]:
# Cell 10: Download results locally (optional)
# `os` is imported here, as in the other cells that use it, so this cell does
# not raise NameError when run on its own in a fresh kernel.
import os

from google.colab import files

# Paths are relative to the current working directory (the repo checkout once
# Cell 2 has run). Missing files are reported instead of silently skipped.
for path in ['results/data/scaling_s3.json', 'results/data/verification_s3.json']:
    if os.path.exists(path):
        files.download(path)
        print(f'Downloaded: {path}')
    else:
        print(f'Skipped (not found): {path}')