# Wave Field LLM — V4.3.3 Benchmark Verification

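Reproduces the S1 result: **SPECTRE-Wave PPL 239 vs Standard PPL 171**. Only the SPECTRE-Wave run is re-executed in this notebook; the Standard PPL 171 baseline is treated as already verified.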

Runtime: ~20–25 min on a T4, ~10–12 min on an A100 (almost all of it is the benchmark run itself).

In [None]:
# Cell 1: Setup — pin to V4.3.3 commit (NOT V4.3.4 which regresses!)
!git clone https://github.com/Pankh-AI/wave-field-llm.git
%cd wave-field-llm
!git checkout 51e7015   # V4.3.3 exactly — V4.3.4 has untested NormalizedExp/damping changes
!pip install -q tokenizers datasets

In [None]:
# Cell 2: Check GPU
# Report the PyTorch build and CUDA availability, and stop the notebook early
# with an actionable message when no CUDA device is visible.
import torch

print(f'PyTorch: {torch.__version__}')

cuda_ok = torch.cuda.is_available()
print(f'CUDA: {cuda_ok}')

if not cuda_ok:
    raise RuntimeError('No GPU! Go to Runtime > Change runtime type > T4 GPU')

# Name of the current CUDA device, and total memory of device 0 in decimal GB.
print(f'GPU: {torch.cuda.get_device_name()}')
vram_bytes = torch.cuda.get_device_properties(0).total_memory
print(f'VRAM: {vram_bytes / 1e9:.1f} GB')

In [None]:
# Cell 3: Run V4.3.3 SPECTRE-Wave only (~20 min on T4, ~10 min on A100)
import os
os.environ['MODEL'] = 'wave'            # Wave only — Standard PPL 171 already verified
os.environ['MONITOR'] = '0'             # Skip monitor for speed
os.environ['BATCH_SIZE'] = '16'

!python benchmarks/benchmark_scaling.py

In [None]:
# Cell 4: Compare results
# Load the JSON summary from results/scaling_s1.json and print one block of
# headline metrics per entry in its 'results' list.
import json

with open('results/scaling_s1.json') as results_file:
    data = json.load(results_file)

banner = '=' * 60
print(banner)
print('  RESULTS')
print(banner)

# (label, renderer) pairs, printed in this order for every run. Labels are
# left-padded to a common width so the values line up in one column.
metric_rows = (
    ('PPL:',    lambda run: f"{run['best_ppl']:.2f}"),
    ('Acc:',    lambda run: f"{run['best_acc']:.2f}%"),
    ('Params:', lambda run: f"{run['params']:,}"),
    ('Speed:',  lambda run: f"{run['tokens_per_sec']:,} tok/s"),
    ('Time:',   lambda run: f"{run['total_time_s']:.0f}s"),
)

for run in data['results']:
    print(f"\n  {run['run_name']}")
    for label, render in metric_rows:
        print(f"    {label:<8}{render(run)}")