# Wave Field LLM -- V4.3.3 S1 Benchmark

Reproduces the S1 result: **SPECTRE-Wave PPL 239 vs Standard PPL 171** (1.40x gap)

Runtime: ~25 min on T4, ~12 min on A100

In [None]:
# Cell 1: Setup
# Fetches the wave-field-llm repository, makes it the working directory,
# installs the `tokenizers` and `datasets` packages, and then checks that the
# checked-out code reports version 4.3.3.
# NOTE(review): this cell is not written to be re-run; presumably a second run
# from inside the clone would clone again into a nested directory -- confirm,
# or restart the runtime before re-running.
!git clone https://github.com/Pankh-AI/wave-field-llm.git
%cd wave-field-llm
!pip install -q tokenizers datasets

# Verify we have V4.3.3 code
import sys
# Put the current directory (the repository root after the %cd above) first on
# the import path so that `src` resolves to the cloned package.
sys.path.insert(0, '.')
from src import __version__
print(f'\nCode version: {__version__}')
# Fail fast if the cloned code is not the expected release.
assert __version__ == '4.3.3', f'ERROR: Expected V4.3.3, got {__version__}!'
print('Version check PASSED')

In [None]:
# Cell 2: Check GPU
# Reports the PyTorch version and CUDA availability, then either prints the
# GPU name and total memory or aborts with instructions to enable a GPU.
import torch

has_cuda = torch.cuda.is_available()
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {has_cuda}')

# Guard clause: nothing below makes sense without a CUDA device.
if not has_cuda:
    raise RuntimeError('No GPU! Go to Runtime > Change runtime type > T4 GPU')

gpu_name = torch.cuda.get_device_name()
# total_memory is divided by 1e9, so the figure is in decimal gigabytes.
vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f'GPU: {gpu_name}')
print(f'VRAM: {vram_gb:.1f} GB')

In [None]:
# Cell 3: Run S1 benchmark (~25 min on T4)
import os
os.environ['SCALE'] = 'S1'              # S1 only (22M params, 20M tokens)
os.environ['DATASET'] = '2'             # WikiText-2 (matches verified PPL 229 result)
os.environ['MONITOR'] = '0'             # Skip monitor for speed
os.environ['BATCH_SIZE'] = '16'
# os.environ['MODEL'] = 'wave'          # Uncomment to run Wave only (skip Standard)

!python benchmarks/benchmark_scaling.py

In [None]:
# Cell 4: Results
# Loads the benchmark output from results/scaling_s1.json and prints one
# summary section per entry in its 'results' list.
import json

with open('results/scaling_s1.json') as results_file:
    data = json.load(results_file)

# Header banner: the same 60-character rule above and below the title.
rule = '=' * 60
print(rule)
print('  S1 RESULTS (22M params, 20M tokens)')
print(rule)

# Each entry is expected to carry the keys read below; a missing key raises
# KeyError after the preceding lines of that entry have been printed.
for run in data['results']:
    print(f"\n  {run['run_name']}")
    print(f"    PPL:    {run['best_ppl']:.2f}")
    print(f"    Acc:    {run['best_acc']:.2f}%")
    print(f"    Params: {run['params']:,}")
    print(f"    Speed:  {run['tokens_per_sec']:,} tok/s")
    print(f"    Time:   {run['total_time_s']:.0f}s")