# Audio Regression Benchmark

Run the curated audio fixtures through the configured Whisper model to track how accuracy changes as fine-tuning progresses. To compare different checkpoints, adjust the model name or language hint in the configuration cell below.

In [None]:
from pathlib import Path
from datetime import datetime
import json

from wavecap_backend.audio_processing import AudioFrontEndConfig
from wavecap_backend.audio_regression import evaluate_corpus, load_regression_cases
from wavecap_backend.config import load_config
from wavecap_backend.whisper_transcriber import WhisperTranscriber

# Locations of the regression fixtures and of the append-only results log.
# Both are relative paths, so they resolve against the current working
# directory (presumably the notebook's own folder -- confirm before running).
RESULTS_DIR = Path('../backend/audio_regression')
RESULT_LOG = RESULTS_DIR.joinpath('benchmark-history.jsonl')
CASES_PATH = Path('../backend/audio_regression/cases.jsonl')

# Load every regression case up front and report how many were found.
cases = load_regression_cases(CASES_PATH)
print(f'Loaded {len(cases)} regression cases from {CASES_PATH}')


In [None]:
# Work on a deep copy of the Whisper settings so that overrides made in this
# cell do not touch the loaded application config object.
config = load_config()
whisper_config = config.whisper.model_copy(deep=True)
# Override the checkpoint or language hint here if required.
# whisper_config.model = 'small.en'
# whisper_config.language = 'en'

# The configured de-emphasis value is scaled by 1e-6 before being handed to
# the front end (microseconds -> seconds, going by the field name); an unset
# value stays None.
deemphasis_micros = whisper_config.deemphasisTimeConstantMicros
if deemphasis_micros is None:
    deemphasis = None
else:
    deemphasis = float(deemphasis_micros) * 1e-6

# The front-end config is built with AGC disabled (None); the configured AGC
# target is passed to evaluate_corpus separately below.
frontend_config = AudioFrontEndConfig(
    sample_rate=whisper_config.sampleRate,
    highpass_cutoff_hz=whisper_config.highpassCutoffHz,
    lowpass_cutoff_hz=whisper_config.lowpassCutoffHz,
    deemphasis_time_constant=deemphasis,
    agc_target_rms=None,
)

# Run every loaded case through the transcriber and collect the summary.
transcriber = WhisperTranscriber(whisper_config)
summary = evaluate_corpus(
    cases,
    transcriber,
    sample_rate=whisper_config.sampleRate,
    language=whisper_config.language,
    frontend_config=frontend_config,
    agc_target_rms=whisper_config.agcTargetRms,
)

# Last expression of the cell: the notebook renders the report inline.
summary.to_report()


In [None]:
# Append this run to the benchmark history log as one JSON object per line.
from datetime import timezone

# Start from the summary report and tag it with the evaluated model, the
# language hint, and the UTC time of the run.
record = summary.to_report()
record.update(
    {
        'model': whisper_config.model,
        'language': whisper_config.language,
        # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
        # timestamp and drop tzinfo so the serialized form stays
        # 'YYYY-MM-DDTHH:MM:SS.ffffffZ' rather than ending in '+00:00Z'.
        'evaluated_at': (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
        ),
    }
)

# Create the results directory on first use, then append (never overwrite).
RESULT_LOG.parent.mkdir(parents=True, exist_ok=True)
with RESULT_LOG.open('a', encoding='utf-8') as handle:
    handle.write(json.dumps(record))
    # Newline terminator keeps the log valid JSON Lines.
    handle.write('\n')
print(f'Appended results to {RESULT_LOG}')
