Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,54 @@ LOGICAL Z (lz):

- `data.noise_model`: a **25-parameter circuit-level** noise model (SPAM, idles, and CNOT Pauli channels).

Training may apply a **training-only** “noise floor” sparsity guard (scales probabilities up if the grouped totals are too small). Evaluation uses the user-specified noise model **as-is**. We have seen that it is preferable to train on data that is more dense and then apply the model to sparser data than to train on the sparse data directly.
#### Training noise upscaling (surface code)

When training a surface-code pre-decoder, the noise parameters you specify may be very small (e.g. `p = 1e-4`), which produces extremely sparse syndromes and slow convergence. To address this, the training pipeline **automatically upscales** all 25 noise-model parameters so that the largest grouped total `max(P_prep, P_meas, P_idle_cnot, P_idle_spam, P_cnot)` equals a fixed target of **6 × 10⁻³** (just below the surface-code threshold of ~7.5 × 10⁻³).

The five grouped totals are:

| Group | Sum of |
|-------|--------|
| P_prep | `p_prep_X + p_prep_Z` |
| P_meas | `p_meas_X + p_meas_Z` |
| P_idle_cnot | `p_idle_cnot_X + p_idle_cnot_Y + p_idle_cnot_Z` |
| P_idle_spam | `p_idle_spam_X + p_idle_spam_Y + p_idle_spam_Z` |
| P_cnot | sum of all 15 `p_cnot_*` |

**Upscaling rules:**

- If `max_group <= 6e-3`: all 25 p's are multiplied by `6e-3 / max_group` (a factor of exactly 1 when `max_group == 6e-3`) for training data generation only. Evaluation always uses the original user-specified noise model as-is.
- If `max_group > 6e-3`: parameters are **not** modified (the training log emits a warning in case this indicates a configuration error).
- Non-surface-code types (`code_type != "surface_code"`) are never upscaled.

We have found that training on denser syndromes and then evaluating on sparser data produces better results than training directly on sparse data.

#### Skipping noise upscaling

If you need to train with your **exact** noise parameters (e.g. for benchmarking or controlled experiments), you can disable upscaling via config or environment variable:

**Config** (`conf/config_public.yaml`):

```yaml
data:
skip_noise_upscaling: true
noise_model:
p_prep_X: 0.002
# ... rest of 25 params
```

**Environment variable:**

```bash
PREDECODER_SKIP_NOISE_UPSCALING=1 bash code/scripts/local_run.sh
```

Either method causes the training pipeline to use the user-specified noise model verbatim — no scaling is applied. The training log will confirm:

```
[Train] noise_model upscaling SKIPPED (skip_noise_upscaling=true or PREDECODER_SKIP_NOISE_UPSCALING=1).
```


### Precomputed frames (recommended)

Expand Down
120 changes: 119 additions & 1 deletion code/qec/noise_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,14 @@
"""

from dataclasses import dataclass, field, asdict
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, Any
import numpy as np

# Surface-code training upscale target: when sampling training data, the noise
# model is scaled up so that its largest grouped total reaches this value.
# Chosen to sit below the approximate threshold (~7.5e-3).
SURFACE_CODE_TRAINING_UPSCALE_TARGET = 6e-3
# Approximate surface-code threshold, intended for user-facing warnings.
SURFACE_CODE_THRESHOLD_APPROX = 7.5e-3


# Internal helper for depolarizing-equivalent 25p mapping (tests/docs).
def _single_p_mapping(p: float, spam_factor: float = 2.0 / 3.0) -> Dict[str, float]:
Expand Down Expand Up @@ -388,6 +393,119 @@ def __repr__(self) -> str:
)


def get_grouped_totals(nm: NoiseModel) -> Dict[str, float]:
    """
    Collapse the 25-p noise model into one total probability per fault channel.

    Args:
        nm: Noise model exposing ``p_prep_X``/``p_prep_Z``, ``p_meas_X``/``p_meas_Z``
            and the ``get_total_*_probability()`` accessors for idle and CNOT channels.

    Returns:
        Dict with keys (in this order): p_prep, p_meas, p_idle_cnot, p_idle_spam,
        p_cnot, max_group. Every value is a plain ``float``; ``max_group`` is the
        largest of the five channel totals.
    """
    grouped: Dict[str, float] = {
        "p_prep": float(nm.p_prep_X + nm.p_prep_Z),
        "p_meas": float(nm.p_meas_X + nm.p_meas_Z),
        "p_idle_cnot": float(nm.get_total_idle_cnot_probability()),
        "p_idle_spam": float(nm.get_total_idle_spam_probability()),
        "p_cnot": float(nm.get_total_cnot_probability()),
    }
    # Only the five channel totals are present at this point, so the max is
    # taken over exactly those (in insertion order).
    grouped["max_group"] = max(grouped.values())
    return grouped


def get_training_upscaled_noise_model(
    noise_model: NoiseModel,
    code_type: str = "surface_code",
    skip_upscale: bool = False,
) -> Tuple[NoiseModel, Dict[str, Any]]:
    """
    Choose the noise model used for sampling training data (surface code only).

    The largest grouped total (see ``get_grouped_totals``) is compared against
    SURFACE_CODE_TRAINING_UPSCALE_TARGET (6e-3). Training data sampling should use
    the returned model; evaluation should use the original user-specified model.

    Behaviour, checked in this order:

    - ``skip_upscale`` is True: the original model is returned unchanged.
    - ``code_type != "surface_code"``: the original model is returned unchanged.
    - ``max_group <= target``: all 25 p's are multiplied by ``target / max_group``
      (a factor of exactly 1.0 when ``max_group == target``) and a new model is returned.
    - ``max_group > target``: parameters are NOT changed (no downscaling); the
      original model is returned and ``info`` flags the situation so the caller
      can warn about a possible configuration error.

    Args:
        noise_model: The user-specified NoiseModel.
        code_type: Code type string (upscaling only for "surface_code").
        skip_upscale: If True, skip upscaling entirely and return the original model unchanged.
            Useful for training with exact user-specified noise parameters (e.g. benchmarking).

    Returns:
        (training_noise_model, info_dict) where info_dict has:
            - applied_upscale: bool (True only when a scaled model was built)
            - scale_factor: float (present only when a scale was computed, i.e. on the
              upscale path and on the downscale-skipped path)
            - max_group: float
            - group_totals: dict (p_prep, p_meas, ...)
            - above_target_warning: bool (max_group > target)
            - downscale_skipped: bool (max_group > target, params not modified)
            - skipped_by_user: bool (skip_upscale was True)
            - message: str (human-readable summary of the decision)

    Raises:
        ValueError: On the surface-code path, if any grouped total is NaN/infinite
            or if all grouped totals are <= 0.
    """
    target = SURFACE_CODE_TRAINING_UPSCALE_TARGET
    totals = get_grouped_totals(noise_model)
    max_group = totals["max_group"]

    info: Dict[str, Any] = {
        "max_group": max_group,
        "group_totals": totals,
        "above_target_warning": max_group > target,
        "downscale_skipped": False,
        "applied_upscale": False,
        "skipped_by_user": skip_upscale,
    }

    if skip_upscale:
        info["message"] = (
            "Noise upscaling SKIPPED by user (skip_noise_upscaling=true). "
            f"Training will use the exact user-specified noise model (max_group={max_group:.6g})."
        )
        return (noise_model, info)

    if code_type != "surface_code":
        info["message"] = (
            f"Noise upscaling is not applied for code_type={code_type!r} (surface_code only)."
        )
        return (noise_model, info)

    totals_desc = (
        f"(prep={totals['p_prep']}, meas={totals['p_meas']}, "
        f"idle_cnot={totals['p_idle_cnot']}, idle_spam={totals['p_idle_spam']}, cnot={totals['p_cnot']})"
    )

    # NaN compares False against everything (and max() can hide a NaN that is not
    # its first argument), so the `<= 0` guard below cannot catch non-finite
    # totals. Without this check such a model would silently fall through to the
    # "downscale skipped" branch and be used for training.
    if not all(np.isfinite(value) for value in totals.values()):
        raise ValueError(
            f"Invalid noise_model: grouped totals must be finite {totals_desc}."
        )

    if max_group <= 0.0:
        raise ValueError(f"Invalid noise_model: all grouped totals are <= 0 {totals_desc}.")

    scale_factor = target / max_group
    info["scale_factor"] = scale_factor

    if scale_factor < 1.0:
        # max_group is above the target: never scale the user's noise down.
        info["downscale_skipped"] = True
        info["message"] = (
            f"Downscale NOT applied: max_group={max_group:.6g} > target={target:.1e}. "
            "Parameters unchanged. If you intended a lower noise regime, check your noise model values."
        )
        return (noise_model, info)

    # Upscaling: apply the same factor to all 25 parameters.
    params = noise_model.to_config_dict()
    scaled_params = {k: float(v) * scale_factor for k, v in params.items()}
    training_nm = NoiseModel.from_config_dict(scaled_params)
    # Carry the original model's reference data over to the scaled copy.
    training_nm._reference = dict(noise_model._reference)
    info["applied_upscale"] = True
    info["message"] = (
        f"Upscaled training noise: max_group={max_group:.6g} -> target={target:.1e} "
        f"(scale={scale_factor:.6g}). Evaluation uses user-specified noise model as-is."
    )
    return (training_nm, info)


def noise_model_from_config(cfg) -> Optional[NoiseModel]:
"""
Create a NoiseModel from a Hydra config object.
Expand Down
141 changes: 140 additions & 1 deletion code/tests/test_noise_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,16 @@

sys.path.insert(0, str(Path(__file__).parent.parent))

from qec.noise_model import NoiseModel, CNOT_ERROR_TYPES, CNOT_ERROR_INDEX, _single_p_mapping
from qec.noise_model import (
NoiseModel,
CNOT_ERROR_TYPES,
CNOT_ERROR_INDEX,
_single_p_mapping,
get_grouped_totals,
get_training_upscaled_noise_model,
SURFACE_CODE_TRAINING_UPSCALE_TARGET,
SURFACE_CODE_THRESHOLD_APPROX,
)
from qec.surface_code.memory_circuit import MemoryCircuit
from qec.surface_code.data_mapping import (
normalized_weight_mapping_Xstab_memory,
Expand Down Expand Up @@ -322,5 +331,135 @@ def test_stim_circuit_audit_no_cnot_noise_in_logical_measurement_section(self):
)


class TestNoiseModelUpscaling(unittest.TestCase):
    """Covers get_grouped_totals and get_training_upscaled_noise_model (surface-code training upscaling)."""

    def test_get_grouped_totals(self):
        """Each grouped total matches the single-p mapping and max_group is their maximum."""
        model = _noise_model_from_p(0.01)
        totals = get_grouped_totals(model)
        # p_prep_X + p_prep_Z
        self.assertAlmostEqual(totals["p_prep"], 2.0 * 0.01 / 3.0 * 2, places=12)
        self.assertAlmostEqual(totals["p_meas"], 2.0 * 0.01 / 3.0 * 2, places=12)
        self.assertAlmostEqual(totals["p_idle_cnot"], 0.01, places=12)
        self.assertAlmostEqual(
            totals["p_idle_spam"], model.get_total_idle_spam_probability(), places=12
        )
        self.assertAlmostEqual(totals["p_cnot"], 0.01, places=12)
        self.assertGreater(totals["max_group"], 0)
        channel_keys = ("p_prep", "p_meas", "p_idle_cnot", "p_idle_spam", "p_cnot")
        largest = max(totals[key] for key in channel_keys)
        self.assertEqual(totals["max_group"], largest)

    def test_upscale_small_noise(self):
        """A model below the target is scaled uniformly so its max_group lands on the target."""
        # Single-p 1e-4 -> max_group is on the order of 1e-4
        target = SURFACE_CODE_TRAINING_UPSCALE_TARGET
        model = _noise_model_from_p(1e-4)
        before = get_grouped_totals(model)
        self.assertLess(before["max_group"], target)

        scaled_model, info = get_training_upscaled_noise_model(model, code_type="surface_code")
        self.assertTrue(info["applied_upscale"])
        self.assertFalse(info["downscale_skipped"])

        factor = info["scale_factor"]
        self.assertGreaterEqual(factor, 1.0)
        self.assertAlmostEqual(factor, target / before["max_group"], places=10)

        after = get_grouped_totals(scaled_model)
        self.assertAlmostEqual(after["max_group"], target, places=10)

        # Every parameter must carry the same factor
        scaled_cfg = scaled_model.to_config_dict()
        for name, original_value in model.to_config_dict().items():
            self.assertAlmostEqual(
                scaled_cfg[name], original_value * factor, places=12, msg=name
            )

    def test_upscale_exact_target_scale_one(self):
        """A model already sitting on the target is 'upscaled' by a factor of 1.0."""
        # Reach max_group == target by manually scaling up a small model
        base = _noise_model_from_p(1e-4)
        base_max = get_grouped_totals(base)["max_group"]
        to_target = SURFACE_CODE_TRAINING_UPSCALE_TARGET / base_max
        on_target_cfg = {
            name: value * to_target for name, value in base.to_config_dict().items()
        }
        model = NoiseModel.from_config_dict(on_target_cfg)

        scaled_model, info = get_training_upscaled_noise_model(model, code_type="surface_code")
        self.assertTrue(info["applied_upscale"])
        self.assertAlmostEqual(info["scale_factor"], 1.0, places=10)
        self.assertAlmostEqual(
            scaled_model.get_total_cnot_probability(),
            model.get_total_cnot_probability(),
            places=12,
        )

    def test_downscale_not_applied(self):
        """A model above the target comes back untouched with downscale_skipped set."""
        model = _noise_model_from_p(1e-2)  # max_group well above 6e-3
        totals = get_grouped_totals(model)
        self.assertGreater(totals["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET)

        returned, info = get_training_upscaled_noise_model(model, code_type="surface_code")
        self.assertFalse(info["applied_upscale"])
        self.assertTrue(info["downscale_skipped"])
        self.assertTrue(info["above_target_warning"])
        # Same parameters and, in fact, the very same object
        self.assertEqual(model.to_config_dict(), returned.to_config_dict())
        self.assertIs(returned, model)

    def test_above_target_warning(self):
        """above_target_warning is set above the target and clear below it."""
        high = _noise_model_from_p(0.01)
        _, high_info = get_training_upscaled_noise_model(high, code_type="surface_code")
        self.assertTrue(high_info["above_target_warning"])

        low = _noise_model_from_p(1e-4)
        _, low_info = get_training_upscaled_noise_model(low, code_type="surface_code")
        self.assertFalse(low_info["above_target_warning"])

    def test_non_surface_code_no_upscaling(self):
        """Any code_type other than 'surface_code' leaves the model as-is and says so."""
        model = _noise_model_from_p(1e-4)
        returned, info = get_training_upscaled_noise_model(model, code_type="color_code")
        self.assertFalse(info.get("applied_upscale", False))
        self.assertEqual(model.to_config_dict(), returned.to_config_dict())
        self.assertIn("message", info)
        self.assertIn("surface_code", info["message"])

    def test_invalid_zero_totals_raises(self):
        """An all-zero noise model is rejected with ValueError."""
        empty_model = NoiseModel()  # all zeros
        with self.assertRaises(ValueError) as raised:
            get_training_upscaled_noise_model(empty_model, code_type="surface_code")
        self.assertIn("all grouped totals are <= 0", str(raised.exception))

    def test_upscale_preserves_reference(self):
        """The scaled training model carries the original model's _reference."""
        model = _noise_model_from_p(1e-4)
        expected_reference = dict(model._reference)
        scaled_model, _ = get_training_upscaled_noise_model(model, code_type="surface_code")
        self.assertEqual(scaled_model._reference, expected_reference)

    def test_skip_upscale_returns_original(self):
        """skip_upscale=True hands back the original object even when it is below the target."""
        model = _noise_model_from_p(1e-4)
        totals = get_grouped_totals(model)
        self.assertLess(totals["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET)

        returned, info = get_training_upscaled_noise_model(
            model, code_type="surface_code", skip_upscale=True
        )
        self.assertIs(returned, model)
        self.assertFalse(info["applied_upscale"])
        self.assertFalse(info["downscale_skipped"])
        self.assertTrue(info["skipped_by_user"])
        self.assertIn("SKIPPED", info["message"])

    def test_skip_upscale_above_target(self):
        """skip_upscale=True also short-circuits above the target (no downscale flag)."""
        model = _noise_model_from_p(1e-2)
        returned, info = get_training_upscaled_noise_model(
            model, code_type="surface_code", skip_upscale=True
        )
        self.assertIs(returned, model)
        self.assertTrue(info["skipped_by_user"])
        self.assertFalse(info["applied_upscale"])
        self.assertFalse(info["downscale_skipped"])

if __name__ == "__main__":
unittest.main()
Loading