From 7ef8ce42d22df54a77bd6cffaade2d7bd9a361a7 Mon Sep 17 00:00:00 2001
From: Ivan Basov <ibasov@nvidia.com>
Date: Wed, 4 Mar 2026 10:58:50 -0800
Subject: [PATCH 1/2] feat(qec): surface-code noise model upscaling for
 training (6e-3 target)

Replace the old 1e-3 "sparsity guard" in train.py with a proper
noise-model upscaling module in noise_model.py that:
- Scales all 25 noise-model parameters so max(grouped totals) = 6e-3
  (just below surface-code threshold ~7.5e-3) for training data only.
- Skips upscaling for non-surface-code types.
- Provides skip_noise_upscaling config flag and
  PREDECODER_SKIP_NOISE_UPSCALING=1 env var to bypass upscaling.
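- Emits clear warnings when noise is above target or downscale is skipped.
- Drops the old guard's training side effects (optimizer lr *= 0.75 and the
  raised val/test num_samples floors); upscaling no longer touches them.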

Adds 10 unit tests covering upscale, downscale, skip, non-surface-code,
zero-totals, and reference-preservation scenarios.

Updates README with detailed documentation of the upscaling rules and
how to disable them.

Ported from gitlab MR !33 (feature/surface-code-noise-upscale-6e3).

Made-with: Cursor
---
 README.md                      |  49 ++++++++++++-
 code/qec/noise_model.py        | 119 ++++++++++++++++++++++++++++++-
 code/tests/test_noise_model.py | 125 ++++++++++++++++++++++++++++++++-
 code/training/train.py         | 102 +++++++++++++--------------
 4 files changed, 338 insertions(+), 57 deletions(-)

diff --git a/README.md b/README.md
index 4005000..a97e8e1 100644
--- a/README.md
+++ b/README.md
@@ -255,7 +255,54 @@ LOGICAL Z (lz):
 
 - `data.noise_model`: a **25-parameter circuit-level** noise model (SPAM, idles, and CNOT Pauli channels).
 
-Training may apply a **training-only** “noise floor” sparsity guard (scales probabilities up if the grouped totals are too small). Evaluation uses the user-specified noise model **as-is**. We have seen that it is preferrable to train on data that is more dense and then apply it to sparser data that training on the sparse data.
+#### Training noise upscaling (surface code)
+
+When training a surface-code pre-decoder, the noise parameters you specify may be very small (e.g. `p = 1e-4`), which produces extremely sparse syndromes and slow convergence. To address this, the training pipeline **automatically upscales** all 25 noise-model parameters so that the largest grouped total `max(P_prep, P_meas, P_idle_cnot, P_idle_spam, P_cnot)` equals a fixed target of **6 × 10⁻³** (just below the surface-code threshold of ~7.5 × 10⁻³).
+
+The five grouped totals are:
+
+| Group | Sum of |
+|-------|--------|
+| P_prep | `p_prep_X + p_prep_Z` |
+| P_meas | `p_meas_X + p_meas_Z` |
+| P_idle_cnot | `p_idle_cnot_X + p_idle_cnot_Y + p_idle_cnot_Z` |
+| P_idle_spam | `p_idle_spam_X + p_idle_spam_Y + p_idle_spam_Z` |
+| P_cnot | sum of all 15 `p_cnot_*` |
+
+**Upscaling rules:**
+
+- If `max_group <= 6e-3`: all 25 p's are multiplied by `6e-3 / max_group` for training data generation only (at exactly `6e-3` the factor is 1, so nothing changes). Evaluation always uses the original user-specified noise model as-is.
+- If `max_group > 6e-3`: parameters are **not** modified (the training log emits a warning in case this indicates a configuration error).
+- Non-surface-code types (`code_type != "surface_code"`) are never upscaled.
+
+We have found that training on denser syndromes and then evaluating on sparser data produces better results than training directly on sparse data.
+
+#### Skipping noise upscaling
+
+If you need to train with your **exact** noise parameters (e.g. for benchmarking or controlled experiments), you can disable upscaling via config or environment variable:
+
+**Config** (`conf/config_public.yaml`):
+
+```yaml
+data:
+  skip_noise_upscaling: true
+  noise_model:
+    p_prep_X: 0.002
+    # ... rest of 25 params
+```
+
+**Environment variable:**
+
+```bash
+PREDECODER_SKIP_NOISE_UPSCALING=1 bash code/scripts/local_run.sh
+```
+
+Either method causes the training pipeline to use the user-specified noise model verbatim — no scaling is applied. The training log will confirm:
+
+```
+[Train] noise_model upscaling SKIPPED (skip_noise_upscaling=true or PREDECODER_SKIP_NOISE_UPSCALING=1).
+```
+
 
 ### Precomputed frames (recommended)
 
diff --git a/code/qec/noise_model.py b/code/qec/noise_model.py
index 2e67b32..e37f559 100644
--- a/code/qec/noise_model.py
+++ b/code/qec/noise_model.py
@@ -37,9 +37,14 @@
 """
 
 from dataclasses import dataclass, field, asdict
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple, Any
 import numpy as np
 
+# Surface-code training upscale target (below threshold ~7.5e-3). Used when sampling training data.
+SURFACE_CODE_TRAINING_UPSCALE_TARGET = 6e-3
+# Approximate surface code threshold for user-facing warnings.
+SURFACE_CODE_THRESHOLD_APPROX = 7.5e-3
+
 
 # Internal helper for depolarizing-equivalent 25p mapping (tests/docs).
 def _single_p_mapping(p: float, spam_factor: float = 2.0 / 3.0) -> Dict[str, float]:
@@ -388,6 +393,118 @@ def __repr__(self) -> str:
         )
 
 
+def get_grouped_totals(nm: NoiseModel) -> Dict[str, float]:
+    """
+    Compute the sum of p's per fault channel (capital P's) for the 25-p noise model.
+
+    Returns:
+        Dict with keys: p_prep, p_meas, p_idle_cnot, p_idle_spam, p_cnot, max_group.
+    """
+    p_prep = float(nm.p_prep_X + nm.p_prep_Z)
+    p_meas = float(nm.p_meas_X + nm.p_meas_Z)
+    p_idle_cnot = float(nm.get_total_idle_cnot_probability())
+    p_idle_spam = float(nm.get_total_idle_spam_probability())
+    p_cnot = float(nm.get_total_cnot_probability())
+    max_group = max(p_prep, p_meas, p_idle_cnot, p_idle_spam, p_cnot)
+    return {
+        "p_prep": p_prep,
+        "p_meas": p_meas,
+        "p_idle_cnot": p_idle_cnot,
+        "p_idle_spam": p_idle_spam,
+        "p_cnot": p_cnot,
+        "max_group": max_group,
+    }
+
+
+def get_training_upscaled_noise_model(
+    noise_model: NoiseModel,
+    code_type: str = "surface_code",
+    skip_upscale: bool = False,
+) -> Tuple[NoiseModel, Dict[str, Any]]:
+    """
+    For surface code only: optionally upscale the noise model for training so that
+    max(P's) = SURFACE_CODE_TRAINING_UPSCALE_TARGET (6e-3). Training data sampling
+    should use the returned model; evaluation should use the original user-specified model.
+
+    - Upscaling (max_group <= target): scale all 25 p's by target/max_group (factor is 1.0 at equality).
+    - Downscaling (max_group > target): do NOT change parameters; info["downscale_skipped"] is True.
+    - If max_group > target: info["above_target_warning"] is True (possibly above threshold / config error).
+
+    For code_type != "surface_code", returns (noise_model unchanged, info with applied=False).
+
+    Args:
+        noise_model: The user-specified NoiseModel.
+        code_type: Code type string (upscaling only for "surface_code").
+        skip_upscale: If True, skip upscaling entirely and return the original model unchanged.
+            Useful for training with exact user-specified noise parameters (e.g. benchmarking).
+
+    Returns:
+        (training_noise_model, info_dict) where info_dict has:
+        - applied_upscale: bool
+        - scale_factor: float, target / max_group (set when upscaling is applied or a downscale is skipped)
+        - max_group: float
+        - group_totals: dict (p_prep, p_meas, ...)
+        - above_target_warning: bool (max_group > UPSCALE_TARGET)
+        - downscale_skipped: bool (max_group > target, params not modified)
+        - skipped_by_user: bool (skip_upscale was True)
+    """
+    target = SURFACE_CODE_TRAINING_UPSCALE_TARGET
+    totals = get_grouped_totals(noise_model)
+    max_group = totals["max_group"]
+
+    info: Dict[str, Any] = {
+        "max_group": max_group,
+        "group_totals": totals,
+        "above_target_warning": max_group > target,
+        "downscale_skipped": False,
+        "applied_upscale": False,
+        "skipped_by_user": skip_upscale,
+    }
+
+    if skip_upscale:
+        info["message"] = (
+            "Noise upscaling SKIPPED by user (skip_noise_upscaling=true). "
+            f"Training will use the exact user-specified noise model (max_group={max_group:.6g})."
+        )
+        return (noise_model, info)
+
+    if code_type != "surface_code":
+        info["message"] = f"Noise upscaling is not applied for code_type={code_type!r} (surface_code only)."
+        return (noise_model, info)
+
+    if max_group <= 0.0:
+        raise ValueError(
+            "Invalid noise_model: all grouped totals are <= 0 "
+            f"(prep={totals['p_prep']}, meas={totals['p_meas']}, "
+            f"idle_cnot={totals['p_idle_cnot']}, idle_spam={totals['p_idle_spam']}, cnot={totals['p_cnot']})."
+        )
+
+    scale_factor = target / max_group
+
+    if scale_factor >= 1.0:
+        # Upscaling: apply scale to all 25 parameters
+        params = noise_model.to_config_dict()
+        scaled_params = {k: float(v) * scale_factor for k, v in params.items()}
+        training_nm = NoiseModel.from_config_dict(scaled_params)
+        training_nm._reference = dict(noise_model._reference)
+        info["applied_upscale"] = True
+        info["scale_factor"] = scale_factor
+        info["message"] = (
+            f"Upscaled training noise: max_group={max_group:.6g} -> target={target:.1e} "
+            f"(scale={scale_factor:.6g}). Evaluation uses user-specified noise model as-is."
+        )
+        return (training_nm, info)
+
+    # Downscaling: do not modify parameters
+    info["downscale_skipped"] = True
+    info["scale_factor"] = scale_factor
+    info["message"] = (
+        f"Downscale NOT applied: max_group={max_group:.6g} > target={target:.1e}. "
+        "Parameters unchanged. If you intended a lower noise regime, check your noise model values."
+    )
+    return (noise_model, info)
+
+
 def noise_model_from_config(cfg) -> Optional[NoiseModel]:
     """
     Create a NoiseModel from a Hydra config object.
diff --git a/code/tests/test_noise_model.py b/code/tests/test_noise_model.py
index 1ad2377..84f4675 100644
--- a/code/tests/test_noise_model.py
+++ b/code/tests/test_noise_model.py
@@ -30,7 +30,16 @@
 
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-from qec.noise_model import NoiseModel, CNOT_ERROR_TYPES, CNOT_ERROR_INDEX, _single_p_mapping
+from qec.noise_model import (
+    NoiseModel,
+    CNOT_ERROR_TYPES,
+    CNOT_ERROR_INDEX,
+    _single_p_mapping,
+    get_grouped_totals,
+    get_training_upscaled_noise_model,
+    SURFACE_CODE_TRAINING_UPSCALE_TARGET,
+    SURFACE_CODE_THRESHOLD_APPROX,
+)
 from qec.surface_code.memory_circuit import MemoryCircuit
 from qec.surface_code.data_mapping import (
     normalized_weight_mapping_Xstab_memory,
@@ -322,5 +331,119 @@ def test_stim_circuit_audit_no_cnot_noise_in_logical_measurement_section(self):
         )
 
 
+
+class TestNoiseModelUpscaling(unittest.TestCase):
+    """Tests for surface-code training noise model upscaling (get_training_upscaled_noise_model)."""
+
+    def test_get_grouped_totals(self):
+        """get_grouped_totals returns correct P_prep, P_meas, P_idle_cnot, P_idle_spam, P_cnot and max_group."""
+        nm = _noise_model_from_p(0.01)
+        tot = get_grouped_totals(nm)
+        self.assertAlmostEqual(tot["p_prep"], 2.0 * 0.01 / 3.0 * 2, places=12)  # p_prep_X + p_prep_Z
+        self.assertAlmostEqual(tot["p_meas"], 2.0 * 0.01 / 3.0 * 2, places=12)
+        self.assertAlmostEqual(tot["p_idle_cnot"], 0.01, places=12)
+        self.assertAlmostEqual(tot["p_idle_spam"], nm.get_total_idle_spam_probability(), places=12)
+        self.assertAlmostEqual(tot["p_cnot"], 0.01, places=12)
+        self.assertGreater(tot["max_group"], 0)
+        self.assertEqual(tot["max_group"], max(tot["p_prep"], tot["p_meas"], tot["p_idle_cnot"], tot["p_idle_spam"], tot["p_cnot"]))
+
+    def test_upscale_small_noise(self):
+        """When max_group < target, all 25 p's are scaled so that new max_group = target."""
+        # Single-p 1e-4 -> max_group is around 1e-4 (order of magnitude)
+        nm = _noise_model_from_p(1e-4)
+        tot = get_grouped_totals(nm)
+        self.assertLess(tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET)
+        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code")
+        self.assertTrue(info["applied_upscale"])
+        self.assertFalse(info["downscale_skipped"])
+        scale = info["scale_factor"]
+        self.assertGreaterEqual(scale, 1.0)
+        self.assertAlmostEqual(scale, SURFACE_CODE_TRAINING_UPSCALE_TARGET / tot["max_group"], places=10)
+        new_tot = get_grouped_totals(training_nm)
+        self.assertAlmostEqual(new_tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET, places=10)
+        # All params scaled by the same factor
+        for k, v in nm.to_config_dict().items():
+            self.assertAlmostEqual(training_nm.to_config_dict()[k], v * scale, places=12, msg=k)
+
+    def test_upscale_exact_target_scale_one(self):
+        """When max_group equals target, scale_factor is 1.0 and model is unchanged."""
+        # Build a model with max_group = target by scaling a small model up
+        nm_small = _noise_model_from_p(1e-4)
+        tot_small = get_grouped_totals(nm_small)
+        scale_to_target = SURFACE_CODE_TRAINING_UPSCALE_TARGET / tot_small["max_group"]
+        params = {k: v * scale_to_target for k, v in nm_small.to_config_dict().items()}
+        nm = NoiseModel.from_config_dict(params)
+        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code")
+        self.assertTrue(info["applied_upscale"])
+        self.assertAlmostEqual(info["scale_factor"], 1.0, places=10)
+        self.assertAlmostEqual(training_nm.get_total_cnot_probability(), nm.get_total_cnot_probability(), places=12)
+
+    def test_downscale_not_applied(self):
+        """When max_group > target, parameters are NOT modified; downscale_skipped is True."""
+        nm = _noise_model_from_p(1e-2)  # max_group well above 6e-3
+        tot = get_grouped_totals(nm)
+        self.assertGreater(tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET)
+        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code")
+        self.assertFalse(info["applied_upscale"])
+        self.assertTrue(info["downscale_skipped"])
+        self.assertTrue(info["above_target_warning"])
+        # Same object parameters (identity)
+        self.assertEqual(nm.to_config_dict(), training_nm.to_config_dict())
+        self.assertIs(training_nm, nm)
+
+    def test_above_target_warning(self):
+        """When max_group > target, above_target_warning is True."""
+        nm = _noise_model_from_p(0.01)
+        _, info = get_training_upscaled_noise_model(nm, code_type="surface_code")
+        self.assertTrue(info["above_target_warning"])
+        nm_low = _noise_model_from_p(1e-4)
+        _, info_low = get_training_upscaled_noise_model(nm_low, code_type="surface_code")
+        self.assertFalse(info_low["above_target_warning"])
+
+    def test_non_surface_code_no_upscaling(self):
+        """For code_type != 'surface_code', no scaling is applied; original model returned."""
+        nm = _noise_model_from_p(1e-4)
+        training_nm, info = get_training_upscaled_noise_model(nm, code_type="color_code")
+        self.assertFalse(info.get("applied_upscale", False))
+        self.assertEqual(nm.to_config_dict(), training_nm.to_config_dict())
+        self.assertIn("message", info)
+        self.assertIn("surface_code", info["message"])
+
+    def test_invalid_zero_totals_raises(self):
+        """When all grouped totals are zero, get_training_upscaled_noise_model raises ValueError."""
+        nm = NoiseModel()  # all zeros
+        with self.assertRaises(ValueError) as ctx:
+            get_training_upscaled_noise_model(nm, code_type="surface_code")
+        self.assertIn("all grouped totals are <= 0", str(ctx.exception))
+
+    def test_upscale_preserves_reference(self):
+        """Upscaled training model preserves _reference from the original."""
+        nm = _noise_model_from_p(1e-4)
+        ref = dict(nm._reference)
+        training_nm, _ = get_training_upscaled_noise_model(nm, code_type="surface_code")
+        self.assertEqual(training_nm._reference, ref)
+
+    def test_skip_upscale_returns_original(self):
+        """When skip_upscale=True, the original model is returned unchanged regardless of max_group."""
+        nm = _noise_model_from_p(1e-4)
+        tot = get_grouped_totals(nm)
+        self.assertLess(tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET)
+        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code", skip_upscale=True)
+        self.assertIs(training_nm, nm)
+        self.assertFalse(info["applied_upscale"])
+        self.assertFalse(info["downscale_skipped"])
+        self.assertTrue(info["skipped_by_user"])
+        self.assertIn("SKIPPED", info["message"])
+
+    def test_skip_upscale_above_target(self):
+        """skip_upscale=True also works when max_group > target (no warning about downscale)."""
+        nm = _noise_model_from_p(1e-2)
+        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code", skip_upscale=True)
+        self.assertIs(training_nm, nm)
+        self.assertTrue(info["skipped_by_user"])
+        self.assertFalse(info["applied_upscale"])
+        self.assertFalse(info["downscale_skipped"])
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/code/training/train.py b/code/training/train.py
index 896cf85..dc770f1 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -729,44 +729,21 @@ def init_process_group_with_timeout(*args, **kwargs):
         if nm_dict is not None:
             noise_model_user_obj = NoiseModel.from_config_dict(dict(nm_dict))
 
-            # Training-only sparsity guard:
-            # We compute grouped totals per mechanism:
-            #   P_PREP, P_MEAS, P_IDLE_CNOT, P_IDLE_SPAM, P_CNOT
-            # If max(P_*) < 1e-3, we scale ALL 25 parameters by (1e-3 / max(P_*))
-            # for TRAINING DATA ONLY. Evaluation/inference uses the user-specified noise model as-is.
-            min_group_total = 1e-3
-            p_prep = float(noise_model_user_obj.p_prep_X + noise_model_user_obj.p_prep_Z)
-            p_meas = float(noise_model_user_obj.p_meas_X + noise_model_user_obj.p_meas_Z)
-            p_idle_cnot = float(noise_model_user_obj.get_total_idle_cnot_probability())
-            p_idle_spam = float(noise_model_user_obj.get_total_idle_spam_probability())
-            p_cnot = float(noise_model_user_obj.get_total_cnot_probability())
-            max_group = max(p_prep, p_meas, p_idle_cnot, p_idle_spam, p_cnot)
-            if max_group <= 0.0:
-                raise ValueError(
-                    "Invalid noise_model: all grouped totals are <= 0 "
-                    f"(prep={p_prep}, meas={p_meas}, idle_cnot={p_idle_cnot}, idle_spam={p_idle_spam}, cnot={p_cnot})."
-                )
-
-            scale = 1.0
-            if max_group < min_group_total:
-                scale = float(min_group_total / max_group)
-                scaled = {
-                    k: float(v) * scale for k, v in noise_model_user_obj.to_config_dict().items()
-                }
-                noise_model_train_obj = NoiseModel.from_config_dict(scaled)
-            else:
-                noise_model_train_obj = noise_model_user_obj
-
-            # If sparsity guard triggered, be conservative:
-            # - reduce LR a bit
-            # - increase val/test sample sizes to get cleaner evaluation signals
-            if scale != 1.0:
-                try:
-                    cfg.optimizer.lr = float(cfg.optimizer.lr) * 0.75
-                except Exception:
-                    cfg.optimizer.lr = 0.75 * float(cfg.optimizer.lr)
-                cfg.val.num_samples = max(int(cfg.val.num_samples), 262144)
-                cfg.test.num_samples = max(int(cfg.test.num_samples), 1048576)
+            # Surface-code training upscaling: bring max(P's) to 6e-3 for training data only.
+            # Evaluation uses the user-specified noise model as-is.
+            # For other code types (e.g. color codes) upscaling is not applied by default.
+            from qec.noise_model import (
+                get_training_upscaled_noise_model,
+                SURFACE_CODE_TRAINING_UPSCALE_TARGET,
+                SURFACE_CODE_THRESHOLD_APPROX,
+            )
+            code_type = getattr(cfg.data, "code_type", "surface_code")
+            skip_upscale = bool(getattr(cfg.data, "skip_noise_upscaling", False))
+            if os.environ.get("PREDECODER_SKIP_NOISE_UPSCALING", "0") == "1":
+                skip_upscale = True
+            noise_model_train_obj, upscale_info = get_training_upscaled_noise_model(
+                noise_model_user_obj, code_type=code_type, skip_upscale=skip_upscale,
+            )
 
             # Force fixed-p mode with a conservative scalar placeholder when using noise_model.
             # IMPORTANT: during training we apply drift (±2%) around the *training* noise model reference, so we
@@ -775,35 +752,52 @@ def init_process_group_with_timeout(*args, **kwargs):
             p_min_value = p_error_value
             p_max_value = p_error_value
             if dist.rank == 0:
-                # Always print the grouped totals + decision to make verification easy from logs.
+                tot = upscale_info["group_totals"]
+                max_group = upscale_info["max_group"]
                 print(
                     "[Train] noise_model grouped totals: "
-                    f"prep={p_prep:.6g}, meas={p_meas:.6g}, "
-                    f"idle_cnot={p_idle_cnot:.6g}, idle_spam={p_idle_spam:.6g}, cnot={p_cnot:.6g}; "
+                    f"prep={tot['p_prep']:.6g}, meas={tot['p_meas']:.6g}, "
+                    f"idle_cnot={tot['p_idle_cnot']:.6g}, idle_spam={tot['p_idle_spam']:.6g}, cnot={tot['p_cnot']:.6g}; "
                     f"max_group={max_group:.6g}"
                 )
-                if scale != 1.0:
+                print(f"[Train] {upscale_info['message']}")
+                if upscale_info.get("skipped_by_user"):
                     print(
-                        f"[Train] noise_model sparsity guard: max_group={max_group:.6g} < {min_group_total:.1e}; "
-                        f"scaling training noise_model by {scale:.6g} (evaluation uses user noise_model as-is)."
+                        "[Train] noise_model upscaling SKIPPED (skip_noise_upscaling=true or "
+                        "PREDECODER_SKIP_NOISE_UPSCALING=1). Training uses exact user-specified parameters."
                     )
+                if upscale_info.get("applied_upscale"):
                     print(
-                        f"[Train] sparsity guard adjustments: lr*=0.75 -> {float(cfg.optimizer.lr):.6g}, "
-                        f"val.num_samples={int(cfg.val.num_samples):,}, test.num_samples={int(cfg.test.num_samples):,}"
+                        f"[Train] noise_model upscaling (surface code): scale_factor={upscale_info['scale_factor']:.6g}, "
+                        f"target={SURFACE_CODE_TRAINING_UPSCALE_TARGET:.1e}. "
+                        "Evaluation uses user-specified noise model as-is."
                     )
-                else:
+                    print(f"[Train] noise_model (training, upscaled) summary: {noise_model_train_obj!r}")
+                if upscale_info.get("downscale_skipped"):
+                    print(
+                        "\n"
+                        + "!" * 80 + "\n"
+                        + "[Train] WARNING: Noise model DOWNSCALE was NOT applied (max_group > target). "
+                        "Parameters are unchanged. If you intended a lower noise regime, check your noise model "
+                        "parameter values (e.g. a typo may have set a single p too high).\n"
+                        + "!" * 80
+                    )
+                if upscale_info.get("above_target_warning"):
                     print(
-                        f"[Train] noise_model sparsity guard: max_group={max_group:.6g} >= {min_group_total:.1e}; "
-                        "no scaling applied."
+                        "\n"
+                        + "#" * 80 + "\n"
+                        + "[Train] WARNING: Your noise model max_group ({:.6g}) is ABOVE the surface-code training "
+                        "target ({:.1e}). Surface code threshold is approximately {:.1e}. "
+                        "You may be above threshold or have introduced a noise model that is not the one you intended "
+                        "(e.g. a typo in one of the 25 p's). Please verify your noise_model configuration.\n".format(
+                            max_group, SURFACE_CODE_TRAINING_UPSCALE_TARGET, SURFACE_CODE_THRESHOLD_APPROX
+                        )
+                        + "#" * 80
                     )
                 print(
                     f"[Train] Using explicit noise_model from config (25p). Overriding p_error/p_min/p_max -> {p_error_value:.6g}"
                 )
                 print(f"[Train] noise_model (user) summary: {noise_model_user_obj!r}")
-                if scale != 1.0:
-                    print(
-                        f"[Train] noise_model (training, scaled) summary: {noise_model_train_obj!r}"
-                    )
                 print(
                     "[Train] noise_model idle semantics: "
                     "bulk/CNOT-layer idles use p_idle_cnot_*, "
@@ -812,7 +806,7 @@ def init_process_group_with_timeout(*args, **kwargs):
                 )
                 print(
                     "[Train] noise_model totals: "
-                    f"prep_total={p_prep:.6g}, meas_total={p_meas:.6g}, "
+                    f"prep_total={tot['p_prep']:.6g}, meas_total={tot['p_meas']:.6g}, "
                     f"idle_cnot_total={noise_model_user_obj.get_total_idle_cnot_probability():.6g}, "
                     f"idle_spam_total={noise_model_user_obj.get_total_idle_spam_probability():.6g}, "
                     f"cnot_total={noise_model_user_obj.get_total_cnot_probability():.6g}"

From 0852a99ff03f9cc3f7ffc6a0a877e2078abbaa92 Mon Sep 17 00:00:00 2001
From: Ivan Basov <ibasov@nvidia.com>
Date: Fri, 6 Mar 2026 15:37:39 -0800
Subject: [PATCH 2/2] Apply yapf style to rebased branch

Made-with: Cursor
---
 code/qec/noise_model.py        |  3 ++-
 code/tests/test_noise_model.py | 32 ++++++++++++++++++++++++--------
 code/training/train.py         | 31 +++++++++++++++++--------------
 3 files changed, 43 insertions(+), 23 deletions(-)

diff --git a/code/qec/noise_model.py b/code/qec/noise_model.py
index e37f559..0d4642e 100644
--- a/code/qec/noise_model.py
+++ b/code/qec/noise_model.py
@@ -469,7 +469,8 @@ def get_training_upscaled_noise_model(
         return (noise_model, info)
 
     if code_type != "surface_code":
-        info["message"] = f"Noise upscaling is not applied for code_type={code_type!r} (surface_code only)."
+        info["message"
+            ] = f"Noise upscaling is not applied for code_type={code_type!r} (surface_code only)."
         return (noise_model, info)
 
     if max_group <= 0.0:
diff --git a/code/tests/test_noise_model.py b/code/tests/test_noise_model.py
index 84f4675..cca9806 100644
--- a/code/tests/test_noise_model.py
+++ b/code/tests/test_noise_model.py
@@ -331,7 +331,6 @@ def test_stim_circuit_audit_no_cnot_noise_in_logical_measurement_section(self):
         )
 
 
-
 class TestNoiseModelUpscaling(unittest.TestCase):
     """Tests for surface-code training noise model upscaling (get_training_upscaled_noise_model)."""
 
@@ -339,13 +338,20 @@ def test_get_grouped_totals(self):
         """get_grouped_totals returns correct P_prep, P_meas, P_idle_cnot, P_idle_spam, P_cnot and max_group."""
         nm = _noise_model_from_p(0.01)
         tot = get_grouped_totals(nm)
-        self.assertAlmostEqual(tot["p_prep"], 2.0 * 0.01 / 3.0 * 2, places=12)  # p_prep_X + p_prep_Z
+        self.assertAlmostEqual(
+            tot["p_prep"], 2.0 * 0.01 / 3.0 * 2, places=12
+        )  # p_prep_X + p_prep_Z
         self.assertAlmostEqual(tot["p_meas"], 2.0 * 0.01 / 3.0 * 2, places=12)
         self.assertAlmostEqual(tot["p_idle_cnot"], 0.01, places=12)
         self.assertAlmostEqual(tot["p_idle_spam"], nm.get_total_idle_spam_probability(), places=12)
         self.assertAlmostEqual(tot["p_cnot"], 0.01, places=12)
         self.assertGreater(tot["max_group"], 0)
-        self.assertEqual(tot["max_group"], max(tot["p_prep"], tot["p_meas"], tot["p_idle_cnot"], tot["p_idle_spam"], tot["p_cnot"]))
+        self.assertEqual(
+            tot["max_group"],
+            max(
+                tot["p_prep"], tot["p_meas"], tot["p_idle_cnot"], tot["p_idle_spam"], tot["p_cnot"]
+            )
+        )
 
     def test_upscale_small_noise(self):
         """When max_group < target, all 25 p's are scaled so that new max_group = target."""
@@ -358,9 +364,13 @@ def test_upscale_small_noise(self):
         self.assertFalse(info["downscale_skipped"])
         scale = info["scale_factor"]
         self.assertGreaterEqual(scale, 1.0)
-        self.assertAlmostEqual(scale, SURFACE_CODE_TRAINING_UPSCALE_TARGET / tot["max_group"], places=10)
+        self.assertAlmostEqual(
+            scale, SURFACE_CODE_TRAINING_UPSCALE_TARGET / tot["max_group"], places=10
+        )
         new_tot = get_grouped_totals(training_nm)
-        self.assertAlmostEqual(new_tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET, places=10)
+        self.assertAlmostEqual(
+            new_tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET, places=10
+        )
         # All params scaled by the same factor
         for k, v in nm.to_config_dict().items():
             self.assertAlmostEqual(training_nm.to_config_dict()[k], v * scale, places=12, msg=k)
@@ -376,7 +386,9 @@ def test_upscale_exact_target_scale_one(self):
         training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code")
         self.assertTrue(info["applied_upscale"])
         self.assertAlmostEqual(info["scale_factor"], 1.0, places=10)
-        self.assertAlmostEqual(training_nm.get_total_cnot_probability(), nm.get_total_cnot_probability(), places=12)
+        self.assertAlmostEqual(
+            training_nm.get_total_cnot_probability(), nm.get_total_cnot_probability(), places=12
+        )
 
     def test_downscale_not_applied(self):
         """When max_group > target, parameters are NOT modified; downscale_skipped is True."""
@@ -428,7 +440,9 @@ def test_skip_upscale_returns_original(self):
         nm = _noise_model_from_p(1e-4)
         tot = get_grouped_totals(nm)
         self.assertLess(tot["max_group"], SURFACE_CODE_TRAINING_UPSCALE_TARGET)
-        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code", skip_upscale=True)
+        training_nm, info = get_training_upscaled_noise_model(
+            nm, code_type="surface_code", skip_upscale=True
+        )
         self.assertIs(training_nm, nm)
         self.assertFalse(info["applied_upscale"])
         self.assertFalse(info["downscale_skipped"])
@@ -438,7 +452,9 @@ def test_skip_upscale_returns_original(self):
     def test_skip_upscale_above_target(self):
         """skip_upscale=True also works when max_group > target (no warning about downscale)."""
         nm = _noise_model_from_p(1e-2)
-        training_nm, info = get_training_upscaled_noise_model(nm, code_type="surface_code", skip_upscale=True)
+        training_nm, info = get_training_upscaled_noise_model(
+            nm, code_type="surface_code", skip_upscale=True
+        )
         self.assertIs(training_nm, nm)
         self.assertTrue(info["skipped_by_user"])
         self.assertFalse(info["applied_upscale"])
diff --git a/code/training/train.py b/code/training/train.py
index dc770f1..10566bd 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -742,7 +742,9 @@ def init_process_group_with_timeout(*args, **kwargs):
             if os.environ.get("PREDECODER_SKIP_NOISE_UPSCALING", "0") == "1":
                 skip_upscale = True
             noise_model_train_obj, upscale_info = get_training_upscaled_noise_model(
-                noise_model_user_obj, code_type=code_type, skip_upscale=skip_upscale,
+                noise_model_user_obj,
+                code_type=code_type,
+                skip_upscale=skip_upscale,
             )
 
             # Force fixed-p mode with a conservative scalar placeholder when using noise_model.
@@ -772,27 +774,28 @@ def init_process_group_with_timeout(*args, **kwargs):
                         f"target={SURFACE_CODE_TRAINING_UPSCALE_TARGET:.1e}. "
                         "Evaluation uses user-specified noise model as-is."
                     )
-                    print(f"[Train] noise_model (training, upscaled) summary: {noise_model_train_obj!r}")
+                    print(
+                        f"[Train] noise_model (training, upscaled) summary: {noise_model_train_obj!r}"
+                    )
                 if upscale_info.get("downscale_skipped"):
                     print(
-                        "\n"
-                        + "!" * 80 + "\n"
-                        + "[Train] WARNING: Noise model DOWNSCALE was NOT applied (max_group > target). "
+                        "\n" + "!" * 80 + "\n" +
+                        "[Train] WARNING: Noise model DOWNSCALE was NOT applied (max_group > target). "
                         "Parameters are unchanged. If you intended a lower noise regime, check your noise model "
-                        "parameter values (e.g. a typo may have set a single p too high).\n"
-                        + "!" * 80
+                        "parameter values (e.g. a typo may have set a single p too high).\n" +
+                        "!" * 80
                     )
                 if upscale_info.get("above_target_warning"):
                     print(
-                        "\n"
-                        + "#" * 80 + "\n"
-                        + "[Train] WARNING: Your noise model max_group ({:.6g}) is ABOVE the surface-code training "
+                        "\n" + "#" * 80 + "\n" +
+                        "[Train] WARNING: Your noise model max_group ({:.6g}) is ABOVE the surface-code training "
                         "target ({:.1e}). Surface code threshold is approximately {:.1e}. "
                         "You may be above threshold or have introduced a noise model that is not the one you intended "
-                        "(e.g. a typo in one of the 25 p's). Please verify your noise_model configuration.\n".format(
-                            max_group, SURFACE_CODE_TRAINING_UPSCALE_TARGET, SURFACE_CODE_THRESHOLD_APPROX
-                        )
-                        + "#" * 80
+                        "(e.g. a typo in one of the 25 p's). Please verify your noise_model configuration.\n"
+                        .format(
+                            max_group, SURFACE_CODE_TRAINING_UPSCALE_TARGET,
+                            SURFACE_CODE_THRESHOLD_APPROX
+                        ) + "#" * 80
                     )
                 print(
                     f"[Train] Using explicit noise_model from config (25p). Overriding p_error/p_min/p_max -> {p_error_value:.6g}"