Freeze the level-1 shift of the matching pseudodata

Previously, the level-1 shift was changed replica-by-replica. This should not be done because it corresponds to a covmat different from the experimental one. While freezing level-1 fluctuations produces a central value that differs from the experimental value, this is not a problem as our methodology accounts for this (since it also occurs in experimental measurements).
NNPDF · Oct 14, 2022 · 5db6400 · 5db6400
1 parent 1c526f9
commit 5db6400
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 9 deletions.
diff --git a/runcards/fit_runcard.yml b/runcards/fit_runcard.yml
@@ -67,6 +67,7 @@ rescale_inputs: True
 
 # Various seed values
 global_seeds: 1865683875
+matching_seed: 2
 
 # Define Neural Network Parameters
 fit_parameters:

diff --git a/src/nnusf/sffit/load_data.py b/src/nnusf/sffit/load_data.py
@@ -47,19 +47,29 @@ def load_experimental_data(
     return raw_experimental_data, experimental_data
 
 
-def add_pseudodata(experimental_datasets, shift=True):
-    """If `shift=False` no pseudodata is generated and real data is used
+def add_pseudodata(experimental_datasets, matching_seed=1, shift=True):
+    """`matching_seed` is used to generate the level-1 fluctuation in the 
+    matching pseudodata.
+    If `shift=False` no pseudodata is generated and real data is used
     instead. This is only relevant for debugging purposes.
     """
     for dataset_name, dataset in experimental_datasets.items():
         cholesky = np.linalg.cholesky(dataset.covmat)
+
+        # If needed, generate the level-1 shift
+        shift_data = 0
+        if dataset_name.endswith('_MATCHING') and shift:
+            np_rng_state = np.random.get_state()
+            np.random.seed(matching_seed)
+            random_samples = np.random.randn(dataset.n_data)
+            np.random.set_state(np_rng_state)
+            shift_data = cholesky @ random_samples
+
+        # Generate the usual shift
         random_samples = np.random.randn(dataset.n_data)
-        # Matching pseudodata variance is increased to account for the fact
-        # that, unlike experimental data, they don't contain sampling
-        # fluctuations
-        if dataset_name.endswith('_MATCHING'):
-            random_samples *= np.sqrt(2)
-        shift_data = cholesky @ random_samples if shift else 0
+        shift_data += cholesky @ random_samples if shift else 0
+
+        # Add the shifts to the experimental central values
         pseudodata = dataset.central_values + shift_data
         dataset.pseudodata = pseudodata
 

diff --git a/src/nnusf/sffit/run_sffit.py b/src/nnusf/sffit/run_sffit.py
@@ -57,8 +57,9 @@ def main(
         kincuts=runcard_content.get("kinematic_cuts", {}),
     )
     # create pseudodata and add it to the data_info object
+    matching_seed = runcard_content.get("genrep", runcard_content["global_seeds"])
     genrep = runcard_content.get("genrep", None)
-    load_data.add_pseudodata(data_info, shift=genrep)
+    load_data.add_pseudodata(data_info, matching_seed=matching_seed, shift=genrep)
     # create a training mask and add it to the data_info object
     load_data.add_tr_filter_mask(data_info)