Merge pull request #895 from iasonkrom/boost_histogram_compliance

fix: comply with `boost-histogram` 1.4.0
CoffeaTeam · Sep 19, 2023 · fa73cb5
2 parents 538b3dc + 522f38b
commit fa73cb5
Show file tree

Hide file tree

Showing 2 changed files with 73 additions and 52 deletions.
diff --git a/src/coffea/analysis_tools.py b/src/coffea/analysis_tools.py
@@ -418,7 +418,7 @@ def variations(self):
 
 
 class NminusOneToNpz:
-    """Object to be returned by NmiusOne.to_npz()"""
+    """Object to be returned by NminusOne.to_npz()"""
 
     def __init__(self, file, labels, nev, masks, saver):
         self._file = file
@@ -494,11 +494,17 @@ def maskscutflow(self):
         return self._maskscutflow
 
     def compute(self):
-        self._nevonecut = list(dask.compute(*self._nevonecut))
-        self._nevcutflow = list(dask.compute(*self._nevcutflow))
-        self._masksonecut = list(dask.compute(*self._masksonecut))
-        self._maskscutflow = list(dask.compute(*self._maskscutflow))
-        numpy.savez(
+        self._nevonecut, self._nevcutflow = dask.compute(
+            self._nevonecut, self._nevcutflow
+        )
+        self._masksonecut, self._maskscutflow = dask.compute(
+            self._masksonecut, self._maskscutflow
+        )
+        self._nevonecut = list(self._nevonecut)
+        self._nevcutflow = list(self._nevcutflow)
+        self._masksonecut = list(self._masksonecut)
+        self._maskscutflow = list(self._maskscutflow)
+        self._saver(
             self._file,
             labels=self._labels,
             nevonecut=self._nevonecut,
@@ -538,7 +544,7 @@ def result(self):
         labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
         return NminusOneResult(labels, self._nev, self._masks)
 
-    def to_npz(self, file, compressed=False, compute=True):
+    def to_npz(self, file, compressed=False, compute=False):
         """Saves the results of the N-1 selection to a .npz file
 
         Parameters
@@ -554,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True):
             compute : bool, optional
                 Whether to immediately start writing or to return an object
                 that the user can choose when to start writing by calling compute().
-                Default is True.
+                Default is False.
 
         Returns
         -------
@@ -580,22 +586,29 @@ def print(self):
         """Prints the statistics of the N-1 selection"""
 
         if self._delayed_mode:
+            warnings.warn(
+                "Printing the N-1 selection statistics is going to compute dask_awkward objects."
+            )
             self._nev = list(dask.compute(*self._nev))
+
         nev = self._nev
         print("N-1 selection stats:")
         for i, name in enumerate(self._names):
-            print(
-                f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\
-                all = {nev[0]:<20}\
-                -- eff = {nev[i+1]*100/nev[0]:.1f} %"
+            stats = (
+                f"Ignoring {name:<20}"
+                f"pass = {nev[i+1]:<20}"
+                f"all = {nev[0]:<20}"
+                f"-- eff = {nev[i+1]*100/nev[0]:.1f} %"
             )
+            print(stats)
 
-        if True:
-            print(
-                f"All cuts {'':<20}: pass = {nev[-1]:<20}\
-                all = {nev[0]:<20}\
-                -- eff = {nev[-1]*100/nev[0]:.1f} %"
-            )
+        stats_all = (
+            f"All cuts {'':<20}"
+            f"pass = {nev[-1]:<20}"
+            f"all = {nev[0]:<20}"
+            f"-- eff = {nev[-1]*100/nev[0]:.1f} %"
+        )
+        print(stats_all)
 
     def yieldhist(self):
         """Returns the N-1 selection yields as a ``hist.Hist`` object
@@ -610,13 +623,13 @@ def yieldhist(self):
         labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
         if not self._delayed_mode:
             h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
-            h.fill(numpy.arange(len(labels)), weight=self._nev)
+            h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev)
 
         else:
             h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
             for i, weight in enumerate(self._masks, 1):
                 h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight)
-            h.fill(dask_awkward.zeros_like(weight))
+            h.fill(dask_awkward.zeros_like(weight, dtype=int))
 
         return h, labels
 
@@ -712,7 +725,7 @@ def plot_vars(
                     hist.axis.Integer(0, len(labels), name="N-1"),
                 )
                 arr = awkward.flatten(var)
-                h.fill(arr, awkward.zeros_like(arr))
+                h.fill(arr, awkward.zeros_like(arr, dtype=int))
                 for i, mask in enumerate(self.result().masks, 1):
                     arr = awkward.flatten(var[mask])
                     h.fill(arr, awkward.full_like(arr, i, dtype=int))
@@ -725,7 +738,7 @@ def plot_vars(
                     hist.axis.Integer(0, len(labels), name="N-1"),
                 )
                 arr = dask_awkward.flatten(var)
-                h.fill(arr, dask_awkward.zeros_like(arr))
+                h.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
                 for i, mask in enumerate(self.result().masks, 1):
                     arr = dask_awkward.flatten(var[mask])
                     h.fill(arr, dask_awkward.full_like(arr, i, dtype=int))
@@ -780,7 +793,7 @@ def result(self):
             self._maskscutflow,
         )
 
-    def to_npz(self, file, compressed=False, compute=True):
+    def to_npz(self, file, compressed=False, compute=False):
         """Saves the results of the cutflow to a .npz file
 
         Parameters
@@ -796,7 +809,7 @@ def to_npz(self, file, compressed=False, compute=True):
             compute : bool, optional
                 Whether to immediately start writing or to return an object
                 that the user can choose when to start writing by calling compute().
-                Default is True.
+                Default is False.
 
         Returns
         -------
@@ -824,19 +837,27 @@ def print(self):
         """Prints the statistics of the Cutflow"""
 
         if self._delayed_mode:
-            self._nevonecut = list(dask.compute(*self._nevonecut))
-            self._nevcutflow = list(dask.compute(*self._nevcutflow))
+            warnings.warn(
+                "Printing the cutflow statistics is going to compute dask_awkward objects."
+            )
+            self._nevonecut, self._nevcutflow = dask.compute(
+                self._nevonecut, self._nevcutflow
+            )
+
         nevonecut = self._nevonecut
         nevcutflow = self._nevcutflow
+
         print("Cutflow stats:")
         for i, name in enumerate(self._names):
-            print(
-                f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\
-                cumulative pass = {nevcutflow[i+1]:<20}\
-                all = {nevonecut[0]:<20}\
-                --  eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\
-                -- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
+            stats = (
+                f"Cut {name:<20}:"
+                f"pass = {nevonecut[i+1]:<20}"
+                f"cumulative pass = {nevcutflow[i+1]:<20}"
+                f"all = {nevonecut[0]:<20}"
+                f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}"
+                f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
             )
+            print(stats)
 
     def yieldhist(self):
         """Returns the cutflow yields as ``hist.Hist`` objects
@@ -856,8 +877,8 @@ def yieldhist(self):
             honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
             hcutflow = honecut.copy()
             hcutflow.axes.name = ("cutflow",)
-            honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut)
-            hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow)
+            honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut)
+            hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow)
 
         else:
             honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
@@ -868,12 +889,12 @@ def yieldhist(self):
                 honecut.fill(
                     dask_awkward.full_like(weight, i, dtype=int), weight=weight
                 )
-            honecut.fill(dask_awkward.zeros_like(weight))
+            honecut.fill(dask_awkward.zeros_like(weight, dtype=int))
             for i, weight in enumerate(self._maskscutflow, 1):
                 hcutflow.fill(
                     dask_awkward.full_like(weight, i, dtype=int), weight=weight
                 )
-            hcutflow.fill(dask_awkward.zeros_like(weight))
+            hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int))
 
         return honecut, hcutflow, labels
 
@@ -975,8 +996,8 @@ def plot_vars(
                 hcutflow.axes.name = name, "cutflow"
 
                 arr = awkward.flatten(var)
-                honecut.fill(arr, awkward.zeros_like(arr))
-                hcutflow.fill(arr, awkward.zeros_like(arr))
+                honecut.fill(arr, awkward.zeros_like(arr, dtype=int))
+                hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int))
 
                 for i, mask in enumerate(self.result().masksonecut, 1):
                     arr = awkward.flatten(var[mask])
@@ -998,8 +1019,8 @@ def plot_vars(
                 hcutflow.axes.name = name, "cutflow"
 
                 arr = dask_awkward.flatten(var)
-                honecut.fill(arr, dask_awkward.zeros_like(arr))
-                hcutflow.fill(arr, dask_awkward.zeros_like(arr))
+                honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
+                hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
 
                 for i, mask in enumerate(self.result().masksonecut, 1):
                     arr = dask_awkward.flatten(var[mask])

diff --git a/tests/test_analysis_tools.py b/tests/test_analysis_tools.py
@@ -513,14 +513,14 @@ def test_packed_selection_nminusone():
     ):
         assert np.all(mask == truth)
 
-    nminusone.to_npz("nminusone.npz", compressed=False)
+    nminusone.to_npz("nminusone.npz", compressed=False).compute()
     with np.load("nminusone.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nev"] == nev)
         assert np.all(file["masks"] == masks)
     os.remove("nminusone.npz")
 
-    nminusone.to_npz("nminusone.npz", compressed=True)
+    nminusone.to_npz("nminusone.npz", compressed=True).compute()
     with np.load("nminusone.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nev"] == nev)
@@ -619,7 +619,7 @@ def test_packed_selection_cutflow():
     ):
         assert np.all(mask == truth)
 
-    cutflow.to_npz("cutflow.npz", compressed=False)
+    cutflow.to_npz("cutflow.npz", compressed=False).compute()
     with np.load("cutflow.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nevonecut"] == nevonecut)
@@ -628,7 +628,7 @@ def test_packed_selection_cutflow():
         assert np.all(file["maskscutflow"] == maskscutflow)
     os.remove("cutflow.npz")
 
-    cutflow.to_npz("cutflow.npz", compressed=True)
+    cutflow.to_npz("cutflow.npz", compressed=True).compute()
     with np.load("cutflow.npz") as file:
         assert np.all(file["labels"] == labels)
         assert np.all(file["nevonecut"] == nevonecut)
@@ -854,14 +854,14 @@ def test_packed_selection_nminusone_dak(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        nminusone.to_npz("nminusone.npz", compressed=False)
+        nminusone.to_npz("nminusone.npz", compressed=False).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
             assert np.all(file["masks"] == list(dask.compute(*masks)))
         os.remove("nminusone.npz")
 
-        nminusone.to_npz("nminusone.npz", compressed=True)
+        nminusone.to_npz("nminusone.npz", compressed=True).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
@@ -978,7 +978,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        cutflow.to_npz("cutflow.npz", compressed=False)
+        cutflow.to_npz("cutflow.npz", compressed=False).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
@@ -987,7 +987,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled):
             assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow)))
         os.remove("cutflow.npz")
 
-        cutflow.to_npz("cutflow.npz", compressed=True)
+        cutflow.to_npz("cutflow.npz", compressed=True).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
@@ -1109,14 +1109,14 @@ def test_packed_selection_nminusone_dak_uproot_only(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        nminusone.to_npz("nminusone.npz", compressed=False)
+        nminusone.to_npz("nminusone.npz", compressed=False).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
             assert np.all(file["masks"] == list(dask.compute(*masks)))
         os.remove("nminusone.npz")
 
-        nminusone.to_npz("nminusone.npz", compressed=True)
+        nminusone.to_npz("nminusone.npz", compressed=True).compute()
         with np.load("nminusone.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nev"] == list(dask.compute(*nev)))
@@ -1233,7 +1233,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled):
         ):
             assert np.all(mask.compute() == truth.compute())
 
-        cutflow.to_npz("cutflow.npz", compressed=False)
+        cutflow.to_npz("cutflow.npz", compressed=False).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
@@ -1242,7 +1242,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled):
             assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow)))
         os.remove("cutflow.npz")
 
-        cutflow.to_npz("cutflow.npz", compressed=True)
+        cutflow.to_npz("cutflow.npz", compressed=True).compute()
         with np.load("cutflow.npz") as file:
             assert np.all(file["labels"] == labels)
             assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))