Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: comply with boost-histogram 1.4.0 #895

Merged
merged 8 commits into from
Sep 19, 2023
101 changes: 61 additions & 40 deletions src/coffea/analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def variations(self):


class NminusOneToNpz:
"""Object to be returned by NmiusOne.to_npz()"""
"""Object to be returned by NminusOne.to_npz()"""

def __init__(self, file, labels, nev, masks, saver):
self._file = file
Expand Down Expand Up @@ -494,11 +494,17 @@ def maskscutflow(self):
return self._maskscutflow

def compute(self):
self._nevonecut = list(dask.compute(*self._nevonecut))
self._nevcutflow = list(dask.compute(*self._nevcutflow))
self._masksonecut = list(dask.compute(*self._masksonecut))
self._maskscutflow = list(dask.compute(*self._maskscutflow))
numpy.savez(
self._nevonecut, self._nevcutflow = dask.compute(
self._nevonecut, self._nevcutflow
)
self._masksonecut, self._maskscutflow = dask.compute(
self._masksonecut, self._maskscutflow
)
self._nevonecut = list(self._nevonecut)
self._nevcutflow = list(self._nevcutflow)
self._masksonecut = list(self._masksonecut)
self._maskscutflow = list(self._maskscutflow)
self._saver(
self._file,
labels=self._labels,
nevonecut=self._nevonecut,
Expand Down Expand Up @@ -538,7 +544,7 @@ def result(self):
labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
return NminusOneResult(labels, self._nev, self._masks)

def to_npz(self, file, compressed=False, compute=True):
def to_npz(self, file, compressed=False, compute=False):
"""Saves the results of the N-1 selection to a .npz file

Parameters
Expand All @@ -554,7 +560,7 @@ def to_npz(self, file, compressed=False, compute=True):
compute : bool, optional
Whether to immediately start writing or to return an object
that the user can choose when to start writing by calling compute().
Default is True.
Default is False.

Returns
-------
Expand All @@ -580,22 +586,29 @@ def print(self):
"""Prints the statistics of the N-1 selection"""

if self._delayed_mode:
warnings.warn(
"Printing the N-1 selection statistics is going to compute dask_awkward objects."
)
self._nev = list(dask.compute(*self._nev))

nev = self._nev
print("N-1 selection stats:")
for i, name in enumerate(self._names):
print(
f"Ignoring {name:<20}: pass = {nev[i+1]:<20}\
all = {nev[0]:<20}\
-- eff = {nev[i+1]*100/nev[0]:.1f} %"
stats = (
f"Ignoring {name:<20}"
f"pass = {nev[i+1]:<20}"
f"all = {nev[0]:<20}"
f"-- eff = {nev[i+1]*100/nev[0]:.1f} %"
)
print(stats)

if True:
print(
f"All cuts {'':<20}: pass = {nev[-1]:<20}\
all = {nev[0]:<20}\
-- eff = {nev[-1]*100/nev[0]:.1f} %"
)
stats_all = (
f"All cuts {'':<20}"
f"pass = {nev[-1]:<20}"
f"all = {nev[0]:<20}"
f"-- eff = {nev[-1]*100/nev[0]:.1f} %"
)
print(stats_all)

def yieldhist(self):
"""Returns the N-1 selection yields as a ``hist.Hist`` object
Expand All @@ -610,13 +623,13 @@ def yieldhist(self):
labels = ["initial"] + [f"N - {i}" for i in self._names] + ["N"]
if not self._delayed_mode:
h = hist.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
h.fill(numpy.arange(len(labels)), weight=self._nev)
h.fill(numpy.arange(len(labels), dtype=int), weight=self._nev)

else:
h = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="N-1"))
for i, weight in enumerate(self._masks, 1):
h.fill(dask_awkward.full_like(weight, i, dtype=int), weight=weight)
h.fill(dask_awkward.zeros_like(weight))
h.fill(dask_awkward.zeros_like(weight, dtype=int))

return h, labels

Expand Down Expand Up @@ -712,7 +725,7 @@ def plot_vars(
hist.axis.Integer(0, len(labels), name="N-1"),
)
arr = awkward.flatten(var)
h.fill(arr, awkward.zeros_like(arr))
h.fill(arr, awkward.zeros_like(arr, dtype=int))
for i, mask in enumerate(self.result().masks, 1):
arr = awkward.flatten(var[mask])
h.fill(arr, awkward.full_like(arr, i, dtype=int))
Expand All @@ -725,7 +738,7 @@ def plot_vars(
hist.axis.Integer(0, len(labels), name="N-1"),
)
arr = dask_awkward.flatten(var)
h.fill(arr, dask_awkward.zeros_like(arr))
h.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
for i, mask in enumerate(self.result().masks, 1):
arr = dask_awkward.flatten(var[mask])
h.fill(arr, dask_awkward.full_like(arr, i, dtype=int))
Expand Down Expand Up @@ -780,7 +793,7 @@ def result(self):
self._maskscutflow,
)

def to_npz(self, file, compressed=False, compute=True):
def to_npz(self, file, compressed=False, compute=False):
"""Saves the results of the cutflow to a .npz file

Parameters
Expand All @@ -796,7 +809,7 @@ def to_npz(self, file, compressed=False, compute=True):
compute : bool, optional
Whether to immediately start writing or to return an object
that the user can choose when to start writing by calling compute().
Default is True.
Default is False.

Returns
-------
Expand Down Expand Up @@ -824,19 +837,27 @@ def print(self):
"""Prints the statistics of the Cutflow"""

if self._delayed_mode:
self._nevonecut = list(dask.compute(*self._nevonecut))
self._nevcutflow = list(dask.compute(*self._nevcutflow))
warnings.warn(
"Printing the cutflow statistics is going to compute dask_awkward objects."
)
self._nevonecut, self._nevcutflow = dask.compute(
self._nevonecut, self._nevcutflow
)

nevonecut = self._nevonecut
nevcutflow = self._nevcutflow

print("Cutflow stats:")
for i, name in enumerate(self._names):
print(
f"Cut {name:<20}: pass = {nevonecut[i+1]:<20}\
cumulative pass = {nevcutflow[i+1]:<20}\
all = {nevonecut[0]:<20}\
-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %\
-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
stats = (
f"Cut {name:<20}:"
f"pass = {nevonecut[i+1]:<20}"
f"cumulative pass = {nevcutflow[i+1]:<20}"
f"all = {nevonecut[0]:<20}"
f"-- eff = {nevonecut[i+1]*100/nevonecut[0]:.1f} %{'':<20}"
f"-- cumulative eff = {nevcutflow[i+1]*100/nevcutflow[0]:.1f} %"
)
print(stats)

def yieldhist(self):
"""Returns the cutflow yields as ``hist.Hist`` objects
Expand All @@ -856,8 +877,8 @@ def yieldhist(self):
honecut = hist.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
hcutflow = honecut.copy()
hcutflow.axes.name = ("cutflow",)
honecut.fill(numpy.arange(len(labels)), weight=self._nevonecut)
hcutflow.fill(numpy.arange(len(labels)), weight=self._nevcutflow)
honecut.fill(numpy.arange(len(labels), dtype=int), weight=self._nevonecut)
hcutflow.fill(numpy.arange(len(labels), dtype=int), weight=self._nevcutflow)

else:
honecut = hist.dask.Hist(hist.axis.Integer(0, len(labels), name="onecut"))
Expand All @@ -868,12 +889,12 @@ def yieldhist(self):
honecut.fill(
dask_awkward.full_like(weight, i, dtype=int), weight=weight
)
honecut.fill(dask_awkward.zeros_like(weight))
honecut.fill(dask_awkward.zeros_like(weight, dtype=int))
for i, weight in enumerate(self._maskscutflow, 1):
hcutflow.fill(
dask_awkward.full_like(weight, i, dtype=int), weight=weight
)
hcutflow.fill(dask_awkward.zeros_like(weight))
hcutflow.fill(dask_awkward.zeros_like(weight, dtype=int))

return honecut, hcutflow, labels

Expand Down Expand Up @@ -975,8 +996,8 @@ def plot_vars(
hcutflow.axes.name = name, "cutflow"

arr = awkward.flatten(var)
honecut.fill(arr, awkward.zeros_like(arr))
hcutflow.fill(arr, awkward.zeros_like(arr))
honecut.fill(arr, awkward.zeros_like(arr, dtype=int))
hcutflow.fill(arr, awkward.zeros_like(arr, dtype=int))

for i, mask in enumerate(self.result().masksonecut, 1):
arr = awkward.flatten(var[mask])
Expand All @@ -998,8 +1019,8 @@ def plot_vars(
hcutflow.axes.name = name, "cutflow"

arr = dask_awkward.flatten(var)
honecut.fill(arr, dask_awkward.zeros_like(arr))
hcutflow.fill(arr, dask_awkward.zeros_like(arr))
honecut.fill(arr, dask_awkward.zeros_like(arr, dtype=int))
hcutflow.fill(arr, dask_awkward.zeros_like(arr, dtype=int))

for i, mask in enumerate(self.result().masksonecut, 1):
arr = dask_awkward.flatten(var[mask])
Expand Down
24 changes: 12 additions & 12 deletions tests/test_analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,14 +513,14 @@ def test_packed_selection_nminusone():
):
assert np.all(mask == truth)

nminusone.to_npz("nminusone.npz", compressed=False)
nminusone.to_npz("nminusone.npz", compressed=False).compute()
with np.load("nminusone.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nev"] == nev)
assert np.all(file["masks"] == masks)
os.remove("nminusone.npz")

nminusone.to_npz("nminusone.npz", compressed=True)
nminusone.to_npz("nminusone.npz", compressed=True).compute()
with np.load("nminusone.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nev"] == nev)
Expand Down Expand Up @@ -619,7 +619,7 @@ def test_packed_selection_cutflow():
):
assert np.all(mask == truth)

cutflow.to_npz("cutflow.npz", compressed=False)
cutflow.to_npz("cutflow.npz", compressed=False).compute()
with np.load("cutflow.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nevonecut"] == nevonecut)
Expand All @@ -628,7 +628,7 @@ def test_packed_selection_cutflow():
assert np.all(file["maskscutflow"] == maskscutflow)
os.remove("cutflow.npz")

cutflow.to_npz("cutflow.npz", compressed=True)
cutflow.to_npz("cutflow.npz", compressed=True).compute()
with np.load("cutflow.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nevonecut"] == nevonecut)
Expand Down Expand Up @@ -854,14 +854,14 @@ def test_packed_selection_nminusone_dak(optimization_enabled):
):
assert np.all(mask.compute() == truth.compute())

nminusone.to_npz("nminusone.npz", compressed=False)
nminusone.to_npz("nminusone.npz", compressed=False).compute()
with np.load("nminusone.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nev"] == list(dask.compute(*nev)))
assert np.all(file["masks"] == list(dask.compute(*masks)))
os.remove("nminusone.npz")

nminusone.to_npz("nminusone.npz", compressed=True)
nminusone.to_npz("nminusone.npz", compressed=True).compute()
with np.load("nminusone.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nev"] == list(dask.compute(*nev)))
Expand Down Expand Up @@ -978,7 +978,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled):
):
assert np.all(mask.compute() == truth.compute())

cutflow.to_npz("cutflow.npz", compressed=False)
cutflow.to_npz("cutflow.npz", compressed=False).compute()
with np.load("cutflow.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
Expand All @@ -987,7 +987,7 @@ def test_packed_selection_cutflow_dak(optimization_enabled):
assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow)))
os.remove("cutflow.npz")

cutflow.to_npz("cutflow.npz", compressed=True)
cutflow.to_npz("cutflow.npz", compressed=True).compute()
with np.load("cutflow.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
Expand Down Expand Up @@ -1109,14 +1109,14 @@ def test_packed_selection_nminusone_dak_uproot_only(optimization_enabled):
):
assert np.all(mask.compute() == truth.compute())

nminusone.to_npz("nminusone.npz", compressed=False)
nminusone.to_npz("nminusone.npz", compressed=False).compute()
with np.load("nminusone.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nev"] == list(dask.compute(*nev)))
assert np.all(file["masks"] == list(dask.compute(*masks)))
os.remove("nminusone.npz")

nminusone.to_npz("nminusone.npz", compressed=True)
nminusone.to_npz("nminusone.npz", compressed=True).compute()
with np.load("nminusone.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nev"] == list(dask.compute(*nev)))
Expand Down Expand Up @@ -1233,7 +1233,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled):
):
assert np.all(mask.compute() == truth.compute())

cutflow.to_npz("cutflow.npz", compressed=False)
cutflow.to_npz("cutflow.npz", compressed=False).compute()
with np.load("cutflow.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
Expand All @@ -1242,7 +1242,7 @@ def test_packed_selection_cutflow_dak_uproot_only(optimization_enabled):
assert np.all(file["maskscutflow"] == list(dask.compute(*maskscutflow)))
os.remove("cutflow.npz")

cutflow.to_npz("cutflow.npz", compressed=True)
cutflow.to_npz("cutflow.npz", compressed=True).compute()
with np.load("cutflow.npz") as file:
assert np.all(file["labels"] == labels)
assert np.all(file["nevonecut"] == list(dask.compute(*nevonecut)))
Expand Down