Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Make rochester_lookup compatible with dask_awkward #875

Merged
11 commits merged on Oct 7, 2023
30 changes: 23 additions & 7 deletions src/coffea/lookup_tools/rochester_lookup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import awkward
import dask_awkward as dak
import numpy

from coffea.lookup_tools.dense_lookup import dense_lookup
@@ -75,7 +76,7 @@ def _error(self, func, *args):

newargs = args + (0, 0)
default = func(*newargs)
result = numpy.zeros_like(default)
result = awkward.zeros_like(default)
for s in range(self._nsets):
oneOver = 1.0 / self._members[s]
for m in range(self._members[s]):
@@ -226,12 +227,27 @@ def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
cbN_flat = awkward.flatten(cbN)
cbS_flat = awkward.flatten(cbS)

invcdf = awkward.unflatten(
doublecrystalball.ppf(
u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat
),
counts,
)
args = (u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat, loc, cbS_flat)

if any(isinstance(arg, dak.Array) for arg in args):

# Per-partition wrapper around doublecrystalball.ppf. It is the callable
# handed to dak.map_partitions when at least one argument is a dak.Array.
def apply(*args):
# Pass every argument through length_zero_if_typetracer before calling ppf.
# NOTE(review): presumably this yields a length-zero concrete array for
# typetracer inputs and leaves other inputs unchanged -- confirm against
# the awkward documentation.
args_lz = [
awkward.typetracer.length_zero_if_typetracer(arg) for arg in args
]
# Wrap whatever ppf returns in an awkward.Array.
out = awkward.Array(doublecrystalball.ppf(*args_lz))
# Only the backend of the first argument is inspected. When it is
# "typetracer", the result's layout is converted back to a typetracer
# layout with forget_length=True, carrying over the same behavior.
if awkward.backend(args[0]) == "typetracer":
out = awkward.Array(
out.layout.to_typetracer(forget_length=True),
behavior=out.behavior,
)
return out

invcdf = dak.map_partitions(apply, *args)
else:
invcdf = doublecrystalball.ppf(*args)

invcdf = awkward.unflatten(invcdf, counts)

x = awkward.where(
mask,
Expand Down
28 changes: 14 additions & 14 deletions tests/test_lookup_tools.py
Original file line number Diff line number Diff line change
@@ -372,8 +372,6 @@ def test_jec_txt_effareas():


def test_rochester():
pytest.xfail("weird side effect from running other tests... passes by itself")

rochester_data = lookup_tools.txt_converters.convert_rochester_file(
"tests/samples/RoccoR2018.txt.gz", loaduncs=True
)
@@ -390,27 +388,29 @@ def test_rochester():

# test against nanoaod
events = NanoEventsFactory.from_root(
os.path.abspath("tests/samples/nano_dimuon.root")
{os.path.abspath("tests/samples/nano_dimuon.root"): "Events"},
permit_dask=True,
).events()

data_k = rochester.kScaleDT(
events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
)
data_k = np.array(ak.flatten(data_k))
data_k = ak.flatten(data_k).compute().to_numpy()
assert all(np.isclose(data_k, official_data_k))
data_err = rochester.kScaleDTerror(
events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
)
data_err = np.array(ak.flatten(data_err), dtype=float)
data_err = ak.flatten(data_err).compute().to_numpy()
assert all(np.isclose(data_err, official_data_err, atol=1e-8))

# test against mc
events = NanoEventsFactory.from_root(
os.path.abspath("tests/samples/nano_dy.root")
{os.path.abspath("tests/samples/nano_dy.root"): "Events"},
permit_dask=True,
).events()

hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan))
mc_rand = ak.unflatten(mc_rand, ak.num(hasgen))
mc_rand = ak.unflatten(dak.from_awkward(ak.Array(mc_rand), 1), ak.num(hasgen))
mc_kspread = rochester.kSpreadMC(
events.Muon.charge[hasgen],
events.Muon.pt[hasgen],
@@ -426,10 +426,10 @@ def test_rochester():
events.Muon.nTrackerLayers[~hasgen],
mc_rand[~hasgen],
)
mc_k = np.array(ak.flatten(ak.ones_like(events.Muon.pt)))
hasgen_flat = np.array(ak.flatten(hasgen))
mc_k[hasgen_flat] = np.array(ak.flatten(mc_kspread))
mc_k[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
mc_k = ak.flatten(ak.ones_like(events.Muon.pt)).compute().to_numpy()
hasgen_flat = ak.flatten(hasgen).compute().to_numpy()
mc_k[hasgen_flat] = ak.flatten(mc_kspread).compute().to_numpy()
mc_k[~hasgen_flat] = ak.flatten(mc_ksmear).compute().to_numpy()
assert all(np.isclose(mc_k, official_mc_k))

mc_errspread = rochester.kSpreadMCerror(
@@ -447,9 +447,9 @@ def test_rochester():
events.Muon.nTrackerLayers[~hasgen],
mc_rand[~hasgen],
)
mc_err = np.array(ak.flatten(ak.ones_like(events.Muon.pt)))
mc_err[hasgen_flat] = np.array(ak.flatten(mc_errspread))
mc_err[~hasgen_flat] = np.array(ak.flatten(mc_errsmear))
mc_err = ak.flatten(ak.ones_like(events.Muon.pt)).compute().to_numpy()
mc_err[hasgen_flat] = ak.flatten(mc_errspread).compute().to_numpy()
mc_err[~hasgen_flat] = ak.flatten(mc_errsmear).compute().to_numpy()
assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))


Expand Down