# Binning and Projections of Histograms

NUISANCE provides some simple binning primitives and compositors to support `HistFrame` functionality.

We start with a simple, contiguous, linear binning:

In [1]:
import pyNUISANCE as pn

In [2]:
# To follow the binning operations in this notebook, execute this cell:
#  it prints the log macro level and sets the "Binning" logger to trace.
#  (Comment the lines below out if the trace output is not wanted.)
#  Trace messages are only produced if NUISANCEv3 was built with the
#  log macro level set to trace,
#  which happens for Debug builds by default.
print(pn.log.get_macro_level())
pn.log.set_level("Binning",pn.log.level.trace)

level.trace


In [3]:
# Five equal-width bins covering [0, 5).
nd = pn.Binning.lin_space(0,5,5)
print(nd)

# Calling the binning object and calling find_bin both give the bin number.
print("first bin: %s" % nd(0))
print("can also use find_bin: first bin: %s" % nd.find_bin(0))
print("second bin: %s" % nd(1.5))
# A value sitting exactly on an edge is reported in the bin that starts there.
print("semi-open range: %s" % nd(1))

# Values outside the binning return the npos sentinel.
npos = pn.Binning.npos
print("out of range signified by a magic number: %s" % npos)
for x_outside in (-1, 5, 1E3):
    print("out of range: %s =? %s" % (nd(x_outside), npos))

Axis lables: [""]
Bins: [
  0: [(0.00 - 1.00)]
  1: [(1.00 - 2.00)]
  2: [(2.00 - 3.00)]
  3: [(3.00 - 4.00)]
  4: [(4.00 - 5.00)]
]

first bin: 0
can also use find_bin: first bin: 0
second bin: 1
semi-open range: 1
out of range signified by a magic number: 4294967295
out of range: 4294967295 =? 4294967295
out of range: 4294967295 =? 4294967295
out of range: 4294967295 =? 4294967295
[Binning:trace]: [lin_space(0,5,5).find_bin] x.size() = 1, x[0] = 0
[Binning:trace]: [lin_space(0,5,5).find_bin] Found bin: 0, (0 -- 1)
[Binning:trace]: [lin_space(0,5,5).find_bin] x.size() = 1, x[0] = 0
[Binning:trace]: [lin_space(0,5,5).find_bin] Found bin: 0, (0 -- 1)
[Binning:trace]: [lin_space(0,5,5).find_bin] x.size() = 1, x[0] = 1.5
[Binning:trace]: [lin_space(0,5,5).find_bin] Found bin: 1, (1 -- 2)
[Binning:trace]: [lin_space(0,5,5).find_bin] x.size() = 1, x[0] = 1
[Binning:trace]: [lin_space(0,5,5).find_bin] Found bin: 1, (1 -- 2)
[Binning:trace]: [lin_space(0,5,5).find_bin] x.size() = 1, x[0] = -1

This can be extended to an N-dimensional, hyper-rectangular, uniform binning. Each bin gets a global bin number, which maps to an N-dimensional extent:

In [4]:
# One [start, stop, number-of-bins] triple per axis, with matching axis labels.
axis_ranges = [[0,3,3],[3,6,3],[6,9,3]]
axis_labels = ["x","y","z"]
nd3 = pn.Binning.lin_spaceND(axis_ranges, axis_labels)
print(nd3)

Axis lables: ["x", "y", "z"]
Bins: [
  0: [(0.00 - 1.00), (3.00 - 4.00), (6.00 - 7.00)]
  1: [(1.00 - 2.00), (3.00 - 4.00), (6.00 - 7.00)]
  2: [(2.00 - 3.00), (3.00 - 4.00), (6.00 - 7.00)]
  3: [(0.00 - 1.00), (4.00 - 5.00), (6.00 - 7.00)]
  4: [(1.00 - 2.00), (4.00 - 5.00), (6.00 - 7.00)]
  5: [(2.00 - 3.00), (4.00 - 5.00), (6.00 - 7.00)]
  6: [(0.00 - 1.00), (5.00 - 6.00), (6.00 - 7.00)]
  7: [(1.00 - 2.00), (5.00 - 6.00), (6.00 - 7.00)]
  8: [(2.00 - 3.00), (5.00 - 6.00), (6.00 - 7.00)]
  9: [(0.00 - 1.00), (3.00 - 4.00), (7.00 - 8.00)]
  10: [(1.00 - 2.00), (3.00 - 4.00), (7.00 - 8.00)]
  11: [(2.00 - 3.00), (3.00 - 4.00), (7.00 - 8.00)]
  12: [(0.00 - 1.00), (4.00 - 5.00), (7.00 - 8.00)]
  13: [(1.00 - 2.00), (4.00 - 5.00), (7.00 - 8.00)]
  14: [(2.00 - 3.00), (4.00 - 5.00), (7.00 - 8.00)]
  15: [(0.00 - 1.00), (5.00 - 6.00), (7.00 - 8.00)]
  16: [(1.00 - 2.00), (5.00 - 6.00), (7.00 - 8.00)]
  17: [(2.00 - 3.00), (5.00 - 6.00), (7.00 - 8.00)]
  18: [(0.00 - 1.00), (3.00 - 4.00), 

We can apply the binning function, which takes an N-dimensional vector of doubles and returns the corresponding global bin number:

In [5]:
# Each call takes one [x, y, z] point and prints the global bin number it falls in.
print("first bin %s: " % nd3([0,3,6]))
print("second bin along x %s: " % nd3([1,3,6]))
print("second bin along y %s: " % nd3([0,4,6]))
# NOTE(review): [0,4,7] lies in the second bin along both y and z (global bin 12);
#  the second bin along z alone would be [0,3,7], i.e. global bin 9 in the
#  nd3 bin listing printed earlier.
print("second bin along z %s: " % nd3([0,4,7]))

first bin 0: 
second bin along x 1: 
second bin along y 3: 
second bin along z 12: 
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] x.size() = 3/3 axes, x = [0, 3, 6]
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] Found bin[0]: 0, (0 -- 1)
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] gbin after 0 axes: 0
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] Found bin[1]: 0, (3 -- 4)
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] gbin after 1 axes: 0
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] Found bin[2]: 0, (6 -- 7)
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] gbin after 2 axes: 0
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] returning gbin 0 for x = [0, 3, 6]
[Binning:trace]: [lin_spaceND([(0, 3, 3), (3, 6, 3), (6, 9, 3)]).find_bin] x.size() = 3/3 axes, x = [1, 3

We can also do logarithmic binning:

In [6]:
# NOTE(review): `log` is not used in any of the cells visible here — confirm
#  whether a later cell needs it; imports are better gathered in the first cell.
from math import log
# Ten log10-spaced bins on the "x" axis between 1E-2 and 10.
ndl = pn.Binning.log10_space(1E-2,10,10, "x")
print(ndl)

print("bin: %s" % ndl(1))
# The upper edge (10) and a value below the lower edge both give the npos sentinel.
for x_outside in (10, 0.001):
    print("bin out of range: %s" % ndl(x_outside))

Axis lables: ["x"]
Bins: [
  0: [(0.01 - 0.02)]
  1: [(0.02 - 0.04)]
  2: [(0.04 - 0.08)]
  3: [(0.08 - 0.16)]
  4: [(0.16 - 0.32)]
  5: [(0.32 - 0.63)]
  6: [(0.63 - 1.26)]
  7: [(1.26 - 2.51)]
  8: [(2.51 - 5.01)]
  9: [(5.01 - 10.00)]
]

bin: 6
bin out of range: 4294967295
bin out of range: 4294967295
[Binning:trace]: [log10_space(0.01,10,10).find_bin] x.size() = 1, x[0] = 1
[Binning:trace]: [log10_space(0.01,10,10).find_bin] x.size() = 1, x[0] = 10
[Binning:trace]: [log10_space(0.01,10,10).find_bin] x.size() = 1, x[0] = 0.001


## Combining binnings
Binnings of initially different types can be composited to allow, for example, a 2D log/linear binning:

In [7]:
# Build each axis on its own, then combine them into a single 2D binning.
# "enu": 3 log10-spaced bins between 0.1 and 5.
etrue_ax = pn.Binning.log10_space(0.1,5,3, "enu")
# "erec": 5 linear bins between 0 and 5.
erec_ax = pn.Binning.lin_space(0,5,5, "erec")
# In the printed product the first axis ("enu") varies fastest with the global bin number.
etrue_erec = pn.Binning.product([etrue_ax,erec_ax])
print(etrue_erec)

Axis lables: ["enu", "erec"]
Bins: [
  0: [(0.10 - 0.37), (0.00 - 1.00)]
  1: [(0.37 - 1.36), (0.00 - 1.00)]
  2: [(1.36 - 5.00), (0.00 - 1.00)]
  3: [(0.10 - 0.37), (1.00 - 2.00)]
  4: [(0.37 - 1.36), (1.00 - 2.00)]
  5: [(1.36 - 5.00), (1.00 - 2.00)]
  6: [(0.10 - 0.37), (2.00 - 3.00)]
  7: [(0.37 - 1.36), (2.00 - 3.00)]
  8: [(1.36 - 5.00), (2.00 - 3.00)]
  9: [(0.10 - 0.37), (3.00 - 4.00)]
  10: [(0.37 - 1.36), (3.00 - 4.00)]
  11: [(1.36 - 5.00), (3.00 - 4.00)]
  12: [(0.10 - 0.37), (4.00 - 5.00)]
  13: [(0.37 - 1.36), (4.00 - 5.00)]
  14: [(1.36 - 5.00), (4.00 - 5.00)]
]



In [8]:
# Histogram defined on the 2D (enu, erec) binning.
hf_a = pn.HistFrame(etrue_erec)
# Three [enu, erec] entries; the trailing 1 is presumably the fill weight.
for enu_erec_point in ([0.1,1], [0.4,2], [1,1]):
    hf_a.fill(enu_erec_point, 1)
print(hf_a)
print(hf_a.binning)

 ------------------
 | bin | mc | err |
 ------------------
 |   0 |  0 |   0 |
 |   1 |  0 |   0 |
 |   2 |  0 |   0 |
 |   3 |  1 |   1 |
 |   4 |  1 |   1 |
 |   5 |  0 |   0 |
 |   6 |  0 |   0 |
 |   7 |  1 |   1 |
 |   8 |  0 |   0 |
 |   9 |  0 |   0 |
 |  10 |  0 |   0 |
 |  11 |  0 |   0 |
 |  12 |  0 |   0 |
 |  13 |  0 |   0 |
 |  14 |  0 |   0 |
 ------------------
Axis lables: ["enu", "erec"]
Bins: [
  0: [(0.10 - 0.37), (0.00 - 1.00)]
  1: [(0.37 - 1.36), (0.00 - 1.00)]
  2: [(1.36 - 5.00), (0.00 - 1.00)]
  3: [(0.10 - 0.37), (1.00 - 2.00)]
  4: [(0.37 - 1.36), (1.00 - 2.00)]
  5: [(1.36 - 5.00), (1.00 - 2.00)]
  6: [(0.10 - 0.37), (2.00 - 3.00)]
  7: [(0.37 - 1.36), (2.00 - 3.00)]
  8: [(1.36 - 5.00), (2.00 - 3.00)]
  9: [(0.10 - 0.37), (3.00 - 4.00)]
  10: [(0.37 - 1.36), (3.00 - 4.00)]
  11: [(1.36 - 5.00), (3.00 - 4.00)]
  12: [(0.10 - 0.37), (4.00 - 5.00)]
  13: [(0.37 - 1.36), (4.00 - 5.00)]
  14: [(1.36 - 5.00), (4.00 - 5.00)]
]

[Binning:trace]: [log10_space(0.1,5

## Projections
For rectangular binnings, where projections onto lower dimensions are well defined, we can make histogram projections:

In [9]:
# Project hf_a onto axis index 1, which is the "erec" axis of its binning.
hf_a_erec = pn.Binning.project(hf_a,1)
print(hf_a_erec.binning)
print(hf_a_erec)
# The projection reports the same number of fills as were made on hf_a.
print("nfills: %s" % hf_a_erec.nfills)

Axis lables: ["erec"]
Bins: [
  0: [(0.00 - 1.00)]
  1: [(1.00 - 2.00)]
  2: [(2.00 - 3.00)]
  3: [(3.00 - 4.00)]
  4: [(4.00 - 5.00)]
]

 --------------------
 | bin | mc |   err |
 --------------------
 |   0 |  0 |     0 |
 |   1 |  2 | 1.414 |
 |   2 |  1 |     1 |
 |   3 |  0 |     0 |
 |   4 |  0 |     0 |
 --------------------
nfills: 3


While it is unlikely that we would want to continue calling fill on a projection histogram, it is supported.

In [10]:
# Add two more entries directly to the projection, at erec = 1 and erec = 3
#  (the second argument, 1, is presumably the fill weight).
# NOTE: fill changes hf_a_erec in place, so re-running this cell adds further entries.
for erec_value in (1, 3):
    hf_a_erec.fill(erec_value, 1)
print(hf_a_erec)
print("nfills: %s" % hf_a_erec.nfills)

 --------------------
 | bin | mc |   err |
 --------------------
 |   0 |  0 |     0 |
 |   1 |  3 | 1.732 |
 |   2 |  1 |     1 |
 |   3 |  1 |     1 |
 |   4 |  0 |     0 |
 --------------------
nfills: 5
