In [1]:
import numpy as np

def approx_global_sig(sig: np.ndarray, bkg: np.ndarray, N: float = 0.0) -> float:
    """
    Approximate significance of a binned signal over a binned background.

    Every bin i is given the weight w_i = ln(1 + s_i / b_i), and the counting
    formula (S - N*sqrt(B)) / sqrt(S + B) is evaluated on the weighted sums:

        Z = (sum_i s_i*w_i - N * sqrt(sum_i b_i*w_i**2))
            / sqrt(sum_i (s_i + b_i) * w_i**2)

    Parameters:
    sig (array-like): Number of signal events in each bin.
    bkg (array-like): Number of background events in each bin; must be
        broadcastable against `sig`.
    N (float): Multiplier applied to the weighted background term
        sqrt(sum_i b_i*w_i**2) that is subtracted from the numerator.
        Default value is 0.0 (nothing is subtracted).

    Returns:
    float: The significance Z. Returns 0.0 when the denominator is exactly
        zero (for example when every bin has zero signal, or the input is
        empty) instead of propagating a 0/0 NaN.
    """

    # Accept lists/tuples as well as arrays; float dtype keeps the arithmetic
    # identical to what integer arrays were already promoted to.
    sig = np.asarray(sig, dtype=float)
    bkg = np.asarray(bkg, dtype=float)

    # Weight factor w for each bin; the 1e-9 offset avoids a division by zero
    # in bins with no background.
    w = np.log(1. + sig / (bkg + 1e-9))

    # Weighted sums entering the formula (same evaluation order as before so
    # previously recorded results are reproduced exactly).
    s_w = sig * w
    s_ww = sig * w ** 2
    b_ww = bkg * w ** 2

    num = np.sum(s_w) - N * np.sqrt(np.sum(b_ww))
    den = np.sqrt(np.sum(s_ww + b_ww))

    # All weights vanish (no signal anywhere, or no bins): there is no
    # sensitivity, so report zero rather than 0/0.
    if den == 0:
        return 0.0

    return float(num / den)


In [2]:
# Ten-bin toy histograms: signal counts (s) and background counts (b) per bin.
s = np.array([1, 2, 3, 4, 5, 6, 7, 8, 19, 25])
b = np.array([13, 10, 9, 8, 8, 4, 2, 3, 1, 1])

In [3]:
# Significance of the toy histograms with N = 0.15.
approx_global_sig(sig=s, bkg=b, N=0.15)

7.498759554825828

In [4]:
import numpy as np

def rebin_histogram(hist_2n, factor=2):
    """
    Re-bins a histogram by summing groups of adjacent bins.

    With the default `factor=2` a histogram with 2n bins is reduced to n bins
    by adding each pair of neighbouring bins.

    Parameters:
    hist_2n (array-like): Histogram contents. Lists and tuples are accepted
        as well as numpy arrays; the input is not modified.
    factor (int): Number of adjacent bins merged into one output bin.
        Default value is 2.

    Returns:
    numpy array: 1D numpy array with len(hist_2n) // factor bins.

    Raises:
    ValueError: If `factor` is smaller than 1.
    AssertionError: If the number of bins is not a multiple of `factor`.
    """

    if factor < 1:
        raise ValueError("factor must be a positive integer.")

    # Convert once so plain Python sequences work with reshape below.
    hist = np.asarray(hist_2n)

    # Raised explicitly instead of through an `assert` statement so the check
    # still runs under `python -O`; the exception type is unchanged.
    if len(hist) % factor != 0:
        if factor == 2:
            raise AssertionError("Input histogram must have an even number of bins.")
        raise AssertionError(
            f"Input histogram must have a number of bins divisible by {factor}."
        )

    # One row per output bin, one column per merged input bin; summing along
    # the columns collapses each group.
    return hist.reshape(-1, factor).sum(axis=1)


In [5]:
s=rebin_histogram(s)
b=rebin_histogram(b)

In [6]:
approx_global_sig(s,b,N=0.15)

7.476299421592801

In [7]:
import os
# Directory, relative to the current working directory, that holds the
# histogram text files read in the cells below.
path_to_txt = os.path.join(
    os.getcwd(),
    "Histograms",
    "hadronic_non-resonant",
    "M1250",
)

In [8]:
# Shell escape: list the files found in path_to_txt.
!ls {path_to_txt}

high_per_bin_Combined.txt  high_per_bin_V+jets.txt  high_per_bin_tau_tau.txt
high_per_bin_Diboson.txt   high_per_bin_stop.txt    high_per_bin_tbart.txt
high_per_bin_Lq_Lq.txt	   high_per_bin_tau_Lq.txt


In [9]:
# Signal histogram: start from the first sample and add the remaining ones
# bin by bin.
sig1 = np.loadtxt(os.path.join(path_to_txt, "high_per_bin_Lq_Lq.txt"))
for sample_file in ("high_per_bin_tau_Lq.txt", "high_per_bin_tau_tau.txt"):
    sig1 += np.loadtxt(os.path.join(path_to_txt, sample_file))

# Background histogram: same accumulation over the four background samples.
bkg1 = np.loadtxt(os.path.join(path_to_txt, "high_per_bin_Diboson.txt"))
for sample_file in (
    "high_per_bin_V+jets.txt",
    "high_per_bin_stop.txt",
    "high_per_bin_tbart.txt",
):
    bkg1 += np.loadtxt(os.path.join(path_to_txt, sample_file))

In [10]:
# Significance of the first channel (N left at its default of 0).
approx_global_sig(sig=sig1, bkg=bkg1)

15.478900861416191

In [11]:
# Point at the "hadronic_sLQ" histograms (the assignment target was
# accidentally written twice before).
path_to_txt = os.path.join(os.getcwd(), "Histograms", "hadronic_sLQ", "M1250")

# Signal histogram: start from the first sample and add the remaining ones
# bin by bin.
sig2 = np.loadtxt(os.path.join(path_to_txt, "high_per_bin_Lq_Lq.txt"))
for sample_file in ("high_per_bin_tau_Lq.txt", "high_per_bin_tau_tau.txt"):
    sig2 += np.loadtxt(os.path.join(path_to_txt, sample_file))

# Background histogram: same accumulation over the four background samples.
bkg2 = np.loadtxt(os.path.join(path_to_txt, "high_per_bin_Diboson.txt"))
for sample_file in (
    "high_per_bin_V+jets.txt",
    "high_per_bin_stop.txt",
    "high_per_bin_tbart.txt",
):
    bkg2 += np.loadtxt(os.path.join(path_to_txt, sample_file))

In [12]:
# Significance of the second channel (N left at its default of 0).
approx_global_sig(sig=sig2, bkg=bkg2)

16.5421953513909

In [13]:
# NOTE(review): this is the same "hadronic_non-resonant" directory that was
# used for sig1/bkg1, so sig3/bkg3 come out identical to them (both cells
# report the same significance). If a different third channel was intended,
# point this path at its directory; as written, the combination further down
# counts this channel twice. TODO confirm.
# (The assignment target was accidentally written twice before.)
path_to_txt = os.path.join(os.getcwd(), "Histograms", "hadronic_non-resonant", "M1250")

# Signal histogram: start from the first sample and add the remaining ones
# bin by bin.
sig3 = np.loadtxt(os.path.join(path_to_txt, "high_per_bin_Lq_Lq.txt"))
for sample_file in ("high_per_bin_tau_Lq.txt", "high_per_bin_tau_tau.txt"):
    sig3 += np.loadtxt(os.path.join(path_to_txt, sample_file))

# Background histogram: same accumulation over the four background samples.
bkg3 = np.loadtxt(os.path.join(path_to_txt, "high_per_bin_Diboson.txt"))
for sample_file in (
    "high_per_bin_V+jets.txt",
    "high_per_bin_stop.txt",
    "high_per_bin_tbart.txt",
):
    bkg3 += np.loadtxt(os.path.join(path_to_txt, sample_file))

In [14]:
# Significance of the third channel (N left at its default of 0).
approx_global_sig(sig=sig3, bkg=bkg3)

15.478900861416191

In [15]:
# Lay the bins of the three channels end to end so they can be treated as a
# single long histogram.
sig = np.concatenate([sig1, sig2, sig3])
bkg = np.concatenate([bkg1, bkg2, bkg3])

In [16]:
# Significance computed on the concatenated histograms.
approx_global_sig(sig=sig, bkg=bkg)

27.25090950371868

In [17]:
# Cross-check: add the three per-channel significances in quadrature.
per_channel_sig = [
    approx_global_sig(sig1, bkg1),
    approx_global_sig(sig2, bkg2),
    approx_global_sig(sig3, bkg3),
]
np.sqrt(sum(z ** 2 for z in per_channel_sig))

27.43787475003632