# PHY3004W - Advanced Physics
## ATLAS OpenData Project 
### Jessica James [JMSJES004]

The following code was written by Kyra Kummer in 2022.

In [1]:
import uproot3 as uproot
import uproot3_methods.classes.TLorentzVector as LVepm
import matplotlib.pyplot as plt
from matplotlib import gridspec
import infofile 
import numpy as np
import mplhep as hep
import awkward as ak

lumi = 10 #fb^-1 "inverse femtobarns"

def get_xsec_weight(sample): 
    
    # extracts cross section weight from the info file.
    
    info = infofile.infos[sample] # open infofile
    xsec_weight = (lumi*1000*info["xsec"])/(info["sumw"]*info["red_eff"]) #*1000 to go from fb-1 to pb-1
    return xsec_weight # return cross-section weight

# Files can be downloaded from this webpage and should be placed in the same directory as the webpage
#https://atlas-opendata.web.cern.ch/atlas-opendata/samples/2020/2lep/MC/

# we start with two files: top quark pair simulation, and real data, others can be added later
files = [#"mc_361108.Ztautau.2lep.root",
#          "mc_410011.single_top_tchan.2lep.root",
#          "mc_410012.single_antitop_tchan.2lep.root",
#          "mc_410013.single_top_wtchan.2lep.root",
#          "mc_410014.single_antitop_wtchan.2lep.root",
#          "mc_410155.ttW.2lep.root",
          "mc_410000.ttbar_lep.2lep.root", # This is the top quark pair simulation
          "data.2lep.root"               # This is the real data. Using _A version as it's much smaller,
          ]

samples = ["tt"]
#samples = ["Z tau tau", "t", "tbar", "tW", "tbarW", "ttW", "tt"]


In [2]:
def calc_chisquare(observed, expected):
    
    # takes chi_square for 
    
    chi_square = np.sum( (observed - expected)**2 / expected)
    
    chi_square_per_dof = chi_square / len(observed)
    
    return (chi_square, chi_square_per_dof)

In [3]:
num_events_precut = []
num_events_cut1 = []
num_events_cut2 = []
num_events_cut3 = []
num_events_cut4 = []

# Hold the histograms of the distributions for each cut that has been made. Don't forget to weight them.
cut1_hist_mll = []
cut1_hist_mbb = []
cut1_hist_mllbb = []

cut2_hist_mll = []
cut2_hist_mbb = []
cut2_hist_mllbb = []

cut3_hist_mll = []
cut3_hist_mbb = []
cut3_hist_mllbb = []

cut4_hist = []

delR_dist = []
delR_bins = np.linspace(0,7,12)

eta_dist = []
eta_bins = np.linspace(0,3,11)

delphi_dist = []
delphi_bins = np.linspace(-3.2,3.2,11)

pT_dist = []
pT_bins = np.linspace(0, 1700, 18)

# dictionary = {"key:value"} access by dictionary["key"]
chi_square_values = {}

# histogram bins:
bins_mll = np.linspace(0, 400, num=40)
bins_mbb = np.linspace(0, 400, num=40)
bins_mllbb = np.linspace(70, 700, num=40)

bins_mll_cut3 = np.linspace(0,40,num=9) # Cut 3 restricts mll to <40 GeV, so no point going to 400

data_to_tt_ratios = []

cut_titles = ["Cut 1", "Cut 2", "Cut 3", "Cut 4"]
sample_names = []

btagWP77 = 0.6459 # Constant is to do with b-tagging: classifying the b-jets.





for file in files:
    sample_name = file.split(".")[1] # file is a string, splits it at ., and takes entry at index 1.
    print("Sample name: ", sample_name) 
    
    sample_names.append(sample_name)
    print("Sample names: ", sample_names)
        
    tree = uproot.open(file)["mini"]
    
    mcWeight, SumWeights, XSection, trigM, trigE, scaleFactor_PILEUP, scaleFactor_ELE, scaleFactor_MUON, scaleFactor_LepTRIGGER, scaleFactor_BTAG, lep_type, lep_pt, lep_eta, lep_phi, lep_E, lep_charge, lep_etcone20, lep_ptcone30, jet_n, jet_pt, jet_eta, jet_phi, jet_E, jet_MV2c10,jet_trueflav, met_et, met_phi = tree.arrays(["mcWeight", "SumWeights", "XSection","trigM", "trigE","scaleFactor_PILEUP", "scaleFactor_ELE", "scaleFactor_MUON","scaleFactor_LepTRIGGER","scaleFactor_BTAG", "lep_type","lep_pt", "lep_eta","lep_phi", "lep_E", "lep_charge", "lep_etcone20", "lep_ptcone30", "jet_n", "jet_pt", "jet_eta", "jet_phi","jet_E", "jet_MV2c10", "jet_trueflav", "met_et", "met_phi"])#, outputtype=tuple)#, entrystop = 20000)
    print("File has been successfully opened!")
    
    # lorentz vector
    leplv = LVepm.TLorentzVectorArray.from_ptetaphi(lep_pt, lep_eta, lep_phi, lep_E)

    # isolation for the leptons
    lep_reliso_pt = (lep_ptcone30 / lep_pt)
    lep_reliso_et = (lep_etcone20 / lep_pt)
    sum_lep_type = lep_type.sum()
    
    # Create Lorentz vector for the jets
    jetlv = LVepm.TLorentzVectorArray.from_ptetaphi(jet_pt, jet_eta, jet_phi, jet_E)
    jetlv = jetlv[jet_MV2c10.argsort()]    
    tags = jet_pt[jet_MV2c10 > btagWP77]   #take all jets with jet_MV2c10 > btagWP77
    
    trig_cut = ( (trigM==1) | (trigE==1))

    lep_type_cut  = (sum_lep_type == 24)    # requires an electron and muon.
    lep_iso_cut =  ((lep_reliso_pt.max() < 0.1) & (lep_reliso_et.max() < 0.1)) # making sure they are isolated.
    lept_count_cut = (leplv.counts ==2)  
    lept_charge_cut = (lep_charge.sum()==0)  
    
    # need to limit jets to two b-jets only
    num_jets_cut = (jetlv.counts == 2) # number of jets must be equal to 2
    ntag_cut = (tags.counts==2) # number of b-tagged jets must be equal to 2
    
    pre_cut = (lep_iso_cut  & lept_count_cut & lept_charge_cut & lep_type_cut & ntag_cut & num_jets_cut)
    
    #first_lep_p4 =  leplv[pre_cut, 0] # This gets the leptons from all the events without actually cutting them
    #second_lep_p4 = leplv[pre_cut, 1] # which lets us combine the del_R_cut (hopefully) but may prove difficult later?
    
    #limiting transverse momentum and pseudorapidity using values as in paper. 
    #lep_kinematics_cut  = ( (lep_pt.min() > 25000) & (lep_eta.min() >-2.5) & (lep_eta.max() < 2.5))
    
    # 1. Apply precut to the leptons and jets
    
    lep_pre = leplv[pre_cut]
    jet_pre = jetlv[pre_cut]
    
    # 2. Unpack individual leptons and jets from the pre-selected data (see 1)
    first_lep_p4 =  lep_pre[:, 0] 
    second_lep_p4 = lep_pre[:, 1]
    ll_p4 = first_lep_p4 + second_lep_p4 # 4 momentum of the dilepton system
    
    first_jet_p4 = jet_pre[:,0]
    second_jet_p4 = jet_pre[:,1]
    
    # 3. Get distribution of eta and pT for leading order leptons and del R
    lead_lep_pT = first_lep_p4.pt
    lead_lep_eta = first_lep_p4.eta 
    
    ll_delR = first_lep_p4.delta_r(second_lep_p4)
    
    mcWeight = mcWeight[pre_cut]
    
    if(file.split("_")[0] == "mc"):
        finalWeight = get_xsec_weight(sample_name)*(mcWeight)
    else:
        finalWeight = np.ones(len(mcWeight))
        
    # get a distribution of del_R
    H_delR, b = np.histogram(ll_delR, weights=np.full(len(ll_delR),finalWeight[0]), bins = delR_bins) #
    delR_dist.append(H_delR)
    
    H_eta, b = np.histogram(lead_lep_eta, weights=np.full(len(lead_lep_eta),finalWeight[0]), bins = eta_bins) 
    eta_dist.append(H_eta)
    
    H_pT, b = np.histogram(lead_lep_pT/1000, weights=np.full(len(lead_lep_pT),finalWeight[0]), bins = pT_bins) 
    pT_dist.append(H_pT)
    
    
    delR_cut = (ll_delR > 0.4)
    
    lep_kinematics_cut  = ( (lep_pre.pt.min() > 25000) & (lep_pre.eta.min() >-2.5) & (lep_pre.eta.max() < 2.5))
    
    cut_1 = (lep_kinematics_cut & delR_cut)
    
    # Now to apply cut 1 to the model and data
    first_lep_p4_1 = lep_pre[cut_1, 0]
    second_lep_p4_1 = lep_pre[cut_1, 1]
    
    ll_p4_1 = first_lep_p4_1 + second_lep_p4_1 # 4 vectors of ll system
    
    first_jet_p4_1 = jet_pre[cut_1, 0]
    second_jet_p4_1 = jet_pre[cut_1, 1]
    
    bb_p4_1 = first_jet_p4_1 + second_jet_p4_1 # 4 vectors of bb system
    
    llbb_p4_1 = ll_p4_1 + bb_p4_1 # 4 vectors of the llbb system
    
    #print("Length of ll_p4 after cut 1:", len(ll_p4_1))
    #print("Length of bb_p4 after cut 1:", len(bb_p4_1))
    
    if len(ll_p4_1) == 0:
        num_events_cut1.append(0)    
    else:
        num_events_cut1.append(len(ll_p4_1))
    
    print("Initial number of events:", num_events_precut)
    print("Events after cut 1:", num_events_cut1)
    
    # I want the weighting corresponding to cut 1:
    mcWeight = mcWeight[cut_1] 
        
    if(file.split("_")[0] == "mc"):
        finalWeight = get_xsec_weight(sample_name)*(mcWeight)
    else:
        finalWeight = np.ones(len(mcWeight)) 

    
    H_mll, b = np.histogram(ll_p4_1.mass/1000.0, weights=np.full(len(ll_p4_1.mass),finalWeight), bins=bins_mll) # /1000.0 to change units to GeV
    cut1_hist_mll.append(H_mll)
    
    H_mbb, b = np.histogram(bb_p4_1.mass/1000.0, weights = np.full(len(bb_p4_1.mass),finalWeight), bins=bins_mbb)
    cut1_hist_mbb.append(H_mbb)
    
    H_mllbb, b = np.histogram(llbb_p4_1.mass/1000.0, weights = np.full(len(llbb_p4_1.mass),finalWeight), bins = bins_mllbb)
    cut1_hist_mllbb.append(H_mllbb)
    
    #=====================================================================================================================
    # APPLYING CUT 2
    ll_1_delphi = first_lep_p4_1.delta_phi(second_lep_p4_1)
    #print("Delta phi: ", ll_1_delphi)
    
    H_delphi, b = np.histogram(ll_1_delphi, weights=np.full(len(ll_1_delphi),finalWeight[0]), bins = delphi_bins)
    delphi_dist.append(H_delphi) 

    delphi_cut = np.abs(ll_1_delphi) < (np.pi / 5)

    cut_2 = delphi_cut
    
    # Apply cut to leptons and jets
    first_lep_p4_2 = first_lep_p4_1[cut_2]
    second_lep_p4_2 = second_lep_p4_1[cut_2]

    ll_p4_2 = first_lep_p4_2 + second_lep_p4_2
    
    first_jet_p4_2 = first_jet_p4_1[cut_2]
    second_jet_p4_2 = second_jet_p4_1[cut_2]
    
    bb_p4_2 = first_jet_p4_2 + second_jet_p4_2
    
    llbb_p4_2 = ll_p4_2 + bb_p4_2

    print(len(ll_p4_2))

    if len(ll_p4_2) == 0:
        num_events_cut2.append(0)    
    else:
        num_events_cut2.append(len(ll_p4_2))

    print("Events after cut 2:", num_events_cut2)

    ## produce a histogram of the system
    
    # I want the weighting corresponding to cut 2:
    mcWeight = mcWeight[cut_2] 
        
    if(file.split("_")[0] == "mc"):
        finalWeight = get_xsec_weight(sample_name)*(mcWeight)
    else:
        finalWeight = np.ones(len(mcWeight)) 
    
    H_mll, b = np.histogram(ll_p4_2.mass/1000.0, weights=np.full(len(ll_p4_2.mass),finalWeight), bins=bins_mll) # /1000.0 to change units to GeV
    cut2_hist_mll.append(H_mll)
    
    H_mbb, b = np.histogram(bb_p4_2.mass/1000.0, weights = np.full(len(bb_p4_2.mass),finalWeight), bins=bins_mbb)
    cut2_hist_mbb.append(H_mbb)
    
    H_mllbb, b = np.histogram(llbb_p4_2.mass/1000.0, weights = np.full(len(llbb_p4_2.mass),finalWeight), bins = bins_mllbb)
    cut2_hist_mllbb.append(H_mllbb)



    # Now applying cut 3, limit mass of dilepton system. This will be given by ll_p4_2.mass

    cut_3 = ll_p4_2.mass < (40000) # 40000 MeV = 40 GeV
    #print(cut_3)

    first_lep_p4_3 = first_lep_p4_2[cut_3]
    second_lep_p4_3 = second_lep_p4_2[cut_3]

    ll_p4_3 = first_lep_p4_3 + second_lep_p4_3
    
    first_jet_p4_3 = first_jet_p4_2[cut_3]
    second_jet_p4_3 = second_jet_p4_2[cut_3]
    
    bb_p4_3 = first_jet_p4_3 + second_jet_p4_3
    
    llbb_p4_3 = ll_p4_3 + bb_p4_3

    print(len(ll_p4_3))

    if len(ll_p4_3) == 0:
        num_events_cut3.append(0)    
    else:
        num_events_cut3.append(len(ll_p4_3))

    print("Events after cut 3:", num_events_cut3)
    
    
    mcWeight = mcWeight[cut_3] 
        
    if(file.split("_")[0] == "mc"):
        finalWeight = get_xsec_weight(sample_name)*(mcWeight)
    else:
        finalWeight = np.ones(len(mcWeight)) 
    
    H_mll, b = np.histogram(ll_p4_3.mass/1000.0, weights=np.full(len(ll_p4_3.mass),finalWeight), bins=bins_mll_cut3) # /1000.0 to change units to GeV
    cut3_hist_mll.append(H_mll)
    
    H_mbb, b = np.histogram(bb_p4_3.mass/1000.0, weights = np.full(len(bb_p4_3.mass),finalWeight), bins=bins_mbb)
    cut3_hist_mbb.append(H_mbb)
    
    H_mllbb, b = np.histogram(llbb_p4_3.mass/1000.0, weights = np.full(len(llbb_p4_3.mass),finalWeight), bins = bins_mllbb)
    cut3_hist_mllbb.append(H_mllbb)


    ## Cut 4 requires that the transverse mass of the llbbvv system is known.

Sample name:  ttbar_lep
Sample names:  ['ttbar_lep']
File has been successfully opened!


ValueError: buffer size must be a multiple of element size

In [None]:
print(lead_lep_pT.max()/1000)
print(lead_lep_pT.min()/1000)

In [None]:
# Cut 1 Histograms
plt.figure()
#fig = plt.figure(figsize=(6,8))
#fig.tight_layout()


hep.histplot(delR_dist[0], delR_bins, label=r"$\Delta R$ ($t\bar{t}$)", histtype='fill', color = "cornflowerblue")
hep.histplot(delR_dist[1], delR_bins, label=r"$\Delta R$ (data)", yerr=False, histtype="errorbar", color="black")

plt.fill_betweenx((0,10e8), x1 = 0.4, x2 = 7, alpha=0.4, color="powderblue",label = "$\Delta R > 0.4$")
#plt.vlines(0.4, ymin=0, ymax=10e8, linestyle="--", linewidth=0.9, color="powderblue")
plt.ylim(1e0,1e6)
plt.xlim(0,6)
plt.yscale("log")
plt.xlabel(r"$\Delta R$ between the leptons", loc="right", fontsize=18)
plt.ylabel("Number of events", fontsize = 18)
plt.legend(loc = "upper right", frameon=True, facecolor="white", framealpha=1, fontsize=14)
hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

#hep.atlas.label(label="OpenData", data = True, year = 2020, fontsize=14,)
plt.show()

plt.figure()
#fig = plt.figure(figsize=(6,8))

#plt.vlines(2.5, ymin=10e0, ymax=10e7, linestyle="--", color="k", label="$|\eta | = 2.5$")
hep.histplot(eta_dist[0], eta_bins, label=r"$|\eta |$ ($t\bar{t}$)", histtype='fill', color = "cornflowerblue")
hep.histplot(eta_dist[1], eta_bins, label=r"$|\eta |$ (data)", yerr=False, histtype="errorbar", color="black")
plt.fill_betweenx((10e0,10e6), x1 = 0, x2 = 2.5, alpha=0.5, color="powderblue",label = "$|\eta| < 2.5$")

plt.ylim(10e0,10e6)
plt.xlim(0,2.8)
plt.yscale("log")
plt.xlabel(r"Leading lepton $|\eta |$", loc="right", fontsize=18)
plt.ylabel("Number of events", fontsize = 18)
plt.legend(loc = "upper left", frameon=True, facecolor="white", framealpha=1, fontsize=14)
hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()

plt.figure()


#plt.vlines(2.5, ymin=10e0, ymax=10e8, linestyle="--", color="k", label="$|\eta | = 2.5$")
hep.histplot(pT_dist[0], pT_bins, label=r"$p_T$ ($t\bar{t}$)", histtype='fill', hatch="///", color = "cornflowerblue")
hep.histplot(pT_dist[1], pT_bins, label=r"$p_T$ (data)", yerr=False, histtype="errorbar", color="black")
plt.fill_betweenx((0,10e5), x1 = 25, x2 = 1700, alpha=0.3, color="powderblue",label = "$p_T > 25$ GeV")

plt.ylim(1,10e5)
plt.xlim(0,600) # only two events fall above 600 GeV. one at 800, one at 1600
plt.yscale("log")
plt.xlabel(r"Leading lepton $p_T$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", fontsize = 18)
plt.legend(loc = "upper right", frameon=True, facecolor="white", framealpha=1, fontsize=14)
hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()

print(pT_dist[0], pT_dist[1])


In [None]:
#s_to_b = (cut3_hist_mllbb[1] - cut3_hist_mllbb[0]) / cut3_hist_mllbb[0]
#s_to_b = np.nan_to_num(s_to_b, posinf = 0)


def ratio(data, tt):
    
    results = []
    
    for i in range(len(data)):
        if tt[i] == 0 and data[i]==0: # based on assumption that if data == 0, then tt == 0 because tt not > than data for low counts
            results.append(0)
        else:
            results.append( (data[i]-tt[i])/tt[i] )
        
    results = np.array(results)
    results = np.nan_to_num(results, posinf = 1) # set to 1 so it doesn't hide the case of tt = 0 and data != 0
    return results
        

def u_on_ratio(data, ratio):
    
    results = []
    for i in range(len(data)):
        
        if data[i]==0:
            results.append(0)
        else:
            u_data = np.sqrt(data[i])
            rel_u = u_data/data[i]
    
            u_ratio = rel_u * ratio[i]
            results.append(u_ratio)
    
    results = np.abs(np.array(results))
    return results

In [None]:
plt.rcParams. update({'font.family':'sans-serif'})
# M_ll DISTRIBUTION HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut1_hist_mll[0], bins=bins_mll, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut1_hist_mll[1], bins=bins_mll, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 1", (20,3e3), fontsize = 14)
plt.annotate("$\Delta R>0.4$, $p_T > 25$ GeV, $|\eta| < 2.5$", (20,1e3 + 300), fontsize = 14)

plt.legend(loc=1, ncol=1, fontsize=14, frameon=False)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{ll}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim([0.1,80000])
plt.yscale("log")
plt.xlim(0,400)


# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut1_hist_mll[1], cut1_hist_mll[0])
unc =  u_on_ratio(cut1_hist_mll[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mll, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
plt.xlabel("m$_{ll}$ [GeV]", loc="right", fontsize=18)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (20,0.35), fontsize = 16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(0,400)
plt.show()


# M_BB HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut1_hist_mbb[0], bins=bins_mbb, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut1_hist_mbb[1], bins=bins_mbb, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 1:", (20,3e3), fontsize = 14)
plt.annotate("$\Delta R>0.4$, $p_T > 25$ GeV, $|\eta| < 2.5$", (20,1e3 + 300), fontsize = 14)

plt.legend(loc=1, ncol=1, fontsize=14, frameon=False)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{bb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim([0.1,80000])
plt.yscale("log")
plt.xlim(0,400)


# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut1_hist_mbb[1], cut1_hist_mbb[0])
unc =  u_on_ratio(cut1_hist_mbb[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mbb, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
plt.xlabel("m$_{bb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (240,0.25), fontsize = 16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(0,400)
plt.show()



# M_LLBB DISTRIBUTION HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut1_hist_mllbb[0], bins=bins_mllbb, stack=True, label = r"$t\bar{t}$ model", histtype="fill", hatch ="///", color = "cornflowerblue")
data = hep.histplot(cut1_hist_mllbb[1], bins=bins_mllbb, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 1:", (100,3e3), fontsize = 14)
plt.annotate("$\Delta R>0.4$, $p_T > 25$ GeV, $|\eta| < 2.5$", (100,1e3+300), fontsize = 14)

plt.legend(loc=1, ncol=1, fontsize=14, frameon=False)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim([0.1,80000])
plt.xlim(70,700)
plt.yscale("log")

# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut1_hist_mllbb[1], cut1_hist_mllbb[0])
unc =  u_on_ratio(cut1_hist_mllbb[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mllbb, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (460,4.5), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(70,700)
plt.show()

print(len(cut1_hist_mll[0]))

In [None]:
# Cut 2 Histograms
plt.figure()
#fig = plt.figure(figsize=(6,8))


hep.histplot(delphi_dist[0], delphi_bins, label=r"$\Delta \phi$ ($t\bar{t}$)", histtype='fill', color = "cornflowerblue")
hep.histplot(delphi_dist[1], delphi_bins, label=r"$\Delta \phi$ (data)", yerr=False, histtype="errorbar", color="black")
plt.fill_betweenx((0, 2700), x1 = -(np.pi/5), x2 =np.pi/5, alpha=0.5, color="indianred",label = "$|\Delta \phi| <\dfrac{\pi}{5}$")
plt.ylim(0,2700)
plt.xlim(-3.2,3.2)
plt.xlabel(r"$\Delta \phi$ between the leptons", loc="right", fontsize=18)
plt.ylabel("Number of events", fontsize = 18)
plt.legend(loc = "upper right", frameon=True, facecolor="white", framealpha=1, fontsize=14)
hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.show()

In [None]:
# M_ll DISTRIBUTION HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut2_hist_mll[0], bins=bins_mll, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut2_hist_mll[1], bins=bins_mll, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData Release", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 2:", (20,210), fontsize = 14)
plt.annotate(r"$|\Delta \phi| < \frac{\pi}{5}$", (75,190), fontsize = 14)

plt.legend(loc=1, ncol=1, frameon=False, fontsize=14)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{ll}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim([0,270])
plt.xlim(0,400)


# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut2_hist_mll[1], cut2_hist_mll[0])
unc =  u_on_ratio(cut2_hist_mll[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mll, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
#plt.ylim([0.6,1.4])
plt.xlabel("m$_{ll}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (20,1.9), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(0,400)
plt.show()



# M_BB HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut2_hist_mbb[0], bins=bins_mbb, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut2_hist_mbb[1], bins=bins_mbb, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData Release", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 2:", (20,92), fontsize = 14)
plt.annotate(r"$|\Delta \phi| < \frac{\pi}{5}$", (75,82), fontsize = 14)

plt.legend(loc=1, frameon=False, fontsize=14)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{bb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim([0,120])
plt.xlim(0,400)

# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut2_hist_mbb[1], cut2_hist_mbb[0])
unc =  u_on_ratio(cut2_hist_mbb[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mbb, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
plt.xlabel("m$_{bb}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (25,1.4), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(0,400)
plt.show()


# M_LLBB DISTRIBUTION HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut2_hist_mllbb[0], bins=bins_mllbb, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut2_hist_mllbb[1], bins=bins_mllbb, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData Release", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 2:", (100,140), fontsize = 14)
plt.annotate(r"$|\Delta \phi| < \frac{\pi}{5}$", (100,125), fontsize = 14)

plt.legend(loc=1, ncol=1, frameon=False, fontsize=14)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim(0,180)
plt.xlim(70,700)
#plt.yscale("log")

# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut2_hist_mllbb[1], cut2_hist_mllbb[0])
unc =  u_on_ratio(cut2_hist_mllbb[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mllbb, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (120,1.8), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,2.5)
plt.xlim(70,700)
plt.show()

In [None]:
# mll distributions after cut 2
plt.figure()

hep.histplot(cut2_hist_mll[0], bins=bins_mll, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
hep.histplot(cut2_hist_mll[1], bins=bins_mll, stack=False, yerr=False, histtype="errorbar", color="black",label="Data")

plt.fill_betweenx((0, 200), x1 = 0, x2 = 40, alpha=0.4, color="indianred", label = "$m_{ll} < 40$ GeV")
plt.ylim(0,200)
plt.xlim(0,250)
plt.xlabel(r"$m_{ll}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", fontsize = 18)
plt.legend(loc = "upper right", frameon=True, facecolor="white", framealpha=1, fontsize=14)
hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

In [None]:
# Cut 3 histograms
# M_ll DISTRIBUTION HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()

spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut3_hist_mll[0], bins=bins_mll_cut3, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut3_hist_mll[1], bins=bins_mll_cut3, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData Release", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 3:", (2,150), fontsize = 14)
plt.annotate("$m_{ll}<40$ GeV", (2,138), fontsize = 14)

plt.legend(loc=1, ncol=1, frameon=False, fontsize=14)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{ll}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim(0,200)
plt.xlim(0,40)

# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut3_hist_mll[1], cut3_hist_mll[0])
unc =  u_on_ratio(cut3_hist_mll[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mll_cut3, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")

plt.xlabel("m$_{ll}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (2,0.15), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(0,40)
plt.show()



# M_BB HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()
spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut3_hist_mbb[0], bins=bins_mbb, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut3_hist_mbb[1], bins=bins_mbb, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData Release", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 3:", (20,60), fontsize = 14)
plt.annotate("$m_{ll}<40$ GeV", (20,55), fontsize = 14)

plt.legend(loc=1, ncol=1, frameon=False, fontsize=14)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{bb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim(0,80)
plt.xlim(0,400)

# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut3_hist_mbb[1], cut3_hist_mbb[0])
unc =  u_on_ratio(cut3_hist_mbb[1], s_to_b)


unc =  u_on_ratio(cut3_hist_mbb[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mbb, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
#plt.ylim([0.6,1.4])
plt.xlabel("m$_{bb}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (20,2.8), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(0,400)
plt.show()



# M_LLBB DISTRIBUTION HISTOGRAM
plt.figure()
fig = plt.figure(figsize=(6,8))
fig.tight_layout()
spec = gridspec.GridSpec(ncols=1, nrows=2, height_ratios=[2, 1], hspace=0.3)
ax0 = fig.add_subplot(spec[0])

total_mc = hep.histplot(cut3_hist_mllbb[0], bins=bins_mllbb, stack=True, label = r"$t\bar{t}$ model", histtype="fill", color = "cornflowerblue")
data = hep.histplot(cut3_hist_mllbb[1], bins=bins_mllbb, stack=False, yerr=True, histtype="errorbar", color="black",label="Data")

hep.atlas.label(label="OpenData Release", data = True, year = 2020, fontsize=14)
#hep.cms.lumitext(text=r'ATLAS OpenData $\sqrt{s}=$13 TeV  L=10 fb$^{-1}$', ax=None, fontname=None, fontsize=18)
plt.annotate("Events surviving Cut 3:", (100,75), fontsize = 14)
plt.annotate("$m_{ll}<40$ GeV", (100,68), fontsize = 14)

plt.legend(loc=1, ncol=1, fontsize=14, frameon=False)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Number of events", loc="top", fontsize=18)
plt.ylim(0,100)
plt.xlim(70,700)

# Plot signal over background:
ax1 = fig.add_subplot(spec[1])

s_to_b = ratio(cut3_hist_mllbb[1], cut3_hist_mllbb[0])
unc =  u_on_ratio(cut3_hist_mllbb[1], s_to_b)

s_to_b_plot = hep.histplot(s_to_b, bins=bins_mllbb, stack=False, yerr=unc, histtype="errorbar", color="black",label="Data/MC")
#plt.ylim([0.6,1.4])
plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.annotate(r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$", (110,1.6), fontsize = 16)
plt.ylabel(r"$R_{top}$", fontsize=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0,)
plt.xlim(70,700)
plt.show()

In [None]:
# Ratio Plot
# compute the ratios:
cut_1_ratio_mllbb = (np.sum(cut1_hist_mllbb[1]) - np.sum(cut1_hist_mllbb[0])) / np.sum(cut1_hist_mllbb[0])
cut_2_ratio_mllbb = (np.sum(cut2_hist_mllbb[1]) - np.sum(cut2_hist_mllbb[0])) / np.sum(cut2_hist_mllbb[0])
cut_3_ratio_mllbb = (np.sum(cut3_hist_mllbb[1]) - np.sum(cut3_hist_mllbb[0])) / np.sum(cut3_hist_mllbb[0])

cut_1_ratio_mll = (np.sum(cut1_hist_mll[1]) - np.sum(cut1_hist_mll[0])) / np.sum(cut1_hist_mll[0])
cut_2_ratio_mll = (np.sum(cut2_hist_mll[1]) - np.sum(cut2_hist_mll[0])) / np.sum(cut2_hist_mll[0])
cut_3_ratio_mll = (np.sum(cut3_hist_mll[1]) - np.sum(cut3_hist_mll[0])) / np.sum(cut3_hist_mll[0])



unc_cut_1_mllbb =  (np.sqrt(np.sum(cut1_hist_mllbb[1])) / np.sum(cut1_hist_mllbb[1]) ) * cut_1_ratio_mllbb
unc_cut_2_mllbb =  (np.sqrt(np.sum(cut2_hist_mllbb[1])) / np.sum(cut2_hist_mllbb[1]) ) * cut_2_ratio_mllbb
unc_cut_3_mllbb =  (np.sqrt(np.sum(cut3_hist_mllbb[1])) / np.sum(cut3_hist_mllbb[1]) ) * cut_3_ratio_mllbb

unc_cut_1_mll =  (np.sqrt(np.sum(cut1_hist_mll[1])) / np.sum(cut1_hist_mll[1]) ) * cut_1_ratio_mll
unc_cut_2_mll =  (np.sqrt(np.sum(cut2_hist_mll[1])) / np.sum(cut2_hist_mll[1]) ) * cut_2_ratio_mll
unc_cut_3_mll =  (np.sqrt(np.sum(cut3_hist_mll[1])) / np.sum(cut3_hist_mll[1]) ) * cut_3_ratio_mll

unc_ratio_mllbb = np.abs([unc_cut_1_mllbb, unc_cut_2_mllbb, unc_cut_3_mllbb])
unc_ratio_mll = np.abs([unc_cut_1_mll, unc_cut_2_mll, unc_cut_3_mll])

cut_labels = ["Cut 1", "Cut 2", "Cut 3"]
paper_ratios = [0.0075, 0.023, 0.036]
my_ratios_mllbb = [cut_1_ratio_mllbb,cut_2_ratio_mllbb,cut_3_ratio_mllbb]
my_ratios_mll = [cut_1_ratio_mll,cut_2_ratio_mll,cut_3_ratio_mll]

#plt.figure(figsize=(4,8))
plt.scatter(cut_labels, paper_ratios, marker= "o", color = "blue", label = r"$\frac{S}{B}$ (ref. [1])")
#plt.errorbar(cut_labels, my_ratios_mll, yerr=unc_ratio_mll, fmt = "ok", label = r"$R_{top}$")
plt.errorbar(cut_labels, my_ratios_mllbb, fmt = "xr", yerr=unc_ratio_mllbb, label = r"$R_{top}=\frac{N_{data}-N_{t\bar{t}}}{N_{t\bar{t}}}$")

#plt.xlabel("m$_{llbb}$ [GeV]", loc="right", fontsize=18)
plt.ylabel("Ratio", fontsize=18)
plt.legend(frameon=True, fontsize = 14, ncol=1, loc="upper left")
plt.ylim(-0.005,0.06)
plt.xticks(fontsize=16)
plt.yticks(fontsize=14)
plt.title(r"Ratio of toponium to $t\bar{t}$", fontsize=18)
#hep.cms.lumitext(text=r"Ratio of toponium to $t\bar{t}$", ax=None, fontname=None, fontsize=18)

print(my_ratios_mllbb)
print(unc_ratio_mllbb)

In [None]:
def calc_chisquare(observed, expected):
    
    # takes chi_square for 
    
    results = []
    
    for i in range(len(observed)):
        if expected[i]==0: continue
        else: results.append((observed[i] - expected[i])**2 / expected[i])
            
    
    chi_square = np.sum(results)
    
    chi_square_per_dof = chi_square / len(observed)
    
    return (chi_square, chi_square_per_dof)

In [None]:
print(len(cut1_hist_mllbb[1]))
cut1_hist_mllbb[0]

In [None]:
#calc_chisquare(observed, expected) returns chi_square, chi_square_per_dof
# Calculating chi_square for whole distribution 
chi_cut1, chidof_cut1mllbb = calc_chisquare(cut1_hist_mllbb[1],cut1_hist_mllbb[0])
chi_cut2, chidof_cut2mllbb = calc_chisquare(cut2_hist_mllbb[1],cut2_hist_mllbb[0])
chi_cut3, chidof_cut3mllbb = calc_chisquare(cut3_hist_mllbb[1],cut3_hist_mllbb[0])

chidof_wholemllbb = [chidof_cut1mllbb,chidof_cut2mllbb,chidof_cut3mllbb]

# calculating chidof for the first 15 bins only (where toponium is expected)
chi_cut1, chidof_cut1 = calc_chisquare(cut1_hist_mllbb[1][0:21],cut1_hist_mllbb[0][0:21])
chi_cut2, chidof_cut2 = calc_chisquare(cut2_hist_mllbb[1][0:21],cut2_hist_mllbb[0][0:21])
chi_cut3, chidof_cut3 = calc_chisquare(cut3_hist_mllbb[1][0:21],cut3_hist_mllbb[0][0:21])

chidof_first15 = [chidof_cut1,chidof_cut2,chidof_cut3]

#plt.hlines(xmin=cut_labels[0],xmax=cut_labels[2],y=1, linestyle = "--", linewidth=0.9, color ="k")
plt.axline(xy1=(0,1), xy2=None, slope =0, color="k", linestyle="--",linewidth=0.9)
plt.scatter(cut_labels, chidof_whole, marker= "x", color="k", label=r"$m_{llbb}: \chi ^2$/dof (whole distribution)")
plt.scatter(cut_labels, chidof_first15, marker= "x", color="b", label=r"$m_{llbb}: \chi ^2$/dof ($m<409$ GeV)")
plt.ylabel(r"$ \chi ^2$/dof", fontsize=18)
plt.legend(frameon=True, fontsize = 12, ncol=1, loc="upper right")
plt.xticks(fontsize=16)
plt.yticks(fontsize=14)
plt.ylim(0,3)

In [None]:
chidof_wholemllbb

In [None]:
#calc_chisquare(observed, expected) returns chi_square, chi_square_per_dof
# Calculating chi_square for whole distribution 
chi_cut1, chidof_cut1mll = calc_chisquare(cut1_hist_mll[1],cut1_hist_mll[0])
chi_cut2, chidof_cut2mll = calc_chisquare(cut2_hist_mll[1],cut2_hist_mll[0])
chi_cut3, chidof_cut3mll = calc_chisquare(cut3_hist_mll[1],cut3_hist_mll[0])

chidof_wholemll = [chidof_cut1mll,chidof_cut2mll,chidof_cut3mll]

# calculating chidof for the first 20 bins only (where toponium is expected)
chi_cut1, chidof_cut1 = calc_chisquare(cut1_hist_mll[1][0:5],cut1_hist_mll[0][0:5])
chi_cut2, chidof_cut2 = calc_chisquare(cut2_hist_mll[1][0:5],cut2_hist_mll[0][0:5])
chi_cut3, chidof_cut3 = calc_chisquare(cut3_hist_mll[1][0:],cut3_hist_mll[0][0:])

chidof_first15 = [chidof_cut1,chidof_cut2,chidof_cut3]

#plt.hlines(xmin=cut_labels[0],xmax=cut_labels[2],y=1, linestyle = "--", linewidth=0.9, color ="k")
plt.axline(xy1=(0,1), xy2=None, slope =0, color="k", linestyle="--",linewidth=0.9)
plt.scatter(cut_labels, chidof_whole, marker= "^", color="k", label=r"$m_{ll}: \chi ^2$/dof (whole distribution)")
plt.scatter(cut_labels, chidof_first15, marker= "^", color="b", label=r"$m_{ll}: \chi ^2$/dof ($m<41$ GeV)")
plt.ylabel(r"$ \chi ^2$/dof", fontsize=18)
plt.legend(frameon=True, fontsize = 12, ncol=1, loc="upper right")
plt.xticks(fontsize=16)
plt.yticks(fontsize=14)
plt.ylim(0,3)

In [None]:
cut3_hist_mll[1]

In [None]:
chidof_first15

In [None]:
chidof_wholemll