In [4]:
import numpy as np
import matplotlib.pyplot as plt
import hist
import mplhep
import matplotlib as mpl

# Global matplotlib style: thicker lines, light thin grid, no figure edge colour.
mpl.rcParams.update(
    {
        "lines.linewidth": 2,
        "grid.color": "#CCCCCC",
        "grid.linewidth": 0.5,
        "figure.edgecolor": "none",
    }
)

import pandas as pd
import vector

import sys
sys.path.append("..")

import utils

In [8]:
def make_vector(events: pd.DataFrame, obj: str):
    """Build a ``vector`` array for ``obj`` from the columns of ``events``.

    The ``{obj}Pt``, ``{obj}Phi`` and ``{obj}Eta`` columns supply the
    kinematics. The mass is read from ``{obj}PNetMass`` when ``obj`` is
    ``"ak8FatJet"`` and from ``{obj}Mass`` for every other object name.

    Args:
        events: dataframe holding the per-object columns named above.
        obj: column-name prefix of the object, e.g. ``"ak8FatJet"``.

    Returns:
        The result of ``vector.array`` called on the pt/phi/eta/M components.
    """
    if obj == "ak8FatJet":
        mass_column = f"{obj}PNetMass"
    else:
        mass_column = f"{obj}Mass"

    components = {
        "pt": events[f"{obj}Pt"],
        "phi": events[f"{obj}Phi"],
        "eta": events[f"{obj}Eta"],
        "M": events[mass_column],
    }
    return vector.array(components)

## Semi-boosted discrimination

H1: 1 fatjet  
H2: 2 AK4 jets (j3, j4)

- log(pT) of HH system
- eta of HH system
- dR (H1, H2)
- mass of HH system
- dphi (j3, j4)
- dR (j3, j4)
- log(pT) of H1
- log(pT) of j3
- log(pT) of j4
- H1 pT / H2 pT
- number of jets in event (not in ntuples)

BDT training reference:
https://github.com/rkansal47/HHbbVV/blob/main/src/HHbbVV/postprocessing/TrainBDT.py


In [7]:
# Sample groups to load: label -> list of dataset names.
samples = {
    "hh4b": ["GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    #"hh4b-c2p45": ["GluGlutoHHto4B_cHHH2p45_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    #"hh4b-c5": ["GluGlutoHHto4B_cHHH5_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    #"hh4b-c0": ["GluGlutoHHto4B_cHHH0_TuneCP5_PSWeights_13TeV-powheg-pythia8"],
    "qcd": [
        "QCD_HT-200to300-13TeV",
        "QCD_HT-300to500-13TeV",
        "QCD_HT-500to700-13TeV",
        "QCD_HT-700to1000-13TeV",
        "QCD_HT-1000to1500-13TeV",
        "QCD_HT-1500to2000-13TeV",
        "QCD_HT-2000toInf-13TeV",
    ],
    # TODO: include ttbar
}

year = "2018"
dir_name = "Oct30ApplySelection"
path_to_dir = f"/eos/uscms/store/user/cmantill/bbbb/matching/{dir_name}/"
# Map each input directory to the sample groups that should be read from it.
dirs = {path_to_dir: samples}

# Event selection passed to utils.load_samples as (column, operator, value) tuples.
# NOTE(review): the nested-list layout looks like an OR-of-ANDs filter spec — confirm in utils.
filters = [
    [
        # one good fatjet (fatjet with index 0 has the largest Xbb score)
        ("('ak8FatJetPt', '0')", ">=", 300),
        ("('ak8FatJetMsd', '0')", ">=", 60),
        ("('ak8FatJetPNetXbb', '0')", ">=", 0.9734), # medium WP
        # second fatjet has an Xbb score below the tight WP
        ("('ak8FatJetPNetXbb', '1')", "<", 0.9880), # tight WP
    ]
]

events_dict = {}
# The loop variable is deliberately not called `samples`, so the module-level
# `samples` dict above is never shadowed when more directories are added.
for input_dir, dir_samples in dirs.items():
    # this function will load files (only the columns selected), apply filters and compute a weight per event
    events_dict.update(utils.load_samples(input_dir, dir_samples, year, filters=filters))

# List the columns available for the first loaded sample group.
samples_loaded = list(events_dict)
keys_loaded = list(events_dict[samples_loaded[0]].keys())
print("Keys in events_dict")
for key in keys_loaded:
    print(key)

Loading GluGlutoHHto4B_cHHH1_TuneCP5_PSWeights_13TeV-powheg-pythia8
Loading GluGlutoHHto4B_cHHH2p45_TuneCP5_PSWeights_13TeV-powheg-pythia8
Loading GluGlutoHHto4B_cHHH5_TuneCP5_PSWeights_13TeV-powheg-pythia8
Loading GluGlutoHHto4B_cHHH0_TuneCP5_PSWeights_13TeV-powheg-pythia8
Loading QCD_HT-1000to1500-13TeV
Loading QCD_HT-1500to2000-13TeV
Loading QCD_HT-2000toInf-13TeV
Loading QCD_HT-200to300-13TeV
Loading QCD_HT-300to500-13TeV
Loading QCD_HT-500to700-13TeV
Loading QCD_HT-700to1000-13TeV
Keys in events_dict
('ht', 0)
('GenHiggsEta', 0)
('GenHiggsEta', 1)
('GenHiggsPhi', 0)
('GenHiggsPhi', 1)
('GenHiggsMass', 0)
('GenHiggsMass', 1)
('GenHiggsPt', 0)
('GenHiggsPt', 1)
('GenbEta', 0)
('GenbEta', 1)
('GenbEta', 2)
('GenbEta', 3)
('GenbPhi', 0)
('GenbPhi', 1)
('GenbPhi', 2)
('GenbPhi', 3)
('GenbMass', 0)
('GenbMass', 1)
('GenbMass', 2)
('GenbMass', 3)
('GenbPt', 0)
('GenbPt', 1)
('GenbPt', 2)
('GenbPt', 3)
('ak4JetHiggsMatch', 0)
('ak4JetHiggsMatch', 1)
('ak4JetHiggsMatch', 2)
('ak4JetHiggsMa

In [23]:
# Example of the Higgs candidate selection, run on the "hh4b" events
events = events_dict["hh4b"]

# Four-vectors for both collections:
#   fatjets are sorted by Xbb, jets outside the fatjet are sorted by b-score
jets_outside = make_vector(events, "ak4JetOutside")
fatjets = make_vector(events, "ak8FatJet")

# H1 candidate: fatjet at index 0, together with its Xbb score
h1 = fatjets[:, 0]
h1_xbb = events["ak8FatJetPNetXbb"][0]

# H2 candidate: sum of the two outside AK4 jets at indices 0 and 1
# (index 0 is the outside jet with the highest btagDeepFlavB score)
j3, j4 = jets_outside[:, 0], jets_outside[:, 1]
j3_btag = events["ak4JetOutsidebtagDeepFlavB"][0]
j4_btag = events["ak4JetOutsidebtagDeepFlavB"][1]
h2 = j3 + j4


In [25]:
# deltaR between the H2 candidate (j3 + j4) and the H1 candidate (fatjet at index 0)
h2.deltaR(h1)

array([2.62564097, 2.80510923, 3.23198471, ..., 1.8560489 , 3.16856139,
       3.05751364])

In [20]:
# btagDeepFlavB score of the outside AK4 jet at index 0 (the same column used for j3_btag)
events.ak4JetOutsidebtagDeepFlavB[0]

0       0.999512
1       0.998535
2       0.978027
3       0.999023
4       0.953613
          ...   
1651    0.620117
1652    0.999512
1653    0.366699
1654    0.680664
1655    0.997559
Name: 0, Length: 1656, dtype: float64

In [13]:
# PNet Xbb score of the fatjet at index 0 (the same column used for h1_xbb)
events.ak8FatJetPNetXbb[0]

0       0.992938
1       0.988888
2       0.984413
3       0.993599
4       0.982805
          ...   
1651    0.999809
1652    0.987335
1653    0.987891
1654    0.995915
1655    0.994721
Name: 0, Length: 1656, dtype: float64