In [None]:

import ROOT
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from library.trigger_efficiency_ML import *
from IPython.display import display

import warnings
warnings.filterwarnings("ignore")

In [7]:
# Interactive run parameters.
#   version  - label ("v1" ... "v7") that the loading cell compares against to
#              pick which processed dataset to open.
#   run_name - free-form label forwarded to the plotting helpers below.
# Surrounding whitespace is stripped so an accidental leading/trailing space
# (e.g. "v7 ") still matches the exact version strings used by the loading cell.
version = input("Enter the version you want (e.g. v1): ").strip()
run_name = input("Enter the run name: ").strip()

In [8]:
# --- 0. Configuration ---
# Individual HLT paths of interest.
# NOTE(review): presumably the paths combined into the "Combo" flag used below;
# this list is not referenced by any other visible cell - confirm.
signal_triggers = [
    "HLT_AK8PFHT800_TrimMass50",
    "HLT_AK8PFJet400_TrimMass30",
    "HLT_AK8PFJet500",
    "HLT_PFJet500",
    "HLT_PFHT1050",
    "HLT_PFHT500_PFMET100_PFMHT100_IDTight",
    "HLT_PFHT700_PFMET85_PFMHT85_IDTight",
    "HLT_PFHT800_PFMET75_PFMHT75_IDTight",
    "HLT_DoubleMediumChargedIsoPFTauHPS35_Trk1_eta2p1_Reg",
    "HLT_MediumChargedIsoPFTau180HighPtRelaxedIso_Trk50_eta2p1",
]

# Candidate reference triggers; only the first entry is used.
reference_triggers = ["HLT_AK8PFJet260"]
reference_trigger = reference_triggers[0]

# Column holding the signal-trigger decision.
signal_trigger = "Combo"

# Filter expressions handed to the histogramming helpers:
#   filter_pass_real - signal trigger only
#   filter_pass_meas - signal trigger AND reference trigger
#   filter_all_meas  - reference trigger only
filter_pass_real = signal_trigger
filter_all_meas = reference_trigger
filter_pass_meas = f"{signal_trigger}&&{reference_trigger}"

# Kinematic variables followed by the two trigger columns.
all_columns = [
    "HighestPt", "HT", "MET_pt", "mHH",
    "HighestMass", "SecondHighestPt", "SecondHighestMass",
    "FatHT", "MET_FatJet", "mHHwithMET",
    "HighestEta", "SecondHighestEta",
    "DeltaEta", "DeltaPhi",
    "Combo", "HLT_AK8PFJet260",
]

In [10]:
# --- 1. Loading Data ---
# Every supported version differs only by one dataset tag, which appears in the
# directory name, the file name and the tree name of the processed ROOT files
# (e.g. "v1" -> tree "briar-NewQCD" in data/processed/briar/briar-NewQCD.root).
DATASET_TAG_BY_VERSION = {
    "v1": "briar",
    "v2": "azura",
    "v3": "ashe",
    "v4": "cypress",
    "v5": "testing",
    "v6": "newQCD",
    "v7": "oldQCD",
}

print(f"Data version {version}")

if version not in DATASET_TAG_BY_VERSION:
    # Without this check an unknown version would fall through silently and
    # leave suffix / df_* undefined (or stale from an earlier run of this cell),
    # which only surfaces later as a confusing error or a wrong-dataset plot.
    raise ValueError(
        f"Unknown data version {version!r}; expected one of {sorted(DATASET_TAG_BY_VERSION)}"
    )

dataset_tag = DATASET_TAG_BY_VERSION[version]

# Suffix forwarded to the plotting helpers in the cells below.
suffix = f"-{dataset_tag}"


def _open_sample(sample):
    """Open the processed tree for one sample of the selected dataset.

    Args:
        sample: sample label as used in the file names ("QCD", "ggF", "VBF", "DATA").

    Returns:
        A ROOT.RDataFrame over tree "<tag>-New<sample>" stored in
        "data/processed/<tag>/<tag>-New<sample>.root".
    """
    tree_name = f"{dataset_tag}-New{sample}"
    return ROOT.RDataFrame(tree_name, f"data/processed/{dataset_tag}/{tree_name}.root")


df_QCD = _open_sample("QCD")
df_ggF = _open_sample("ggF")
df_VBF = _open_sample("VBF")
df_DATA = _open_sample("DATA")

Data version v7


# Distribution v1

In [None]:
# Build, for every "dist" variable, the histogram pair returned by
# numerator_and_denominator for each simulation sample (QCD, ggF, VBF).
# NOTE(review): the pair is presumably (events passing the signal trigger,
# all events) - confirm against the helper's implementation.

(variable_list, names_list, names_list_and_signal_trigger, names_list_plot,
 range_min_list, range_max_list, num_bins_list, y_min_list, y_max_list) = define_parameter("dist")

# Same y-range value for every variable in this pass.
y_range_list = [1000] * len(variable_list)

num_QCD_list, denom_QCD_list = [], []
num_ggF_list, denom_ggF_list = [], []
num_VBF_list, denom_VBF_list = [], []
all_nums_and_denoms = []

# (label, dataframe, list collecting first result, list collecting second result)
mc_samples = (
    ("QCD", df_QCD, num_QCD_list, denom_QCD_list),
    ("ggF", df_ggF, num_ggF_list, denom_ggF_list),
    ("VBF", df_VBF, num_VBF_list, denom_VBF_list),
)

for idx in range(len(range_min_list)):
    # Flat [num_QCD, denom_QCD, num_ggF, denom_ggF, num_VBF, denom_VBF] for this variable.
    per_variable = []
    for label, frame, passed_sink, total_sink in mc_samples:
        passed_hist, total_hist = numerator_and_denominator(
            frame, label, filter_pass_real, filter_pass_meas, filter_all_meas,
            variable_list[idx], names_list[idx], names_list_plot[idx],
            y_range_list[idx], run_name, suffix,
        )
        passed_sink.append(passed_hist)
        total_sink.append(total_hist)
        per_variable.extend((passed_hist, total_hist))
    all_nums_and_denoms.append(per_variable)

In [None]:
# Overlay, for each variable, the six simulation histograms gathered in
# all_nums_and_denoms (QCD, ggF, VBF; two histograms per sample) on one canvas.

# One hand-tuned value per variable, passed to draw_histograms_same.
# NOTE(review): presumably the y-axis range of each plot - confirm.
y_range_list = [
    1100, 1200, 2700, 2500, 10000, 1300, 5400,
    1200, 2700, 2300, 20000, 25000, 17000, 12000,
]

# Colours and legend entries follow the histogram order inside each
# all_nums_and_denoms entry.
color_list = [
    ROOT.kViolet, ROOT.kAzure-5,
    ROOT.kRed+1, ROOT.kOrange+2,
    ROOT.kGray+3, ROOT.kGreen+2,
]

legend_list = [
    "#splitline{QCD events passing the}{signal trigger}",
    "QCD all events",
    "#splitline{ggF events passing the}{signal trigger}",
    "ggF all events",
    "#splitline{VBF events passing the}{signal trigger}",
    "VBF all events",
]

for idx in range(len(num_QCD_list)):
    output_name = f"Distribution_MC_{names_list[idx]}.png"
    draw_histograms_same(
        all_nums_and_denoms[idx], color_list, y_range_list[idx], legend_list,
        names_list_plot[idx], "Events", output_name, "QCD", suffix,
    )

In [None]:
# Data sample: collect the histogram pair returned by numerator_and_denominator
# for every variable (per the original note: events passing the reference
# trigger, and events passing both the signal and the reference trigger).

# One hand-tuned value per variable, forwarded to the helper.
y_range_list = [700, 600, 2300, 1300, 6000, 800, 4100, 600, 1600, 1200, 7000, 7000, 5000, 9000]

data_hist_pairs = [
    numerator_and_denominator(
        df_DATA, "DATA", filter_pass_real, filter_pass_meas, filter_all_meas,
        variable_list[idx], names_list[idx], names_list_plot[idx],
        y_range_list[idx], run_name, suffix,
    )
    for idx in range(len(range_min_list))
]

# Split the pairs back into the two parallel lists.
num_DATA_list = [first for first, _second in data_hist_pairs]
denom_DATA_list = [second for _first, second in data_hist_pairs]

# Distribution v2

In [None]:
# For every "dist" variable, run comparing_plot on each simulation sample and
# keep its four results. They are stored under the names sig / all / sar / ref.
# NOTE(review): presumably signal-trigger, all-events, signal-and-reference and
# reference-only histograms - confirm against comparing_plot.

(variable_list, names_list, names_list_and_signal_trigger, names_list_plot,
 range_min_list, range_max_list, num_bins_list, y_min_list, y_max_list) = define_parameter("dist")

sig_QCD_list, all_QCD_list, sar_QCD_list, ref_QCD_list = [], [], [], []
sig_ggF_list, all_ggF_list, sar_ggF_list, ref_ggF_list = [], [], [], []
sig_VBF_list, all_VBF_list, sar_VBF_list, ref_VBF_list = [], [], [], []
all_meas_list = []
all_real_list = []

# (label, dataframe, lists collecting the sig / all / sar / ref results)
mc_samples = (
    ("QCD", df_QCD, (sig_QCD_list, all_QCD_list, sar_QCD_list, ref_QCD_list)),
    ("ggF", df_ggF, (sig_ggF_list, all_ggF_list, sar_ggF_list, ref_ggF_list)),
    ("VBF", df_VBF, (sig_VBF_list, all_VBF_list, sar_VBF_list, ref_VBF_list)),
)

for idx in range(len(range_min_list)):
    # Per-variable groups, ordered QCD, ggF, VBF:
    #   real_hists -> [sig, all] per sample, meas_hists -> [sar, ref] per sample.
    real_hists = []
    meas_hists = []
    for label, frame, sinks in mc_samples:
        sig_hist, all_hist, sar_hist, ref_hist = comparing_plot(
            frame, label, filter_pass_real, filter_pass_meas, filter_all_meas,
            variable_list[idx], names_list[idx], names_list_plot[idx],
            run_name, suffix,
        )
        for sink, hist in zip(sinks, (sig_hist, all_hist, sar_hist, ref_hist)):
            sink.append(hist)
        real_hists += [sig_hist, all_hist]
        meas_hists += [sar_hist, ref_hist]
    all_meas_list.append(meas_hists)
    all_real_list.append(real_hists)

In [None]:
# Overlay the simulation histograms for every variable: one plot for the
# "real" group (all_real_list) and one for the "meas" group (all_meas_list).

# One hand-tuned value per variable, passed to draw_histograms_same.
y_range_list = [1100, 1200, 2700, 2500, 10000, 1300, 5400, 1200, 2700, 2300, 20000, 25000, 17000, 12000]

color_list   = [ROOT.kViolet, ROOT.kAzure-5, ROOT.kRed+1, ROOT.kOrange+2, ROOT.kGray+3, ROOT.kGreen+2]

# Legend for the "_real" plots: [sig, all] per sample.
legend_list  = ["#splitline{QCD events passing the}{signal trigger}", "QCD all events", "#splitline{ggF events passing the}{signal trigger}",
               "ggF all events", "#splitline{VBF events passing the}{signal trigger}", "VBF all events"]

# Legend for the "_meas" plots: [sar, ref] per sample. These previously reused
# legend_list, which labelled them as "signal trigger" / "all events".
# NOTE(review): wording follows the sar_/ref_ variable names and the
# filter_pass_meas / filter_all_meas selections (signal && reference, reference
# only) - confirm against comparing_plot.
legend_list_meas = [
    "#splitline{QCD events passing the}{signal and reference triggers}",
    "#splitline{QCD events passing the}{reference trigger}",
    "#splitline{ggF events passing the}{signal and reference triggers}",
    "#splitline{ggF events passing the}{reference trigger}",
    "#splitline{VBF events passing the}{signal and reference triggers}",
    "#splitline{VBF events passing the}{reference trigger}",
]

for i in range(len(sig_QCD_list)):
    draw_histograms_same(all_real_list[i], color_list, y_range_list[i], legend_list, names_list_plot[i], "Events", "Distribution_MC_" + names_list[i] + "_real" + ".png", "QCD", suffix)
    draw_histograms_same(all_meas_list[i], color_list, y_range_list[i], legend_list_meas, names_list_plot[i], "Events", "Distribution_MC_" + names_list[i] + "_meas" + ".png", "QCD", suffix)

In [None]:
# Data sample: for each variable, store the two histograms returned by
# numerator_and_denominator (per the original note: reference-trigger events
# and events passing both the signal and the reference trigger).

# One hand-tuned value per variable, forwarded to the helper.
y_range_list = [
    700, 600, 2300, 1300, 6000, 800, 4100,
    600, 1600, 1200, 7000, 7000, 5000, 9000,
]

data_hist_pairs = []
for idx in range(len(range_min_list)):
    data_hist_pairs.append(
        numerator_and_denominator(
            df_DATA, "DATA", filter_pass_real, filter_pass_meas, filter_all_meas,
            variable_list[idx], names_list[idx], names_list_plot[idx],
            y_range_list[idx], run_name, suffix,
        )
    )

# Unzip into the two parallel result lists.
num_DATA_list = [first for first, _second in data_hist_pairs]
denom_DATA_list = [second for _first, second in data_hist_pairs]

# Efficiency Plot

In [11]:
# Produce the efficiency plots for each simulation sample in turn.
# Lookup table from sample label to its RDataFrame; the dict order fixes the
# processing order (QCD, ggF, VBF).
frame_by_sample = {"QCD": df_QCD, "ggF": df_ggF, "VBF": df_VBF}
samples = list(frame_by_sample)

for sample in samples:
    data = frame_by_sample[sample]

    # Plot settings are requested per sample.
    (variable_list, names_list, names_list_and_signal_trigger, names_list_plot,
     range_min_list, range_max_list, num_bins_list, y_min_list, y_max_list) = define_parameter(sample)

    # Each iteration overwrites eff_real_list / eff_meas_list, so after the
    # loop they hold the results of the last sample only.
    eff_real_list, eff_meas_list = choosing_reference_trigger(
        sample, data, variable_list, names_list, names_list_plot,
        range_min_list, range_max_list, num_bins_list, y_min_list, y_max_list,
        signal_trigger, reference_trigger, run_name, suffix,
    )

Directory '/Users/khatran/Documents/CERN/cern-source/project/CMS-trigger-efficiency-briar/result/03-09-2025-oldQCD' not found. Creating new directory for plots.


Info in <TCanvas::Print>: png file /Users/khatran/Documents/CERN/cern-source/project/CMS-trigger-efficiency-briar/result/03-09-2025-oldQCD/QCD_TEfficiency_HighestPt_HLT_AK8PFJet260_Run2.png has been created
Info in <TCanvas::Print>: png file /Users/khatran/Documents/CERN/cern-source/project/CMS-trigger-efficiency-briar/result/03-09-2025-oldQCD/QCD_TEfficiency_HT_HLT_AK8PFJet260_Run2.png has been created
Info in <TCanvas::Print>: png file /Users/khatran/Documents/CERN/cern-source/project/CMS-trigger-efficiency-briar/result/03-09-2025-oldQCD/QCD_TEfficiency_MET_pt_HLT_AK8PFJet260_Run2.png has been created
Info in <TCanvas::Print>: png file /Users/khatran/Documents/CERN/cern-source/project/CMS-trigger-efficiency-briar/result/03-09-2025-oldQCD/QCD_TEfficiency_mHH_HLT_AK8PFJet260_Run2.png has been created
Info in <TCanvas::Print>: png file /Users/khatran/Documents/CERN/cern-source/project/CMS-trigger-efficiency-briar/result/03-09-2025-oldQCD/QCD_TEfficiency_HighestMass_HLT_AK8PFJet260_Run2.

# Distribution Classification Plots

In [None]:
# Placeholder cell: prints a fixed greeting and neither reads nor modifies any
# analysis state.
# NOTE(review): looks like leftover scaffolding - confirm whether it can be removed.
print("Hello World!")

In [None]:
# Pull the "dist" variables plus the signal-trigger column of the QCD sample
# into pandas and look at how each variable relates to the "Combo" flag.
(variable_list, names_list, names_list_and_signal_trigger, names_list_plot,
 range_min_list, range_max_list, num_bins_list, y_min_list, y_max_list) = define_parameter("dist")

df = pd.DataFrame(df_QCD.AsNumpy(columns=names_list_and_signal_trigger))

# Feature matrix (every column except "Combo") and integer label vector.
X = df.drop(columns='Combo').values
y = df['Combo'].astype('int').values

display(df)

display(X)
display(y)

# One scatter plot per variable. The integer cast of "Combo" is computed once
# (y) rather than on every iteration, and each figure carries its own axis
# labels and title so it can be read without the printed name above it.
for name in names_list:
    print(name)

    fig, ax = plt.subplots()
    ax.scatter(df[name], y)
    ax.set(xlabel=name, ylabel="Combo", title=f"Combo vs {name}")
    plt.show()
    # Release the figure so a long variable list does not accumulate open figures.
    plt.close(fig)

In [None]:
# Fraction of events with MET_pt < 100 whose "Combo" value is non-zero.
# The selection is evaluated once and reused for both counts.
low_met_combo = df.loc[df['MET_pt'] < 100, 'Combo']
a = np.count_nonzero(low_met_combo)   # selected events with non-zero Combo
b = low_met_combo.size                # all selected events
# An empty selection would make a/b raise ZeroDivisionError; report NaN instead.
print(a / b if b else float("nan"))

In [None]:
# Fraction of events with MET_FatJet > 1500 whose "Combo" value is non-zero,
# followed by the selected rows themselves.
# The boolean mask is built once and reused for the counts and the display.
high_met_fatjet = df['MET_FatJet'] > 1500
high_met_fatjet_combo = df.loc[high_met_fatjet, 'Combo']
a = np.count_nonzero(high_met_fatjet_combo)   # selected events with non-zero Combo
b = high_met_fatjet_combo.size                # all selected events
# An empty selection would make a/b raise ZeroDivisionError; report NaN instead.
print(a / b if b else float("nan"))
display(df[high_met_fatjet])