In [141]:
# Enable IPython's autoreload extension in mode 2 for this kernel.
# Re-running this cell in a live kernel prints an "already loaded" notice;
# `%reload_ext autoreload` is the variant suggested by that notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [142]:
import pandas as pd
import os
import yaml
from coffea.nanoevents import TreeMakerSchema, BaseSchema, NanoEventsFactory
# import argparse
import sys

# NOTE(review): this wildcard import is the only visible source of CutDict,
# CutType, CutAnalysis, Plotter, process_ups_candidates and the `ak` alias used
# in later cells -- presumably all exported by helperClasses; confirm, and
# prefer explicit imports so the dependencies are visible here.
from helperClasses import *

In [143]:
# Switches selecting which samples are loaded and processed. The loading code
# later in this cell resets a flag to False when its sample fails to load.
run_data = True
run_mc = False

def load_events(file_path, schema_class, entry_stop):
    """
    Open the "ntuple" tree of a ROOT file through NanoEventsFactory.

    Args:
        file_path: Path of the ROOT file to read.
        schema_class: Schema class forwarded as ``schemaclass``.
        entry_stop: Value forwarded as ``entry_stop``.

    Returns:
        The object returned by ``.events()``, or None when anything goes
        wrong. Failures are reported on stdout rather than raised.
    """
    loaded = None
    try:
        factory = NanoEventsFactory.from_root(
            {file_path: "ntuple"},
            schemaclass=schema_class,
            entry_stop=entry_stop,
        )
        loaded = factory.events()
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        # Deliberately broad: any other failure is reported and turned into None.
        print(f"Error loading events from '{file_path}': {e}")
    return loaded

def load_config(config_path):
    """
    Read a YAML configuration file and return its parsed content.

    Args:
        config_path: Path of the YAML file to open.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.

    A missing file or a YAML parse error is reported on stdout, after which
    the function terminates via ``sys.exit(1)``.
    """
    try:
        with open(config_path, 'r') as stream:
            parsed = yaml.safe_load(stream)
    except FileNotFoundError:
        print(f"Error: Configuration file '{config_path}' not found.")
        sys.exit(1)
    except yaml.YAMLError as parse_error:
        print(f"Error parsing YAML configuration file: {parse_error}")
        sys.exit(1)
    else:
        return parsed

config = load_config('config.yaml')

# Placeholders: a sample stays None unless its run_* flag is set and loading succeeds.
events_data, events_mc = None, None

# Load events
if run_data:
    events_data = load_events(config['data_path'], BaseSchema, config['entry_stop'])
    run_data = events_data is not None
    if not run_data:
        print("Failed to load data events. Skipping data analysis.")

if run_mc:
    events_mc = load_events(config['mc_path'], BaseSchema, config['entry_stop'])
    run_mc = events_mc is not None
    if not run_mc:
        print("Failed to load MC events. Skipping MC analysis.")

# Stop when neither sample is usable.
if not (run_data or run_mc):
    print("No valid data or MC events loaded. Exiting.")
    sys.exit(1)

# Select columns
if run_data:
    events_data = events_data[config['columns']].compute()
if run_mc:
    events_mc = events_mc[config['columns']].compute()

In [144]:
# Output directory taken from the 'savepath_base' config entry; created when
# missing, left alone when it already exists.
save_path = config['savepath_base']
os.makedirs(save_path, exist_ok=True)

In [145]:
# NOTE(review): presumably adds the Ups* candidate columns that the later cuts
# read -- confirm in helperClasses. The result is rebound to the same name, so
# re-running this cell feeds already-processed events back into the helper.
events_data = process_ups_candidates(events_data)

In [146]:
# Container for the selection cuts; each cut is registered under an integer id.
cutdict = CutDict()

# Cut 0: entries with B_J1_mass above -1.
cutdict.add_cut(
    0,
    CutType(
        name="0",
        long_name="Preselection",
        mask=events_data['B_J1_mass'] > -1,
        variables=['B_J1_mass'],
        plot=False,
    ),
)

# Cut 1: Mu_TriggerPath must equal 1.
cutdict.add_cut(
    1,
    CutType(
        name="1",
        long_name="Muon Trigger",
        mask=events_data['Mu_TriggerPath'] == 1,
        variables=['Mu_TriggerPath'],
        plot=False,
    ),
)

# Cut 2: all four B_Mu<i>_soft columns must equal 1.
cutdict.add_cut(
    2,
    CutType(
        name="2",
        long_name="Soft Muons",
        mask=(
            (events_data['B_Mu1_soft'] == 1)
            & (events_data['B_Mu2_soft'] == 1)
            & (events_data['B_Mu3_soft'] == 1)
            & (events_data['B_Mu4_soft'] == 1)
        ),
        # Fixed: this list used to name 'B_J<i>_soft', which the mask never
        # reads; it now names the columns the mask is actually built from.
        variables=['B_Mu1_soft', 'B_Mu2_soft', 'B_Mu3_soft', 'B_Mu4_soft'],
        plot=False,
    ),
)

# Cut 3: every muon pT above 3.
cutdict.add_cut(
    3,
    CutType(
        name="3",
        long_name="Muon pT",
        mask=(
            (events_data['B_Mu1_pt'] > 3)
            & (events_data['B_Mu2_pt'] > 3)
            & (events_data['B_Mu3_pt'] > 3)
            & (events_data['B_Mu4_pt'] > 3)
        ),
        variables=['B_Mu1_pt', 'B_Mu2_pt', 'B_Mu3_pt', 'B_Mu4_pt'],
        plot=True,
        bins=100,
        x_range=(0, 100),
        labels=["Mu1_pt", "Mu2_pt", "Mu3_pt", "Mu4_pt"],
        xlabel="pT (GeV)",
    ),
)

# Cut 4: every muon within |eta| < 2.4.
cutdict.add_cut(
    4,
    CutType(
        name="4",
        long_name="Detector acceptance",
        mask=(
            (abs(events_data['B_Mu1_eta']) < 2.4)
            & (abs(events_data['B_Mu2_eta']) < 2.4)
            & (abs(events_data['B_Mu3_eta']) < 2.4)
            & (abs(events_data['B_Mu4_eta']) < 2.4)
        ),
        variables=['B_Mu1_eta', 'B_Mu2_eta', 'B_Mu3_eta', 'B_Mu4_eta'],
        plot=True,
        bins=100,
        x_range=(-3, 3),
        labels=["Mu1_eta", "Mu2_eta", "Mu3_eta", "Mu4_eta"],
        xlabel="Eta",
    ),
)

# Cut 5: either the (J1, J2) pair or the (J3, J4) pair has both vertex
# probabilities above 0.01.
cutdict.add_cut(
    5,
    CutType(
        name="5",
        long_name="Any dimuon pair vertex",
        mask=(
            (
                (events_data['B_J1_VtxProb'] > 0.01)
                & (events_data['B_J2_VtxProb'] > 0.01)
            )
            | (
                (events_data['B_J3_VtxProb'] > 0.01)
                & (events_data['B_J4_VtxProb'] > 0.01)
            )
        ),
        variables=['B_J1_VtxProb', 'B_J2_VtxProb', 'B_J3_VtxProb', 'B_J4_VtxProb'],
        plot=True,
        bins=100,
        x_range=(0, 1),
        labels=["J1_VtxProb", "J2_VtxProb", "J3_VtxProb", "J4_VtxProb"],
        xlabel="Vtx Prob",
    ),
)

# Cut 6: FourL_VtxProb above 0.01.
cutdict.add_cut(
    6,
    CutType(
        name="6",
        long_name="Four muon vertex",
        mask=(events_data['FourL_VtxProb'] > 0.01),
        variables=['FourL_VtxProb'],
        plot=True,
        bins=100,
        x_range=(0, 1),
        labels=["FourL_VtxProb"],
        xlabel="Vtx Prob",
    ),
)

# Cut 7: entries with a positive UpsMass.
cutdict.add_cut(
    7,
    CutType(
        name="7",
        long_name="Candidate Combine",
        mask=events_data['UpsMass'] > 0,
        variables=['UpsMass'],
        plot=False,
    ),
)

# Cut 8: both Ups_VtxProb values above 0.01.
cutdict.add_cut(
    8,
    CutType(
        name="8",
        long_name="Dimuon Vertex Prob",
        mask=(
            (events_data['Ups_VtxProb1'] > 0.01)
            & (events_data['Ups_VtxProb2'] > 0.01)
        ),
        variables=['Ups_VtxProb1', 'Ups_VtxProb2'],
        plot=True,
        bins=100,
        x_range=(0, 1),
        labels=["VtxProb1", "VtxProb2"],
        xlabel="Vtx Prob",
    ),
)

# Cut 9: both Ups_Pt values above 5.
cutdict.add_cut(
    9,
    CutType(
        name="9",
        long_name="Dimuon pT",
        mask=(
            (events_data['Ups_Pt1'] > 5)
            & (events_data['Ups_Pt2'] > 5)
        ),
        variables=['Ups_Pt1', 'Ups_Pt2'],
        plot=True,
        bins=100,
        x_range=(0, 20),
        labels=["Ups_Pt1", "Ups_Pt2"],
        xlabel="pT (GeV)",
    ),
)

# Cut 10: Ups1_mass inside (2.8, 3.4) and Ups2_mass inside (70, 110).
cutdict.add_cut(
    10,
    CutType(
        name="10",
        long_name="Dimuon mass",
        mask=(
            (events_data['Ups1_mass'] > 2.8)
            & (events_data['Ups1_mass'] < 3.4)
            & (events_data['Ups2_mass'] > 70)
            & (events_data['Ups2_mass'] < 110)
        ),
        variables=['Ups1_mass', 'Ups2_mass'],
        plot=True,
        bins=100,
        # Fixed: was (0, 20), which lies entirely below the 70-110 window
        # required of Ups2_mass, so that selection could never appear in the
        # plot (assuming x_range is the plotted axis range -- confirm in
        # Plotter). The new range covers both mass windows.
        # NOTE(review): with 100 bins over this range the narrow Ups1 window is
        # narrower than one bin; consider plotting the two masses separately.
        x_range=(0, 120),
        labels=["Ups1_mass", "Ups2_mass"],
        xlabel="mass (GeV)",
    ),
)


# Cut 11: FourL_pt above 5.
cutdict.add_cut(
    11,
    CutType(
        name="11",
        long_name="Four muon pT",
        mask=(events_data['FourL_pt'] > 5),
        variables=['FourL_pt'],
        plot=True,
        bins=100,
        x_range=(0, 100),
        labels=["FourL_pt"],
        xlabel="pT (GeV)",
    ),
)


# Cut 12: FourL_mass inside (112, 162); the plot range matches that window.
cutdict.add_cut(
    12,
    CutType(
        name="12",
        long_name="Four muon mass",
        mask=(
            (events_data['FourL_mass'] > 112)
            & (events_data['FourL_mass'] < 162)
        ),
        variables=['FourL_mass'],
        plot=True,
        bins=50,
        x_range=(112, 162),
        labels=["FourL_mass"],
        xlabel="mass (GeV)",
    ),
)


In [147]:
# List every registered cut: the dictionary key followed by the cut's printed form.
for key, item in cutdict.cutdict.items():
    print(key, item)

0 Cut: 0 (Preselection)
1 Cut: 1 (Muon Trigger)
2 Cut: 2 (Soft Muons)
3 Cut: 3 (Muon pT)
4 Cut: 4 (Detector acceptance)
5 Cut: 5 (Any dimuon pair vertex)
6 Cut: 6 (Four muon vertex)
7 Cut: 7 (Candidate Combine)
8 Cut: 8 (Dimuon Vertex Prob)
9 Cut: 9 (Dimuon pT)
10 Cut: 10 (Dimuon mass)
11 Cut: 11 (Four muon pT)
12 Cut: 12 (Four muon mass)


In [148]:
# Cut ids handed to prepare_masks, here all thirteen in ascending order.
myorder = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
cut_analysis = CutAnalysis(events_data, cutdict)
# NOTE(review): the printed counts only ever decrease, so the masks look
# cumulative in the given order -- confirm in CutAnalysis.prepare_masks.
cutSummary = cut_analysis.prepare_masks(myorder)

Cut 0: Preselection - 1389300 events, 499503 candidates
Cut 1: Muon Trigger - 253858 events, 156201 candidates
Cut 2: Soft Muons - 46683 events, 41296 candidates
Cut 3: Muon pT - 32259 events, 28024 candidates
Cut 4: Detector acceptance - 31107 events, 27119 candidates
Cut 5: Any dimuon pair vertex - 30668 events, 26762 candidates
Cut 6: Four muon vertex - 27592 events, 24186 candidates
Cut 7: Candidate Combine - 1744 events, 1647 candidates
Cut 8: Dimuon Vertex Prob - 1686 events, 1597 candidates
Cut 9: Dimuon pT - 1508 events, 1436 candidates
Cut 10: Dimuon mass - 1315 events, 1259 candidates
Cut 11: Four muon pT - 1267 events, 1212 candidates
Cut 12: Four muon mass - 540 events, 527 candidates


In [149]:
# Tabulate the summary, leaving out its 'mask' and 'cutobj' entries.
cutSummarydf = {
    field: values
    for field, values in cutSummary.items()
    if field not in ('mask', 'cutobj')
}
pd.DataFrame(cutSummarydf)

Unnamed: 0,cut_id,cut_name,nevents,ncandidates
0,0,Preselection,1389300,499503
1,1,Muon Trigger,253858,156201
2,2,Soft Muons,46683,41296
3,3,Muon pT,32259,28024
4,4,Detector acceptance,31107,27119
5,5,Any dimuon pair vertex,30668,26762
6,6,Four muon vertex,27592,24186
7,7,Candidate Combine,1744,1647
8,8,Dimuon Vertex Prob,1686,1597
9,9,Dimuon pT,1508,1436


In [150]:
# The plotter is given the events, the cut registry and the output directory.
# NOTE(review): presumably figures are written under save_path -- confirm in Plotter.
plotter = Plotter(events_data, cutdict, save_path)
plotter.plot_preselection()

Cut 0 does not have a plot.
Cut 1 does not have a plot.
Cut 2 does not have a plot.
Plotting cut 3: Muon pT
Plotting cut 4: Detector acceptance
Plotting cut 5: Any dimuon pair vertex
Plotting cut 6: Four muon vertex
Cut 7 does not have a plot.
Plotting cut 8: Dimuon Vertex Prob
Plotting cut 9: Dimuon pT
Plotting cut 10: Dimuon mass
Plotting cut 11: Four muon pT
Plotting cut 12: Four muon mass


In [151]:
# NOTE(review): per the log, each plottable cut appears to be drawn with the
# selection of the preceding step applied -- confirm in Plotter.plot_summary.
plotter.plot_summary(cutSummary)

Next cut: 0 Preselection (1389300)
Cut 0 does not have a plot.
Next cut: 1 Muon Trigger (253858)
Cut 1 does not have a plot.
Next cut: 2 Soft Muons (46683)
Cut 2 does not have a plot.
Next cut: 3 Muon pT (32259)
Drawn at: 2 Soft Muons (46683)
Next cut: 4 Detector acceptance (31107)
Drawn at: 3 Muon pT (32259)
Next cut: 5 Any dimuon pair vertex (30668)
Drawn at: 4 Detector acceptance (31107)
Next cut: 6 Four muon vertex (27592)
Drawn at: 5 Any dimuon pair vertex (30668)
Next cut: 7 Candidate Combine (1744)
Cut 7 does not have a plot.
Next cut: 8 Dimuon Vertex Prob (1686)
Drawn at: 7 Candidate Combine (1744)
Next cut: 9 Dimuon pT (1508)
Drawn at: 8 Dimuon Vertex Prob (1686)
Next cut: 10 Dimuon mass (1315)
Drawn at: 9 Dimuon pT (1508)
Next cut: 11 Four muon pT (1267)
Drawn at: 10 Dimuon mass (1315)
Next cut: 12 Four muon mass (540)
Drawn at: 11 Four muon pT (1267)


In [152]:
# Cut steps passed to plot_single_variable; the log reports each plotted
# variable together with this list of steps.
cuts_to_show = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
plotter.plot_single_variable(cuts_to_show, cutSummary)

Cut 0 does not have a plot.
Cut 1 does not have a plot.
Cut 2 does not have a plot.
Plotting variable B_Mu1_pt at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu2_pt at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu3_pt at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu4_pt at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu1_eta at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu2_eta at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu3_eta at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_Mu4_eta at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_J1_VtxProb at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_J2_VtxProb at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_J3_VtxProb at steps: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Plotting variable B_

In [154]:
# pt, eta and phi of each of the four muons, ordered muon by muon.
cols_to_keep = [
    f'B_Mu{muon_idx}_{quantity}'
    for muon_idx in range(1, 5)
    for quantity in ('pt', 'eta', 'phi')
]

# Apply the last mask of the cut summary, flatten to a DataFrame with a fresh
# 0..N-1 index, and keep only the muon kinematics columns.
final = events_data[cutSummary['mask'][-1]]
df = ak.to_dataframe(final).reset_index(drop=True)[cols_to_keep]

In [158]:
# a = events_data[cutSummary['mask'][-1]]
# b = a.B_J1_mass
# len(b[ak.num(b, axis=1) > 0])
# ak.sum(ak.num(b, axis=1))

In [161]:
# Show the muon kinematics table; the rendered output elides the middle rows.
df

Unnamed: 0,B_Mu1_pt,B_Mu1_eta,B_Mu1_phi,B_Mu2_pt,B_Mu2_eta,B_Mu2_phi,B_Mu3_pt,B_Mu3_eta,B_Mu3_phi,B_Mu4_pt,B_Mu4_eta,B_Mu4_phi
0,103.186508,1.438373,2.870542,22.894049,1.462914,-1.040241,13.316124,0.776021,-0.669847,6.885476,1.088768,-0.789165
1,28.450871,-1.546555,-1.876461,22.295788,-2.318979,-0.428387,19.671400,0.571382,1.874298,3.721630,-2.293579,-0.097019
2,69.444855,-1.242678,-0.855032,22.235916,-0.310465,-2.775220,13.077477,-0.125938,-2.812331,12.859173,1.182070,1.395073
3,24.584282,-1.059869,-0.977885,12.266626,2.121309,2.915663,8.758736,1.589972,0.562187,4.361233,1.998249,0.850984
4,26.317570,-0.562451,2.546980,22.479027,2.239649,-0.385522,5.566718,-1.496357,2.202571,4.672547,-1.961050,1.829848
...,...,...,...,...,...,...,...,...,...,...,...,...
535,149.119492,1.103105,-3.038614,9.626973,0.147911,-0.037731,9.478017,0.432173,0.008336,9.405591,0.428309,-0.123878
536,64.013733,2.146585,2.613188,35.735947,1.566478,0.340355,7.855406,0.113548,-0.349307,5.382901,-0.199727,-0.710128
537,26.012552,2.236405,0.649888,20.759472,-0.705649,-1.453095,10.663802,-0.680411,-1.451621,5.661178,-0.927125,-1.279199
538,36.809063,2.200942,1.118631,30.018286,0.618405,-2.054881,3.943361,-0.030299,-2.841347,3.661141,-0.750682,-2.511314


In [159]:
# def get_FOM(events_data, events_mc, savepath_data, savepath_mc, elePt_low, elePt_high, eleEta, ymass_upper):
#     """
#     Calculate the Figure of Merit.
#     """
#     analysis_data = Analysis(events_data, savepath_data)
#     analysis_mc = Analysis(events_mc, savepath_mc)
    
#     nb = analysis_data.get_count(elePt_low, elePt_high, eleEta, ymass_upper)
#     nEff = analysis_mc.get_count(elePt_low, elePt_high, eleEta, ymass_upper) / 100
#     FOM = nEff / np.sqrt(nb)
#     return nEff, nb, FOM

# def load_config(config_path):
#     """
#     Load configuration from a YAML file.
#     """
#     try:
#         with open(config_path, 'r') as config_file:
#             return yaml.safe_load(config_file)
#     except FileNotFoundError:
#         print(f"Error: Configuration file '{config_path}' not found.")
#         sys.exit(1)
#     except yaml.YAMLError as e:
#         print(f"Error parsing YAML configuration file: {e}")
#         sys.exit(1)

# def load_events(file_path, schema_class, entry_stop):
#     """
#     Load events from a ROOT file with error handling.
#     """
#     try:
#         events = NanoEventsFactory.from_root(
#             {file_path: "ntuple"}, 
#             schemaclass=schema_class, 
#             entry_stop=entry_stop
#         ).events()
#         return events
#     except FileNotFoundError:
#         print(f"Error: File '{file_path}' not found.")
#         return None
#     except Exception as e:
#         print(f"Error loading events from '{file_path}': {e}")
#         return None

# def main(config_path='config.yaml', run_data=True, run_mc=True):
#     # Load configuration
#     config = load_config(config_path)

#     events_data = None
#     events_mc = None

#     # Load events
#     if run_data:
#         events_data = load_events(config['data_path'], BaseSchema, config['entry_stop'])
#         if events_data is None:
#             print("Failed to load data events. Skipping data analysis.")
#             run_data = False

#     if run_mc:
#         events_mc = load_events(config['mc_path'], BaseSchema, config['entry_stop'])
#         if events_mc is None:
#             print("Failed to load MC events. Skipping MC analysis.")
#             run_mc = False

#     if not run_data and not run_mc:
#         print("No valid data or MC events loaded. Exiting.")
#         sys.exit(1)

#     # Select columns
#     if run_data:
#         events_data = events_data[config['columns']].compute()
#     if run_mc:
#         events_mc = events_mc[config['columns']].compute()

#     # Create DataFrame to store results
#     df = pd.DataFrame(columns=['elePt_low', 'elePt_high', 'eleEta', 'ymass_upper', 'Efficiency', 'Background', 'FOM'])

#     # Loop over different cut values
#     for elePt_low in config['cut_parameters']['elePt_low']:
#         for elePt_high in config['cut_parameters']['elePt_high']:
#             for eleEta in config['cut_parameters']['eleEta']:
#                 for ymass_upper in config['cut_parameters']['ymass_upper']:
#                     savepath_data = f"{config['savepath_data_base']}el_{elePt_low}_eh_{elePt_high}_eE_{eleEta}_yup_{ymass_upper}"
#                     savepath_mc = f"{config['savepath_mc_base']}el_{elePt_low}_eh_{elePt_high}_eE_{eleEta}_yup_{ymass_upper}"

#                     os.makedirs(savepath_data, exist_ok=True)
#                     os.makedirs(savepath_mc, exist_ok=True)

#                     eff, nb, FOM = 0, 0, 0
                    
#                     if run_data and run_mc:
#                         eff, nb, FOM = get_FOM(events_data, events_mc, savepath_data, savepath_mc, elePt_low, elePt_high, eleEta, ymass_upper)
#                     elif run_data:
#                         analysis_data = Analysis(events_data, savepath_data)
#                         nb = analysis_data.get_count(elePt_low, elePt_high, eleEta, ymass_upper)
#                     elif run_mc:
#                         analysis_mc = Analysis(events_mc, savepath_mc)
#                         eff = analysis_mc.get_count(elePt_low, elePt_high, eleEta, ymass_upper) / 100

#                     df.loc[len(df)] = {'elePt_low': elePt_low, 'elePt_high': elePt_high, 'eleEta': eleEta, 'ymass_upper': ymass_upper, 'Efficiency': eff, 'Background': nb, 'FOM': FOM}
#                     print(f"elePt_low: {elePt_low}, elePt_high: {elePt_high}, eleEta: {eleEta}, ymass_upper: {ymass_upper}")
#                     print(f"Efficiency: {eff}")
#                     print(f"Background: {nb}")
#                     print(f"FOM: {FOM}")
#                     print(df)
#                     print("\n")

#     print(df)
#     df.to_csv(config['output_file'], index=False)

# # if __name__ == "__main__":
# #     parser = argparse.ArgumentParser(description="Run particle physics analysis on data and/or MC events.")
# #     parser.add_argument("--config", default="config.yaml", help="Path to the configuration file")
# #     parser.add_argument("--data", action="store_true", help="Run analysis on data events")
# #     parser.add_argument("--mc", action="store_true", help="Run analysis on MC events")
# #     args = parser.parse_args()

# #     if not args.data and not args.mc:
# #         print("Error: You must specify at least one of --data or --mc")
# #         sys.exit(1)

#     # main(config_path=args.config, run_data=args.data, run_mc=args.mc)

In [160]:
# main(config_path="config.yaml", run_data=True, run_mc=False)