In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import os
# import argparse
import sys

from helperClasses import *

In [14]:
# Sample toggles: set one to False to leave that sample out of this session.
run_data, run_mc = True, True

config = load_config('config.yaml')

# Placeholders so both names exist even when a sample is switched off.
events_data = events_mc = None

# Load events; a sample whose load returns None gets its toggle switched off.
if run_data:
    events_data = load_events(config['data_path'], BaseSchema, config['entry_stop'])
    run_data = events_data is not None
    if not run_data:
        print("Failed to load data events. Skipping data analysis.")

if run_mc:
    events_mc = load_events(config['mc_path'], BaseSchema, config['entry_stop'])
    run_mc = events_mc is not None
    if not run_mc:
        print("Failed to load MC events. Skipping MC analysis.")

# Nothing left to analyse: stop here.
if not (run_data or run_mc):
    print("No valid data or MC events loaded. Exiting.")
    sys.exit(1)

# Select columns: keep only the configured columns, then call .compute() on the selection.
if run_data:
    events_data = events_data[config['columns']].compute()
if run_mc:
    events_mc = events_mc[config['columns']].compute()

In [15]:
# Base output directory taken from the config; it is handed to Plotter further down.
save_path = config['savepath_base']
# exist_ok=True makes re-running this cell harmless when the directory already exists.
os.makedirs(save_path, exist_ok=True)

In [16]:
# Run the helper's candidate processing on each sample, overwriting events_* in place
# (re-running this cell therefore feeds already-processed events back in).
# The four positional numbers are presumably the lower/upper edges of the two dimuon
# mass windows in GeV (data: 2.8-3.4 and 70-110, MC: 3.0-3.2 and 80-100)
# — TODO confirm against process_ups_candidates in helperClasses.
# NOTE(review): both calls run even when run_data / run_mc was switched off above,
# in which case the corresponding events_* variable is still None.
events_data = process_ups_candidates(events_data, 2.8, 3.4, 70, 110)
events_mc = process_ups_candidates(events_mc, 3.0, 3.2, 80, 100)

In [19]:
def create_cutdict(events):
    """Build the table of selection cuts for one event sample.

    A fresh ``CutDict`` is filled with fourteen ``CutType`` entries under the
    integer keys 0-13. Every mask is evaluated immediately against ``events``,
    so the returned object is tied to that sample; call this once per sample.

    Cuts 12 and 13 both act on ``FourL_mass`` (inside 112-162 versus outside
    120-130). Which cuts are actually applied is decided by the caller.

    Parameters
    ----------
    events
        Container indexed by column name (``events['B_Mu1_pt']``). The columns
        must support comparisons, ``abs`` and the ``&`` / ``|`` operators.

    Returns
    -------
    CutDict
        The populated cut table.
    """

    def all_pass(columns, passes):
        """AND together ``passes(events[col])`` over every column in ``columns``."""
        combined = passes(events[columns[0]])
        for col in columns[1:]:
            combined = combined & passes(events[col])
        return combined

    # One column list per multi-column cut, used for BOTH the mask and the
    # `variables` argument so the two cannot drift apart.
    muon_soft = ['B_Mu1_soft', 'B_Mu2_soft', 'B_Mu3_soft', 'B_Mu4_soft']
    muon_pt = ['B_Mu1_pt', 'B_Mu2_pt', 'B_Mu3_pt', 'B_Mu4_pt']
    muon_eta = ['B_Mu1_eta', 'B_Mu2_eta', 'B_Mu3_eta', 'B_Mu4_eta']
    pair_vtx = ['B_J1_VtxProb', 'B_J2_VtxProb', 'B_J3_VtxProb', 'B_J4_VtxProb']

    cutdict = CutDict()

    cutdict.add_cut(0,
                    CutType(name="0",
                            long_name="Preselection",
                            mask=events['B_J1_mass'] > -1,
                            variables=['B_J1_mass'],
                            plot=False))

    cutdict.add_cut(1,
                    CutType(name="1",
                            long_name="Muon Trigger",
                            mask=events['Mu_TriggerPath'] == 1,
                            variables=['Mu_TriggerPath'],
                            plot=False))

    # The variables are the soft-ID flags the mask reads (B_Mu*_soft).
    cutdict.add_cut(2,
                    CutType(name="2",
                            long_name="Soft Muons",
                            mask=all_pass(muon_soft, lambda flag: flag == 1),
                            variables=muon_soft,
                            plot=False))

    cutdict.add_cut(3,
                    CutType(name="3",
                            long_name="Muon pT",
                            mask=all_pass(muon_pt, lambda pt: pt > 3),
                            variables=muon_pt,
                            plot=True,
                            bins=100,
                            x_range=(0, 100),
                            labels=["Mu1_pt", "Mu2_pt", "Mu3_pt", "Mu4_pt"],
                            xlabel="pT (GeV)"))

    cutdict.add_cut(4,
                    CutType(name="4",
                            long_name="Detector acceptance",
                            mask=all_pass(muon_eta, lambda eta: abs(eta) < 2.4),
                            variables=muon_eta,
                            plot=True,
                            bins=100,
                            x_range=(-3, 3),
                            labels=["Mu1_eta", "Mu2_eta", "Mu3_eta", "Mu4_eta"],
                            xlabel="Eta"))

    # Passes when either pairing (J1 with J2, or J3 with J4) has both vertex
    # probabilities above threshold.
    pair_12_ok = all_pass(pair_vtx[:2], lambda prob: prob > 0.01)
    pair_34_ok = all_pass(pair_vtx[2:], lambda prob: prob > 0.01)
    cutdict.add_cut(5,
                    CutType(name="5",
                            long_name="Any dimuon pair vertex",
                            mask=pair_12_ok | pair_34_ok,
                            variables=pair_vtx,
                            plot=True,
                            bins=100,
                            x_range=(0, 1),
                            labels=["J1_VtxProb", "J2_VtxProb", "J3_VtxProb", "J4_VtxProb"],
                            xlabel="Vtx Prob"))

    cutdict.add_cut(6,
                    CutType(name="6",
                            long_name="Four muon vertex",
                            mask=(events['FourL_VtxProb'] > 0.01),
                            variables=['FourL_VtxProb'],
                            plot=True,
                            bins=100,
                            x_range=(0, 1),
                            labels=["FourL_VtxProb"],
                            xlabel="Vtx Prob"))

    # NOTE(review): this column is spelled 'JPsiMass' while cut 10 reads
    # 'JPsi_mass' — confirm both columns exist and that this is intended.
    cutdict.add_cut(7,
                    CutType(name="7",
                            long_name="Candidate Combine",
                            mask=events['JPsiMass'] > 0,
                            variables=['JPsiMass'],
                            plot=False))

    cutdict.add_cut(8,
                    CutType(name="8",
                            long_name="Dimuon Vertex Prob",
                            mask=(events['JPsi_VtxProb'] > 0.01) & (events['Z_VtxProb'] > 0.01),
                            variables=['JPsi_VtxProb', 'Z_VtxProb'],
                            plot=True,
                            bins=100,
                            x_range=(0, 1),
                            labels=["VtxProb1", "VtxProb2"],
                            xlabel="Vtx Prob"))

    cutdict.add_cut(9,
                    CutType(name="9",
                            long_name="Dimuon pT",
                            mask=(events['JPsi_Pt'] > 5) & (events['Z_Pt'] > 5),
                            variables=['JPsi_Pt', 'Z_Pt'],
                            plot=True,
                            bins=100,
                            x_range=(0, 20),
                            labels=["JPsi_Pt", "Z_Pt"],
                            xlabel="pT (GeV)"))

    # Both masses share one axis, so the range has to reach past the 110 GeV
    # upper edge of the Z_mass window as well as cover the 2.8-3.4 window.
    cutdict.add_cut(10,
                    CutType(name="10",
                            long_name="Dimuon mass",
                            mask=(events['JPsi_mass'] > 2.8) & (events['JPsi_mass'] < 3.4)
                                 & (events['Z_mass'] > 70) & (events['Z_mass'] < 110),
                            variables=['JPsi_mass', 'Z_mass'],
                            plot=True,
                            bins=100,
                            x_range=(0, 120),
                            labels=["JPsi_mass", "Z_mass"],
                            xlabel="mass (GeV)"))

    cutdict.add_cut(11,
                    CutType(name="11",
                            long_name="Four muon pT",
                            mask=(events['FourL_pt'] > 5),
                            variables=['FourL_pt'],
                            plot=True,
                            bins=100,
                            x_range=(0, 100),
                            labels=["FourL_pt"],
                            xlabel="pT (GeV)"))

    cutdict.add_cut(12,
                    CutType(name="12",
                            long_name="Four muon mass signal",
                            mask=(events['FourL_mass'] > 112) & (events['FourL_mass'] < 162),
                            variables=['FourL_mass'],
                            plot=True,
                            bins=50,
                            x_range=(112, 162),
                            labels=["FourL_mass"],
                            xlabel="mass (GeV)"))

    cutdict.add_cut(13,
                    CutType(name="13",
                            long_name="Four muon mass background",
                            mask=(events['FourL_mass'] < 120) | (events['FourL_mass'] > 130),
                            variables=['FourL_mass'],
                            plot=True,
                            bins=50,
                            x_range=(0, 300),
                            labels=["FourL_mass"],
                            xlabel="mass (GeV)"))

    return cutdict


In [20]:
# The masks inside a cut table are evaluated against the events passed in,
# so data and MC each get their own table.
cutdict_data = create_cutdict(events_data)
cutdict_mc = create_cutdict(events_mc)

In [22]:
# List the cuts registered for the data sample (key followed by the cut's printed form).
for key, item in cutdict_data.cutdict.items():
    print(key, item)

0 Cut: 0 (Preselection)
1 Cut: 1 (Muon Trigger)
2 Cut: 2 (Soft Muons)
3 Cut: 3 (Muon pT)
4 Cut: 4 (Detector acceptance)
5 Cut: 5 (Any dimuon pair vertex)
6 Cut: 6 (Four muon vertex)
7 Cut: 7 (Candidate Combine)
8 Cut: 8 (Dimuon Vertex Prob)
9 Cut: 9 (Dimuon pT)
10 Cut: 10 (Dimuon mass)
11 Cut: 11 (Four muon pT)
12 Cut: 12 (Four muon mass signal)
13 Cut: 13 (Four muon mass background)


In [23]:
# List the cuts registered for the MC sample (key followed by the cut's printed form).
for key, item in cutdict_mc.cutdict.items():
    print(key, item)

0 Cut: 0 (Preselection)
1 Cut: 1 (Muon Trigger)
2 Cut: 2 (Soft Muons)
3 Cut: 3 (Muon pT)
4 Cut: 4 (Detector acceptance)
5 Cut: 5 (Any dimuon pair vertex)
6 Cut: 6 (Four muon vertex)
7 Cut: 7 (Candidate Combine)
8 Cut: 8 (Dimuon Vertex Prob)
9 Cut: 9 (Dimuon pT)
10 Cut: 10 (Dimuon mass)
11 Cut: 11 (Four muon pT)
12 Cut: 12 (Four muon mass signal)
13 Cut: 13 (Four muon mass background)


In [28]:
# Cuts 0-11 are shared by both selections; only the last four-muon mass cut differs:
# cut 12 for the signal selection, cut 13 for the background selection.
common_cuts = list(range(12))
signal_cuts = common_cuts + [12]
background_cuts = common_cuts + [13]

# One analysis object per sample, each paired with the cut table built from that sample.
cut_analysis_data = CutAnalysis(events_data, cutdict_data)
cut_analysis_mc = CutAnalysis(events_mc, cutdict_mc)

In [29]:
# Data is run through the background selection (cuts 0-11, then cut 13).
cutSummary_data = cut_analysis_data.prepare_masks(background_cuts)

Cut 0: Preselection - 1389300 events, 499503 candidates
Cut 1: Muon Trigger - 253858 events, 156201 candidates
Cut 2: Soft Muons - 46683 events, 41296 candidates
Cut 3: Muon pT - 32259 events, 28024 candidates
Cut 4: Detector acceptance - 31107 events, 27119 candidates
Cut 5: Any dimuon pair vertex - 30668 events, 26762 candidates
Cut 6: Four muon vertex - 27592 events, 24186 candidates
Cut 7: Candidate Combine - 1744 events, 1647 candidates
Cut 8: Dimuon Vertex Prob - 1686 events, 1597 candidates
Cut 9: Dimuon pT - 1508 events, 1436 candidates
Cut 10: Dimuon mass - 1315 events, 1259 candidates
Cut 11: Four muon pT - 1267 events, 1212 candidates
Cut 13: Four muon mass background - 1146 events, 1098 candidates


In [30]:
# MC is run through the signal selection (cuts 0-11, then cut 12).
cutSummary_mc = cut_analysis_mc.prepare_masks(signal_cuts)

Cut 0: Preselection - 4691 events, 4291 candidates
Cut 1: Muon Trigger - 4507 events, 4117 candidates
Cut 2: Soft Muons - 4214 events, 3997 candidates
Cut 3: Muon pT - 3976 events, 3834 candidates
Cut 4: Detector acceptance - 3919 events, 3782 candidates
Cut 5: Any dimuon pair vertex - 3892 events, 3759 candidates
Cut 6: Four muon vertex - 3768 events, 3642 candidates
Cut 7: Candidate Combine - 3358 events, 3346 candidates
Cut 8: Dimuon Vertex Prob - 3283 events, 3271 candidates
Cut 9: Dimuon pT - 3213 events, 3202 candidates
Cut 10: Dimuon mass - 3213 events, 3202 candidates
Cut 11: Four muon pT - 3089 events, 3079 candidates
Cut 12: Four muon mass signal - 3077 events, 3068 candidates


In [None]:
# Tabulate the data cut summary. `cutSummary` is not defined anywhere in this
# notebook; the data summary is used here because the cells that follow all work
# on events_data. Swap in cutSummary_mc to view the MC table instead.
# The 'mask' and 'cutobj' entries are left out of the table.
cutSummarydf = {k: v for k, v in cutSummary_data.items() if k not in ['mask', 'cutobj']}
pd.DataFrame(cutSummarydf)

In [None]:
# The plotter works on the data sample, so it must be given the cut table built
# from events_data (there is no plain `cutdict` variable in this notebook).
plotter = Plotter(events_data, cutdict_data, save_path)
plotter.plot_preselection()

In [None]:
# The plotter was built on events_data, so pass the matching data summary
# (there is no plain `cutSummary` variable in this notebook).
plotter.plot_summary(cutSummary_data)

In [None]:
# NOTE(review): cutSummary_data was prepared with background_cuts, which ends in
# cut 13 rather than cut 12 — confirm that plot_single_variable accepts a cut
# index missing from the summary, or use background_cuts here.
cuts_to_show = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# Pass the data summary: the plotter was built on events_data, and there is no
# plain `cutSummary` variable in this notebook.
plotter.plot_single_variable(cuts_to_show, cutSummary_data)

In [None]:
# Keep the data events passing the last mask of the data summary
# (there is no plain `cutSummary` variable in this notebook).
final = events_data[cutSummary_data['mask'][-1]]

# NOTE(review): `ak` is not imported in the visible import cell; presumably it
# arrives through `from helperClasses import *` — confirm, or import awkward explicitly.
df = ak.to_dataframe(final).reset_index(drop=True)

# pt / eta / phi of the four muons, ordered muon by muon (B_Mu1_pt, B_Mu1_eta, B_Mu1_phi, B_Mu2_pt, ...).
cols_to_keep = [f'B_Mu{i}_{quantity}' for i in range(1, 5) for quantity in ('pt', 'eta', 'phi')]
df = df[cols_to_keep]

In [None]:
# a = events_data[cutSummary['mask'][-1]]
# b = a.B_J1_mass
# len(b[ak.num(b, axis=1) > 0])
# ak.sum(ak.num(b, axis=1))

In [None]:
# Display the muon kinematics table built from the final data selection.
df

In [None]:
# def get_FOM(events_data, events_mc, savepath_data, savepath_mc, elePt_low, elePt_high, eleEta, ymass_upper):
#     """
#     Calculate the Figure of Merit.
#     """
#     analysis_data = Analysis(events_data, savepath_data)
#     analysis_mc = Analysis(events_mc, savepath_mc)
    
#     nb = analysis_data.get_count(elePt_low, elePt_high, eleEta, ymass_upper)
#     nEff = analysis_mc.get_count(elePt_low, elePt_high, eleEta, ymass_upper) / 100
#     FOM = nEff / np.sqrt(nb)
#     return nEff, nb, FOM

# def load_config(config_path):
#     """
#     Load configuration from a YAML file.
#     """
#     try:
#         with open(config_path, 'r') as config_file:
#             return yaml.safe_load(config_file)
#     except FileNotFoundError:
#         print(f"Error: Configuration file '{config_path}' not found.")
#         sys.exit(1)
#     except yaml.YAMLError as e:
#         print(f"Error parsing YAML configuration file: {e}")
#         sys.exit(1)

# def load_events(file_path, schema_class, entry_stop):
#     """
#     Load events from a ROOT file with error handling.
#     """
#     try:
#         events = NanoEventsFactory.from_root(
#             {file_path: "ntuple"}, 
#             schemaclass=schema_class, 
#             entry_stop=entry_stop
#         ).events()
#         return events
#     except FileNotFoundError:
#         print(f"Error: File '{file_path}' not found.")
#         return None
#     except Exception as e:
#         print(f"Error loading events from '{file_path}': {e}")
#         return None

# def main(config_path='config.yaml', run_data=True, run_mc=True):
#     # Load configuration
#     config = load_config(config_path)

#     events_data = None
#     events_mc = None

#     # Load events
#     if run_data:
#         events_data = load_events(config['data_path'], BaseSchema, config['entry_stop'])
#         if events_data is None:
#             print("Failed to load data events. Skipping data analysis.")
#             run_data = False

#     if run_mc:
#         events_mc = load_events(config['mc_path'], BaseSchema, config['entry_stop'])
#         if events_mc is None:
#             print("Failed to load MC events. Skipping MC analysis.")
#             run_mc = False

#     if not run_data and not run_mc:
#         print("No valid data or MC events loaded. Exiting.")
#         sys.exit(1)

#     # Select columns
#     if run_data:
#         events_data = events_data[config['columns']].compute()
#     if run_mc:
#         events_mc = events_mc[config['columns']].compute()

#     # Create DataFrame to store results
#     df = pd.DataFrame(columns=['elePt_low', 'elePt_high', 'eleEta', 'ymass_upper', 'Efficiency', 'Background', 'FOM'])

#     # Loop over different cut values
#     for elePt_low in config['cut_parameters']['elePt_low']:
#         for elePt_high in config['cut_parameters']['elePt_high']:
#             for eleEta in config['cut_parameters']['eleEta']:
#                 for ymass_upper in config['cut_parameters']['ymass_upper']:
#                     savepath_data = f"{config['savepath_data_base']}el_{elePt_low}_eh_{elePt_high}_eE_{eleEta}_yup_{ymass_upper}"
#                     savepath_mc = f"{config['savepath_mc_base']}el_{elePt_low}_eh_{elePt_high}_eE_{eleEta}_yup_{ymass_upper}"

#                     os.makedirs(savepath_data, exist_ok=True)
#                     os.makedirs(savepath_mc, exist_ok=True)

#                     eff, nb, FOM = 0, 0, 0
                    
#                     if run_data and run_mc:
#                         eff, nb, FOM = get_FOM(events_data, events_mc, savepath_data, savepath_mc, elePt_low, elePt_high, eleEta, ymass_upper)
#                     elif run_data:
#                         analysis_data = Analysis(events_data, savepath_data)
#                         nb = analysis_data.get_count(elePt_low, elePt_high, eleEta, ymass_upper)
#                     elif run_mc:
#                         analysis_mc = Analysis(events_mc, savepath_mc)
#                         eff = analysis_mc.get_count(elePt_low, elePt_high, eleEta, ymass_upper) / 100

#                     df.loc[len(df)] = {'elePt_low': elePt_low, 'elePt_high': elePt_high, 'eleEta': eleEta, 'ymass_upper': ymass_upper, 'Efficiency': eff, 'Background': nb, 'FOM': FOM}
#                     print(f"elePt_low: {elePt_low}, elePt_high: {elePt_high}, eleEta: {eleEta}, ymass_upper: {ymass_upper}")
#                     print(f"Efficiency: {eff}")
#                     print(f"Background: {nb}")
#                     print(f"FOM: {FOM}")
#                     print(df)
#                     print("\n")

#     print(df)
#     df.to_csv(config['output_file'], index=False)

# # if __name__ == "__main__":
# #     parser = argparse.ArgumentParser(description="Run particle physics analysis on data and/or MC events.")
# #     parser.add_argument("--config", default="config.yaml", help="Path to the configuration file")
# #     parser.add_argument("--data", action="store_true", help="Run analysis on data events")
# #     parser.add_argument("--mc", action="store_true", help="Run analysis on MC events")
# #     args = parser.parse_args()

# #     if not args.data and not args.mc:
# #         print("Error: You must specify at least one of --data or --mc")
# #         sys.exit(1)

#     # main(config_path=args.config, run_data=args.data, run_mc=args.mc)

In [None]:
# main(config_path="config.yaml", run_data=True, run_mc=False)