In [6]:
import uproot as up
import pandas as pd
import os
import numpy as np
import awkward as ak
import mplhep as hep
import matplotlib.pyplot as plt
import yaml
from coffea.nanoevents import TreeMakerSchema, BaseSchema, NanoEventsFactory
import argparse
import sys

hep.style.use(hep.style.CMS)

In [7]:
class ParticleSelection:
    """
    A class to define and apply particle selection criteria.
    """
    def __init__(self, events):
        self.events = events

    def make_cut_list(self, elePt_low=5, elePt_high=5, eleEta=2.5, ymass_upper=10):
        """
        Define a list of cuts to be applied to the events.
        """

        cut_fake = (self.events['B_Z_mass'] > -1)
        cut_unOrdered_Z = self.events['B_Z_pt1'] > self.events['B_Z_pt2']
        cut_unOrdered_J = self.events['B_J_pt1'] > self.events['B_J_pt2']
        cut_unOrdered = cut_unOrdered_Z & cut_unOrdered_J

        cut_EleTrigger = self.events['B_Z_TriggerPath']
        cut_Jsoft = self.events['B_J_soft1'] & self.events['B_J_soft2']
        cut_EleTrigEnforce = self.events['B_Z_pt1'] > 27
        cut_dilepton_prob = (self.events['B_J_VtxProb'] > 0.01) & (self.events['B_Z_VtxProb'] > 0.01)
        cut_FourL_prob = self.events['FourL_VtxProb'] > 0.01

        cut_Pt = (self.events['B_Z_pt1'] > 27.0) & (self.events['B_Z_pt2'] > 5.0) & (self.events['B_J_pt1'] > 3.0) & (self.events['B_J_pt2'] > 3.0)
        cut_eta = (abs(self.events['B_Z_eta1']) < 2.4) & (abs(self.events['B_Z_eta2']) < 2.4) & (abs(self.events['B_J_eta1']) < 2.5) & (abs(self.events['B_J_eta2']) < 2.5)
        cut_detector = cut_Pt & cut_eta

        cut_Jmass = (self.events['B_J_mass'] > 3.0) & (self.events['B_J_mass'] < 3.2)
        cut_Zmass = (self.events['B_Z_mass'] > 70) & (self.events['B_Z_mass'] < 110)

        cut_ZmvaIsoHighpT = self.events['B_Z_mvaIsoWP90_1']
        cut_ZmvaIsoLowpT = self.events['B_Z_mvaIsoWP90_2']
        cut_ZmvaIsoBoth = (self.events['B_Z_mvaIsoWP90_1']) & (self.events['B_Z_mvaIsoWP90_2'])

        cut_FourLmass = (self.events['FourL_mass'] > 112) & (self.events['FourL_mass'] < 162)

        return {
            "0": {"name": "Preselection", "mask": cut_fake},
            "1": {"name": "UnOrdered pT", "mask": cut_unOrdered},
            "2": {"name": "Electron Trigger", "mask": cut_EleTrigger},
            "3": {"name": "Soft Muons", "mask": cut_Jsoft},
            "4": {"name": "Electron Trigger Enforce", "mask": cut_EleTrigEnforce},
            "5": {"name": "Dilepton Vtx > 1%", "mask": cut_dilepton_prob, "var": ["B_J_VtxProb", "B_Z_VtxProb"]},
            "6": {"name": "FourL Vtx > 1%", "mask": cut_FourL_prob, "var": ["FourL_VtxProb"]},
            "7": {"name": "Detector acceptance", "mask": cut_detector},
            "8": {"name": "J mass", "mask": cut_Jmass, "var": ["B_J_mass"]},
            "9": {"name": "Z mass", "mask": cut_Zmass, "var": ["B_Z_mass"]},
            "10": {"name": "eleID High pT", "mask": cut_ZmvaIsoHighpT, "var": ["B_J_pt1"]},
            "11": {"name": "eleID Low pT", "mask": cut_ZmvaIsoLowpT, "var": ["B_J_pt2"]},
            "12": {"name": "eleID either", "mask": cut_ZmvaIsoBoth, "var": ["B_J_pt1", "B_J_pt2"]},
            "13": {"name": "FourL mass", "mask": cut_FourLmass, "var": ["FourL_mass"]}
        }

In [3]:
class Plotter:
    """
    A class to handle all plotting functions.
    """
    def __init__(self, savepath):
        self.savepath = savepath

    def make_hist(self, nbins, xlow, xhigh, values, labels, lines, fileName, xlabel, text_array):
        """
        Create and save a histogram.
        """
        cut_name_at_plot, cut_name_after_plot, n_ev_before, n_eve_after = text_array
        unit = 'GeV' if 'eta' not in xlabel else ''
        
        plt.figure(figsize=(8, 8))
        for i, value in enumerate(values):
            plt.hist(value, bins=nbins, range=(xlow, xhigh), label=labels[i], alpha=0.5)
        for line in lines:
            plt.axvline(x=line, color='r')

        plt.text(0.5, 0.5, f"Drawn at: {cut_name_at_plot} ({n_ev_before})", fontsize=12, transform=plt.gca().transAxes)
        plt.text(0.5, 0.45, f"Next cut: {cut_name_after_plot} ({n_eve_after})", fontsize=12, transform=plt.gca().transAxes)

        plt.xlabel(xlabel)
        plt.ylabel(f"Counts / {(xhigh-xlow)/nbins:.2f} {unit}")
        plt.legend(fontsize=13)
        plt.tight_layout()
        plt.savefig(f"{self.savepath}/{fileName}.png")
        plt.close()

    def make_hist2D(self, xvar, yvar, xlabel, ylabel, fileName, text_array):
        """
        Create and save a 2D histogram.
        """
        plt.figure(figsize=(8, 8))
        plt.hist2d(xvar, yvar, bins=(50, 50), range=((-3, 3), (0, 50)))
        plt.colorbar()
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()
        plt.savefig(f"{self.savepath}/{fileName}.png")
        plt.close()

    # Add other plotting methods here (plot_dilepton_vertexing, plot_dielectron_inv_mass, etc.)

class Analysis:
    """
    Main class to perform the analysis.
    """
    def __init__(self, events, savepath):
        self.events = events
        self.particle_selection = ParticleSelection(events)
        self.plotter = Plotter(savepath)


    def get_count(self, data):
        """
        Count the number of candidates and events in the given data.
        """
        array = data.B_J_mass
        nevents = len(array[ak.num(array, axis=1) > 0])
        ncandidates = ak.sum(ak.num(array, axis=1))
        return ncandidates, nevents
    

    def get_summary_of_cuts(self, cut_order):
        """
        Generate a summary of cuts applied to the events.
        """
        key_list = []
        ncandidates_list = []
        nevents_list = []
        aggMask_list = []
        var_list = []

        agg_mask = self.particle_selection.cut_list["0"]["mask"]

        for i in cut_order:
            mask = self.particle_selection.cut_list[str(i)]["mask"]
            agg_mask = agg_mask & mask
            ncandidates, nevents = self.get_count(self.events[agg_mask])

            key_list.append(self.particle_selection.cut_list[str(i)]["name"])
            ncandidates_list.append(ncandidates)
            nevents_list.append(nevents)   
            aggMask_list.append(agg_mask)
            var_list.append(self.particle_selection.cut_list[str(i)].get("var", None))

        summary_dict = {
            "Cut": key_list, 
            "Candidates": ncandidates_list, 
            "Events": nevents_list, 
            "Aggregated mask": aggMask_list, 
            "Var": var_list
        }
        
        summary_table = pd.DataFrame({key: summary_dict[key] for key in ["Cut", "Candidates", "Events"]})

        return summary_dict, summary_table

    def get_view_at(self, mycut, summary_dict):
        """
        Get the view of the data at a specific cut.
        """
        mycut_name = self.particle_selection.cut_list[str(mycut)]['name']
        cut_index = summary_dict['Cut'].index(mycut_name)
        n_events_after_cut = summary_dict['Events'][cut_index]
        view_index = cut_index - 1
        view_index_name = summary_dict['Cut'][view_index]
        view_index_mask = summary_dict['Aggregated mask'][view_index]
        n_events_before_cut = summary_dict['Events'][view_index]
        cut_of_interest = self.events[view_index_mask]
        text_array = [view_index_name, mycut_name, n_events_before_cut, n_events_after_cut]
        return cut_of_interest, text_array

    def apply_cut_progression(self, cut_progression):
        """
        Apply a series of cuts and create plots at each stage.
        """
        summary_dict, summary_table = self.get_summary_of_cuts(self.events, cut_progression)
        print("Summary of cuts")
        print(summary_table)
        self.show_plots_at_each_cut(summary_dict)
        return summary_dict, summary_table

    def show_plots_at_each_cut(self, summary_dict):
        """
        Create plots at each cut stage.
        """
        # Implement the logic to create plots at each cut stage
        pass

    def get_count(self, elePt_low, elePt_high, eleEta, ymass_upper):
        """
        Apply cuts and return the count of events passing all cuts.
        """
        self.particle_selection.cut_list = self.particle_selection.make_cut_list(elePt_low, elePt_high, eleEta, ymass_upper)
        eleID_both = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13]
        summary_dict, summary_table = self.apply_cut_progression(eleID_both)
        count = summary_table[summary_table['Cut'] == 'FourL mass']['Events'].values[0]
        return count

def get_FOM(events_data, events_mc, savepath_data, savepath_mc, elePt_low, elePt_high, eleEta, ymass_upper):
    """
    Calculate the Figure of Merit.
    """
    analysis_data = Analysis(events_data, savepath_data)
    analysis_mc = Analysis(events_mc, savepath_mc)
    
    nb = analysis_data.get_count(elePt_low, elePt_high, eleEta, ymass_upper)
    nEff = analysis_mc.get_count(elePt_low, elePt_high, eleEta, ymass_upper) / 100
    FOM = nEff / np.sqrt(nb)
    return nEff, nb, FOM

def load_config(config_path):
    """
    Load configuration from a YAML file.
    """
    try:
        with open(config_path, 'r') as config_file:
            return yaml.safe_load(config_file)
    except FileNotFoundError:
        print(f"Error: Configuration file '{config_path}' not found.")
        sys.exit(1)
    except yaml.YAMLError as e:
        print(f"Error parsing YAML configuration file: {e}")
        sys.exit(1)

def load_events(file_path, schema_class, entry_stop):
    """
    Load events from a ROOT file with error handling.
    """
    try:
        events = NanoEventsFactory.from_root(
            {file_path: "ntuple"}, 
            schemaclass=schema_class, 
            entry_stop=entry_stop
        ).events()
        return events
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except Exception as e:
        print(f"Error loading events from '{file_path}': {e}")
        return None

def main(config_path='config.yaml', run_data=True, run_mc=True):
    # Load configuration
    config = load_config(config_path)

    events_data = None
    events_mc = None

    # Load events
    if run_data:
        events_data = load_events(config['data_path'], BaseSchema, config['entry_stop'])
        if events_data is None:
            print("Failed to load data events. Skipping data analysis.")
            run_data = False

    if run_mc:
        events_mc = load_events(config['mc_path'], BaseSchema, config['entry_stop'])
        if events_mc is None:
            print("Failed to load MC events. Skipping MC analysis.")
            run_mc = False

    if not run_data and not run_mc:
        print("No valid data or MC events loaded. Exiting.")
        sys.exit(1)

    # Select columns
    if run_data:
        events_data = events_data[config['columns']].compute()
    if run_mc:
        events_mc = events_mc[config['columns']].compute()

    # Create DataFrame to store results
    df = pd.DataFrame(columns=['elePt_low', 'elePt_high', 'eleEta', 'ymass_upper', 'Efficiency', 'Background', 'FOM'])

    # Loop over different cut values
    for elePt_low in config['cut_parameters']['elePt_low']:
        for elePt_high in config['cut_parameters']['elePt_high']:
            for eleEta in config['cut_parameters']['eleEta']:
                for ymass_upper in config['cut_parameters']['ymass_upper']:
                    savepath_data = f"{config['savepath_data_base']}el_{elePt_low}_eh_{elePt_high}_eE_{eleEta}_yup_{ymass_upper}"
                    savepath_mc = f"{config['savepath_mc_base']}el_{elePt_low}_eh_{elePt_high}_eE_{eleEta}_yup_{ymass_upper}"

                    os.makedirs(savepath_data, exist_ok=True)
                    os.makedirs(savepath_mc, exist_ok=True)

                    eff, nb, FOM = 0, 0, 0
                    
                    if run_data and run_mc:
                        eff, nb, FOM = get_FOM(events_data, events_mc, savepath_data, savepath_mc, elePt_low, elePt_high, eleEta, ymass_upper)
                    elif run_data:
                        analysis_data = Analysis(events_data, savepath_data)
                        nb = analysis_data.get_count(elePt_low, elePt_high, eleEta, ymass_upper)
                    elif run_mc:
                        analysis_mc = Analysis(events_mc, savepath_mc)
                        eff = analysis_mc.get_count(elePt_low, elePt_high, eleEta, ymass_upper) / 100

                    df.loc[len(df)] = {'elePt_low': elePt_low, 'elePt_high': elePt_high, 'eleEta': eleEta, 'ymass_upper': ymass_upper, 'Efficiency': eff, 'Background': nb, 'FOM': FOM}
                    print(f"elePt_low: {elePt_low}, elePt_high: {elePt_high}, eleEta: {eleEta}, ymass_upper: {ymass_upper}")
                    print(f"Efficiency: {eff}")
                    print(f"Background: {nb}")
                    print(f"FOM: {FOM}")
                    print(df)
                    print("\n")

    print(df)
    df.to_csv(config['output_file'], index=False)

# if __name__ == "__main__":
#     parser = argparse.ArgumentParser(description="Run particle physics analysis on data and/or MC events.")
#     parser.add_argument("--config", default="config.yaml", help="Path to the configuration file")
#     parser.add_argument("--data", action="store_true", help="Run analysis on data events")
#     parser.add_argument("--mc", action="store_true", help="Run analysis on MC events")
#     args = parser.parse_args()

#     if not args.data and not args.mc:
#         print("Error: You must specify at least one of --data or --mc")
#         sys.exit(1)

    # main(config_path=args.config, run_data=args.data, run_mc=args.mc)

In [4]:
main(config_path="config.yaml", run_data=True, run_mc=False)

TypeError: get_summary_of_cuts() takes 2 positional arguments but 3 were given