In [1]:
# set path
%cd /work/project/escience/ruttho/FCC-ee_SimpleDelphesAnalysis/Analysis/

/work/project/escience/ruttho/FCC-ee_SimpleDelphesAnalysis/Analysis


In [None]:
#!/usr/bin/env python3
import uproot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import awkward as ak
import seaborn as sns
import os
import vector
import time
# --- CONSTANTS ---
# Lepton rest masses. They are used as the `mass` component when lepton
# four-vectors are built from (pt, eta, phi) in the event loop further down.
ELECTRON_MASS = 0.000511  # GeV/c^2
MUON_MASS = 0.1057        # GeV/c^2

def load_data(file_paths, tree_name, columns, max_events):
    """
    Reads the requested branches of one tree from several ROOT files.

    Parameters:
        file_paths: mapping of sample label -> path of the ROOT file.
        tree_name:  name of the tree to read in every file.
        columns:    branch names handed to `tree.arrays`.
        max_events: forwarded as `entry_stop` (None reads every entry).

    Returns:
        A dict with one entry per sample label holding the Awkward array
        returned by uproot, or None when anything went wrong while reading
        that sample (the error is printed, not raised).
    """
    loaded = {}
    for sample, root_path in file_paths.items():
        print(f"Loading data from {root_path}...")
        try:
            with uproot.open(root_path) as root_file:
                events = root_file[tree_name].arrays(columns, entry_stop=max_events)
        except Exception as err:
            # Best effort: report the failure and keep going with the other samples.
            print(f"Error loading {root_path}: {err}")
            events = None
        loaded[sample] = events
    return loaded

# Configuration
# TESTING_MODE caps the number of entries read per file (see MAX_EVENTS);
# set it to False to read every entry.
TESTING_MODE = True
MAX_EVENTS = 100000 if TESTING_MODE else None  # forwarded to uproot as entry_stop; None = no cap
OUTPUT_DIR = "figure/test"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # create the figure directory up front; no error if it exists

# Sample label -> ROOT file. The paths are relative, so they resolve against
# the working directory selected by the %cd in the first cell.
# The insertion order matters: plot_three_data() takes the samples in this
# order and pairs them with hard-coded colour and label lists.
file_paths = {
    "HZ4Lep": "Bg1_hz.root",
    "ZWW4Lep": "Bg2_zww.root",
    "HZ4LepLFV": "Sn1_hzLFV.root"
}

# Branches read from every file: per-object multiplicities (*_size) and the
# kinematic branches of MissingET, jets, electrons and muons.
columns = [
    "MissingET_size", "MissingET.Eta", "MissingET.Phi", "MissingET.MET",
    "Jet_size", "Jet.Phi", "Jet.Eta", "Jet.PT", "Jet.Mass",
    "Electron_size", "Electron.Eta", "Electron.Phi", "Electron.PT",
    "Muon_size", "Muon.Eta", "Muon.Phi", "Muon.PT"
]

# Read the "Delphes" tree of each sample. A sample whose file cannot be read
# is stored as None by load_data().
raw_data = load_data(file_paths, "Delphes", columns, MAX_EVENTS)

Loading data from Bg1_hz.root...
Loading data from Bg2_zww.root...
Loading data from Sn1_hzLFV.root...


In [3]:
def _flat_values(sample, field):
    """
    Returns the 1-D values to histogram for a single dataset.

    `sample` may be a record array / mapping that is looked up by `field`,
    or the values themselves when it cannot be indexed by a field name.
    Per-event lists are merged into one flat sequence; input that is already
    one-dimensional is returned unchanged.
    """
    try:
        values = sample[field]
    except (KeyError, IndexError, TypeError, ValueError):
        # Not indexable by field name: treat `sample` as the values themselves.
        values = sample
    # Only nested input needs flattening; calling ak.flatten on 1-D data raises.
    if getattr(values, "ndim", 1) > 1:
        values = ak.flatten(values)
    return values


def plot_three_data(data_arrays, settings, output_dir, other_config='', exception=False):
    """
    Generates and saves an overlaid histogram for three datasets.

    Parameters:
        data_arrays:  mapping with exactly three entries (label -> array). The
                      entries are plotted in the mapping's own order.
        settings:     dict with the keys 'name' (field to plot, also used in
                      the title and file name), 'prefix', 'bins', 'binrange',
                      'element', 'alpha', 'colors', 'stat', 'multiple',
                      'xlabel', 'labels' and 'log_scale'.
        output_dir:   directory the PNG is written to.
        other_config: optional suffix appended to the file name.
        exception:    unused; kept so existing callers keep working.

    Raises:
        ValueError: if `data_arrays` does not hold exactly three entries.

    The figure is always closed, even when plotting or saving fails.
    """
    if len(data_arrays) != 3:
        raise ValueError("This function requires exactly three data arrays.")

    n_arr = [_flat_values(sample, settings["name"]) for sample in data_arrays.values()]

    # Apply the theme before the figure exists so that the first figure is
    # styled exactly like every later one.
    sns.set_theme(style="ticks")
    fig, ax = plt.subplots()
    try:
        # The palette is reversed here and 'labels' is expected in reverse
        # dataset order by the callers; both are kept as in the original code.
        sns.histplot(n_arr, bins=settings["bins"], binrange=settings["binrange"], ax=ax, fill=False,
                     element=settings["element"], alpha=settings["alpha"], palette=settings['colors'][::-1],
                     stat=settings["stat"], common_norm=False, multiple=settings["multiple"])

        ax.set_title(f"{settings['name']} - {settings['prefix']}")
        ax.set_xlabel(settings['xlabel'])
        # Label the y axis after the statistic actually drawn ("density" -> "Density").
        ax.set_ylabel(str(settings["stat"]).capitalize())
        ax.legend(labels=settings['labels'])

        if settings['log_scale']:
            ax.set_yscale("log")

        png_name = f"{settings['prefix']}_{settings['name']}{other_config}.png"
        fig.savefig(os.path.join(output_dir, png_name))
        print(f"-> {png_name} has been plotted!")
    finally:
        plt.close(fig)


def plot_data(data_arrays, prefix, output_dir, additional_plot=False):
    """
    Draws the standard set of kinematic histograms for the three datasets.

    For each variable in the table below, the shared style settings are merged
    with that variable's binning and passed to plot_three_data(), which writes
    one PNG per variable into `output_dir`. `prefix` ends up in the plot title
    and file name. `additional_plot` is accepted but not used.
    """
    def _spec(bins, binrange, xlabel="", log_scale=False):
        # Per-variable binning / axis options in the layout plot_three_data expects.
        return {'bins': bins, 'binrange': binrange, 'xlabel': xlabel, 'log_scale': log_scale}

    energy_span = (0, 130)
    eta_span = (-10, 10)
    phi_span = (-2 * np.pi, 2 * np.pi)

    per_variable = {
        'Electron.PT': _spec(65, energy_span, "GeV"),
        'Jet.PT': _spec(65, energy_span, "GeV"),
        'Jet.Mass': _spec(65, energy_span, "GeV", True),
        'Muon.PT': _spec(65, energy_span, "GeV"),
        'Electron.Eta': _spec(200, eta_span),
        'Muon.Eta': _spec(200, eta_span),
        'Electron.Phi': _spec(140, phi_span),
        'Muon.Phi': _spec(140, phi_span),
        'MissingET.MET': _spec(130, energy_span, "GeV", True),
    }

    shared = dict(
        prefix=prefix,
        element="step",
        alpha=0.4,
        colors=["#FF9966", "#6698ca", "#66cc99"],
        labels=["HZ4LepLFV", "ZWW4Lep", "HZ4Lep"],
        multiple="layer",
        stat="density",
    )

    for variable, spec in per_variable.items():
        merged = dict(shared)
        merged.update(spec)
        merged['name'] = variable
        plot_three_data(data_arrays, merged, output_dir)

def first_cut_4l(array, name, pmask):
    """
    Selects events whose electron and muon multiplicities add up to four.

    The selection is AND-ed with the incoming mask `pmask`. A one-line summary
    (passing / total and percentage) labelled with `name` is printed, and the
    combined mask is returned.
    """
    lepton_count = array["Electron_size"] + array["Muon_size"]
    selected = (lepton_count == 4) & pmask

    passed = np.sum(selected)
    total = len(selected)
    print(f"    {name}: {passed}/{total} : {passed / total * 100:.2f}%")
    return selected

def second_cut_lepton_flavor(array, name, pmask):
    """
    Selects events in which both the electron count and the muon count are odd.

    The selection is AND-ed with the incoming mask `pmask`. A one-line summary
    (passing / total and percentage) labelled with `name` is printed, and the
    combined mask is returned.
    """
    odd_electrons = array["Electron_size"] % 2 != 0
    odd_muons = array["Muon_size"] % 2 != 0
    selected = odd_electrons & odd_muons & pmask

    passed = np.sum(selected)
    total = len(selected)
    print(f"    {name}: {passed}/{total} : {passed / total * 100:.2f}%")
    return selected

In [4]:
import vector
import awkward as ak

# Requires raw_data (from load_data) plus ELECTRON_MASS and MUON_MASS from the
# cells above.
#
# For every sample this cell adds per-event fields to raw_data[name]:
#   1stCut / 2ndCut / 3rdCut        boolean flags (3rdCut is only initialised)
#   dRThreleptonFromSingleLepton    dR between the lone lepton and each of the
#                                   three leptons of the other flavour
#   lowestdRThreleptonFromSingleLepton, lowestdRindex
#                                   the smallest of those dR values and its index
#   InvariantMassFromlowestdR       mass of (lone lepton + closest lepton)
#   recoiledZMass                   mass of (initial state - that pair)
# The list-valued fields stay empty for events that are not 1e+3mu or 1mu+3e.

# Energy of the at-rest initial state used for the recoil mass.
# NOTE(review): presumably the collider centre-of-mass energy in GeV -- confirm
# against the sample generation settings.
INITIAL_STATE_ENERGY = 240
initial_state = vector.obj(px=0, py=0, pz=0, E=INITIAL_STATE_ENERGY)

# Refresh the progress line only every N events; printing on every event
# dominates the runtime for large samples.
PROGRESS_EVERY = 1000

print("\nChecking uncut events...")
for name, arr in raw_data.items():
    if arr is None:
        # load_data() stores None for a sample whose file could not be read.
        print(f"      {name}: no data loaded, skipping")
        continue

    # Use the number of events actually read. MAX_EVENTS is only an upper
    # bound, so a file with fewer entries would otherwise be indexed out of
    # range and the new fields would not match the array length.
    all_events = len(arr)
    additional_fields = {
        "1stCut": [False] * all_events,
        "2ndCut": [False] * all_events,
        "3rdCut": [False] * all_events,  # placeholder, not filled in this cell
        "dRThreleptonFromSingleLepton": [[] for _ in range(all_events)],
        "lowestdRThreleptonFromSingleLepton": [[] for _ in range(all_events)],
        "lowestdRindex": [[] for _ in range(all_events)],
        "InvariantMassFromlowestdR": [[] for _ in range(all_events)],
        "recoiledZMass": [[] for _ in range(all_events)],
    }
    print(f"      {name}: {len(arr)} events")
    print(f"-> Applying cut on {name}...")

    # Field lookups do not depend on the event, so do them once per sample.
    electron_counts = arr["Electron_size"]
    muon_counts = arr["Muon_size"]

    for eventnum in range(all_events):
        n_el = electron_counts[eventnum]
        n_mu = muon_counts[eventnum]

        # First cut: exactly 4 leptons
        if n_el + n_mu == 4:
            additional_fields["1stCut"][eventnum] = True

        # Second cut: odd number of electrons AND odd number of muons.
        # Together with the first cut this keeps exactly the 1+3 and 3+1
        # topologies; 2+2, 4+0 and 0+4 events fail it.
        if (n_el % 2 != 0) and (n_mu % 2 != 0):
            additional_fields["2ndCut"][eventnum] = True

        # Pre-third cut: pick the lone lepton and the three leptons of the
        # other flavour (only defined for 1e+3mu or 1mu+3e events).
        if n_el == 1 and n_mu == 3:
            single, single_mass = "Electron", ELECTRON_MASS
            others, others_mass = "Muon", MUON_MASS
        elif n_mu == 1 and n_el == 3:
            single, single_mass = "Muon", MUON_MASS
            others, others_mass = "Electron", ELECTRON_MASS
        else:
            single = None

        if single is not None:
            single_lepton = vector.obj(
                pt=arr[f"{single}.PT"][eventnum][0],
                eta=arr[f"{single}.Eta"][eventnum][0],
                phi=arr[f"{single}.Phi"][eventnum][0],
                mass=single_mass
            )
            other_leptons = vector.zip({
                "pt": arr[f"{others}.PT"][eventnum],
                "eta": arr[f"{others}.Eta"][eventnum],
                "phi": arr[f"{others}.Phi"][eventnum],
                "mass": [others_mass] * 3
            })

            # Convert once and reuse, instead of calling to_list() three times.
            dr_values = single_lepton.deltaR(other_leptons).to_list()
            mindR = np.argmin(dr_values)
            # The lone lepton paired with its closest other-flavour lepton.
            pair = single_lepton + other_leptons[mindR]

            additional_fields["dRThreleptonFromSingleLepton"][eventnum] = dr_values
            additional_fields["lowestdRThreleptonFromSingleLepton"][eventnum] = [np.min(dr_values)]
            additional_fields["lowestdRindex"][eventnum] = [mindR]
            additional_fields["InvariantMassFromlowestdR"][eventnum] = [pair.mass]
            additional_fields["recoiledZMass"][eventnum] = [(initial_state - pair).mass]

        done = eventnum + 1
        if done % PROGRESS_EVERY == 0 or done == all_events:
            print(f"Processing event {done/(all_events)*100:.2f}%", end='\r')

    # Attach all new fields, then store the result back under the same key
    # (re-assigning an existing key while iterating over items() is safe).
    updated = arr
    for field_name, field_values in additional_fields.items():
        updated = ak.with_field(updated, field_values, field_name)
    raw_data[name] = updated


Checking uncut events...
      HZ4Lep: 100000 events
-> Applying cut on HZ4Lep...
      ZWW4Lep: 100000 events
-> Applying cut on ZWW4Lep...
      HZ4LepLFV: 100000 events
-> Applying cut on HZ4LepLFV...
Processing event 100.00%

In [6]:
# Histograms of the pairing variables for events passing the first two cuts.
# The two dR variables are dimensionless, so they are labelled with Delta R
# rather than GeV; the two mass variables keep GeV.
plot_configs = {
    'dRThreleptonFromSingleLepton': {'bins': 200, 'binrange': (0, 5), 'xlabel': r"$\Delta R$", 'log_scale': False},
    'lowestdRThreleptonFromSingleLepton': {'bins': 200, 'binrange': (0, 5), 'xlabel': r"$\Delta R$", 'log_scale': False},
    'InvariantMassFromlowestdR': {'bins': 400, 'binrange': (0, 200), 'xlabel': "GeV", 'log_scale': False},
    'recoiledZMass': {'bins': 400, 'binrange': (0, 200), 'xlabel': "GeV", 'log_scale': False}
}

# Style shared by every histogram in this cell. 'labels' is listed in reverse
# sample order, which is what plot_three_data() expects.
base_settings = {
    'prefix': '2ndCut', 'element': "step", 'alpha': 1,
    'colors': ["#FF9966", "#6698ca", "#66cc99"],
    'labels': ["HZ4LepLFV", "ZWW4Lep", "HZ4Lep"],
    'multiple': "layer", 'stat': "density"
}

# Keep only the events flagged by both cuts; raw_data itself is left untouched.
new_raw_data = {}
for name, arr in raw_data.items():
    pmask = arr["1stCut"] & arr["2ndCut"]
    new_raw_data[name] = arr[pmask]

# NOTE(review): figures are written to the current directory, not OUTPUT_DIR --
# confirm this is intended.
for name, config in plot_configs.items():
    settings = {**base_settings, **config, 'name': name}
    plot_three_data(new_raw_data, settings, '.')

-> 2ndCut_dRThreleptonFromSingleLepton.png has been plotted!
-> 2ndCut_lowestdRThreleptonFromSingleLepton.png has been plotted!
-> 2ndCut_InvariantMassFromlowestdR.png has been plotted!
-> 2ndCut_recoiledZMass.png has been plotted!
