In [1]:
import os
import pathlib
import numpy as np
import pandas as pd

In [2]:
# Project root: the directory one level above the notebook's working directory.
lq_folder = os.path.split(os.getcwd())[0]
# Folder that is searched for the *.txt histograms (and the Diboson *.dat files).
folder_DATA = os.path.join(lq_folder, "05_ML_Final_Distribution")
# Folder holding one "<channel>.csv" table of event counts per channel.
n_events_folder = os.path.join(
    os.path.join(lq_folder, "03_delphes_preselection"), "N_events"
)

In [3]:
# Load every *.txt histogram found (recursively) under folder_DATA, keyed by
# its path as a string. Jupyter autosave copies are skipped.
folder_path = pathlib.Path(folder_DATA)
data_dict = {
    str(txt_file): np.loadtxt(txt_file)
    for txt_file in folder_path.glob('**/*.txt')
    if ".ipynb_checkpoints" not in str(txt_file)
}

In [4]:
def get_mass(path):
    """
    Extract the mass label from the directory that contains `path`.

    The label is whatever follows the last "M" in the parent directory's
    name, e.g. ".../M2500/high_per_bin_Lq_Lq.txt" -> "2500". If the name
    contains no "M", the whole directory name is returned.

    Parameters:
    path (str): path to a histogram file.

    Returns:
    str: the mass label.
    """
    mass_dir_name = os.path.basename(os.path.dirname(path))
    # rpartition leaves everything after the last "M" in the third slot
    # (or the full name there when "M" does not occur).
    return mass_dir_name.rpartition("M")[2]

In [5]:
def get_channel(path):
    """
    Return the name of the directory two levels above the file `path`.

    Example: ".../semileptonic_sLQ/M2500/high_per_bin_Lq_Lq.txt"
    -> "semileptonic_sLQ".

    Parameters:
    path (str): path to a histogram file.

    Returns:
    str: name of the grandparent directory.
    """
    mass_dir = os.path.split(path)[0]
    channel_dir = os.path.split(mass_dir)[0]
    return os.path.split(channel_dir)[1]

In [6]:
def rebin_histogram(hist_2n):
    """
    Re-bins a histogram with 2n bins to n bins by summing adjacent bin pairs.

    Parameters:
    hist_2n (array-like): 1D histogram with 2n bins. Lists and tuples are
        accepted as well as numpy arrays.

    Returns:
    numpy array: 1D numpy array containing the re-binned histogram with n bins,
        where output bin i is hist_2n[2*i] + hist_2n[2*i + 1].

    Raises:
    AssertionError: if the input does not have an even number of bins.

    """

    # Convert array-likes up front so .reshape below is always available;
    # asanyarray passes existing arrays (and ndarray subclasses) through as-is.
    hist_2n = np.asanyarray(hist_2n)

    # Check that the input histogram has an even number of bins
    assert len(hist_2n) % 2 == 0, "Input histogram must have an even number of bins."

    # View the bins as rows of (even, odd) neighbours and add each row up.
    return hist_2n.reshape(-1, 2).sum(axis=1)

In [7]:
# Scale every loaded histogram to the event count read from the N_events
# tables and write it to the mirrored path under 06_Statistical_Preparation.

# Row of the per-channel N_events table from which the counts are read.
cut_label = "DeltaR > 0.3"

# Histogram label (taken from the file name) -> columns of the N_events table
# whose counts are added up. "{mass}" is filled with the file's mass label.
# NOTE(review): the label "tbar" is paired with the "ttbar" column, exactly as
# before; confirm the histogram files are really named "high_per_bin_tbar.txt".
n_events_columns = {
    "stop": ("stop",),
    "tbar": ("ttbar",),
    "V+jets": ("z_jets", "w_jets"),
    "Diboson": ("ww", "wz", "zz"),
    "Lq_Lq": ("LQ_LQ_{mass}",),
    "tau_Lq": ("Tau_LQ_{mass}",),
    "tau_tau": ("Tau_Tau_{mass}",),
    "Combined": ("LQ_LQ_{mass}", "Tau_LQ_{mass}", "Tau_Tau_{mass}"),
}

# Parsed N_events tables, one per channel, so each CSV is read only once
# instead of once per histogram file.
n_events_tables = {}

for key in data_dict:
    mass = get_mass(key)
    channel = get_channel(key)
    # Label between "high_per_bin_" and the extension. splitext removes only
    # the extension; str.rstrip(".txt") would strip ANY trailing '.', 't' or
    # 'x' characters and mangle labels that end in one of those letters.
    signal = os.path.splitext(key.split("high_per_bin_")[-1])[0]

    if channel not in n_events_tables:
        n_events_tables[channel] = pd.read_csv(
            os.path.join(n_events_folder, f"{channel}.csv"), index_col=0
        )
    df = n_events_tables[channel]

    if signal in n_events_columns:
        n_events = sum(
            float(df.loc[cut_label, column.format(mass=mass)])
            for column in n_events_columns[signal]
        )
    else:
        # Unknown label: fall back to unit normalisation and report it.
        n_events = 1
        print(f"{signal} don't have def to n_events")

    if signal == "Diboson":
        # The Diboson shape is not taken from the *.txt file: it is read from
        # the dedicated Histograms_Diboson folder (where "non-resonant"
        # channels are stored under "sLQ") and re-binned pairwise.
        ch = channel.replace("non-resonant", "sLQ")
        data = np.loadtxt(
            os.path.join(folder_DATA, "Histograms_Diboson", ch, f"Diboson_M{mass}.dat")
        )
        data = rebin_histogram(data)
    else:
        data = data_dict[key]

    # Merge the first two bins into one and keep the remaining bins unchanged.
    # atleast_1d lets a single-value file (loaded as a 0-d array) be sliced.
    data = np.atleast_1d(data)
    new_data = np.concatenate((data[:2].sum(axis=0, keepdims=True), data[2:]))

    # Rescale so the bins add up to n_events. The builtin sum keeps the same
    # left-to-right accumulation as before, so the written values do not shift.
    # NOTE(review): an all-zero histogram gives a zero total here and therefore
    # a non-finite scale; decide whether such files should be skipped.
    new_data = new_data * (n_events / sum(new_data))

    # Report files that still contain an exactly empty bin after merging.
    if np.any(new_data == 0):
        print(key)

    new_path = key.replace("05_ML_Final_Distribution", "06_Statistical_Preparation")
    os.makedirs(os.path.dirname(new_path), exist_ok=True)
    np.savetxt(new_path, new_data)

/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_sLQ/M2500/high_per_bin_Lq_Lq.txt
/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_sLQ/M1500/high_per_bin_Lq_Lq.txt
/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_sLQ/M2000/high_per_bin_Lq_Lq.txt
/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_sLQ/M2250/high_per_bin_Lq_Lq.txt
/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_dLQ/M2500/high_per_bin_V+jets.txt
/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_dLQ/M2500/high_per_bin_Lq_Lq.txt
/disco4/SIMULACIONES/Cristian/Github/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms/semileptonic_dLQ/M2500/high_pe