In [1]:
import os
import pathlib
import numpy as np
import pandas as pd

In [2]:
# The project root is the parent of the directory the notebook is run from.
lq_folder = os.path.split(os.getcwd())[0]

# Input histograms read by this notebook.
folder_DATA = os.path.join(lq_folder, "05_ML_Final_Distribution")

# Directory holding one "<channel>.csv" event-count table per channel.
n_events_folder = os.path.join(
    os.path.join(lq_folder, "03_delphes_preselection"),
    "N_events",
)

In [3]:
# Load every .txt file found (recursively) under folder_DATA, keyed by its
# path as a string. Anything inside a ".ipynb_checkpoints" directory is skipped.
folder_path = pathlib.Path(folder_DATA)
data_dict = {
    str(txt_path): np.loadtxt(txt_path)
    for txt_path in list(folder_path.glob('**/*.txt'))
    if ".ipynb_checkpoints" not in str(txt_path)
}

In [4]:
def get_mass(path):
    """Extract the mass label from the name of the directory containing ``path``.

    The label is whatever follows the last ``"M"`` in the parent directory
    name, with one trailing ``"_wo_RHC"`` suffix removed when present.

    Parameters:
    path (str): Path to a file whose parent directory carries the mass label,
        e.g. ``.../M2500/high_per_bin_stop.txt``.

    Returns:
    str: The mass label, e.g. ``"2500"``.
    """
    suffix = "_wo_RHC"
    mass = os.path.basename(os.path.dirname(path)).split("M")[-1]
    # str.rstrip(chars) removes any trailing characters from the *set* chars,
    # not the literal suffix, so the suffix is removed explicitly here.
    if mass.endswith(suffix):
        mass = mass[:-len(suffix)]
    return mass

In [5]:
def get_channel(path):
    """Return the name of the directory two levels above the file at ``path``.

    For ``.../<channel>/<mass dir>/<file>`` this is ``<channel>``.
    """
    containing_dir = os.path.dirname(path)
    grandparent_dir = os.path.dirname(containing_dir)
    return os.path.basename(grandparent_dir)

In [6]:
def rebin_histogram(hist_2n):
    """
    Halve the number of bins of a histogram by merging adjacent bin pairs.

    Parameters:
    hist_2n (numpy array): 1D numpy array holding a histogram with 2n bins.

    Returns:
    numpy array: 1D numpy array with n bins, where output bin k is the sum of
    input bins 2k and 2k+1.

    """
    n_input_bins = len(hist_2n)
    assert n_input_bins % 2 == 0, "Input histogram must have an even number of bins."

    # Each row of the (n, 2) view holds one pair of neighbouring bins.
    bin_pairs = hist_2n.reshape(-1, 2)
    return bin_pairs.sum(axis=1)

In [7]:
def get_new_data (data: np.array) -> np.array:
    """Return a randomly jittered copy of ``data`` with a value inserted between bins.

    For every entry a rescaled value is emitted, followed (except after the
    last entry) by a jittered average of that entry and the next one, so an
    input of length n yields 2n - 1 values. Any emitted value that is exactly
    zero is replaced by a small random positive number of order 1e-5.

    Draws from the global ``np.random`` state, so results are reproducible only
    if the caller seeds it.

    Parameters:
    data (numpy array): 1D sequence of bin contents.

    Returns:
    numpy array: 1D array with the rescaled and interpolated values interleaved.
    """
    def _tiny_positive():
        # Stand-in for bins that would otherwise be exactly zero.
        return 0.8*np.random.uniform(0.95,1.05)*1e-5

    values = []
    for idx, content in enumerate(data):
        # NOTE(review): the factor is 1 + (1 + u), i.e. about 2 rather than
        # about 1 -- confirm that doubling the bin content is intended.
        scaled = content*(1+(1+np.random.uniform(-4.5e-2,4.5e-2)))
        if scaled == 0:
            scaled = _tiny_positive()
        values.append(scaled)

        # The last entry has no right-hand neighbour to interpolate with.
        try:
            right_neighbour = data[idx+1]
        except IndexError:
            continue

        midpoint = (data[idx] + right_neighbour)*(1+np.random.uniform(-0.025,0.025))/2.
        if midpoint == 0:
            midpoint = _tiny_positive()
        values.append(midpoint)
    return np.array(values)

In [8]:
# Channel directory name in the input tree -> directory name used in the
# output tree.
channels = {
    'hadronic_dLQ': 'hadronic_Tau_Tau_b_b',
    'hadronic_sLQ': 'hadronic_Tau_Tau_b',
    'hadronic_non-resonant': 'hadronic_Tau_Tau',
    'semileptonic_dLQ': 'semileptonic_Tau_Tau_b_b',
    'semileptonic_sLQ': 'semileptonic_Tau_Tau_b',
    'semileptonic_non-resonant': 'semileptonic_Tau_Tau',
}

# Row label read from every N_events table.
_N_EVENTS_ROW = "DeltaR > 0.3"

# Process label (taken from the file name) -> N_events columns whose yields
# are added together.
# NOTE(review): the label matched here is "tbar" while the column is "ttbar";
# if the input files are named "high_per_bin_ttbar.txt" they fall through to
# the "don't have def" fallback below -- confirm against the file names.
_BACKGROUND_COLUMNS = {
    "stop": ("stop",),
    "tbar": ("ttbar",),
    "V+jets": ("z_jets", "w_jets"),
    "Diboson": ("ww", "wz", "zz"),
}

# Signal label -> column-name prefixes. The full column name is
# "<prefix>_<mass>", or "<prefix>_wo_RHC_<mass>" for the wo_RHC samples.
_SIGNAL_PREFIXES = {
    "Lq_Lq": ("LQ_LQ",),
    "tau_Lq": ("Tau_LQ",),
    "tau_tau": ("Tau_Tau",),
    "Combined": ("LQ_LQ", "Tau_LQ", "Tau_Tau"),
}


def _expected_events(table, signal, mass, without_rhc):
    """Return the yield used to normalise the histogram of ``signal``.

    Parameters:
    table (pandas DataFrame): N_events table of the channel.
    signal (str): Process label taken from the histogram file name.
    mass (str): Mass label used to build the signal column names.
    without_rhc (bool): Select the "_wo_RHC" signal columns.

    Returns:
    float: Sum of the selected columns in the "DeltaR > 0.3" row, or 1 (after
    printing a notice) when the label is not known.

    Raises:
    KeyError: If an expected row or column is missing from ``table``.
    """
    if signal in _BACKGROUND_COLUMNS:
        columns = _BACKGROUND_COLUMNS[signal]
    elif signal in _SIGNAL_PREFIXES:
        infix = "_wo_RHC" if without_rhc else ""
        columns = [f"{prefix}{infix}_{mass}" for prefix in _SIGNAL_PREFIXES[signal]]
    else:
        print(f"{signal} don't have def to n_events")
        return 1

    total = 0.0
    for column in columns:
        total += float(table.loc[_N_EVENTS_ROW, column])
    return total


def _merge_low_bins(hist, n_keep=10, floor=1e-3):
    """Collapse all but the last ``n_keep`` bins of ``hist`` into one leading bin.

    The first output entry is the sum of the leading bins (0 when there are
    none); the remaining entries are the last ``n_keep`` bins, with exact zeros
    replaced by ``floor``.

    Returns:
    numpy array: 1D array with at most ``n_keep + 1`` entries.
    """
    # np.loadtxt returns a 0-d array for a single-value file; make it iterable.
    hist = np.atleast_1d(hist)
    n_merged = len(hist) - n_keep
    merged = [0]
    for idx, content in enumerate(hist):
        if idx < n_merged:
            merged[0] += content
        else:
            merged.append(floor if content == 0 else content)
    return np.array(merged)


# N_events tables already read, keyed by channel, so each CSV is parsed once
# instead of once per histogram file.
_n_events_tables = {}

for key in data_dict.keys():
    mass = get_mass(key)
    channel = get_channel(key)
    # Process label = file name after "high_per_bin_" without its extension.
    # (str.rstrip(".txt") would strip any trailing '.', 't' or 'x' characters,
    # corrupting labels that end in one of them.)
    signal = os.path.splitext(key.split("high_per_bin_")[-1])[0]

    if channel not in _n_events_tables:
        _n_events_tables[channel] = pd.read_csv(
            os.path.join(n_events_folder, f"{channel}.csv"), index_col=0
        )
    df = _n_events_tables[channel]
    n_events = _expected_events(df, signal, mass, "wo_RHC" in key)

    if signal == "Diboson":
        # The Diboson shape is read from a separate, finer-binned .dat file
        # instead of the loaded .txt. Mass 1000 reads the M1250 file and the
        # non-resonant channels read the sLQ directory.
        ch = channel
        n_mass = 1250 if int(mass) == 1000 else mass
        if "non-resonant" in ch:
            ch = ch.replace("non-resonant", "sLQ")
        data = np.loadtxt(
            os.path.join(folder_DATA, "Histograms_Diboson", ch, f"Diboson_M{n_mass}.dat")
        )
        data = rebin_histogram(data)
    else:
        data = data_dict[key]

    new_data = _merge_low_bins(data)

    # Rescale so the histogram integrates to the expected number of events.
    total = sum(new_data)
    if total == 0:
        raise ValueError(f"Cannot normalise {key}: histogram sums to zero")
    new_data *= n_events/total

    # Report files that still contain an exactly empty bin after rescaling.
    if np.any(new_data == 0):
        print(key)

    out_root = 'Histograms_woRHC' if "wo_RHC" in key else 'Histograms_wRHC'
    new_path = os.path.join(
        os.getcwd(), out_root, f'M{mass}', channels[channel], os.path.basename(key)
    )

    os.makedirs(os.path.dirname(new_path), exist_ok=True)
    np.savetxt(new_path, new_data)
    

/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/06_Statistical_Preparation/Histograms_wRHC/M1000/hadronic_Tau_Tau_b_b/high_per_bin_Combined.txt
/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/06_Statistical_Preparation/Histograms_wRHC/M1000/hadronic_Tau_Tau_b_b/high_per_bin_Diboson.txt
/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/06_Statistical_Preparation/Histograms_wRHC/M1000/hadronic_Tau_Tau_b_b/high_per_bin_Lq_Lq.txt
/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/06_Statistical_Preparation/Histograms_wRHC/M1000/hadronic_Tau_Tau_b_b/high_per_bin_stop.txt
/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/06_Statistical_Preparation/Histograms_wRHC/M1000/hadronic_Tau_Tau_b_b/high_per_bin_tau_Lq.txt
/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/06_Statistical_Preparation/Histograms_wRHC/M1000/hadronic_Tau_Tau_b_b/high_per_bin_tau_tau.txt
/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches

In [9]:
# !rm -rf Histograms**

In [10]:
# Display the last path visited by the conversion loop; this relies on the
# loop variable `key` still being defined in the kernel.
key

'/mnt/d/Bibliotecas/Documentos/GitHub/Leptoquarks_Searches_2023/05_ML_Final_Distribution/Histograms_wo_RHC/semileptonic_sLQ/M2500/high_per_bin_V+jets.txt'