In [1]:
import numpy as np 
import time
n_cuts = 3
n_settings = n_cuts**8
NO_MORE_TASKS = n_settings+1

In [2]:
class Data():
    """Process and load data
    """
    def __init__(self, filename: str = 'mc_ggH_16_13TeV_Zee_EGAM1_calocells_16249871.csv') -> None:
        """setting initial data specific parameters.

        Args:
            filename (str, optional): data file to load. Defaults to 'mc_ggH_16_13TeV_Zee_EGAM1_calocells_16249871.csv'.
        """
        self.data = self.read_data(filename)
        self.nevents = self.data.shape[0]
        self.name = ["averageInteractionsPerCrossing", "p_Rhad","p_Rhad1", "p_TRTTrackOccupancy", "p_topoetcone40", 
                     "p_eTileGap3Cluster", "p_phiModCalo", "p_etaModCalo"]
        self.NvtxReco = self.data[:,1]
        self.p_nTracks = self.data[:,2]
        self.p_truthType = self.data[:,10]

        self.signal = self.p_truthType == 2

        self.data = self.data[:, [0,3,4,5,6,7,8,9]]

        self.means_sig = np.array([np.average(self.data[self.signal, i]) for i in range(8)])
        self.means_bckg = np.array([np.average(self.data[~self.signal, i]) for i in range(8)])
        self.flip = np.sign(self.means_bckg - self.means_sig)
            
        for i in range(8): 
            self.data[:, i] *= self.flip[i]
            self.means_sig[i]  = self.means_sig[i] * self.flip[i]
            self.means_bckg[i] = self.means_bckg[i] * self.flip[i]


    
    def read_data(self, filename = 'mc_ggH_16_13TeV_Zee_EGAM1_calocells_16249871.csv') -> np.array:
        """Read data file using numpy (fastest).

        Args:
            filename (str, optional):File to load. Defaults to 'mc_ggH_16_13TeV_Zee_EGAM1_calocells_16249871.csv'.

        Returns:
            np.array: Returns data file except counter column and header row.
        """
        return np.loadtxt(filename, delimiter = ',', skiprows = 1, usecols=range(1,12))


In [3]:
def master(nworker: int, ds: Data):
    """Master function code. Master needs to collect work from workers.

    Args:
        nworker (int): The number of workers.
        ds (Data): Dataset as a Data class object.
    """
    ranges = np.zeros([n_cuts, 8])
    settings = list()
    accuracy = list()
    
    # loop over different event channels and set up cuts
    for i in range(8):
        for j in range(n_cuts):
            ranges[j][i] = ds.means_sig[i] + j * (ds.means_bckg[i] - ds.means_sig[i]) / n_cuts

    for k in range(n_settings):
        div = 1
        _set = np.zeros(8)
        for i in range(8):
            idx = int((k/div) % n_cuts)
            _set[i] = ranges[idx][i]
            div *= n_cuts
        settings.append(_set)
    
    tstart = time.time()

    for k in range(n_settings):
        accuracy.append(task_function(settings[k], ds))
    
    tend = time.time()

    best_accuracy_score = 0
    idx_best = 0
    for k in range(n_settings):
        if (accuracy[k] > best_accuracy_score):
            best_accuracy_score = accuracy[k]
            idx_best = k
    
    print("Best accuracy obtained:", best_accuracy_score, "\n")
    print("Final cuts: \n")
    
    for i in range(8):
        print(ds.name[i], " : ", settings[idx_best][i]*ds.flip[i], "\n")
    
    print()
    print("Number of settings:", n_settings, "\n")
    print("Elapsed time:", (tend - tstart), "\n")
    print("task time [mus]:", (tend - tstart)/ n_settings, "\n")

In [24]:
def task_function(setting: np.array, ds: Data) -> float:
    """_summary_

    Args:
        setting (numpy array): _description_
        ds (Data): _description_

    Returns:
        float: _description_
    """
    pred = np.min(ds.data < setting, axis=1)
    return np.sum(pred == ds.signal) / ds.nevents

In [5]:
def main() -> None:
    nrank = 1
    rank = 0

    ds = Data()
    if rank == 0:
        master(nrank-1, ds)
    

In [25]:
main()

Best accuracy obtained: 0.7365760732537989 

Final cuts: 

averageInteractionsPerCrossing  :  41.37656598765495 

p_Rhad  :  0.05993003622620082 

p_Rhad1  :  0.0377865347088777 

p_TRTTrackOccupancy  :  0.4684328988784968 

p_topoetcone40  :  4.9602659991986 

p_eTileGap3Cluster  :  0.3918736048431675 

p_phiModCalo  :  1.5157768399878342e-05 

p_etaModCalo  :  0.012525917018684045 


Number of settings: 6561 

Elapsed time: 16.504681825637817 

task time [mus]: 0.0025155741237064193 

