In [27]:
class Cut:
    """
    A generic object that can be used to make cuts on data using some set of selection criteria.
    This will enact cuts on a Data object and can return a modified instance of this object. 
    """

    def __init__(self, data, particles):
        """
        Create the Cut object. The cuts must act on some data and so a filepath to the dataset
        of interest is required along with a suffix specifying the decay tree. The cut will act
        on the same variables for all particles specified within the particles argument.

        Parameters
        ----------
            data : Data
            An instantiated Data object that points to a specific tuple and decay tree. It should
            be unused such that called data.get_data() returns None.

            particles : list
            A list of particles that are to be considered during the cuts. Be careful not to specify
            particles who do not contain features which are required for the cuts you are going to 
            perform.
        """

        if data.get_data() is None:
            self.d = data
            self.particles = particles
            self.events_cut = 0
        else:
            raise Exception("ERROR: data must be an unused Data object")
    
    def get_events_cut(self):
        """
        Return the total number of events cut during all cuts made within this object.
        """
        
        return self.events_cut
    
    def set_events_cut(self, events):
        """
        Add to the total number of events cut.
        """
        
        self.events_cut += events

    def nbody(self, n):
        """
        Make a cut on these particles data based on the number of bodies that were detected
        in the event. 

        Parameters
        ----------
        n : int, list
            The number of bodies to make a cut around such that only events with n
            decay bodies are kept. This can be a list of ints to keep. In essence
            for [1, 3] all events with 1 or 3 bodies are retained.
        """

        self.d.get_specific_features(["totCandidates"])
        # Add the totCandidates feature to the Data objects data
        initial_events = len(self.d.get_data())
        if type(n) is int:
            df = self.d.get_data().loc[self.d.get_data()["totCandidates"] == n]
        else:
            df = self.d.get_data().loc[self.d.get_data()["totCandidates"] in n]
        self.set_events_cut(initial_events - len(df))
        self.d.update_data(df)
        
    def minimal_pT(self, percentage=None, tolerance=None):
        """
        Cut these data based on the vector sum of transverse momentum. This cut considers
        that the incoming beam is almost purely travelling along z and as such the total
        transverse momentum of all particles must be zero. Only 1 of percentage or
        tolerance should be specified.
        
        Parameters
        ----------
        percentage : float
            A percentage between 0 and 1 which specifies how many events to cut. The 
            percentage is specified as the percentage of all events to allow through such 
            that the lowest percentage percent of events are retained.
            
        tolerance : float
            A way of defining the sensitivty of the cut. This is the number of standard
            deviations to allow through such that lower is more sensitive. 
        """
        
        import numpy as np
        
        print(len(self.d.get_data()))
        self.d.get_particle_data(self.particles, ['PX', 'PY'], drop_duplicates=True)
        df = self.d.get_data()
        print(len(df))
        initial_events = len(df)
        df['sum_PX'] = df[[particle + "_PX" for particle in self.particles]].sum(axis=1)
        df['sum_PY'] = df[[particle + "_PY" for particle in self.particles]].sum(axis=1)
        df['sum_PT'] = np.sqrt(df['sum_PX']**2 + df['sum_PY']**2)
        if [percentage, tolerance].count(None) == 2 or [percentage, tolerance].count(None) == 0:
            raise Exception("ERROR: Only pass either a percentage or a tolerance to cut by")
        if percentage is None:
            # Use a tolerance based cut
            mean, std = np.mean(df['sum_PT']), np.std(df['sum_PT'])
            df = df.loc[(df['sum_PT'] < mean + (tolerance*std)) and (df['sum_PT'] > mean - (tolerance*std))]
        else:
            # Use a percentage based cut
            n_events_to_drop = int(np.round(len(df) - (len(df) * percentage)))
            for i in range(n_events_to_drop):
                idx = df['sum_PT'].idxmax()
                df.drop(idx, axis=0, inplace=True)
        self.set_events_cut(initial_events - len(df))
        print("MINIMAL CALLS UPDATE")
        self.d.update_data(df)

    def cut_particle_cone(self, particles, sigma=2):
        """ TODO: THIS ENTIRE THING IS BROKEN JUST REBUILD IT FROM THE BEGINNING
        Make a cut on these particles data based on the direction of motion of the particle.
        In essence cut around the cone of momentum which contains a certain ratio of the total
        particles. The cone is defined by two angles theta and phi. Removes all events from the
        background real LHCb data outside sigma levels of significance. 
        """
        import numpy as np

        removed_events = {}
        mc_data = Cut(False, particles).get_particle_data(['PT', 'PZ', 'PY', 'PX'])
        cut_around = {}
        for particle, pdata in mc_data.items():
            # For every particle in the simulated particle data
            phi = np.arctan(pdata['PZ']/pdata['PX'])
            theta = np.arctan(pdata['PY']/np.sqrt(pdata['PX']**2 + pdata['PZ']**2))
            cut_around[particle] = [[np.mean(phi)-(sigma*np.std(phi)), np.mean(phi)+(sigma*np.std(phi))], [np.mean(theta)-(sigma*np.std(theta)), np.mean(theta)+(sigma*np.std(theta))]]
            # now we have arrays for phi and theta of these particles
        try:
            for particle in particles:
                initial_events = len(self.data[particle])
                self.data[particle][phi] = np.arctan(self.data[particle]['PZ']/self.data[particle]['PX'])
                self.data[particle][theta] = np.arctan(self.data[particle]['PY']/np.sqrt(self.data[particle]['PX']**2 + self.data[particle]['PZ']**2))
                # Make the phi cut
                self.data[particle] = self.data[particle].loc[(self.data[particle][phi] >= cut_around[particle][0][0]) and (self.data[particle][phi] <= cut_around[particle][0][1])]
                # Make the theta cut
                self.data[particle] = self.data[particle].loc[(self.data[particle][theta] >= cut_around[particle][1][0]) and (self.data[particle][theta] <= cut_around[particle][1][1])]
                final_events = len(self.data[particle])
                removed_events[particle] = initial_events - final_events
        except:
            print("ERROR: Have you specified any specific particles to consider?")

        return removed_events

In [28]:

fName, suffix = Consts().get_real_tuple()
data = Data(fName, suffix)
my_cut = Cut(data, ['L1', 'L2', 'K', 'p'])
my_cut.nbody(4)

I TRIGGER AS WELL!


In [29]:
my_cut.minimal_pT(percentage=0.9)

12916
             totCandidates    L1_PX    L2_PX    K_PX     p_PX    L1_PY  \
eventNumber                                                              
19946                    4  -961.08   185.92 -260.21  -179.48  4281.79   
19946                    4  -961.08   185.92 -179.48  -510.33  4281.79   
19946                    4  -961.08   185.92 -260.21  -179.48  4281.79   
19946                    4  -961.08   185.92 -179.48  -510.33  4281.79   
489775                   4  4317.10  1993.28  553.12  1524.30  3562.75   

              L2_PY    K_PY    p_PY  
eventNumber                          
19946        991.68  443.11  176.13  
19946        991.68  176.13  862.83  
19946        991.68  443.11  176.13  
19946        991.68  176.13  862.83  
489775      -127.43  189.80  506.77  
I TRIGGER AS WELL!
26980
MINIMAL CALLS UPDATE
I TRIGGER AS WELL!


In [31]:
list(data.get_data()['totCandidates'].unique())

[4]

In [223]:
len(data.get_data())

16836