### Implement selection and sorting of electrons

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
# %matplotlib inline

from skimage.util import view_as_blocks
import logging

log = logging.getLogger(__name__)

### Importing the data

We are using the Merged Electron Region data.

In [2]:
# Load the merged electron-region data (event one) from CSV into a dataframe.
event_data = pd.read_csv("output_data/merged_energy_electron_data.csv") # Electron Region Event one

In [3]:
# Preview the first rows of the merged data to check the available columns
event_data.head()

Unnamed: 0,event,eta,phi,et,position,electron,tau
0,1,0,0,7.23932,3,0,0
1,1,0,1,14.7899,6,0,1
2,1,0,2,8.17547,4,0,1
3,1,0,3,6.89318,2,0,0
4,1,0,4,9.8915,7,0,0


### Reduce the dimensions of the matrix using 2x2 regions

- We will now reduce the dimensions of our matrix from 14x18 to 7x9 by replacing every 2x2 region with a single value.
- We first split the 14x18 matrix into 2x2 blocks.
- Then, for every 2x2 block, we find the positions where the signal bit is True (1) and save both their position inside the block and their actual index in the original matrix.

In [4]:
def find_energy_positions(event_data, signal):
    """
    Function to find, for every 2x2 region of the 14x18 grid, the cells whose signal bit is True (1).
    Args:
        event_data: Dataframe containing event data
        signal: Type of signal data ['electron', 'tau']
    Returns:
        energy_dict: Dictionary keyed by the region number (position of the 2x2 block in the
            flattened list of blocks). Each value is a two element list:
            - region with at least one True bit: [positions of the True cells inside the
              flattened 2x2 block, list of {'actual_index': ...} for those cells], e.g.
              {22: [[2, 3],
                    [{'actual_index': 98}, {'actual_index': 99}]]}
            - region without any True bit: ['NaN', list of {'actual_index': ...} for all
              four cells of the block]
        matrices_with_energy: List of region numbers that contain at least one True bit.
    """
    # Configuring the size of sub regions
    s = 2

    # 2x2 blocks of the signal matrix, each flattened into a row of s**2 values
    blocks_signal = view_as_blocks(get_matrix(event_data, signal), (s, s)).reshape(-1, s**2)

    # Matching 2x2 blocks of the dataframe index, flattened the same way
    blocks_idx = view_as_blocks(get_matrix(event_data, 'index'), (s, s)).reshape(-1, s**2)

    energy_dict = {}
    matrices_with_energy = []

    for i, (sub, sub_idx) in enumerate(zip(blocks_signal, blocks_idx)):
        # Positions inside the flattened block where the signal bit is set
        pos_list = [idx for idx, element in enumerate(sub) if element == 1]

        if pos_list:
            # Keep only the actual indices of the cells that carry the signal
            actual_idx_list = [{'actual_index': sub_idx[idx]} for idx in pos_list]
            energy_dict[i] = [pos_list, actual_idx_list]
            matrices_with_energy.append(i)
            # Lazy %-style arguments: the message is only formatted when INFO is enabled
            log.info("Contains Signal in: %s", pos_list)
            log.info("Energy is present in matrix: %s", i)
        else:
            # No True bit: remember the actual index of every cell of the block
            all_idx_list = [{'actual_index': actual_index} for actual_index in sub_idx]
            energy_dict[i] = ['NaN', all_idx_list]
            log.info("No Signal in row")
            log.info("No energy in matrix: %s", i)

    return energy_dict, matrices_with_energy

The function below returns the values of a dataframe column (or of the dataframe index, when the column is 'index') as a 14x18 numpy matrix.

In [5]:
def get_matrix(event_data, column, shape=(14, 18)):
    """
    Function to get the values of a column (or of the index) as a 2-D numpy matrix.

    Args:
        event_data: Dataframe containing event data
        column: Name of column to get the matrix for. The special value 'index'
            selects the dataframe index instead of a column.
        shape: (rows, columns) of the returned matrix. Defaults to (14, 18).
    Returns:
        numpy matrix of the requested shape (14x18 by default), filled row by row.
    Raises:
        ValueError: if the number of values does not match rows * columns.
    """
    if column != 'index':
        # np.array copies, so the dataframe is never modified through the result
        values = np.array(event_data[column])
    else:
        values = np.array(event_data.index.to_list())

    # reshape checks that the element count fits the requested shape and
    # reports a clear size mismatch error otherwise
    return np.asmatrix(values.reshape(shape))

### Getting actual energy values and their index

- Previously, we obtained the positions of the signal cells inside each 2x2 submatrix as well as their actual index in the original matrix.
- We will now extract the actual energy values (et) together with their actual index.

In [6]:
def get_list_of_energy_index(event_data, signal):
    """
    Get the representative energy and corresponding actual index for every 2x2 region.

    For a region with at least one True signal bit, the largest et among the cells
    carrying the signal is kept, keyed by the actual index of that cell. For a region
    without any True signal bit, the actual index of its first cell is stored with et 0.

    Args:
        event_data: Dataframe containing event data
        signal: Type of signal data ['electron', 'tau']
    Returns:
        energy_index_value: Dictionary containing actual index value of energies as key and et values as value.
        {34: 12.8206,
        22: 4.587}
    """
    # size of submatrix is 2x2
    s = 2

    # 2x2 blocks of the energy (et) matrix, each flattened into a row of s**2 values
    blocks_energy = view_as_blocks(get_matrix(event_data, 'et'), (s, s)).reshape(-1, s**2)

    energy_dict, matrices_with_energy = find_energy_positions(event_data, signal)

    # Set lookup keeps the membership test inside the loop constant time
    regions_with_signal = set(matrices_with_energy)

    energy_index_value = {}

    for row, sub in enumerate(blocks_energy):
        positions, index_entries = energy_dict[row]

        if row in regions_with_signal:
            log.info("Contains energy values in row: %s", row)

            # et values of the cells that carry the signal, in the same order
            # as the actual index entries stored for this region
            candidates = sub[positions]
            best = np.argmax(candidates)

            # Key: actual index (in the 14x18 grid) of the strongest signal cell
            actual_index_val = index_entries[best]['actual_index']
            energy_index_value[actual_index_val] = candidates[best]
        else:
            # No True signal in this region: use its first cell and set ET to 0
            energy_index_value[index_entries[0]['actual_index']] = 0

    return energy_index_value

### Selecting and sorting data

- We will now get the list of selected energy values and their actual index, one per 2x2 region.
- We map these values onto a copy of our original dataframe using the actual index and replace the energies there.
- All rows other than the ones we replaced do not carry a selected signal, so we set their energy (et) to zero.
- Finally, we keep the non-zero rows of our dataframe, sort them in decreasing order of energy and select the top 6 rows.

In [7]:
def select_and_sort_data(event_data, signal, selection=6):
    """
    Function to sort and select limited amount of values from rows containing energy (with True signals).
    Args:
        event_data: Dataframe containing event data. It is not modified; a copy is used.
        signal: Type of signal data ['electron', 'tau']
        selection: Number of data rows to select. Defaults to 6.
    Returns:
        event_data_final: Dataframe containing the specified number of reduced data by selection and sorting
            (rows with non-zero et, sorted by et in decreasing order).
    """
    # Get a copy of original dataframe
    event_data_final = event_data.copy()

    # Get the selected energy for every region, keyed by actual index
    energy_index_list = get_list_of_energy_index(event_data, signal)
    selected_index = list(energy_index_list.keys())

    # replace the new values of energy based on index
    event_data_final.loc[selected_index, 'et'] = list(energy_index_list.values())

    # Every other row was not selected for its region, so its energy is
    # cleared in one vectorised assignment instead of a row-by-row loop
    not_selected = ~event_data_final.index.isin(selected_index)
    event_data_final.loc[not_selected, 'et'] = 0

    # Get non-zero values
    event_data_final = event_data_final[event_data_final['et'] != 0]

    # Sort them in decreasing order of energy and keep the top rows
    return event_data_final.sort_values(by='et', ascending=False).head(selection)

In [8]:
# Get the selected rows for electron signals, sorted by decreasing et
sorted_data_electron = select_and_sort_data(event_data, 'electron')

In [9]:
# Display the selected electron rows
sorted_data_electron

Unnamed: 0,event,eta,phi,et,position,electron,tau
49,1,2,13,64.3284,4,1,1
40,1,2,4,16.15002,2,1,0
219,1,12,3,15.3848,8,1,0
85,1,4,13,13.7001,9,1,1
98,1,5,8,13.4624,15,1,0
63,1,3,9,13.3519,15,1,1


In [10]:
# Get the selected rows for tau signals, sorted by decreasing et
sorted_data_tau = select_and_sort_data(event_data, 'tau')

In [11]:
# Display the selected tau rows
sorted_data_tau

Unnamed: 0,event,eta,phi,et,position,electron,tau
49,1,2,13,64.3284,4,1,1
141,1,7,15,16.0898,1,0,1
1,1,0,1,14.7899,6,0,1
81,1,4,9,14.6035,3,0,1
244,1,13,10,14.49,12,0,1
85,1,4,13,13.7001,9,1,1


**TODO:**
Determine the time complexity of the selection and sorting steps.