In [1]:
# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from skimage.util import view_as_blocks

# %matplotlib inline

### Importing the data

We are using the Electron Region data.

In [2]:
# Load the region readout for event one (electron regions); the path is relative to the working directory
event_data = pd.read_csv("output_data/Event_one_Electron.csv") # Electron Region Event one

In [3]:
# Preview the first rows of the loaded event data
event_data.head()

Unnamed: 0,event,eta,phi,et,position,electron,tau
0,1,0,0,7.23932,3,0,0
1,1,0,1,14.7899,6,0,1
2,1,0,2,8.17547,4,0,1
3,1,0,3,6.89318,2,0,0
4,1,0,4,9.8915,7,0,0


**Data Dictionary**

- event: Event number (Integer)
- eta: Index of eta coordinate 0-13 (Integer)
- phi: Index of phi coordinate, 0-17, which also specifies the optical link (Integer)
- et: Transverse energy deposited in the region (Float)
- position: Position of the cluster within the region, a 4-bit number (Integer)
- electron: Electron bit signifying that the region likely has an electron or photon deposit (Bool)
- tau: Tau bit signifying that the region likely has a tau deposit (Bool)

In [4]:
# Check the shape (rows, columns) of the loaded dataframe
event_data.shape

(252, 7)

### Position values

In [5]:
# Pull the cluster positions out of the dataframe as a flat numpy array
x_pos = np.array(event_data['position'])

# Lay the flat values out row by row as a 14x18 matrix
# (assumes the rows are ordered by eta, then phi -- TODO confirm)
matrix_position = np.asmatrix(x_pos).reshape((14, 18))

### ET values

In [7]:
# Pull the transverse energies out of the dataframe as a flat numpy array
x_et = np.array(event_data['et'])

# Lay the flat values out row by row as a 14x18 matrix
# (assumes the rows are ordered by eta, then phi -- TODO confirm)
matrix_et = np.asmatrix(x_et).reshape((14, 18))

In [8]:
def get_matrix(event_data, column, shape=(14, 18)):
    """
    Function to get matrix data from a series of column.

    The values are laid out row by row, so the value of dataframe row ``i``
    ends up at ``[i // shape[1], i % shape[1]]``.

    Args:
        event_data: Dataframe containing event data
        column: Name of column to get the matrix for. The special name
            'index' selects the dataframe's index labels instead of a column.
        shape: (rows, columns) of the returned matrix. Defaults to (14, 18).
    Returns:
        numpy matrix of size ``shape`` (14x18 by default).
    Raises:
        ValueError: If the number of values does not equal rows * columns.
    """
    if column == 'index':
        # Going through a list gives a fresh array, so the returned matrix
        # never shares memory with the dataframe's index.
        x_data = np.array(event_data.index.to_list())
    else:
        # np.array copies, so the returned matrix never aliases the dataframe
        x_data = np.array(event_data[column])

    n_rows, n_cols = shape
    expected = n_rows * n_cols
    if x_data.size != expected:
        # Fail with an explicit message instead of numpy's generic resize/reshape error
        raise ValueError(
            f"column {column!r} has {x_data.size} values; "
            f"a {n_rows}x{n_cols} matrix needs exactly {expected}"
        )

    # reshape returns a view of the right shape; np.matrix is kept as the
    # return type so existing callers see the same kind of object.
    return np.asmatrix(x_data).reshape((n_rows, n_cols))

- We will now get the value of Energy (ET) in all 4 regions of the 2x2 submatrix and sum them. 
- We will then find the region with the highest initial ET in each submatrix and assign the summed value to that region.
- Next, we will map that region back to its actual row index in the dataframe and set the ET of all other regions to zero.
- Finally, we will keep the rows where ET is not zero, sort them by ET in descending order, and select the top 6.

In [9]:
def get_energy_sums(event_data):
    """
    Function to sum the energy (ET) of every 2x2 block of the ET matrix.

    Args:
        event_data: Dataframe containing event data
    Returns:
        dict mapping, for each block, the dataframe index label of the region
        with the highest ET in that block to the summed ET of the block.
    """
    # Edge length of the square sub regions, and the number of regions in each
    side = 2
    regions_per_block = side ** 2

    # Tile the ET matrix into side x side blocks, one flattened block per row
    et_matrix = get_matrix(event_data, 'et')
    et_blocks = view_as_blocks(et_matrix, (side, side)).reshape(-1, regions_per_block)

    # Tile the index matrix the same way so every energy keeps its row label
    index_matrix = get_matrix(event_data, 'index')
    index_blocks = view_as_blocks(index_matrix, (side, side)).reshape(-1, regions_per_block)

    # Key: label of the highest-energy region in the block (np.argmax picks the
    # first one on ties); value: total energy of the block
    return {
        labels[np.argmax(energies)]: np.sum(energies)
        for energies, labels in zip(et_blocks, index_blocks)
    }

In [10]:
def select_and_sort_data(event_data, selection=6):
    """
    Function to reduce the event to its highest block energy sums.

    The region with the highest ET in each block (as reported by
    get_energy_sums) receives the block's summed ET. Every other region is
    dropped, as are regions whose summed ET is zero. The remaining rows are
    sorted by ET, highest first, and the first `selection` rows are returned.

    Args:
        event_data: Dataframe containing event data. It is not modified.
        selection: Maximum number of rows to return. Defaults to 6.
    Returns:
        event_data_final: Dataframe containing at most `selection` rows, with
            'et' replaced by the block sums and sorted in descending 'et' order.
    """
    # Work on a copy so the caller's dataframe keeps its original energies
    event_data_final = event_data.copy()

    # Mapping: index label of each block's highest-energy region -> block energy sum
    energy_by_index = get_energy_sums(event_data)
    block_maxima = list(energy_by_index.keys())

    # Replace the energy of each block's highest-energy region with the block sum
    event_data_final.loc[block_maxima, 'et'] = list(energy_by_index.values())

    # Keep only the block maxima that carry energy. This is the same result as
    # zeroing every other row and then dropping the zero rows, without the
    # per-row Python loop.
    keep = (event_data_final['et'] != 0) & event_data_final.index.isin(block_maxima)
    event_data_final = event_data_final[keep]

    # Sort by energy, highest first, and keep the leading rows
    return event_data_final.sort_values(by='et', ascending=False)[:selection]

In [11]:
# Show the selected block energy sums for this event, highest ET first
select_and_sort_data(event_data)

Unnamed: 0,event,eta,phi,et,position,electron,tau
49,1,2,13,96.3013,4,1,1
218,1,12,2,55.64794,3,0,0
244,1,13,10,48.95422,12,0,1
164,1,9,2,48.6056,7,0,0
63,1,3,9,48.4978,15,1,1
168,1,9,6,47.5832,8,0,1
