# Reconstruction

In [7]:
import numpy as np
import uproot as up
import numba as nb
from multiprocessing import Pool
import pandas as pd
import matplotlib.pyplot as plot
from scipy.stats import norm
from scipy.optimize import curve_fit
import sympy
from IPython.display import clear_output
import math
import time
import util
#My imports
from util import PVect,get_layer,create_layer_map,theta_func,phi_func,findBin,bin_percent_theta_phi, train, test, create_data, create_data_depth,p_func, calculate_num_pixels,Classifier,plot_roc_curve
import torch

In [8]:
# Run configuration. `particle` and `energy` are plain strings; the input path
# further down is hard-coded and does not interpolate them.
particle = "mu"
energy = "5"
# Plot colour per particle species name.
color_dict = {
    "pi" : "red",
    "mu" : "blue"
}
# Integer label per PDG particle code (-211 is the pi-, 13 the mu- in the PDG
# numbering scheme).
# NOTE(review): get_label() computes the opposite assignment (-211 -> 0,
# 13 -> 1) — confirm which convention is intended.
part_dict = {
    -211 : 1,
    13 : 0
}
# Lookup tables built by util.create_layer_map(); `super_layer_map` is the one
# handed to get_layer() inside process_data().
layer_map, super_layer_map = create_layer_map()

In [143]:
# @nb.njit
def inverse(x, a, b, c):
    """Evaluate the shifted reciprocal ``a / (x + b) + c``.

    Only arithmetic operators are used, so ``x`` may be a scalar or a NumPy
    array (the result is then element-wise). There is no guard against
    ``x == -b``.
    """
    shifted = x + b
    return a / shifted + c

# @nb.njit
def calculate_num_pixels_z_dependence(energy_dep, z_hit):
    """Scale a deposited energy by a z-dependent efficiency.

    The efficiency is ``inverse(770 - z_hit, 494.98, 9.9733, -0.16796)``; the
    return value is ``10 * energy_dep * 1e6 * efficiency / 100`` and is not
    rounded here. Scalars and NumPy arrays are both accepted because only
    arithmetic is applied.

    NOTE(review): the constants look like fit parameters of an efficiency
    curve expressed in percent (hence the ``/ 100``) — confirm their origin
    and the units of ``energy_dep`` and ``z_hit``.
    """
    offset = 770 - z_hit
    efficiency = inverse(offset, 494.98, 9.9733, -0.16796)
    scale = 1000 * 1000
    return 10 * energy_dep * scale * efficiency / 100
def calculate_efficiency(z_hit):
    """Return ``inverse(770 - z_hit, 494.98, 9.9733, -0.16796)``.

    This is the same efficiency expression that
    calculate_num_pixels_z_dependence() evaluates internally.
    """
    offset = 770 - z_hit
    return inverse(offset, 494.98, 9.9733, -0.16796)

In [144]:
# Number of layer slots allocated per particle in the per-event arrays that
# process_data() builds.
num_layers = 28

def get_label(PDG):
    """Map a PDG code to an integer label via ``(PDG + 211) // 224``.

    With this formula -211 maps to 0 and 13 maps to 1; other codes are not
    special-cased and simply go through the same arithmetic.

    NOTE(review): ``part_dict`` in the configuration cell assigns the opposite
    labels (-211 -> 1, 13 -> 0) — confirm which convention is intended.
    """
    shifted = PDG + 211
    return shifted // 224

def create_unique_mapping(arr):
    """Number the distinct values of ``arr`` in ascending order.

    Parameters
    ----------
    arr : array-like
        Values to index (in this notebook, the MC-particle index of every hit
        in one event).

    Returns
    -------
    n_unique : int
        How many distinct values ``arr`` contains (0 for empty input).
    value_to_index : dict
        Maps each distinct value to its position in the sorted array of
        distinct values, i.e. a dense index in ``range(n_unique)``.
    """
    # np.unique returns the distinct values sorted; the inverse-index array
    # the previous version requested was never used, so it is not computed.
    unique_values = np.unique(arr)
    value_to_index = {val: idx for idx, val in enumerate(unique_values)}
    return len(unique_values), value_to_index

def process_data(uproot_path, file_num=0, particle="pion"):
    """Collect per-event, per-particle, per-layer hit features from a ROOT file.

    Parameters
    ----------
    uproot_path : str
        Location handed to ``up.open``; the returned object is indexed by
        branch name (the notebook passes ``"<file>.root:events"``).
    file_num : int, optional
        Currently unused; kept so existing callers keep working.
    particle : str, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    list of numpy.ndarray
        One array per event, shape ``(n_particles, num_layers, 5)``, where
        ``n_particles`` is the number of distinct MC-particle indices among
        the event's hits. The last axis holds, for each (particle, layer):

        0. z position of the first hit seen in that slot
        1. time of that first hit
        2. ``arctan2(sqrt(px**2 + py**2), pz)`` of that first hit
        3. momentum magnitude of that first hit
        4. ``floor(calculate_num_pixels_z_dependence(summed EDep, z))``

        Slots that received no hit keep -1 in features 0-3; feature 4 is then
        the pixel formula evaluated on the -1 placeholders and is not a
        meaningful count, so consumers must filter on the -1 sentinel.
    """
    data = []

    # Every branch is materialised as NumPy arrays inside the ``with`` block,
    # so the file can be closed before the (slow) Python-level hit loop runs.
    with up.open(uproot_path) as events:
        x_pos_branch = events["HcalBarrelHits/HcalBarrelHits.position.x"].array(library='np')
        z_pos_branch = events["HcalBarrelHits/HcalBarrelHits.position.z"].array(library='np')
        EDep_branch = events["HcalBarrelHits.EDep"].array(library='np')
        x_momentum_branch = events["HcalBarrelHits/HcalBarrelHits.momentum.x"].array(library='np')
        y_momentum_branch = events["HcalBarrelHits/HcalBarrelHits.momentum.y"].array(library='np')
        z_momentum_branch = events["HcalBarrelHits/HcalBarrelHits.momentum.z"].array(library='np')
        Hits_MC_idx_branch = events["_HcalBarrelHits_MCParticle.index"].array(library='np')
        time_branch = events["HcalBarrelHits.time"].array(library='np')

    num_events = len(x_pos_branch)
    for event_idx in range(num_events):
        Hits_MC_idx_event = Hits_MC_idx_branch[event_idx]
        # Dense 0..n-1 row index for every MC particle that produced a hit.
        n_unique_parts, idx_dict = create_unique_mapping(Hits_MC_idx_event)

        # -1 marks "no hit recorded yet" for a (particle, layer) slot.
        slot_shape = (n_unique_parts, num_layers)
        p_layer_list = np.full(slot_shape, -1.0)
        z_hit_layer_list = np.full(slot_shape, -1.0)
        theta_layer_list = np.full(slot_shape, -1.0)
        hit_time_layer_list = np.full(slot_shape, -1.0)
        edep_event = np.full(slot_shape, -1.0)

        x_pos_event = x_pos_branch[event_idx]
        px_event = x_momentum_branch[event_idx]
        py_event = y_momentum_branch[event_idx]
        pz_event = z_momentum_branch[event_idx]
        z_event = z_pos_branch[event_idx]
        time_event = time_branch[event_idx]
        EDep_event = EDep_branch[event_idx]

        for hit_idx in range(len(x_pos_event)):
            part_idx = idx_dict[Hits_MC_idx_event[hit_idx]]
            layer_idx = get_layer(x_pos_event[hit_idx], super_layer_map)
            if layer_idx == -1:  # get_layer signals "no matching layer" with -1
                continue

            if p_layer_list[part_idx, layer_idx] == -1:
                # First hit of this particle in this layer: it defines the
                # kinematic features and starts the energy sum.
                pt_squared = px_event[hit_idx]**2 + py_event[hit_idx]**2
                p_layer_list[part_idx, layer_idx] = np.sqrt(pt_squared + pz_event[hit_idx]**2)
                z_hit_layer_list[part_idx, layer_idx] = z_event[hit_idx]
                theta_layer_list[part_idx, layer_idx] = np.arctan2(np.sqrt(pt_squared), pz_event[hit_idx])
                hit_time_layer_list[part_idx, layer_idx] = time_event[hit_idx]
                edep_event[part_idx, layer_idx] = EDep_event[hit_idx]
            else:
                # Later hits in the same slot only add to the deposited energy.
                edep_event[part_idx, layer_idx] += EDep_event[hit_idx]

        num_pixels = np.floor(
            calculate_num_pixels_z_dependence(edep_event, z_hit_layer_list)
        ).astype(int)
        data.append(
            np.stack(
                [z_hit_layer_list, hit_time_layer_list, theta_layer_list, p_layer_list, num_pixels],
                axis=-1,
            )
        )

    # One array per event, shape (#unique particles, #layers, #features);
    # features: z hit, hit time, theta, p, number of pixels.
    return data

In [145]:
# Hard-coded, machine-specific input: a muon sample file, with ":events"
# appended to the file path. The f-string has no placeholders, so the
# `particle` / `energy` settings defined earlier are NOT interpolated here.
up_path = f"/cwork/rck32/eic/work_eic/root_files/June_18/variation_sector_scint_uniform/mu/variation_10kevents_file_11.edm4hep.root:events"
# `data` is the list returned by process_data(): one 3-D array per event.
data = process_data(up_path)

In [151]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

def prepare_data_for_nn(processed_data):
    """Flatten per-event arrays into one row per counted pixel.

    Parameters
    ----------
    processed_data : sequence of numpy.ndarray
        One array per event with shape ``(n_particles, n_layers, >=5)``. The
        first four entries of the last axis are the features; the fifth is the
        repeat count (truncated toward zero to an integer).

    Returns
    -------
    features_array : numpy.ndarray, shape ``(N, 4)``
        The four features of every kept (particle, layer) slot, each repeated
        ``count`` times.
    metadata_array : numpy.ndarray, shape ``(N, 3)``, integer
        ``(event_idx, particle_idx, layer_idx)`` for every row of
        ``features_array``.

    A slot is kept only when none of its four features equals the -1 sentinel
    and its repeat count is positive. Rows are ordered by event, then
    particle, then layer. When nothing is kept the arrays are empty with
    shapes ``(0, 4)`` and ``(0, 3)``.
    """
    feature_blocks = []
    metadata_blocks = []

    for event_idx, event_data in enumerate(processed_data):
        slot_features = event_data[:, :, :4]
        repeat_counts = event_data[:, :, 4].astype(int)

        # Keep a slot only if no feature is the -1 sentinel and the count is > 0.
        keep = ~np.any(slot_features == -1, axis=-1) & (repeat_counts > 0)
        # np.nonzero walks the mask in row-major order: particle first, then layer.
        particle_ids, layer_ids = np.nonzero(keep)
        if particle_ids.size == 0:
            continue

        counts = repeat_counts[keep]
        # Repeating whole blocks with NumPy avoids building Python lists with
        # one element per output row (tens of millions for a full file).
        feature_blocks.append(np.repeat(slot_features[keep], counts, axis=0))
        slot_metadata = np.column_stack(
            (np.full(particle_ids.size, event_idx), particle_ids, layer_ids)
        ).astype(np.int64)
        metadata_blocks.append(np.repeat(slot_metadata, counts, axis=0))

    if not feature_blocks:
        return np.empty((0, 4)), np.empty((0, 3), dtype=np.int64)

    features_array = np.concatenate(feature_blocks)
    metadata_array = np.concatenate(metadata_blocks)

    return features_array, metadata_array

def create_dataloader(features, metadata, batch_size=32):
    """Wrap feature and metadata arrays in a shuffling PyTorch DataLoader.

    ``features`` is converted to a float32 tensor and ``metadata`` to an int64
    (``torch.long``) tensor; both are paired row-by-row in a TensorDataset.
    The returned DataLoader yields ``(features, metadata)`` batches of
    ``batch_size`` rows with ``shuffle=True``.
    """
    paired_tensors = (
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(metadata, dtype=torch.long),
    )
    return DataLoader(TensorDataset(*paired_tensors), batch_size=batch_size, shuffle=True)

# `data` is the list of per-event 3-D arrays returned by process_data() above.
features, metadata = prepare_data_for_nn(data)

print("Features shape:", features.shape)
print("Metadata shape:", metadata.shape)

# Wrap the flattened arrays in a shuffling DataLoader.
batch_size = 32
dataloader = create_dataloader(features, metadata, batch_size)

# Smoke test: pull a single batch and show where each row came from.
for batch_features, batch_metadata in dataloader:
    print("Batch features shape:", batch_features.shape)
    print("Batch metadata shape:", batch_metadata.shape)
    
    # batch_features is what would be fed to the neural network.
    
    # Each metadata row is (event index, particle index, layer index) for the
    # matching row of batch_features.
    for i in range(batch_metadata.shape[0]):
        event_idx, particle_idx, layer_idx = batch_metadata[i]
        print(f"Datapoint {i} in batch: Event {event_idx}, Particle {particle_idx}, Layer {layer_idx}")
    
    break  # Only the first batch is needed for the demonstration.

Features shape: (72494710, 4)
Metadata shape: (72494710, 3)
Batch features shape: torch.Size([32, 4])
Batch metadata shape: torch.Size([32, 3])
Datapoint 0 in batch: Event 9585, Particle 0, Layer 11
Datapoint 1 in batch: Event 1173, Particle 0, Layer 10
Datapoint 2 in batch: Event 6061, Particle 5, Layer 11
Datapoint 3 in batch: Event 8351, Particle 2, Layer 3
Datapoint 4 in batch: Event 8820, Particle 2, Layer 5
Datapoint 5 in batch: Event 7034, Particle 0, Layer 4
Datapoint 6 in batch: Event 5294, Particle 0, Layer 15
Datapoint 7 in batch: Event 5774, Particle 0, Layer 12
Datapoint 8 in batch: Event 7034, Particle 1, Layer 2
Datapoint 9 in batch: Event 9057, Particle 31, Layer 22
Datapoint 10 in batch: Event 8364, Particle 0, Layer 4
Datapoint 11 in batch: Event 6128, Particle 0, Layer 25
Datapoint 12 in batch: Event 8372, Particle 0, Layer 11
Datapoint 13 in batch: Event 6519, Particle 0, Layer 19
Datapoint 14 in batch: Event 1277, Particle 0, Layer 5
Datapoint 15 in batch: Event 88

In [154]:
# Bare expression: the notebook echoes the DataLoader's repr as the cell output.
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7f6f70a3cbe0>