In [1]:
import uproot as ur
import awkward as ak
import pandas as pd
import itertools
import numpy as np

In [8]:
# Load file
input_file = "../../genIII_data/Sim/p_2.5GeV.edm4hep.root"
events = ur.open(f"{input_file}:events")

# Load jagged arrays from branches
energy = events["HcalFarForwardZDCHits/HcalFarForwardZDCHits.energy"].array()
x = events["HcalFarForwardZDCHits/HcalFarForwardZDCHits.position.x"].array()
y = events["HcalFarForwardZDCHits/HcalFarForwardZDCHits.position.y"].array()
z = events["HcalFarForwardZDCHits/HcalFarForwardZDCHits.position.z"].array()

contrib_begin = events["HcalFarForwardZDCHits/HcalFarForwardZDCHits.contributions_begin"].array()
time_all = events["HcalFarForwardZDCHitsContributions/HcalFarForwardZDCHitsContributions.time"].array()

# Use the begin indices to select the first time for each hit
first_time = time_all[contrib_begin]

# Add event number as a jagged array matching hit counts
event_nums = ak.local_index(energy)

# Combine into a jagged array of records
hits = ak.zip({
    "event": event_nums,
    "energy_GeV": energy,
    "x": x,
    "y": y,
    "z": z - np.min(z),
    "t": first_time
})

# Flatten to get one row per hit
flat_hits = ak.flatten(hits, axis=0)

# Convert to pandas DataFrame
df = ak.to_dataframe(flat_hits).reset_index(drop=True)

In [9]:
print(df)

        event    energy_GeV          x          y           z           t
0           0  1.281608e-03  12.200000  12.200000    0.000000    0.192782
1           1  9.080343e-04 -12.200000 -12.200000   27.111996    0.294401
2           2  2.428513e-03  12.200000  12.200000   54.224003    0.391181
3           3  8.517034e-06  36.599998 -12.200000   81.335999    6.961655
4           4  6.111864e-05  12.200000 -36.599998  325.343994   48.113235
...       ...           ...        ...        ...         ...         ...
201843     23  1.998560e-06  85.400002  85.400002  298.231995   90.514847
201844     24  7.549862e-07  85.400002  36.599998  298.231995  116.756233
201845     25  3.279125e-05  36.599998 -61.000000  244.008011   27.167301
201846     26  1.711879e-05  85.400002 -12.200000  244.008011   23.226875
201847     27  3.697388e-05 -36.599998  12.200000  162.671997    0.951371

[201848 rows x 6 columns]


In [10]:
unique_xyz = df[['x', 'y', 'z']].drop_duplicates()
unique_events = df['event'].drop_duplicates()

# Step 3: Create a Cartesian product of all event × (x, y, z)
full_grid = pd.merge(
    unique_events.to_frame(name='event').assign(dummy=1),
    unique_xyz.assign(dummy=1),
    on='dummy'
).drop(columns='dummy')

# Step 4: Merge original data into this grid
full_df = pd.merge(
    full_grid,
    df,
    on=['event', 'x', 'y', 'z'],
    how='left'
)

# Step 5: Fill in missing energies with 0
full_df['energy_GeV'] = full_df['energy_GeV'].fillna(0)

# Optional: convert energy to float (if needed)
full_df['energy_GeV'] = full_df['energy_GeV'].astype(float)
full_df = full_df.sort_values(by=["event", "z", "y", "x"]).reset_index(drop=True)


# Step 1: Assign "layer" by ranking unique z values
z_to_layer = {z: i for i, z in enumerate(sorted(full_df['z'].unique()))}
full_df['layer'] = full_df['z'].map(z_to_layer)

# Step 2: Sort again to ensure correct order before assigning layer_ch
full_df = full_df.sort_values(by=["event", "layer", "y", "x"]).reset_index(drop=True)

# Step 3: Assign layer_ch within each event/layer group
full_df['layer_ch'] = full_df.groupby(['event', 'layer']).cumcount()
# Add energy_MIP column
full_df["energy_MIP"] = full_df["energy_GeV"] / 0.000875127161931984


In [11]:
print(full_df)

        event           x          y           z  energy_GeV         t  layer  \
0           0  -85.400002 -85.400002    0.000000    0.000000       NaN      0   
1           0  -36.599998 -85.400002    0.000000    0.000000       NaN      0   
2           0   12.200000 -85.400002    0.000000    0.000000       NaN      0   
3           0   61.000000 -85.400002    0.000000    0.000004  0.472079      0   
4           0  109.800003 -85.400002    0.000000    0.000000       NaN      0   
...       ...         ...        ...         ...         ...       ...    ...   
223661    116 -109.800003  85.400002  406.679993    0.000000       NaN     15   
223662    116  -61.000000  85.400002  406.679993    0.000000       NaN     15   
223663    116  -12.200000  85.400002  406.679993    0.000000       NaN     15   
223664    116   36.599998  85.400002  406.679993    0.000000       NaN     15   
223665    116   85.400002  85.400002  406.679993    0.000000       NaN     15   

        layer_ch  energy_MI