In [2]:
import uproot as ur
import awkward as ak
import pandas as pd
import itertools
import numpy as np
import pickle

In [3]:
# Load the simulated ZDC hits from an EDM4hep ROOT file into a flat pandas DataFrame
# with one row per hit and the columns: event, energy_GeV, x, y, z, t.
# NOTE(review): this is an absolute, machine-specific path; consider making the data
# directory configurable so the notebook runs on other machines.
input_file = "/Users/seanbp/Documents/Data/GenII_Prototype_Data/Sim/5deg_e+_3.3GeV.edm4hep.root"

# Branch-name prefixes shared by the reads below
hits_prefix = "HcalFarForwardZDCHits/HcalFarForwardZDCHits"
contribs_prefix = "HcalFarForwardZDCHitsContributions/HcalFarForwardZDCHitsContributions"

# Read everything inside a context manager so the ROOT file handle is released
# as soon as the arrays are in memory (previously the file was left open).
with ur.open(f"{input_file}:events") as events:
    # Load jagged arrays from branches (one variable-length list of hits per event)
    energy = events[f"{hits_prefix}.energy"].array()
    x = events[f"{hits_prefix}.position.x"].array()
    y = events[f"{hits_prefix}.position.y"].array()
    z = events[f"{hits_prefix}.position.z"].array()

    contrib_begin = events[f"{hits_prefix}.contributions_begin"].array()
    time_all = events[f"{contribs_prefix}.time"].array()

# Use each hit's begin index to pick the time of its first listed contribution
first_time = time_all[contrib_begin]

# One running index per event (0, 1, 2, ...)
event_nums = ak.local_index(energy, axis=0)

# Broadcast the per-event index so that every hit carries its event number
event_nums = ak.broadcast_arrays(energy, event_nums)[1]

# Combine into a jagged array of records; z is shifted so the smallest z in the file is 0
hits = ak.zip({
    "event": event_nums,
    "energy_GeV": energy,
    "x": x,
    "y": y,
    "z": z - np.min(z),
    "t": first_time
})

# NOTE(review): per the awkward documentation, flatten(axis=0) only removes top-level
# None entries and does not merge the per-event lists; the one-row-per-hit layout
# appears to come from ak.to_dataframe below. Kept as-is to preserve behaviour; confirm.
flat_hits = ak.flatten(hits, axis=0)

# Convert to a pandas DataFrame and discard the index produced by the conversion
df = ak.to_dataframe(flat_hits).reset_index(drop=True)

In [4]:
# Pad the hit table so that every event has one row for every cell position seen
# anywhere in the file (cells without a hit get energy 0 and t = NaN), then label
# each cell with a layer index and a per-layer channel index.

# Conversion factor used below: energy_MIP = energy_GeV / MIP_ENERGY_GEV
MIP_ENERGY_GEV = 0.000875127161931984

unique_events = df[['event']].drop_duplicates()
unique_xyz = df[['x', 'y', 'z']].drop_duplicates()

# Step 1: Build the channel map once from the unique cell positions.
# Layers are numbered by increasing z; channels within a layer are numbered in
# (y, x) order. Deriving this from the cell positions (rather than counting rows
# of the merged hit table) keeps channel numbers identical across events even if
# an event contains more than one row for the same cell.
cell_table = unique_xyz.sort_values(by=['z', 'y', 'x']).reset_index(drop=True)
z_to_layer = {z: i for i, z in enumerate(sorted(cell_table['z'].unique()))}
cell_table['layer'] = cell_table['z'].map(z_to_layer)
cell_table['layer_ch'] = cell_table.groupby('layer').cumcount()

# Step 2: Cartesian product of events and cell positions
full_grid = unique_events.merge(unique_xyz, how='cross')

# Step 3: Attach the recorded hits (left join keeps cells with no hit),
# then attach the layer / layer_ch labels for every cell
full_df = pd.merge(
    full_grid,
    df,
    on=['event', 'x', 'y', 'z'],
    how='left'
).merge(
    cell_table,
    on=['x', 'y', 'z'],
    how='left'
)

# Step 4: Fill in missing energies with 0 and make sure the column is float
full_df['energy_GeV'] = full_df['energy_GeV'].fillna(0).astype(float)

# Step 5: Order rows by event, then layer, then (y, x) within the layer
full_df = full_df.sort_values(by=["event", "layer", "y", "x"]).reset_index(drop=True)

# Step 6: Add energy_MIP column
full_df["energy_MIP"] = full_df["energy_GeV"] / MIP_ENERGY_GEV

In [7]:
# Persist the padded per-event table to disk as a pickle file.
# Only unpickle such files when they come from a trusted source.
with open('Sim_e+3.3_DF.pkl', 'wb') as pkl_out:
    pickle.dump(full_df, pkl_out)

In [5]:
# Print the text representation of full_df for a quick visual check of its contents.
print(full_df)

         event           x          y           z  energy_GeV   t  layer  \
0            0  -85.400002 -85.400002    0.000000         0.0 NaN      0   
1            0  -36.599998 -85.400002    0.000000         0.0 NaN      0   
2            0   12.200000 -85.400002    0.000000         0.0 NaN      0   
3            0   61.000000 -85.400002    0.000000         0.0 NaN      0   
4            0  109.800003 -85.400002    0.000000         0.0 NaN      0   
...        ...         ...        ...         ...         ...  ..    ...   
3999995   9999 -109.800003  85.400002  406.679993         0.0 NaN     15   
3999996   9999  -61.000000  85.400002  406.679993         0.0 NaN     15   
3999997   9999  -12.200000  85.400002  406.679993         0.0 NaN     15   
3999998   9999   36.599998  85.400002  406.679993         0.0 NaN     15   
3999999   9999   85.400002  85.400002  406.679993         0.0 NaN     15   

         layer_ch  energy_MIP  
0               0         0.0  
1               1      

In [6]:
# Build a lookup table with one row per (layer, layer_ch) pair holding the
# x, y, z position taken from the first row of full_df in that group.
channel_groups = full_df.groupby(['layer', 'layer_ch'])
first_positions = channel_groups[['x', 'y', 'z']].first()

# Turn the (layer, layer_ch) group keys back into ordinary columns
unique_coords = first_positions.reset_index()

print(unique_coords)


     layer  layer_ch           x          y           z
0        0         0  -85.400002 -85.400002    0.000000
1        0         1  -36.599998 -85.400002    0.000000
2        0         2   12.200000 -85.400002    0.000000
3        0         3   61.000000 -85.400002    0.000000
4        0         4  109.800003 -85.400002    0.000000
..     ...       ...         ...        ...         ...
395     15        20 -109.800003  85.400002  406.679993
396     15        21  -61.000000  85.400002  406.679993
397     15        22  -12.200000  85.400002  406.679993
398     15        23   36.599998  85.400002  406.679993
399     15        24   85.400002  85.400002  406.679993

[400 rows x 5 columns]


In [8]:
# Temporarily lift pandas' row-display limit so every row of unique_coords is
# printed; the previous setting is restored when the with-block exits.
with pd.option_context('display.max_rows', None):
    print(unique_coords)

     layer  layer_ch           x           y           z
0        0         0  -85.400002  -85.400002    0.000000
1        0         1  -36.599998  -85.400002    0.000000
2        0         2   12.200000  -85.400002    0.000000
3        0         3   61.000000  -85.400002    0.000000
4        0         4  109.800003  -85.400002    0.000000
5        0         5  -85.400002  -36.599998    0.000000
6        0         6  -36.599998  -36.599998    0.000000
7        0         7   12.200000  -36.599998    0.000000
8        0         8   61.000000  -36.599998    0.000000
9        0         9  109.800003  -36.599998    0.000000
10       0        10  -85.400002   12.200000    0.000000
11       0        11  -36.599998   12.200000    0.000000
12       0        12   12.200000   12.200000    0.000000
13       0        13   61.000000   12.200000    0.000000
14       0        14  109.800003   12.200000    0.000000
15       0        15  -85.400002   61.000000    0.000000
16       0        16  -36.59999