In [51]:
import functools
import os

import h5py
import numpy as np
import pandas as pd

In [62]:
def compute_seq_id(hit, or_id=0):
    """Compute the RICH PMT sequence ID"""
    disk_id, pm_id, sc_id, up_dw_id, _ = hit
    if or_id < 1:
        seq_id = sc_id * 8 + pm_id + up_dw_id * 61 * 8 + disk_id * 61 * 8 * 2
    else:
        seq_id = 61 * 8 * 2 * 2 + sc_id + up_dw_id * 61 + disk_id * 61 * 2
    return int(seq_id)


compute_seq_id = np.vectorize(compute_seq_id, otypes=[int])


def get_hit_data(f, event):
    """Get the hit data for an event as a numpy array."""
    hit_data = []
    
    position_map = np.load("/home/nico/RICHPID/tools/rich_pmt_positions.npy")
    
    # get the raw hit data, hit time, and chod time
    hits = f["Hits"][
        f["HitMapping"][event]:f["HitMapping"][event+1]
    ]
    hit_times = hits["hit_time"]
    chod_time = f["Events"][event]["chod_time"]
    
    # compute (x, y) positions
    for hit in hits:
        idx = compute_seq_id(hit)

        hit_data.append(position_map[idx])

    # convert (x, y) positions to numpy array
    hit_data = np.array(hit_data)

    # create array of chod times
    chod_time = np.full((hit_data.shape[0], 1), chod_time)

    # join hits, hit times, and chod time
    hit_data = np.c_[hit_data, hit_times, chod_time]

    # create CHOD - time delta column
    hit_data = np.append(hit_data, hit_data[:, 4:] - hit_data[:, 3:4], axis=1)
    
    # final columns are x, y, mirror, hit time, chod time, delta
    return hit_data

# Unfiltered data

In [2]:
df = pd.read_hdf("/fast_scratch_1/capstone_2022/datasetC_combined.h5")

In [3]:
# Summary statistics of the unfiltered frame, rounded to 3 decimals and
# transposed so that each column of `df` becomes one row of the table.
df.describe().round(3).T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
run_id,1731226.0,8999.629,21.041,8968.0,8982.0,8998.0,9014.0,9040.0
burst_id,1731226.0,743.398,433.352,1.0,366.0,738.0,1109.0,1646.0
event_id,1731226.0,1144229.514,626307.273,9989.0,610397.5,1151238.5,1672073.0,2866610.0
track_id,1731226.0,0.08,0.289,0.0,0.0,0.0,0.0,9.0
track_momentum,1731226.0,31.298,7.543,15.0,25.641,31.541,37.292,45.0
chod_time,1731226.0,14.503,8.467,-24.897,8.146,14.624,21.175,49.806
ring_radius,1731220.0,4710.062,1507327.5,0.0,171.259,178.452,182.505,1366392000.0
ring_centre_pos_x,1731220.0,8848.921,1307900.5,-938928100.0,-173.313,-100.625,-38.024,1183359000.0
ring_centre_pos_y,1731220.0,7550.644,758649.875,-683145300.0,-73.84,3.221,77.719,307175400.0
ring_likelihood_pion,1731226.0,0.525,0.482,0.0,0.0,0.773,1.0,1.0


# Filtered data

In [12]:
filtered_df = df.query("ring_radius < 500 and ring_radius > 0")
filtered_df = filtered_df.query("ring_centre_pos_x < 2500 and ring_centre_pos_x > -2500")
filtered_df = filtered_df.query("ring_centre_pos_y < 2500 and ring_centre_pos_y > -2500")

In [13]:
# Summary statistics of the filtered frame, rounded to 3 decimals and
# transposed so that each column of `filtered_df` becomes one row of the table.
filtered_df.describe().round(3).T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
run_id,1716936.0,8999.627,21.041,8968.0,8982.0,8998.0,9014.0,9040.0
burst_id,1716936.0,743.385,433.38,1.0,366.0,738.0,1109.0,1646.0
event_id,1716936.0,1144319.562,626325.349,9989.0,610483.75,1151292.0,1672140.5,2866610.0
track_id,1716936.0,0.08,0.288,0.0,0.0,0.0,0.0,9.0
track_momentum,1716936.0,31.339,7.523,15.0,25.704,31.58,37.316,45.0
chod_time,1716936.0,14.495,8.457,-24.679,8.142,14.615,21.165,49.806
ring_radius,1716936.0,174.972,12.013,12.728,171.327,178.459,182.489,445.424
ring_centre_pos_x,1716936.0,-110.251,78.601,-410.247,-173.849,-101.88,-39.185,222.039
ring_centre_pos_y,1716936.0,1.139,82.92,-393.071,-74.338,1.971,76.368,316.534
ring_likelihood_pion,1716936.0,0.527,0.482,0.0,0.0,0.8,1.0,1.0


In [14]:
print("Before removing outliers:")
print("Ring centre mean x:", df["ring_centre_pos_x"].mean())
print("Ring centre mean y:", df["ring_centre_pos_y"].mean())
print("Momentum mean:", df["track_momentum"].mean())
print("Momentum std:", df["track_momentum"].std())
print("Ring radii mean:", df["ring_radius"].mean())
print("Ring radii std:", df["ring_radius"].std())

Before removing outliers:
Ring centre mean x: 8848.921
Ring centre mean y: 7550.6436
Momentum mean: 31.298187
Momentum std: 7.543085
Ring radii mean: 4710.0625
Ring radii std: 1507327.5


In [15]:
print("After removing outliers:")
print("Ring centre mean x:", filtered_df["ring_centre_pos_x"].mean())
print("Ring centre mean y:", filtered_df["ring_centre_pos_y"].mean())
print("Momentum mean:", filtered_df["track_momentum"].mean())
print("Momentum std:", filtered_df["track_momentum"].std())
print("Ring radii mean:", filtered_df["ring_radius"].mean())
print("Ring radii std:", filtered_df["ring_radius"].std())

After removing outliers:
Ring centre mean x: -110.25132
Ring centre mean y: 1.1389542
Momentum mean: 31.338661
Momentum std: 7.523443
Ring radii mean: 174.97235
Ring radii std: 12.013085


# Delta distribution

In [79]:
# Pull the "original_index" column (as a numpy array) and the "label" column
# out of the filtered frame; `indices` is later passed to get_hit_data as the
# event index.
# NOTE(review): "original_index" does not show up in the describe() tables of
# the earlier cells, and the execution counter jumps from 15 to 79 -- presumably
# `df` was reloaded or extended in cells that are no longer part of the
# notebook. Confirm this cell still runs after Restart Kernel -> Run All.
indices = filtered_df["original_index"].to_numpy()
labels = filtered_df["label"]

In [80]:
# data folder, data set, individual data file
data_folder = "/data/bvelghe/capstone2022/"
data_set = "C/"
file_path = "/data/bvelghe/capstone2022/C/2018E.EOSlist.CTRL_patched.h5"

# read in 
f = h5py.File(os.path.join(data_folder, data_set, file_path))
events = f['Events']
hits = f['Hits']
hit_map = f['HitMapping']

In [84]:
deltas = []
 
for i in range(1000):
    deltas.extend(
        get_hit_data(f, indices[i])[:, -1:].flatten().tolist()
    )

In [85]:
deltas

[0.031490325927734375,
 -0.01288604736328125,
 -0.08673667907714844,
 0.05806541442871094,
 0.13801193237304688,
 0.03366851806640625,
 -0.3020057678222656,
 -0.09667205810546875,
 0.2652587890625,
 0.04723548889160156,
 0.000148773193359375,
 0.1434307098388672,
 -0.025873184204101562,
 -0.22095870971679688,
 0.027822494506835938,
 -0.09076499938964844,
 0.1560039520263672,
 0.019414901733398438,
 0.5336151123046875,
 -0.5033760070800781,
 -0.46553611755371094,
 -0.21860504150390625,
 0.08128929138183594,
 0.6188297271728516,
 -0.03867530822753906,
 0.3896503448486328,
 -42.96364212036133,
 -42.724300384521484,
 11.236513137817383,
 0.1667027473449707,
 0.11681413650512695,
 -0.45777320861816406,
 -0.03817415237426758,
 11.215842247009277,
 10.70642375946045,
 0.2124309539794922,
 10.734827995300293,
 10.851266860961914,
 -42.51921081542969,
 10.98284912109375,
 10.9690580368042,
 -44.17478942871094,
 -43.20958709716797,
 -42.8426628112793,
 41.42601013183594,
 -42.7214241027832,
 -43