# SWIFT-HEP / GridPP Workshop - April 2025

## Caching

The Dirac Client is introduced here.
Functionally it works the same as the dask.distributed.Client, but allows for persistent caching.

The following cache locations are supported:
- `local`: to set the directory use `file:///path/to/cache`

Caching options in the works;
- `rucio`: to set the directory use `rucio:///path/to/cache`
- `dirac`: to set the directory use `dirac:///path/to/cache`

In [4]:
from dask_dirac import DiracClient, DiracCluster
from dask.distributed import LocalCluster, Client
import dask.array as da

In [5]:
# Task structure has changed in newer versions, so for this using 2024.5.0
import dask
dask.__version__

'2024.5.0'

In [6]:
cluster = LocalCluster(n_workers=1)

In [None]:
client = DiracClient(cluster, 
                     cache_location="file:///tmp/dask-cache_05022025")
#client = Client(cluster)

In [8]:
# Check the cache location and show what files are there
print(client.cache_location)
!ls {client.cache_location[7:]} # remove file:// at the beginning

file:///tmp/dask-cache_05022025


In [15]:
# Create a Dask DataFrame directly
dask_array = da.ones((1e4, 1), chunks=(1)) + 20231
dask_array

Unnamed: 0,Array,Chunk
Bytes,78.12 kiB,8 B
Shape,"(10000, 1)","(1, 1)"
Dask graph,10000 chunks in 2 graph layers,10000 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 78.12 kiB 8 B Shape (10000, 1) (1, 1) Dask graph 10000 chunks in 2 graph layers Data type float64 numpy.ndarray",1  10000,

Unnamed: 0,Array,Chunk
Bytes,78.12 kiB,8 B
Shape,"(10000, 1)","(1, 1)"
Dask graph,10000 chunks in 2 graph layers,10000 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [10]:
result = client.compute(dask_array)

In [11]:
r = result.result()

In [12]:
# Check the cache location and show what files are there
print(client.cache_location)
!ls {client.cache_location[7:]} # remove file:// at the beginning

file:///tmp/dask-cache_05022025
79ff19291795877767dc8ff7ac8dec3ae6ad1ddd52e93e1c088bc90f24451fe5ecbaec4bbe9a055ab35ab4b70431e16f.parquet
7d41dc42b2d1236a9e67e28103d1fb3bdc6fc748920a8f971a7abe6c9304ff7477f3e402e50face5c1b40c694eee0402.parquet


## GPU vs CPU

This is an LUX-ZEPLIN analysis which builds a model of multi-scatter-single-ionisation (MSSI) events from simulated events.
This simulated events are from detector components. 
In this analysis, the simulations (ROOT files) are read using `uproot`, and then events are looped over, selecting MSSI events.
The simulated events here have already gone through a pre-processing so only events classified as single-scatter events are considered.

A more detailed step-by-step description of the analysis is as follows:
1. Simulations of detector components are stored as ROOT files.
2. These files are read using `uproot` into `awkward` arrays.
3. A selection is applied to the data to select MSSI events.
4. A normalization is applied to get the expected rate of these events.
5. Something about building the model.


In addition to the above, this analysis also highlights function decorations with numba for CPU and GPU acceleration.

In [25]:
import awkward as ak
import numpy as np
from dask.distributed import LocalCluster, Client, progress
import glob
import pandas as pd
import uproot as up
import numba
import dask
from numba import cuda
import math

Define the processing

In [45]:
coeffs = np.array([[-1.78880746e-13, 4.91268301e-10, -4.96134607e-07, 2.26430932e-04, -4.71792008e-02, 7.33811298e+01],
            [-1.72264463e-13, 4.59149636e-10, -4.59325165e-07, 2.14612376e-04, -4.85599108e-02, 7.35290867e+01],
            [-3.17099156e-14, 7.26336129e-11, -6.99495385e-08, 3.85531008e-05, -1.33386004e-02, 7.18002889e+01],
            [-6.12280314e-14, 1.67968911e-10, -1.83625538e-07, 1.00457608e-04, -2.86728022e-02, 7.22754350e+01],
            [-1.89897962e-14, 1.52777215e-11, -2.79681508e-09, 1.25689887e-05, -1.33093804e-02, 7.17662251e+01],
            [-2.32118621e-14, 7.30043322e-11, -9.40606298e-08, 6.29728588e-05, -2.28150175e-02, 7.22661091e+01],
            [-8.29749194e-14, 2.31096069e-10, -2.47867121e-07, 1.27576029e-04, -3.24702414e-02, 7.26357609e+01],
            [-2.00718008e-13, 5.44135757e-10, -5.59484466e-07, 2.73028553e-04, -6.46879791e-02, 7.45264998e+01],
            [-7.77420021e-14, 1.97357045e-10, -1.90016273e-07, 8.99659454e-05, -2.30169916e-02, 7.25038258e+01],
            [-5.27296334e-14, 1.49415580e-10, -1.58205132e-07, 8.00275441e-05, -2.13559394e-02, 7.23995451e+01],
            [-6.00198219e-14, 1.55333004e-10, -1.60367908e-07, 7.97754165e-05, -1.94435594e-02, 7.22714399e+01],
            [-8.89919309e-14, 2.40830027e-10, -2.57060475e-07, 1.33002951e-04, -3.32969110e-02, 7.28696020e+01]])

n_phi_slices = 12
phi_slices = np.linspace(-np.pi, np.pi, n_phi_slices + 1) + np.pi/4
phi_slices[phi_slices > np.pi] -= 2*np.pi

@numba.njit
def perform_poly(dt, c):
    poly_results = coeffs[c][0]*dt**5 + coeffs[c][1]*dt**4 + coeffs[c][2]*dt**3 + coeffs[c][3]*dt**2 + coeffs[c][4]*dt + coeffs[c][5]
    return poly_results


@numba.njit
def sr3_wall_radius(dt):
    """
    given a drift time, return the radius (cm) of the SR3
    wall using Katie W's prelim fit.
    """
    c = np.array([-4.44147071e-14,  1.43684777e-10, -1.82739476e-07,
                     1.02160174e-04, -2.31617857e-02, -2.05932471e+00])
    wall_r2 = c[0]*dt**5 + c[1]*dt**4 + c[2]*dt**3 + c[3]*dt**2 + c[4]*dt + c[5]
    return wall_r2


@numba.njit
def resistor_fv(x, y):
    res1X = -69.8
    res1Y = 3.5
    res1R = 6
    res2X = -67.5
    res2Y = -14.3
    res2R = 6
    
    ## cut booleans
    insideRes1 = np.sqrt( (x-res1X)*(x-res1X) + (y-res1Y)*(y-res1Y) ) > res1R
    insideRes2 = np.sqrt( (x-res2X)*(x-res2X) + (y-res2Y)*(y-res2Y) ) > res2R
    
    return (insideRes1 & insideRes2)

@numba.njit
def calc_dR_phi(x, y, dt):

    # Calculate event radii and angles, then mask them according to each slice
    R = np.sqrt(x**2 + y**2)
    phi = np.arctan2(y, x)

    dR_phi = 0.0
    # Process each phi slice
    for i in range(n_phi_slices):
        phi_min, phi_max = phi_slices[i], phi_slices[i + 1]
        if (phi >= phi_min) & (phi < phi_max):
            dR_phi = R - perform_poly(dt, i)

    return dR_phi


@numba.njit
def phi_fv(x, y, dt):
    dR_phi = calc_dR_phi(x, y, dt)
    contour = sr3_wall_radius(dt) - 3
    expandable = (dt > 71) & (dt < 900)

    mask = ((dR_phi < (contour + 0.)) & expandable) | ((dR_phi < contour) & ~expandable)
    return mask & (dR_phi <= 0)

@numba.njit
def fv_z(dt):
    dt_cut = (dt > 71) & (dt < 1030)
    return dt_cut

@numba.njit
def process_events(ss, mc):
    n_FV = 0
    n_ROI = 0
    n_FV_ROI = 0
    n_mssi = 0
    n_FV_mssi = 0
    n_ROI_mssi = 0
    n_FV_ROI_mssi = 0

    for i in range(len(ss['ss.correctedS1Area_phd'])):

        x = ss['ss.x_cm'][i]
        y = ss['ss.y_cm'][i]
        dt = ss['ss.driftTime_ns'][i] / 1000
        nS1 = 0
        nS2 = 0

        resistor_cut = resistor_fv(x, y) # uncorrected position
        phi_cut = phi_fv(x, y, dt)
        drift_cut = fv_z(dt)

        # Cuts
        fv_cut = resistor_cut and phi_cut and drift_cut
        roi_cut = ss['ss.correctedS1Area_phd'][i] > 3. and ss['ss.correctedS1Area_phd'][i] < 80. and ss['ss.s2Area_phd'][i] > 14.5*44.5 and ss['ss.correctedS2Area_phd'][i] < 10**4.5

        for j in range(mc['mcTruthVertices.nRQMCTruthVertices'][i]):
            if mc['mcTruthVertices.detectedS1Photons'][i][j] > 0.:
                nS1 += 1
            if mc['mcTruthVertices.detectedS2Photons'][i][j] > 0.:
                nS2 += 1
        if nS1 > nS2:
            n_mssi += 1
            if fv_cut:
                n_FV_mssi += 1
                if roi_cut:
                    n_FV_ROI_mssi += 1
            if roi_cut:
                n_ROI_mssi += 1

        if fv_cut:
            n_FV += 1
            if roi_cut:
                n_FV_ROI += 1
        if roi_cut:
            n_ROI += 1

    return n_FV, n_ROI, n_FV_ROI, n_mssi, n_FV_mssi, n_ROI_mssi, n_FV_ROI_mssi

def process_file(file):
    branches = ['ss.correctedS1Area_phd', 'ss.correctedS2Area_phd', 'ss.s1Area_phd', 'ss.s2Area_phd', 'ss.x_cm', 'ss.y_cm', 'ss.driftTime_ns']
    mcBranches = ['mcTruthVertices.nRQMCTruthVertices', 'mcTruthVertices.volumeName', 'mcTruthVertices.detectedS1Photons', 'mcTruthVertices.detectedS2Photons']

    tfile = up.open(file)
    t = tfile['Scatters']
    mct = tfile['RQMCTruth']
    ss = t.arrays(branches)
    mc = mct.arrays(mcBranches)
    n_FV, n_ROI, n_FV_ROI, n_mssi, n_FV_mssi, n_ROI_mssi, n_FV_ROI_mssi = process_events(ss, mc)
    component = file.split('/SS_skim_')[1][:-5] # remove .root from the end of the file name

    return component, len(ss['ss.s1Area_phd']), n_FV, n_ROI, n_FV_ROI, n_mssi, n_FV_mssi, n_ROI_mssi, n_FV_ROI_mssi, 1e-6 / 200

In [107]:
coeffs = np.array([[-1.78880746e-13, 4.91268301e-10, -4.96134607e-07, 2.26430932e-04, -4.71792008e-02, 7.33811298e+01],
            [-1.72264463e-13, 4.59149636e-10, -4.59325165e-07, 2.14612376e-04, -4.85599108e-02, 7.35290867e+01],
            [-3.17099156e-14, 7.26336129e-11, -6.99495385e-08, 3.85531008e-05, -1.33386004e-02, 7.18002889e+01],
            [-6.12280314e-14, 1.67968911e-10, -1.83625538e-07, 1.00457608e-04, -2.86728022e-02, 7.22754350e+01],
            [-1.89897962e-14, 1.52777215e-11, -2.79681508e-09, 1.25689887e-05, -1.33093804e-02, 7.17662251e+01],
            [-2.32118621e-14, 7.30043322e-11, -9.40606298e-08, 6.29728588e-05, -2.28150175e-02, 7.22661091e+01],
            [-8.29749194e-14, 2.31096069e-10, -2.47867121e-07, 1.27576029e-04, -3.24702414e-02, 7.26357609e+01],
            [-2.00718008e-13, 5.44135757e-10, -5.59484466e-07, 2.73028553e-04, -6.46879791e-02, 7.45264998e+01],
            [-7.77420021e-14, 1.97357045e-10, -1.90016273e-07, 8.99659454e-05, -2.30169916e-02, 7.25038258e+01],
            [-5.27296334e-14, 1.49415580e-10, -1.58205132e-07, 8.00275441e-05, -2.13559394e-02, 7.23995451e+01],
            [-6.00198219e-14, 1.55333004e-10, -1.60367908e-07, 7.97754165e-05, -1.94435594e-02, 7.22714399e+01],
            [-8.89919309e-14, 2.40830027e-10, -2.57060475e-07, 1.33002951e-04, -3.32969110e-02, 7.28696020e+01]])

n_phi_slices = 12
phi_slices = np.linspace(-np.pi, np.pi, n_phi_slices + 1) + np.pi/4
phi_slices[phi_slices > np.pi] -= 2*np.pi

@cuda.jit
def perform_poly(dt, c, result):
    result[0] = (
        coeffs[c][0] * dt**5 + coeffs[c][1] * dt**4 + coeffs[c][2] * dt**3 +
        coeffs[c][3] * dt**2 + coeffs[c][4] * dt + coeffs[c][5]
    )


@cuda.jit
def sr3_wall_radius(dt, result):
    #c = [-4.44147071e-14, 1.43684777e-10, -1.82739476e-07, 1.02160174e-04, -2.31617857e-02, -2.05932471e+00]
    result[0] = -4.44147071e-14 * math.pow(dt, 5) \
        + 1.43684777e-10 * math.pow(dt, 4) \
            -1.82739476e-07 * math.pow(dt, 3) \
                + 1.02160174e-04 * math.pow(dt, 2) \
                    -2.31617857e-02 * dt \
                        -2.05932471e+00


@cuda.jit
def resistor_fv(x, y, result):
    res1X, res1Y, res1R = -69.8, 3.5, 6
    res2X, res2Y, res2R = -67.5, -14.3, 6

    insideRes1 = ((x - res1X) ** 2 + (y - res1Y) ** 2) > res1R 
    insideRes2 = ((x - res2X) ** 2 + (y - res2Y) ** 2) > res2R 

    result[0] = insideRes1 and insideRes2


@cuda.jit
def calc_dR_phi(x, y, dt, dR_phi_result):
    R = math.sqrt(x ** 2 + y ** 2)
    phi = math.atan2(y, x)

    for i in range(n_phi_slices):
        phi_min, phi_max = phi_slices[i], phi_slices[i + 1]
        if (phi >= phi_min) and (phi < phi_max):
            poly_result = cuda.local.array(1, numba.float64)
            perform_poly(dt, i, poly_result)
            dR_phi_result[0] = R - poly_result[0]


@cuda.jit
def phi_fv(x, y, dt, result):
    dR_phi_result = cuda.local.array(1, numba.float64)
    calc_dR_phi(x, y, dt, dR_phi_result)
    
    contour_result = cuda.local.array(1, numba.float64)
    sr3_wall_radius(dt, contour_result)
    
    #contour = contour_result[0] - 3
    #expandable = dt > 71 and dt < 900

    #mask = (dR_phi_result[0] < (contour + 0.) and expandable) or ((dR_phi_result[0] < contour) and not expandable)
    result[0] = True #mask and dR_phi_result[0] <= 0


@cuda.jit
def fv_z(dt, result):
    result[0] = (dt > 71) and (dt < 1030)


@cuda.jit
def process_events(ss_x, ss_y, ss_dt, ss_s1, ss_s2, ss_s2c,
                   mc_verts, mc_detS1, mc_detS2,
                   n_fv, n_roi, n_fv_roi, n_mssi, n_fv_mssi, n_roi_mssi, n_fv_roi_mssi):

    i = numba.cuda.grid(1)
    if i >= ss_x.shape[0]:
        return
    

    x = ss_x[i]
    y = ss_y[i]
    dt = ss_dt[i] / 1000 # to us from ns
    nS1 = 0
    nS2 = 0

    # Cuts
    resistor_result = cuda.local.array(1, numba.boolean)
    resistor_fv(x, y, resistor_result)
    phi_result = cuda.local.array(1, numba.boolean)
    phi_fv(x, y, dt, phi_result)
    drift_result = cuda.local.array(1, numba.boolean)
    fv_z(dt, drift_result)

    fv_cut = resistor_result[0] and phi_result[0] and drift_result[0]
    roi_cut = 3. < ss_s1[i] and ss_s1[i] < 80. and 14.5 * 44.5 < ss_s2[i] and ss_s2c[i] < 10**4.5

    # Check if MSSI
    for j in range(mc_verts[i]):
        if mc_detS1[i][j] > 0.:
            nS1 += 1
        if mc_detS2[i][j] > 0.:
            nS2 += 1
    if nS1 > nS2:
        cuda.atomic.add(n_mssi, 0, 1)
        if fv_cut:
            cuda.atomic.add(n_fv_mssi, 0, 1)
            if roi_cut:
                cuda.atomic.add(n_roi_mssi, 0, 1)
        if roi_cut:
            cuda.atomic.add(n_fv_roi_mssi, 0, 1)
        
    if fv_cut:
        cuda.atomic.add(n_fv, 0, 1)
        if roi_cut:
            cuda.atomic.add(n_fv_roi, 0, 1)
    if roi_cut:
        cuda.atomic.add(n_roi, 0, 1)

def process_file(file):
    branches = ['ss.correctedS1Area_phd', 'ss.correctedS2Area_phd', 'ss.s1Area_phd', 'ss.s2Area_phd', 'ss.x_cm', 'ss.y_cm', 'ss.driftTime_ns']
    mcBranches = ['mcTruthVertices.nRQMCTruthVertices', 'mcTruthVertices.volumeName', 'mcTruthVertices.detectedS1Photons', 'mcTruthVertices.detectedS2Photons']

    tfile = up.open(file)
    t = tfile['Scatters']
    mct = tfile['RQMCTruth']
    ss = t.arrays(branches)
    mc = mct.arrays(mcBranches)

    # Allocate device arrays
    ss_x = cuda.to_device(ss['ss.x_cm'])
    ss_y = cuda.to_device(ss['ss.y_cm'])
    ss_dt = cuda.to_device(ss['ss.driftTime_ns'])
    ss_s1 = cuda.to_device(ss['ss.s1Area_phd'])
    ss_s2 = cuda.to_device(ss['ss.s2Area_phd'])
    ss_s2c = cuda.to_device(ss['ss.correctedS2Area_phd'])

    # zero pad 
    lengths = ak.num(mc['mcTruthVertices.detectedS1Photons'])
    max_length = int(np.percentile(lengths, 99))
    mc_verts = cuda.to_device(mc['mcTruthVertices.nRQMCTruthVertices'])
    mc_detS1_padded = ak.fill_none(ak.pad_none(mc['mcTruthVertices.detectedS1Photons'], max_length, clip=True), 0)
    mc_detS1_numpy = ak.to_numpy(mc_detS1_padded)
    mc_detS1 = cuda.to_device(mc_detS1_numpy)
    mc_detS2_padded = ak.fill_none(ak.pad_none(mc['mcTruthVertices.detectedS2Photons'], max_length, clip=True), 0)
    mc_detS2_numpy = ak.to_numpy(mc_detS2_padded)
    mc_detS2 = cuda.to_device(mc_detS2_numpy)

    n_fv = cuda.to_device(np.zeros(1, dtype=np.int32))
    n_roi = cuda.to_device(np.zeros(1, dtype=np.int32))
    n_fv_roi = cuda.to_device(np.zeros(1, dtype=np.int32))
    n_mssi = cuda.to_device(np.zeros(1, dtype=np.int32))
    n_fv_mssi = cuda.to_device(np.zeros(1, dtype=np.int32))
    n_roi_mssi = cuda.to_device(np.zeros(1, dtype=np.int32))
    n_fv_roi_mssi = cuda.to_device(np.zeros(1, dtype=np.int32))

    n_blocks = len(ss['ss.x_cm'])
    block_size = 256
    n_blocks = (n_blocks + block_size - 1) // block_size

    process_events[n_blocks, block_size](
        ss_x, ss_y, ss_dt, ss_s1, ss_s2, ss_s2c, mc_verts, mc_detS1, mc_detS2,
        n_fv, n_roi, n_fv_roi, n_mssi, n_fv_mssi, n_roi_mssi, n_fv_roi_mssi)
    
    n_fv_host = n_fv.copy_to_host()[0]
    n_roi_host = n_roi.copy_to_host()[0]
    n_fv_roi_host = n_fv_roi.copy_to_host()[0]
    n_mssi_host = n_mssi.copy_to_host()[0]
    n_fv_mssi_host = n_fv_mssi.copy_to_host()[0]
    n_roi_mssi_host = n_roi_mssi.copy_to_host()[0]
    n_fv_roi_mssi_host = n_fv_roi_mssi.copy_to_host()[0]
    

    component = file.split('/SS_skim_')[1][:-5] # remove .root from the end of the file name

    return component, len(ss['ss.s1Area_phd']), n_fv_host, n_roi_host, n_fv_roi_host, n_mssi_host, n_fv_mssi_host, n_roi_mssi_host, n_fv_roi_mssi_host, 1e-6 / 200

In [108]:
process_file(files[0])

('Co60_CalibrationSourceTubes',
 1400000,
 np.int32(993668),
 np.int32(1067),
 np.int32(524),
 np.int32(801675),
 np.int32(693601),
 np.int32(489),
 np.int32(862),
 5e-09)

setup dask cluster

In [46]:
cluster = LocalCluster()
client = Client(cluster)
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 38337 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:38337/status,

0,1
Dashboard: http://127.0.0.1:38337/status,Workers: 16
Total threads: 128,Total memory: 502.63 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:35425,Workers: 16
Dashboard: http://127.0.0.1:38337/status,Total threads: 128
Started: Just now,Total memory: 502.63 GiB

0,1
Comm: tcp://127.0.0.1:39961,Total threads: 8
Dashboard: http://127.0.0.1:40495/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:45861,
Local directory: /tmp/dask-scratch-space/worker-mwiju8th,Local directory: /tmp/dask-scratch-space/worker-mwiju8th

0,1
Comm: tcp://127.0.0.1:44279,Total threads: 8
Dashboard: http://127.0.0.1:39545/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:38585,
Local directory: /tmp/dask-scratch-space/worker-rkis9112,Local directory: /tmp/dask-scratch-space/worker-rkis9112

0,1
Comm: tcp://127.0.0.1:36345,Total threads: 8
Dashboard: http://127.0.0.1:35287/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:39305,
Local directory: /tmp/dask-scratch-space/worker-ny4qyjhf,Local directory: /tmp/dask-scratch-space/worker-ny4qyjhf

0,1
Comm: tcp://127.0.0.1:33447,Total threads: 8
Dashboard: http://127.0.0.1:40675/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:40791,
Local directory: /tmp/dask-scratch-space/worker-t_2xkpmt,Local directory: /tmp/dask-scratch-space/worker-t_2xkpmt

0,1
Comm: tcp://127.0.0.1:39679,Total threads: 8
Dashboard: http://127.0.0.1:38203/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:36151,
Local directory: /tmp/dask-scratch-space/worker-6gq_3q29,Local directory: /tmp/dask-scratch-space/worker-6gq_3q29

0,1
Comm: tcp://127.0.0.1:45839,Total threads: 8
Dashboard: http://127.0.0.1:43519/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:40927,
Local directory: /tmp/dask-scratch-space/worker-g8tmphmg,Local directory: /tmp/dask-scratch-space/worker-g8tmphmg

0,1
Comm: tcp://127.0.0.1:39717,Total threads: 8
Dashboard: http://127.0.0.1:33091/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:45941,
Local directory: /tmp/dask-scratch-space/worker-670adcwj,Local directory: /tmp/dask-scratch-space/worker-670adcwj

0,1
Comm: tcp://127.0.0.1:40319,Total threads: 8
Dashboard: http://127.0.0.1:37923/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:39311,
Local directory: /tmp/dask-scratch-space/worker-gujgyuv3,Local directory: /tmp/dask-scratch-space/worker-gujgyuv3

0,1
Comm: tcp://127.0.0.1:42433,Total threads: 8
Dashboard: http://127.0.0.1:34885/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:33825,
Local directory: /tmp/dask-scratch-space/worker-36l3hiqb,Local directory: /tmp/dask-scratch-space/worker-36l3hiqb

0,1
Comm: tcp://127.0.0.1:39481,Total threads: 8
Dashboard: http://127.0.0.1:37599/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:41625,
Local directory: /tmp/dask-scratch-space/worker-lvb5bypn,Local directory: /tmp/dask-scratch-space/worker-lvb5bypn

0,1
Comm: tcp://127.0.0.1:37833,Total threads: 8
Dashboard: http://127.0.0.1:38757/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:42009,
Local directory: /tmp/dask-scratch-space/worker-xlb8lqor,Local directory: /tmp/dask-scratch-space/worker-xlb8lqor

0,1
Comm: tcp://127.0.0.1:34111,Total threads: 8
Dashboard: http://127.0.0.1:46619/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:40863,
Local directory: /tmp/dask-scratch-space/worker-cffkhcy_,Local directory: /tmp/dask-scratch-space/worker-cffkhcy_

0,1
Comm: tcp://127.0.0.1:36277,Total threads: 8
Dashboard: http://127.0.0.1:41329/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:36967,
Local directory: /tmp/dask-scratch-space/worker-2gousv6j,Local directory: /tmp/dask-scratch-space/worker-2gousv6j

0,1
Comm: tcp://127.0.0.1:42621,Total threads: 8
Dashboard: http://127.0.0.1:38857/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:34417,
Local directory: /tmp/dask-scratch-space/worker-96tsxygj,Local directory: /tmp/dask-scratch-space/worker-96tsxygj

0,1
Comm: tcp://127.0.0.1:44049,Total threads: 8
Dashboard: http://127.0.0.1:41165/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:44887,
Local directory: /tmp/dask-scratch-space/worker-o7swg4_l,Local directory: /tmp/dask-scratch-space/worker-o7swg4_l

0,1
Comm: tcp://127.0.0.1:46295,Total threads: 8
Dashboard: http://127.0.0.1:35519/status,Memory: 31.41 GiB
Nanny: tcp://127.0.0.1:33541,
Local directory: /tmp/dask-scratch-space/worker-lga3c27n,Local directory: /tmp/dask-scratch-space/worker-lga3c27n


Select the files to be used. 
In this example, the files are stored locally under `/shared/scratch/ak18773/lz/mssi/`. 
Each file is a ROOT file containing the output of an `LZLAMA` simulation (the `NEST` handler); more details can be found in [arvix:2001.09363](https://arxiv.org/abs/2001.09363)

In [4]:
files = glob.glob("/shared/scratch/ak18773/lz/mssi/*.root")
print(f'N. files to process: {len(files)}')

N. files to process: 10


In [32]:
process_file(files[0])

TypingError: Failed in cuda mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in function lt>) found for signature:
 
 >>> lt(array(float32, 1d, C), float64)
 
There are 20 candidate implementations:
[1m   - Of which 18 did not match due to:
   Overload of function 'lt': File: <numerous>: Line N/A.
     With argument(s): '(array(float32, 1d, C), float64)':[0m
[1m    No match.[0m
[1m   - Of which 2 did not match due to:
   Operator Overload in function 'lt': File: unknown: Line unknown.
     With argument(s): '(array(float32, 1d, C), float64)':[0m
[1m    No match for registered cases:
     * (bool, bool) -> bool
     * (int8, int8) -> bool
     * (int16, int16) -> bool
     * (int32, int32) -> bool
     * (int64, int64) -> bool
     * (uint8, uint8) -> bool
     * (uint16, uint16) -> bool
     * (uint32, uint32) -> bool
     * (uint64, uint64) -> bool
     * (float32, float32) -> bool
     * (float64, float64) -> bool[0m
[0m
[0m[1mDuring: typing of intrinsic-call at /tmp/ipykernel_3250043/451758262.py (105)[0m
[1m
File "../../../../../../tmp/ipykernel_3250043/451758262.py", line 105:[0m
[1m<source missing, REPL/exec in use?>[0m

[0m[1mDuring: Pass nopython_type_inference[0m

In [66]:
delayed_results = [dask.delayed(process_file)(file) for file in files]
futures = client.compute(delayed_results)

Key:       process_file-426cbde2-5f66-4134-a584-d8b6770686dd
Function:  process_file
args:      ('/shared/scratch/ak18773/lz/mssi/SS_skim_K40_DomePMTs.root')
kwargs:    {}
Exception: "ValueError('cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-44/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)')"

Key:       process_file-50406305-5d90-4ae5-8048-f15ca687ee62
Function:  process_file
args:      ('/shared/scratch/ak18773/lz/mssi/SS_skim_K40_BottomTruss.root')
kwargs:    {}
Exception: "ValueError('cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-44/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)')"

Key:       process_file-1df83908-6d6e-489e-8f4e-a84ef250eab8
Function:  process_file
args:      ('/shared/scratch/ak18773/lz/mssi/SS_skim_Th232-lat

In [62]:
# monitor the progress
progress(futures)

VBox()

Key:       process_file-679697e6-bc8f-4c10-9ae9-894935dba842
Function:  process_file
args:      ('/shared/scratch/ak18773/lz/mssi/SS_skim_Th232-late_BottomTPCPMTBases.root')
kwargs:    {}
Exception: "ValueError('cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-44/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)')"

Key:       process_file-cfa33ffb-b127-4a8a-9f9c-96e178fc3756
Function:  process_file
args:      ('/shared/scratch/ak18773/lz/mssi/SS_skim_Th232-early_BottomTPCPMTBodies.root')
kwargs:    {}
Exception: "ValueError('cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-44/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)')"

Key:       process_file-74519176-12a7-4fca-b0da-01a7077c96ec
Function:  process_file
args:      ('/shared/scratch/ak

In [50]:
# Once complete, retrieve the results
results = client.gather(futures)

In [51]:
results_df = pd.DataFrame(results, columns=['Source', 'nSS', 'nSS FV', 'nSS ROI', 'nSS FV ROI', 'nMSSI', 'nMSSI FV', 'nMSSI ROI', 'nMSSI FV ROI', 'eventWeight'])
results_df

Unnamed: 0,Source,nSS,nSS FV,nSS ROI,nSS FV ROI,nMSSI,nMSSI FV,nMSSI ROI,nMSSI FV ROI,eventWeight
0,Co60_CalibrationSourceTubes,1400000,132410,938,2,799336,68986,755,1,5e-09
1,Co60_DomePMTs,1507911,874756,189,9,1472377,848548,189,9,5e-09
2,K40_BottomTruss,160904,61547,31,0,126295,44703,26,0,5e-09
3,K40_DomePMTs,107034,62464,10,2,84704,45883,10,2,5e-09
4,Th232-early_BottomTPCPMTBodies,491017,241293,75,2,418065,194826,73,2,5e-09
5,Th232-late_BottomTPCPMTBases,260607,133536,18,0,255214,129945,18,0,5e-09
6,Th232-late_BottomTPCPMTBodies,507322,255499,42,0,496530,248524,42,0,5e-09
7,Th232-late_ForwardFieldResistors,3000000,15489,5172,1,950774,11208,2089,0,5e-09
8,Th232-late_HVInnerCone,84923,9977,33,0,55747,4442,27,0,5e-09
9,U238-late_AnodeGridWires,2152246,29086,1976,9,632270,14298,1301,9,5e-09


### Post processing
Now that we have the fraction of events in each region, we can calculate the rates using the known `decays/day`

In [52]:
rates = {
    "Co60_CalibrationSourceTubes": 4690.57902,
    "Co60_DomePMTs": 3885.410702,
    "K40_BottomTruss": 28927.99798,
    "K40_DomePMTs": 88935.50817,
    "Th232-early_BottomTPCPMTBodies": 38003.65201,
    "Th232-late_BottomTPCPMTBases": 20626.61384,
    "Th232-late_BottomTPCPMTBodies": 51716.2229,
    "Th232-late_ForwardFieldResistors": 77545.76613,
    "Th232-late_HVInnerCone": 363483.6619,
    "U238-late_AnodeGridWires": 4316.423461
}
rates_df = pd.DataFrame(list(rates.items()), columns=["Source", "Rate (Decays/day)"])
rates_df

Unnamed: 0,Source,Rate (Decays/day)
0,Co60_CalibrationSourceTubes,4690.57902
1,Co60_DomePMTs,3885.410702
2,K40_BottomTruss,28927.99798
3,K40_DomePMTs,88935.50817
4,Th232-early_BottomTPCPMTBodies,38003.65201
5,Th232-late_BottomTPCPMTBases,20626.61384
6,Th232-late_BottomTPCPMTBodies,51716.2229
7,Th232-late_ForwardFieldResistors,77545.76613
8,Th232-late_HVInnerCone,363483.6619
9,U238-late_AnodeGridWires,4316.423461


In [53]:
# match up where 'Source' is the same in both dataframes, and combine them
df = pd.merge(results_df, rates_df, on='Source')
df

Unnamed: 0,Source,nSS,nSS FV,nSS ROI,nSS FV ROI,nMSSI,nMSSI FV,nMSSI ROI,nMSSI FV ROI,eventWeight,Rate (Decays/day)
0,Co60_CalibrationSourceTubes,1400000,132410,938,2,799336,68986,755,1,5e-09,4690.57902
1,Co60_DomePMTs,1507911,874756,189,9,1472377,848548,189,9,5e-09,3885.410702
2,K40_BottomTruss,160904,61547,31,0,126295,44703,26,0,5e-09,28927.99798
3,K40_DomePMTs,107034,62464,10,2,84704,45883,10,2,5e-09,88935.50817
4,Th232-early_BottomTPCPMTBodies,491017,241293,75,2,418065,194826,73,2,5e-09,38003.65201
5,Th232-late_BottomTPCPMTBases,260607,133536,18,0,255214,129945,18,0,5e-09,20626.61384
6,Th232-late_BottomTPCPMTBodies,507322,255499,42,0,496530,248524,42,0,5e-09,51716.2229
7,Th232-late_ForwardFieldResistors,3000000,15489,5172,1,950774,11208,2089,0,5e-09,77545.76613
8,Th232-late_HVInnerCone,84923,9977,33,0,55747,4442,27,0,5e-09,363483.6619
9,U238-late_AnodeGridWires,2152246,29086,1976,9,632270,14298,1301,9,5e-09,4316.423461


In [56]:
df['SS/day'] =  df['nSS'] * df['eventWeight'] * df['Rate (Decays/day)']
df['SS/day FV'] = df['nSS FV'] * df['eventWeight'] * df['Rate (Decays/day)']
df['SS/day ROI'] = df['nSS ROI'] * df['eventWeight'] * df['Rate (Decays/day)']
df['SS/day FV ROI'] = df['nSS FV ROI'] * df['eventWeight'] * df['Rate (Decays/day)']
df['MSSI/day'] = df['nMSSI'] * df['eventWeight'] * df['Rate (Decays/day)']
df['MSSI/day FV'] = df['nMSSI FV'] * df['eventWeight'] * df['Rate (Decays/day)']
df['MSSI/day ROI'] = df['nMSSI ROI'] * df['eventWeight'] * df['Rate (Decays/day)']
df['MSSI/day FV ROI'] = df['nMSSI FV ROI'] * df['eventWeight'] * df['Rate (Decays/day)']

In [59]:
# Calculate the number of events per day from each source
print('Number of SS events expected per day')
all = df['SS/day'].sum()
in_fv = df['SS/day FV'].sum()
in_roi = df['SS/day ROI'].sum()
in_fv_roi = df['SS/day FV ROI'].sum()
print(f'N. SS / day:   {all}')
print(f'In FV / day:     {in_fv}')
print(f'In ROI / day:     {in_roi}')
print(f'N. FV ROI / day: {in_fv_roi}')
print('----------------------------')
print('Number of MSSI events expected per day')
all = df['MSSI/day'].sum()
in_fv = df['MSSI/day FV'].sum()
in_roi = df['MSSI/day ROI'].sum()
in_fv_roi = df['MSSI/day FV ROI'].sum()
in_dataset = in_fv_roi * 220
print(f'N. MSSI / day:   {all}')
print(f'In FV / day:     {in_fv}')
print(f'In ROI / day:     {in_roi}')
print(f'N. FV ROI / day: {in_fv_roi}')
print('----------------------------')
print('Fraction of SS events that are MSSI')
fraction = df['MSSI/day FV ROI'].sum() / df['SS/day FV ROI'].sum()
print(f'fraction: {fraction:.2f}')

Number of SS events expected per day
N. SS / day:   1748.337506286756
In FV / day:     207.2327463507382
In ROI / day:     2.16952389550092
N. FV ROI / day: 0.002073108759985
----------------------------
Number of MSSI events expected per day
N. MSSI / day:   821.0420669019749
In FV / day:     172.38464498613672
In ROI / day:     0.9432883552961951
N. FV ROI / day: 0.001661927034235
----------------------------
Fraction of SS events that are MSSI
fraction: 0.80


### How does processing time compare?

On GPU00...
* 14.6s # numba.njit
* 45+mins # regular Python

In [None]:
USE_NUMBA_GPU = False
if USE_NUMBA_GPU:
    import numba.cuda
    import math
    
    @numba.cuda.jit(device=True)
    def evaluate_poly(coeffs, x):
        result = 0.0
        for c in coeffs:
            result = result * x + c
        return result
    
    @numba.cuda.jit()
    def loop_over_events(ss_x, ss_y, ss_driftTime_ns, ss_correctedS1Area, ss_correctedS2Area, 
                        mc_nRQMCTruthVertices, mc_volumeName, mc_detectedS1Photons, mc_detectedS2Photons, 
                        is_mssi, is_FV_mssi, is_FV_ROI_mssi, is_FV_ss, is_FV_ROI_ss):
        i = numba.cuda.grid(1)  # get thread index
        if i >= ss_x.shape[0]:  # boundary check
            return

        wall_poly_coeffs = [-8.14589334e-14, 2.09181587e-10, -2.06758029e-07,
                            1.01366014e-04, -2.69048354e-02, 7.24276394e+01]

        nS1 = 0
        nS2 = 0
        r = math.sqrt(ss_x[i] ** 2 + ss_y[i] ** 2)
        drift_time = ss_driftTime_ns[i] / 1000.0
        boundary_r = evaluate_poly(wall_poly_coeffs, drift_time) - 3

        for j in range(mc_nRQMCTruthVertices[i]):
            if mc_volumeName[i][j] == 0:  # Placeholder check, as string comparison isn't allowed in CUDA
                continue
            if mc_detectedS1Photons[i][j] > 0.:
                nS1 += 1
            if mc_detectedS2Photons[i][j] > 0.:
                nS2 += 1

        if nS1 > nS2:
            is_mssi[i] = 1
            if r < boundary_r and 71. < drift_time < 1030.:
                is_FV_mssi[i] = 1
                if 3 < ss_correctedS1Area[i] < 600 and math.log10(ss_correctedS2Area[i]) < 4.5 and ss_correctedS2Area[i] > 14.5 * 44.5:
                    is_FV_ROI_mssi[i] = 1

        if r < boundary_r and 71. < drift_time < 1030.:
            is_FV_ss[i] = 1
            if 3 < ss_correctedS1Area[i] < 600 and math.log10(ss_correctedS2Area[i]) < 4.5 and ss_correctedS2Area[i] > 14.5 * 44.5:
                is_FV_ROI_ss[i] =  1

    def process_file(file):
        branches = ['ss.correctedS1Area_phd', 'ss.correctedS2Area_phd', 'ss.s1Area_phd', 'ss.s2Area_phd', 
                    'ss.x_cm', 'ss.y_cm', 'ss.driftTime_ns']
        mcBranches = ['mcTruthVertices.nRQMCTruthVertices', 'mcTruthVertices.volumeName', 
                    'mcTruthVertices.detectedS1Photons', 'mcTruthVertices.detectedS2Photons', 
                    'mcTruthEvent.eventWeight']

        tfile = up.open(file)
        t = tfile['Scatters']
        mct = tfile['RQMCTruth']

        ss = t.arrays(branches, library="np")
        mc = mct.arrays(mcBranches, library="np")

        num_events = ss['ss.correctedS1Area_phd'].shape[0]

        # Allocate device arrays
        is_mssi = numba.cuda.device_array(num_events, dtype=np.int32)
        is_FV_mssi = numba.cuda.device_array(num_events, dtype=np.int32)
        is_FV_ROI_mssi = numba.cuda.device_array(num_events, dtype=np.int32)
        is_FV_ss = numba.cuda.device_array(num_events, dtype=np.int32)
        is_FV_ROI_ss = numba.cuda.device_array(num_events, dtype=np.int32)

        # Convert Awkward arrays to NumPy
        ss_x = numba.cuda.to_device(ss['ss.x_cm'])
        ss_y = numba.cuda.to_device(ss['ss.y_cm'])
        ss_driftTime_ns = numba.cuda.to_device(ss['ss.driftTime_ns'])
        ss_correctedS1Area = numba.cuda.to_device(ss['ss.correctedS1Area_phd'])
        ss_correctedS2Area = numba.cuda.to_device(ss['ss.correctedS2Area_phd'])
        mc_nRQMCTruthVertices = numba.cuda.to_device(mc['mcTruthVertices.nRQMCTruthVertices'])
        mc_detectedS1Photons = numba.cuda.to_device(mc['mcTruthVertices.detectedS1Photons'])
        mc_detectedS2Photons = numba.cuda.to_device(mc['mcTruthVertices.detectedS2Photons'])

        # Handle strings in mc['mcTruthVertices.volumeName'] by converting to integers before passing to CUDA
        mc_volumeName = numba.cuda.to_device(np.zeros_like(mc_nRQMCTruthVertices, dtype=np.int32))

        threads_per_block = 256
        blocks_per_grid = (num_events + threads_per_block - 1) // threads_per_block

        # Launch kernel
        loop_over_events[threads_per_block, threads_per_block](
            ss_x, ss_y, ss_driftTime_ns, ss_correctedS1Area, ss_correctedS2Area,
            mc_nRQMCTruthVertices, mc_volumeName, mc_detectedS1Photons, mc_detectedS2Photons,
            is_mssi, is_FV_mssi, is_FV_ROI_mssi, is_FV_ss, is_FV_ROI_ss
        )

        # Copy results back to host
        is_mssi_host = is_mssi.copy_to_host()
        is_FV_mssi_host = is_FV_mssi.copy_to_host()
        is_FV_ROI_mssi_host = is_FV_ROI_mssi.copy_to_host()
        is_FV_ss_host = is_FV_ss.copy_to_host()
        is_FV_ROI_ss_host = is_FV_ROI_ss.copy_to_host()

        eventWeight = mc['mcTruthEvent.eventWeight'][0]
        f_name = file.split('/SS_skim_')[1][:-5]  # remove .root

        return f_name, len(ss['ss.s1Area_phd']), num_events, sum(is_FV_ss_host), sum(is_FV_ROI_ss_host), sum(is_mssi_host), sum(is_FV_mssi_host), sum(is_FV_ROI_mssi_host), eventWeight
    
else:
    #@numba.njit
    def evaluate_poly(coeffs, x):
        result = 0.0
        for c in coeffs:
            result = result * x + c
        return result

    #@numba.njit
    def loop_over_events(ss, mc):
        is_mssi = np.zeros(len(ss['ss.correctedS1Area_phd']))
        is_FV_mssi = np.zeros(len(ss['ss.correctedS1Area_phd']))
        is_FV_ROI_mssi = np.zeros(len(ss['ss.correctedS1Area_phd']))
        is_FV_ss = np.zeros(len(ss['ss.correctedS1Area_phd']))
        is_FV_ROI_ss = np.zeros(len(ss['ss.correctedS1Area_phd']))


        wall_poly_coeffs = np.array([-8.14589334e-14, 2.09181587e-10, -2.06758029e-07,
                                    1.01366014e-04, -2.69048354e-02, 7.24276394e+01])

        for i in range(len(is_mssi)):
            nS1 = 0
            nS2 = 0
            r = np.sqrt(ss['ss.x_cm'][i] ** 2 + ss['ss.y_cm'][i] ** 2)
            drift_time = ss['ss.driftTime_ns'][i] / 1000.
            boundary_r = evaluate_poly(wall_poly_coeffs, drift_time) - 3
            # Loop over truth vertices
            for j in range(mc['mcTruthVertices.nRQMCTruthVertices'][i]):
                if 'Skin' in str(mc['mcTruthVertices.volumeName'][i][j]) or 'Scint' in str(mc['mcTruthVertices.volumeName'][i][j]):
                    continue
                if mc['mcTruthVertices.detectedS1Photons'][i][j] > 0.:
                    nS1 += 1
                if mc['mcTruthVertices.detectedS2Photons'][i][j] > 0.:
                    nS2 += 1
            if nS1 > nS2:
                is_mssi[i] = 1
                # Apply FV cut
                if r < boundary_r and drift_time < 1030. and drift_time > 71.:
                    is_FV_mssi[i] = 1
                    # Apply ROI
                    if ss['ss.correctedS1Area_phd'][i] < 600 and ss['ss.correctedS1Area_phd'][i] > 3 and np.log10(ss['ss.correctedS2Area_phd'][i]) < 4.5 and ss['ss.s2Area_phd'][i] > 14.5 * 44.5:
                        is_FV_ROI_mssi[i] = 1
            # single scatter rate
            if r < boundary_r and drift_time < 1030. and drift_time > 71.:
                is_FV_ss[i] = 1
                # Apply ROI
                if ss['ss.correctedS1Area_phd'][i] < 600 and ss['ss.correctedS1Area_phd'][i] > 3 and np.log10(ss['ss.correctedS2Area_phd'][i]) < 4.5 and ss['ss.s2Area_phd'][i] > 14.5 * 44.5:
                    is_FV_ROI_ss[i] = 1

        return is_mssi, is_FV_mssi, is_FV_ROI_mssi, is_FV_ss, is_FV_ROI_ss
    

    def process_file(file):
        # Read the file
        branches = ['ss.correctedS1Area_phd', 'ss.correctedS2Area_phd', 'ss.s1Area_phd', 'ss.s2Area_phd', 'ss.x_cm', 'ss.y_cm', 'ss.driftTime_ns']
        mcBranches = ['mcTruthVertices.nRQMCTruthVertices', 'mcTruthVertices.volumeName', 'mcTruthVertices.detectedS1Photons', 'mcTruthVertices.detectedS2Photons', 'mcTruthEvent.eventWeight']

        tfile = up.open(file)
        t = tfile['Scatters']
        mct = tfile['RQMCTruth']

        ss = t.arrays(branches)
        mc = mct.arrays(mcBranches)

        # Now calculate the number of MSSI events
        is_mssi, is_FV_mssi, is_FV_ROI_mssi, is_FV_ss, is_FV_ROI_ss = loop_over_events(ss, mc)
        eventWeight = mc['mcTruthEvent.eventWeight'][0]

        f_name = file.split('/SS_skim_')[1][:-5] # remove .root from the end of the file name

        return f_name, len(ss['ss.s1Area_phd']),  sum(is_FV_ss), sum(is_FV_ROI_ss), sum(is_mssi), sum(is_FV_mssi), sum(is_FV_ROI_mssi), eventWeight