In [81]:
import numpy as np
import pandas as pd
import time
import timeit
from numba import cuda
from numba import vectorize
import math
import consts

In [82]:
@cuda.jit
def Quenching(x_start,x_end,y_start,y_end,z_start,z_end,dE, nElectrons):
    """GPU quenching kernel: fill ``nElectrons`` from segment geometry and ``dE``.

    For every index ``pos`` of the input arrays the kernel

    1. computes the segment length ``dx`` as the Euclidean distance between
       ``(x_start, y_start, z_start)`` and ``(x_end, y_end, z_end)``;
    2. forms ``dedx = abs(dE) * consts.GeVToMeV / dx``;
    3. evaluates ``recomb = log(consts.recombAlpha + eps) / eps`` with
       ``eps = consts.recombBeta / (consts.lArDensity * consts.eField) * dedx``
       (NOTE(review): this has the shape of a Box-model recombination
       factor -- confirm against the definitions in ``consts``);
    4. writes ``recomb * dE * consts.GeVToMeV * consts.MeVToElectrons`` to
       ``nElectrons[pos]``.

    Parameters
    ----------
    x_start, x_end, y_start, y_end, z_start, z_end : 1-D device arrays
        Segment end-point coordinates, all indexed by the same ``pos``.
    dE : 1-D device array
        Per-segment energy value; scaled by ``consts.GeVToMeV`` before use.
    nElectrons : 1-D device array (output)
        Overwritten for every index below ``x_start.shape[0]``.

    Notes
    -----
    * The kernel iterates with a grid-stride loop, so every element is
      processed even when the launch grid holds fewer threads than the
      arrays have elements.
    * Segments with ``dx == 0`` or ``dE == 0`` would divide by zero in
      ``dedx`` / ``recomb``; for those the output is set to ``0.0`` instead.
    * NOTE(review): ``dedx`` uses ``abs(dE)`` but the final product uses the
      signed ``dE``, so a negative ``dE`` yields a negative electron count.
      Kept as in the original -- confirm this is intended.
    """
    start  = cuda.grid(1)
    stride = cuda.gridsize(1)
    for pos in range(start, x_start.shape[0], stride):
        dx = math.sqrt((x_end[pos] - x_start[pos]) ** 2 +
                       (y_end[pos] - y_start[pos]) ** 2 +
                       (z_end[pos] - z_start[pos]) ** 2)
        deposited = abs(dE[pos]) * consts.GeVToMeV

        if dx == 0.0 or deposited == 0.0:
            # Degenerate segment: dedx (dx == 0) or recomb (epsilon == 0)
            # would be a division by zero, so report no electrons.
            nElectrons[pos] = 0.0
        else:
            dedx    = deposited / dx
            epsilon = consts.recombBeta/(consts.lArDensity * consts.eField) * dedx
            recomb  = math.log(consts.recombAlpha + epsilon) / epsilon

            nElectrons[pos] = recomb * dE[pos] * consts.GeVToMeV * consts.MeVToElectrons

In [83]:
@cuda.jit
def Drifting(z_start, z_end, nElectrons,long_diffusion, trans_diffusion):
    """GPU drift-simulation kernel: attenuate ``nElectrons`` and fill diffusion arrays.

    For every index ``pos`` the kernel

    1. takes the segment mid-point ``z = (z_start + z_end) / 2``;
    2. computes ``drift_time = abs(z - consts.tpcPlaneZ) / consts.vdrift``;
    3. multiplies ``nElectrons[pos]`` in place by
       ``exp(-drift_time / consts.msTous)``;
    4. writes ``sqrt(drift_time) * consts.longDiff`` to ``long_diffusion[pos]``
       and ``sqrt(drift_time) * consts.transDiff`` to ``trans_diffusion[pos]``.

    Parameters
    ----------
    z_start, z_end : 1-D device arrays
        Segment end-point z coordinates.
    nElectrons : 1-D device array (input/output)
        Scaled IN PLACE. The kernel is therefore not idempotent: launching
        it twice on the same buffer applies the attenuation twice.
    long_diffusion, trans_diffusion : 1-D device arrays (output)
        Overwritten for every index below ``z_start.shape[0]``.

    Notes
    -----
    * The kernel iterates with a grid-stride loop, so every element is
      processed even when the launch grid holds fewer threads than the
      arrays have elements.
    * NOTE(review): the attenuation divides by ``consts.msTous``, whose name
      suggests a unit-conversion factor rather than a lifetime constant --
      confirm this is the intended denominator.
    """
    start  = cuda.grid(1)
    stride = cuda.gridsize(1)
    for pos in range(start, z_start.shape[0], stride):
        z          = (z_end[pos] + z_start[pos])/2.
        drift_time = abs(z - consts.tpcPlaneZ)/ consts.vdrift
        lifetime   = math.exp(-drift_time/consts.msTous)
        nElectrons[pos] *= lifetime

        # Both diffusion terms share the same sqrt(drift_time) factor.
        sqrt_drift_time      = math.sqrt(drift_time)
        long_diffusion[pos]  = sqrt_drift_time * consts.longDiff
        trans_diffusion[pos] = sqrt_drift_time * consts.transDiff

In [84]:
# Read the input file and copy its columns to device memory.
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True` (any run of whitespace separates fields).
tracks = pd.read_csv('tracks.txt', sep=r'\s+')

# Input arrays: one device buffer per track column read by the kernels.
x_start_device = cuda.to_device(tracks['x_start'].to_numpy())
x_end_device   = cuda.to_device(tracks['x_end'].to_numpy())
y_start_device = cuda.to_device(tracks['y_start'].to_numpy())
y_end_device   = cuda.to_device(tracks['y_end'].to_numpy())
z_start_device = cuda.to_device(tracks['z_start'].to_numpy())
z_end_device   = cuda.to_device(tracks['z_end'].to_numpy())
dE_device      = cuda.to_device(tracks['dE'].to_numpy())

# Output arrays: allocated with the same shape/dtype as an input column.
# They are NOT initialised; their contents are only meaningful after the
# kernels have written them.
long_diffusion_device  = cuda.device_array_like(x_start_device)
trans_diffusion_device = cuda.device_array_like(x_start_device)
nelec_device           = cuda.device_array_like(x_start_device)

In [85]:
# Launch configuration for the 1-D kernels.
threads_per_block = 128
# Derive the block count from the data so the grid holds at least one thread
# per row of `tracks`; a fixed count would silently leave rows beyond
# blocks * threads unprocessed. max(1, ...) keeps the launch valid for an
# empty table.
blocks_per_grid   = max(1, math.ceil(len(tracks) / threads_per_block))

In [86]:
%%time
Quenching[blocks_per_grid, threads_per_block](x_start_device,
                                              x_end_device,
                                              y_start_device,
                                              y_end_device,
                                              z_start_device,
                                              z_end_device,
                                              dE_device,
                                              nelec_device)

CPU times: user 206 ms, sys: 4.37 ms, total: 211 ms
Wall time: 215 ms


In [74]:
%%timeit -n1000
Quenching[blocks_per_grid, threads_per_block](x_start_device,
                                              x_end_device,
                                              y_start_device,
                                              y_end_device,
                                              z_start_device,
                                              z_end_device,
                                              dE_device,
                                              nelec_device)

257 µs ± 4.75 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [87]:
%%time
Drifting[blocks_per_grid, threads_per_block](z_start_device, 
                                             z_end_device, 
                                             nelec_device, 
                                             long_diffusion_device,
                                             trans_diffusion_device)

CPU times: user 157 ms, sys: 273 µs, total: 157 ms
Wall time: 157 ms


In [78]:
%%timeit -n1000
Drifting[blocks_per_grid, threads_per_block](z_start_device, 
                                             z_end_device, 
                                             nelec_device, 
                                             long_diffusion_device,
                                             trans_diffusion_device)

191 µs ± 3.85 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [88]:
%%time
# Copy the three kernel output buffers from the device back to the host and
# attach each one to `tracks` as a new column (columns are appended in the
# order written below).
tracks['long_diffusion']   = long_diffusion_device.copy_to_host()
tracks['trans_diffusion']  = trans_diffusion_device.copy_to_host()
tracks['nElectrons']       = nelec_device.copy_to_host()

CPU times: user 3.29 ms, sys: 0 ns, total: 3.29 ms
Wall time: 2.69 ms


In [90]:
# Preview the first 20 rows, including the columns added from the kernels.
tracks.head(20)

Unnamed: 0,track_id,x_start,x_end,y_start,y_end,z_start,z_end,dE,long_diffusion,trans_diffusion,nElectrons
0,0,34.838947,34.675339,-47.52961,-47.650214,72.756482,72.835971,0.000417,0.000236,0.00062,10858.415074
1,0,34.675339,34.175492,-47.650214,-48.018983,72.835971,73.079805,0.00136,0.000236,0.000621,35196.815115
2,0,34.175492,33.734543,-48.018983,-48.344365,73.079805,73.294425,0.001391,0.000236,0.000621,35416.259371
3,0,33.734543,30.87698,-48.344365,-50.428987,73.294425,74.691796,0.007353,0.000237,0.000622,191033.631305
4,0,30.87698,30.752687,-50.428987,-50.519585,74.691796,74.752716,0.000294,0.000237,0.000623,7672.890095
5,0,30.752687,30.154558,-50.519585,-50.9548,74.752716,75.04425,0.00139,0.000237,0.000623,36305.065281
6,0,30.154558,30.020936,-50.9548,-51.052066,75.04425,75.109381,0.00028,0.000237,0.000624,7323.485586
7,0,30.020936,29.604457,-51.052066,-51.355745,75.109381,75.312549,0.000969,0.000237,0.000624,25303.687427
8,0,29.604457,29.106617,-51.355745,-51.718589,75.312549,75.555749,0.001201,0.000237,0.000624,31313.765541
9,0,29.106617,28.866414,-51.718589,-51.894283,75.555749,75.6734,0.000647,0.000237,0.000624,16734.064153
