# SETUP

## 1. GENERAL

In [1]:
import sys, os
# Make the PyHEADTAIL checkout (three directories up) importable.
PYHEADTAIL_DIR = '../../../'
sys.path.append(os.path.expanduser(PYHEADTAIL_DIR))

from scipy.constants import c, e, m_p, epsilon_0
import numpy as np

from PyHEADTAIL.particles.generators import ParticleGenerator, gaussian2D, gaussian2D_asymmetrical
import PyHEADTAIL.trackers.longitudinal_tracking
# `import a.b.c` only binds the top-level name `a`.  The cells below use the
# bare name `longitudinal_tracking` (RFSystems, patching `sin`), so bind the
# submodule explicitly; otherwise a fresh kernel fails with a NameError.
from PyHEADTAIL.trackers import longitudinal_tracking
from PyHEADTAIL.particles.slicing import UniformBinSlicer
from PyHEADTAIL.spacecharge.spacecharge import LongSpaceCharge
import PyHEADTAIL.particles.slicing as slicing
from PyHEADTAIL.particles.slicing import SliceSet

PyHEADTAIL v1.3.0-10-ge651b0b162-dirty




In [2]:
# physics
circ   = 6911.
R      = circ/(2*np.pi)
alpha  = 18**-2 # i.e. E_transition = 18 * m_p * c**2 = 16.89e9

energy = 26e9#10e9#
gamma  = energy/(m_p/e*c**2)
beta   = np.sqrt(1-gamma**-2)
p0     = m_p*np.sqrt(gamma**2-1)*c

eta    = alpha - gamma**-2
V      = [4e6, 0e5]
h      = [4620, 18480]
if eta > 0:
    phi = np.array([0, 0])
else:
    phi = np.array([np.pi, np.pi])
phi_offset = np.array([0, 0])
phi += phi_offset

intensity = 4e13#4e12#
sigx = 1e-4
sigxp = 1e-2
sigy = 1e-4
sigyp = 1e-2
sigz = 0.23 #* 0.3
sigdp = 1e-3

pipe_radius = 5e-2
T0 = circ/(beta*c)

In [3]:
# knobs to turn
# A particle count must be an integer.  `1e6` is a float, and it is handed to
# ParticleGenerator as the number of macroparticles (presumably used to size
# arrays, which recent numpy versions refuse to do with a float).
macroparticlenumber = int(1e6)
n_slices = 400

In [4]:
def make_bunch(macroparticlenumber):
    '''Generate a fresh bunch with `macroparticlenumber` macroparticles.

    Every other setting (intensity, e, m_p, circ, gamma and the sig*
    spreads of the three planes) is read from the notebook globals.
    Returns whatever ParticleGenerator(...).generate() returns.
    '''
    # One asymmetrical 2D Gaussian per plane, built from the global spreads.
    dist_x = gaussian2D_asymmetrical(sigx, sigxp)
    dist_y = gaussian2D_asymmetrical(sigy, sigyp)
    dist_z = gaussian2D_asymmetrical(sigz, sigdp)

    generator = ParticleGenerator(
        macroparticlenumber, intensity, e, m_p, circ, gamma,
        distribution_x=dist_x,
        distribution_y=dist_y,
        distribution_z=dist_z,
    )
    return generator.generate()

# Build the tracking elements once; the CPU and the GPU timing cells below
# both reuse these same objects.
# RF system from circumference, harmonics h, voltages V, phases phi, [alpha] and gamma.
longitudinal_map = longitudinal_tracking.RFSystems(circ, h, V, phi, [alpha], gamma)
# Uniform-bin slicer with n_slices slices and n_sigma_z=3.
slicer = UniformBinSlicer(n_slices, n_sigma_z=3)
# Longitudinal space charge element using that slicer, the pipe radius and T0.
long_sc = LongSpaceCharge(slicer, pipe_radius, T0)

## 2. PYCUDA SETUP

In [5]:
from pycuda import gpuarray
from pycuda import cumath
# from pycuda.elementwise import ElementwiseKernel
import pycuda.autoinit

# CPU evaluation

In [6]:
# preparation
# Point the tracking module's `sin` at numpy's implementation for the CPU run
# (the GPU section later replaces it with cumath.sin).
longitudinal_tracking.sin = np.sin

# Fresh bunch held in host memory for the CPU timings.
bunch = make_bunch(macroparticlenumber)

In [7]:
# CPU baseline: time tracking the bunch through the RF map.
%timeit longitudinal_map.track(bunch)

10 loops, best of 3: 155 ms per loop


In [8]:
# CPU baseline: time the longitudinal space charge element on the same bunch.
%timeit long_sc.track(bunch)

10 loops, best of 3: 54.1 ms per loop


# GPU evaluation

In [9]:
# preparation
# Swap the tracking module's `sin` for the PyCUDA implementation.
longitudinal_tracking.sin = cumath.sin

# Fresh bunch; its longitudinal coordinates are moved to the GPU as float64.
bunch = make_bunch(macroparticlenumber)

bunch.z = gpuarray.to_gpu(bunch.z.astype(np.float64))
bunch.dp = gpuarray.to_gpu(bunch.dp.astype(np.float64))

# Wrap SliceSet.convert_to_particles so that its result is uploaded to the GPU.
# The patch is guarded so that it is applied only once: without the guard,
# re-running this cell would store the wrapper itself as the "original" and
# the wrapper would then call itself forever (infinite recursion).
if not hasattr(slicing.SliceSet, 'npconvert_to_particles'):
    slicing.SliceSet.npconvert_to_particles = slicing.SliceSet.convert_to_particles
    slicing.SliceSet.convert_to_particles = lambda *args, **kwargs: gpuarray.to_gpu(
        slicing.SliceSet.npconvert_to_particles(*args, **kwargs))

# Let the slicing module use the PyCUDA `floor`.
slicing.floor = cumath.floor

def slice_(self, beam, *args, **kwargs):
    '''Build and return a SliceSet for `beam` (factory method).

    Intended as a replacement for slicing.Slicer.slice in the GPU run.
    The keyword arguments for the SliceSet come from
    self.compute_sliceset_kwargs(beam), extended by the beam parameters
    from self.extract_beam_parameters(beam).  The
    'slice_index_of_particle' entry is replaced by the result of its
    .get() call (presumably a pycuda GPUArray being copied to the host)
    before the SliceSet is constructed.

    Keyword arguments:
    - statistics=True attaches mean values, standard deviations
    and emittances to the SliceSet for all planes.
    - statistics=['mean_x', 'sigma_dp', 'epsn_z'] only adds the
    listed statistics values (can be used to save time).
    Valid list entries are all statistics functions of Particles.

    Additional positional arguments are accepted but not used.
    '''
    kw = self.compute_sliceset_kwargs(beam)
    kw['beam_parameters'] = self.extract_beam_parameters(beam)

    # SliceSet is given the plain result of .get() for the particle indices.
    index_key = 'slice_index_of_particle'
    kw[index_key] = kw[index_key].get()

    new_sliceset = SliceSet(**kw)

    # Statistics are only attached when the caller asked for them.
    if 'statistics' not in kwargs:
        return new_sliceset
    self.add_statistics(new_sliceset, beam, kwargs['statistics'])
    return new_sliceset

# Install the slice_ function defined above as the slice method of every Slicer.
slicing.Slicer.slice = slice_

# Replace the bunch's longitudinal statistics by constants: mean 0 and the
# configured sigz.  NOTE(review): presumably because the stock statistics
# functions cannot operate on gpuarrays -- confirm.
bunch.mean_z = lambda : 0
bunch.sigma_z = lambda : sigz

In [10]:
bunch.x = gpuarray.to_gpu(bunch.x.astype(np.float64))
bunch.y = gpuarray.to_gpu(bunch.y.astype(np.float64))
%timeit longitudinal_map.track(bunch)

The slowest run took 37.09 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 3: 23.5 ms per loop


In [11]:
# Copy the transverse coordinates back from the GPU into host arrays before
# timing the space charge element.
bunch.x = bunch.x.get() # cobra_functions cannot handle gpuarrays...
bunch.y = bunch.y.get() # cobra_functions cannot handle gpuarrays...
# Time the space charge element on the bunch whose z/dp still live on the GPU.
%timeit long_sc.track(bunch)

The slowest run took 8.28 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 3: 45.6 ms per loop
