Skip to content

Commit

Permalink
Modernize idcpu Treatment
Browse files Browse the repository at this point in the history
- faster: fewer emitted operations, no jumps
- cheaper: fewer registers used
- safer: no read-before-write warnings
- cooler: no explanation needed
  • Loading branch information
ax3l committed Feb 2, 2024
1 parent 1227c91 commit 593221d
Show file tree
Hide file tree
Showing 12 changed files with 42 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,7 @@ FieldProbeParticleContainer::AddNParticles (int lev,
for (int i = 0; i < np; i++)
{
auto & idcpu_data = pinned_tile.GetStructOfArrays().GetIdCPUData();
idcpu_data.push_back(0);
amrex::ParticleIDWrapper{idcpu_data.back()} = ParticleType::NextID();
amrex::ParticleCPUWrapper(idcpu_data.back()) = ParallelDescriptor::MyProc();
idcpu_data.push_back(amrex::SetParticleIDandCPU(ParticleType::NextID(), ParallelDescriptor::MyProc()));
}

// write Real attributes (SoA) to particle initialized zero
Expand Down
4 changes: 3 additions & 1 deletion Source/EmbeddedBoundary/ParticleBoundaryProcess.H
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
#ifndef PARTICLEBOUNDARYPROCESS_H_
#define PARTICLEBOUNDARYPROCESS_H_

#include <AMReX_Particle.H>
#include <AMReX_REAL.H>
#include <AMReX_RealVect.H>
#include <AMReX_Random.H>


namespace ParticleBoundaryProcess {

struct NoOp {
Expand All @@ -29,7 +31,7 @@ struct Absorb {
const amrex::RealVect& /*pos*/, const amrex::RealVect& /*normal*/,
amrex::RandomEngine const& /*engine*/) const noexcept
{
ptd.id(i) = -ptd.id(i);
amrex::ParticleIDWrapper{ptd.m_idcpu[i]}.make_invalid();
}
};
}
Expand Down
6 changes: 3 additions & 3 deletions Source/EmbeddedBoundary/ParticleScraper.H
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* passed in to this function as an argument. This function can access the
* position at which the particle hit the boundary, and also the associated
* normal vector. Particles can be `absorbed` by setting their ids to negative
* to flag them for removal. Likewise, the can be reflected back into the domain
* to flag them for removal. Likewise, they can be reflected back into the domain
* by modifying their data appropriately and leaving their ids alone.
*
* This version operates only at the specified level.
Expand Down Expand Up @@ -82,7 +82,7 @@ scrapeParticles (PC& pc, const amrex::Vector<const amrex::MultiFab*>& distance_t
* passed in to this function as an argument. This function can access the
* position at which the particle hit the boundary, and also the associated
* normal vector. Particles can be `absorbed` by setting their ids to negative
* to flag them for removal. Likewise, the can be reflected back into the domain
* to flag them for removal. Likewise, they can be reflected back into the domain
* by modifying their data appropriately and leaving their ids alone.
*
* This version operates over all the levels in the pc.
Expand Down Expand Up @@ -175,7 +175,7 @@ scrapeParticles (PC& pc, const amrex::Vector<const amrex::MultiFab*>& distance_t
[=] AMREX_GPU_DEVICE (const int ip, amrex::RandomEngine const& engine) noexcept
{
// skip particles that are already flagged for removal
if (ptd.id(ip) < 0) return;
if (!amrex::ParticleIDWrapper{ptd.m_idcpu[ip]}.is_valid()) return;

amrex::ParticleReal xp, yp, zp;
getPosition(ip, xp, yp, zp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,14 @@ int splitScatteringParticles (
// to replace the following lambda.
auto const atomicSetIdMinus = [] AMREX_GPU_DEVICE (uint64_t & idcpu)
{
    // Overwrites the packed idcpu word of a particle with
    // amrex::ParticleIdCpus::Invalid using a single atomic store, so the
    // particle is flagged as invalid. This replaces the earlier approach of
    // building an "id = -1" value through a temporary ParticleIDWrapper.
    // NOTE(review): presumably atomic because several threads may flag the
    // same particle concurrently - confirm against the callers.
    //
    // \param idcpu packed id/cpu word of the particle to invalidate (in/out)
#if defined(AMREX_USE_OMP)
    // The pragma applies only to the statement that follows, so there must be
    // exactly one store here.
#pragma omp atomic write
    idcpu = amrex::ParticleIdCpus::Invalid;
#else
    // Atomic::Exch expects a pointer to the target word. The address has to
    // be cast to `unsigned long long*` (a pointer), not to the integer type
    // `unsigned long long`, which would pass the address value itself.
    static_assert(sizeof(uint64_t) == sizeof(unsigned long long),
                  "idcpu must have the same size as unsigned long long");
    amrex::Gpu::Atomic::Exch(
        reinterpret_cast<unsigned long long*>(&idcpu),
        static_cast<unsigned long long>(amrex::ParticleIdCpus::Invalid)
    );
#endif
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,14 @@ public:
// to replace the following lambda.
auto const atomicSetIdMinus = [] AMREX_GPU_DEVICE (uint64_t & idcpu)
{
    // Overwrites the packed idcpu word of a particle with
    // amrex::ParticleIdCpus::Invalid using a single atomic store, so the
    // particle is flagged as invalid. This replaces the earlier approach of
    // building an "id = -1" value through a temporary ParticleIDWrapper.
    // NOTE(review): presumably atomic because the same particle can appear in
    // more than one collision pair handled concurrently - confirm.
    //
    // \param idcpu packed id/cpu word of the particle to invalidate (in/out)
#if defined(AMREX_USE_OMP)
    // The pragma applies only to the statement that follows, so there must be
    // exactly one store here.
#pragma omp atomic write
    idcpu = amrex::ParticleIdCpus::Invalid;
#else
    // Atomic::Exch expects a pointer to the target word. The address has to
    // be cast to `unsigned long long*` (a pointer), not to the integer type
    // `unsigned long long`, which would pass the address value itself.
    static_assert(sizeof(uint64_t) == sizeof(unsigned long long),
                  "idcpu must have the same size as unsigned long long");
    amrex::Gpu::Atomic::Exch(
        reinterpret_cast<unsigned long long*>(&idcpu),
        static_cast<unsigned long long>(amrex::ParticleIdCpus::Invalid)
    );
#endif
};

Expand All @@ -224,6 +222,7 @@ public:
if (w1[p_pair_indices_1[i]] <= amrex::ParticleReal(0.))
{
atomicSetIdMinus(idcpu1[p_pair_indices_1[i]]);

}
if (w2[p_pair_indices_2[i]] <= amrex::ParticleReal(0.))
{
Expand Down
2 changes: 1 addition & 1 deletion Source/Particles/ElementaryProcess/QEDPairGeneration.H
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ public:
p_ux, p_uy, p_uz,
engine);

amrex::ParticleIDWrapper{src.m_idcpu[i_src]} = -1; // destroy photon after pair generation
src.m_idcpu[i_src] = amrex::ParticleIdCpus::Invalid; // destroy photon after pair generation
}

private:
Expand Down
2 changes: 1 addition & 1 deletion Source/Particles/ElementaryProcess/QEDPhotonEmission.H
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ void cleanLowEnergyPhotons(
const auto phot_energy2 = (ux*ux + uy*uy + uz*uz)*me_c*me_c;

if (phot_energy2 < energy_threshold2) {
amrex::ParticleIDWrapper{p_idcpu[ip]} = -1;
p_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
}
});
}
Expand Down
3 changes: 1 addition & 2 deletions Source/Particles/ParticleCreation/SmartCreate.H
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ struct SmartCreate
amrex::ignore_unused(x,y);
#endif

amrex::ParticleIDWrapper{prt.m_idcpu[i_prt]} = id;
amrex::ParticleCPUWrapper{prt.m_idcpu[i_prt]} = cpu;
prt.m_idcpu[i_prt] = amrex::SetParticleIDandCPU(id, cpu);

// initialize the real components after position
for (int j = AMREX_SPACEDIM; j < PartData::NAR; ++j) {
Expand Down
3 changes: 1 addition & 2 deletions Source/Particles/ParticleCreation/SmartUtils.H
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ void setNewParticleIDs (PTile& ptile, int old_size, int num_added)
amrex::ParallelFor(num_added, [=] AMREX_GPU_DEVICE (int ip) noexcept
{
auto const new_id = ip + old_size;
amrex::ParticleIDWrapper{ptd.m_idcpu[new_id]} = pid+ip;
amrex::ParticleCPUWrapper{ptd.m_idcpu[new_id]} = cpuid;
ptd.m_idcpu[new_id] = amrex::SetParticleIDandCPU(pid+ip, cpuid);
});
}

Expand Down
24 changes: 11 additions & 13 deletions Source/Particles/PhysicalParticleContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ namespace
if (has_breit_wheeler) {p_optical_depth_BW[ip] = 0._rt;}
#endif

amrex::ParticleIDWrapper{idcpu[ip]} = -1;
idcpu[ip] = amrex::ParticleIdCpus::Invalid;
}
}

Expand Down Expand Up @@ -1225,8 +1225,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
for (int i_part = 0; i_part < pcounts[index]; ++i_part)
{
long ip = poffset[index] + i_part;
amrex::ParticleIDWrapper{pa_idcpu[ip]} = pid+ip;
amrex::ParticleCPUWrapper{pa_idcpu[ip]} = cpuid;
pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid);
const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ?
// In the refined injection region: use refinement ratio `lrrfac`
inj_pos->getPositionUnitBox(i_part, lrrfac, engine) :
Expand Down Expand Up @@ -1766,8 +1765,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
for (int i_part = 0; i_part < pcounts[index]; ++i_part)
{
const long ip = poffset[index] + i_part;
amrex::ParticleIDWrapper{pa_idcpu[ip]} = pid+ip;
amrex::ParticleCPUWrapper{pa_idcpu[ip]} = cpuid;
pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid);

// This assumes the flux_pos is of type InjectorPositionRandomPlane
const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ?
Expand All @@ -1792,27 +1790,27 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
// the particles will be within the domain.
#if defined(WARPX_DIM_3D)
if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.x,ppos.y,ppos.z})) {
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
continue;
}
#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
amrex::ignore_unused(k);
if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.x,ppos.z,0.0_prt})) {
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
continue;
}
#else
amrex::ignore_unused(j,k);
if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.z,0.0_prt,0.0_prt})) {
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
continue;
}
#endif
// Lab-frame simulation
// If the particle's initial position is not within or on the species's
// xmin, xmax, ymin, ymax, zmin, zmax, go to the next generated particle.
if (!flux_pos->insideBoundsInclusive(ppos.x, ppos.y, ppos.z)) {
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
continue;
}

Expand Down Expand Up @@ -1845,8 +1843,8 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
#endif
Real flux = inj_flux->getFlux(ppos.x, ppos.y, ppos.z, t);
// Remove particle if flux is negative or 0
if ( flux <=0 ){
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
if (flux <= 0) {
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
continue;
}

Expand All @@ -1855,7 +1853,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
}

#ifdef WARPX_QED
if(loc_has_quantum_sync){
if (loc_has_quantum_sync) {
p_optical_depth_QSR[ip] = quantum_sync_get_opt(engine);
}

Expand Down Expand Up @@ -2459,7 +2457,7 @@ PhysicalParticleContainer::SplitParticles (int lev)
}
#endif
// invalidate the particle
amrex::ParticleIDWrapper{idcpu[i]} = -1;
idcpu[i] = amrex::ParticleIdCpus::Invalid;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Source/Particles/Resampling/LevelingThinning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void LevelingThinning::operator() (WarpXParIter& pti, const int lev,
// Remove particle with probability 1 - particle_weight/level_weight
if (random_number > w[indices[i]]/level_weight)
{
amrex::ParticleIDWrapper{idcpu[indices[i]]} = -1;
idcpu[indices[i]] = amrex::ParticleIdCpus::Invalid;
}
// Set particle weight to level weight otherwise
else
Expand Down
17 changes: 8 additions & 9 deletions Source/Particles/WarpXParticleContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,12 @@ WarpXParticleContainer::AddNParticles (int /*lev*/, long n,
for (auto i = ibegin; i < iend; ++i)
{
auto & idcpu_data = pinned_tile.GetStructOfArrays().GetIdCPUData();
idcpu_data.push_back(0);
if (id==-1) {
amrex::ParticleIDWrapper{idcpu_data.back()} = ParticleType::NextID();
} else {
amrex::ParticleIDWrapper{idcpu_data.back()} = id;

amrex::Long current_id = id; // copy input
if (id == -1) {
current_id = ParticleType::NextID();
}
amrex::ParticleCPUWrapper(idcpu_data.back()) = ParallelDescriptor::MyProc();
idcpu_data.push_back(amrex::SetParticleIDandCPU(current_id, ParallelDescriptor::MyProc()));

#ifdef WARPX_DIM_RZ
r[i-ibegin] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
Expand Down Expand Up @@ -1544,8 +1543,8 @@ WarpXParticleContainer::ApplyBoundaryConditions (){
pti.numParticles(),
[=] AMREX_GPU_DEVICE (long i, amrex::RandomEngine const& engine) {
// skip particles that are already flagged for removal
auto const id = amrex::ParticleIDWrapper{idcpu[i]};
if (id < 0) { return; }
auto pidw = amrex::ParticleIDWrapper{idcpu[i]};
if (!pidw.is_valid()) { return; }

ParticleReal x, y, z;
GetPosition.AsStored(i, x, y, z);
Expand All @@ -1567,7 +1566,7 @@ WarpXParticleContainer::ApplyBoundaryConditions (){
boundary_conditions, engine);

if (particle_lost) {
amrex::ParticleIDWrapper{idcpu[i]} = -id;
pidw.make_invalid();
} else {
SetPosition.AsStored(i, x, y, z);
}
Expand Down

0 comments on commit 593221d

Please sign in to comment.