diff --git a/src/Initializer/BatchRecorders/DynamicRuptureRecorder.cpp b/src/Initializer/BatchRecorders/DynamicRuptureRecorder.cpp index 4c35c3f7b..c335f1bf0 100644 --- a/src/Initializer/BatchRecorders/DynamicRuptureRecorder.cpp +++ b/src/Initializer/BatchRecorders/DynamicRuptureRecorder.cpp @@ -20,29 +20,28 @@ void DynamicRuptureRecorder::recordDofsTimeEvaluation() { real* idofsPlus = static_cast(currentLayer->getScratchpadMemory(currentHandler->idofsPlusOnDevice)); real* idofsMinus = static_cast(currentLayer->getScratchpadMemory(currentHandler->idofsMinusOnDevice)); - if (currentLayer->getNumberOfCells()) { - std::vector timeDerivativePlusPtrs{}; - std::vector timeDerivativeMinusPtrs{}; - std::vector idofsPlusPtrs{}; - std::vector idofsMinusPtrs{}; + const auto size = currentLayer->getNumberOfCells(); + if (size > 0) { + std::vector timeDerivativePlusPtrs(size, nullptr); + std::vector timeDerivativeMinusPtrs(size, nullptr); + std::vector idofsPlusPtrs(size, nullptr); + std::vector idofsMinusPtrs(size, nullptr); const size_t idofsSize = tensor::Q::size(); - for (unsigned face = 0; face < currentLayer->getNumberOfCells(); ++face) { - timeDerivativePlusPtrs.push_back(timeDerivativePlus[face]); - timeDerivativeMinusPtrs.push_back(timeDerivativeMinus[face]); - idofsPlusPtrs.push_back(&idofsPlus[face * idofsSize]); - idofsMinusPtrs.push_back(&idofsMinus[face * idofsSize]); + for (unsigned faceId = 0; faceId < size; ++faceId) { + timeDerivativePlusPtrs[faceId] = timeDerivativePlus[faceId]; + timeDerivativeMinusPtrs[faceId] = timeDerivativeMinus[faceId]; + idofsPlusPtrs[faceId] = &idofsPlus[faceId * idofsSize]; + idofsMinusPtrs[faceId] = &idofsMinus[faceId * idofsSize]; } - if (!idofsPlusPtrs.empty()) { - ConditionalKey key(*KernelNames::DrTime); - checkKey(key); + ConditionalKey key(*KernelNames::DrTime); + checkKey(key); - (*currentTable)[key].content[*EntityId::DrDerivativesPlus] = new BatchPointers(timeDerivativePlusPtrs); - (*currentTable)[key].content[*EntityId::DrDerivativesMinus] = new BatchPointers(timeDerivativeMinusPtrs); - (*currentTable)[key].content[*EntityId::DrIdofsPlus] = new BatchPointers(idofsPlusPtrs); - (*currentTable)[key].content[*EntityId::DrIdofsMinus] = new BatchPointers(idofsMinusPtrs); - } + (*currentTable)[key].content[*EntityId::DrDerivativesPlus] = new BatchPointers(timeDerivativePlusPtrs); + (*currentTable)[key].content[*EntityId::DrDerivativesMinus] = new BatchPointers(timeDerivativeMinusPtrs); + (*currentTable)[key].content[*EntityId::DrIdofsPlus] = new BatchPointers(idofsPlusPtrs); + (*currentTable)[key].content[*EntityId::DrIdofsMinus] = new BatchPointers(idofsMinusPtrs); } } @@ -59,7 +58,8 @@ void DynamicRuptureRecorder::recordSpaceInterpolation() { DRGodunovData* godunovData = currentLayer->var(currentHandler->godunovData); DRFaceInformation* faceInfo = currentLayer->var(currentHandler->faceInformation); - if (currentLayer->getNumberOfCells()) { + const auto size = currentLayer->getNumberOfCells(); + if (size > 0) { std::array, *FaceId::Count> QInterpolatedPlusPtr {}; std::array, *FaceId::Count> idofsPlusPtr {}; std::array, *FaceId::Count> TinvTPlusPtr {}; @@ -71,17 +71,17 @@ void DynamicRuptureRecorder::recordSpaceInterpolation() { const size_t QInterpolatedSize = CONVERGENCE_ORDER * tensor::QInterpolated::size(); const size_t idofsSize = tensor::Q::size(); - for (unsigned face = 0; face < currentLayer->getNumberOfCells(); ++face) { - const auto plusSide = faceInfo[face].plusSide; - QInterpolatedPlusPtr[plusSide].push_back(&QInterpolatedPlus[face * QInterpolatedSize]); - idofsPlusPtr[plusSide].push_back(&idofsPlus[face * idofsSize]); - TinvTPlusPtr[plusSide].push_back((&godunovData[face])->TinvT); - - const auto minusSide = faceInfo[face].minusSide; - const auto faceRelation = faceInfo[face].faceRelation; - QInterpolatedMinusPtr[minusSide][faceRelation].push_back(&QInterpolatedMinus[face * QInterpolatedSize]); - idofsMinusPtr[minusSide][faceRelation].push_back(&idofsMinus[face * idofsSize]); - TinvTMinusPtr[minusSide][faceRelation].push_back((&godunovData[face])->TinvT); + for (unsigned faceId = 0; faceId < size; ++faceId) { + const auto plusSide = faceInfo[faceId].plusSide; + QInterpolatedPlusPtr[plusSide].push_back(&QInterpolatedPlus[faceId * QInterpolatedSize]); + idofsPlusPtr[plusSide].push_back(&idofsPlus[faceId * idofsSize]); + TinvTPlusPtr[plusSide].push_back((&godunovData[faceId])->TinvT); + + const auto minusSide = faceInfo[faceId].minusSide; + const auto faceRelation = faceInfo[faceId].faceRelation; + QInterpolatedMinusPtr[minusSide][faceRelation].push_back(&QInterpolatedMinus[faceId * QInterpolatedSize]); + idofsMinusPtr[minusSide][faceRelation].push_back(&idofsMinus[faceId * idofsSize]); + TinvTMinusPtr[minusSide][faceRelation].push_back((&godunovData[faceId])->TinvT); } for (unsigned side = 0; side < 4; ++side) { diff --git a/src/Initializer/BatchRecorders/LocalIntegrationRecorder.cpp b/src/Initializer/BatchRecorders/LocalIntegrationRecorder.cpp index 0afa90dfc..124e3cbdb 100644 --- a/src/Initializer/BatchRecorders/LocalIntegrationRecorder.cpp +++ b/src/Initializer/BatchRecorders/LocalIntegrationRecorder.cpp @@ -28,25 +28,28 @@ void LocalIntegrationRecorder::recordTimeAndVolumeIntegrals() { real *idofsScratch = static_cast(currentLayer->getScratchpadMemory(currentHandler->idofsScratch)); real *derivativesScratch = static_cast(currentLayer->getScratchpadMemory(currentHandler->derivativesScratch)); - if (currentLayer->getNumberOfCells()) { - std::vector dofsPtrs{}; - std::vector starPtrs{}; + const auto size = currentLayer->getNumberOfCells(); + if (size > 0) { + std::vector dofsPtrs(size, nullptr); + std::vector starPtrs(size, nullptr); std::vector idofsPtrs{}; - std::vector dQPtrs{}; + std::vector dQPtrs(size, nullptr); std::vector ltsBuffers{}; std::vector idofsForLtsBuffers{}; + idofsPtrs.reserve(size); + real **derivatives = currentLayer->var(currentHandler->derivatives); real **buffers = currentLayer->var(currentHandler->buffers); - for (unsigned cell = 0; cell < currentLayer->getNumberOfCells(); ++cell) { + for (unsigned cell = 0; cell < size; ++cell) { auto data = currentLoader->entry(cell); // dofs - dofsPtrs.push_back(static_cast(data.dofs)); + dofsPtrs[cell] = static_cast(data.dofs); // idofs - real *nextIdofPtr = &idofsScratch[idofsAddressCounter]; + real *nextIdofPtr = &idofsScratch[integratedDofsAddressCounter]; bool isBuffersProvided = ((data.cellInformation.ltsSetup >> 8) % 2) == 1; bool isLtsBuffers = ((data.cellInformation.ltsSetup >> 10) % 2) == 1; @@ -60,7 +63,7 @@ void LocalIntegrationRecorder::recordTimeAndVolumeIntegrals() { ltsBuffers.push_back(buffers[cell]); idofsAddressRegistry[cell] = nextIdofPtr; - idofsAddressCounter += tensor::I::size(); + integratedDofsAddressCounter += tensor::I::size(); } else { // gts buffers have to be always overridden idofsPtrs.push_back(buffers[cell]); @@ -69,22 +72,24 @@ void LocalIntegrationRecorder::recordTimeAndVolumeIntegrals() { } else { idofsPtrs.push_back(nextIdofPtr); idofsAddressRegistry[cell] = nextIdofPtr; - idofsAddressCounter += tensor::I::size(); + integratedDofsAddressCounter += tensor::I::size(); } // stars - starPtrs.push_back(static_cast(data.localIntegrationOnDevice.starMatrices[0])); + starPtrs[cell] = static_cast(data.localIntegrationOnDevice.starMatrices[0]); // derivatives bool isDerivativesProvided = ((data.cellInformation.ltsSetup >> 9) % 2) == 1; if (isDerivativesProvided) { - dQPtrs.push_back(derivatives[cell]); + dQPtrs[cell] = derivatives[cell]; } else { - dQPtrs.push_back(&derivativesScratch[derivativesAddressCounter]); + dQPtrs[cell] = &derivativesScratch[derivativesAddressCounter]; derivativesAddressCounter += yateto::computeFamilySize(); } } + // just to be sure that we took all branches while filling in idofsPtrs vector + assert(dofsPtrs.size() == idofsPtrs.size()); ConditionalKey key(KernelNames::Time || KernelNames::Volume); checkKey(key); @@ -106,12 +111,17 @@ void LocalIntegrationRecorder::recordTimeAndVolumeIntegrals() { void LocalIntegrationRecorder::recordLocalFluxIntegral() { + const auto size = currentLayer->getNumberOfCells(); for (unsigned face = 0; face < 4; ++face) { std::vector idofsPtrs{}; std::vector dofsPtrs{}; std::vector aplusTPtrs{}; - for (unsigned cell = 0; cell < currentLayer->getNumberOfCells(); ++cell) { + idofsPtrs.reserve(size); + dofsPtrs.reserve(size); + aplusTPtrs.reserve(size); + + for (unsigned cell = 0; cell < size; ++cell) { auto data = currentLoader->entry(cell); // no element local contribution in the case of dynamic rupture boundary conditions @@ -141,7 +151,8 @@ void LocalIntegrationRecorder::recordDisplacements() { // NOTE: velocity components are between 6th and 8th columns constexpr unsigned OFFSET_TO_VELOCITIES = 6 * seissol::tensor::I::Shape[0]; - for (unsigned cell = 0; cell < currentLayer->getNumberOfCells(); ++cell) { + const auto size = currentLayer->getNumberOfCells(); + for (unsigned cell = 0; cell < size; ++cell) { if (displacements[cell] != nullptr) { real *iVelocity = &idofsAddressRegistry[cell][OFFSET_TO_VELOCITIES]; iVelocitiesPtrs.push_back(iVelocity); diff --git a/src/Initializer/BatchRecorders/NeighIntegrationRecorder.cpp b/src/Initializer/BatchRecorders/NeighIntegrationRecorder.cpp index b815a98cc..6c5563080 100644 --- a/src/Initializer/BatchRecorders/NeighIntegrationRecorder.cpp +++ b/src/Initializer/BatchRecorders/NeighIntegrationRecorder.cpp @@ -23,13 +23,14 @@ void NeighIntegrationRecorder::recordDofsTimeEvaluation() { real *(*faceNeighbors)[4] = currentLayer->var(currentHandler->faceNeighbors); real *idofsScratch = static_cast(currentLayer->getScratchpadMemory(currentHandler->idofsScratch)); - if (currentLayer->getNumberOfCells()) { + const auto size = currentLayer->getNumberOfCells(); + if (size > 0) { std::vector ltsIDofsPtrs{}; std::vector ltsDerivativesPtrs{}; std::vector gtsDerivativesPtrs{}; std::vector gtsIDofsPtrs{}; - for (unsigned cell = 0; cell < currentLayer->getNumberOfCells(); ++cell) { + for (unsigned cell = 0; cell < size; ++cell) { auto data = currentLoader->entry(cell); for (unsigned face = 0; face < 4; ++face) { @@ -47,7 +48,7 @@ void NeighIntegrationRecorder::recordDofsTimeEvaluation() { bool isNeighbProvidesDerivatives = ((data.cellInformation.ltsSetup >> face) % 2) == 1; if (isNeighbProvidesDerivatives) { - real *NextTempIDofsPtr = &idofsScratch[idofsAddressCounter]; + real *NextTempIDofsPtr = &idofsScratch[integratedDofsAddressCounter]; bool isGtsNeigbour = ((data.cellInformation.ltsSetup >> (face + 4)) % 2) == 1; if (isGtsNeigbour) { @@ -61,7 +62,7 @@ void NeighIntegrationRecorder::recordDofsTimeEvaluation() { ltsIDofsPtrs.push_back(NextTempIDofsPtr); ltsDerivativesPtrs.push_back(neighbourBuffer); } - idofsAddressCounter += tensor::I::size(); + integratedDofsAddressCounter += tensor::I::size(); } else { idofsAddressRegistry[neighbourBuffer] = neighbourBuffer; } @@ -101,12 +102,13 @@ void NeighIntegrationRecorder::recordNeighbourFluxIntegrals() { CellDRMapping(*drMapping)[4] = currentLayer->var(currentHandler->drMapping); - for (unsigned cell = 0; cell < currentLayer->getNumberOfCells(); ++cell) { + const auto size = currentLayer->getNumberOfCells(); + for (unsigned cell = 0; cell < size; ++cell) { auto data = currentLoader->entry(cell); for (unsigned int face = 0; face < 4; face++) { switch (data.cellInformation.faceTypes[face]) { case FaceType::regular: - // Fallthrough intended + [[fallthrough]]; case FaceType::periodic: { // compute face type relation diff --git a/src/Initializer/BatchRecorders/PlasticityRecorder.cpp b/src/Initializer/BatchRecorders/PlasticityRecorder.cpp index c97794fb9..eec86fcd3 100644 --- a/src/Initializer/BatchRecorders/PlasticityRecorder.cpp +++ b/src/Initializer/BatchRecorders/PlasticityRecorder.cpp @@ -14,27 +14,27 @@ void PlasticityRecorder::record(LTS &handler, Layer &layer) { real(*pstrains)[7] = currentLayer->var(currentHandler->pstrain); size_t nodalStressTensorCounter = 0; real *scratchMem = static_cast(currentLayer->getScratchpadMemory(currentHandler->idofsScratch)); - if (currentLayer->getNumberOfCells()) { - std::vector dofsPtrs{}; - std::vector qstressNodalPtrs{}; - std::vector pstransPtrs{}; - std::vector initialLoadPtrs{}; + const auto size = currentLayer->getNumberOfCells(); + if (size > 0) { + std::vector dofsPtrs(size, nullptr); + std::vector qstressNodalPtrs(size, nullptr); + std::vector pstransPtrs(size, nullptr); + std::vector initialLoadPtrs(size, nullptr); - for (unsigned cell = 0; cell < currentLayer->getNumberOfCells(); ++cell) { + for (unsigned cell = 0; cell < size; ++cell) { auto data = currentLoader->entry(cell); - dofsPtrs.push_back(static_cast(data.dofs)); - qstressNodalPtrs.push_back(&scratchMem[nodalStressTensorCounter]); + dofsPtrs[cell] = static_cast(data.dofs); + qstressNodalPtrs[cell] = &scratchMem[nodalStressTensorCounter]; nodalStressTensorCounter += tensor::QStressNodal::size(); - pstransPtrs.push_back(static_cast(pstrains[cell])); - initialLoadPtrs.push_back(static_cast(data.plasticity.initialLoading)); - } - if (!dofsPtrs.empty()) { - ConditionalKey key(*KernelNames::Plasticity); - checkKey(key); - (*currentTable)[key].content[*EntityId::Dofs] = new BatchPointers(dofsPtrs); - (*currentTable)[key].content[*EntityId::NodalStressTensor] = new BatchPointers(qstressNodalPtrs); - (*currentTable)[key].content[*EntityId::Pstrains] = new BatchPointers(pstransPtrs); - (*currentTable)[key].content[*EntityId::InitialLoad] = new BatchPointers(initialLoadPtrs); + pstransPtrs[cell] = static_cast(pstrains[cell]); + initialLoadPtrs[cell] = static_cast(data.plasticity.initialLoading); } + + ConditionalKey key(*KernelNames::Plasticity); + checkKey(key); + (*currentTable)[key].content[*EntityId::Dofs] = new BatchPointers(dofsPtrs); + (*currentTable)[key].content[*EntityId::NodalStressTensor] = new BatchPointers(qstressNodalPtrs); + (*currentTable)[key].content[*EntityId::Pstrains] = new BatchPointers(pstransPtrs); + (*currentTable)[key].content[*EntityId::InitialLoad] = new BatchPointers(initialLoadPtrs); } } diff --git a/src/Initializer/BatchRecorders/Recorders.h b/src/Initializer/BatchRecorders/Recorders.h index 4f7d36f3e..063a57b96 100644 --- a/src/Initializer/BatchRecorders/Recorders.h +++ b/src/Initializer/BatchRecorders/Recorders.h @@ -76,7 +76,7 @@ class LocalIntegrationRecorder : public AbstractRecorder idofsAddressRegistry{}; - size_t idofsAddressCounter{0}; + size_t integratedDofsAddressCounter{0}; size_t derivativesAddressCounter{0}; }; @@ -97,14 +97,14 @@ class NeighIntegrationRecorder : public AbstractRecorder idofsAddressRegistry{}; - size_t idofsAddressCounter{0}; + size_t integratedDofsAddressCounter{0}; }; diff --git a/src/Kernels/DeviceAux/PlasticityAux.h b/src/Kernels/DeviceAux/PlasticityAux.h index a059b6bc8..55584d0e8 100644 --- a/src/Kernels/DeviceAux/PlasticityAux.h +++ b/src/Kernels/DeviceAux/PlasticityAux.h @@ -17,7 +17,7 @@ void saveFirstModes(real *firstModes, void adjustDeviatoricTensors(real **nodalStressTensors, int *isAdjustableVector, const PlasticityData *plasticity, - double one_minus_integrating_factor, + double oneMinusIntegratingFactor, size_t numElements); void adjustModalStresses(real **modalStressTensors, @@ -31,7 +31,7 @@ void computePstrains(real **pstrains, const real **modalStressTensors, const real *firsModes, const PlasticityData *plasticity, - double one_minus_integrating_factor, + double oneMinusIntegratingFactor, double timeStepWidth, double T_v, size_t numElements); diff --git a/src/Kernels/DeviceAux/cuda/PlasticityAux.cu b/src/Kernels/DeviceAux/cuda/PlasticityAux.cu index bab8bf2c8..4384f9c2f 100644 --- a/src/Kernels/DeviceAux/cuda/PlasticityAux.cu +++ b/src/Kernels/DeviceAux/cuda/PlasticityAux.cu @@ -36,7 +36,7 @@ void saveFirstModes(real *firstModes, __global__ void kernel_adjustDeviatoricTensors(real **nodalStressTensors, int *isAdjustableVector, const PlasticityData *plasticity, - const double one_minus_integrating_factor) { + const double oneMinusIntegratingFactor) { real *elementTensors = nodalStressTensors[blockIdx.x]; real localStresses[NUM_STREESS_COMPONENTS]; @@ -80,7 +80,7 @@ __global__ void kernel_adjustDeviatoricTensors(real **nodalStressTensors, real factor = 0.0; if (tau > taulim) { isAdjusted = static_cast(true); - factor = ((taulim / tau) - 1.0) * one_minus_integrating_factor; + factor = ((taulim / tau) - 1.0) * oneMinusIntegratingFactor; } // 7. Adjust deviatoric stress tensor if a node within a node exceeds the elasticity region @@ -100,7 +100,7 @@ __global__ void kernel_adjustDeviatoricTensors(real **nodalStressTensors, void adjustDeviatoricTensors(real **nodalStressTensors, int *isAdjustableVector, const PlasticityData *plasticity, - const double one_minus_integrating_factor, + const double oneMinusIntegratingFactor, const size_t numElements) { constexpr unsigned numNodesPerElement = tensor::QStressNodal::Shape[0]; dim3 block(numNodesPerElement, 1, 1); @@ -108,7 +108,7 @@ void adjustDeviatoricTensors(real **nodalStressTensors, kernel_adjustDeviatoricTensors<<>>(nodalStressTensors, isAdjustableVector, plasticity, - one_minus_integrating_factor); + oneMinusIntegratingFactor); } //-------------------------------------------------------------------------------------------------- @@ -173,7 +173,7 @@ __global__ void kernel_computePstrains(real **pstrains, const real** modalStressTensors, const real* firsModes, const PlasticityData* plasticity, - const double one_minus_integrating_factor, + const double oneMinusIntegratingFactor, const double timeStepWidth, const double T_v, const size_t numElements) { @@ -189,7 +189,7 @@ __global__ void kernel_computePstrains(real **pstrains, const PlasticityData *localData = &plasticity[index]; constexpr unsigned numModesPerElement = init::Q::Shape[0]; - real factor = localData->mufactor / (T_v * one_minus_integrating_factor); + real factor = localData->mufactor / (T_v * oneMinusIntegratingFactor); real duDtPstrain = factor * (localFirstMode[threadIdx.x] - localModalTensor[threadIdx.x * numModesPerElement]); localPstrains[threadIdx.x] += timeStepWidth * duDtPstrain; @@ -216,7 +216,7 @@ void computePstrains(real **pstrains, const real **modalStressTensors, const real *firsModes, const PlasticityData *plasticity, - const double one_minus_integrating_factor, + const double oneMinusIntegratingFactor, const double timeStepWidth, const double T_v, const size_t numElements) { @@ -228,7 +228,7 @@ void computePstrains(real **pstrains, modalStressTensors, firsModes, plasticity, - one_minus_integrating_factor, + oneMinusIntegratingFactor, timeStepWidth, T_v, numElements); diff --git a/src/Kernels/Plasticity.cpp b/src/Kernels/Plasticity.cpp index 0cb110411..5eca313e6 100644 --- a/src/Kernels/Plasticity.cpp +++ b/src/Kernels/Plasticity.cpp @@ -55,7 +55,7 @@ using namespace device; #endif namespace seissol::kernels { - unsigned Plasticity::computePlasticity(double one_minus_integrating_factor, + unsigned Plasticity::computePlasticity(double oneMinusIntegratingFactor, double timeStepWidth, double T_v, GlobalData const *global, @@ -137,7 +137,7 @@ namespace seissol::kernels { // where r = 1 - exp(-timeStepWidth / T_v) if (tau[ip] > taulim[ip]) { adjust = true; - yieldFactor[ip] = (taulim[ip] / tau[ip] - 1.0) * one_minus_integrating_factor; + yieldFactor[ip] = (taulim[ip] / tau[ip] - 1.0) * oneMinusIntegratingFactor; } else { yieldFactor[ip] = 0.0; } @@ -192,7 +192,7 @@ namespace seissol::kernels { * * If tau < taulim, then sigma_{ij} - sigmaNew_{ij} = 0. */ - real factor = plasticityData->mufactor / (T_v * one_minus_integrating_factor); + real factor = plasticityData->mufactor / (T_v * oneMinusIntegratingFactor); dudt_pstrain[q] = factor * (prev_degreesOfFreedom[q] - degreesOfFreedom[q * NUMBER_OF_ALIGNED_BASIS_FUNCTIONS]); // Integrate with explicit Euler @@ -211,7 +211,7 @@ namespace seissol::kernels { return 0; } - unsigned Plasticity::computePlasticityBatched(double one_minus_integrating_factor, + unsigned Plasticity::computePlasticityBatched(double oneMinusIntegratingFactor, double timeStepWidth, double T_v, GlobalData const *global, @@ -266,7 +266,7 @@ namespace seissol::kernels { device::aux::plasticity::adjustDeviatoricTensors(nodalStressTensors, isAdjustableVector, plasticity, - one_minus_integrating_factor, + oneMinusIntegratingFactor, numElements); unsigned numAdjustedDofs = device.algorithms.reduceVector(isAdjustableVector, @@ -287,7 +287,7 @@ namespace seissol::kernels { const_cast(modalStressTensors), firsModes, plasticity, - one_minus_integrating_factor, + oneMinusIntegratingFactor, timeStepWidth, T_v, numElements); diff --git a/src/Kernels/Plasticity.h b/src/Kernels/Plasticity.h index 03097d40f..2e416ae49 100644 --- a/src/Kernels/Plasticity.h +++ b/src/Kernels/Plasticity.h @@ -57,7 +57,7 @@ class seissol::kernels::Plasticity { public: /** Returns 1 if there was plastic yielding otherwise 0. */ - static unsigned computePlasticity( double relaxTime, + static unsigned computePlasticity( double oneMinusIntegratingFactor, double timeStepWidth, double T_v, GlobalData const* global, diff --git a/src/Solver/Pipeline/DrPipeline.h b/src/Solver/Pipeline/DrPipeline.h index 0a7c5747d..a5d4e7c40 100644 --- a/src/Solver/Pipeline/DrPipeline.h +++ b/src/Solver/Pipeline/DrPipeline.h @@ -5,7 +5,7 @@ * @author Ravil Dorozhinskii (ravil.dorozhinskii AT tum.de) * * @section LICENSE - * Copyright (c) 2015-2017, SeisSol Group + * Copyright (c) 2020-2021, SeisSol Group * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/Solver/Pipeline/DrTuner.cpp b/src/Solver/Pipeline/DrTuner.cpp index 08477485e..bcf67f24f 100644 --- a/src/Solver/Pipeline/DrTuner.cpp +++ b/src/Solver/Pipeline/DrTuner.cpp @@ -5,7 +5,7 @@ * @author Ravil Dorozhinskii (ravil.dorozhinskii AT tum.de) * * @section LICENSE - * Copyright (c) 2015-2017, SeisSol Group + * Copyright (c) 2020-2021, SeisSol Group * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,10 +35,9 @@ * POSSIBILITY OF SUCH DAMAGE. * * @section DESCRIPTION - * A custom pipeline tuner of DR pipeline which is based on the golden bisection method + * A custom tuner of DR pipeline which is based on the golden bisection method * - * The objective function is given as million DR cells updates per second which is - * supposed to get maximized + * Note, this can be deprecated once friction solvers are adapted for GPU computing **/ #include "DrTuner.h" @@ -61,6 +60,16 @@ namespace seissol::dr::pipeline { action = Action::BeginRecordingRightEvaluation; } + /** + * Implements a golden-section search to find a optimal batch size. + * + * The objective function is given as million DR cells updates per second which is + * supposed to get maximized. Note, that a callee evaluates the function. Values are returned back + * during the next call. Actions are used to handle the logic. The method sets `SkipAction` which + * results in fixing a batch size once a convergence achieved. + * + * @param stageTiming average CPU time (in seconds) step on each stage for a batch processing. + **/ void DrPipelineTuner::tune(const std::array& stageTiming) { constexpr size_t ComputeStageId{1}; double currPerformance = 1e6 * currBatchSize / (stageTiming[ComputeStageId] + 1e-12); @@ -102,7 +111,6 @@ namespace seissol::dr::pipeline { return; } - // Golden-section search if (leftValue > rightValue) { maxBatchSize = rightPoint; rightPoint = leftPoint; diff --git a/src/Solver/Pipeline/DrTuner.h b/src/Solver/Pipeline/DrTuner.h index 833ef366e..0008d30a6 100644 --- a/src/Solver/Pipeline/DrTuner.h +++ b/src/Solver/Pipeline/DrTuner.h @@ -5,7 +5,7 @@ * @author Ravil Dorozhinskii (ravil.dorozhinskii AT tum.de) * * @section LICENSE - * Copyright (c) 2015-2017, SeisSol Group + * Copyright (c) 2020-2021, SeisSol Group * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,8 +37,7 @@ * @section DESCRIPTION * A custom pipeline tuner of DR pipeline which is based on the golden bisection method * - * The objective function is given as million DR cells updates per second which is - * supposed to get maximized + * Note, this can be deprecated once friction solvers are adapted for GPU computing **/ #ifndef DR_TUNER_H @@ -46,9 +45,6 @@ #include -namespace seissol::unit_test { -class DrTunerTest; -} namespace seissol::dr::pipeline { class DrPipelineTuner: public PipelineTuner<3, 1024> { @@ -56,7 +52,9 @@ namespace seissol::dr::pipeline { DrPipelineTuner(); ~DrPipelineTuner() override = default; void tune(const std::array& stageTiming) override; - + [[nodiscard]] bool isTunerConverged() const {return isConverged;} + [[nodiscard]] double getMaxBatchSize() const {return maxBatchSize;} + [[nodiscard]] double getMinBatchSize() const {return minBatchSize;} private: enum class Action { BeginRecordingLeftEvaluation, @@ -65,7 +63,7 @@ namespace seissol::dr::pipeline { RecordRightEvaluation, SkipAction, }; - Action action; + Action action{Action::BeginRecordingRightEvaluation}; double maxBatchSize{DefaultBatchSize}; double minBatchSize{0.1 * DefaultBatchSize}; @@ -79,8 +77,6 @@ namespace seissol::dr::pipeline { double invPhiSquared{}; double eps{5e-2}; bool isConverged{false}; - - friend class seissol::unit_test::DrTunerTest; }; } diff --git a/src/Solver/Pipeline/GenericPipeline.h b/src/Solver/Pipeline/GenericPipeline.h index 74e3cdafd..ae464dd93 100644 --- a/src/Solver/Pipeline/GenericPipeline.h +++ b/src/Solver/Pipeline/GenericPipeline.h @@ -5,7 +5,7 @@ * @author Ravil Dorozhinskii (ravil.dorozhinskii AT tum.de) * * @section LICENSE - * Copyright (c) 2015-2017, SeisSol Group + * Copyright (c) 2020-2021, SeisSol Group * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -53,10 +53,6 @@ #include -namespace seissol::unit_test { - class PipelineTest; -} - namespace seissol { template @@ -82,11 +78,10 @@ namespace seissol { template> class GenericPipeline { - private: public: struct PipelineCallBack { virtual ~PipelineCallBack() = default; - virtual void operator()(size_t begin, size_t end, size_t callCounter) = 0; + virtual void operator()(size_t begin, size_t batchSize, size_t callCounter) = 0; virtual void finalize() = 0; }; @@ -97,7 +92,7 @@ namespace seissol { } ~GenericPipeline() = default; constexpr static decltype(NumStagesP) NumStages{NumStagesP}; - static constexpr decltype(NumStagesP) TailSize{NumStagesP - 1}; + constexpr static decltype(NumStagesP) TailSize{NumStagesP - 1}; constexpr static decltype(DefaultBatchSizeP) DefaultBatchSize{DefaultBatchSizeP}; void registerCallBack(unsigned id, PipelineCallBack* callBack) { @@ -141,7 +136,6 @@ namespace seissol { return *this; } - private: void clamp() { begin = (begin > limit) ? limit : begin; end = (end > limit) ? limit : end; @@ -225,8 +219,6 @@ namespace seissol { bool resetAfterRun{true}; TunerT tuner; Stopwatch stopwatch{}; - - friend class seissol::unit_test::PipelineTest; }; } diff --git a/src/Solver/time_stepping/TimeCluster.cpp b/src/Solver/time_stepping/TimeCluster.cpp index 9c04ae96c..a8861a413 100755 --- a/src/Solver/time_stepping/TimeCluster.cpp +++ b/src/Solver/time_stepping/TimeCluster.cpp @@ -768,7 +768,7 @@ void seissol::time_stepping::TimeCluster::computeNeighboringIntegration( seissol ); #ifdef USE_PLASTICITY - numberOTetsWithPlasticYielding += seissol::kernels::Plasticity::computePlasticity( m_relaxTime, + numberOTetsWithPlasticYielding += seissol::kernels::Plasticity::computePlasticity( m_oneMinusIntegratingFactor, m_timeStepWidth, m_tv, m_globalDataOnHost, @@ -807,7 +807,7 @@ void seissol::time_stepping::TimeCluster::computeNeighboringIntegration( seissol #ifdef USE_PLASTICITY PlasticityData* plasticity = i_layerData.var(m_lts->plasticity); - unsigned numAdjustedDofs = seissol::kernels::Plasticity::computePlasticityBatched(m_relaxTime, + unsigned numAdjustedDofs = seissol::kernels::Plasticity::computePlasticityBatched(m_oneMinusIntegratingFactor, m_timeStepWidth, m_tv, m_globalDataOnDevice, diff --git a/src/Solver/time_stepping/TimeCluster.h b/src/Solver/time_stepping/TimeCluster.h index aa3a2eb14..b1c1411ee 100644 --- a/src/Solver/time_stepping/TimeCluster.h +++ b/src/Solver/time_stepping/TimeCluster.h @@ -203,7 +203,7 @@ class seissol::time_stepping::TimeCluster double m_tv; //! Relax time for plasticity - double m_one_minus_integrating_factor; + double m_oneMinusIntegratingFactor; //! Stopwatch of TimeManager LoopStatistics* m_loopStatistics; @@ -322,7 +322,7 @@ class seissol::time_stepping::TimeCluster //! Update relax time for plasticity void updateRelaxTime() { - m_one_minus_integrating_factor = (m_tv > 0.0) ? 1.0 - exp(-m_timeStepWidth / m_tv) : 1.0; + m_oneMinusIntegratingFactor = (m_tv > 0.0) ? 1.0 - exp(-m_timeStepWidth / m_tv) : 1.0; } public: diff --git a/src/tests/Pipeline/DrTunerTest.t.h b/src/tests/Pipeline/DrTunerTest.t.h index e77041e24..54f2f2b0a 100644 --- a/src/tests/Pipeline/DrTunerTest.t.h +++ b/src/tests/Pipeline/DrTunerTest.t.h @@ -17,35 +17,35 @@ class seissol::unit_test::DrTunerTest : public CxxTest::TestSuite { // this is going to results in: Performance ~ 1 / batchSize auto squareFunction = [] (size_t x) {return static_cast(x * x);}; - while (!tuner.isConverged) { + while (!tuner.isTunerConverged()) { batchSize = tuner.getBatchSize(); timing[ComputeStageId] = squareFunction(batchSize); tuner.tune(timing); } - TS_ASSERT_DELTA(batchSize, tuner.minBatchSize, eps); + TS_ASSERT_DELTA(batchSize, tuner.getMinBatchSize(), eps); } void testGoesToRight() { dr::pipeline::DrPipelineTuner tuner; auto hyperbolicTime = [](size_t x) {return 1.0 / (static_cast(x + 1.0));}; - while (!tuner.isConverged) { + while (!tuner.isTunerConverged()) { batchSize = tuner.getBatchSize(); timing[ComputeStageId] = hyperbolicTime(batchSize); tuner.tune(timing); } - TS_ASSERT_DELTA(batchSize, tuner.maxBatchSize, eps); + TS_ASSERT_DELTA(batchSize, tuner.getMaxBatchSize(), eps); } void testMaxWithinRange() { dr::pipeline::DrPipelineTuner tuner; - const auto midPoint = 0.5 * (tuner.maxBatchSize + tuner.minBatchSize); + const auto midPoint = 0.5 * (tuner.getMaxBatchSize() + tuner.getMinBatchSize()); auto hatFunction = [midPoint] (size_t x) { return std::abs(midPoint - x); }; - while (!tuner.isConverged) { + while (!tuner.isTunerConverged()) { batchSize = tuner.getBatchSize(); timing[ComputeStageId] = hatFunction(batchSize); tuner.tune(timing); diff --git a/src/tests/Pipeline/PipelineTest.t.h b/src/tests/Pipeline/PipelineTest.t.h index f2627b77c..5694e7b22 100644 --- a/src/tests/Pipeline/PipelineTest.t.h +++ b/src/tests/Pipeline/PipelineTest.t.h @@ -12,8 +12,7 @@ namespace seissol { } } -class seissol::unit_test::PipelineTest : public CxxTest::TestSuite -{ +class seissol::unit_test::PipelineTest : public CxxTest::TestSuite { private: using TestPipeline = seissol::GenericPipeline<4, 1024>; struct TestStageCallBack : public TestPipeline::PipelineCallBack { @@ -27,9 +26,15 @@ class seissol::unit_test::PipelineTest : public CxxTest::TestSuite struct FirstStage : public TestStageCallBack { explicit FirstStage(std::string& buffer) : TestStageCallBack(buffer) {} - void operator()(size_t, size_t, size_t) override { + void operator()(size_t, size_t batchSize, size_t) override { buffer.push_back('A'); + batchSizes.emplace_back(batchSize); } + + std::vector getBatchSizes() {return batchSizes;} + private: + // Note: it is enough to test batchSizes in only one stage + std::vector batchSizes{}; }; struct SecondStage : public TestStageCallBack { @@ -71,8 +76,7 @@ class seissol::unit_test::PipelineTest : public CxxTest::TestSuite TS_ASSERT_EQUALS(TestPipeline::TailSize, 3); } - void testSuperShortPipeline() - { + void testSuperShortPipeline() { std::string testBuffer{}; std::array, 4> callBacks = { std::make_shared(testBuffer), @@ -86,16 +90,19 @@ class seissol::unit_test::PipelineTest : public CxxTest::TestSuite } pipeline.run(TestPipeline::DefaultBatchSize - 3); - std::string expectedResults{PipelineTest::format("AF BF CF DF")}; + auto testedBatchSizes = static_cast(callBacks[0].get())->getBatchSizes(); + std::vector expectedBatchSizes{TestPipeline::DefaultBatchSize - 3}; + TS_ASSERT_EQUALS(testedBatchSizes.size(), expectedBatchSizes.size()); + for (size_t i = 0; i < expectedBatchSizes.size(); ++i) + TS_ASSERT_EQUALS(testedBatchSizes[i], expectedBatchSizes[i]); + + std::string expectedResults{PipelineTest::format("AF BF CF DF")}; TS_ASSERT_EQUALS(testBuffer.size(), expectedResults.size()); - TS_ASSERT_EQUALS(pipeline.numFullPipeIterations, 0); - TS_ASSERT_EQUALS(pipeline.numTotalIterations, 1); TS_ASSERT_SAME_DATA(testBuffer.data(), expectedResults.data(), expectedResults.size()); } - void testShortPipeline() - { + void testShortPipeline() { std::string testBuffer{}; std::array, 4> callBacks = { std::make_shared(testBuffer), @@ -109,16 +116,20 @@ class seissol::unit_test::PipelineTest : public CxxTest::TestSuite } pipeline.run(2 * TestPipeline::DefaultBatchSize - 3); - std::string expectedResults{PipelineTest::format("A AFB BFC CFD DF")}; + auto testedBatchSizes = static_cast(callBacks[0].get())->getBatchSizes(); + std::vector expectedBatchSizes{TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize - 3}; + TS_ASSERT_EQUALS(testedBatchSizes.size(), expectedBatchSizes.size()); + for (size_t i = 0; i < expectedBatchSizes.size(); ++i) + TS_ASSERT_EQUALS(testedBatchSizes[i], expectedBatchSizes[i]); + + std::string expectedResults{PipelineTest::format("A AFB BFC CFD DF")}; TS_ASSERT_EQUALS(testBuffer.size(), expectedResults.size()); - TS_ASSERT_EQUALS(pipeline.numFullPipeIterations, 0); - TS_ASSERT_EQUALS(pipeline.numTotalIterations, 2); TS_ASSERT_SAME_DATA(testBuffer.data(), expectedResults.data(), expectedResults.size()); } - void testWithOneFullPipelineIteration() - { + void testWithOneFullPipelineIteration() { std::string testBuffer{}; std::array, 4> callBacks = { std::make_shared(testBuffer), @@ -132,16 +143,22 @@ class seissol::unit_test::PipelineTest : public CxxTest::TestSuite } pipeline.run(4 * TestPipeline::DefaultBatchSize - 3); - std::string expectedResults{PipelineTest::format("A AB ABC AFBCD BFCD CFD DF")}; + auto testedBatchSizes = static_cast(callBacks[0].get())->getBatchSizes(); + std::vector expectedBatchSizes{TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize - 3}; + TS_ASSERT_EQUALS(testedBatchSizes.size(), expectedBatchSizes.size()); + for (size_t i = 0; i < expectedBatchSizes.size(); ++i) + TS_ASSERT_EQUALS(testedBatchSizes[i], expectedBatchSizes[i]); + + std::string expectedResults{PipelineTest::format("A AB ABC AFBCD BFCD CFD DF")}; TS_ASSERT_EQUALS(testBuffer.size(), expectedResults.size()); - TS_ASSERT_EQUALS(pipeline.numFullPipeIterations, 1); - TS_ASSERT_EQUALS(pipeline.numTotalIterations, 4); TS_ASSERT_SAME_DATA(testBuffer.data(), expectedResults.data(), expectedResults.size()); } - void testLongPipeline() - { + void testLongPipeline() { std::string testBuffer{}; std::array, 4> callBacks = { std::make_shared(testBuffer), @@ -155,11 +172,20 @@ class seissol::unit_test::PipelineTest : public CxxTest::TestSuite } pipeline.run(4 * TestPipeline::DefaultBatchSize + 3); - std::string expectedResults{PipelineTest::format("A AB ABC ABCD AFBCD BFCD CFD DF")}; + auto testedBatchSizes = static_cast(callBacks[0].get())->getBatchSizes(); + std::vector expectedBatchSizes{TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize, + TestPipeline::DefaultBatchSize, + 3}; + TS_ASSERT_EQUALS(testedBatchSizes.size(), expectedBatchSizes.size()); + for (size_t i = 0; i < expectedBatchSizes.size(); ++i) + TS_ASSERT_EQUALS(testedBatchSizes[i], expectedBatchSizes[i]); + + + std::string expectedResults{PipelineTest::format("A AB ABC ABCD AFBCD BFCD CFD DF")}; TS_ASSERT_EQUALS(testBuffer.size(), expectedResults.size()); - TS_ASSERT_EQUALS(pipeline.numFullPipeIterations, 2); - TS_ASSERT_EQUALS(pipeline.numTotalIterations, 5); TS_ASSERT_SAME_DATA(testBuffer.data(), expectedResults.data(), expectedResults.size()); } };