Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Time tracing for the openPMD plugin #4909

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/picongpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,9 @@ set(PIC_VERBOSE "1" CACHE STRING
"Set verbosity level for PIConGPU (default is only physics output)")
add_definitions(-DPIC_VERBOSE_LVL=${PIC_VERBOSE})

set(PIC_OPENPMD_TIMETRACE_NUMBER_OF_FILES "0" CACHE STRING "Set the maximum number of MPI ranks that create a file with trace information.")
add_definitions(-DPIC_OPENPMD_TIMETRACE_NUMBER_OF_FILES=${PIC_OPENPMD_TIMETRACE_NUMBER_OF_FILES})

option(PIC_ADD_RPATH "Add RPATH's to binaries." ON)


Expand Down
160 changes: 160 additions & 0 deletions include/picongpu/plugins/common/DumpTimes.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <functional>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>

namespace picongpu
{
namespace openPMD
{
template<typename Clock>
struct TimeFormatters
{
using TimeFormatter = std::function<std::string(typename Clock::time_point const&)>;

static std::string humanReadable(typename Clock::time_point const& currentTime)
{
std::stringstream res;
std::time_t now_c = Clock::to_time_t(currentTime);
res << std::put_time(std::localtime(&now_c), "%F %T") << '.' << std::setfill('0') << std::setw(3)
<< std::chrono::duration_cast<std::chrono::milliseconds>(currentTime.time_since_epoch()).count()
% 1000;
return res.str();
}

static std::string epochTime(typename Clock::time_point const& currentTime)
{
return std::to_string(
std::chrono::duration_cast<std::chrono::milliseconds>(currentTime.time_since_epoch()).count());
}
};

// https://en.cppreference.com/w/cpp/named_req/Clock
template<bool enable = true, typename Clock = std::chrono::system_clock>
class DumpTimes
{
public:
using time_point = typename Clock::time_point;
using duration = typename Clock::duration;

using TimeFormatter = typename TimeFormatters<Clock>::TimeFormatter;
using Ret_t = std::pair<time_point, duration>;

const std::string filename;

DumpTimes(std::string filename);
~DumpTimes();

template<typename Duration>
auto now(
std::string description,
std::string separator = "\t",
TimeFormatter = &TimeFormatters<Clock>::epochTime) -> Ret_t;

auto append(std::string const&) -> void;

auto flush() -> void;

private:
std::fstream m_outStream;
bool m_pendingInit = true;
time_point m_lastTimePoint;

auto outStream(std::string const& separator) -> std::fstream&;
};

/*
* Don't do any IO in the constructor yet, but only upon actually calling API functions.
*/
template<bool enable, typename Clock>
DumpTimes<enable, Clock>::DumpTimes(std::string _filename)
: filename(std::move(_filename))
, m_pendingInit(true)
{
}

template<bool enable, typename Clock>
DumpTimes<enable, Clock>::~DumpTimes()
{
if(!m_pendingInit)
{
m_outStream << std::endl;
}
}

template<bool enable, typename Clock>
template<typename Duration>
auto DumpTimes<enable, Clock>::now(std::string description, std::string separator, TimeFormatter timeFormatter)
-> Ret_t
{
auto& stream = outStream(separator);
auto currentTime = Clock::now();
auto delta = currentTime - m_lastTimePoint;
m_lastTimePoint = currentTime;
std::string nowFormatted = timeFormatter(currentTime);
stream << '\n'
<< std::move(nowFormatted) << separator << std::chrono::duration_cast<Duration>(delta).count()
<< separator << description;
return std::make_pair(currentTime, delta);
}

template<bool enable, typename Clock>
auto DumpTimes<enable, Clock>::append(std::string const& str) -> void
{
m_outStream << str;
}

template<bool enable, typename Clock>
auto DumpTimes<enable, Clock>::flush() -> void
{
m_outStream << std::flush;
}

template<bool enable, typename Clock>
auto DumpTimes<enable, Clock>::outStream(std::string const& separator) -> std::fstream&
{
if(m_pendingInit)
{
m_outStream = std::fstream(filename, std::ios_base::out | std::ios_base::app);
m_lastTimePoint = Clock::now();
m_outStream << "timestamp" << separator << "difftime" << separator << "description";
m_pendingInit = false;
}
return m_outStream;
}

template<typename Clock>
class DumpTimes<false, Clock>
{
public:
using Ret_t = void;
DumpTimes()
{
}
DumpTimes(std::string)
{
}

template<typename, typename... Args>
inline auto now(Args&&...) -> Ret_t
{
}

template<typename... Args>
inline auto append(Args&&...) -> void
{
}

template<typename... Args>
inline auto flush(Args&&...) -> void
{
}
};

} // namespace openPMD
} // namespace picongpu
25 changes: 24 additions & 1 deletion include/picongpu/plugins/openPMD/WriteSpecies.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,9 @@ namespace picongpu
const DataSpace<simDim> particleToTotalDomainOffset)
{
log<picLog::INPUT_OUTPUT>("openPMD: (begin) write species: %1%") % T_SpeciesFilter::getName();
params->m_dumpTimes.now<std::chrono::milliseconds>(
"Begin write species " + T_SpeciesFilter::getName());

DataConnector& dc = Environment<>::get().DataConnector();
GridController<simDim>& gc = Environment<simDim>::get().GridController();
uint64_t mpiSize = gc.getGlobalSize();
Expand Down Expand Up @@ -475,6 +478,12 @@ namespace picongpu
log<picLog::INPUT_OUTPUT>("openPMD: (begin) write particle records for %1%, dumping round %2%")
% T_SpeciesFilter::getName() % dumpIteration;

std::stringstream description;
description << "\tslice " << dumpIteration << " prepare";
params->m_dumpTimes.now<std::chrono::milliseconds>(description.str());

size_t writtenBytes = 0;

meta::ForEach<
typename NewParticleDescription::ValueTypeSeq,
openPMD::ParticleAttribute<boost::mpl::_1>>
Expand All @@ -486,12 +495,23 @@ namespace picongpu
basename,
chunk.numberOfParticles,
globalNumParticles,
myParticleOffset + particleOffset);
myParticleOffset + particleOffset,
writtenBytes);

description = std::stringstream();
description << ": " << writtenBytes << " bytes for " << chunk.numberOfParticles
<< " particles from offset " << (myParticleOffset + particleOffset);
params->m_dumpTimes.append(description.str());

log<picLog::INPUT_OUTPUT>("openPMD: flush particle records for %1%, dumping round %2%")
% T_SpeciesFilter::getName() % dumpIteration;

params->m_dumpTimes.now<std::chrono::milliseconds>(
"\tslice " + std::to_string(dumpIteration) + " flush");
params->openPMDSeries->flush(PreferredFlushTarget::Disk);
params->m_dumpTimes.now<std::chrono::milliseconds>(
"\tslice " + std::to_string(dumpIteration) + " end");


log<picLog::INPUT_OUTPUT>("openPMD: (end) write particle records for %1%, dumping round %2%")
% T_SpeciesFilter::getName() % dumpIteration;
Expand Down Expand Up @@ -554,7 +574,10 @@ namespace picongpu
globalNumParticles,
*params->jsonMatcher,
series.particlesPath() + speciesGroup);
params->m_dumpTimes.now<std::chrono::milliseconds>(
"\tFlush species " + T_SpeciesFilter::getName());
params->openPMDSeries->flush(PreferredFlushTarget::Buffer);
params->m_dumpTimes.now<std::chrono::milliseconds>("\tFinished flush species");
}

log<picLog::INPUT_OUTPUT>("openPMD: ( end ) writing particle patches for %1%")
Expand Down
45 changes: 45 additions & 0 deletions include/picongpu/plugins/openPMD/openPMDWriter.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include "picongpu/simulation_defines.hpp"

#include "picongpu/plugins/common/DumpTimes.hpp"
#include "picongpu/plugins/openPMD/Json.hpp"
#include "picongpu/plugins/openPMD/Parameters.hpp"
#include "picongpu/plugins/openPMD/toml.hpp"
Expand Down Expand Up @@ -102,6 +103,50 @@ namespace picongpu
DataSpace<simDim> localWindowToDomainOffset;

std::vector<double> times;
static constexpr bool enableTimeTrace = PIC_OPENPMD_TIMETRACE_NUMBER_OF_FILES > 0;
DumpTimes<enableTimeTrace> m_dumpTimes
{
#if PIC_OPENPMD_TIMETRACE_NUMBER_OF_FILES < 1
"/dev/null"
#else
[]()
{
constexpr unsigned maxLogFiles = PIC_OPENPMD_TIMETRACE_NUMBER_OF_FILES;
static GridController<simDim>& gc = Environment<simDim>::get().GridController();
auto const size = gc.getGlobalSize();
auto const rank = gc.getGlobalRank();
std::string outputFile = "PIC_openPMD_TimeTrace_" + std::to_string(rank) + ".txt";
if(size <= maxLogFiles)
{
return outputFile;
}
else
{
/*
* Only up to maxLogFiles logs should be written in parallel in order not to
* overload the file system with measuring I/O efficiency, e.g. 20 logs.
* The following logic figures out if this rank should write a log and
* which log index from 0 to 19 should be used locally.
*
* Log file with index i is written from `rank = bottom(i/maxLogFiles * size)`
* This means that there exists an e < 1 s.t. `rank + e = i/maxLogFiles * size`,
* which can be given as `i = rank*maxLogFiles/size + e*maxLogFiles/size`.
* In this if branch, `maxLogFiles/size < 1` was checked before entering,
* hence also `e*maxLogFiles/size < 1`.
* This means that the log index `i` can be computed
* via `i = ceil(rank*maxLogFiles/size)`.
* In order to find out if this rank writes a log, the rank is then
* computed with `rank = bottom(i/maxLogFiles * size)` (see first formula above)
* and compared to the actual rank ID.
*/
auto div_ceil = [](auto a, auto b) { return (a + b - 1) / b; };
size_t proposed_log_idx = div_ceil(rank * maxLogFiles, size);
size_t rank_of_that_log = (proposed_log_idx * size) / maxLogFiles;
return rank == rank_of_that_log ? outputFile : "/dev/null";
}
}()
#endif
};

::openPMD::Series& openSeries(::openPMD::Access at);

Expand Down
29 changes: 27 additions & 2 deletions include/picongpu/plugins/openPMD/openPMDWriter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,7 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.

HINLINE void operator_impl(ThreadParams* params, uint32_t const currentStep)
{
params->m_dumpTimes.now<std::chrono::milliseconds>("Begin write field " + getName());
DataConnector& dc = Environment<>::get().DataConnector();

/*## update field ##*/
Expand Down Expand Up @@ -855,7 +856,8 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.
FieldTmp::getUnitDimension<Solver>(),
std::move(inCellPosition),
timeOffset,
isDomainBound);
isDomainBound,
/* logBeginWriteField = */ false);
}
};

Expand Down Expand Up @@ -1485,7 +1487,8 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.
std::vector<float_64> unitDimension,
std::vector<std::vector<float_X>> inCellPosition,
float_X timeOffset,
bool isDomainBound)
bool isDomainBound,
bool logBeginWriteField = true)
{
auto const name_lookup_tpl = plugins::misc::getComponentNames(nComponents);
std::optional<std::string> pathToRecordComponentSpecifiedViaMeshName = std::nullopt;
Expand Down Expand Up @@ -1527,6 +1530,10 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.

log<picLog::INPUT_OUTPUT>("openPMD: write field: %1% %2% %3%") % name % nComponents
% buffer.getHostDataBox().getPointer();
if(logBeginWriteField)
{
params->m_dumpTimes.now<std::chrono::milliseconds>("Begin write field " + name);
}

::openPMD::Iteration iteration = params->openPMDSeries->writeIterations()[currentStep];
::openPMD::Mesh mesh = iteration.meshes[name];
Expand Down Expand Up @@ -1596,6 +1603,13 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.

auto const numDataPoints = localWindowSize.productOfComponents();

{
std::stringstream description;
description << ": " << (numDataPoints * sizeof(ComponentType) * nComponents) << " bytes for "
<< numDataPoints << " cells";
params->m_dumpTimes.append(description.str());
}

/* write the actual field data */
for(uint32_t d = 0; d < nComponents; d++)
{
Expand Down Expand Up @@ -1639,6 +1653,9 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.
continue;
}

params->m_dumpTimes.now<std::chrono::milliseconds>(
"\tComponent " + std::to_string(d) + " prepare");

// ask openPMD to create a buffer for us
// in some backends (ADIOS2), this allows avoiding memcopies
auto span = mrc.storeChunk<ComponentType>(
Expand Down Expand Up @@ -1685,7 +1702,9 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.
}
}

params->m_dumpTimes.now<std::chrono::milliseconds>("\tComponent " + std::to_string(d) + " flush");
params->openPMDSeries->flush(PreferredFlushTarget::Disk);
params->m_dumpTimes.now<std::chrono::milliseconds>("\tComponent " + std::to_string(d) + " end");
}
}

Expand Down Expand Up @@ -1732,6 +1751,8 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.

void write(ThreadParams* threadParams, uint32_t const currentStep, std::string mpiTransportParams)
{
threadParams->m_dumpTimes.now<std::chrono::milliseconds>(
"Beginning iteration " + std::to_string(currentStep));
const SubGrid<simDim>& subGrid = Environment<simDim>::get().SubGrid();
const pmacc::Selection<simDim> localDomain = subGrid.getLocalDomain();
const pmacc::Selection<simDim> globalDomain = subGrid.getGlobalDomain();
Expand Down Expand Up @@ -1856,7 +1877,11 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1.
// avoid deadlock between not finished pmacc tasks and mpi calls in
// openPMD
eventSystem::getTransactionEvent().waitForFinished();
mThreadParams.m_dumpTimes.now<std::chrono::milliseconds>(
"Closing iteration " + std::to_string(currentStep));
mThreadParams.openPMDSeries->writeIterations()[currentStep].close();
mThreadParams.m_dumpTimes.now<std::chrono::milliseconds>("Done.");
mThreadParams.m_dumpTimes.flush();

return;
}
Expand Down
Loading