Skip to content

Commit

Permalink
Merge pull request #4842 from ye-luo/more-scoped-timer
Browse files Browse the repository at this point in the history
Replace start/stop with scoped timers.
  • Loading branch information
prckent committed Nov 21, 2023
2 parents c6bb890 + ad6b279 commit 33d8b76
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 162 deletions.
61 changes: 31 additions & 30 deletions src/QMCDrivers/DMC/DMCBatched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,42 +457,43 @@ bool DMCBatched::run()

for (int block = 0; block < num_blocks; ++block)
{
dmc_loop.start();
estimator_manager_->startBlock(qmcdriver_input_.get_max_steps());

dmc_state.recalculate_properties_period = (qmc_driver_mode_[QMC_UPDATE_MODE])
? qmcdriver_input_.get_recalculate_properties_period()
: (qmcdriver_input_.get_max_blocks() + 1) * qmcdriver_input_.get_max_steps();
dmc_state.is_recomputing_block = qmcdriver_input_.get_blocks_between_recompute()
? (1 + block) % qmcdriver_input_.get_blocks_between_recompute() == 0
: false;
{
ScopeGuard<LoopTimer<>> dmc_local_timer(dmc_loop);
estimator_manager_->startBlock(qmcdriver_input_.get_max_steps());

for (UPtr<Crowd>& crowd : crowds_)
crowd->startBlock(qmcdriver_input_.get_max_steps());
dmc_state.recalculate_properties_period = (qmc_driver_mode_[QMC_UPDATE_MODE])
? qmcdriver_input_.get_recalculate_properties_period()
: (qmcdriver_input_.get_max_blocks() + 1) * qmcdriver_input_.get_max_steps();
dmc_state.is_recomputing_block = qmcdriver_input_.get_blocks_between_recompute()
? (1 + block) % qmcdriver_input_.get_blocks_between_recompute() == 0
: false;

for (int step = 0; step < qmcdriver_input_.get_max_steps(); ++step)
{
ScopedTimer local_timer(timers_.run_steps_timer);
dmc_state.step = step;
crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, std::ref(step_contexts_),
std::ref(crowds_));
for (UPtr<Crowd>& crowd : crowds_)
crowd->startBlock(qmcdriver_input_.get_max_steps());

for (int step = 0; step < qmcdriver_input_.get_max_steps(); ++step)
{
const int iter = block * qmcdriver_input_.get_max_steps() + step;
walker_controller_->branch(iter, population_, iter == 0);
branch_engine_->updateParamAfterPopControl(walker_controller_->get_ensemble_property(),
population_.get_golden_electrons().getTotalNum());
walker_controller_->setTrialEnergy(branch_engine_->getEtrial());
}
ScopedTimer local_timer(timers_.run_steps_timer);
dmc_state.step = step;
crowd_task(crowds_.size(), runDMCStep, dmc_state, timers_, dmc_timers_, std::ref(step_contexts_),
std::ref(crowds_));

population_.redistributeWalkers(crowds_);
{
const int iter = block * qmcdriver_input_.get_max_steps() + step;
walker_controller_->branch(iter, population_, iter == 0);
branch_engine_->updateParamAfterPopControl(walker_controller_->get_ensemble_property(),
population_.get_golden_electrons().getTotalNum());
walker_controller_->setTrialEnergy(branch_engine_->getEtrial());
}

population_.redistributeWalkers(crowds_);
}
print_mem("DMCBatched after a block", app_debug_stream());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Block " + std::to_string(block));
endBlock();
recordBlock(block);
}
print_mem("DMCBatched after a block", app_debug_stream());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Block " + std::to_string(block));
endBlock();
recordBlock(block);
dmc_loop.stop();

bool stop_requested = false;
// Rank 0 decides whether the time limit was reached
Expand Down
268 changes: 136 additions & 132 deletions src/QMCDrivers/VMC/VMCBatched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,137 +74,140 @@ void VMCBatched::advanceWalkers(const StateForThread& sft,
if (sft.qmcdrv_input.get_debug_checks() & DriverDebugChecks::CHECKGL_AFTER_LOAD)
checkLogAndGL(crowd, "checkGL_after_load");

timers.movepbyp_timer.start();
const int num_walkers = crowd.size();
auto& walker_leader = walker_elecs.getLeader();
const int num_particles = walker_leader.getTotalNum();
// Note std::vector<bool> is not like the rest of stl.
std::vector<bool> moved(num_walkers, false);
constexpr RealType mhalf(-0.5);
const bool use_drift = sft.vmcdrv_input.get_use_drift();

std::vector<TrialWaveFunction::PsiValue> ratios(num_walkers);
std::vector<RealType> log_gf(num_walkers);
std::vector<RealType> log_gb(num_walkers);
std::vector<RealType> prob(num_walkers);

// local list to handle accept/reject
std::vector<bool> isAccepted;
std::vector<std::reference_wrapper<TrialWaveFunction>> twf_accept_list, twf_reject_list;
isAccepted.reserve(num_walkers);

MCCoords<CT> drifts(num_walkers), drifts_reverse(num_walkers);
MCCoords<CT> walker_deltas(num_walkers * num_particles), deltas(num_walkers);
TWFGrads<CT> grads_now(num_walkers), grads_new(num_walkers);

for (int sub_step = 0; sub_step < sft.qmcdrv_input.get_sub_steps(); sub_step++)
{
//This generates an entire steps worth of deltas.
makeGaussRandomWithEngine(walker_deltas, step_context.get_random_gen());

// up and down electrons are "species" within qmpack
for (int ig = 0; ig < walker_leader.groups(); ++ig) //loop over species
ScopedTimer pbyp_local_timer(timers.movepbyp_timer);
const int num_walkers = crowd.size();
auto& walker_leader = walker_elecs.getLeader();
const int num_particles = walker_leader.getTotalNum();
// Note std::vector<bool> is not like the rest of stl.
std::vector<bool> moved(num_walkers, false);
constexpr RealType mhalf(-0.5);
const bool use_drift = sft.vmcdrv_input.get_use_drift();

std::vector<TrialWaveFunction::PsiValue> ratios(num_walkers);
std::vector<RealType> log_gf(num_walkers);
std::vector<RealType> log_gb(num_walkers);
std::vector<RealType> prob(num_walkers);

// local list to handle accept/reject
std::vector<bool> isAccepted;
std::vector<std::reference_wrapper<TrialWaveFunction>> twf_accept_list, twf_reject_list;
isAccepted.reserve(num_walkers);

MCCoords<CT> drifts(num_walkers), drifts_reverse(num_walkers);
MCCoords<CT> walker_deltas(num_walkers * num_particles), deltas(num_walkers);
TWFGrads<CT> grads_now(num_walkers), grads_new(num_walkers);

for (int sub_step = 0; sub_step < sft.qmcdrv_input.get_sub_steps(); sub_step++)
{
TauParams<RealType, CT> taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig],
sft.qmcdrv_input.get_spin_mass());

twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig);
//This generates an entire steps worth of deltas.
makeGaussRandomWithEngine(walker_deltas, step_context.get_random_gen());

for (int iat = walker_leader.first(ig); iat < walker_leader.last(ig); ++iat)
// up and down electrons are "species" within qmpack
for (int ig = 0; ig < walker_leader.groups(); ++ig) //loop over species
{
//get deltas for this particle (iat) for all walkers
walker_deltas.getSubset(iat * num_walkers, num_walkers, deltas);
scaleBySqrtTau(taus, deltas);

if (use_drift)
{
twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now);
sft.drift_modifier.getDrifts(taus, grads_now, drifts);
drifts += deltas;
}
else
drifts = deltas;
TauParams<RealType, CT> taus(sft.qmcdrv_input.get_tau(), sft.population.get_ptclgrp_inv_mass()[ig],
sft.qmcdrv_input.get_spin_mass());

ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts);
twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig);

// This is inelegant
if (use_drift)
for (int iat = walker_leader.first(ig); iat < walker_leader.last(ig); ++iat)
{
twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new);
//get deltas for this particle (iat) for all walkers
walker_deltas.getSubset(iat * num_walkers, num_walkers, deltas);
scaleBySqrtTau(taus, deltas);

computeLogGreensFunction(deltas, taus, log_gf);
if (use_drift)
{
twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now);
sft.drift_modifier.getDrifts(taus, grads_now, drifts);
drifts += deltas;
}
else
drifts = deltas;

sft.drift_modifier.getDrifts(taus, grads_new, drifts_reverse);
ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts);

drifts_reverse += drifts;
// This is inelegant
if (use_drift)
{
twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new);

computeLogGreensFunction(drifts_reverse, taus, log_gb);
}
else
twf_dispatcher.flex_calcRatio(walker_twfs, walker_elecs, iat, ratios);
computeLogGreensFunction(deltas, taus, log_gf);

std::transform(ratios.begin(), ratios.end(), prob.begin(), [](auto ratio) { return std::norm(ratio); });
sft.drift_modifier.getDrifts(taus, grads_new, drifts_reverse);

isAccepted.clear();
drifts_reverse += drifts;

for (int i_accept = 0; i_accept < num_walkers; ++i_accept)
if (prob[i_accept] >= std::numeric_limits<RealType>::epsilon() &&
step_context.get_random_gen()() < prob[i_accept] * std::exp(log_gb[i_accept] - log_gf[i_accept]))
{
crowd.incAccept();
isAccepted.push_back(true);
computeLogGreensFunction(drifts_reverse, taus, log_gb);
}
else
{
crowd.incReject();
isAccepted.push_back(false);
}
twf_dispatcher.flex_calcRatio(walker_twfs, walker_elecs, iat, ratios);

std::transform(ratios.begin(), ratios.end(), prob.begin(), [](auto ratio) { return std::norm(ratio); });

isAccepted.clear();

twf_dispatcher.flex_accept_rejectMove(walker_twfs, walker_elecs, iat, isAccepted, true);
for (int i_accept = 0; i_accept < num_walkers; ++i_accept)
if (prob[i_accept] >= std::numeric_limits<RealType>::epsilon() &&
step_context.get_random_gen()() < prob[i_accept] * std::exp(log_gb[i_accept] - log_gf[i_accept]))
{
crowd.incAccept();
isAccepted.push_back(true);
}
else
{
crowd.incReject();
isAccepted.push_back(false);
}

ps_dispatcher.flex_accept_rejectMove<CT>(walker_elecs, iat, isAccepted);
twf_dispatcher.flex_accept_rejectMove(walker_twfs, walker_elecs, iat, isAccepted, true);

ps_dispatcher.flex_accept_rejectMove<CT>(walker_elecs, iat, isAccepted);
}
}
twf_dispatcher.flex_completeUpdates(walker_twfs);
}
twf_dispatcher.flex_completeUpdates(walker_twfs);
}

ps_dispatcher.flex_donePbyP(walker_elecs);
timers.movepbyp_timer.stop();
ps_dispatcher.flex_donePbyP(walker_elecs);
}

timers.buffer_timer.start();
twf_dispatcher.flex_evaluateGL(walker_twfs, walker_elecs, recompute);
if (sft.qmcdrv_input.get_debug_checks() & DriverDebugChecks::CHECKGL_AFTER_MOVES)
checkLogAndGL(crowd, "checkGL_after_moves");
timers.buffer_timer.stop();
{
ScopedTimer buffer_local_timer(timers.buffer_timer);
twf_dispatcher.flex_evaluateGL(walker_twfs, walker_elecs, recompute);
if (sft.qmcdrv_input.get_debug_checks() & DriverDebugChecks::CHECKGL_AFTER_MOVES)
checkLogAndGL(crowd, "checkGL_after_moves");
}

timers.hamiltonian_timer.start();
const RefVectorWithLeader<QMCHamiltonian> walker_hamiltonians(crowd.get_walker_hamiltonians()[0],
crowd.get_walker_hamiltonians());
ResourceCollectionTeamLock<QMCHamiltonian> hams_res_lock(crowd.getSharedResource().ham_res, walker_hamiltonians);
std::vector<QMCHamiltonian::FullPrecRealType> local_energies(
ham_dispatcher.flex_evaluate(walker_hamiltonians, walker_twfs, walker_elecs));
timers.hamiltonian_timer.stop();

auto resetSigNLocalEnergy = [](MCPWalker& walker, TrialWaveFunction& twf, auto& local_energy) {
walker.resetProperty(twf.getLogPsi(), twf.getPhase(), local_energy);
};
for (int iw = 0; iw < crowd.size(); ++iw)
resetSigNLocalEnergy(walkers[iw], walker_twfs[iw], local_energies[iw]);

// moved to be consistent with DMC
timers.collectables_timer.start();
auto evaluateNonPhysicalHamiltonianElements = [](QMCHamiltonian& ham, ParticleSet& pset, MCPWalker& walker) {
ham.auxHevaluate(pset, walker);
};
for (int iw = 0; iw < crowd.size(); ++iw)
evaluateNonPhysicalHamiltonianElements(walker_hamiltonians[iw], walker_elecs[iw], walkers[iw]);

auto savePropertiesIntoWalker = [](QMCHamiltonian& ham, MCPWalker& walker) {
ham.saveProperty(walker.getPropertyBase());
};
for (int iw = 0; iw < crowd.size(); ++iw)
savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]);
timers.collectables_timer.stop();
{
ScopedTimer hamiltonian_local_timer(timers.hamiltonian_timer);
ResourceCollectionTeamLock<QMCHamiltonian> hams_res_lock(crowd.getSharedResource().ham_res, walker_hamiltonians);
std::vector<QMCHamiltonian::FullPrecRealType> local_energies(
ham_dispatcher.flex_evaluate(walker_hamiltonians, walker_twfs, walker_elecs));

auto resetSigNLocalEnergy = [](MCPWalker& walker, TrialWaveFunction& twf, auto& local_energy) {
walker.resetProperty(twf.getLogPsi(), twf.getPhase(), local_energy);
};
for (int iw = 0; iw < crowd.size(); ++iw)
resetSigNLocalEnergy(walkers[iw], walker_twfs[iw], local_energies[iw]);
}

{
ScopedTimer collectables_local_timer(timers.collectables_timer);
auto evaluateNonPhysicalHamiltonianElements = [](QMCHamiltonian& ham, ParticleSet& pset, MCPWalker& walker) {
ham.auxHevaluate(pset, walker);
};
for (int iw = 0; iw < crowd.size(); ++iw)
evaluateNonPhysicalHamiltonianElements(walker_hamiltonians[iw], walker_elecs[iw], walkers[iw]);

auto savePropertiesIntoWalker = [](QMCHamiltonian& ham, MCPWalker& walker) {
ham.saveProperty(walker.getPropertyBase());
};
for (int iw = 0; iw < crowd.size(); ++iw)
savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]);
}

if (accumulate_this_step)
{
Expand Down Expand Up @@ -352,38 +355,39 @@ bool VMCBatched::run()

for (int block = 0; block < num_blocks; ++block)
{
vmc_loop.start();
vmc_state.recalculate_properties_period =
(qmc_driver_mode_[QMC_UPDATE_MODE]) ? qmcdriver_input_.get_recalculate_properties_period() : 0;
vmc_state.is_recomputing_block = qmcdriver_input_.get_blocks_between_recompute()
? (1 + block) % qmcdriver_input_.get_blocks_between_recompute() == 0
: false;

estimator_manager_->startBlock(qmcdriver_input_.get_max_steps());

for (auto& crowd : crowds_)
crowd->startBlock(qmcdriver_input_.get_max_steps());
for (int step = 0; step < qmcdriver_input_.get_max_steps(); ++step)
{
ScopedTimer local_timer(timers_.run_steps_timer);
vmc_state.step = step;
crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, std::ref(step_contexts_), std::ref(crowds_));

if (collect_samples_)
ScopeGuard<LoopTimer<>> vmc_local_timer(vmc_loop);
vmc_state.recalculate_properties_period =
(qmc_driver_mode_[QMC_UPDATE_MODE]) ? qmcdriver_input_.get_recalculate_properties_period() : 0;
vmc_state.is_recomputing_block = qmcdriver_input_.get_blocks_between_recompute()
? (1 + block) % qmcdriver_input_.get_blocks_between_recompute() == 0
: false;

estimator_manager_->startBlock(qmcdriver_input_.get_max_steps());

for (auto& crowd : crowds_)
crowd->startBlock(qmcdriver_input_.get_max_steps());
for (int step = 0; step < qmcdriver_input_.get_max_steps(); ++step)
{
const auto& elec_psets = population_.get_elec_particle_sets();
for (const auto& walker : elec_psets)
ScopedTimer local_timer(timers_.run_steps_timer);
vmc_state.step = step;
crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, std::ref(step_contexts_), std::ref(crowds_));

if (collect_samples_)
{
samples_.appendSample(MCSample(*walker));
const auto& elec_psets = population_.get_elec_particle_sets();
for (const auto& walker : elec_psets)
{
samples_.appendSample(MCSample(*walker));
}
}
}
print_mem("VMCBatched after a block", app_debug_stream());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Block " + std::to_string(block));
endBlock();
recordBlock(block);
}
print_mem("VMCBatched after a block", app_debug_stream());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Block " + std::to_string(block));
endBlock();
recordBlock(block);
vmc_loop.stop();

bool stop_requested = false;
// Rank 0 decides whether the time limit was reached
Expand Down

0 comments on commit 33d8b76

Please sign in to comment.