diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4b01c51a7..91670535e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -49,6 +49,7 @@ spack_setup:
 .gpu_node:
   variables:
     bb5_constraint: volta
+    bb5_ntasks: 16
 .test_neuron:
   extends: [.ctest]
   stage: test_neuron
diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp
index 410fe5c62..7411dbfa2 100644
--- a/coreneuron/apps/main1.cpp
+++ b/coreneuron/apps/main1.cpp
@@ -470,6 +470,7 @@ static void* load_dynamic_mpi(const std::string& libname) {
 extern "C" void mk_mech_init(int argc, char** argv) {
     // reset all parameters to their default values
     corenrn_param.reset();
+
     // read command line parameters and parameter config files
     corenrn_param.parse(argc, argv);
 
diff --git a/coreneuron/io/core2nrn_data_return.cpp b/coreneuron/io/core2nrn_data_return.cpp
index 2d046a866..6a12c197f 100644
--- a/coreneuron/io/core2nrn_data_return.cpp
+++ b/coreneuron/io/core2nrn_data_return.cpp
@@ -170,6 +170,14 @@ static void core2nrn_corepointer(int tid, NrnThreadMembList* tml) {
  */
 static void core2nrn_tqueue(NrnThread&);
 
+/** @brief Callback to clear NEURON thread queues.
+    In particular need to initialize bin queues to the current time before
+    transferring events.
+ */
+extern "C" {
+void (*core2nrn_clear_queues_)(double t);
+}
+
 /** @brief All activated WATCH statements need activation on NEURON side.
  */
 // vector in unpermuted Memb_list index order of vector of
@@ -200,6 +208,9 @@ void core2nrn_data_return() {
     if (!nrn2core_type_return_) {
         return;
     }
+
+    (*core2nrn_clear_queues_)(nrn_threads[0]._t);  // all threads at same time
+
     for (int tid = 0; tid < nrn_nthread; ++tid) {
         size_t n = 0;
         double* data = nullptr;
diff --git a/coreneuron/io/nrn2core_data_init.cpp b/coreneuron/io/nrn2core_data_init.cpp
index e79ed824d..6838c668d 100644
--- a/coreneuron/io/nrn2core_data_init.cpp
+++ b/coreneuron/io/nrn2core_data_init.cpp
@@ -51,6 +51,8 @@ void direct_mode_initialize() {
     dt2thread(-1.);
     nrn_thread_table_check();
 
+    clear_event_queue();
+
     // Reproduce present NEURON WATCH activation
     // Start from nothing active.
     watch_activate_clear();
diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp
index ccd1e7b8c..6bec06fde 100644
--- a/coreneuron/io/nrn_setup.cpp
+++ b/coreneuron/io/nrn_setup.cpp
@@ -180,7 +180,7 @@ void nrn_read_filesdat(int& ngrp, int*& grp, const char* filesdat) {
     FILE* fp = fopen(filesdat, "r");
 
     if (!fp) {
-        nrn_fatal_error("No input file with nrnthreads, exiting...");
+        nrn_fatal_error("No input file ( %s ) with nrnthreads, exiting...", filesdat);
     }
 
     char version[256];
@@ -710,6 +710,9 @@ void nrn_cleanup_ion_map() {
 void nrn_cleanup() {
     clear_event_queue();  // delete left-over TQItem
 
+    for (auto psi: gid2in) {
+        delete psi.second;
+    }
     gid2in.clear();
     gid2out.clear();
 
diff --git a/coreneuron/mpi/lib/mpispike.cpp b/coreneuron/mpi/lib/mpispike.cpp
index 87d073d7e..bbe81ac6c 100644
--- a/coreneuron/mpi/lib/mpispike.cpp
+++ b/coreneuron/mpi/lib/mpispike.cpp
@@ -24,7 +24,7 @@ extern MPI_Comm nrnmpi_comm;
 
 static int np;
 static int* displs{nullptr};
-static int* byteovfl; /* for the compressed transfer method */
+static int* byteovfl{nullptr}; /* for the compressed transfer method */
 static MPI_Datatype spike_type;
 
 static void* emalloc(size_t size) {
@@ -175,7 +175,7 @@ The allgather sends the first part of the buf and the allgatherv buffer
 sends any overflow.
 */
 int nrnmpi_spike_exchange_compressed_impl(int localgid_size,
-                                          unsigned char* spfixin_ovfl,
+                                          unsigned char*& spfixin_ovfl,
                                           int send_nspike,
                                           int* nin,
                                           int ovfl_capacity,
@@ -187,9 +187,10 @@ int nrnmpi_spike_exchange_compressed_impl(int localgid_size,
         np = nrnmpi_numprocs_;
         displs = (int*) emalloc(np * sizeof(int));
        displs[0] = 0;
+    }
+    if (!byteovfl) {
         byteovfl = (int*) emalloc(np * sizeof(int));
     }
-
     MPI_Allgather(
         spikeout_fixed, ag_send_size, MPI_BYTE, spikein_fixed, ag_send_size, MPI_BYTE, nrnmpi_comm);
     int novfl = 0;
diff --git a/coreneuron/mpi/lib/nrnmpi.cpp b/coreneuron/mpi/lib/nrnmpi.cpp
index 070ce05fd..bc84a969e 100644
--- a/coreneuron/mpi/lib/nrnmpi.cpp
+++ b/coreneuron/mpi/lib/nrnmpi.cpp
@@ -35,6 +35,12 @@ static void nrn_fatal_error(const char* msg) {
 }
 
 nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) {
+    // Execute at most once per launch. Avoid memory leak.
+    static bool executed = false;
+    if (executed) {
+        return {nrnmpi_numprocs_, nrnmpi_myid_};
+    }
+
     nrnmpi_under_nrncontrol_ = true;
 
     if (!nrnmpi_initialized_impl()) {
@@ -62,6 +68,7 @@ nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) {
 #endif
     }
 
+    executed = true;
     return {nrnmpi_numprocs_, nrnmpi_myid_};
 }
 
diff --git a/coreneuron/mpi/nrnmpidec.h b/coreneuron/mpi/nrnmpidec.h
index f5ac5bf60..a9f12c8f6 100644
--- a/coreneuron/mpi/nrnmpidec.h
+++ b/coreneuron/mpi/nrnmpidec.h
@@ -37,7 +37,7 @@ extern mpi_function nrnmp
 extern "C" int nrnmpi_spike_exchange_impl(int* nin, NRNMPI_Spike* spikeout, int icapacity, NRNMPI_Spike** spikein, int& ovfl, int nout, NRNMPI_Spikebuf* spbufout, NRNMPI_Spikebuf* spbufin);
 extern mpi_function nrnmpi_spike_exchange;
 
-extern "C" int nrnmpi_spike_exchange_compressed_impl(int, unsigned char*, int, int*, int, unsigned char*, int, unsigned char*, int& ovfl);
+extern "C" int nrnmpi_spike_exchange_compressed_impl(int, unsigned char*&, int, int*, int, unsigned char*, int, unsigned char*, int& ovfl);
 extern mpi_function nrnmpi_spike_exchange_compressed;
 
 extern "C" int nrnmpi_int_allmax_impl(int i);
diff --git a/coreneuron/network/multisend_setup.cpp b/coreneuron/network/multisend_setup.cpp
index a11edb54e..72453b7a1 100644
--- a/coreneuron/network/multisend_setup.cpp
+++ b/coreneuron/network/multisend_setup.cpp
@@ -224,7 +224,7 @@ void TarList::alloc() {
 
 // for two phase
 
-static nrnran123_State* ranstate;
+static nrnran123_State* ranstate{nullptr};
 
 static void random_init(int i) {
     if (!ranstate) {
@@ -236,6 +236,14 @@ static unsigned int get_random() {
     return nrnran123_ipick(ranstate);
 }
 
+// Avoid warnings if the global index is changed on subsequent psolve.
+static void random_delete() {
+    if (ranstate) {
+        nrnran123_deletestream(ranstate);
+        ranstate = nullptr;
+    }
+}
+
 static int iran(int i1, int i2) {
     // discrete uniform random integer from i2 to i2 inclusive. Must
     // work if i1 == i2
@@ -575,6 +583,7 @@ static std::vector setup_target_lists(bool use_phase2) {
             phase2organize(tl);
         }
     }
+    random_delete();
 }
 
 // For clarity, use the all2allv_int style of information flow
diff --git a/coreneuron/network/netcvode.cpp b/coreneuron/network/netcvode.cpp
index 4fb1d165f..cecd1f30a 100644
--- a/coreneuron/network/netcvode.cpp
+++ b/coreneuron/network/netcvode.cpp
@@ -135,6 +135,10 @@ void NetCvodeThreadData::interthread_send(double td, DiscreteEvent* db, NrnThrea
     inter_thread_events_.emplace_back(InterThreadEvent{db, td});
 }
 
+void interthread_enqueue(NrnThread* nt) {
+    net_cvode_instance->p[nt->id].enqueue(net_cvode_instance, nt);
+}
+
 void NetCvodeThreadData::enqueue(NetCvode* nc, NrnThread* nt) {
     std::lock_guard lock(mut);
     for (const auto& ite: inter_thread_events_) {
@@ -229,14 +233,14 @@ void NetCvode::clear_events() {
         d.unreffed_event_cnt_ = 0;
         d.inter_thread_events_.clear();
         d.tqe_->nshift_ = -1;
-        d.tqe_->shift_bin(nrn_threads->_t);
+        d.tqe_->shift_bin(nrn_threads->_t - 0.5 * nrn_threads->_dt);
     }
 }
 
 void NetCvode::init_events() {
     for (int i = 0; i < nrn_nthread; ++i) {
         p[i].tqe_->nshift_ = -1;
-        p[i].tqe_->shift_bin(nrn_threads->_t);
+        p[i].tqe_->shift_bin(nrn_threads->_t - 0.5 * nrn_threads->_dt);
     }
     for (int tid = 0; tid < nrn_nthread; ++tid) {
         // can be done in parallel
diff --git a/coreneuron/network/netcvode.hpp b/coreneuron/network/netcvode.hpp
index b5694b10f..6e1da66e5 100644
--- a/coreneuron/network/netcvode.hpp
+++ b/coreneuron/network/netcvode.hpp
@@ -37,6 +37,7 @@ class DiscreteEvent;
 class NetCvode;
 
 extern NetCvode* net_cvode_instance;
+extern void interthread_enqueue(NrnThread*);
 
 struct InterThreadEvent {
     DiscreteEvent* de_;
diff --git a/coreneuron/network/netpar.cpp b/coreneuron/network/netpar.cpp
index cbb2b547c..036e5baab 100644
--- a/coreneuron/network/netpar.cpp
+++ b/coreneuron/network/netpar.cpp
@@ -286,7 +286,10 @@ void nrn_spike_exchange_init() {
         t_exchange_ = t;
         dt1_ = rev_dt;
         usable_mindelay_ = floor(mindelay_ * dt1_ + 1e-9) * dt;
-        assert(usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) < 255);
+        if (usable_mindelay_ * dt1_ >= 255.) {
+            usable_mindelay_ = 255. / dt1_;
+        }
+        assert(usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) <= 255.);
     } else {
 #if nrn_spikebuf_size > 0
         if (spbufout) {
@@ -366,6 +369,7 @@ void nrn_spike_exchange(NrnThread* nt) {
             ps->send(spikein[i].spiketime, net_cvode_instance, nt);
         }
     }
+    nrn_multithread_job(interthread_enqueue);
 
     wt1_ = nrn_wtime() - wt;
 }
@@ -482,6 +486,12 @@ void nrn_spike_exchange_compressed(NrnThread* nt) {
             }
         }
     }
+    // In case of multiple threads some above ps->send events put
+    // NetCon events into interthread buffers. Some of those may
+    // need to be delivered early enough that the interthread buffers
+    // need transfer to the thread event queues before the next dqueue_bin
+    // while loop in deliver_net_events. So enqueue now...
+    nrn_multithread_job(interthread_enqueue);
     t_exchange_ = nrn_threads->_t;
     wt1_ = nrn_wtime() - wt;
 }
@@ -606,6 +616,7 @@ void BBS_netpar_solve(double tstop) {
     }
 
     nrn_timeout(timeout_);
+    nrn_multithread_job(interthread_enqueue);
     ncs2nrn_integrate(tstop * (1. + 1e-11));
     nrn_spike_exchange(nrn_threads);
     nrn_timeout(0);
diff --git a/external/nmodl b/external/nmodl
index 46f8baf2b..3e960d7d9 160000
--- a/external/nmodl
+++ b/external/nmodl
@@ -1 +1 @@
-Subproject commit 46f8baf2bbeaa0d21559d6306ec37b94c601f1ee
+Subproject commit 3e960d7d9e6db1e4f74a1c7fb6b773a6a3cd593c