From cef08b9035116f98a3e493af4367bcc036813336 Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Wed, 24 Nov 2021 09:15:48 -0500 Subject: [PATCH 01/13] Bug fixes with regard to option --spkcompress --- coreneuron/mpi/lib/mpispike.cpp | 7 ++++--- coreneuron/mpi/nrnmpidec.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/coreneuron/mpi/lib/mpispike.cpp b/coreneuron/mpi/lib/mpispike.cpp index 87d073d7e..bbe81ac6c 100644 --- a/coreneuron/mpi/lib/mpispike.cpp +++ b/coreneuron/mpi/lib/mpispike.cpp @@ -24,7 +24,7 @@ extern MPI_Comm nrnmpi_comm; static int np; static int* displs{nullptr}; -static int* byteovfl; /* for the compressed transfer method */ +static int* byteovfl{nullptr}; /* for the compressed transfer method */ static MPI_Datatype spike_type; static void* emalloc(size_t size) { @@ -175,7 +175,7 @@ The allgather sends the first part of the buf and the allgatherv buffer sends any overflow. */ int nrnmpi_spike_exchange_compressed_impl(int localgid_size, - unsigned char* spfixin_ovfl, + unsigned char*& spfixin_ovfl, int send_nspike, int* nin, int ovfl_capacity, @@ -187,9 +187,10 @@ int nrnmpi_spike_exchange_compressed_impl(int localgid_size, np = nrnmpi_numprocs_; displs = (int*) emalloc(np * sizeof(int)); displs[0] = 0; + } + if (!byteovfl) { byteovfl = (int*) emalloc(np * sizeof(int)); } - MPI_Allgather( spikeout_fixed, ag_send_size, MPI_BYTE, spikein_fixed, ag_send_size, MPI_BYTE, nrnmpi_comm); int novfl = 0; diff --git a/coreneuron/mpi/nrnmpidec.h b/coreneuron/mpi/nrnmpidec.h index f5ac5bf60..a9f12c8f6 100644 --- a/coreneuron/mpi/nrnmpidec.h +++ b/coreneuron/mpi/nrnmpidec.h @@ -37,7 +37,7 @@ extern mpi_function nrnmp extern "C" int nrnmpi_spike_exchange_impl(int* nin, NRNMPI_Spike* spikeout, int icapacity, NRNMPI_Spike** spikein, int& ovfl, int nout, NRNMPI_Spikebuf* spbufout, NRNMPI_Spikebuf* spbufin); extern mpi_function nrnmpi_spike_exchange; -extern "C" int nrnmpi_spike_exchange_compressed_impl(int, unsigned char*, int, int*, int, unsigned char*, int, unsigned char*, int& ovfl); +extern "C" int nrnmpi_spike_exchange_compressed_impl(int, unsigned char*&, int, int*, int, unsigned char*, int, unsigned char*, int& ovfl); extern mpi_function nrnmpi_spike_exchange_compressed; extern "C" int nrnmpi_int_allmax_impl(int i); From 570092daa54047e76f17c2d546e0b3c79424e076 Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Wed, 24 Nov 2021 09:21:54 -0500 Subject: [PATCH 02/13] Temporary change to allow binqueue and multisend to be turned off. --- coreneuron/apps/main1.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index 0fdaa509b..5d5767921 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -467,6 +467,8 @@ static void* load_dynamic_mpi(const std::string& libname) { #endif extern "C" void mk_mech_init(int argc, char** argv) { + corenrn_param.multisend = false; + corenrn_param.binqueue = false; // read command line parameters and parameter config files corenrn_param.parse(argc, argv); From be8efb8a8c8675da4e275b10cdd6d7b6e0ccc05e Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Sat, 27 Nov 2021 09:14:55 -0500 Subject: [PATCH 03/13] After compressed spike exchange, do interthread_enqueue. --- coreneuron/network/netcvode.cpp | 4 ++++ coreneuron/network/netcvode.hpp | 1 + coreneuron/network/netpar.cpp | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/coreneuron/network/netcvode.cpp b/coreneuron/network/netcvode.cpp index 899bc1e14..c7ed3f1e5 100644 --- a/coreneuron/network/netcvode.cpp +++ b/coreneuron/network/netcvode.cpp @@ -138,6 +138,10 @@ void NetCvodeThreadData::interthread_send(double td, DiscreteEvent* db, NrnThrea inter_thread_events_.emplace_back(InterThreadEvent{db, td}); } +void interthread_enqueue(NrnThread* nt) { + net_cvode_instance->p[nt->id].enqueue(net_cvode_instance, nt); +} + void NetCvodeThreadData::enqueue(NetCvode* nc, NrnThread* nt) { std::lock_guard lock(mut); for (const auto& ite: inter_thread_events_) { diff --git a/coreneuron/network/netcvode.hpp b/coreneuron/network/netcvode.hpp index b5694b10f..6e1da66e5 100644 --- a/coreneuron/network/netcvode.hpp +++ b/coreneuron/network/netcvode.hpp @@ -37,6 +37,7 @@ class DiscreteEvent; class NetCvode; extern NetCvode* net_cvode_instance; +extern void interthread_enqueue(NrnThread*); struct InterThreadEvent { DiscreteEvent* de_; diff --git a/coreneuron/network/netpar.cpp b/coreneuron/network/netpar.cpp index cbb2b547c..d3bd530d4 100644 --- a/coreneuron/network/netpar.cpp +++ b/coreneuron/network/netpar.cpp @@ -482,6 +482,12 @@ void nrn_spike_exchange_compressed(NrnThread* nt) { } } } + // In case of multiple threads some above ps->send events put + // NetCon events into interthread buffers. Some of those may + // need to be delivered early enough that the interthread buffers + // need transfer to the thread event queues before the next dqueue_bin + // while loop in deliver_net_events. So enqueue now... + nrn_multithread_job(interthread_enqueue); t_exchange_ = nrn_threads->_t; wt1_ = nrn_wtime() - wt; } From 5619fff5ce04406c66e9639538454564b8c09070 Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Sat, 27 Nov 2021 09:25:11 -0500 Subject: [PATCH 04/13] nrn binq must be initialized before core2nrn queue transfer. --- coreneuron/io/core2nrn_data_return.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/coreneuron/io/core2nrn_data_return.cpp b/coreneuron/io/core2nrn_data_return.cpp index 2d046a866..e8a73cbca 100644 --- a/coreneuron/io/core2nrn_data_return.cpp +++ b/coreneuron/io/core2nrn_data_return.cpp @@ -170,6 +170,12 @@ static void core2nrn_corepointer(int tid, NrnThreadMembList* tml) { */ static void core2nrn_tqueue(NrnThread&); +/** @brief Callback to clear NEURON thread queues. + In particular need to initialize bin queues to the current time before + transferring events. + */ +extern "C" {void (*core2nrn_clear_queues_)(double t); } + /** @brief All activated WATCH statements need activation on NEURON side. */ // vector in unpermuted Memb_list index order of vector of @@ -200,6 +206,9 @@ void core2nrn_data_return() { if (!nrn2core_type_return_) { return; } + + (*core2nrn_clear_queues_)(nrn_threads[0]._t); // all threads at same time + for (int tid = 0; tid < nrn_nthread; ++tid) { size_t n = 0; double* data = nullptr; From 7d1c6b64e5fb36a6e274bdac53b3acc865be8ae5 Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Sat, 27 Nov 2021 09:27:47 -0500 Subject: [PATCH 05/13] nrncore binq must be initialized before nrn2core queue transfer --- coreneuron/io/nrn2core_data_init.cpp | 2 ++ coreneuron/network/netcvode.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/coreneuron/io/nrn2core_data_init.cpp b/coreneuron/io/nrn2core_data_init.cpp index e79ed824d..6838c668d 100644 --- a/coreneuron/io/nrn2core_data_init.cpp +++ b/coreneuron/io/nrn2core_data_init.cpp @@ -51,6 +51,8 @@ void direct_mode_initialize() { dt2thread(-1.); nrn_thread_table_check(); + clear_event_queue(); + // Reproduce present NEURON WATCH activation // Start from nothing active. watch_activate_clear(); diff --git a/coreneuron/network/netcvode.cpp b/coreneuron/network/netcvode.cpp index c7ed3f1e5..160bb57dd 100644 --- a/coreneuron/network/netcvode.cpp +++ b/coreneuron/network/netcvode.cpp @@ -236,7 +236,7 @@ void NetCvode::clear_events() { d.unreffed_event_cnt_ = 0; d.inter_thread_events_.clear(); d.tqe_->nshift_ = -1; - d.tqe_->shift_bin(nrn_threads->_t); + d.tqe_->shift_bin(nrn_threads->_t - 0.5*nrn_threads->_dt); } } From d51f75d55251a9ccc2b7d3b1ae93495b5219097f Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Mon, 29 Nov 2021 14:06:30 -0500 Subject: [PATCH 06/13] interthread buffer must be enqueued at beginning of psolve. --- coreneuron/network/netpar.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/coreneuron/network/netpar.cpp b/coreneuron/network/netpar.cpp index d3bd530d4..823fedf36 100644 --- a/coreneuron/network/netpar.cpp +++ b/coreneuron/network/netpar.cpp @@ -286,7 +286,10 @@ void nrn_spike_exchange_init() { t_exchange_ = t; dt1_ = rev_dt; usable_mindelay_ = floor(mindelay_ * dt1_ + 1e-9) * dt; - assert(usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) < 255); + if (usable_mindelay_ * dt1_ >= 255.) { + usable_mindelay_ = 255./dt1_; + } + assert(usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) <= 255.); } else { #if nrn_spikebuf_size > 0 if (spbufout) { @@ -366,6 +369,7 @@ void nrn_spike_exchange(NrnThread* nt) { ps->send(spikein[i].spiketime, net_cvode_instance, nt); } } + nrn_multithread_job(interthread_enqueue); wt1_ = nrn_wtime() - wt; } @@ -612,6 +616,7 @@ void BBS_netpar_solve(double tstop) { } nrn_timeout(timeout_); + nrn_multithread_job(interthread_enqueue); ncs2nrn_integrate(tstop * (1. + 1e-11)); nrn_spike_exchange(nrn_threads); nrn_timeout(0); From f4c35ecf616dfd1856a2bc91874e271a26e1cb6a Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Mon, 29 Nov 2021 14:11:09 -0500 Subject: [PATCH 07/13] clang-format --- coreneuron/io/core2nrn_data_return.cpp | 6 ++++-- coreneuron/network/netcvode.cpp | 2 +- coreneuron/network/netpar.cpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/coreneuron/io/core2nrn_data_return.cpp b/coreneuron/io/core2nrn_data_return.cpp index e8a73cbca..6a12c197f 100644 --- a/coreneuron/io/core2nrn_data_return.cpp +++ b/coreneuron/io/core2nrn_data_return.cpp @@ -174,7 +174,9 @@ static void core2nrn_tqueue(NrnThread&); In particular need to initialize bin queues to the current time before transferring events. */ -extern "C" {void (*core2nrn_clear_queues_)(double t); } +extern "C" { +void (*core2nrn_clear_queues_)(double t); +} /** @brief All activated WATCH statements need activation on NEURON side. */ @@ -207,7 +209,7 @@ void core2nrn_data_return() { return; } - (*core2nrn_clear_queues_)(nrn_threads[0]._t); // all threads at same time + (*core2nrn_clear_queues_)(nrn_threads[0]._t); // all threads at same time for (int tid = 0; tid < nrn_nthread; ++tid) { size_t n = 0; diff --git a/coreneuron/network/netcvode.cpp b/coreneuron/network/netcvode.cpp index 160bb57dd..0b58856a7 100644 --- a/coreneuron/network/netcvode.cpp +++ b/coreneuron/network/netcvode.cpp @@ -236,7 +236,7 @@ void NetCvode::clear_events() { d.unreffed_event_cnt_ = 0; d.inter_thread_events_.clear(); d.tqe_->nshift_ = -1; - d.tqe_->shift_bin(nrn_threads->_t - 0.5*nrn_threads->_dt); + d.tqe_->shift_bin(nrn_threads->_t - 0.5 * nrn_threads->_dt); } } diff --git a/coreneuron/network/netpar.cpp b/coreneuron/network/netpar.cpp index 823fedf36..036e5baab 100644 --- a/coreneuron/network/netpar.cpp +++ b/coreneuron/network/netpar.cpp @@ -287,7 +287,7 @@ void nrn_spike_exchange_init() { dt1_ = rev_dt; usable_mindelay_ = floor(mindelay_ * dt1_ + 1e-9) * dt; if (usable_mindelay_ * dt1_ >= 255.) { - usable_mindelay_ = 255./dt1_; + usable_mindelay_ = 255. / dt1_; } assert(usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) <= 255.); } else { From fb587306667042dbd6e4f448c318e0bc78a0cb75 Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Tue, 30 Nov 2021 20:11:30 -0500 Subject: [PATCH 08/13] fix some memory leaks --- coreneuron/io/nrn_setup.cpp | 5 ++++- coreneuron/mpi/lib/nrnmpi.cpp | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp index ccd1e7b8c..6bec06fde 100644 --- a/coreneuron/io/nrn_setup.cpp +++ b/coreneuron/io/nrn_setup.cpp @@ -180,7 +180,7 @@ void nrn_read_filesdat(int& ngrp, int*& grp, const char* filesdat) { FILE* fp = fopen(filesdat, "r"); if (!fp) { - nrn_fatal_error("No input file with nrnthreads, exiting..."); + nrn_fatal_error("No input file ( %s ) with nrnthreads, exiting...", filesdat); } char version[256]; @@ -710,6 +710,9 @@ void nrn_cleanup_ion_map() { void nrn_cleanup() { clear_event_queue(); // delete left-over TQItem + for (auto psi: gid2in) { + delete psi.second; + } gid2in.clear(); gid2out.clear(); diff --git a/coreneuron/mpi/lib/nrnmpi.cpp b/coreneuron/mpi/lib/nrnmpi.cpp index 070ce05fd..bc84a969e 100644 --- a/coreneuron/mpi/lib/nrnmpi.cpp +++ b/coreneuron/mpi/lib/nrnmpi.cpp @@ -35,6 +35,12 @@ static void nrn_fatal_error(const char* msg) { } nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) { + // Execute at most once per launch. Avoid memory leak. + static bool executed = false; + if (executed) { + return {nrnmpi_numprocs_, nrnmpi_myid_}; + } + nrnmpi_under_nrncontrol_ = true; if (!nrnmpi_initialized_impl()) { @@ -62,6 +68,7 @@ nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) { #endif } + executed = true; return {nrnmpi_numprocs_, nrnmpi_myid_}; } From a9f60c5a566ed976ef743a31bb96dd697c62c7de Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Wed, 1 Dec 2021 06:01:07 -0500 Subject: [PATCH 09/13] Initialization of binq consistent everywhere. --- coreneuron/network/netcvode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreneuron/network/netcvode.cpp b/coreneuron/network/netcvode.cpp index 0b58856a7..62bdd7498 100644 --- a/coreneuron/network/netcvode.cpp +++ b/coreneuron/network/netcvode.cpp @@ -243,7 +243,7 @@ void NetCvode::clear_events() { void NetCvode::init_events() { for (int i = 0; i < nrn_nthread; ++i) { p[i].tqe_->nshift_ = -1; - p[i].tqe_->shift_bin(nrn_threads->_t); + p[i].tqe_->shift_bin(nrn_threads->_t - 0.5 * nrn_threads->_dt); } for (int tid = 0; tid < nrn_nthread; ++tid) { // can be done in parallel From f1604ed8bbe0db3c10a93c1f054f1276591faaf1 Mon Sep 17 00:00:00 2001 From: Michael Hines Date: Fri, 3 Dec 2021 13:18:57 -0500 Subject: [PATCH 10/13] Avoid Random123 globalindex warning if the index has not changed. Release random123 instance when multisend setup no longer needs it. Psolve restores a few more arg default values. --- coreneuron/apps/main1.cpp | 7 +++++++ coreneuron/network/multisend_setup.cpp | 11 ++++++++++- coreneuron/utils/randoms/nrnran123.cu | 2 +- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/coreneuron/apps/main1.cpp b/coreneuron/apps/main1.cpp index 5d5767921..19087214f 100644 --- a/coreneuron/apps/main1.cpp +++ b/coreneuron/apps/main1.cpp @@ -467,8 +467,15 @@ static void* load_dynamic_mpi(const std::string& libname) { #endif extern "C" void mk_mech_init(int argc, char** argv) { + // cannot corenrn_param = corenrn_parameters(), so unfortunately repeating + // some initialization values from corenrn_parameters.hpp to allow starting + // fresh with original defaults. corenrn_param.multisend = false; corenrn_param.binqueue = false; + corenrn_param.ms_phases = 2; + corenrn_param.ms_subint = 2; + corenrn_param.spkcompress = 0; + // read command line parameters and parameter config files corenrn_param.parse(argc, argv); diff --git a/coreneuron/network/multisend_setup.cpp b/coreneuron/network/multisend_setup.cpp index a11edb54e..72453b7a1 100644 --- a/coreneuron/network/multisend_setup.cpp +++ b/coreneuron/network/multisend_setup.cpp @@ -224,7 +224,7 @@ void TarList::alloc() { // for two phase -static nrnran123_State* ranstate; +static nrnran123_State* ranstate{nullptr}; static void random_init(int i) { if (!ranstate) { @@ -236,6 +236,14 @@ static unsigned int get_random() { return nrnran123_ipick(ranstate); } +// Avoid warnings if the global index is changed on subsequent psolve. +static void random_delete() { + if (ranstate) { + nrnran123_deletestream(ranstate); + ranstate = nullptr; + } +} + static int iran(int i1, int i2) { // discrete uniform random integer from i2 to i2 inclusive. Must // work if i1 == i2 @@ -575,6 +583,7 @@ static std::vector setup_target_lists(bool use_phase2) { phase2organize(tl); } } + random_delete(); } // For clarity, use the all2allv_int style of information flow diff --git a/coreneuron/utils/randoms/nrnran123.cu b/coreneuron/utils/randoms/nrnran123.cu index b13dad7eb..526c06be8 100644 --- a/coreneuron/utils/randoms/nrnran123.cu +++ b/coreneuron/utils/randoms/nrnran123.cu @@ -179,7 +179,7 @@ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. { std::lock_guard _{g_instance_count_mutex}; - if (g_instance_count != 0 && nrnmpi_myid == 0) { + if (g_instance_count != 0 && nrnmpi_myid == 0 && get_global_state().v[0] != gix) { std::cout << "nrnran123_set_globalindex(" << gix << ") called when a non-zero number of Random123 streams (" << g_instance_count From a2531f36a0ee28b9f640eea8290ec17159dafdfc Mon Sep 17 00:00:00 2001 From: nrnhines Date: Fri, 17 Dec 2021 06:31:20 -0500 Subject: [PATCH 11/13] Revert nrnran123.cu Eliminating the warning when Random123 global index does not change, increases the chance of hiding a bug. --- coreneuron/utils/randoms/nrnran123.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coreneuron/utils/randoms/nrnran123.cu b/coreneuron/utils/randoms/nrnran123.cu index 526c06be8..b13dad7eb 100644 --- a/coreneuron/utils/randoms/nrnran123.cu +++ b/coreneuron/utils/randoms/nrnran123.cu @@ -179,7 +179,7 @@ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. { std::lock_guard _{g_instance_count_mutex}; - if (g_instance_count != 0 && nrnmpi_myid == 0 && get_global_state().v[0] != gix) { + if (g_instance_count != 0 && nrnmpi_myid == 0) { std::cout << "nrnran123_set_globalindex(" << gix << ") called when a non-zero number of Random123 streams (" << g_instance_count From 2dd175a7c7ca3d133ec1a349cd2a14bec9aa079f Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 23 Dec 2021 15:49:41 +0100 Subject: [PATCH 12/13] update nmodl submodule --- external/nmodl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/nmodl b/external/nmodl index 46f8baf2b..3e960d7d9 160000 --- a/external/nmodl +++ b/external/nmodl @@ -1 +1 @@ -Subproject commit 46f8baf2bbeaa0d21559d6306ec37b94c601f1ee +Subproject commit 3e960d7d9e6db1e4f74a1c7fb6b773a6a3cd593c From 363943ed962d8be0551e7c557ca1a2ae489c6f2c Mon Sep 17 00:00:00 2001 From: Pramod Kumbhar Date: Thu, 23 Dec 2021 19:22:37 +0100 Subject: [PATCH 13/13] ntasks=16 for gpu tests as well --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4b01c51a7..91670535e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -49,6 +49,7 @@ spack_setup: .gpu_node: variables: bb5_constraint: volta + bb5_ntasks: 16 .test_neuron: extends: [.ctest] stage: test_neuron