This repository has been archived by the owner on Mar 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 37
/
multisend_setup.cpp
706 lines (651 loc) · 24.2 KB
/
multisend_setup.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
#include <stdio.h>
#include <math.h>
#include "coreneuron/utils/randoms/nrnran123.h"
#include "coreneuron/nrniv/nrniv_decl.h"
#include "coreneuron/nrniv/multisend.h"
#include "coreneuron/nrnmpi/nrnmpidec.h"
#include "coreneuron/utils/memory_utils.h"
/*
For very large numbers of processors and cells and fanout, it is taking
a long time to figure out each cell's target list given the input gids
(gid2in) on each host, e.g. 240 seconds for 2^25 cells, 1k connections
per cell, and 128K cores; and 340 seconds for two-phase exchange.
To reduce this setup time we experiment with a very different algorithm in which
we construct a gid target host list on host gid%nhost and copy that list to
the source host owning the gid.
*/
#if NRN_MULTISEND
namespace coreneuron {
// Map type keyed by gid whose values are InputPreSyn*; used in this file
// as the iterator/template type for the gid2in table.
typedef std::map<int, InputPreSyn*> Gid2IPS;
// Map type keyed by gid whose values are PreSyn*; used in this file
// as the iterator/template type for the gid2out table.
typedef std::map<int, PreSyn*> Gid2PS;
#if 0
// Debug helper (compiled only when the surrounding #if is enabled).
// Appends to the per-rank file "debug.<nrnmpi_myid>" a header line with the
// label p followed by this rank's id and every key (gid) of map.
//   p   - label written before the gid list
//   map - any map-like container whose keys are ints
// If the debug file cannot be opened the call does nothing.
template <typename T>
static void celldebug(const char* p, T& map) {
    char fname[100];
    sprintf(fname, "debug.%d", nrnmpi_myid);
    FILE* f = fopen(fname, "a");
    if (!f) {
        // fopen failed; passing a null FILE* to fprintf/fclose is undefined.
        return;
    }
    fprintf(f, "\n%s\n", p);
    int rank = nrnmpi_myid;
    fprintf(f, " %2d:", rank);
    for (typename T::iterator map_it = map.begin(); map_it != map.end(); ++map_it) {
        int gid = map_it->first;
        fprintf(f, " %2d", gid);
    }
    fprintf(f, "\n");
    fclose(f);
}
// Debug helper (compiled only when the surrounding #if is enabled).
// Appends to "debug.<nrnmpi_myid>" the label p, this rank's id, and then
// for each of the nrnmpi_numprocs ranks the send count/displacement with the
// corresponding slice of s, followed by the same for the receive side
// (rcnt, rdispl, r). Slices run from displ[i] up to displ[i + 1], so both
// displacement arrays must have nrnmpi_numprocs + 1 entries.
// If the debug file cannot be opened the call does nothing.
static void
alltoalldebug(const char* p, int* s, int* scnt, int* sdispl, int* r, int* rcnt, int* rdispl) {
    char fname[100];
    sprintf(fname, "debug.%d", nrnmpi_myid);
    FILE* f = fopen(fname, "a");
    if (!f) {
        // fopen failed; passing a null FILE* to fprintf/fclose is undefined.
        return;
    }
    fprintf(f, "\n%s\n", p);
    int rank = nrnmpi_myid;
    fprintf(f, " rank %d\n", rank);
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        fprintf(f, " s%d : %d %d :", i, scnt[i], sdispl[i]);
        for (int j = sdispl[i]; j < sdispl[i + 1]; ++j) {
            fprintf(f, " %2d", s[j]);
        }
        fprintf(f, "\n");
    }
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        fprintf(f, " r%d : %d %d :", i, rcnt[i], rdispl[i]);
        for (int j = rdispl[i]; j < rdispl[i + 1]; ++j) {
            fprintf(f, " %2d", r[j]);
        }
        fprintf(f, "\n");
    }
    fclose(f);
}
#else
// No-op variant of celldebug used when the debug implementation is
// compiled out: takes a label and a container and ignores both.
template <typename T>
static void celldebug(const char* /*label*/, T& /*table*/) {}
// No-op variant of alltoalldebug used when the debug implementation is
// compiled out: accepts the label and the six exchange buffers and ignores them.
static void alltoalldebug(const char* /*label*/,
                          int* /*s*/,
                          int* /*scnt*/,
                          int* /*sdispl*/,
                          int* /*r*/,
                          int* /*rcnt*/,
                          int* /*rdispl*/) {}
#endif
#if 0
// Debug helper (compiled only when the surrounding #if is enabled).
// Appends to "debug.<nrnmpi_myid>" one line per entry of gid2out: the
// PreSyn's gid_ followed by its phase 1 target ranks.
// The entry for a PreSyn starts at targets_phase1 + multisend_index_ and is
// read as [ <skipped>, count, rank_0 ... rank_{count-1} ].
// A PreSyn whose multisend_index_ is negative has no entry; only its gid is
// printed (mirrors the j >= 0 guard in phase2debug).
// If the debug file cannot be opened the call does nothing.
void phase1debug(int* targets_phase1) {
    char fname[100];
    sprintf(fname, "debug.%d", nrnmpi_myid);
    FILE* f = fopen(fname, "a");
    if (!f) {
        // fopen failed; passing a null FILE* to fprintf/fclose is undefined.
        return;
    }
    fprintf(f, "\nphase1debug %d", nrnmpi_myid);
    for (Gid2PS::iterator gid2out_it = gid2out.begin(); gid2out_it != gid2out.end(); ++gid2out_it) {
        PreSyn* ps = gid2out_it->second;
        fprintf(f, "\n %2d:", ps->gid_);
        if (ps->multisend_index_ < 0) {
            // No target list recorded for this PreSyn; do not index the array.
            continue;
        }
        int* ranks = targets_phase1 + ps->multisend_index_;
        int n = ranks[1];
        ranks += 2;
        for (int i = 0; i < n; ++i) {
            fprintf(f, " %2d", ranks[i]);
        }
    }
    fprintf(f, "\n");
    fclose(f);
}
// Debug helper (compiled only when the surrounding #if is enabled).
// Appends to "debug.<nrnmpi_myid>" one line per entry of gid2in: the gid
// followed by its phase 2 target ranks, if any.
// The entry for an InputPreSyn starts at
// targets_phase2 + multisend_phase2_index_ and is read as
// [ count, rank_0 ... rank_{count-1} ]; a negative index means no entry.
// If the debug file cannot be opened the call does nothing.
void phase2debug(int* targets_phase2) {
    char fname[100];
    sprintf(fname, "debug.%d", nrnmpi_myid);
    FILE* f = fopen(fname, "a");
    if (!f) {
        // fopen failed; passing a null FILE* to fprintf/fclose is undefined.
        return;
    }
    fprintf(f, "\nphase2debug %d", nrnmpi_myid);
    for (Gid2IPS::iterator gid2in_it = gid2in.begin(); gid2in_it != gid2in.end(); ++gid2in_it) {
        int gid = gid2in_it->first;
        InputPreSyn* ps = gid2in_it->second;
        fprintf(f, "\n %2d:", gid);
        int j = ps->multisend_phase2_index_;
        if (j >= 0) {
            int* ranks = targets_phase2 + j;
            int cnt = ranks[0];
            ranks += 1;
            for (int i = 0; i < cnt; ++i) {
                fprintf(f, " %2d", ranks[i]);
            }
        }
    }
    fprintf(f, "\n");
    fclose(f);
}
#endif
// Release an int array that was allocated with new[].
// A null pointer is accepted and ignored.
static void del(int* a) {
    delete[] a;  // delete[] on a null pointer is a no-op
}
// Allocate an int array of `size` elements, each set to `val`.
// Returns a null pointer (no allocation) when size is 0.
// The result is released with del().
static int* newintval(int val, int size) {
    int* filled = 0;
    if (size != 0) {
        filled = new int[size];
        for (int* cur = filled; cur != filled + size; ++cur) {
            *cur = val;
        }
    }
    return filled;
}
// Build the exclusive prefix sum of acnt[0..size-1].
// Returns a newly allocated array of size + 1 ints where element 0 is 0 and
// element k is acnt[0] + ... + acnt[k-1]; the last element is the total.
// The result is released with del().
static int* newoffset(int* acnt, int size) {
    int* offsets = new int[size + 1];
    int running = 0;
    offsets[0] = running;
    for (int k = 1; k <= size; ++k) {
        running += acnt[k - 1];
        offsets[k] = running;
    }
    return offsets;
}
// Exchange per-rank send counts so that each rank learns how much it will
// receive from every other rank.
//   scnt   (in)  - number of ints this rank sends to each rank
//   rcnt   (out) - newly allocated; filled by the exchange with one int
//                  from each rank
//   rdispl (out) - newly allocated prefix sum of rcnt (nrnmpi_numprocs + 1
//                  entries)
// The exchange itself moves exactly one int per rank: the count array is
// all ones and the displacements are 0, 1, 2, ...
static void all2allv_helper(int* scnt, int*& rcnt, int*& rdispl) {
    const int nranks = nrnmpi_numprocs;
    rcnt = newintval(0, nranks);

    int* ones = newintval(1, nranks);
    int* unit_offsets = newoffset(ones, nranks);
    nrnmpi_int_alltoallv(scnt, ones, unit_offsets, rcnt, ones, unit_offsets);
    del(unit_offsets);
    del(ones);

    rdispl = newoffset(rcnt, nranks);
}
// When nonzero, all2allv_int measures the time of each exchange with
// nrn_wtime() and rank 0 prints the buffer space, nrn_mallinfo() value and
// elapsed time.
#define all2allv_perf 1
// Variable-length all-to-all exchange of ints.
//   s, scnt, sdispl (in)  - send buffer, per-rank counts and displacements
//   r, rcnt, rdispl (out) - newly allocated receive buffer, per-rank counts
//                           and displacements; the caller releases all three
//                           with del() when finished
//   dmes                  - label used for the debug and timing output
// With all2allv_perf enabled, rank 0 prints the space used by the count and
// data buffers, the nrn_mallinfo() value and the elapsed wall time.
static void
all2allv_int(int* s, int* scnt, int* sdispl, int*& r, int*& rcnt, int*& rdispl, const char* dmes) {
#if all2allv_perf
    const double t_start = nrn_wtime();
#endif
    const int nranks = nrnmpi_numprocs;

    // First learn the receive counts, then size the receive buffer from them.
    all2allv_helper(scnt, rcnt, rdispl);
    r = newintval(0, rdispl[nranks]);
    nrnmpi_int_alltoallv(s, scnt, sdispl, r, rcnt, rdispl);
    alltoalldebug(dmes, s, scnt, sdispl, r, rcnt, rdispl);

#if all2allv_perf
    if (nrnmpi_myid == 0) {
        const int space = 4 * nranks + sdispl[nranks] + rdispl[nranks];
        const double elapsed = nrn_wtime() - t_start;
        printf("all2allv_int %s space=%d total=%g time=%g\n", dmes, space, nrn_mallinfo(), elapsed);
    }
#endif
}
// Target list for one gid, built on the intermediate rank.
// The object owns `list` and `indices` (both released in the destructor),
// so it is deliberately not copyable: an implicit copy would lead to the
// same arrays being freed twice.
class TarList {
  public:
    TarList();
    virtual ~TarList();
    // Allocates `list` with room for `size` ints (nothing if size is 0).
    virtual void alloc();

    int size;      // number of entries in list, or, when indices is not
                   // null, the number of phase2 groups (see below)
    int* list;     // target ranks for the gid
    int rank;      // rank that reported the gid as a spike-generating
                   // output gid; -1 when there is none
    int* indices;  // indices of list for groups of phase2 targets.
    // If indices is not null, then size is one less than
    // the size of the indices list where indices[size] = the size of
    // the list. Indices[0] is 0 and list[indices[i]] is the rank
    // to send the ith group of phase2 targets.

  private:
    // Copying is disabled (declared, intentionally never defined).
    TarList(const TarList&);
    TarList& operator=(const TarList&);
};
// Map from gid to the TarList holding that gid's target ranks.
typedef std::map<int, TarList*> Int2TarList;
// Table of target lists; allocated, filled and deleted again inside
// setup_target_lists.
static Int2TarList* gid2tarlist;
// Start as an empty list with no arrays allocated and no owning rank (-1).
TarList::TarList()
    : size(0)
    , list(0)
    , rank(-1)
    , indices(0) {}
// Release both owned arrays; either may be null.
TarList::~TarList() {
    delete[] indices;
    delete[] list;
}
// Allocate `list` with room for `size` ints. When size is 0 nothing is
// allocated and list is left unchanged.
void TarList::alloc() {
    if (size == 0) {
        return;
    }
    list = new int[size];
}
// for two phase
// Random stream read by get_random(); created by the first call of
// random_init() and null until then.
static nrnran123_State* ranstate;
// Create the file-level random stream from identifier i (second stream id
// is 0). Only the first call has an effect; once the stream exists later
// calls leave it untouched.
static void random_init(int i) {
    if (ranstate) {
        return;
    }
    ranstate = nrnran123_newstream(i, 0);
}
// Draw the next value from the file-level random stream via
// nrnran123_ipick. random_init() must have been called first so that the
// stream exists.
static unsigned int get_random() {
    const unsigned int pick = nrnran123_ipick(ranstate);
    return pick;
}
// Discrete uniform random integer from i1 to i2 inclusive.
// Works when i1 == i2: that value is returned directly and no random
// number is consumed.
static int iran(int i1, int i2) {
    if (i1 != i2) {
        return i1 + get_random() % (i2 - i1 + 1);
    }
    return i1;
}
// Split a target list into about sqrt(size) groups for two-phase exchange.
// On return (when more than one group results) tl->indices holds the group
// boundaries, tl->indices[groups] is the original list length, tl->size is
// the number of groups, and the first entry of every group is the rank that
// acts as that group's phase2 sender.
// Lists that would yield at most one group are left untouched.
// (Original note on the group count: "change to about 20".)
static void phase2organize(TarList* tl) {
    const int ntarget = tl->size;
    const int ngroup = int(sqrt(double(ntarget)));
    if (ngroup <= 1) {
        // do not bother if not many connections
        return;
    }

    // Group sizes as equal as possible.
    int* bounds = new int[ngroup + 1];
    for (int g = 0; g < ngroup; ++g) {
        bounds[g] = (g * ntarget) / ngroup;
    }
    bounds[ngroup] = ntarget;
    tl->indices = bounds;
    tl->size = ngroup;

    // Always using the first rank of a group as its phase2 sender would be
    // a biased choice, so pick a random member of each group and swap it
    // into the leading position.
    for (int g = 0; g < ngroup; ++g) {
        const int first = bounds[g];
        const int last = bounds[g + 1] - 1;
        const int chosen = iran(first, last);
        const int leader = tl->list[first];
        tl->list[first] = tl->list[chosen];
        tl->list[chosen] = leader;
    }
}
// end of twophase
/*
Setting up target lists uses a lot of temporary memory. It is conceivable
that this can be done prior to creating any cells or connections. I.e.
gid2out is presently known from pc.set_gid2node(gid,...). Gid2in is presently
known from NetCon = pc.gid_connect(gid, target) and it is quite a style
and hoc network programming change to use something like pc.need_gid(gid)
before cells with their synapses are created since one would have to imagine
that the hoc network setup code would have to be executed in a virtual
or 'abstract' fashion without actually creating cells, targets, or NetCons.
Anyway, to potentially support this in the future, we write setup_target_lists
to not use any PreSyn information.
*/
// Forward declarations; both functions are defined further down in this file.
static int setup_target_lists(int, int**);
static void fill_multisend_lists(int, int, int*, int*&, int*&);
void nrn_multisend_setup_targets(int use_phase2, int*& targets_phase1, int*& targets_phase2) {
int* r;
int sz = setup_target_lists(use_phase2, &r);
// initialize as unused
for (Gid2PS::iterator gid2out_it = gid2out.begin(); gid2out_it != gid2out.end(); ++gid2out_it) {
PreSyn* ps = gid2out_it->second;
ps->multisend_index_ = -1;
}
// Only will be not -1 if non-NULL input is a phase 2 sender.
for (Gid2IPS::iterator gid2in_it = gid2in.begin(); gid2in_it != gid2in.end(); ++gid2in_it) {
InputPreSyn* ps = gid2in_it->second;
ps->multisend_phase2_index_ = -1;
}
fill_multisend_lists(use_phase2, sz, r, targets_phase1, targets_phase2);
del(r);
// phase1debug(targets_phase1);
// phase2debug(targets_phase2);
}
// Some notes about threads and the rank lists.
// Assume all MPI messages are sent and received from a single thread (0).
// gid2in and gid2out are rank-wide lists for all threads.
//
// Decode the vector produced by setup_target_lists into the phase 1 and
// phase 2 target arrays.
//   use_phase2     - nonzero when the lists were organized for two phases
//   sz             - number of ints in r
//   r              - sequence of records: gid, size, [totalsize], list.
//                    totalsize is present only for output gids and only
//                    when use_phase2 is nonzero.
//   targets_phase1 - (out) newly allocated; the record for a PreSyn starts
//                    at its multisend_index_ and is laid out as
//                    total, count, rank_0 ... rank_{count-1}
//   targets_phase2 - (out) newly allocated; the record for an InputPreSyn
//                    starts at its multisend_phase2_index_ and is laid out
//                    as count, rank_0 ... rank_{count-1}
// With use_phase2, a gid found in gid2in is a phase 2 list; every other gid
// must be in gid2out (asserted) and is a phase 1 list.
static void fill_multisend_lists(int use_phase2,
                                 int sz,
                                 int* r,
                                 int*& targets_phase1,
                                 int*& targets_phase2) {
    int phase1_index = 0;
    int phase2_index = 0;

    // Pass 1: assign multisend_index_ / multisend_phase2_index_ and count
    // how many ints each target array needs.
    for (int i = 0; i < sz;) {
        InputPreSyn* ips = NULL;
        int gid = r[i++];
        int size = r[i++];
        if (use_phase2) {  // look in gid2in first
            Gid2IPS::iterator gid2in_it = gid2in.find(gid);
            if (gid2in_it != gid2in.end()) {  // phase 2 target list
                ips = gid2in_it->second;
                ips->multisend_phase2_index_ = phase2_index;
                phase2_index += 1 + size;  // count + ranks
                i += size;                 // skip the ranks
            }
        }
        if (!ips) {  // phase 1 target list (or whole list if use_phase2 is 0)
            Gid2PS::iterator gid2out_it = gid2out.find(gid);
            assert(gid2out_it != gid2out.end());
            PreSyn* ps = gid2out_it->second;
            ps->multisend_index_ = phase1_index;
            phase1_index += 2 + size;  // total + count + ranks
            // The sender adds the totalsize element whenever use_phase2 is
            // nonzero, so test it the same way here.
            if (use_phase2) {
                i++;  // skip totalsize
            }
            i += size;  // skip the ranks
        }
    }

    targets_phase1 = new int[phase1_index];
    targets_phase2 = new int[phase2_index];

    // Pass 2: walk r again and copy every list to its assigned location.
    for (int i = 0; i < sz;) {
        InputPreSyn* ips = NULL;
        int gid = r[i++];
        int size = r[i++];
        if (use_phase2) {  // look in gid2in first
            Gid2IPS::iterator gid2in_it = gid2in.find(gid);
            if (gid2in_it != gid2in.end()) {  // phase 2 target list
                ips = gid2in_it->second;
                int p = ips->multisend_phase2_index_;
                int* ranks = targets_phase2 + p;
                ranks[0] = size;
                ranks += 1;
                for (int j = 0; j < size; ++j) {
                    ranks[j] = r[i++];
                    assert(ranks[j] != nrnmpi_myid);
                }
            }
        }
        if (!ips) {  // phase 1 target list (or whole list if use_phase2 is 0)
            Gid2PS::iterator gid2out_it = gid2out.find(gid);
            assert(gid2out_it != gid2out.end());
            PreSyn* ps = gid2out_it->second;
            int p = ps->multisend_index_;
            int* ranks = targets_phase1 + p;
            int total = size;
            if (use_phase2) {
                total = r[i++];
            }
            ranks[0] = total;
            ranks[1] = size;
            ranks += 2;
            for (int j = 0; j < size; ++j) {
                ranks[j] = r[i++];
                // There never was a possibility of send2self
                // because an output presyn is never in gid2in_.
                assert(ranks[j] != nrnmpi_myid);
            }
        }
    }

    // compute max_ntarget_host and max_multisend_targets
    // NOTE(review): these maxima are not used after being computed in this
    // function; kept as in the original.
    int max_ntarget_host = 0;
    int max_multisend_targets = 0;
    for (Gid2PS::iterator gid2out_it = gid2out.begin(); gid2out_it != gid2out.end(); ++gid2out_it) {
        PreSyn* ps = gid2out_it->second;
        if (ps->output_index_ >= 0) {  // only ones that generate spikes
            int i = ps->multisend_index_;
            // multisend_index_ stays -1 when no list was received for this
            // gid; indexing targets_phase1 with it would read out of bounds.
            if (i < 0) {
                continue;
            }
            if (max_ntarget_host < targets_phase1[i]) {
                max_ntarget_host = targets_phase1[i];
            }
            if (max_multisend_targets < targets_phase1[i + 1]) {
                max_multisend_targets = targets_phase1[i + 1];
            }
        }
    }
    if (use_phase2) {
        for (Gid2IPS::iterator gid2in_it = gid2in.begin(); gid2in_it != gid2in.end();
             ++gid2in_it) {
            InputPreSyn* ps = gid2in_it->second;
            int i = ps->multisend_phase2_index_;
            if (i >= 0 && max_multisend_targets < targets_phase2[i]) {
                max_multisend_targets = targets_phase2[i];
            }
        }
    }
    (void) max_ntarget_host;
    (void) max_multisend_targets;
}
// Gather the target rank lists needed by this rank.
//   use_phase2 - nonzero to split each list into phase 1 / phase 2 parts
//   r_return   - (out) newly allocated vector; the caller releases it with
//                del()
// Returns the number of ints in *r_return. The vector encodes a sequence of
// gid, target list size, [total size], and target list, where the total
// size element is present only in phase 1 records and only when use_phase2
// is nonzero.
// Every temporary buffer allocated here is released before returning.
static int setup_target_lists(int use_phase2, int** r_return) {
    int *s, *r, *scnt, *rcnt, *sdispl, *rdispl;
    int nhost = nrnmpi_numprocs;

    celldebug<Gid2PS>("output gid", gid2out);
    celldebug<Gid2IPS>("input gid", gid2in);

    // What are the target ranks for a given input gid. All the ranks
    // with the same input gid send that gid to the intermediate
    // gid%nhost rank. The intermediate rank can then construct the
    // list of target ranks for the gids it gets.

    // scnt is number of input gids from target
    scnt = newintval(0, nhost);
    for (Gid2IPS::iterator gid2in_it = gid2in.begin(); gid2in_it != gid2in.end(); ++gid2in_it) {
        int gid = gid2in_it->first;
        ++scnt[gid % nhost];
    }

    // s are the input gids from target to be sent to the various intermediates
    sdispl = newoffset(scnt, nhost);
    s = newintval(0, sdispl[nhost]);
    for (Gid2IPS::iterator gid2in_it = gid2in.begin(); gid2in_it != gid2in.end(); ++gid2in_it) {
        int gid = gid2in_it->first;
        s[sdispl[gid % nhost]++] = gid;
    }
    // Restore sdispl for the message (filling s advanced its entries).
    del(sdispl);
    sdispl = newoffset(scnt, nhost);

    all2allv_int(s, scnt, sdispl, r, rcnt, rdispl, "gidin to intermediate");
    del(s);
    del(scnt);
    del(sdispl);
    // r is the gids received by this intermediate rank from all other ranks.

    // Construct hash table for finding the target rank list for a given gid.
    gid2tarlist = new Int2TarList;
    // Now figure out the size of the target list for each distinct gid in r.
    for (int i = 0; i < rdispl[nhost]; ++i) {
        TarList* tl;
        Int2TarList::iterator itl_it = gid2tarlist->find(r[i]);
        if (itl_it != gid2tarlist->end()) {
            tl = itl_it->second;
            tl->size += 1;
        } else {
            tl = new TarList();
            tl->size = 1;
            (*gid2tarlist)[r[i]] = tl;
        }
    }

    // Conceptually, now the intermediate is the mpi source and the gid
    // sources are the mpi destination in regard to target lists.
    // It would be possible at this point, but confusing,
    // to allocate a s[rdispl[nhost]] and figure out scnt and sdispl by
    // getting the counts and gids from the ranks that own the source
    // gids. In this way we could organize s without having to allocate
    // individual target lists on the intermediate and then allocate
    // another large s buffer to receive a copy of them. However for
    // this processing we already require two large buffers for input
    // gid's so there is no real savings of space.
    // So let's do the simple obvious sequence and now complete the
    // target lists.

    // Allocate the target lists (and set size to 0; we will recount when filling).
    for (Int2TarList::iterator itl_it = gid2tarlist->begin(); itl_it != gid2tarlist->end();
         ++itl_it) {
        TarList* tl = itl_it->second;
        tl->alloc();
        tl->size = 0;
    }

    // fill the target lists
    for (int rank = 0; rank < nhost; ++rank) {
        int b = rdispl[rank];
        int e = rdispl[rank + 1];
        for (int i = b; i < e; ++i) {
            TarList* tl;
            Int2TarList::iterator itl_it = gid2tarlist->find(r[i]);
            if (itl_it != gid2tarlist->end()) {
                tl = itl_it->second;
                tl->list[tl->size] = rank;
                tl->size++;
            }
        }
    }
    del(r);
    del(rcnt);
    del(rdispl);

    // Now the intermediate hosts have complete target lists and
    // the sources know the intermediate host from the gid2out_ map.
    // We could potentially organize here for two-phase exchange as well.

    // Which target lists are desired by the source rank?
    // Ironically, for round robin distributions, the target lists are
    // already on the proper source rank so the following code should
    // be tested for random distributions of gids.
    // How many on the source rank?
    scnt = newintval(0, nhost);
    for (Gid2PS::iterator gid2ps_it = gid2out.begin(); gid2ps_it != gid2out.end(); ++gid2ps_it) {
        int gid = gid2ps_it->first;
        PreSyn* ps = gid2ps_it->second;
        if (ps->output_index_ >= 0) {  // only ones that generate spikes
            ++scnt[gid % nhost];
        }
    }
    sdispl = newoffset(scnt, nhost);

    // what are the gids of those target lists
    s = newintval(0, sdispl[nhost]);
    for (Gid2PS::iterator gid2ps_it = gid2out.begin(); gid2ps_it != gid2out.end(); ++gid2ps_it) {
        int gid = gid2ps_it->first;
        PreSyn* ps = gid2ps_it->second;
        if (ps->output_index_ >= 0) {  // only ones that generate spikes
            s[sdispl[gid % nhost]++] = gid;
        }
    }
    // Restore sdispl for the message.
    del(sdispl);
    sdispl = newoffset(scnt, nhost);
    all2allv_int(s, scnt, sdispl, r, rcnt, rdispl, "gidout");

    // fill in the tl->rank for phase 1 target lists
    // r is an array of source spiking gids
    // tl is list associating input gids with list of target ranks.
    for (int rank = 0; rank < nhost; ++rank) {
        int b = rdispl[rank];
        int e = rdispl[rank + 1];
        for (int i = b; i < e; ++i) {
            TarList* tl;
            // note that there may be input gids with no corresponding
            // output gid so that the find may not return true and in
            // that case the tl->rank remains -1.
            // For example multisplit gids or simulation of a subset of
            // cells.
            Int2TarList::iterator itl_it = gid2tarlist->find(r[i]);
            if (itl_it != gid2tarlist->end()) {
                tl = itl_it->second;
                tl->rank = rank;
            }
        }
    }
    del(s);
    del(scnt);
    del(sdispl);
    del(r);
    del(rcnt);
    del(rdispl);

    if (use_phase2) {
        random_init(nrnmpi_myid + 1);
        for (Int2TarList::iterator itl_it = gid2tarlist->begin(); itl_it != gid2tarlist->end();
             ++itl_it) {
            TarList* tl = itl_it->second;
            if (tl->rank >= 0) {  // only if output gid is spike generating
                phase2organize(tl);
            }
        }
    }

    // For clarity, use the all2allv_int style of information flow
    // from source to destination as above
    // and also use a uniform code
    // for copying one and two phase information from a TarList to
    // develop the s, scnt, and sdispl buffers. That is, a buffer list
    // section in s for either a one-phase list or the much shorter
    // (individually) lists for first and second phases, has a
    // gid, size, totalsize header for each list where totalsize
    // is only present if the gid is an output gid (for
    // NrnMultisend_Send.ntarget_host used for conservation).
    // Note that totalsize is tl->indices[tl->size]

    // how much to send to each rank
    scnt = newintval(0, nhost);
    for (Int2TarList::iterator itl_it = gid2tarlist->begin(); itl_it != gid2tarlist->end();
         ++itl_it) {
        TarList* tl = itl_it->second;
        if (tl->rank < 0) {
            // When the output gid does not generate spikes, that rank
            // is not interested if there is a target list for it.
            // If the output gid does not exist, there is no rank.
            // In either case ignore this target list.
            continue;
        }
        if (tl->indices) {
            // indices[size] is the size of list but size of those
            // are the sublist phase 2 destination ranks which
            // don't get sent as part of the phase 2 target list.
            // Also there is a phase 1 target list of size so there
            // are altogether size+1 target lists.
            // (one phase 1 list and size phase 2 lists)
            scnt[tl->rank] += tl->size + 2;  // gid, size, list
            for (int i = 0; i < tl->size; ++i) {
                scnt[tl->list[tl->indices[i]]] += tl->indices[i + 1] - tl->indices[i] + 1;
                // gid, size, list
            }
        } else {
            // gid, list size, list
            scnt[tl->rank] += tl->size + 2;
        }
        if (use_phase2) {
            // The phase 1 header has as its third element, the
            // total list size (needed for conservation);
            scnt[tl->rank] += 1;
        }
    }
    sdispl = newoffset(scnt, nhost);
    s = newintval(0, sdispl[nhost]);

    // what to send to each rank
    for (Int2TarList::iterator itl_it = gid2tarlist->begin(); itl_it != gid2tarlist->end();
         ++itl_it) {
        int gid = itl_it->first;
        TarList* tl = itl_it->second;
        if (tl->rank < 0) {
            continue;
        }
        if (tl->indices) {
            // Phase 1 record for the source rank: the group leaders.
            s[sdispl[tl->rank]++] = gid;
            s[sdispl[tl->rank]++] = tl->size;
            if (use_phase2) {
                s[sdispl[tl->rank]++] = tl->indices[tl->size];
            }
            for (int i = 0; i < tl->size; ++i) {
                s[sdispl[tl->rank]++] = tl->list[tl->indices[i]];
            }
            // One phase 2 record per group, sent to that group's leader and
            // holding the remaining members of the group.
            for (int i = 0; i < tl->size; ++i) {
                int rank = tl->list[tl->indices[i]];
                s[sdispl[rank]++] = gid;
                assert(tl->indices[i + 1] > tl->indices[i]);
                s[sdispl[rank]++] = tl->indices[i + 1] - tl->indices[i] - 1;
                for (int j = tl->indices[i] + 1; j < tl->indices[i + 1]; ++j) {
                    s[sdispl[rank]++] = tl->list[j];
                }
            }
        } else {
            // gid, list size, list
            s[sdispl[tl->rank]++] = gid;
            s[sdispl[tl->rank]++] = tl->size;
            if (use_phase2) {
                s[sdispl[tl->rank]++] = tl->size;
            }
            for (int i = 0; i < tl->size; ++i) {
                s[sdispl[tl->rank]++] = tl->list[i];
            }
        }
        delete tl;
    }
    delete gid2tarlist;
    gid2tarlist = NULL;  // do not leave the file-level pointer dangling

    // Restore sdispl for the message. The array advanced while filling s
    // must be released first, otherwise it is leaked.
    del(sdispl);
    sdispl = newoffset(scnt, nhost);
    all2allv_int(s, scnt, sdispl, r, rcnt, rdispl, "lists");
    del(s);
    del(scnt);
    del(sdispl);

    del(rcnt);
    int sz = rdispl[nhost];
    del(rdispl);

    *r_return = r;
    return sz;
}
} // namespace coreneuron
#endif // NRN_MULTISEND