This repository has been archived by the owner on Mar 20, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 37
/
cellorder2.cpp
533 lines (481 loc) · 16.5 KB
/
cellorder2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
#include <stdio.h>
#include "coreneuron/nrniv/nrn_assert.h"
#include "coreneuron/nrniv/cellorder.h"
#include "coreneuron/nrniv/tnode.h"
#include "coreneuron/nrniv/nrniv_decl.h"
#include <map>
#include <set>
#include <algorithm>
#include <string.h>
using namespace std;
// experiment starting with identical cell ordering
// groupindex aleady defined that keeps identical cells together
// begin with leaf to root ordering
namespace coreneuron {
typedef VecTNode VTN; // level of nodes
typedef vector<VTN> VVTN; // group of levels
typedef vector<VVTN> VVVTN; // groups
// verify level in groups of nident identical nodes
void chklevel(VTN& level, size_t nident = 8) {
#if 0
nrn_assert(level.size() % nident == 0);
for (size_t i = 0; i < level.size(); ++i) {
size_t j = nident*int(i/nident);
nrn_assert(level[i]->hash == level[j]->hash);
}
#endif
}
// first child before second child, etc
// if same parent level, then parent order
// if not same parent, then earlier parent (no parent earlier than parent)
// if same parents, then children order
// if no parents then nodevec_index order.
static bool sortlevel_cmp(TNode* a, TNode* b) {
// when starting with leaf to root order
// note that leaves are at max level and all roots at level 0
bool result = false;
// since cannot have an index < 0, just add 1 to level
size_t palevel = a->parent ? 1 + a->parent->level : 0;
size_t pblevel = b->parent ? 1 + b->parent->level : 0;
if (palevel < pblevel) { // only used when starting leaf to root order
result = true; // earlier level first
} else if (palevel == pblevel) { // alwayse true when starting root to leaf
if (palevel == 0) { // a and b are roots
if (a->nodevec_index < b->nodevec_index) {
result = true;
}
} else { // parent order (already sorted with proper treenode_order
if (a->treenode_order < b->treenode_order) { // children order
result = true;
} else if (a->treenode_order == b->treenode_order) {
if (a->parent->treenode_order < b->parent->treenode_order) {
result = true;
}
}
}
}
return result;
}
static void sortlevel(VTN& level) {
std::sort(level.begin(), level.end(), sortlevel_cmp);
#if 0
printf("after sortlevel\n");
for (size_t i = 0; i < level.size(); ++i) {
TNode* nd = level[i];
printf("ilev=%ld i=%ld plev=%ld pi=%ld phash=%ld ord=%ld hash=%ld\n",
nd->level, i, nd->parent?nd->parent->level:0,
nd->parent?nd->parent->treenode_order:0, nd->parent?nd->parent->hash:0,
nd->treenode_order, nd->hash);
}
chklevel(level);
#endif
for (size_t i = 0; i < level.size(); ++i) {
level[i]->treenode_order = i;
}
}
static void set_treenode_order(VVTN& levels) {
size_t order = 0;
for (size_t i = 0; i < levels.size(); ++i) {
for (size_t j = 0; j < levels[i].size(); ++j) {
TNode* nd = levels[i][j];
nd->treenode_order = order++;
}
}
}
// every level starts out with no race conditions involving both
// parent and child in the same level. Can we arrange things so that
// every level has at least 32 nodes?
static size_t g32(TNode* nd) {
return nd->nodevec_index / warpsize;
}
static bool is_parent_race(TNode* nd) { // vitiating
size_t pg = g32(nd);
for (size_t i = 0; i < nd->children.size(); ++i) {
if (pg == g32(nd->children[i])) {
return true;
}
}
return false;
}
// less than 32 apart
static bool is_parent_race2(TNode* nd) { // vitiating
size_t pi = nd->nodevec_index;
for (size_t i = 0; i < nd->children.size(); ++i) {
if (nd->children[i]->nodevec_index - pi < warpsize) {
return true;
}
}
return false;
}
static bool is_child_race(TNode* nd) { // potentially handleable by atomic
if (nd->children.size() < 2) {
return false;
}
if (nd->children.size() == 2) {
return g32(nd->children[0]) == g32(nd->children[1]);
}
std::set<size_t> s;
for (size_t i = 0; i < nd->children.size(); ++i) {
size_t gc = g32(nd->children[i]);
if (s.find(gc) != s.end()) {
return true;
}
s.insert(gc);
}
return false;
}
static bool is_child_race2(TNode* nd) { // potentially handleable by atomic
if (nd->children.size() < 2) {
return false;
}
if (nd->children.size() == 2) {
size_t c0 = nd->children[0]->nodevec_index;
size_t c1 = nd->children[1]->nodevec_index;
c0 = (c0 < c1) ? (c1 - c0) : (c0 - c1);
return c0 < warpsize;
}
size_t ic0 = nd->children[0]->nodevec_index;
for (size_t i = 1; i < nd->children.size(); ++i) {
size_t ic = nd->children[i]->nodevec_index;
if (ic - ic0 < warpsize) {
return true;
}
ic0 = ic;
}
return false;
}
size_t dist2child(TNode* nd) {
size_t d = 1000;
size_t pi = nd->nodevec_index;
for (size_t i = 0; i < nd->children.size(); ++i) {
size_t d1 = nd->children[i]->nodevec_index - pi;
if (d1 < d) {
d = d1;
}
}
return d;
}
// from stackoverflow.com
template <typename T>
static void move_range(size_t start, size_t length, size_t dst, std::vector<T>& v) {
typename std::vector<T>::iterator first, middle, last;
if (start < dst) {
first = v.begin() + start;
middle = first + length;
last = v.begin() + dst;
} else {
first = v.begin() + dst;
middle = v.begin() + start;
last = middle + length;
}
std::rotate(first, middle, last);
}
static void move_nodes(size_t start, size_t length, size_t dst, VTN& nodes) {
nrn_assert(dst <= nodes.size());
nrn_assert(start + length <= dst);
move_range(start, length, dst, nodes);
// check correctness of move
for (size_t i = start; i < dst - length; ++i) {
nrn_assert(nodes[i]->nodevec_index == i + length);
}
for (size_t i = dst - length; i < dst; ++i) {
nrn_assert(nodes[i]->nodevec_index == start + (i - (dst - length)));
}
// update nodevec_index
for (size_t i = start; i < dst; ++i) {
nodes[i]->nodevec_index = i;
}
}
// least number of nodes to move after nd to eliminate prace
static size_t need2move(TNode* nd) {
size_t d = dist2child(nd);
return warpsize - ((nd->nodevec_index % warpsize) + d);
}
#if 0
static void how_many_warpsize_groups_have_only_leaves(VTN& nodes) {
size_t n = 0;
for (size_t i = 0; i < nodes.size(); i += warpsize) {
bool r = true;
for (size_t j=0; j < warpsize; ++j) {
if (nodes[i+j]->children.size() != 0) {
r = false;
break;
}
}
if (r) {
printf("warpsize group %ld starting at level %ld\n", i/warpsize, nodes[i]->level);
++n;
}
}
printf("number of warpsize groups with only leaves = %ld\n", n);
}
#endif
static void pr_race_situation(VTN& nodes) {
size_t prace2 = 0;
size_t prace = 0;
size_t crace = 0;
for (size_t i = nodes.size() - 1; nodes[i]->level != 0; --i) {
TNode* nd = nodes[i];
if (is_parent_race2(nd)) {
++prace2;
}
if (is_parent_race(nd)) {
printf("level=%ld i=%ld d=%ld n=%ld", nd->level, nd->nodevec_index, dist2child(nd),
need2move(nd));
for (size_t j = 0; j < nd->children.size(); ++j) {
TNode* cnd = nd->children[j];
printf(" %ld %ld", cnd->level, cnd->nodevec_index);
}
printf("\n");
++prace;
}
if (is_child_race(nd)) {
++crace;
}
}
printf("prace=%ld crace=%ld prace2=%ld\n", prace, crace, prace2);
}
static size_t next_leaf(TNode* nd, VTN& nodes) {
size_t i = 0;
for (i = nd->nodevec_index - 1; i > 0; --i) {
if (nodes[i]->children.size() == 0) {
return i;
}
}
// nrn_assert(i > 0);
return 0;
}
static void checkrace(TNode* nd, VTN& nodes) {
bool res = true;
for (size_t i = nd->nodevec_index; i < nodes.size(); ++i) {
if (is_parent_race2(nodes[i])) {
// printf("checkrace %ld\n", i);
res = false;
}
}
if (0 && res) {
printf("checkrace no race from nd onward\n");
}
}
static bool eliminate_race(TNode* nd, size_t d, VTN& nodes, TNode* look) {
// printf("eliminate_race %ld %ld\n", nd->nodevec_index, d);
// opportunistically move that number of leaves
// error if no leaves left to move.
size_t i = look->nodevec_index;
while (d > 0) {
i = next_leaf(nodes[i], nodes);
if (i == 0) {
return false;
}
size_t n = 1;
while (nodes[i - 1]->children.size() == 0 && n < d) {
--i;
++n;
}
// printf(" move_nodes src=%ld len=%ld dest=%ld\n", i, n, nd->nodevec_index);
move_nodes(i, n, nd->nodevec_index + 1, nodes);
d -= n;
}
checkrace(nd, nodes);
return true;
}
static void eliminate_prace(TNode* nd, VTN& nodes) {
size_t d = warpsize - dist2child(nd);
bool b = eliminate_race(nd, d, nodes, nd);
if (0 && !b) {
printf("could not eliminate prace for g=%ld c=%ld l=%ld o=%ld %ld\n", nd->groupindex,
nd->cellindex, nd->level, nd->treenode_order, nd->hash);
}
}
static void eliminate_crace(TNode* nd, VTN& nodes) {
size_t c0 = nd->children[0]->nodevec_index;
size_t c1 = nd->children[1]->nodevec_index;
size_t d = warpsize - ((c0 > c1) ? (c0 - c1) : (c1 - c0));
TNode* cnd = nd->children[0];
bool b = eliminate_race(cnd, d, nodes, nd);
if (0 && !b) {
printf("could not eliminate crace for g=%ld c=%ld l=%ld o=%ld %ld\n", nd->groupindex,
nd->cellindex, nd->level, nd->treenode_order, nd->hash);
}
}
static void question2(VVTN& levels) {
size_t nnode = 0;
for (size_t i = 0; i < levels.size(); ++i) {
nnode += levels[i].size();
}
VTN nodes(nnode);
nnode = 0;
for (size_t i = 0; i < levels.size(); ++i) {
for (size_t j = 0; j < levels[i].size(); ++j) {
nodes[nnode++] = levels[i][j];
}
}
for (size_t i = 0; i < nodes.size(); ++i) {
nodes[i]->nodevec_index = i;
}
// how_many_warpsize_groups_have_only_leaves(nodes);
// work backward and check the distance from parent to children.
// if parent in different group then there is no vitiating race.
// if children in different group then ther is no race (satisfied by
// atomic).
// If there is a vitiating race, then figure out how many nodes
// need to be inserted just before the parent to avoid the race.
// It is not clear if we should prioritize safe nodes (when moved they
// do not introduce a race) and/or contiguous nodes (probably, to keep
// the low hanging fruit together).
// At least, moved nodes should have proper tree order and not themselves
// introduce a race at their new location. Leaves are nice in that there
// are no restrictions in movement toward higher indices.
// Note that unless groups of 32 are inserted, it may be the case that
// races are generated at greater indices since otherwise a portion of
// each group is placed into the next group. This would not be an issue
// if, in fact, the stronger requirement of every parent having
// pi + 32 <= ci is demanded instead of merely being in different warpsize.
// One nice thing about adding warpsize nodes is that it does not disturb
// any existing contiguous groups except the moved group which gets divided
// between parent warpsize and child, where the nodes past the parent
// get same relative indices in the next warpsize
// let's see how well we can do by opportunistically moving leaves to
// separate parents from children by warpsize (ie is_parent_prace2 is false)
// Hopefully, we won't run out of leaves before eliminating all
// is_parent_prace2
if (0 && nodes.size() % warpsize != 0) {
size_t nnode = nodes.size() - levels[0].size();
printf("warp of %ld cells has %ld nodes in last cycle %ld\n", levels[0].size(),
nnode % warpsize, nnode / warpsize + 1);
}
// pr_race_situation(nodes);
// eliminate parent and children races using leaves
for (size_t i = nodes.size() - 1; i >= levels[0].size(); --i) {
TNode* nd = nodes[i];
if (is_child_race2(nd)) {
eliminate_crace(nd, nodes);
i = nd->nodevec_index;
}
if (is_parent_race2(nd)) {
eliminate_prace(nd, nodes);
i = nd->nodevec_index;
}
}
if (0) {
pr_race_situation(nodes);
}
// copy nodes indices to treenode_order
for (size_t i = 0; i < nodes.size(); ++i) {
nodes[i]->treenode_order = i;
}
}
// size of groups with contiguous parents for each level
static void question(VVTN& levels) {
#if 0
for (size_t i = 0; i < levels.size(); ++i) {
printf("%3ld %5ld", i, levels[i].size());
size_t iplast = 100000000;
size_t nsame = 0;
for (size_t j=0; j < levels[i].size(); ++j) {
TNode* nd = levels[i][j];
if (nd->parent == NULL) {
nsame += 1;
}else if (nd->parent->treenode_order == iplast + 1) {
nsame += 1;
iplast = nd->parent->treenode_order;
}else{
if (nsame) { printf(" %3ld", nsame); }
nsame = 1;
iplast = nd->parent->treenode_order;
}
}
if (nsame) { printf(" %3ld", nsame); }
printf("\n");
}
#endif
}
static void analyze(VVTN& levels) {
// sort each level with respect to parent level order
// earliest parent level first.
// treenode order can be anything as long as first children < second
// children etc.. After sorting a level, the order will be correct for
// that level, ranging from [0:level.size]
for (size_t i = 0; i < levels.size(); ++i) {
chklevel(levels[i]);
for (size_t j = 0; j < levels[i].size(); ++j) {
TNode* nd = levels[i][j];
for (size_t k = 0; k < nd->children.size(); ++k) {
nd->children[k]->treenode_order = k;
}
}
}
for (size_t i = 0; i < levels.size(); ++i) {
sortlevel(levels[i]);
chklevel(levels[i]);
}
set_treenode_order(levels);
}
void prgroupsize(VVVTN& groups) {
#if 0
for (size_t i=0; i < groups[0].size(); ++i) {
printf("%5ld\n", i);
for (size_t j=0; j < groups.size(); ++j) {
printf(" %5ld", groups[j][i].size());
}
printf("\n");
}
#endif
}
// group index primary, treenode_order secondary
static bool final_nodevec_cmp(TNode* a, TNode* b) {
bool result = false;
if (a->groupindex < b->groupindex) {
result = true;
} else if (a->groupindex == b->groupindex) {
if (a->treenode_order < b->treenode_order) {
result = true;
}
}
return result;
}
static void set_nodeindex(VecTNode& nodevec) {
for (size_t i = 0; i < nodevec.size(); ++i) {
nodevec[i]->nodevec_index = i;
}
}
void group_order2(VecTNode& nodevec, size_t groupsize, size_t ncell) {
#if 1
size_t maxlevel = level_from_root(nodevec);
#else
size_t maxlevel = level_from_leaf(nodevec);
// reverse the level numbering so leaves are at maxlevel.
// also make all roots have level 0
for (size_t i = 0; i < nodevec.size(); ++i) {
TNode* nd = nodevec[i];
nd->level = maxlevel - nd->level;
if (nd->parent == NULL) {
nd->level = 0;
}
}
#endif
// if not NULL use this to define groups (and reset TNode.groupindex)
size_t nwarp = warp_balance(ncell, nodevec);
// work on a cellgroup as a vector of levels. ie only possible race is
// two children in same warpsize
VVVTN groups(nwarp ? nwarp : (ncell / groupsize + ((ncell % groupsize) ? 1 : 0)));
for (size_t i = 0; i < groups.size(); ++i) {
groups[i].resize(maxlevel + 1);
}
for (size_t i = 0; i < nodevec.size(); ++i) {
TNode* nd = nodevec[i];
groups[nd->groupindex][nd->level].push_back(nd);
}
prgroupsize(groups);
// deal with each group
for (size_t i = 0; i < groups.size(); ++i) {
analyze(groups[i]);
question2(groups[i]);
}
question(groups[0]);
// question2(groups[0]);
// final nodevec order according to group_index and treenode_order
std::sort(nodevec.begin() + ncell, nodevec.end(), final_nodevec_cmp);
set_nodeindex(nodevec);
}
} // namespace coreneuron