-
Notifications
You must be signed in to change notification settings - Fork 145
/
circuit_sparse.rg
739 lines (662 loc) · 23.8 KB
/
circuit_sparse.rg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
-- Copyright 2019 Stanford University
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- runs-with:
-- [
-- ["-ll:cpu", "4", "-dm:memoize"],
-- ["-ll:cpu", "2", "-fflow-spmd", "1", "-fflow-spmd-shardsize", "2", "-ftrace", "0"],
-- ["-ll:cpu", "2", "-fflow-spmd", "1", "-fflow-spmd-shardsize", "2", "-dm:memoize"],
-- ["-ll:cpu", "5", "-fflow-spmd", "1", "-fflow-spmd-shardsize", "5", "-p", "5"]
-- ]
import "regent"
-- Compile and link circuit.cc
--
-- Builds circuit.cc (expected next to this script) into a shared library,
-- links it into the running process and exposes the declarations of
-- circuit.h as `ccircuit`.
--
-- Environment variables consulted:
--   LG_RT_DIR  (required) added to the include path for the compiler and
--              for the circuit.h import
--   OBJNAME    if set, libcircuit.so is written into the directory part of it
--   SAVEOBJ    if '1' (and OBJNAME is unset), libcircuit.so is written next
--              to this script; otherwise a temporary file name is used
--   CXX        compiler executable (default 'c++')
--   MAX_DIM    value for -DLEGION_MAX_DIM / -DREALM_MAX_DIM (default '3')
--   CC_FLAGS   extra flags placed before the built-in ones
local ccircuit
do
  local root_dir = arg[0]:match(".*/") or "./"

  -- Fail with a readable message instead of "attempt to concatenate a nil
  -- value" when the runtime directory has not been configured.
  local lg_rt_dir = os.getenv('LG_RT_DIR')
  assert(lg_rt_dir, "Error: the LG_RT_DIR environment variable must be set")
  local runtime_dir = lg_rt_dir .. "/"

  local circuit_cc = root_dir .. "circuit.cc"
  local circuit_so
  if os.getenv('OBJNAME') then
    local out_dir = os.getenv('OBJNAME'):match('.*/') or './'
    circuit_so = out_dir .. "libcircuit.so"
  elseif os.getenv('SAVEOBJ') == '1' then
    circuit_so = root_dir .. "libcircuit.so"
  else
    circuit_so = os.tmpname() .. ".so" -- root_dir .. "circuit.so"
  end

  local cxx = os.getenv('CXX') or 'c++'
  local max_dim = os.getenv('MAX_DIM') or '3'
  local cxx_flags = os.getenv('CC_FLAGS') or ''
  cxx_flags = cxx_flags .. " -O2 -Wall -Werror -DLEGION_MAX_DIM=" .. max_dim .. " -DREALM_MAX_DIM=" .. max_dim
  -- macOS needs different shared-library flags than other platforms.
  if os.execute('test "$(uname)" = Darwin') == 0 then
    cxx_flags =
      (cxx_flags ..
         " -dynamiclib -single_module -undefined dynamic_lookup -fPIC")
  else
    cxx_flags = cxx_flags .. " -shared -fPIC"
  end

  local cmd = (cxx .. " " .. cxx_flags .. " -I " .. runtime_dir .. " " ..
                 circuit_cc .. " -o " .. circuit_so)
  if os.execute(cmd) ~= 0 then
    print("Error: failed to compile " .. circuit_cc)
    assert(false)
  end
  terralib.linklibrary(circuit_so)
  ccircuit = terralib.includec("circuit.h", {"-I", root_dir, "-I", runtime_dir})
end
-- Shorthand handles for the Legion C API and the C standard headers used below.
local c = regentlib.c
local std = terralib.includec("stdlib.h")
local cmath = terralib.includec("math.h")
local cstring = terralib.includec("string.h")
-- Publish the libc helpers as Lua globals so that the tasks below can call
-- drand48() and ceil() unqualified.
rawset(_G, "drand48", std.drand48)
rawset(_G, "srand48", std.srand48)
rawset(_G, "ceil", cmath.ceil)
-- Number of segments each wire is modelled with: calculate_new_currents sizes
-- its scratch arrays from it, and the `currents` struct has one field per
-- segment while `voltages` has one field fewer.
WIRE_SEGMENTS = 10
-- Default for Config.steps (inner solver iterations per calculate_new_currents call).
STEPS = 10000
-- Time step used by calculate_new_currents and distribute_charge.
DELTAT = 1e-6
-- Bundle of the three Legion point colorings built by create_colorings:
--   privacy_map      : color 1 = shared nodes, color 0 = private nodes
--   private_node_map : private nodes, colored by superpiece id
--   shared_node_map  : shared nodes, colored by superpiece id
struct Colorings {
privacy_map : c.legion_point_coloring_t,
private_node_map : c.legion_point_coloring_t,
shared_node_map : c.legion_point_coloring_t,
}
-- Run configuration. Defaults are assigned in toplevel; the flag in brackets
-- is the command-line option parse_input_args maps onto the field.
--   num_loops              [-l]      main-loop iterations (toplevel adds 2*prune)
--   num_pieces             [-p]      number of circuit pieces
--   pieces_per_superpiece  [-pps]    pieces grouped into one launch point
--   nodes_per_piece        [-npp]    nodes in each piece (private + shared)
--   wires_per_piece        [-wpp]    wires in each piece
--   pct_wire_in_piece      [-pct]    percent chance that a wire's out node is
--                                    picked inside its own piece (init_wires)
--   random_seed            [-s]      printed by toplevel; not otherwise read
--                                    in the code visible here
--   steps                  [-i]      inner iterations in calculate_new_currents
--   sync                   [-sync]   parsed; not read in the code visible here
--   prune                  [-prune]  extra loop iterations (2*prune in total)
--   perform_checks         [-checks] parsed; not read in the code visible here
--   dump_values            [-dump]   parsed; not read in the code visible here
--   pct_shared_nodes                 percent of a piece's nodes that are shared
--   shared_nodes_per_piece           derived in toplevel from the two fields above
--   density                          used by init_wires to derive the neighbor
--                                    count when num_neighbors is 0
--   num_neighbors                    neighbor pieces each piece may wire to
--   window                           neighbors are drawn from
--                                    [piece_id - window, piece_id + window]
struct Config {
num_loops : uint,
num_pieces : uint,
pieces_per_superpiece : uint,
nodes_per_piece : uint,
wires_per_piece : uint,
pct_wire_in_piece : uint,
random_seed : uint,
steps : uint,
sync : uint,
prune : uint,
perform_checks : bool,
dump_values : bool,
pct_shared_nodes : double,
shared_nodes_per_piece : uint,
density : uint,
num_neighbors : uint,
window : int,
}
-- Per-node state of the circuit.
--   node_cap     : divisor applied to the accumulated charge in update_voltages
--   leakage      : update_voltages scales the voltage by (1.0 - leakage)
--   charge       : accumulated by distribute_charge, reset to 0 by update_voltages
--   node_voltage : current voltage of the node
fspace node {
node_cap : float,
leakage : float,
charge : float,
node_voltage : float,
}
-- Per-segment currents of one wire: ten fields, matching WIRE_SEGMENTS = 10.
-- calculate_new_currents copies them to and from a local array of that size.
struct currents {
_0 : float,
_1 : float,
_2 : float,
_3 : float,
_4 : float,
_5 : float,
_6 : float,
_7 : float,
_8 : float,
_9 : float,
}
-- Voltages at the nine interior points of one wire (WIRE_SEGMENTS - 1).
-- The two end-point voltages are not stored here; calculate_new_currents
-- takes them from the nodes referenced by in_ptr and out_ptr.
struct voltages {
_0 : float,
_1 : float,
_2 : float,
_3 : float,
_4 : float,
_5 : float,
_6 : float,
_7 : float,
_8 : float,
}
-- Smallest (`first`) and largest (`last`) shared-node id referenced by the
-- cross-piece wires of one superpiece. Written by init_wires and turned into
-- a coloring range by create_ghost_partition.
struct ghost_range {
first : int,
last : int,
}
-- A wire connecting two nodes, parameterized by the private (rpn), shared
-- (rsn) and ghost (rgn) node regions its end points may live in.
--   in_ptr  : end point in rpn or rsn
--   out_ptr : end point in rpn, rsn or rgn
--   inductance, resistance, wire_cap : parameters read by calculate_new_currents
--   current, voltage : per-segment state updated by calculate_new_currents
fspace wire(rpn : region(node),
rsn : region(node),
rgn : region(node)) {
in_ptr : ptr(node, rpn, rsn),
out_ptr : ptr(node, rpn, rsn, rgn),
inductance : float,
resistance : float,
wire_cap : float,
current : currents,
voltage : voltages,
}
-- Overrides fields of `conf` from the process command line and returns the
-- updated copy (the argument is passed by value).
--
-- Flags that take an integer value (parsed with atoi):
--   -l num_loops        -i steps             -p num_pieces
--   -pps pieces_per_superpiece               -npp nodes_per_piece
--   -wpp wires_per_piece                     -pct pct_wire_in_piece
--   -s random_seed      -sync sync           -prune prune
-- Flags without a value:
--   -checks sets perform_checks              -dump sets dump_values
--
-- Arguments that match none of the flags are skipped. A value-taking flag
-- that appears as the very last argument is skipped as well: there is no
-- value to read, and indexing argv past argc must be avoided.
terra parse_input_args(conf : Config)
  var args = c.legion_runtime_get_input_args()
  for i = 0, args.argc do
    -- True when argv[i + 1] exists and may be consumed as this flag's value.
    var has_value = i + 1 < args.argc
    if has_value and cstring.strcmp(args.argv[i], "-l") == 0 then
      i = i + 1
      conf.num_loops = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-i") == 0 then
      i = i + 1
      conf.steps = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-p") == 0 then
      i = i + 1
      conf.num_pieces = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-pps") == 0 then
      i = i + 1
      conf.pieces_per_superpiece = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-npp") == 0 then
      i = i + 1
      conf.nodes_per_piece = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-wpp") == 0 then
      i = i + 1
      conf.wires_per_piece = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-pct") == 0 then
      i = i + 1
      conf.pct_wire_in_piece = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-s") == 0 then
      i = i + 1
      conf.random_seed = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-sync") == 0 then
      i = i + 1
      conf.sync = std.atoi(args.argv[i])
    elseif has_value and cstring.strcmp(args.argv[i], "-prune") == 0 then
      i = i + 1
      conf.prune = std.atoi(args.argv[i])
    elseif cstring.strcmp(args.argv[i], "-checks") == 0 then
      conf.perform_checks = true
    elseif cstring.strcmp(args.argv[i], "-dump") == 0 then
      conf.dump_values = true
    end
  end
  return conf
end
-- Returns one entry of `arr`, chosen by scaling a drand48() sample to the
-- index range [0, num_elmts).
terra random_element(arr : &c.legion_ptr_t,
                     num_elmts : uint)
  return arr[ [uint](drand48() * num_elmts) ]
end
-- Fills every node of `rn` with random electrical parameters and a zero
-- charge. Three drand48() samples are drawn per node, in field order:
-- capacitance, leakage, initial voltage.
task init_nodes(rn : region(node))
where
  reads writes(rn)
do
  for n in rn do
    n.node_cap = drand48() + 1.0           -- sample shifted up by 1.0
    n.leakage = 0.1 * drand48()            -- sample scaled by 0.1
    n.charge = 0.0
    n.node_voltage = 2 * drand48() - 1.0   -- sample doubled, then shifted down by 1.0
  end
end
-- Initializes every wire belonging to superpiece `spiece_id`.
--
-- For each piece of the superpiece the task first draws `num_neighbors`
-- distinct neighbor pieces from a window around the piece, then gives each of
-- the piece's wires random electrical parameters, a random in-node inside the
-- piece and an out-node that is either inside the piece or a shared node of
-- one of the chosen neighbors. The smallest and largest shared-node id used
-- by such cross-piece wires is stored into every entry of `rgr`.
--
-- The result depends on the order of the drand48() calls below.
task init_wires(spiece_id : int,
conf : Config,
rgr : region(ghost_range),
rpn : region(node),
rsn : region(node),
all_shared : region(node),
rw : region(wire(rpn, rsn, all_shared)))
where
reads writes(rw, rgr)
do
-- npp / snpp / pnpp: total, shared and private nodes per piece.
var npp = conf.nodes_per_piece
var snpp = conf.shared_nodes_per_piece
var pnpp = npp - snpp
var num_pieces : int = conf.num_pieces
var num_shared_nodes = num_pieces * snpp
var pps = conf.pieces_per_superpiece
-- A neighbor count of 0 means "derive it from the density percentage";
-- either way it is capped at num_pieces - 1.
var num_neighbors : int = conf.num_neighbors
if num_neighbors == 0 then
num_neighbors = [int](ceil((num_pieces - 1) / 2.0 * (conf.density / 100.0)))
end
if num_neighbors >= conf.num_pieces then
num_neighbors = conf.num_pieces - 1
end
-- Widen the window when it could not hold num_neighbors pieces.
var window = conf.window
if window * 2 < num_neighbors then
window = (num_neighbors + 1) / 2
end
-- Scratch arrays (freed at the end of the task):
--   piece_shared_nodes[p] : how many shared nodes of piece p are in use so far
--   neighbor_ids          : the neighbors drawn for the current piece
--   alread_picked[p]      : whether piece p was already drawn as a neighbor
var piece_shared_nodes = [&uint](c.malloc([sizeof(uint)] * num_pieces))
var neighbor_ids : &uint = [&uint](c.malloc([sizeof(uint)] * num_neighbors))
var alread_picked : &bool = [&bool](c.malloc([sizeof(bool)] * num_pieces))
-- NOTE(review): if no cross-piece wire is ever created these keep their
-- initial values, so range.first ends up larger than range.last.
var max_shared_node_id = 0
var min_shared_node_id = num_shared_nodes * pps
-- Index of the first wire of this superpiece.
var offset = spiece_id * pps * conf.wires_per_piece
for piece_id = spiece_id * pps, (spiece_id + 1) * pps do
-- Node ids are laid out with all shared nodes first, then the private ones.
var pn_ptr_offset = piece_id * pnpp + num_shared_nodes
var sn_ptr_offset = piece_id * snpp
-- Candidate neighbor window, shifted back into [0, num_pieces - 1] when it
-- would run off either end.
var start_piece_id : int = piece_id - window
var end_piece_id : int = piece_id + window
if start_piece_id < 0 then
start_piece_id = 0
end_piece_id = min(num_neighbors, num_pieces - 1)
end
if end_piece_id >= num_pieces then
start_piece_id = max(0, (num_pieces - 1) - num_neighbors)
end_piece_id = num_pieces - 1
end
-- Reset the per-piece bookkeeping.
for i = 0, num_pieces do
piece_shared_nodes[i] = 0
alread_picked[i] = false
end
var window_size = end_piece_id - start_piece_id + 1
regentlib.assert(start_piece_id >= 0, "wrong start piece id")
regentlib.assert(end_piece_id < num_pieces, "wrong end piece id")
regentlib.assert(start_piece_id <= end_piece_id, "wrong neighbor range")
-- Draw num_neighbors distinct neighbors by rejection sampling. The loop only
-- terminates if the window holds at least num_neighbors pieces other than
-- piece_id.
for i = 0, num_neighbors do
var neighbor_id = [uint](drand48() * window_size + start_piece_id)
while neighbor_id == piece_id or alread_picked[neighbor_id] do
neighbor_id = [uint](drand48() * window_size + start_piece_id)
end
alread_picked[neighbor_id] = true
neighbor_ids[i] = neighbor_id
end
for wire_id = 0, conf.wires_per_piece do
var wire =
unsafe_cast(ptr(wire(rpn, rsn, all_shared), rw), ptr(wire_id + offset))
wire.current.{_0, _1, _2, _3, _4, _5, _6, _7, _8, _9} = 0.0
wire.voltage.{_0, _1, _2, _3, _4, _5, _6, _7, _8} = 0.0
wire.resistance = drand48() * 10.0 + 1.0
-- Keep inductance on the order of 1e-3 * dt to avoid resonance problems
wire.inductance = (drand48() + 0.1) * DELTAT * 1e-3
wire.wire_cap = drand48() * 0.1
-- In-node: a random node of this piece. Local indices below snpp map to the
-- piece's shared nodes, the rest to its private nodes.
var in_node = [uint](drand48() * npp)
if in_node < snpp then
in_node += sn_ptr_offset
else
in_node += pn_ptr_offset - snpp
end
wire.in_ptr = dynamic_cast(ptr(node, rpn, rsn), ptr(in_node))
regentlib.assert(not isnull(wire.in_ptr), "picked an invalid random pointer")
-- Out-node: inside this piece with probability pct_wire_in_piece percent
-- (always when there is only one piece), otherwise a shared node of one of
-- the drawn neighbors.
var out_node = 0
if (100 * drand48() < conf.pct_wire_in_piece) or (conf.num_pieces == 1) then
out_node = [uint](drand48() * npp)
if out_node < snpp then
out_node += sn_ptr_offset
else
out_node += pn_ptr_offset - snpp
end
else
---- pick a random other piece and a node from there
--var pp = [uint](drand48() * (conf.num_pieces - 1))
--if pp >= piece_id then pp += 1 end
var pp = neighbor_ids[ [uint](drand48() * num_neighbors) ]
-- A random index at or beyond the number of shared nodes already in use for
-- pp is replaced by the next unused index, and the in-use count grows (up to
-- snpp).
var idx = [uint](drand48() * snpp)
if idx >= piece_shared_nodes[pp] then
idx = piece_shared_nodes[pp]
if piece_shared_nodes[pp] < snpp then
piece_shared_nodes[pp] = piece_shared_nodes[pp] + 1
end
end
out_node = pp * snpp + idx
-- Track the extent of shared nodes referenced across pieces.
max_shared_node_id = max(max_shared_node_id, out_node)
min_shared_node_id = min(min_shared_node_id, out_node)
end
wire.out_ptr = dynamic_cast(ptr(node, rpn, rsn, all_shared), ptr(out_node))
end
offset += conf.wires_per_piece
end
-- Publish the referenced shared-node extent to every entry of rgr.
for range in rgr do
range.first = min_shared_node_id
range.last = max_shared_node_id
end
c.free(piece_shared_nodes)
c.free(neighbor_ids)
c.free(alread_picked)
end
-- Initializes one superpiece: first its private nodes, then its shared
-- nodes, and finally its wires (which also records the ghost range in rgr).
task init_piece(spiece_id  : int,
                conf       : Config,
                rgr        : region(ghost_range),
                rpn        : region(node),
                rsn        : region(node),
                all_shared : region(node),
                rw         : region(wire(rpn, rsn, all_shared)))
where
  reads writes(rgr, rpn, rsn, rw)
do
  -- Node initialization comes first, private region before shared region,
  -- so the drand48() samples are consumed in that order.
  init_nodes(rpn)
  init_nodes(rsn)

  init_wires(spiece_id, conf, rgr, rpn, rsn, all_shared, rw)
end
-- Re-casts both end points of every wire in `rw` against the regions given
-- here (private, shared, ghost) and aborts if either end point falls outside
-- of them.
task init_pointers(rpn : region(node),
                   rsn : region(node),
                   rgn : region(node),
                   rw : region(wire(rpn, rsn, rgn)))
where
  reads writes(rw.{in_ptr, out_ptr})
do
  for conn in rw do
    -- The in end point must resolve to the private or shared region.
    conn.in_ptr = dynamic_cast(ptr(node, rpn, rsn), conn.in_ptr)
    regentlib.assert(not isnull(conn.in_ptr), "in ptr is null!")

    -- The out end point may additionally resolve to the ghost region.
    conn.out_ptr = dynamic_cast(ptr(node, rpn, rsn, rgn), conn.out_ptr)
    regentlib.assert(not isnull(conn.out_ptr), "out ptr is null!")
  end
end
-- Runs `steps` iterations of the per-wire RLC update for every wire in `rw`,
-- using the node voltages at both wire ends as fixed boundary values, and
-- stores the resulting segment currents and interior voltages back into the
-- wire. When `print_ts` is set, the current time in microseconds is printed
-- before any work is done.
task calculate_new_currents(print_ts : bool,
                            steps : uint,
                            rpn : region(node),
                            rsn : region(node),
                            rgn : region(node),
                            rw : region(wire(rpn, rsn, rgn)))
where
  reads(rpn.node_voltage, rsn.node_voltage, rgn.node_voltage,
        rw.{in_ptr, out_ptr, inductance, resistance, wire_cap}),
  reads writes(rw.{current, voltage})
do
  if print_ts then
    c.printf("t: %ld\n", c.legion_get_current_time_in_micros())
  end

  var step_dt : float = DELTAT
  var inv_dt : float = 1.0 / step_dt

  --__demand(__vectorize)
  for wr in rw do
    -- Working copies: seg_v has one slot per junction including both wire
    -- ends, seg_i one slot per segment. prev_* keep the values from before
    -- this call.
    var seg_v : float[WIRE_SEGMENTS + 1]
    var seg_i : float[WIRE_SEGMENTS]
    var prev_i : float[WIRE_SEGMENTS]
    var prev_v : float[WIRE_SEGMENTS - 1]

    -- Pin the outer voltages to the node voltages
    seg_v[0] = wr.in_ptr.node_voltage
    seg_v[WIRE_SEGMENTS] = wr.out_ptr.node_voltage

    -- Load the interior voltages (slots 1..9) and remember them.
    seg_v[1] = wr.voltage._0
    seg_v[2] = wr.voltage._1
    seg_v[3] = wr.voltage._2
    seg_v[4] = wr.voltage._3
    seg_v[5] = wr.voltage._4
    seg_v[6] = wr.voltage._5
    seg_v[7] = wr.voltage._6
    seg_v[8] = wr.voltage._7
    seg_v[9] = wr.voltage._8
    for k = 0, WIRE_SEGMENTS - 1 do
      prev_v[k] = seg_v[k + 1]
    end

    -- Load the segment currents and remember them.
    seg_i[0] = wr.current._0
    seg_i[1] = wr.current._1
    seg_i[2] = wr.current._2
    seg_i[3] = wr.current._3
    seg_i[4] = wr.current._4
    seg_i[5] = wr.current._5
    seg_i[6] = wr.current._6
    seg_i[7] = wr.current._7
    seg_i[8] = wr.current._8
    seg_i[9] = wr.current._9
    for k = 0, WIRE_SEGMENTS do
      prev_i[k] = seg_i[k]
    end

    -- Solve the RLC model iteratively
    var inductance : float = wr.inductance
    var recip_resistance : float = 1.0 / wr.resistance
    var recip_capacitance : float = 1.0 / wr.wire_cap
    for iter = 0, steps do
      -- first, figure out the new current from the voltage differential
      -- and our inductance:
      -- dV = R*I + L*I' ==> I = (dV - L*I')/R
      for k = 0, WIRE_SEGMENTS do
        seg_i[k] = ((seg_v[k + 1] - seg_v[k]) -
                    (inductance * (seg_i[k] - prev_i[k]) * inv_dt)) * recip_resistance
      end
      -- Now update the inter-node voltages
      for k = 0, WIRE_SEGMENTS - 1 do
        seg_v[k + 1] = prev_v[k] + step_dt * (seg_i[k] - seg_i[k + 1]) * recip_capacitance
      end
    end

    -- Write out the results
    wr.current._0 = seg_i[0]
    wr.current._1 = seg_i[1]
    wr.current._2 = seg_i[2]
    wr.current._3 = seg_i[3]
    wr.current._4 = seg_i[4]
    wr.current._5 = seg_i[5]
    wr.current._6 = seg_i[6]
    wr.current._7 = seg_i[7]
    wr.current._8 = seg_i[8]
    wr.current._9 = seg_i[9]
    wr.voltage._0 = seg_v[1]
    wr.voltage._1 = seg_v[2]
    wr.voltage._2 = seg_v[3]
    wr.voltage._3 = seg_v[4]
    wr.voltage._4 = seg_v[5]
    wr.voltage._5 = seg_v[6]
    wr.voltage._6 = seg_v[7]
    wr.voltage._7 = seg_v[8]
    wr.voltage._8 = seg_v[9]
  end
end
-- For every wire, reduces charge into its two end nodes: the first segment's
-- current (times -DELTAT) into the in node, the last segment's current (times
-- DELTAT) into the out node.
task distribute_charge(rpn : region(node),
                       rsn : region(node),
                       rgn : region(node),
                       rw : region(wire(rpn, rsn, rgn)))
where
  reads(rw.{in_ptr, out_ptr, current._0, current._9}),
  reduces +(rpn.charge, rsn.charge, rgn.charge)
do
  var step = DELTAT
  for wr in rw do
    var charge_into_in : float = -step * wr.current._0
    var charge_into_out : float = step * wr.current._9
    wr.in_ptr.charge += charge_into_in
    wr.out_ptr.charge += charge_into_out
  end
end
-- Applies the accumulated charge to every private and shared node: the
-- voltage gains charge / node_cap, is then scaled by (1.0 - leakage), and the
-- charge is reset to zero. When `print_ts` is set, the current time in
-- microseconds is printed after both regions have been processed.
task update_voltages(print_ts : bool,
                     rpn : region(node),
                     rsn : region(node))
where
  reads(rpn.{node_cap, leakage}, rsn.{node_cap, leakage}),
  reads writes(rpn.{node_voltage, charge}, rsn.{node_voltage, charge})
do
  -- Private nodes.
  for n in rpn do
    var v : float = n.node_voltage + n.charge / n.node_cap
    v = v * (1.0 - n.leakage)
    n.node_voltage = v
    n.charge = 0.0
  end

  -- Shared nodes: same update.
  for n in rsn do
    var v : float = n.node_voltage + n.charge / n.node_cap
    v = v * (1.0 - n.leakage)
    n.node_voltage = v
    n.charge = 0.0
  end

  if print_ts then
    c.printf("t: %ld\n", c.legion_get_current_time_in_micros())
  end
end
-- Prints one line per wire: its ten segment currents followed by its nine
-- interior voltages, each formatted with " %.5g".
task dump_task(rpn : region(node),
               rsn : region(node),
               rgn : region(node),
               rw : region(wire(rpn, rsn, rgn)))
where
  reads(rpn, rsn, rgn, rw)
do
  for wr in rw do
    -- segment currents
    c.printf(" %.5g", wr.current._0)
    c.printf(" %.5g", wr.current._1)
    c.printf(" %.5g", wr.current._2)
    c.printf(" %.5g", wr.current._3)
    c.printf(" %.5g", wr.current._4)
    c.printf(" %.5g", wr.current._5)
    c.printf(" %.5g", wr.current._6)
    c.printf(" %.5g", wr.current._7)
    c.printf(" %.5g", wr.current._8)
    c.printf(" %.5g", wr.current._9)
    -- interior voltages
    c.printf(" %.5g", wr.voltage._0)
    c.printf(" %.5g", wr.voltage._1)
    c.printf(" %.5g", wr.voltage._2)
    c.printf(" %.5g", wr.voltage._3)
    c.printf(" %.5g", wr.voltage._4)
    c.printf(" %.5g", wr.voltage._5)
    c.printf(" %.5g", wr.voltage._6)
    c.printf(" %.5g", wr.voltage._7)
    c.printf(" %.5g", wr.voltage._8)
    c.printf("\n")
  end
end
-- Builds the three point colorings used to partition the node region.
-- Node ids are laid out with all shared nodes first ([0, num_shared_nodes))
-- followed by all private nodes:
--   privacy_map      : color 1 = the shared id range, color 0 = the private one
--   shared_node_map  : per superpiece, its slice of the shared id range
--   private_node_map : per superpiece, its slice of the private id range
terra create_colorings(conf : Config)
  var maps : Colorings
  maps.privacy_map = c.legion_point_coloring_create()
  maps.private_node_map = c.legion_point_coloring_create()
  maps.shared_node_map = c.legion_point_coloring_create()

  var num_circuit_nodes : uint64 = conf.num_pieces * conf.nodes_per_piece
  var num_shared_nodes = conf.num_pieces * conf.shared_nodes_per_piece
  regentlib.assert(
    (num_circuit_nodes - num_shared_nodes) % conf.num_pieces == 0,
    "something went wrong in the arithmetic")

  -- Shared vs. private split of the whole id space.
  c.legion_point_coloring_add_range(maps.privacy_map, ptr(1),
    c.legion_ptr_t { value = 0 },
    c.legion_ptr_t { value = num_shared_nodes - 1})
  c.legion_point_coloring_add_range(maps.privacy_map, ptr(0),
    c.legion_ptr_t { value = num_shared_nodes },
    c.legion_ptr_t { value = num_circuit_nodes - 1})

  var pieces_per_super = conf.pieces_per_superpiece
  var num_superpieces = conf.num_pieces / pieces_per_super
  var shared_per_piece = conf.shared_nodes_per_piece
  var private_per_piece = conf.nodes_per_piece - shared_per_piece

  -- Each superpiece owns a contiguous slice of both id ranges.
  for spiece_id = 0, num_superpieces do
    var shared_first = spiece_id * shared_per_piece * pieces_per_super
    var shared_last = (spiece_id + 1) * shared_per_piece * pieces_per_super - 1
    c.legion_point_coloring_add_range(maps.shared_node_map, ptr(spiece_id),
      c.legion_ptr_t { value = shared_first },
      c.legion_ptr_t { value = shared_last })

    var private_first = num_shared_nodes + spiece_id * private_per_piece * pieces_per_super
    var private_last = num_shared_nodes + (spiece_id + 1) * private_per_piece * pieces_per_super - 1
    c.legion_point_coloring_add_range(maps.private_node_map, ptr(spiece_id),
      c.legion_ptr_t { value = private_first },
      c.legion_ptr_t { value = private_last })
  end

  return maps
end
-- Builds the aliased "ghost" partition of the shared nodes: the sub-region
-- for each entry of `ghost_ranges` covers the shared-node ids from that
-- entry's `first` to its `last` field.
--
-- `conf` is not used by the body; it is kept so existing call sites remain
-- valid.
task create_ghost_partition(conf : Config,
                            all_shared : region(node),
                            ghost_ranges : region(ghost_range))
where
  reads(ghost_ranges)
do
  var ghost_node_map = c.legion_point_coloring_create()
  for range in ghost_ranges do
    c.legion_point_coloring_add_range(ghost_node_map,
                                      range,
                                      c.legion_ptr_t { value = range.first },
                                      c.legion_ptr_t { value = range.last })
  end
  var ghost_partition =
    partition(aliased, all_shared, ghost_node_map, ghost_ranges.ispace)
  -- The coloring is only needed while the partition is being created;
  -- release the handle so it is not leaked.
  c.legion_point_coloring_destroy(ghost_node_map)
  return ghost_partition
end
-- Program entry point: sets up the default configuration, applies the
-- command-line overrides, creates and partitions the node and wire regions,
-- initializes the circuit, runs the timed simulation loop and prints the
-- elapsed time and an estimated GFLOPS figure.
task toplevel()
-- Defaults; parse_input_args may override most of them.
var conf : Config
conf.num_loops = 5
conf.num_pieces = 4
conf.pieces_per_superpiece = 1
conf.nodes_per_piece = 4
conf.wires_per_piece = 8
conf.pct_wire_in_piece = 80
conf.random_seed = 12345
conf.steps = STEPS
conf.sync = 0
conf.prune = 0
conf.perform_checks = false
conf.dump_values = false
conf.pct_shared_nodes = 1.0
conf.density = 20 -- ignored if num_neighbors > 0
conf.num_neighbors = 5 -- set 0 if density parameter is to be picked
conf.window = 3 -- find neighbors among [piece_id - window, piece_id + window]
conf = parse_input_args(conf)
regentlib.assert(conf.num_pieces % conf.pieces_per_superpiece == 0,
"pieces should be evenly distributed to superpieces")
-- pct_shared_nodes is a percentage; round the shared-node count up.
conf.shared_nodes_per_piece =
[int](ceil(conf.nodes_per_piece * conf.pct_shared_nodes / 100.0))
-- NOTE(review): the second "nodes/piece" label in the format string below is
-- fed conf.shared_nodes_per_piece.
c.printf("circuit settings: loops=%d prune=%d pieces=%d (pieces/superpiece=%d) nodes/piece=%d (nodes/piece=%d) wires/piece=%d pct_in_piece=%d seed=%d\n",
conf.num_loops, conf.prune, conf.num_pieces, conf.pieces_per_superpiece, conf.nodes_per_piece,
conf.shared_nodes_per_piece, conf.wires_per_piece, conf.pct_wire_in_piece, conf.random_seed)
var num_pieces = conf.num_pieces
var num_superpieces = conf.num_pieces / conf.pieces_per_superpiece
var num_circuit_nodes : uint64 = num_pieces * conf.nodes_per_piece
var num_circuit_wires : uint64 = num_pieces * conf.wires_per_piece
var all_nodes = region(ispace(ptr, num_circuit_nodes), node)
var all_wires = region(ispace(ptr, num_circuit_wires), wire(wild, wild, wild))
-- report mesh size in bytes
do
var node_size = [ terralib.sizeof(node) ]
var wire_size = [ terralib.sizeof(wire(wild,wild,wild)) ]
c.printf("Circuit memory usage:\n")
c.printf(" Nodes : %10lld * %4d bytes = %12lld bytes\n", num_circuit_nodes, node_size, num_circuit_nodes * node_size)
c.printf(" Wires : %10lld * %4d bytes = %12lld bytes\n", num_circuit_wires, wire_size, num_circuit_wires * wire_size)
var total = ((num_circuit_nodes * node_size) + (num_circuit_wires * wire_size))
c.printf(" Total %12lld bytes\n", total)
end
-- Partition the nodes: first into private (color 0) and shared (color 1),
-- then each of those by superpiece. Wires and ghost ranges are split equally
-- over the same launch domain.
var colorings = create_colorings(conf)
var rp_all_nodes = partition(disjoint, all_nodes, colorings.privacy_map, ispace(ptr, 2))
var all_private = rp_all_nodes[0]
var all_shared = rp_all_nodes[1]
var launch_domain = ispace(ptr, num_superpieces)
var rp_private = partition(disjoint, all_private, colorings.private_node_map, launch_domain)
var rp_shared = partition(disjoint, all_shared, colorings.shared_node_map, launch_domain)
var rp_wires = partition(equal, all_wires, launch_domain)
var ghost_ranges = region(ispace(ptr, num_superpieces), ghost_range)
var rp_ghost_ranges = partition(equal, ghost_ranges, launch_domain)
-- Initialize every superpiece. At this point a wire's out pointer may refer
-- to any shared node, hence all_shared is passed as the third node region.
for j = 0, 1 do
__demand(__parallel)
for i = 0, num_superpieces do
init_piece(i, conf, rp_ghost_ranges[i],
rp_private[i], rp_shared[i], all_shared, rp_wires[i])
end
end
-- Build the ghost partition from the ranges recorded during initialization,
-- then re-cast all wire pointers against the per-superpiece regions.
var rp_ghost = create_ghost_partition(conf, all_shared, ghost_ranges)
__demand(__spmd)
for j = 0, 1 do
for i = 0, num_superpieces do
init_pointers(rp_private[i], rp_shared[i], rp_ghost[i], rp_wires[i])
end
end
c.printf("Starting main simulation loop\n")
-- NOTE(review): simulation_success is never set to false in this task, so
-- the FAILURE branch below cannot be reached.
var simulation_success = true
var steps = conf.steps
var prune = conf.prune
-- The loop runs 2*prune iterations more than requested.
var num_loops = conf.num_loops + 2*prune
-- Wait for all preceding work before taking the start timestamp.
__fence(__execution, __block)
var ts_start = c.legion_get_current_time_in_micros()
__demand(__spmd, __trace)
for j = 0, num_loops do
-- Timestamps are printed by the tasks at iteration `prune` (on entry to
-- calculate_new_currents) and at iteration num_loops - prune - 1 (at the end
-- of update_voltages).
for i = 0, num_superpieces do
calculate_new_currents(j == prune, steps, rp_private[i], rp_shared[i], rp_ghost[i], rp_wires[i])
end
for i = 0, num_superpieces do
distribute_charge(rp_private[i], rp_shared[i], rp_ghost[i], rp_wires[i])
end
for i = 0, num_superpieces do
update_voltages(j == num_loops - prune - 1, rp_private[i], rp_shared[i])
end
end
-- Wait for the simulation to finish before taking the end timestamp.
__fence(__execution, __block)
var ts_end = c.legion_get_current_time_in_micros()
if simulation_success then
c.printf("SUCCESS!\n")
else
c.printf("FAILURE!\n")
end
do
var sim_time = 1e-6 * (ts_end - ts_start)
c.printf("ELAPSED TIME = %7.3f s\n", sim_time)
-- Compute the floating point operations per second
var num_circuit_nodes : uint64 = conf.num_pieces * conf.nodes_per_piece
var num_circuit_wires : uint64 = conf.num_pieces * conf.wires_per_piece
-- calculate currents
var operations : uint64 = num_circuit_wires * (WIRE_SEGMENTS*6 + (WIRE_SEGMENTS-1)*4) * conf.steps
-- distribute charge
operations = operations + (num_circuit_wires * 4)
-- update voltages
operations = operations + (num_circuit_nodes * 4)
-- multiply by the number of loops
-- NOTE(review): this uses conf.num_loops, while the timed loop above runs
-- conf.num_loops + 2*prune iterations -- confirm this is intended.
operations = operations * conf.num_loops
-- Compute the number of gflops
var gflops = (1e-9*operations)/sim_time
c.printf("GFLOPS = %7.3f GFLOPS\n", gflops)
end
c.printf("simulation complete - destroying regions\n")
end
-- Entry point: run the program in-process unless SAVEOBJ=1 asks for a
-- standalone executable to be written instead.
if os.getenv('SAVEOBJ') ~= '1' then
  regentlib.start(toplevel, ccircuit.register_mappers)
else
  local objname = os.getenv('OBJNAME')
  local root_dir = arg[0]:match(".*/") or "./"
  -- Link against libcircuit.so in the directory part of OBJNAME, falling
  -- back to the script's own directory.
  local out_dir = (objname and objname:match('.*/')) or root_dir
  local link_flags = terralib.newlist({"-L" .. out_dir, "-lcircuit", "-lm"})

  -- With STANDALONE=1, libregent.so is copied into the output directory.
  if os.getenv('STANDALONE') == '1' then
    os.execute('cp ' .. os.getenv('LG_RT_DIR') .. '/../bindings/regent/libregent.so ' .. out_dir)
  end

  local exe = objname or "circuit"
  regentlib.saveobj(toplevel, exe, "executable", ccircuit.register_mappers, link_flags)
end