-
Notifications
You must be signed in to change notification settings - Fork 1
/
lutdata.go
88 lines (77 loc) · 2.07 KB
/
lutdata.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package engine
import (
"github.com/barnex/cuda5/cu"
"github.com/mumax/3/cuda"
"github.com/mumax/3/data"
"github.com/mumax/3/util"
"unsafe"
)
// lut is a look-up table for region based parameters.
// It keeps the table on the CPU, one [NREGION]float32 row per component,
// together with a GPU copy that is only uploaded when it is out of date.
type lut struct {
	// gpu copy of cpu buffer, only transferred when needed (see gpuLUT).
	gpu_buf cuda.LUTPtrs
	// gpu cache up-to date with cpu source? gpuLUT uploads cpu_buf and sets
	// this to true whenever it finds it false.
	gpu_ok bool
	// table data on cpu, indexed as cpu_buf[component][region].
	cpu_buf [][NREGION]float32
	// updates cpu data; its update() is called before every read through
	// cpuLUT or gpuLUT.
	source updater
}
// updater is the hook through which a lut refreshes its CPU-side data.
// The lut calls update() at the start of every cpuLUT and gpuLUT call.
type updater interface {
	update() // updates cpu lookup table
}
// init prepares the table for nComp components and registers source as the
// object that refreshes the CPU data.
//
// Only the host-side slices are created here: cpu_buf gets one
// [NREGION]float32 row per component and gpu_buf gets one (still unset)
// slot per component. No device memory is allocated by this method; that is
// left to assureAlloc.
func (p *lut) init(nComp int, source updater) {
	p.source = source
	p.cpu_buf = make([][NREGION]float32, nComp)
	p.gpu_buf = make(cuda.LUTPtrs, nComp)
}
// cpuLUT returns an up-to-date version of the lookup-table on CPU.
// The source is asked to update first; the returned slice is the table's own
// cpu_buf, not a copy.
func (p *lut) cpuLUT() [][NREGION]float32 {
	src := p.source
	src.update()
	return p.cpu_buf
}
// gpuLUT returns an up-to-date version of the lookup-table on GPU.
//
// The source is asked to update first. If the GPU copy is then flagged as
// stale (gpu_ok == false), device buffers are ensured to exist, every
// component row of cpu_buf is copied host-to-device, and gpu_ok is set.
// When the copy is already valid, no transfer takes place.
func (p *lut) gpuLUT() cuda.LUTPtrs {
	p.source.update()
	if p.gpu_ok {
		// cached device copy is still valid
		return p.gpu_buf
	}

	// upload to GPU
	p.assureAlloc()
	cuda.Sync() // sync previous kernels, may still be using gpu lut
	for c, dst := range p.gpu_buf {
		src := unsafe.Pointer(&p.cpu_buf[c][0])
		cu.MemcpyHtoD(cu.DevicePtr(dst), src, cu.SIZEOF_FLOAT32*NREGION)
	}
	p.gpu_ok = true
	cuda.Sync() // sync upload

	return p.gpu_buf
}
// gpuLUT1 is a utility for LUTs of single-component data: it returns the
// up-to-date GPU pointer of the only component.
// It asserts (via util.Assert) that the table has exactly one component.
func (p *lut) gpuLUT1() cuda.LUTPtr {
	util.Assert(len(p.gpu_buf) == 1)
	ptrs := p.gpuLUT()
	return cuda.LUTPtr(ptrs[0])
}
// isZero reports whether all data in the (freshly updated) CPU table is 0,
// across every component and every region. A table without components is
// reported as zero.
func (p *lut) isZero() bool {
	table := p.cpuLUT()
	for c := range table {
		row := &table[c] // pointer avoids copying the whole row
		for i := range row {
			if row[i] != 0 {
				return false
			}
		}
	}
	return true
}
// assureAlloc makes sure every component of the table has a device buffer of
// NREGION float32 values, allocating the ones that are still unset.
//
// Each entry is checked individually rather than using entry 0 as a proxy
// for all of them, so the call is a no-op for a table with zero components
// (no out-of-range index on gpu_buf[0]) and a partially allocated table gets
// its missing buffers without re-allocating the existing ones.
func (p *lut) assureAlloc() {
	for i := range p.gpu_buf {
		if p.gpu_buf[i] == nil {
			p.gpu_buf[i] = cuda.MemAlloc(NREGION * cu.SIZEOF_FLOAT32)
		}
	}
}
// NComp returns the number of components in the table, i.e. the number of
// rows in the CPU buffer.
func (p *lut) NComp() int {
	return len(p.cpu_buf)
}
// Slice uncompresses the table to a full array with parameter values per
// cell.
//
// It fetches the up-to-date GPU table, obtains a buffer with NComp()
// components of the mesh size from cuda.Buffer, and decodes each component
// into it with cuda.RegionDecode using regions.Gpu().
// The second return value is always true.
// NOTE(review): presumably it tells the caller that the buffer may be
// recycled after use; confirm against the callers.
func (p *lut) Slice() (*data.Slice, bool) {
	table := p.gpuLUT()
	nComp := p.NComp()
	out := cuda.Buffer(nComp, Mesh().Size())
	for c := 0; c < nComp; c++ {
		dst := out.Comp(c)
		cuda.RegionDecode(dst, cuda.LUTPtr(table[c]), regions.Gpu())
	}
	return out, true
}