/
conv_mfm.go
105 lines (89 loc) · 2.87 KB
/
conv_mfm.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
package cuda
// Generation of Magnetic Force Microscopy images.
import (
"github.com/mumax/3/data"
"github.com/mumax/3/mag"
)
// MFMConvolution holds the state needed to perform the FFT-accelerated
// convolution used to generate Magnetic Force Microscopy images.
type MFMConvolution struct {
	// size is the 3D size of the input/output data.
	// kernSize is the size of the kernel and the logical FFT size.
	// fftKernSize is fftR2COutputSizeFloats(kernSize), set by initFFTKern3D.
	size, kernSize, fftKernSize [3]int

	// fftRBuf is the real-space buffer (forward FFT input, backward FFT
	// output), sized kernSize. fftCBuf receives the forward FFT output and
	// is sized fftR2COutputSizeFloats(kernSize). init allocates them with
	// two separate NewSlice calls.
	fftRBuf, fftCBuf *data.Slice

	gpuFFTKern [3]*data.Slice // scaled forward FFT of each kernel component
	fwPlan     fft3DR2CPlan   // forward FFT plan (1 component)
	bwPlan     fft3DC2RPlan   // backward FFT plan (1 component)
	kern       [3]*data.Slice // real-space kernel as returned by mag.MFMKernel
	mesh       *data.Mesh     // mesh passed to NewMFM; reused by Reinit
}
// Free releases the buffers and FFT plans held by c and clears the
// corresponding fields. Calling Free on a nil receiver is a no-op.
func (c *MFMConvolution) Free() {
	if c == nil {
		return
	}
	c.size = [3]int{}
	c.kernSize = [3]int{}

	// init allocates fftCBuf and fftRBuf with two independent NewSlice
	// calls, so each one has to be released; freeing only fftCBuf would
	// leak the real-space buffer.
	// NOTE(review): like the calls that were already here, this relies on
	// (*data.Slice).Free tolerating a nil receiver — confirm.
	c.fftCBuf.Free()
	c.fftCBuf = nil
	c.fftRBuf.Free()
	c.fftRBuf = nil

	for j := 0; j < 3; j++ {
		c.gpuFFTKern[j].Free()
		c.gpuFFTKern[j] = nil
		// kern[j] is only dropped, not freed, exactly as before.
		c.kern[j] = nil
	}

	c.fwPlan.Free()
	c.bwPlan.Free()
}
// init creates the FFT plans and buffers, all sized from c.kernSize, and
// then transforms the kernel through initFFTKern3D. c.kernSize and c.kern
// must already be set when init is called.
func (c *MFMConvolution) init() {
	// The plans work on the full kernel size, which is the logical FFT size.
	nx, ny, nz := c.kernSize[X], c.kernSize[Y], c.kernSize[Z]
	c.fwPlan = newFFT3DR2C(nx, ny, nz)
	c.bwPlan = newFFT3DC2R(nx, ny, nz)

	// Device buffers: one real-space buffer of kernSize, plus buffers of
	// the R2C output size for the FFT result and for each kernel component.
	complexSize := fftR2COutputSizeFloats(c.kernSize)
	c.fftCBuf = NewSlice(1, complexSize)
	c.fftRBuf = NewSlice(1, c.kernSize)
	for i := range c.gpuFFTKern {
		c.gpuFFTKern[i] = NewSlice(1, complexSize)
	}

	c.initFFTKern3D()
}
// initFFTKern3D records the R2C output size for c.kernSize in c.fftKernSize
// and, for each of the three components of c.kern, runs the forward FFT and
// stores the scaled result in the matching c.gpuFFTKern slice.
func (c *MFMConvolution) initFFTKern3D() {
	c.fftKernSize = fftR2COutputSizeFloats(c.kernSize)

	for comp := range c.kern {
		// Load the kernel component into the cleared real-space buffer
		// and transform it into fftCBuf.
		zero1_async(c.fftRBuf)
		data.Copy(c.fftRBuf, c.kern[comp])
		c.fwPlan.ExecAsync(c.fftRBuf, c.fftCBuf)

		// The factor 2 was already flagged as unexplained ("??") by the
		// original author.
		norm := 2 / float32(c.fwPlan.InputLen())

		// Clear the destination, then combine it with fftCBuf using
		// weights 0 and norm.
		dst := c.gpuFFTKern[comp]
		zero1_async(dst)
		Madd2(dst, dst, c.fftCBuf, 0, norm)
	}
}
// Exec stores the MFM image in outp, based on the magnetization in inp.
// Each of the three components is padded into the real-space buffer via
// copyPadMul (which also receives vol, Bsat and regions), forward
// transformed, multiplied with the matching transformed kernel, transformed
// back and un-padded into the same component of outp.
func (c *MFMConvolution) Exec(outp, inp, vol *data.Slice, Bsat LUTPtr, regions *Bytes) {
	// Dimensions handed to the kernel multiplication; they do not change
	// between components. The halving of the X size was already flagged as
	// unexplained ("??") by the original author.
	nx, ny := c.fftKernSize[X]/2, c.fftKernSize[Y]

	for comp := 0; comp < 3; comp++ {
		// Forward: pad the input component into a cleared buffer and transform.
		zero1_async(c.fftRBuf)
		copyPadMul(c.fftRBuf, inp.Comp(comp), vol, c.kernSize, c.size, Bsat, regions)
		c.fwPlan.ExecAsync(c.fftRBuf, c.fftCBuf)

		// Multiply with the transformed kernel for this component.
		kernMulC_async(c.fftCBuf, c.gpuFFTKern[comp], nx, ny)

		// Backward: transform back and extract the un-padded result.
		c.bwPlan.ExecAsync(c.fftCBuf, c.fftRBuf)
		copyUnPad(outp.Comp(comp), c.fftRBuf, c.size, c.kernSize)
	}
}
// Reinit rebuilds the real-space kernel for the stored mesh with the given
// lift and tipsize, then recomputes its FFT. Plans and buffers are reused
// as they are.
func (c *MFMConvolution) Reinit(lift, tipsize float64) {
	kernel := mag.MFMKernel(c.mesh, lift, tipsize)
	c.kern = kernel
	c.initFFTKern3D()
}
// NewMFM builds an MFM convolution for the given mesh, lift and tipsize.
// The kernel comes from mag.MFMKernel; its X component determines the
// kernel/FFT size, while the mesh size is used as the input/output size.
func NewMFM(mesh *data.Mesh, lift, tipsize float64) *MFMConvolution {
	kernel := mag.MFMKernel(mesh, lift, tipsize)

	c := &MFMConvolution{
		size:     mesh.Size(),
		kern:     kernel,
		kernSize: kernel[X].Size(),
		mesh:     mesh, // kept so Reinit can rebuild the kernel later
	}
	c.init()
	return c
}