-
Notifications
You must be signed in to change notification settings - Fork 1
/
rk45dp.go
136 lines (113 loc) · 3.36 KB
/
rk45dp.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package engine
import (
"github.com/mumax/3/cuda"
"github.com/mumax/3/data"
"github.com/mumax/3/util"
"math"
)
type RK45DP struct {
k1 *data.Slice // torque at end of step is kept for beginning of next step
}
func (rk *RK45DP) Step() {
m := M.Buffer()
size := m.Size()
if FixDt != 0 {
Dt_si = FixDt
}
// upon resize: remove wrongly sized k1
if rk.k1.Size() != m.Size() {
rk.Free()
}
// first step ever: one-time k1 init and eval
if rk.k1 == nil {
rk.k1 = cuda.NewSlice(3, size)
torqueFn(rk.k1)
}
// FSAL cannot be used with finite temperature
if !Temp.isZero() {
torqueFn(rk.k1)
}
t0 := Time
// backup magnetization
m0 := cuda.Buffer(3, size)
defer cuda.Recycle(m0)
data.Copy(m0, m)
k2, k3, k4, k5, k6 := cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size), cuda.Buffer(3, size)
defer cuda.Recycle(k2)
defer cuda.Recycle(k3)
defer cuda.Recycle(k4)
defer cuda.Recycle(k5)
defer cuda.Recycle(k6)
// k2 will be re-used as k7
h := float32(Dt_si * GammaLL) // internal time step = Dt * gammaLL
// there is no explicit stage 1: k1 from previous step
// stage 2
Time = t0 + (1./5.)*Dt_si
cuda.Madd2(m, m, rk.k1, 1, (1./5.)*h) // m = m*1 + k1*h/5
M.normalize()
torqueFn(k2)
// stage 3
Time = t0 + (3./10.)*Dt_si
cuda.Madd3(m, m0, rk.k1, k2, 1, (3./40.)*h, (9./40.)*h)
M.normalize()
torqueFn(k3)
// stage 4
Time = t0 + (4./5.)*Dt_si
madd4(m, m0, rk.k1, k2, k3, 1, (44./45.)*h, (-56./15.)*h, (32./9.)*h)
M.normalize()
torqueFn(k4)
// stage 5
Time = t0 + (8./9.)*Dt_si
madd5(m, m0, rk.k1, k2, k3, k4, 1, (19372./6561.)*h, (-25360./2187.)*h, (64448./6561.)*h, (-212./729.)*h)
M.normalize()
torqueFn(k5)
// stage 6
Time = t0 + (1.)*Dt_si
madd6(m, m0, rk.k1, k2, k3, k4, k5, 1, (9017./3168.)*h, (-355./33.)*h, (46732./5247.)*h, (49./176.)*h, (-5103./18656.)*h)
M.normalize()
torqueFn(k6)
// stage 7: 5th order solution
Time = t0 + (1.)*Dt_si
// no k2
madd6(m, m0, rk.k1, k3, k4, k5, k6, 1, (35./384.)*h, (500./1113.)*h, (125./192.)*h, (-2187./6784.)*h, (11./84.)*h) // 5th
M.normalize()
k7 := k2 // re-use k2
torqueFn(k7) // next torque if OK
// error estimate
Err := cuda.Buffer(3, size) //k3 // re-use k3 as error estimate
defer cuda.Recycle(Err)
madd6(Err, rk.k1, k3, k4, k5, k6, k7, (35./384.)-(5179./57600.), (500./1113.)-(7571./16695.), (125./192.)-(393./640.), (-2187./6784.)-(-92097./339200.), (11./84.)-(187./2100.), (0.)-(1./40.))
// determine error
err := cuda.MaxVecNorm(Err) * float64(h)
// adjust next time step
if err < MaxErr || Dt_si <= MinDt || FixDt != 0 { // mindt check to avoid infinite loop
// step OK
setLastErr(err)
setMaxTorque(k7)
NSteps++
Time = t0 + Dt_si
adaptDt(math.Pow(MaxErr/err, 1./5.))
data.Copy(rk.k1, k7) // FSAL
} else {
// undo bad step
//util.Println("Bad step at t=", t0, ", err=", err)
util.Assert(FixDt == 0)
Time = t0
data.Copy(m, m0)
NUndone++
adaptDt(math.Pow(MaxErr/err, 1./6.))
}
}
func (rk *RK45DP) Free() {
rk.k1.Free()
rk.k1 = nil
}
// TODO: into cuda
func madd5(dst, src1, src2, src3, src4, src5 *data.Slice, w1, w2, w3, w4, w5 float32) {
cuda.Madd3(dst, src1, src2, src3, w1, w2, w3)
cuda.Madd3(dst, dst, src4, src5, 1, w4, w5)
}
func madd6(dst, src1, src2, src3, src4, src5, src6 *data.Slice, w1, w2, w3, w4, w5, w6 float32) {
madd5(dst, src1, src2, src3, src4, src5, w1, w2, w3, w4, w5)
cuda.Madd2(dst, dst, src6, 1, w6)
}