-
Notifications
You must be signed in to change notification settings - Fork 38
/
clarrays.jl
79 lines (61 loc) · 1.59 KB
/
clarrays.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
####################
# This example demonstrates a BandedMatrix on the GPU using CLArrays
# We construct the Matrix on the CPU first for now as this is currently
# slow when tried to do directly on the GPU.
####################
using GPUArrays, CLArrays, FillArrays, BandedMatrices
import BandedMatrices: _BandedMatrix
function finitedifference(::Type{T}, n, Δt) where T
Δx = 2/n
data = Array{eltype(Δt)}(3, n)
data[2,:] = 1-2*Δt/Δx^2
data[[1,3],:] = Δt/Δx^2
_BandedMatrix(convert(T, data), n, 1, 1)
end
function expliciteuler(L, u₀, n)
u = copy(u₀)
v = copy(u)
for _ = 1:n
mul!(v, L, u)
u,v = v,u
end
u
end
####
# 32 bit
####
nₓ = 20_000
Δx = 2f0/nₓ
Δt = Δx^2/8f0
T = 1f0 / 1_000
n_t = floor(Int, T/Δt)
L = finitedifference(CLArray, nₓ, Δt)
x= range(-1, stop=1, length=nₓ)
u₀ = CLArray{Float32}(exp.(-10x.^2));
@time begin
u = expliciteuler(L, u₀, n_t);
GPUArrays.synchronize(u)
end # 25s
L̃ = BandedMatrix{Float32,Matrix{Float32}}(L)
ũ₀ = Vector{Float32}(u₀)
@time ũ = expliciteuler(L̃, ũ₀, n_t); # 44s
maximum(abs, Array(u)-ũ) # 3E-3
####
# 64 bit
####
nₓ = 20_000
Δx = 2/nₓ
Δt = Δx^2/8
T = 1f0 / 1_000
n_t = floor(Int, T/Δt)
L = finitedifference(CLArray, nₓ, Δt)
x= range(-1, stop=1, length=nₓ)
u₀ = CLArray(exp.(-10x.^2));
@time begin
u = expliciteuler(L, u₀, n_t);
GPUArrays.synchronize(u)
end # 30s
L̃ = BandedMatrix{Float64,Matrix{Float64}}(L)
ũ₀ = Vector{Float64}(u₀)
@time ũ = expliciteuler(L̃, ũ₀, n_t); # 45s
maximum(abs, Array(u)-ũ) # 1.72E-13