-
Notifications
You must be signed in to change notification settings - Fork 73
Open
Description
using LoopVectorization
"""
    grad!(π₯rec, π₯β, rec)

Accumulate five-point-stencil contributions of `rec` into `π₯rec`, in place.

For every interior index pair `(i, j)`, with `i` in `2:size(π₯rec, 1)-1` and
`j` in `2:size(π₯rec, 2)-1`, the stencil value

    s = rec[i-1, j] + rec[i+1, j] + rec[i, j+1] + rec[i, j-1] - 4 * rec[i, j]

is formed. `conj(conj(π₯β[1]) * 2s)` is then added to the four direct
neighbours of `(i, j)` in `π₯rec`, and `conj(conj(π₯β[1]) * (-4 * 2s))` is added
to `π₯rec[i, j]` itself.

Only the first element of `π₯β` is read. Neighbouring iterations update
overlapping elements of `π₯rec`, so the loops run in a fixed order
(`i` outer, `j` inner). Returns `nothing`.

NOTE(review): this looks like the adjoint of a sum of squared five-point
Laplacians, with `π₯β` holding the incoming sensitivity; confirm with the caller.
"""
function grad!(π₯rec, π₯β, rec)
    for i in 2:size(π₯rec, 1)-1, j in 2:size(π₯rec, 2)-1
        # Read inside the loop so that an empty interior never touches `π₯β`.
        scale = conj(π₯β[1])

        # Build the neighbour sum in a fixed association order.
        horizontal = rec[i, j + 1] + rec[i, j - 1]
        withlower = rec[i + 1, j] + horizontal
        neighbours = rec[i - 1, j] + withlower
        stencil = neighbours - 4 * rec[i, j]

        doubled = 2 * stencil
        edge = conj(scale * doubled)
        centre = conj(scale * (-4 * doubled))

        # Scatter-add: four neighbours first, then the centre element.
        π₯rec[i - 1, j] += edge
        π₯rec[i + 1, j] += edge
        π₯rec[i, j + 1] += edge
        π₯rec[i, j - 1] += edge
        π₯rec[i, j] += centre
    end
    return nothing
end
# gradavx!(π₯rec, π₯β, rec)
#
# Same loop body as `grad!`, but with the outer loop wrapped in `@avx`.
# For every interior index pair (i, j) it forms the five-point stencil value
#     rec[i-1, j] + rec[i+1, j] + rec[i, j+1] + rec[i, j-1] - 4 * rec[i, j],
# scales it by conj(π₯β[1]) (only the first element of π₯β is read), and adds
# the result into π₯rec at (i, j) and at its four direct neighbours.
#
# NOTE(review): each iteration stores to π₯rec[i±1, j] and π₯rec[i, j±1], which
# are also stored to by adjacent iterations, so the result depends on the order
# in which iterations run. `@avx` presumably reorders/vectorizes iterations;
# confirm against the LoopVectorization documentation whether overlapping
# stores like these are supported before relying on this matching `grad!`.
function gradavx!(π₯rec, π₯β, rec)
@avx for i in 2:size(π₯rec, 1)-1
for j in 2:size(π₯rec, 2)-1
# Scale factor; identical on every iteration.
β°π1 = conj(π₯β[1])
# Neighbour sum, built up one term at a time.
β°π2 = rec[i, j + 1] + rec[i, j - 1]
β°π3 = rec[i + 1, j] + β°π2
β°π4 = rec[i - 1, j] + β°π3
β°π5 = 4 * rec[i, j]
# Stencil value (neighbour sum minus four times the centre), doubled and scaled.
β°π6 = β°π4 - β°π5
β°π7 = 2β°π6
β°π8 = β°π1 * β°π7
β°π9 = conj(β°π8)
# The next three groups repeat the computation of β°π4 … β°π9 verbatim,
# producing the same value once per neighbour that is updated below.
β°π10 = rec[i - 1, j] + β°π3
β°π11 = β°π10 - β°π5
β°π12 = 2β°π11
β°π13 = β°π1 * β°π12
β°π14 = conj(β°π13)
β°π15 = rec[i - 1, j] + β°π3
β°π16 = β°π15 - β°π5
β°π17 = 2β°π16
β°π18 = β°π1 * β°π17
β°π19 = conj(β°π18)
β°π20 = rec[i - 1, j] + β°π3
β°π21 = β°π20 - β°π5
β°π22 = 2β°π21
β°π23 = β°π1 * β°π22
β°π24 = conj(β°π23)
# Centre contribution: the same doubled stencil value, additionally multiplied by -4.
β°π25 = rec[i - 1, j] + β°π3
β°π26 = β°π25 - β°π5
β°π27 = 2β°π26
β°π28 = -4β°π27
β°π29 = β°π1 * β°π28
β°π30 = conj(β°π29)
# Scatter-add into the four neighbours and then the centre element.
π₯rec[i - 1, j] = π₯rec[i - 1, j] + β°π9
π₯rec[i + 1, j] = π₯rec[i + 1, j] + β°π14
π₯rec[i, j + 1] = π₯rec[i, j + 1] + β°π19
π₯rec[i, j - 1] = π₯rec[i, j - 1] + β°π24
π₯rec[i, j] = π₯rec[i, j] + β°π30
end
end
end
# Run the plain-loop and `@avx` versions on the same random input and show the
# element-wise difference of the two accumulated results.
x = rand(10, 10);
dx1 = fill!(similar(x), 0); dx2 = fill!(similar(x), 0);
del = ones(1);
grad!(dx1, del, x); gradavx!(dx2, del, x)
dx1 .- dx2
Metadata
Assignees
Labels
No labels