Skip to content

Handle loop carried dependencies that are apparent from constant offsets #122

@chriselrod

Description

@chriselrod
using LoopVectorization
"""
    grad!(𝛥rec, 𝛥ℛ, rec)

Accumulate stencil contributions into `𝛥rec` in place, using a plain (non-vectorized)
nested loop.

For every interior index `(i, j)` of `𝛥rec` (first and last row/column excluded) the value

    s = 2 * ((rec[i-1, j] + (rec[i+1, j] + (rec[i, j+1] + rec[i, j-1]))) - 4 * rec[i, j])

is formed, then

- `conj(conj(𝛥ℛ[1]) * s)` is added to each of the four neighbours
  `𝛥rec[i-1, j]`, `𝛥rec[i+1, j]`, `𝛥rec[i, j+1]`, `𝛥rec[i, j-1]`;
- `conj(conj(𝛥ℛ[1]) * (-4 * s))` is added to `𝛥rec[i, j]`.

Only the first element of `𝛥ℛ` is read. Iterations are *not* independent: every
iteration read-modify-writes entries of `𝛥rec` that neighbouring iterations also update.

Returns `nothing`.
"""
function grad!(𝛥rec, 𝛥ℛ, rec)
    for i in 2:size(𝛥rec, 1)-1
        for j in 2:size(𝛥rec, 2)-1
            # Read inside the loop so an empty iteration space never touches 𝛥ℛ.
            seed = conj(𝛥ℛ[1])
            # Association order is kept deliberately: it fixes the floating-point result.
            horizontal = rec[i, j + 1] + rec[i, j - 1]
            neighbours = rec[i - 1, j] + (rec[i + 1, j] + horizontal)
            scaled = 2 * (neighbours - 4 * rec[i, j])
            # The four neighbour updates all receive the same value.
            side = conj(seed * scaled)
            centre = conj(seed * (-4 * scaled))
            # Store order matters because consecutive iterations overlap on 𝛥rec.
            𝛥rec[i - 1, j] = 𝛥rec[i - 1, j] + side
            𝛥rec[i + 1, j] = 𝛥rec[i + 1, j] + side
            𝛥rec[i, j + 1] = 𝛥rec[i, j + 1] + side
            𝛥rec[i, j - 1] = 𝛥rec[i, j - 1] + side
            𝛥rec[i, j] = 𝛥rec[i, j] + centre
        end
    end
    return nothing
end
"""
    gradavx!(𝛥rec, 𝛥ℛ, rec)

Same loop body as `grad!`, but with the outer loop wrapped in LoopVectorization's `@avx`.

For every interior index `(i, j)` of `𝛥rec` the body reads a 5-point neighbourhood of
`rec`, scales it by `conj(𝛥ℛ[1])`, and read-modify-writes `𝛥rec` at `(i-1, j)`,
`(i+1, j)`, `(i, j+1)`, `(i, j-1)` and `(i, j)`.

Because the stores use constant offsets from `(i, j)`, different iterations update the
same entries of `𝛥rec`, so the iterations carry dependencies on each other.

NOTE(review): this function is the reproducer for the issue; the statement sequence is
kept exactly as generated so that `@avx` sees the same expression. Presumably `@avx`
does not account for these loop-carried dependencies, so the result can differ from
`grad!` — confirm against the LoopVectorization documentation.
"""
function gradavx!(𝛥rec, 𝛥ℛ, rec)
    @avx for i in 2:size(𝛥rec, 1)-1
        for j in 2:size(𝛥rec, 2)-1
            ℰ𝓍1 = conj(𝛥ℛ[1])
            ℰ𝓍2 = rec[i, j + 1] + rec[i, j - 1]
            ℰ𝓍3 = rec[i + 1, j] + ℰ𝓍2
            # ℰ𝓍4 … ℰ𝓍9: contribution stored to 𝛥rec[i - 1, j]
            ℰ𝓍4 = rec[i - 1, j] + ℰ𝓍3
            ℰ𝓍5 = 4 * rec[i, j]
            ℰ𝓍6 = ℰ𝓍4 - ℰ𝓍5
            ℰ𝓍7 = 2ℰ𝓍6
            ℰ𝓍8 = ℰ𝓍1 * ℰ𝓍7
            ℰ𝓍9 = conj(ℰ𝓍8)
            # ℰ𝓍10 … ℰ𝓍14: same value again, stored to 𝛥rec[i + 1, j]
            ℰ𝓍10 = rec[i - 1, j] + ℰ𝓍3
            ℰ𝓍11 = ℰ𝓍10 - ℰ𝓍5
            ℰ𝓍12 = 2ℰ𝓍11
            ℰ𝓍13 = ℰ𝓍1 * ℰ𝓍12
            ℰ𝓍14 = conj(ℰ𝓍13)
            # ℰ𝓍15 … ℰ𝓍19: same value again, stored to 𝛥rec[i, j + 1]
            ℰ𝓍15 = rec[i - 1, j] + ℰ𝓍3
            ℰ𝓍16 = ℰ𝓍15 - ℰ𝓍5
            ℰ𝓍17 = 2ℰ𝓍16
            ℰ𝓍18 = ℰ𝓍1 * ℰ𝓍17
            ℰ𝓍19 = conj(ℰ𝓍18)
            # ℰ𝓍20 … ℰ𝓍24: same value again, stored to 𝛥rec[i, j - 1]
            ℰ𝓍20 = rec[i - 1, j] + ℰ𝓍3
            ℰ𝓍21 = ℰ𝓍20 - ℰ𝓍5
            ℰ𝓍22 = 2ℰ𝓍21
            ℰ𝓍23 = ℰ𝓍1 * ℰ𝓍22
            ℰ𝓍24 = conj(ℰ𝓍23)
            # ℰ𝓍25 … ℰ𝓍30: centre contribution, additionally scaled by -4
            ℰ𝓍25 = rec[i - 1, j] + ℰ𝓍3
            ℰ𝓍26 = ℰ𝓍25 - ℰ𝓍5
            ℰ𝓍27 = 2ℰ𝓍26
            ℰ𝓍28 = -4ℰ𝓍27
            ℰ𝓍29 = ℰ𝓍1 * ℰ𝓍28
            ℰ𝓍30 = conj(ℰ𝓍29)
            # Read-modify-write at constant offsets: neighbouring iterations overlap here.
            𝛥rec[i - 1, j] = 𝛥rec[i - 1, j] + ℰ𝓍9
            𝛥rec[i + 1, j] = 𝛥rec[i + 1, j] + ℰ𝓍14
            𝛥rec[i, j + 1] = 𝛥rec[i, j + 1] + ℰ𝓍19
            𝛥rec[i, j - 1] = 𝛥rec[i, j - 1] + ℰ𝓍24
            𝛥rec[i, j] = 𝛥rec[i, j] + ℰ𝓍30
        end
    end
end
# Reproducer driver: run the plain loop and the `@avx` loop on the same random input,
# each accumulating into its own zero-initialised output, then show the elementwise
# difference. Any non-zero entry means the two implementations disagree.
x = rand(10, 10)
dx1 = zero(x)
dx2 = zero(x)
del = ones(1)
grad!(dx1, del, x)
gradavx!(dx2, del, x)
dx1 .- dx2

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions