From ac3344d0a24e0812107cc844c579b32cc0acb586 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 28 Nov 2025 09:12:55 +0100
Subject: [PATCH] add new test for issue 543

---
 test/staticsize.jl | 74 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/test/staticsize.jl b/test/staticsize.jl
index 6ff2b0f6..5d14e956 100644
--- a/test/staticsize.jl
+++ b/test/staticsize.jl
@@ -135,3 +135,77 @@ end
     @test sum2_10turbo(A) ≈ sum(A)
   end
 end
+
+# Test for Issue #543: W=1 nested VecUnroll store on ARM.
+# This tests the case where the vector width is 1 (scalar) with nested unrolling.
+#
+# Plain-loop reference kernel (no `@turbo`). For every index (v, i, j) it stores
+#   data_out[v, i, j] = sum(matrix[j, jj] * data_in[v, i, jj] for jj in axes(matrix, 2))
+# and returns `nothing`. Its result is the ground truth for `issue543_turbo!`.
+function issue543_noavx!(data_out, matrix, data_in)
+  for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
+    res = zero(eltype(data_out))
+    for jj in axes(matrix, 2)
+      res += matrix[j, jj] * data_in[v, i, jj]
+    end
+    data_out[v, i, j] = res
+  end
+  return nothing
+end
+
+# Same kernel as `issue543_noavx!` with the loop nest wrapped in `@turbo`.
+# The loop body is intentionally identical to the reference version; the testset
+# below compares the outputs of both functions.
+function issue543_turbo!(data_out, matrix, data_in)
+  @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1)
+    res = zero(eltype(data_out))
+    for jj in axes(matrix, 2)
+      res += matrix[j, jj] * data_in[v, i, jj]
+    end
+    data_out[v, i, j] = res
+  end
+  return nothing
+end
+
+@testset "Issue #543: W=1 Nested VecUnroll" begin
+  # Test the specific case that was failing: v=1 (first dim size 1) with n=5
+  # This triggers W=1 code paths where VecUnroll stores T instead of Vec{1,T}
+  for v in [1, 2], n in [4, 5, 6, 7, 8]
+    data_out_ref = StrideArray(undef, StaticInt(v), StaticInt(n), StaticInt(n))
+    data_out_turbo = StrideArray(undef, StaticInt(v), StaticInt(n), StaticInt(n))
+    matrix = StrideArray(undef, StaticInt(n), StaticInt(n))
+    data_in = rand(v, n, n)
+
+    # Initialize with random data
+    matrix .= rand.()
+
+    # Overwrite the `undef` output buffers with zeros before running the kernels
+    fill!(data_out_ref, 0.0)
+    fill!(data_out_turbo, 0.0)
+
+    issue543_noavx!(data_out_ref, matrix, data_in)
+    issue543_turbo!(data_out_turbo, matrix, data_in)
+
+    @test data_out_turbo ≈ data_out_ref
+  end
+
+  # Also test with a non-static first dimension but static others
+  for v in [1, 2], n in [4, 5, 6]
+    data_out_ref = StrideArray(undef, v, StaticInt(n), StaticInt(n))
+    data_out_turbo = StrideArray(undef, v, StaticInt(n), StaticInt(n))
+    matrix = StrideArray(undef, StaticInt(n), StaticInt(n))
+    data_in = rand(v, n, n)
+
+    # Initialize with random data
+    matrix .= rand.()
+
+    # Overwrite the `undef` output buffers with zeros before running the kernels
+    fill!(data_out_ref, 0.0)
+    fill!(data_out_turbo, 0.0)
+
+    issue543_noavx!(data_out_ref, matrix, data_in)
+    issue543_turbo!(data_out_turbo, matrix, data_in)
+
+    @test data_out_turbo ≈ data_out_ref
+  end
+end