- 
                Notifications
    You must be signed in to change notification settings 
- Fork 79
Open
Description
Not really a bug in KA, but @vchuravy asked me to post it.
When uniform memory is used together with a `@synchronize` inside an `if` statement, one needs to be careful because of the way the implicit thread loops arise on the CPU:
using KernelAbstractions
using StaticArrays
using Test
# Good: no @synchronize in if statement
#
# Copies B into A, staging each column first in `l_B` (an MArray created inside
# the `@uniform` block) and then in `s_B` (declared with `@localmem`). The only
# `@synchronize` sits at the top level of the kernel body.
# In the transcript at the end of this report the matching test passes.
#
# Arguments:
#   ::Val{N} - number of rows handled per work-item, passed as a type parameter
#   A        - destination; written as A[n, glo_id]
#   B        - source; read as B[n, glo_id], its eltype is used for the buffers
@kernel function no_if_uniform_copy!(::Val{N}, A, B) where {N}
  @uniform begin
    FT = eltype(B)
    # Length-N staging buffer created in the uniform block.
    # NOTE(review): presumably a single buffer shared by all work-items of a
    # group on the CPU backend -- confirm against the KernelAbstractions docs.
    l_B = MArray{Tuple{N}, FT}(undef)
    # First component of groupsize(); used below as the second dimension of s_B
    grp_size = @uniform groupsize()[1]
  end
  ##############################
  # Start implicit thread loop?
  glo_id = @index(Global)
  loc_id = @index(Local)
  s_B = @localmem FT (N, grp_size)
  # store value of B in uniform memory
  for n = 1:N
    l_B[n] = B[n, glo_id]
  end
  # Dump value from uniform to shared
  # (the write to l_B above and this read are not separated by a @synchronize)
  for n = 1:N
    s_B[n, loc_id] = l_B[n]
  end
  # End implicit thread loop?
  ##############################
  @synchronize
  ##############################
  # Start implicit thread loop?
  # Dump value from shared to global memory
  for n = 1:N
    A[n, glo_id] = s_B[n, loc_id]
  end
  # End implicit thread loop?
  ##############################
end
# Bad: @synchronize in if statement with uniform memory usage
#
# Same copy as `no_if_uniform_copy!`, except that the transfer from `l_B` to
# `s_B`, the `@synchronize`, and the final store to `A` are nested inside an
# `if` block, while the store into `l_B` stays outside of it.
# In the transcript at the end of this report the matching test FAILS: the
# printed `A` shows repeated columns rather than the columns of `B`.
# NOTE(review): presumably the write to `l_B` and the read from `l_B` end up in
# different implicit work-item loops on the CPU, so every work-item reads the
# values left by the last writer -- confirm against the @kernel lowering.
#
# Arguments:
#   ::Val{N} - number of rows handled per work-item, passed as a type parameter
#   A        - destination; written as A[n, glo_id]
#   B        - source; read as B[n, glo_id], its eltype is used for the buffers
@kernel function with_if_uniform_copy!(::Val{N}, A, B) where {N}
  @uniform begin
    FT = eltype(B)
    # Length-N staging buffer created in the uniform block.
    # NOTE(review): presumably a single buffer shared by all work-items of a
    # group on the CPU backend -- confirm against the KernelAbstractions docs.
    l_B = MArray{Tuple{N}, FT}(undef)
    # First component of groupsize(); used below as the second dimension of s_B
    grp_size = @uniform groupsize()[1]
  end
  ##############################
  # Start implicit thread loop?
  glo_id = @index(Global)
  loc_id = @index(Local)
  s_B = @localmem FT (N, grp_size)
  # store value of B in uniform memory
  for n = 1:N
    l_B[n] = B[n, glo_id]
  end
  # End implicit thread loop?
  ##############################
  # The condition is the literal `true`, so the branch is always taken; the
  # `if` only serves to nest the @synchronize below inside a conditional.
  if true
    ##############################
    # Start implicit thread loop
    #
    # Dump value from uniform to shared
    # (this read of l_B is inside the `if`; the write to l_B is outside it)
    for n = 1:N
      s_B[n, loc_id] = l_B[n]
    end
    # End implicit thread loop?
    ##############################
    @synchronize
    ##############################
    # Start implicit thread loop?
    # Dump value from shared to global memory
    for n = 1:N
      A[n, glo_id] = s_B[n, loc_id]
    end
    # End implicit thread loop?
    ##############################
  end
end
# Good: @synchronize in if statement with private memory usage
#
# Same structure as `with_if_uniform_copy!` (the `@synchronize` is nested in an
# `if` block), but the first staging buffer `p_B` is declared with `@private`
# instead of being an MArray created in the `@uniform` block.
# In the transcript at the end of this report the matching test passes.
#
# Arguments:
#   ::Val{N} - number of rows handled per work-item, passed as a type parameter
#   A        - destination; written as A[n, glo_id]
#   B        - source; read as B[n, glo_id], its eltype is used for the buffers
@kernel function with_if_private_copy!(::Val{N}, A, B) where {N}
  @uniform begin
    FT = eltype(B)
    # First component of groupsize(); used below as the second dimension of s_B
    grp_size = @uniform groupsize()[1]
  end
  # Length-N staging buffer declared with @private.
  # NOTE(review): presumably one instance per work-item that stays valid across
  # @synchronize -- confirm against the KernelAbstractions docs.
  p_B = @private FT (N,)
  ##############################
  # Start implicit thread loop?
  glo_id = @index(Global)
  loc_id = @index(Local)
  s_B = @localmem FT (N, grp_size)
  # store value of B in private memory
  for n = 1:N
    p_B[n] = B[n, glo_id]
  end
  # End implicit thread loop?
  ##############################
  # The condition is the literal `true`, so the branch is always taken; the
  # `if` only serves to nest the @synchronize below inside a conditional.
  if true
    ##############################
    # Start implicit thread loop?
    # Dump value from private to shared
    for n = 1:N
      s_B[n, loc_id] = p_B[n]
    end
    # End implicit thread loop?
    ##############################
    @synchronize
    ##############################
    # Start implicit thread loop?
    # Dump value from shared to global memory
    for n = 1:N
      A[n, glo_id] = s_B[n, loc_id]
    end
    # End implicit thread loop?
    ##############################
  end
end
# Checks that `no_if_uniform_copy!` reproduces its source array exactly.
@testset "no if uniform copy" begin
  nrows, ncols = 10, 1024
  src = rand(nrows, ncols)
  dst = similar(src)
  # Instantiate the kernel with `CPU()` and 8 (presumably the workgroup size),
  # launch it over `ncols` work-items and block until it has finished.
  kernel! = no_if_uniform_copy!(CPU(), 8)
  wait(kernel!(Val(nrows), dst, src; ndrange = ncols))
  # The kernel only copies values, so exact equality is expected.
  @test dst == src
end
# Checks that `with_if_private_copy!` reproduces its source array exactly.
@testset "with if private copy" begin
  nrows, ncols = 10, 1024
  src = rand(nrows, ncols)
  dst = similar(src)
  # Instantiate the kernel with `CPU()` and 8 (presumably the workgroup size),
  # launch it over `ncols` work-items and block until it has finished.
  kernel! = with_if_private_copy!(CPU(), 8)
  wait(kernel!(Val(nrows), dst, src; ndrange = ncols))
  # The kernel only copies values, so exact equality is expected.
  @test dst == src
end
# Checks whether `with_if_uniform_copy!` reproduces its source array exactly.
# This is the testset that fails in the transcript that follows: the printed
# `A` does not equal `B`.
@testset "with if uniform copy" begin
  N = 10
  M = 1024
  B = rand(N, M)
  A = similar(B)
  # Instantiate the kernel with `CPU()` and 8 (presumably the workgroup size),
  # launch it over M work-items and block until it has finished.
  event = with_if_uniform_copy!(CPU(), 8)(Val(N), A, B; ndrange = M)
  wait(event)
  # The kernel only copies values, so exact equality is the intended outcome.
  @test A == B
end
# output:
julia> include("buggy.jl")
Test Summary:      | Pass  Total
no if uniform copy |    1      1
Test Summary:        | Pass  Total
with if private copy |    1      1
with if uniform copy: Test Failed at /Users/jekozdon/scratch/2019_09_17/buggy.jl:167
  Expression: A == B
   Evaluated: [0.909792046331644 0.909792046331644 … 0.49137323913605546 0.49137323913605546; 0.14386949928286263 0.14386949928286263 … 0.7250834679876621 0.7250834679876621; … ; 0.9275922324520269 0.9275922324520269 … 0.5301867826757798 0.5301867826757798; 0.7105600705440542 0.7105600705440542 … 0.782530472812315 0.782530472812315] == [0.41175116387410693 0.1267238684429859 … 0.9288230234713291 0.49137323913605546; 0.9951976250072363 0.6354672711865443 … 0.0058710270867841086 0.7250834679876621; … ; 0.709797268000828 0.6061527988039019 … 0.188834315207701 0.5301867826757798; 0.9941190027847424 0.0318726131609639 … 0.037656338749129104 0.782530472812315]
Stacktrace:
 [1] top-level scope at /Users/jekozdon/scratch/2019_09_17/buggy.jl:167
 [2] top-level scope at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.4/Test/src/Test.jl:1113
 [3] top-level scope at /Users/jekozdon/scratch/2019_09_17/buggy.jl:161
Test Summary:        | Fail  Total
with if uniform copy |    1      1
ERROR: LoadError: Some tests did not pass: 0 passed, 1 failed, 0 errored, 0 broken.
in expression starting at /Users/jekozdon/scratch/2019_09_17/buggy.jl:160
Metadata
Metadata
Assignees
Labels
No labels