Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

atomic operations error #421

Closed
jakubMitura14 opened this issue Aug 19, 2022 · 4 comments
Closed

atomic operations error #421

jakubMitura14 opened this issue Aug 19, 2022 · 4 comments
Labels
upstream Enzyme proper

Comments

@jakubMitura14
Copy link

jakubMitura14 commented Aug 19, 2022

While trying to perform an atomic add from within a kernel, a compilation error occurs.
System Ubuntu 20.0
Julia version 1.7.3
CUDA.jl :latest
Enzyme.jl : latest
GPU RTX 3080
kernel

"""
    scaleDownP(Nx, Ny, Nz, A, p, Aout)

Kernel body in which every thread atomically adds `1f0` to one element of `p`.

The element is addressed by the thread's global index along x, y and z, each
shifted by one (the shift is there because of padding). `Nx`, `Ny`, `Nz`, `A`
and `Aout` are accepted but not used in this body. Returns `nothing`.
"""
function scaleDownP(Nx, Ny, Nz, A, p, Aout)
    # Offset contributed by the preceding blocks along each axis.
    xBlockOffset = (blockIdx().x - 1) * CUDA.blockDim_x()
    yBlockOffset = (blockIdx().y - 1) * CUDA.blockDim_y()
    zBlockOffset = (blockIdx().z - 1) * CUDA.blockDim_z()

    # Global thread index, plus one because of the padding.
    ix = threadIdx().x + xBlockOffset + 1
    iy = threadIdx().y + yBlockOffset + 1
    iz = threadIdx().z + zBlockOffset + 1

    CUDA.@atomic p[ix, iy, iz] += 1.0f0  # TODO(remove)
    return nothing
end

enzyme function

"""
    scaleDownKernDeffP(Nx, Ny, Nz, A, dA, p, dp, Aout, dAout)

Wrapper that calls `Enzyme.autodiff_deferred` on `scaleDownP`.

`Nx`, `Ny` and `Nz` are wrapped in `Const`; `A`, `p` and `Aout` are each paired
with `dA`, `dp` and `dAout` respectively through `Duplicated`. The bare `Const`
passed right after the function is presumably the return activity (confirm
against the Enzyme documentation). Returns `nothing`.
"""
function scaleDownKernDeffP(Nx, Ny, Nz, A, dA, p, dp, Aout, dAout)
    Enzyme.autodiff_deferred(
        scaleDownP,
        Const,
        Const(Nx),
        Const(Ny),
        Const(Nz),
        Duplicated(A, dA),
        Duplicated(p, dp),
        Duplicated(Aout, dAout),
    )
    return nothing
end

invoking function

# Launch configuration: 4 x 4 x 4 threads per block on a 2 x 2 x 2 grid of blocks.
threads = (4, 4, 4)
blocks = (2, 2, 2)
# Launch the differentiating wrapper on the GPU. All arguments must already be
# defined at this point (NOTE(review): presumably the values produced by
# createTestData; confirm the intended execution order of these snippets).
@cuda threads = threads blocks = blocks scaleDownKernDeffP(Nx, Ny, Nz, A, dA, p, dp, Aout, dAout)

creating test data

"""
    createTestData(Nx, Ny, Nz, oneSidePad, crossBorderWhere)

Build the GPU arrays used by the reproducer and return them as the tuple
`(A, dA, p, dp, Aout, dAout)`.

Every returned array has `oneSidePad` extra cells on each side of each axis,
i.e. size `(Nx, Ny, Nz) .+ 2 * oneSidePad`.

- `A`: zero border; interior holds `1:Nx*Ny*Nz` reshaped to `(Nx, Ny, Nz)`.
- `p`: zero border; interior holds `1 - 0.1` everywhere except on the three
  planes at index `crossBorderWhere`, which hold `1 - 0.9`.
- `dA`, `dp`, `dAout`: all ones.
- `Aout`: all zeros.
"""
function createTestData(Nx, Ny, Nz, oneSidePad, crossBorderWhere)
    innerDims = (Nx, Ny, Nz)
    paddedDims = innerDims .+ oneSidePad * 2
    # Index ranges selecting the unpadded interior of a padded array.
    interior = map(n -> (oneSidePad+1):(oneSidePad+n), innerDims)

    # Values 1, 2, 3, ... placed inside a zero border.
    hostA = zeros(Float32, paddedDims...)
    hostA[interior...] = Float32.(reshape(1:Nx*Ny*Nz, innerDims))
    A = CuArray(hostA)
    dA = similar(A)

    # The probabilities are deliberately computed in Float64 and only converted
    # to Float32 when stored into the padded array below.
    raw = fill(0.1, innerDims...)
    raw[crossBorderWhere, :, :] .= 0.9
    raw[:, crossBorderWhere, :] .= 0.9
    raw[:, :, crossBorderWhere] .= 0.9
    flipped = 1.0 .- raw  # so we will keep low probability on edges

    hostP = zeros(Float32, paddedDims...)
    hostP[interior...] = flipped

    dp = CUDA.ones(paddedDims...)
    Aout = CUDA.zeros(paddedDims...)
    dAout = CUDA.ones(paddedDims...)
    fill!(dA, 1)
    p = CuArray(hostP)
    return (A, dA, p, dp, Aout, dAout)
end

### test Data

# Extent of the unpadded volume along each axis.
Nx, Ny, Nz = 8, 8, 8
# Number of padding cells added on each side of every axis.
oneSidePad = 1
# Index of the planes that createTestData fills with a different probability.
crossBorderWhere = 4
# Build the input arrays and their shadow (derivative) counterparts.
A, dA, p, dp, Aout, dAout = createTestData(Nx, Ny, Nz, oneSidePad, crossBorderWhere)

error

<analysis>
i64 0: {[-1]:Anything}, intvals: {0,}
i64 1: {[-1]:Integer}, intvals: {1,}
i32 1: {[-1]:Integer}, intvals: {1,}
i64 2: {[-1]:Integer}, intvals: {2,}
i64 -4: {[-1]:Integer}, intvals: {-4,}
float 1.000000e+00: {[-1]:Float@float}, intvals: {}
  %narrow25 = add nuw nsw i32 %17, %narrow, !dbg !109: {[-1]:Integer}, intvals: {1,}
float 5.000000e-01: {[-1]:Float@float}, intvals: {}
  %28 = icmp sgt i64 %.fca.2.1.extract4, 0, !dbg !136: {[-1]:Integer}, intvals: {}
  %45 = fmul float %44, %44, !dbg !212: {[-1]:Float@float}, intvals: {}
double -4.800000e-01: {[-1]:Float@double}, intvals: {}
  %43 = fdiv float %41, %42, !dbg !207: {[-1]:Float@float}, intvals: {}
  %8 = add nuw nsw i32 %7, 1, !dbg !57: {[-1]:Integer}, intvals: {1,}
  %24 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() #9, !dbg !126, !range !131: {[-1]:Integer}, intvals: {}
  %38 = bitcast i8 addrspace(1)* %.fca.0.extract1 to float addrspace(1)*, !dbg !183: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %16 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() #9, !dbg !87, !range !56: {[-1]:Integer}, intvals: {0,}
  %71 = mul i64 %70, %25, !dbg !271: {[-1]:Integer}, intvals: {}
  %reass.add30 = add i64 %71, %20: {[-1]:Integer}, intvals: {}
  %67 = icmp sgt i64 %.fca.2.0.extract3, 0, !dbg !257: {[-1]:Integer}, intvals: {}
  %69 = icmp sgt i64 %.fca.2.1.extract4, 0, !dbg !257: {[-1]:Integer}, intvals: {}
  %26 = icmp sgt i64 %.fca.2.0.extract3, 0, !dbg !136: {[-1]:Integer}, intvals: {}
  %27 = select i1 %26, i64 %.fca.2.0.extract3, i64 0, !dbg !136: {[-1]:Integer}, intvals: {}
  %65 = fdiv double %64, 5.200000e-01, !dbg !240: {[-1]:Float@double}, intvals: {}
  %44 = fadd float %43, 1.000000e+00, !dbg !209: {[-1]:Float@float}, intvals: {}
  %narrow26 = mul nuw nsw i32 %24, %23, !dbg !132: {[-1]:Integer}, intvals: {0,}
  %narrow27 = add nuw nsw i32 %22, %narrow26, !dbg !134: {[-1]:Integer}, intvals: {1,}
  %17 = add nuw nsw i32 %16, 1, !dbg !94: {[-1]:Integer}, intvals: {1,}
  %21 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() #9, !dbg !111, !range !118: {[-1]:Integer}, intvals: {0,}
  %51 = fmul float %50, %50, !dbg !222: {[-1]:Float@float}, intvals: {}
  %35 = icmp slt i64 %32, 1, !dbg !179: {[-1]:Integer}, intvals: {}
  %.fca.2.2.extract5 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 2, 2, !dbg !41: {[-1]:Integer}, intvals: {}
  %58 = fdiv float %55, %57, !dbg !225: {[-1]:Float@float}, intvals: {}
  %47 = fmul float %46, %46, !dbg !212: {[-1]:Float@float}, intvals: {}
  %64 = fadd double %63, -4.800000e-01, !dbg !238: {[-1]:Float@double}, intvals: {}
  %20 = zext i32 %narrow25 to i64, !dbg !109: {[-1]:Integer}, intvals: {1,}
  %29 = select i1 %28, i64 %.fca.2.1.extract4, i64 0, !dbg !136: {[-1]:Integer}, intvals: {}
  %52 = fmul float %51, %51, !dbg !222: {[-1]:Float@float}, intvals: {}
  %59 = fmul float %41, %58, !dbg !226: {[-1]:Float@float}, intvals: {}
  %49 = fdiv float 5.000000e-01, %42, !dbg !217: {[-1]:Float@float}, intvals: {}
  %reass.mul = mul i64 %reass.add, %27: {[-1]:Integer}, intvals: {}
  %11 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #9, !dbg !71, !range !76: {[-1]:Integer}, intvals: {}
  %68 = select i1 %67, i64 %.fca.2.0.extract3, i64 0, !dbg !257: {[-1]:Integer}, intvals: {}
  %46 = fmul float %45, %45, !dbg !212: {[-1]:Float@float}, intvals: {}
  %54 = fmul float %53, %53, !dbg !222: {[-1]:Float@float}, intvals: {}
  %.fca.2.0.extract3 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 2, 0, !dbg !41: {[-1]:Integer}, intvals: {}
  %73 = shl i64 %72, 2, !dbg !277: {[-1]:Integer}, intvals: {}
  %74 = add i64 %73, -4, !dbg !277: {[-1]:Integer}, intvals: {}
  %62 = fadd float %59, %61, !dbg !224: {[-1]:Float@float}, intvals: {}
  %60 = fdiv float %56, %57, !dbg !225: {[-1]:Float@float}, intvals: {}
  %narrow = mul nuw nsw i32 %19, %18, !dbg !107: {[-1]:Integer}, intvals: {0,}
  %40 = getelementptr inbounds float, float addrspace(1)* %38, i64 %39, !dbg !196: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #9, !dbg !60, !range !67: {[-1]:Integer}, intvals: {0,}
  %75 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract1, i64 %74, !dbg !283: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %77 = atomicrmw fadd float addrspace(1)* %76, float 1.000000e+00 acq_rel, !dbg !285: {[-1]:Float@float}, intvals: {}
  %63 = fpext float %62 to double, !dbg !227: {[-1]:Float@double}, intvals: {}
  %42 = fadd float %41, 5.000000e-01, !dbg !200: {[-1]:Float@float}, intvals: {}
  %76 = bitcast i8 addrspace(1)* %75 to float addrspace(1)*, !dbg !285: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %57 = fadd float %55, %56, !dbg !224: {[-1]:Float@float}, intvals: {}
  %36 = icmp sgt i64 %32, %34, !dbg !179: {[-1]:Integer}, intvals: {}
  %23 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() #9, !dbg !120, !range !101: {[-1]:Integer}, intvals: {0,}
  %25 = zext i32 %narrow27 to i64, !dbg !134: {[-1]:Integer}, intvals: {1,}
  %61 = fmul float %60, 5.000000e-01, !dbg !226: {[-1]:Float@float}, intvals: {}
  %33 = icmp sgt i64 %.fca.3.extract6, 0, !dbg !167: {[-1]:Integer}, intvals: {}
  %48 = fmul float %47, %47, !dbg !212: {[-1]:Float@float}, intvals: {}
i64 %0: {[-1]:Integer}, intvals: {}
i64 %1: {[-1]:Integer}, intvals: {}
i64 %2: {[-1]:Integer}, intvals: {}
{ i8 addrspace(1)*, i64, [3 x i64], i64 } %3: {[0]:Pointer, [0,-1]:Float@float, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer, [12]:Integer, [13]:Integer, [14]:Integer, [15]:Integer, [16]:Integer, [17]:Integer, [18]:Integer, [19]:Integer, [20]:Integer, [21]:Integer, [22]:Integer, [23]:Integer, [24]:Integer, [25]:Integer, [26]:Integer, [27]:Integer, [28]:Integer, [29]:Integer, [30]:Integer, [31]:Integer, [32]:Integer, [33]:Integer, [34]:Integer, [35]:Integer, [36]:Integer, [37]:Integer, [38]:Integer, [39]:Integer, [40]:Integer, [41]:Integer, [42]:Integer, [43]:Integer, [44]:Integer, [45]:Integer, [46]:Integer, [47]:Integer}, intvals: {}
{ i8 addrspace(1)*, i64, [3 x i64], i64 } %4: {[0]:Pointer, [0,-1]:Float@float, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer, [12]:Integer, [13]:Integer, [14]:Integer, [15]:Integer, [16]:Integer, [17]:Integer, [18]:Integer, [19]:Integer, [20]:Integer, [21]:Integer, [22]:Integer, [23]:Integer, [24]:Integer, [25]:Integer, [26]:Integer, [27]:Integer, [28]:Integer, [29]:Integer, [30]:Integer, [31]:Integer, [32]:Integer, [33]:Integer, [34]:Integer, [35]:Integer, [36]:Integer, [37]:Integer, [38]:Integer, [39]:Integer, [40]:Integer, [41]:Integer, [42]:Integer, [43]:Integer, [44]:Integer, [45]:Integer, [46]:Integer, [47]:Integer}, intvals: {}
{ i8 addrspace(1)*, i64, [3 x i64], i64 } %5: {[0]:Pointer, [0,-1]:Float@float, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer, [12]:Integer, [13]:Integer, [14]:Integer, [15]:Integer, [16]:Integer, [17]:Integer, [18]:Integer, [19]:Integer, [20]:Integer, [21]:Integer, [22]:Integer, [23]:Integer, [24]:Integer, [25]:Integer, [26]:Integer, [27]:Integer, [28]:Integer, [29]:Integer, [30]:Integer, [31]:Integer, [32]:Integer, [33]:Integer, [34]:Integer, [35]:Integer, [36]:Integer, [37]:Integer, [38]:Integer, [39]:Integer, [40]:Integer, [41]:Integer, [42]:Integer, [43]:Integer, [44]:Integer, [45]:Integer, [46]:Integer, [47]:Integer}, intvals: {}
  %53 = fmul float %52, %52, !dbg !222: {[-1]:Float@float}, intvals: {}
  %12 = zext i32 %11 to i64, !dbg !77: {[-1]:Integer}, intvals: {}
  %reass.add = add i64 %30, %20: {[-1]:Integer}, intvals: {}
  %.fca.2.1.extract4 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 2, 1, !dbg !41: {[-1]:Integer}, intvals: {}
  %6 = call {}*** @julia.get_pgcstack() #9: {}, intvals: {}
  %.fca.3.extract6 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 3, !dbg !41: {[-1]:Integer}, intvals: {}
  %39 = add i64 %15, %reass.mul, !dbg !184: {[-1]:Integer}, intvals: {}
  %56 = fmul float %54, %54, !dbg !222: {[-1]:Float@float}, intvals: {}
  %30 = mul i64 %29, %25, !dbg !159: {[-1]:Integer}, intvals: {}
  %19 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() #9, !dbg !102, !range !76: {[-1]:Integer}, intvals: {}
  %22 = add nuw nsw i32 %21, 1, !dbg !119: {[-1]:Integer}, intvals: {1,}
  %15 = add nuw nsw i64 %13, %14, !dbg !85: {[-1]:Integer}, intvals: {1,}
  %34 = select i1 %33, i64 %.fca.3.extract6, i64 0, !dbg !167: {[-1]:Integer}, intvals: {}
  %13 = mul nuw nsw i64 %12, %10, !dbg !81: {[-1]:Integer}, intvals: {0,}
  %18 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #9, !dbg !95, !range !101: {[-1]:Integer}, intvals: {0,}
  %reass.mul31 = mul i64 %reass.add30, %68: {[-1]:Integer}, intvals: {}
  %72 = add i64 %31, %reass.mul31, !dbg !276: {[-1]:Integer}, intvals: {}
  %32 = add i64 %31, %reass.mul, !dbg !166: {[-1]:Integer}, intvals: {}
  %41 = load float, float addrspace(1)* %40, align 4, !dbg !196, !tbaa !197: {[-1]:Float@float}, intvals: {}
  %14 = zext i32 %8 to i64, !dbg !83: {[-1]:Integer}, intvals: {1,}
  %70 = select i1 %69, i64 %.fca.2.1.extract4, i64 0, !dbg !257: {[-1]:Integer}, intvals: {}
double 5.200000e-01: {[-1]:Float@double}, intvals: {}
  %50 = fadd float %49, 1.000000e+00, !dbg !220: {[-1]:Float@float}, intvals: {}
  %10 = zext i32 %9 to i64, !dbg !68: {[-1]:Integer}, intvals: {0,}
  %31 = add nuw nsw i64 %15, 1, !dbg !165: {[-1]:Integer}, intvals: {2,}
  %.fca.0.extract1 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 0, !dbg !41: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %.fca.1.extract2 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 1, !dbg !41: {[-1]:Integer}, intvals: {}
  %37 = or i1 %35, %36, !dbg !183: {[-1]:Integer}, intvals: {}
  %66 = fptrunc double %65 to float, !dbg !241: {[-1]:Float@float}, intvals: {}
  %55 = fmul float %48, %48, !dbg !212: {[-1]:Float@float}, intvals: {}
  %78 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_4887() #10, !dbg !183: {}, intvals: {}
  %7 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #9, !dbg !42, !range !56: {[-1]:Integer}, intvals: {0,}
</analysis>
oldFunc: ; Function Attrs: willreturn mustprogress
define internal void @diffejulia_scaleDownP_4859_inner7(i64 signext %0, i64 signext %1, i64 signext %2, { i8 addrspace(1)*, i64, [3 x i64], i64 } %3, { i8 addrspace(1)*, i64, [3 x i64], i64 } %"'", { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, { i8 addrspace(1)*, i64, [3 x i64], i64 } %"'1", { i8 addrspace(1)*, i64, [3 x i64], i64 } %5, { i8 addrspace(1)*, i64, [3 x i64], i64 } %"'2") local_unnamed_addr #8 !dbg !527 {
entry:
  %.fca.0.extract1 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 0, !dbg !528
  %.fca.1.extract2_replacementA = phi i64 , !dbg !528
  %.fca.2.0.extract3 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 2, 0, !dbg !528
  %.fca.2.1.extract4 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 2, 1, !dbg !528
  %.fca.2.2.extract5_replacementA = phi i64 , !dbg !528
  %.fca.3.extract6 = extractvalue { i8 addrspace(1)*, i64, [3 x i64], i64 } %4, 3, !dbg !528
  %_replacementA = phi {}*** 
  %6 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #9, !dbg !529, !range !113
  %7 = add nuw nsw i32 %6, 1, !dbg !536
  %8 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #9, !dbg !537, !range !124
  %9 = zext i32 %8 to i64, !dbg !542
  %10 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #9, !dbg !544, !range !133
  %11 = zext i32 %10 to i64, !dbg !548
  %12 = mul nuw nsw i64 %11, %9, !dbg !550
  %13 = zext i32 %7 to i64, !dbg !552
  %14 = add nuw nsw i64 %12, %13, !dbg !554
  %15 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() #9, !dbg !556, !range !113
  %16 = add nuw nsw i32 %15, 1, !dbg !562
  %17 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #9, !dbg !563, !range !158
  %18 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() #9, !dbg !568, !range !133
  %narrow = mul nuw nsw i32 %18, %17, !dbg !572
  %narrow25 = add nuw nsw i32 %16, %narrow, !dbg !574
  %19 = zext i32 %narrow25 to i64, !dbg !574
  %20 = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() #9, !dbg !576, !range !175
  %21 = add nuw nsw i32 %20, 1, !dbg !582
  %22 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() #9, !dbg !583, !range !158
  %23 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() #9, !dbg !588, !range !188
  %narrow26 = mul nuw nsw i32 %23, %22, !dbg !592
  %narrow27 = add nuw nsw i32 %21, %narrow26, !dbg !594
  %24 = zext i32 %narrow27 to i64, !dbg !594
  %25 = icmp sgt i64 %.fca.2.0.extract3, 0, !dbg !596
  %26 = select i1 %25, i64 %.fca.2.0.extract3, i64 0, !dbg !596
  %27 = icmp sgt i64 %.fca.2.1.extract4, 0, !dbg !596
  %28 = select i1 %27, i64 %.fca.2.1.extract4, i64 0, !dbg !596
  %29 = mul i64 %28, %24, !dbg !606
  %reass.add = add i64 %29, %19
  %reass.mul = mul i64 %reass.add, %26
  %30 = add nuw nsw i64 %14, 1, !dbg !611
  %31 = add i64 %30, %reass.mul, !dbg !612
  %32 = icmp sgt i64 %.fca.3.extract6, 0, !dbg !613
  %33 = select i1 %32, i64 %.fca.3.extract6, i64 0, !dbg !613
  %34 = icmp slt i64 %31, 1, !dbg !622
  %35 = icmp sgt i64 %31, %33, !dbg !622
  %36 = or i1 %34, %35, !dbg !624
  br i1 %36, label %L120.i, label %L118.i, !dbg !624

L118.i:                                           ; preds = %entry
  %37 = bitcast i8 addrspace(1)* %.fca.0.extract1 to float addrspace(1)*, !dbg !624
  %38 = add i64 %14, %reass.mul, !dbg !625
  %39 = getelementptr inbounds float, float addrspace(1)* %37, i64 %38, !dbg !632
  %40 = load float, float addrspace(1)* %39, align 4, !dbg !632, !tbaa !257
  %41 = fadd float %40, 5.000000e-01, !dbg !633
  %42 = fdiv float %40, %41, !dbg !636
  %43 = fadd float %42, 1.000000e+00, !dbg !637
  %44 = fmul float %43, %43, !dbg !639
  %45 = fmul float %44, %44, !dbg !639
  %46 = fmul float %45, %45, !dbg !639
  %47 = fmul float %46, %46, !dbg !639
  %48 = fdiv float 5.000000e-01, %41, !dbg !641
  %49 = fadd float %48, 1.000000e+00, !dbg !643
  %50 = fmul float %49, %49, !dbg !645
  %51 = fmul float %50, %50, !dbg !645
  %52 = fmul float %51, %51, !dbg !645
  %53 = fmul float %52, %52, !dbg !645
  %54 = fmul float %47, %47, !dbg !639
  %55 = fmul float %53, %53, !dbg !645
  %56 = fadd float %54, %55, !dbg !647
  %57 = fdiv float %54, %56, !dbg !648
  %58 = fmul float %40, %57, !dbg !649
  %59 = fdiv float %55, %56, !dbg !648
  %60 = fmul float %59, 5.000000e-01, !dbg !649
  %61 = fadd float %58, %60, !dbg !647
  %62 = fpext float %61 to double, !dbg !650
  %63 = fadd double %62, -4.800000e-01, !dbg !655
  %64 = fdiv double %63, 5.200000e-01, !dbg !656
  %65 = fptrunc double %64 to float, !dbg !657
  store float %65, float addrspace(1)* %39, align 4, !dbg !661, !tbaa !257
  %66 = icmp sgt i64 %.fca.2.0.extract3, 0, !dbg !667
  %67 = select i1 %66, i64 %.fca.2.0.extract3, i64 0, !dbg !667
  %68 = icmp sgt i64 %.fca.2.1.extract4, 0, !dbg !667
  %69 = select i1 %68, i64 %.fca.2.1.extract4, i64 0, !dbg !667
  %70 = mul i64 %69, %24, !dbg !678
  %reass.add30 = add i64 %70, %19
  %reass.mul31 = mul i64 %reass.add30, %67
  %71 = add i64 %30, %reass.mul31, !dbg !683
  %72 = shl i64 %71, 2, !dbg !684
  %73 = add i64 %72, -4, !dbg !684
  %74 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract1, i64 %73, !dbg !688
  %75 = bitcast i8 addrspace(1)* %74 to float addrspace(1)*, !dbg !689
  %76 = atomicrmw fadd float addrspace(1)* %75, float 1.000000e+00 acq_rel, !dbg !689
  br label %invertL118.i, !dbg !528

L120.i:                                           ; preds = %entry
  %77 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_4887() #10, !dbg !624
  unreachable, !dbg !624

allocsForInversion:                               ; No predecessors!

invertentry:                                      ; No predecessors!
  ret void

invertL118.i:                                     ; preds = %L118.i

invertL120.i:                                     ; No predecessors!
}

I:   %77 = atomicrmw fadd float addrspace(1)* %76, float 1.000000e+00 acq_rel, !dbg !285
julia: /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:842: void AdjointGenerator<AugmentedReturnType>::visitAtomicRMWInst(llvm::AtomicRMWInst&) [with AugmentedReturnType = const AugmentedReturn*]: Assertion `gutils->isConstantInstruction(&I)' failed.

signal (6): Aborted
in expression starting at /media/jakub/NewVolume/projects/superVoxelJuliaCode/ODE_pde_play/enzymePlayF.jl:176
gsignal at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x7f3c6632a728)
__assert_fail at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
visitAtomicRMWInst at /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:842 [inlined]
visitAtomicRMW at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/Instruction.def:177 [inlined]
visit at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/Instruction.def:177
visit at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/InstVisitor.h:112 [inlined]
CreatePrimalAndGradient at /workspace/srcdir/Enzyme/enzyme/Enzyme/EnzymeLogic.cpp:3646
EnzymeCreatePrimalAndGradient at /workspace/srcdir/Enzyme/enzyme/Enzyme/CApi.cpp:439
EnzymeCreatePrimalAndGradient at /home/jakub/.julia/packages/Enzyme/di3zM/src/api.jl:111
enzyme! at /home/jakub/.julia/packages/Enzyme/di3zM/src/compiler.jl:3271
unknown function (ip: 0x7f3bd9d6c36d)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
#codegen#80 at /home/jakub/.julia/packages/Enzyme/di3zM/src/compiler.jl:4158
codegen##kw at /home/jakub/.julia/packages/Enzyme/di3zM/src/compiler.jl:3881 [inlined]
#114 at /home/jakub/.julia/packages/GPUCompiler/N98un/src/driver.jl:296
get! at ./dict.jl:464
unknown function (ip: 0x7f3bd9d53dcf)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
macro expansion at /home/jakub/.julia/packages/GPUCompiler/N98un/src/driver.jl:295 [inlined]
#emit_llvm#111 at /home/jakub/.julia/packages/GPUCompiler/N98un/src/utils.jl:64
unknown function (ip: 0x7f3c0aa33903)
emit_llvm##kw at /home/jakub/.julia/packages/GPUCompiler/N98un/src/utils.jl:62 [inlined]
cufunction_compile at /home/jakub/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:353
#224 at /home/jakub/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:347 [inlined]
JuliaContext at /home/jakub/.julia/packages/GPUCompiler/N98un/src/driver.jl:76
unknown function (ip: 0x7f3bd9d2af1a)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
cufunction_compile at /home/jakub/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:346
cached_compilation at /home/jakub/.julia/packages/GPUCompiler/N98un/src/cache.jl:90
#cufunction#221 at /home/jakub/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299
cufunction at /home/jakub/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:293
unknown function (ip: 0x7f3bd9d2a92f)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1788 [inlined]
do_call at /buildworker/worker/package_linux64/build/src/interpreter.c:126
eval_value at /buildworker/worker/package_linux64/build/src/interpreter.c:215
eval_body at /buildworker/worker/package_linux64/build/src/interpreter.c:461
jl_interpret_toplevel_thunk at /buildworker/worker/package_linux64/build/src/interpreter.c:731
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:885
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:830
jl_toplevel_eval_in at /buildworker/worker/package_linux64/build/src/toplevel.c:944
eval at ./boot.jl:373 [inlined]
include_string at ./loading.jl:1196
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
_include at ./loading.jl:1253
include at ./Base.jl:418
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
exec_options at ./client.jl:292
_start at ./client.jl:495
jfptr__start_22567.clone_1 at /home/jakub/julia-1.7.3-linux-x86_64/julia-1.7.3/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1788 [inlined]
true_main at /buildworker/worker/package_linux64/build/src/jlapi.c:559
jl_repl_entrypoint at /buildworker/worker/package_linux64/build/src/jlapi.c:701
main at /buildworker/worker/package_linux64/build/cli/loader_exe.c:42
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
_start at /home/jakub/julia-1.7.3-linux-x86_64/julia-1.7.3/bin/julia (unknown line)
Allocations: 89207801 (Pool: 89173854; Big: 33947); GC: 51
Aborted (core dumped)

@vchuravy vchuravy added the upstream Enzyme proper label Aug 19, 2022
@jgreener64
Copy link
Contributor

I ran into this too, my MWE is:

using Enzyme, CUDA

"""
    mul_kernel(A)

Kernel body: the thread with x-index `i` atomically multiplies `A[i]` by
`A[i]`. Threads whose index exceeds `length(A)` do nothing. Returns `nothing`.
"""
function mul_kernel(A)
    tid = threadIdx().x
    # Threads past the end of the array have no element to update.
    tid > length(A) && return nothing
    CUDA.@atomic A[tid] *= A[tid]
    return nothing
end

"""
    grad_mul_kernel(A, dA)

Wrapper that calls `Enzyme.autodiff_deferred` on `mul_kernel`, passing `A`
paired with `dA` through `Duplicated`. Returns `nothing`.
"""
function grad_mul_kernel(A, dA)
    annotatedA = Duplicated(A, dA)
    Enzyme.autodiff_deferred(mul_kernel, Const, annotatedA)
    return nothing
end

# 64-element GPU array filled with ones.
A = CUDA.ones(64,)
# Array of the same type and size as A, set to all ones before the launch.
dA = similar(A)
dA .= 1
# Launch with one thread per element of A.
@cuda threads=length(A) grad_mul_kernel(A, dA)
# Display dA after the launch (at the REPL this prints its contents).
dA

It works fine without CUDA.@atomic. I am on Julia 1.8.0, Enzyme 0.10.4 and CUDA 3.12.0. The error is:

 ; Function Attrs: mustprogress willreturn
define void @preprocess_julia_mul_kernel_4941_inner7({ i8 addrspace(1)*, i64, [1 x i64], i64 } %0) local_unnamed_addr #8 !dbg !235 {
entry:
  %.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [1 x i64], i64 } %0, 0, !dbg !236
  %.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [1 x i64], i64 } %0, 2, 0, !dbg !236
  %.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [1 x i64], i64 } %0, 3, !dbg !236
  %1 = call {}*** @julia.get_pgcstack() #9
  %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #9, !dbg !237, !range !113
  %3 = add nuw nsw i32 %2, 1, !dbg !244
  %4 = zext i32 %3 to i64, !dbg !245
  %.not = icmp slt i64 %.fca.3.extract, %4, !dbg !252
  %5 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !253
  br i1 %.not, label %julia_mul_kernel_4941_inner.exit, label %L15.i, !dbg !253

L15.i:                                            ; preds = %entry
  %6 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !254
  %7 = select i1 %6, i64 %.fca.2.0.extract, i64 0, !dbg !254
  %.not1 = icmp slt i64 %7, %4, !dbg !269
  br i1 %.not1, label %L30.i, label %L28.i, !dbg !272

L28.i:                                            ; preds = %L15.i
  %8 = zext i32 %2 to i64, !dbg !273
  %9 = getelementptr inbounds float, float addrspace(1)* %5, i64 %8, !dbg !281
  %10 = load float, float addrspace(1)* %9, align 4, !dbg !281, !tbaa !189
  %11 = shl nuw nsw i32 %2, 2, !dbg !282
  %12 = zext i32 %11 to i64, !dbg !282
  %13 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract, i64 %12, !dbg !287
  %14 = bitcast i8 addrspace(1)* %13 to i32 addrspace(1)*
  br label %L53.i, !dbg !288

L30.i:                                            ; preds = %L15.i
  %15 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_4950() #10, !dbg !272
  unreachable, !dbg !272

L53.i:                                            ; preds = %L53.i, %L28.i
  %iv = phi i64 [ %iv.next, %L53.i ], [ 0, %L28.i ]
  %value_phi.i = phi float [ %10, %L28.i ], [ %21, %L53.i ]
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !295
  %16 = fmul float %10, %value_phi.i, !dbg !295
  %17 = bitcast float %value_phi.i to i32, !dbg !297
  %18 = bitcast float %16 to i32, !dbg !300
  %19 = cmpxchg i32 addrspace(1)* %14, i32 %17, i32 %18 acq_rel acquire, align 4, !dbg !302
  %20 = extractvalue { i32, i1 } %19, 0, !dbg !302
  %21 = bitcast i32 %20 to float, !dbg !307
  %22 = extractvalue { i32, i1 } %19, 1, !dbg !309
  %23 = fcmp uno float %value_phi.i, 0.000000e+00, !dbg !309
  %24 = fcmp uno float %21, 0.000000e+00, !dbg !309
  %25 = and i1 %23, %24, !dbg !309
  %26 = or i1 %22, %25, !dbg !309
  br i1 %26, label %julia_mul_kernel_4941_inner.exit.loopexit, label %L53.i, !dbg !311

julia_mul_kernel_4941_inner.exit.loopexit:        ; preds = %L53.i
  br label %julia_mul_kernel_4941_inner.exit, !dbg !236

julia_mul_kernel_4941_inner.exit:                 ; preds = %julia_mul_kernel_4941_inner.exit.loopexit, %entry
  ret void, !dbg !236
}

<analysis>
i32 2: {[-1]:Integer}, intvals: {2,}
i64 0: {[-1]:Anything}, intvals: {0,}
i64 1: {[-1]:Integer}, intvals: {1,}
i32 1: {[-1]:Integer}, intvals: {1,}
  %21 = bitcast i32 %20 to float, !dbg !170: {}, intvals: {}
  %14 = bitcast i8 addrspace(1)* %13 to i32 addrspace(1)*: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %17 = bitcast float %value_phi.i to i32, !dbg !157: {[-1]:Float@float}, intvals: {}
  %12 = zext i32 %11 to i64, !dbg !134: {[-1]:Integer}, intvals: {0,}
  %8 = zext i32 %2 to i64, !dbg !117: {[-1]:Integer}, intvals: {0,}
  %13 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract, i64 %12, !dbg !143: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %1 = call {}*** @julia.get_pgcstack() #9: {}, intvals: {}
{ i8 addrspace(1)*, i64, [1 x i64], i64 } %0: {[0]:Pointer, [0,-1]:Float@float, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer, [12]:Integer, [13]:Integer, [14]:Integer, [15]:Integer, [16]:Integer, [17]:Integer, [18]:Integer, [19]:Integer, [20]:Integer, [21]:Integer, [22]:Integer, [23]:Integer, [24]:Integer, [25]:Integer, [26]:Integer, [27]:Integer, [28]:Integer, [29]:Integer, [30]:Integer, [31]:Integer}, intvals: {}
  %10 = load float, float addrspace(1)* %9, align 4, !dbg !130, !tbaa !131: {[-1]:Float@float}, intvals: {}
  %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #9, !dbg !42, !range !56: {[-1]:Integer}, intvals: {0,}
  %15 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_4950() #10, !dbg !116: {}, intvals: {}
  %16 = fmul float %10, %value_phi.i, !dbg !153: {[-1]:Float@float}, intvals: {}
  %23 = fcmp uno float %value_phi.i, 0.000000e+00, !dbg !172: {[-1]:Integer}, intvals: {}
  %26 = or i1 %22, %25, !dbg !172: {[-1]:Integer}, intvals: {}
  %19 = cmpxchg i32 addrspace(1)* %14, i32 %17, i32 %18 acq_rel acquire, align 4, !dbg !164: {}, intvals: {}
  %3 = add nuw nsw i32 %2, 1, !dbg !57: {[-1]:Integer}, intvals: {1,}
  %7 = select i1 %6, i64 %.fca.2.0.extract, i64 0, !dbg !79: {[-1]:Integer}, intvals: {}
  %25 = and i1 %23, %24, !dbg !172: {[-1]:Integer}, intvals: {}
  %.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [1 x i64], i64 } %0, 3, !dbg !41: {[-1]:Integer}, intvals: {}
  %iv = phi i64 [ %iv.next, %L53.i ], [ 0, %L28.i ]: {[-1]:Integer}, intvals: {0,}
  %.not1 = icmp slt i64 %7, %4, !dbg !112: {[-1]:Integer}, intvals: {}
  %.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [1 x i64], i64 } %0, 2, 0, !dbg !41: {[-1]:Integer}, intvals: {}
  %20 = extractvalue { i32, i1 } %19, 0, !dbg !164: {}, intvals: {}
  %11 = shl nuw nsw i32 %2, 2, !dbg !134: {[-1]:Integer}, intvals: {0,}
  %24 = fcmp uno float %21, 0.000000e+00, !dbg !172: {[-1]:Integer}, intvals: {}
  %.not = icmp slt i64 %.fca.3.extract, %4, !dbg !76: {[-1]:Integer}, intvals: {}
  %9 = getelementptr inbounds float, float addrspace(1)* %5, i64 %8, !dbg !130: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %value_phi.i = phi float [ %10, %L28.i ], [ %21, %L53.i ]: {[-1]:Float@float}, intvals: {}
  %6 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !79: {[-1]:Integer}, intvals: {}
float 0.000000e+00: {[-1]:Anything}, intvals: {}
  %22 = extractvalue { i32, i1 } %19, 1, !dbg !172: {[-1]:Integer}, intvals: {}
  %4 = zext i32 %3 to i64, !dbg !60: {[-1]:Integer}, intvals: {1,}
  %18 = bitcast float %16 to i32, !dbg !162: {[-1]:Float@float}, intvals: {}
  %5 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !78: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [1 x i64], i64 } %0, 0, !dbg !41: {[-1]:Pointer, [-1,-1]:Float@float}, intvals: {}
  %iv.next = add nuw nsw i64 %iv, 1, !dbg !153: {[-1]:Integer}, intvals: {1,}
</analysis>
   %20 = extractvalue { i32, i1 } %19, 0, !dbg !164
julia: /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:1168: void AdjointGenerator<AugmentedReturnType>::visitCastInst(llvm::CastInst&) [with AugmentedReturnType = const AugmentedReturn*]: Assertion `FT' failed.

signal (6): Aborted
in expression starting at REPL[15]:1
gsignal at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x7f0b76c76728)
__assert_fail at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
visitCastInst at /workspace/srcdir/Enzyme/enzyme/Enzyme/AdjointGenerator.h:1168
visitTruncInst at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/InstVisitor.h:177 [inlined]
visitTrunc at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/Instruction.def:184 [inlined]
visit at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/Instruction.def:184
visit at /opt/x86_64-linux-gnu/x86_64-linux-gnu/sys-root/usr/local/include/llvm/IR/InstVisitor.h:112 [inlined]
CreatePrimalAndGradient at /workspace/srcdir/Enzyme/enzyme/Enzyme/EnzymeLogic.cpp:3646
EnzymeCreatePrimalAndGradient at /workspace/srcdir/Enzyme/enzyme/Enzyme/CApi.cpp:439
EnzymeCreatePrimalAndGradient at /home/jgreener/.julia/packages/Enzyme/di3zM/src/api.jl:111
enzyme! at /home/jgreener/.julia/packages/Enzyme/di3zM/src/compiler.jl:3271
unknown function (ip: 0x7f09da8f09ed)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
#codegen#80 at /home/jgreener/.julia/packages/Enzyme/di3zM/src/compiler.jl:4158
codegen##kw at /home/jgreener/.julia/packages/Enzyme/di3zM/src/compiler.jl:3878 [inlined]
#114 at /home/jgreener/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:296
get! at ./dict.jl:481
unknown function (ip: 0x7f09da8d788f)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
macro expansion at /home/jgreener/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:295 [inlined]
#emit_llvm#111 at /home/jgreener/.julia/packages/GPUCompiler/jVY4I/src/utils.jl:64
unknown function (ip: 0x7f09da855103)
unknown function (ip: 0x7f0b30133b49)
unknown function (ip: 0x7f0b30133b11)
emit_llvm##kw at /home/jgreener/.julia/packages/GPUCompiler/jVY4I/src/utils.jl:62 [inlined]
cufunction_compile at /home/jgreener/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:353
#224 at /home/jgreener/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:347 [inlined]
JuliaContext at /home/jgreener/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:76
unknown function (ip: 0x7f09da8a486a)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
cufunction_compile at /home/jgreener/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:346
cached_compilation at /home/jgreener/.julia/packages/GPUCompiler/jVY4I/src/cache.jl:90
#cufunction#221 at /home/jgreener/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299
cufunction at /home/jgreener/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:292
unknown function (ip: 0x7f09da8a41cf)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
jl_apply at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/julia.h:1838 [inlined]
do_call at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:126
eval_value at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:215
eval_body at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:467
jl_interpret_toplevel_thunk at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:750
jl_toplevel_eval_flex at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/toplevel.c:906
jl_toplevel_eval_flex at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/toplevel.c:850
eval_body at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:556
eval_body at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:522
jl_interpret_toplevel_thunk at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/interpreter.c:750
jl_toplevel_eval_flex at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/toplevel.c:906
ijl_toplevel_eval_in at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/toplevel.c:965
eval at ./boot.jl:368 [inlined]
eval_user_input at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/usr/share/julia/stdlib/v1.8/REPL/src/REPL.jl:151
repl_backend_loop at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/usr/share/julia/stdlib/v1.8/REPL/src/REPL.jl:247
start_repl_backend at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/usr/share/julia/stdlib/v1.8/REPL/src/REPL.jl:232
#run_repl#47 at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/usr/share/julia/stdlib/v1.8/REPL/src/REPL.jl:369
run_repl at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/usr/share/julia/stdlib/v1.8/REPL/src/REPL.jl:355
jfptr_run_repl_67214.clone_1 at /home/jgreener/soft/julia/julia-1.8.0/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
#966 at ./client.jl:419
jfptr_YY.966_46273.clone_1 at /home/jgreener/soft/julia/julia-1.8.0/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
jl_apply at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/julia.h:1838 [inlined]
jl_f__call_latest at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/builtins.c:774
#invokelatest#2 at ./essentials.jl:729 [inlined]
invokelatest at ./essentials.jl:726 [inlined]
run_main_repl at ./client.jl:404
exec_options at ./client.jl:318
_start at ./client.jl:522
jfptr__start_30463.clone_1 at /home/jgreener/soft/julia/julia-1.8.0/lib/julia/sys.so (unknown line)
_jl_invoke at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2367 [inlined]
ijl_apply_generic at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/gf.c:2549
jl_apply at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/julia.h:1838 [inlined]
true_main at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/jlapi.c:575
jl_repl_entrypoint at /cache/build/default-amdci4-3/julialang/julia-release-1-dot-8/src/jlapi.c:719
main at julia (unknown line)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x401098)
Allocations: 75743084 (Pool: 75677161; Big: 65923); GC: 57
Aborted (core dumped)

@jakubMitura14
Copy link
Author

Hello, I understand that you may have other problems with higher priority, as this is a huge project that you are working on, but has anybody had time to look into this atomic add problem?

@wsmoses
Copy link
Member

wsmoses commented Sep 7, 2022

Not yet unfortunately

@wsmoses
Copy link
Member

wsmoses commented Sep 28, 2022

This has now landed on main.

@wsmoses wsmoses closed this as completed Sep 28, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
upstream Enzyme proper
Projects
None yet
Development

No branches or pull requests

4 participants