-
Notifications
You must be signed in to change notification settings - Fork 89
Closed
Description
The following code:
```julia
A = GPUArray(randn(Float32, 3, 2))
n = 2
A .^ n
```
gives an error:
ERROR: LLVM error: Cannot select: 0xf179100: f32 = fpow 0xf178f40, 0xf179090
0xf178f40: f32,ch = load<LD4[%30](tbaa=<0xd4c1048>)> 0xf176c80, 0xf177930, undef:i64
0xf177930: i64 = add 0xf1777e0, Constant:i64<-4>
0xf1777e0: i64 = add 0xf174cd0, 0xf1792c0
0xf174cd0: i64,ch = CopyFromReg 0xeb554a0:1, Register:i64 %vreg15
0xeb55740: i64 = Register %vreg15
0xf1792c0: i64 = NVPTXISD::MUL_WIDE_UNSIGNED 0xf1773f0, Constant:i32<4>
0xf1773f0: i32 = NVPTXISD::IMAD 0xf1772a0, 0xeb557b0, 0xeb54c50
0xf1772a0: i32 = add 0xf1771c0, Constant:i32<-1>
0xf1771c0: i32 = select 0xf176f20, 0xeb555f0, 0xf178d10
0xf176f20: i1 = xor 0xf177310, Constant:i1<-1>
0xf177310: i1 = truncate 0xf1751a0
0xf176eb0: i1 = Constant<-1>
0xeb555f0: i32,ch = CopyFromReg 0xeb54fd0:1, Register:i32 %vreg10
0xeb55190: i32 = Register %vreg10
0xf178d10: i32,ch,glue = NVPTXISD::LoadParam<LDST4[<unknown>]> 0xf1775b0:1, Constant:i32<1>, Constant:i32<4>, 0xf1775b0:2
0xf176e40: i32 = Constant<1>
0xf175a60: i32 = Constant<4>
0xf1775b0: i32,ch,glue = NVPTXISD::LoadParam<LDST4[<unknown>]> 0xf177690, Constant:i32<1>, Constant:i32<0>, 0xf177690:1
0xf177230: i32 = Constant<-1>
0xeb557b0: i32,ch = CopyFromReg 0xeb59f50, Register:i32 %vreg1
0xeb55820: i32 = Register %vreg1
0xeb54c50: i32 = select 0xeb54b70, 0xeb54fd0, 0xf1775b0
0xeb54b70: i1 = xor 0xf175600, Constant:i1<-1>
0xf175600: i1 = truncate 0xf174f00
0xf174f00: i32,ch,glue = NVPTXISD::LoadParam<LDST4[<unknown>](align=1)> 0xf174fe0, Constant:i32<1>, Constant:i32<0>, 0xf174fe0:1
0xf176eb0: i1 = Constant<-1>
0xeb54fd0: i32,ch = CopyFromReg 0xeb59f50, Register:i32 %vreg9
0xf177540: i32 = Register %vreg9
0xf1775b0: i32,ch,glue = NVPTXISD::LoadParam<LDST4[<unknown>]> 0xf177690, Constant:i32<1>, Constant:i32<0>, 0xf177690:1
0xf176e40: i32 = Constant<1>
0xf175130: i32 = Constant<0>
0xf177690: ch,glue = NVPTXISD::CallArgEnd 0xf177770, Constant:i32<1>, 0xf177770:1
0xf176e40: i32 = Constant<1>
0xf177770: ch,glue = NVPTXISD::LastCallArg 0xf1756e0, Constant:i32<1>, Constant:i32<1>, 0xf1756e0:1
0xf175a60: i32 = Constant<4>
0xf178c30: i64 = Constant<-4>
0xf176dd0: i64 = undef
0xf179090: f32 = sint_to_fp 0xf179020
0xf179020: i64,ch = CopyFromReg 0xeb59f50, Register:i64 %vreg16
0xf178fb0: i64 = Register %vreg16
In function: ptxcall_broadcast_kernel__61772
Stacktrace:
[1] handle_error(::Cstring) at /home/dfdx/.julia/v0.6/LLVM/src/core/context.jl:96
[2] macro expansion at /home/dfdx/.julia/v0.6/LLVM/src/util/logging.jl:102 [inlined]
[3] macro expansion at /home/dfdx/.julia/v0.6/LLVM/src/base.jl:20 [inlined]
[4] LLVMTargetMachineEmitToMemoryBuffer(::Ptr{LLVM.API.LLVMOpaqueTargetMachine}, ::Ptr{LLVM.API.LLVMOpaqueModule}, ::UInt32, ::Base.RefValue{Cstring}, ::Base.RefValue{Ptr{LLVM.API.LLVMOpaqueMemoryBuffer}}) at /home/dfdx/.julia/v0.6/LLVM/src/../lib/3.9/libLLVM_h.jl:301
[5] emit(::LLVM.TargetMachine, ::LLVM.Module, ::UInt32) at /home/dfdx/.julia/v0.6/LLVM/src/targetmachine.jl:39
[6] #mcgen#46(::Bool, ::Function, ::LLVM.Module, ::LLVM.Function, ::VersionNumber) at /home/dfdx/.julia/v0.6/CUDAnative/src/jit.jl:296
[7] (::CUDAnative.#kw##mcgen)(::Array{Any,1}, ::CUDAnative.#mcgen, ::LLVM.Module, ::LLVM.Function, ::VersionNumber) at ./<missing>:0
[8] #compile_function#47(::Bool, ::Function, ::Any, ::Any, ::VersionNumber) at /home/dfdx/.julia/v0.6/CUDAnative/src/jit.jl:319
[9] cufunction(::CUDAdrv.CuDevice, ::Any, ::Any) at /home/dfdx/.julia/v0.6/CUDAnative/src/jit.jl:356
[10] macro expansion at /home/dfdx/.julia/v0.6/CUDAnative/src/execution.jl:106 [inlined]
[11] _cuda(::Tuple{Int64,Int64}, ::Int64, ::CUDAdrv.CuStream, ::GPUArrays.#broadcast_kernel!, ::Float32, ::Base.#^, ::CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::Tuple{UInt32,UInt32}, ::UInt32, ::Tuple{GPUArrays.BroadcastDescriptorN{Array,2},GPUArrays.BroadcastDescriptorN{Any,0}}, ::CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, ::Int64) at /home/dfdx/.julia/v0.6/CUDAnative/src/execution.jl:79
[12] gpu_call(::Function, ::GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext}, ::Tuple{Base.#^,GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext},Tuple{UInt32,UInt32},UInt32,Tuple{GPUArrays.BroadcastDescriptorN{Array,2},GPUArrays.BroadcastDescriptorN{Any,0}},GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext},Int64}, ::Int64, ::Void) at /home/dfdx/.julia/v0.6/GPUArrays/src/backends/cudanative/cudanative.jl:194
[13] _broadcast!(::Function, ::GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext}, ::Tuple{Tuple{Bool,Bool},Tuple{}}, ::Tuple{Tuple{Int64,Int64},Tuple{}}, ::GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext}, ::Tuple{Int64}, ::Type{Val{1}}, ::CartesianRange{CartesianIndex{2}}) at /home/dfdx/.julia/v0.6/GPUArrays/src/broadcast.jl:66
[14] broadcast_t(::Function, ::Type{Float32}, ::Tuple{Base.OneTo{Int64},Base.OneTo{Int64}}, ::CartesianRange{CartesianIndex{2}}, ::GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext}, ::Int64) at /home/dfdx/.julia/v0.6/GPUArrays/src/broadcast.jl:33
[15] broadcast_c at ./broadcast.jl:314 [inlined]
[16] broadcast(::Function, ::GPUArrays.GPUArray{Float32,2,CUDAdrv.CuArray{Float32,2},GPUArrays.CUBackend.CUContext}, ::Int64) at ./broadcast.jl:434
Note that using a literal constant power, e.g. `A .^ 2`, works fine.
Version info:
Julia Version 0.6.0
Commit 9036443 (2017-06-19 13:05 UTC)
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
WORD_SIZE: 64
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
LAPACK: libopenblas64_
LIBM: libopenlibm
LLVM: libLLVM-3.9.1 (ORCJIT, skylake)
GPU: GeForce GTX 960M
CUDA 8.0
Metadata
Metadata
Assignees
Labels
No labels