diff --git a/lib/cudnn/conv.jl b/lib/cudnn/conv.jl index 1b723da47a..b1f67e2f3a 100644 --- a/lib/cudnn/conv.jl +++ b/lib/cudnn/conv.jl @@ -59,6 +59,17 @@ end # wrappers +# Forward + +function cudnnGetConvolutionForwardAlgorithmMaxCount() + count=@argout( + cudnnGetConvolutionForwardAlgorithmMaxCount( + handle(), + out(Ref{Cint}())) + )[] + return count +end + function cudnnGetConvolutionForwardAlgorithm(y::CuArray{T,N}, x::CuArray{T,N}, w::CuArray{T,N}, cdims::DenseConvDims; preference=1, memoryLimitInBytes=1<<32) where {T,N} algo=@argout( @@ -73,6 +84,64 @@ function cudnnGetConvolutionForwardAlgorithm(y::CuArray{T,N}, x::CuArray{T,N}, w return algo end +function cudnnGetConvolutionForwardAlgorithm_v7(y::CuArray{T,N}, x::CuArray{T,N}, w::CuArray{T,N}, + cdims::DenseConvDims; count=-1) where {T,N} + if count < 0 + count = cudnnGetConvolutionForwardAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionFwdAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnGetConvolutionForwardAlgorithm_v7( + handle(), TensorDesc(x), + FilterDesc(w), ConvDesc(T, cdims), + TensorDesc(y), + Cint(count), + out(Ref{Cint}()), + perfResults) + )[] + return returnedAlgoCount, perfResults +end + +function cudnnFindConvolutionForwardAlgorithm(y::CuArray{T,N}, x::CuArray{T,N}, w::CuArray{T,N}, + cdims::DenseConvDims; count=-1) where {T,N} + if count < 0 + count = cudnnGetConvolutionForwardAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionFwdAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnFindConvolutionForwardAlgorithm( + handle(), TensorDesc(x), + FilterDesc(w), ConvDesc(T, cdims), + TensorDesc(y), + Cint(count), + out(Ref{Cint}()), + perfResults) + )[] + return returnedAlgoCount, perfResults +end + +function cudnnFindConvolutionForwardAlgorithmEx(y::CuArray{T,N}, x::CuArray{T,N}, w::CuArray{T,N}, + cdims::DenseConvDims; count=-1, workspacesize=1<<32) where {T,N} + if count < 0 + count = cudnnGetConvolutionForwardAlgorithmMaxCount() + end + @workspace size=workspacesize workspace->begin + perfResults = Array{cudnnConvolutionFwdAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnFindConvolutionForwardAlgorithmEx( + handle(), TensorDesc(x), x, + FilterDesc(w), w, ConvDesc(T, cdims), + TensorDesc(y), y, + Cint(count), + out(Ref{Cint}()), + perfResults, + workspace, + workspacesize) + )[] + return returnedAlgoCount, perfResults + end +end + function cudnnConvolutionForward(y::CuArray{T,N}, x::CuArray{T,N}, w::CuArray{T,N}, cdims::DenseConvDims; algo=0, alpha=1, beta=0) where {T,N} @workspace size=@argout( @@ -110,6 +179,78 @@ function cudnnConvolutionBiasActivationForward(y::CuArray{T,N}, x::CuArray{T,N}, return y end +# Backward data + +function cudnnGetConvolutionBackwardDataAlgorithmMaxCount() + count=@argout( + cudnnGetConvolutionBackwardDataAlgorithmMaxCount( + handle(), + out(Ref{Cint}())) + )[] + return count +end + +function cudnnGetConvolutionBackwardDataAlgorithm(dx::CuArray{T,N}, w::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; preference=1, memoryLimitInBytes=1<<32) where {T,N} + algo=@argout( + cudnnGetConvolutionBackwardDataAlgorithm( + handle(), FilterDesc(w), TensorDesc(dy), ConvDesc(T, cdims), + TensorDesc(dx), cudnnConvolutionBwdDataPreference_t(preference), + Csize_t(memoryLimitInBytes), out(Ref{cudnnConvolutionBwdDataAlgo_t}())) + )[] + return algo +end + +function cudnnGetConvolutionBackwardDataAlgorithm_v7(dx::CuArray{T,N}, w::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; count=-1) where {T,N} + if count < 0 + count = cudnnGetConvolutionBackwardDataAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionBwdDataAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnGetConvolutionBackwardDataAlgorithm_v7( + handle(), FilterDesc(w), TensorDesc(dy), + ConvDesc(T, cdims), TensorDesc(dx), + Cint(count), + out(Ref{Cint}()), perfResults) + )[] + return returnedAlgoCount, perfResults +end + +function cudnnFindConvolutionBackwardDataAlgorithm(dx::CuArray{T,N}, w::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; count=-1) where {T,N} + if count < 0 + count = cudnnGetConvolutionBackwardDataAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionBwdDataAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnFindConvolutionBackwardDataAlgorithm( + handle(), FilterDesc(w), TensorDesc(dy), + ConvDesc(T, cdims), TensorDesc(dx), + Cint(count), + out(Ref{Cint}()), perfResults) + )[] + return returnedAlgoCount, perfResults +end + +function cudnnFindConvolutionBackwardDataAlgorithmEx(dx::CuArray{T,N}, w::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; count=-1, workspacesize=1<<32) where {T,N} + if count < 0 + count = cudnnGetConvolutionBackwardDataAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionBwdDataAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnFindConvolutionBackwardDataAlgorithmEx( + handle(), FilterDesc(w), w, TensorDesc(dy), y, + ConvDesc(T, cdims), TensorDesc(dx), x, + Cint(count), + out(Ref{Cint}()), + perfResults, workspace, + workspacesize) + )[] + return returnedAlgoCount, perfResults +end + function cudnnConvolutionBackwardData(dx::CuArray{T,N}, w::CuArray{T,N}, dy::CuArray{T,N}, cdims::DenseConvDims; algo=0, alpha=1, beta=0) where {T,N} @workspace size=@argout( @@ -129,6 +270,82 @@ function cudnnConvolutionBackwardData(dx::CuArray{T,N}, w::CuArray{T,N}, dy::CuA return dx end +# Backward filter + +function cudnnGetConvolutionBackwardFilterAlgorithmMaxCount() + count=@argout( + cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( + handle(), + out(Ref{Cint}())) + )[] + return count +end + +function cudnnGetConvolutionBackwardFilterAlgorithm(dw::CuArray{T,N}, x::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; preference=1, memoryLimitInBytes=1<<32) where {T,N} + algo=@argout( + cudnnGetConvolutionBackwardFilterAlgorithm( + handle(), TensorDesc(x), TensorDesc(dy), + ConvDesc(T, cdims), FilterDesc(dw), cudnnConvolutionBwdFilterPreference_t(preference), + Csize_t(memoryLimitInBytes), out(Ref{cudnnConvolutionBwdFilterAlgo_t}())) + )[] + return algo +end + +function cudnnGetConvolutionBackwardFilterAlgorithm_v7(dw::CuArray{T,N}, x::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; count=-1) where {T,N} + if count < 0 + count = cudnnGetConvolutionBackwardFilterAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionBwdFilterAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnGetConvolutionBackwardFilterAlgorithm_v7( + handle(), TensorDesc(x), TensorDesc(dy), + ConvDesc(T, cdims), FilterDesc(dw), + Cint(count), + out(Ref{Cint}()), + perfResults) + )[] + return returnedAlgoCount, perfResults +end + +function cudnnFindConvolutionBackwardFilterAlgorithm(dw::CuArray{T,N}, x::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; count=-1) where {T,N} + if count < 0 + count = cudnnGetConvolutionBackwardFilterAlgorithmMaxCount() + end + perfResults = Array{cudnnConvolutionBwdFilterAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnFindConvolutionBackwardFilterAlgorithm( + handle(), TensorDesc(x), TensorDesc(dy), + ConvDesc(T, cdims), FilterDesc(dw), + Cint(count), + out(Ref{Cint}()), + perfResults) + )[] + return returnedAlgoCount, perfResults +end + +function cudnnFindConvolutionBackwardFilterAlgorithmEx(dw::CuArray{T,N}, x::CuArray{T,N}, dy::CuArray{T,N}, + cdims::DenseConvDims; count=-1, workspacesize=1<<32) where {T,N} + if count < 0 + count = cudnnGetConvolutionBackwardFilterAlgorithmMaxCount() + end + @workspace size=workspacesize workspace->begin + perfResults = Array{cudnnConvolutionBwdFilterAlgoPerf_t, 1}(undef, count) + returnedAlgoCount=@argout( + cudnnFindConvolutionBackwardFilterAlgorithmEx( + handle(), TensorDesc(x), x, TensorDesc(dy), + dy, ConvDesc(T, cdims), FilterDesc(dw), dw, + Cint(count), + out(Ref{Cint}()), + perfResults, workspace, + workspacesize) + )[] + return returnedAlgoCount, perfResults + end +end + function cudnnConvolutionBackwardFilter(dw::CuArray{T,N}, x::CuArray{T,N}, dy::CuArray{T,N}, cdims::DenseConvDims; algo=0, alpha=1, beta=0) where {T,N} @workspace size=@argout( @@ -149,6 +366,8 @@ function cudnnConvolutionBackwardFilter(dw::CuArray{T,N}, x::CuArray{T,N}, dy::C return dw end +# Backward bias + function cudnnConvolutionBackwardBias(db::CuArray{T,N}, dy::CuArray{T,N}; alpha=1, beta=0) where {T,N} cudnnConvolutionBackwardBias(handle(), Ref(T(alpha)), TensorDesc(dy), dy, Ref(T(beta)), TensorDesc(db), db) return db diff --git a/lib/cudnn/nnlib.jl b/lib/cudnn/nnlib.jl index e7f16bdbd5..0623764874 100644 --- a/lib/cudnn/nnlib.jl +++ b/lib/cudnn/nnlib.jl @@ -73,7 +73,11 @@ function conv_bias_act!(y::CuArray{T}, x::CuArray{T}, w::CuArray{T}, cdims::Dens # only relu and identity are supported if σ == NNlib.relu if algo < 0 - algo = UInt32(cudnnGetConvolutionForwardAlgorithm(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims))) + # algo = UInt32(cudnnGetConvolutionForwardAlgorithm(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims))) # will be removed in cuDNN 8 + returnedAlgoCount, perfResults = cudnnGetConvolutionForwardAlgorithm_v7(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims)) + # returnedAlgoCount, perfResults = cudnnFindConvolutionForwardAlgorithm(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims)) + # returnedAlgoCount, perfResults = cudnnFindConvolutionForwardAlgorithmEx(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims)) + algo = UInt32(perfResults[1].algo) end cudnnConvolutionBiasActivationForward(fix1d(y), fix1d(x), fix1d(w), fix1d(z), fix1d(b), fix1d(cdims), algo=algo, alpha1=alpha1, alpha2=alpha2, @@ -97,7 +101,11 @@ function conv!(y::CuArray{T}, x::CuArray{T}, w::CuArray{T}, cdims::DenseConvDims end if algo < 0 - algo = UInt32(cudnnGetConvolutionForwardAlgorithm(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims))) + # algo = UInt32(cudnnGetConvolutionForwardAlgorithm(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims))) # will be removed in cuDNN 8 + returnedAlgoCount, perfResults = cudnnGetConvolutionForwardAlgorithm_v7(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims)) + # returnedAlgoCount, perfResults = cudnnFindConvolutionForwardAlgorithm(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims)) + # returnedAlgoCount, perfResults = cudnnFindConvolutionForwardAlgorithmEx(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims)) + algo = UInt32(perfResults[1].algo) end cudnnConvolutionForward(fix1d(y), fix1d(x), fix1d(w), fix1d(cdims), alpha=alpha, algo=algo)