Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
448 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
*.jl.cov | ||
*.jl.*.cov | ||
*.jl.mem | ||
*~ | ||
.juliahistory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
module CuConv | ||
|
||
# package code goes here | ||
include("core.jl") | ||
|
||
end # module |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,240 @@ | ||
|
||
function conv4{T}(w::CuArray{T},x::CuArray{T}; | ||
handle=cudnnhandle(), algo=0, workSpace=C_NULL, workSpaceSizeInBytes=0, alpha=1, | ||
o...) # padding=0, stride=1, upscale=1, mode=0 | ||
y = similar(x, cdims(w,x;o...)) | ||
beta=0 # nonzero beta does not make sense when we create y | ||
@cuda(cudnn, cudnnConvolutionForward, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, UInt32,Cptr, Csize_t, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),TD(x),x.ptr,FD(w),w.ptr,CD(w,x;o...),algo,workSpace,workSpaceSizeInBytes,Ref(T(beta)),TD(y),y.ptr) | ||
return y | ||
end | ||
|
||
function conv4x{T}(w::CuArray{T},x::CuArray{T},dy::CuArray{T}; | ||
handle=cudnnhandle(), alpha=1, algo=0, workSpace=C_NULL, workSpaceSizeInBytes=0, | ||
o...) # padding=0, stride=1, upscale=1, mode=0 | ||
beta = 0 | ||
dx = similar(x) | ||
if cudnnVersion >= 4000 | ||
@cuda(cudnn,cudnnConvolutionBackwardData, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, UInt32,Cptr, Csize_t, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),FD(w),w.ptr,TD(dy),dy.ptr,CD(w,x;o...),algo,workSpace,workSpaceSizeInBytes,Ref(T(beta)),TD(dx),dx.ptr) | ||
elseif cudnnVersion >= 3000 | ||
@cuda(cudnn,cudnnConvolutionBackwardData_v3, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, UInt32,Cptr, Csize_t, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),FD(w),w.ptr,TD(dy),dy.ptr,CD(w,x;o...),algo,workSpace,workSpaceSizeInBytes,Ref(T(beta)),TD(dx),dx.ptr) | ||
else | ||
@cuda(cudnn,cudnnConvolutionBackwardData, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),FD(w),w.ptr,TD(dy),dy.ptr,CD(w,x;o...),Ref(T(beta)),TD(dx),dx.ptr) | ||
end | ||
return dx | ||
end | ||
|
||
function conv4w{T}(w::CuArray{T},x::CuArray{T},dy::CuArray{T}; | ||
handle=cudnnhandle(), alpha=1, algo=0, workSpace=C_NULL, workSpaceSizeInBytes=0, | ||
o...) # padding=0, stride=1, upscale=1, mode=0 | ||
beta = 0 | ||
dw = similar(w) | ||
if cudnnVersion >= 4000 | ||
@cuda(cudnn,cudnnConvolutionBackwardFilter, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, UInt32,Cptr, Csize_t, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),TD(x),x.ptr,TD(dy),dy.ptr,CD(w,x;o...),algo,workSpace,workSpaceSizeInBytes,Ref(T(beta)),FD(dw),dw.ptr) | ||
elseif cudnnVersion >= 3000 | ||
@cuda(cudnn,cudnnConvolutionBackwardFilter_v3, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, UInt32,Cptr, Csize_t, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),TD(x),x.ptr,TD(dy),dy.ptr,CD(w,x;o...),algo,workSpace,workSpaceSizeInBytes,Ref(T(beta)),FD(dw),dw.ptr) | ||
else | ||
@cuda(cudnn,cudnnConvolutionBackwardFilter, | ||
(Cptr,Ptr{T},Cptr,Ptr{T},Cptr,Ptr{T},Cptr, Ptr{T},Cptr,Ptr{T}), | ||
handle,Ref(T(alpha)),TD(x),x.ptr,TD(dy),dy.ptr,CD(w,x;o...),Ref(T(beta)),FD(dw),dw.ptr) | ||
end | ||
return dw | ||
end | ||
|
||
|
||
|
||
|
||
|
||
mutable struct TD; ptr | ||
function TD(a::CuArray) | ||
d = Cptr[0] | ||
@cuda(cudnn,cudnnCreateTensorDescriptor,(Ptr{Cptr},),d) | ||
n = ndims(a) | ||
sz = [Cint(size(a,n-i+1)) for i=1:n] | ||
st = [Cint(stride(a,n-i+1)) for i=1:n] | ||
@cuda(cudnn,cudnnSetTensorNdDescriptor, | ||
(Cptr,UInt32,Cint,Ptr{Cint},Ptr{Cint}), | ||
d[1], DT(a), n, sz, st) | ||
td = new(d[1]) | ||
finalizer(td, x->@cuda(cudnn,cudnnDestroyTensorDescriptor,(Cptr,),x.ptr)) | ||
return td | ||
end | ||
end | ||
|
||
mutable struct FD; ptr | ||
function FD(a::CuArray) | ||
d = Cptr[0] | ||
@cuda(cudnn,cudnnCreateFilterDescriptor,(Ptr{Cptr},),d) | ||
n = ndims(a) | ||
sz = [Cint(size(a,n-i+1)) for i=1:n] | ||
if cudnnVersion >= 5000 | ||
@cuda(cudnn,cudnnSetFilterNdDescriptor, | ||
(Cptr,UInt32,UInt32,Cint,Ptr{Cint}), | ||
d[1], DT(a), 0, n, sz) | ||
elseif cudnnVersion >= 4000 | ||
@cuda(cudnn,cudnnSetFilterNdDescriptor_v4, | ||
(Cptr,UInt32,UInt32,Cint,Ptr{Cint}), | ||
d[1], DT(a), 0, n, sz) | ||
else | ||
@cuda(cudnn,cudnnSetFilterNdDescriptor, | ||
(Cptr,UInt32,Cint,Ptr{Cint}), | ||
d[1], DT(a), n, sz) | ||
end | ||
fd = new(d[1]) | ||
finalizer(fd, x->@cuda(cudnn,cudnnDestroyFilterDescriptor,(Cptr,),x.ptr)) | ||
return fd | ||
end | ||
end | ||
|
||
mutable struct CD; ptr | ||
function CD(w::CuArray,x::CuArray; padding=0, stride=1, upscale=1, mode=0) | ||
d = Cptr[0] | ||
@cuda(cudnn,cudnnCreateConvolutionDescriptor,(Ptr{Cptr},),d) | ||
nd = ndims(x)-2 | ||
if cudnnVersion >= 4000 | ||
@cuda(cudnn,cudnnSetConvolutionNdDescriptor, | ||
(Cptr,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint},UInt32,UInt32), | ||
d[1],nd,cdsize(padding,nd),cdsize(stride,nd),cdsize(upscale,nd),mode,DT(x)) | ||
elseif cudnnVersion > 3000 # does not work when cudnnVersion==3000 | ||
@cuda(cudnn,cudnnSetConvolutionNdDescriptor_v3, | ||
(Cptr,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint},UInt32,UInt32), | ||
d[1],nd,cdsize(padding,nd),cdsize(stride,nd),cdsize(upscale,nd),mode,DT(x)) | ||
else | ||
@cuda(cudnn,cudnnSetConvolutionNdDescriptor, | ||
(Cptr,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint},UInt32), | ||
d[1],nd,cdsize(padding,nd),cdsize(stride,nd),cdsize(upscale,nd),mode) | ||
end | ||
cd = new(d[1]) | ||
finalizer(cd, x->@cuda(cudnn,cudnnDestroyConvolutionDescriptor,(Cptr,),x.ptr)) | ||
return cd | ||
end | ||
end | ||
|
||
mutable struct PD; ptr | ||
function PD(x::CuArray; window=2, padding=0, stride=window, mode=0, maxpoolingNanOpt=0) | ||
d = Cptr[0] | ||
@cuda(cudnn,cudnnCreatePoolingDescriptor,(Ptr{Cptr},),d) | ||
nd = ndims(x)-2 | ||
if cudnnVersion >= 5000 | ||
@cuda(cudnn,cudnnSetPoolingNdDescriptor, | ||
(Cptr,UInt32,UInt32,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint}), | ||
d[1],mode,maxpoolingNanOpt,nd,cdsize(window,nd),cdsize(padding,nd),cdsize(stride,nd)) | ||
elseif cudnnVersion >= 4000 | ||
@cuda(cudnn,cudnnSetPoolingNdDescriptor_v4, | ||
(Cptr,UInt32,UInt32,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint}), | ||
d[1],mode,maxpoolingNanOpt,nd,cdsize(window,nd),cdsize(padding,nd),cdsize(stride,nd)) | ||
else | ||
@cuda(cudnn,cudnnSetPoolingNdDescriptor, | ||
(Cptr,UInt32,Cint,Ptr{Cint},Ptr{Cint},Ptr{Cint}), | ||
d[1],mode,nd,cdsize(window,nd),cdsize(padding,nd),cdsize(stride,nd)) | ||
end | ||
pd = new(d[1]) | ||
finalizer(pd, x->@cuda(cudnn,cudnnDestroyPoolingDescriptor,(Cptr,),x.ptr)) | ||
return pd | ||
end | ||
end | ||
|
||
import Base: unsafe_convert | ||
unsafe_convert(::Type{Cptr}, td::TD)=td.ptr | ||
unsafe_convert(::Type{Cptr}, fd::FD)=fd.ptr | ||
unsafe_convert(::Type{Cptr}, cd::CD)=cd.ptr | ||
unsafe_convert(::Type{Cptr}, pd::PD)=pd.ptr | ||
|
||
# fill and reverse Cint array with padding etc. for cudnn calls | ||
function cdsize(w, nd) | ||
if isa(w,Number) | ||
fill(Cint(w),nd) | ||
elseif length(w)==nd | ||
[ Cint(w[nd-i+1]) for i=1:nd ] | ||
else | ||
throw(DimensionMismatch("$w $nd")) | ||
end | ||
end | ||
|
||
# convert padding etc. size to an Int array of the right dimension | ||
function psize(p, x) | ||
nd = ndims(x)-2 | ||
if isa(p,Number) | ||
fill(Int(p),nd) | ||
elseif length(p)==nd | ||
collect(Int,p) | ||
else | ||
throw(DimensionMismatch("psize: $p $nd")) | ||
end | ||
end | ||
|
||
DT(::CuArray{Float32})=UInt32(0) | ||
DT(::CuArray{Float64})=UInt32(1) | ||
DT(::CuArray{Float16})=UInt32(2) | ||
|
||
function cdims(w,x; padding=0, stride=1, o...) | ||
N = ndims(x) | ||
ntuple(N) do i | ||
if i < N-1 | ||
pi = (if isa(padding,Number); padding; else padding[i]; end) | ||
si = (if isa(stride,Number); stride; else stride[i]; end) | ||
1 + div(size(x,i) - size(w,i) + 2*pi, si) | ||
elseif i == N-1 | ||
size(w,N) | ||
else # i == N | ||
size(x,N) | ||
end | ||
end | ||
end | ||
|
||
function pdims(x; window=2, padding=0, stride=window, o...) | ||
N = ndims(x) | ||
ntuple(N) do i | ||
if i < N-1 | ||
wi = (if isa(window,Number); window; else window[i]; end) | ||
pi = (if isa(padding,Number); padding; else padding[i]; end) | ||
si = (if isa(stride,Number); stride; else stride[i]; end) | ||
1 + div(size(x,i) + 2*pi - wi, si) | ||
else | ||
size(x,i) | ||
end | ||
end | ||
end | ||
|
||
function dcdims(w,x; padding=0, stride=1, o...) | ||
N = ndims(x) | ||
ntuple(N) do i | ||
if i < N-1 | ||
pi = (if isa(padding,Number); padding; else padding[i]; end) | ||
si = (if isa(stride,Number); stride; else stride[i]; end) | ||
si*(size(x,i)-1) + size(w,i) - 2*pi | ||
elseif i == N-1 | ||
size(w,N) | ||
else | ||
size(x,N) | ||
end | ||
end | ||
end | ||
|
||
function updims(x; window=2, padding=0, stride=window, o...) | ||
window = psize(window,x) | ||
stride = psize(stride,x) | ||
padding = psize(padding,x) | ||
N = ndims(x) | ||
ntuple(N) do i | ||
if i < N-1 | ||
(size(x,i)-1)*stride[i]+window[i]-2*padding[i] | ||
else | ||
size(x,i) | ||
end | ||
end | ||
end | ||
|
||
# convolution padding size that preserves the input size when filter size is odd and stride=1 | ||
padsize(w)=ntuple(i->div(size(w,i)-1,2), ndims(w)-2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
|
||
using CuArrays | ||
|
||
const Cptr = Ptr{Void} | ||
macro gs(); if false; esc(:(ccall(("cudaDeviceSynchronize","libcudart"),UInt32,()))); end; end | ||
|
||
include("gpu.jl") | ||
include("conv.jl") | ||
|
||
|
||
|
||
|
||
|
||
# See if we have a gpu at initialization: | ||
function __init__() | ||
try | ||
r = gpu(true) | ||
# info(r >= 0 ? "Knet using GPU $r" : "No GPU found, Knet using the CPU") | ||
catch e | ||
gpu(false) | ||
# warn("Knet using the CPU: $e") | ||
end | ||
end |
Oops, something went wrong.