Skip to content

Commit

Permalink
Simplify checking for memory pool support.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Jan 28, 2022
1 parent 5c4e4ed commit f1f306c
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 17 deletions.
11 changes: 5 additions & 6 deletions lib/cudadrv/devices.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ end

## attributes

export attribute, warpsize, capability, unified_addressing
export attribute, warpsize, capability, memory_pools_supported, unified_addressing

"""
attribute(dev::CuDevice, code)
Expand Down Expand Up @@ -195,11 +195,10 @@ function capability(dev::CuDevice)
attribute(dev, DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR))
end

has_stream_ordered(dev::CuDevice) =
@memoize dev::CuDevice begin
CUDA.version() >= v"11.2" && !haskey(ENV, "CUDA_MEMCHECK") &&
attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
end::Bool
memory_pools_supported(dev::CuDevice) =
CUDA.version() >= v"11.2" &&
attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
@deprecate has_stream_ordered(dev::CuDevice) memory_pools_supported(dev)

unified_addressing(dev::CuDevice) =
attribute(dev, DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) == 1
Expand Down
2 changes: 1 addition & 1 deletion lib/cudadrv/memory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ GPU, and requires explicit calls to `unsafe_copyto!`, which wraps `cuMemcpy`,
for access on the CPU.
"""
function alloc(::Type{DeviceBuffer}, bytesize::Integer;
async::Bool=CUDA.has_stream_ordered(device()),
async::Bool=memory_pools_supported(device()),
stream::Union{Nothing,CuStream}=nothing,
pool::Union{Nothing,CuMemoryPool}=nothing)
bytesize == 0 && return DeviceBuffer()
Expand Down
12 changes: 3 additions & 9 deletions src/pool.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,10 @@ end

const __stream_ordered = LazyInitialized{Vector{Bool}}()
function stream_ordered(dev::CuDevice)
# TODO: improve @memoize to use the device ID to index a known-length vector cache.
flags = get!(__stream_ordered) do
val = Vector{Bool}(undef, ndevices())
if version() < v"11.2" || get(ENV, "JULIA_CUDA_MEMORY_POOL", "cuda") == "none"
fill!(val, false)
else
for dev in devices()
val[deviceid(dev)+1] = attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
end
end
val
[memory_pools_supported(dev) && get(ENV, "JULIA_CUDA_MEMORY_POOL", "cuda") == "cuda"
for dev in devices()]
end
@inbounds flags[deviceid(dev)+1]
end
Expand Down
2 changes: 1 addition & 1 deletion test/cudadrv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ for srcTy in [Mem.Device, Mem.Host, Mem.Unified],

# test device with context in which pointer was allocated.
@test device(typed_pointer(src, T)) == device()
if !CUDA.has_stream_ordered(device())
if !memory_pools_supported(device())
# NVIDIA bug #3319609
@test context(typed_pointer(src, T)) == context()
end
Expand Down

0 comments on commit f1f306c

Please sign in to comment.