Slight changes to pool management #1344

Merged · 3 commits · Jan 28, 2022
11 changes: 5 additions & 6 deletions lib/cudadrv/devices.jl
@@ -163,7 +163,7 @@ end

 ## attributes

-export attribute, warpsize, capability, unified_addressing
+export attribute, warpsize, capability, memory_pools_supported, unified_addressing

 """
     attribute(dev::CuDevice, code)
@@ -195,11 +195,10 @@ function capability(dev::CuDevice)
              attribute(dev, DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR))
 end

-has_stream_ordered(dev::CuDevice) =
-    @memoize dev::CuDevice begin
-        CUDA.version() >= v"11.2" && !haskey(ENV, "CUDA_MEMCHECK") &&
-        attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
-    end::Bool
+memory_pools_supported(dev::CuDevice) =
+    CUDA.version() >= v"11.2" &&
+    attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
+@deprecate has_stream_ordered(dev::CuDevice) memory_pools_supported(dev)

 unified_addressing(dev::CuDevice) =
     attribute(dev, DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING) == 1
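For callers, the rename is mechanical, and the `@deprecate` keeps old code working with a warning. Note that the new query also drops the old `CUDA_MEMCHECK` environment check rather than relocating it. A minimal usage sketch, assuming CUDA.jl with this change and a working GPU (the `@info` message is illustrative):

```julia
using CUDA

dev = device()  # the current CuDevice

# new exported query: true on CUDA 11.2+ when the device reports
# DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED
if memory_pools_supported(dev)
    @info "memory pools (stream-ordered allocation) available" dev
end

# the old, unexported spelling still resolves, but warns about the rename
CUDA.has_stream_ordered(dev)
```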
2 changes: 1 addition & 1 deletion lib/cudadrv/memory.jl
@@ -68,7 +68,7 @@ GPU, and requires explicit calls to `unsafe_copyto!`, which wraps `cuMemcpy`,
 for access on the CPU.
 """
 function alloc(::Type{DeviceBuffer}, bytesize::Integer;
-               async::Bool=CUDA.has_stream_ordered(device()),
+               async::Bool=memory_pools_supported(device()),
                stream::Union{Nothing,CuStream}=nothing,
                pool::Union{Nothing,CuMemoryPool}=nothing)
     bytesize == 0 && return DeviceBuffer()
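The only functional change here is the default of the `async` keyword, which now keys off the renamed device query. A hedged sketch of both paths (the buffer size is arbitrary; `Mem.alloc`/`Mem.free` with `Mem.Device` follow the spellings used by the test file below):

```julia
using CUDA

# default: stream-ordered (async) allocation wherever the device
# supports memory pools, plain cuMemAlloc otherwise
buf = Mem.alloc(Mem.Device, 1 << 20)

# force the legacy synchronous allocator regardless of device support
sync_buf = Mem.alloc(Mem.Device, 1 << 20; async=false)

Mem.free(buf)
Mem.free(sync_buf)
```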
16 changes: 6 additions & 10 deletions src/pool.jl
@@ -66,17 +66,10 @@ end

 const __stream_ordered = LazyInitialized{Vector{Bool}}()
 function stream_ordered(dev::CuDevice)
-    # TODO: improve @memoize to use the device ID to index a know-length vector cache.
     flags = get!(__stream_ordered) do
-        val = Vector{Bool}(undef, ndevices())
-        if version() < v"11.2" || haskey(ENV, "CUDA_MEMCHECK") ||
-           get(ENV, "JULIA_CUDA_MEMORY_POOL", "cuda") == "none"
-            fill!(val, false)
-        else
-            for dev in devices()
-                val[deviceid(dev)+1] = attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
-            end
-        end
-        val
+        [memory_pools_supported(dev) && get(ENV, "JULIA_CUDA_MEMORY_POOL", "cuda") == "cuda"
+         for dev in devices()]
     end
     @inbounds flags[deviceid(dev)+1]
 end
@@ -92,6 +85,9 @@ function pool_mark(dev::CuDevice)
     if status[] === nothing
         pool = memory_pool(dev)

+        # allow the pool to use up all memory of this device
+        attribute!(memory_pool(dev), MEMPOOL_ATTR_RELEASE_THRESHOLD, typemax(UInt64))
+
         # launch a task to periodically trim the pool
         if isinteractive() && !isassigned(__pool_cleanup)
             __pool_cleanup[] = @async pool_cleanup()
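Two behavioral points fall out of this file: `stream_ordered` now enables the pool only when `JULIA_CUDA_MEMORY_POOL` is left at its default `cuda`, and `pool_mark` raises the pool's release threshold so freed memory stays cached in the pool instead of being returned to the driver eagerly. A sketch of the attribute round-trip, assuming the unexported `CUDA.memory_pool`, `CUDA.attribute` and `CUDA.attribute!` wrappers keep their current spellings:

```julia
using CUDA

dev  = device()
pool = CUDA.memory_pool(dev)  # the device's current memory pool

# what pool_mark now does on first use of a device: an effectively
# infinite release threshold, i.e. never eagerly shrink the pool
CUDA.attribute!(pool, CUDA.MEMPOOL_ATTR_RELEASE_THRESHOLD, typemax(UInt64))

# reading the attribute back
thr = CUDA.attribute(UInt64, pool, CUDA.MEMPOOL_ATTR_RELEASE_THRESHOLD)
@assert thr == typemax(UInt64)
```

Opting out of pooled allocations entirely remains an environment switch, checked once per session: start Julia with `JULIA_CUDA_MEMORY_POOL=none`.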
2 changes: 1 addition & 1 deletion test/cudadrv.jl
@@ -455,7 +455,7 @@ for srcTy in [Mem.Device, Mem.Host, Mem.Unified],

     # test device with context in which pointer was allocated.
     @test device(typed_pointer(src, T)) == device()
-    if !CUDA.has_stream_ordered(device())
+    if !memory_pools_supported(device())
         # NVIDIA bug #3319609
         @test context(typed_pointer(src, T)) == context()
     end