/
context.jl
379 lines (292 loc) · 9.55 KB
/
context.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# Context management
export
CuPrimaryContext, CuContext, current_context, has_context, activate,
unsafe_reset!, isactive, flags, setflags!,
device, device_synchronize
## construction and destruction
# NOTE(review): `@enum_without_prefix` is defined outside this file. Presumably it makes the
# `CUctx_flags` values available without their `CU_` prefix (the `CuContext` docstring
# refers to `CTX_SCHED_AUTO`) -- confirm against the macro definition.
@enum_without_prefix CUctx_flags CU_
"""
    CuPrimaryContext(dev::CuDevice)

Create a primary CUDA context for a given device.

Each primary context is unique per device and is shared with CUDA runtime API. It is meant
for interoperability with (applications using) the runtime API.

This object only records the device `dev`; use `CuContext(pctx)` to obtain a context that
is usable with the driver API.
"""
struct CuPrimaryContext
    dev::CuDevice
end
"""
    CuContext(dev::CuDevice, flags=CTX_SCHED_AUTO)
    CuContext(f::Function, ...)

Create a CUDA context for device `dev`. A context on the GPU is analogous to a process on
the CPU: it has its own distinct address space and owns the resources allocated in it. When
a context is destroyed, the system cleans up the resources allocated to it.

Once you are done using the context, call [`CUDA.unsafe_destroy!`](@ref) to mark it for
deletion, or use do-block syntax with this constructor.
"""
mutable struct CuContext
    handle::CUcontext
    valid::Bool

    # Look up the unique `CuContext` object registered for `handle` in `valid_contexts`,
    # creating and registering a new object (marked valid) when there is none yet.
    function new_unique(handle)
        # XXX: this makes it dangerous to call this function from finalizers.
        #      can we do without the lock?
        Base.@lock context_lock begin
            get!(() -> new(handle, true), valid_contexts, handle)
        end
    end

    # create a new context on `dev`
    function CuContext(dev::CuDevice, flags=0)
        ref = Ref{CUcontext}()
        cuCtxCreate_v2(ref, flags, dev)
        return new_unique(ref[])
    end

    # retain the primary context of `pctx.dev`
    function CuContext(pctx::CuPrimaryContext)
        ref = Ref{CUcontext}()
        cuDevicePrimaryCtxRetain(ref, pctx.dev)
        return new_unique(ref[])
    end

    # wrap the context reported as current by the driver; errors if there is none
    global function current_context()
        ref = Ref{CUcontext}()
        cuCtxGetCurrent(ref)
        if ref[] == C_NULL
            throw(UndefRefError())
        end
        return new_unique(ref[])
    end

    # for outer constructors
    global function _CuContext(handle::CUcontext)
        return new_unique(handle)
    end
end
# NOTE(review): the inner constructor in this file only calls `cuDevicePrimaryCtxRetain` and
# does not register a finalizer, so the statement below about release upon finalization
# should be confirmed (or the docstring updated) against the rest of the package.
"""
    CuContext(pctx::CuPrimaryContext)

Retain the primary context on the GPU, returning a context compatible with the driver API.
The primary context will be released when the returned driver context is finalized.

As these contexts are refcounted by CUDA, you should not call [`CUDA.unsafe_destroy!`](@ref)
on them but use [`CUDA.unsafe_release!`](@ref) instead (available with do-block syntax as
well).
"""
CuContext(pctx::CuPrimaryContext)
"""
    current_context()

Returns the current context. Throws an `UndefRefError` if the driver reports no current
context.

!!! warning

    This is a low-level API, returning the current context as known to the CUDA driver.
    For most users, it is recommended to use the [`context`](@ref) method instead.
"""
current_context()
"""
    has_context()

Return whether there is an active context, i.e. whether `cuCtxGetCurrent` yields a
non-null handle.
"""
function has_context()
    current = Ref{CUcontext}()
    cuCtxGetCurrent(current)
    return current[] != C_NULL
end
# the `valid` bit serves two purposes: make sure we don't double-free a context (in case we
# early-freed it ourselves before the GC kicked in), and to make sure we don't free derived
# resources after the owning context has been destroyed (which can happen due to
# out-of-order finalizer execution)
#
# `valid_contexts` maps a raw context handle to the unique `CuContext` object wrapping it;
# entries are added by `new_unique` and removed again by `invalidate!`.
const valid_contexts = Dict{CUcontext,CuContext}()
# held while `new_unique` and `invalidate!` access `valid_contexts`
const context_lock = ReentrantLock()
# Report whether `ctx` is still marked valid, i.e. has not been passed to `invalidate!`.
function isvalid(ctx::CuContext)
    return ctx.valid
end
# NOTE: we can't just look up by the handle, because contexts derived from a primary one
#       have the same handle even though they might have been destroyed in the meantime.
#
# Remove the entry for `ctx.handle` from `valid_contexts` (under `context_lock`) and clear
# the `valid` bit of `ctx`.
function invalidate!(ctx::CuContext)
    lock(context_lock) do
        delete!(valid_contexts, ctx.handle)
    end
    ctx.valid = false
    return nothing
end
"""
    unsafe_destroy!(ctx::CuContext)

Immediately destroy a context, freeing up all resources associated with it. This does not
respect any users of the context, and might make other objects unusable.

Calling this on a context that has already been invalidated is a no-op.
"""
function unsafe_destroy!(ctx::CuContext)
    # already destroyed or invalidated: nothing to do
    isvalid(ctx) || return nothing

    cuCtxDestroy_v2(ctx)
    return invalidate!(ctx)
end
# Hand the raw driver handle to calls that expect a `CUcontext`.
function Base.unsafe_convert(::Type{CUcontext}, ctx::CuContext)
    return ctx.handle
end
# NOTE: we don't implement `isequal` or `hash` in order to fall back to `===` and `objectid`
#       as contexts are unique, and with primary device contexts identical handles might be
#       returned after resetting the context (device) and all associated resources.

# Print the handle address and the object's identity, plus an "invalidated" marker when the
# context is no longer valid. A null handle is shown as `CuContext(NULL)`.
function Base.show(io::IO, ctx::CuContext)
    if ctx.handle == C_NULL
        print(io, "CuContext(NULL)")
        return
    end

    address = @sprintf("%p", ctx.handle)
    instance = @sprintf("instance %x", objectid(ctx))
    parts = [address, instance]
    isvalid(ctx) || push!(parts, "invalidated")

    print(io, "CuContext(", join(parts, ", "), ")")
end
## core context API
"""
    push!(CuContext, ctx::CuContext)

Pushes a context on the current CPU thread.
"""
function Base.push!(::Type{CuContext}, ctx::CuContext)
    return cuCtxPushCurrent_v2(ctx)
end
"""
    pop!(CuContext)

Pops the current CUDA context from the current CPU thread.

The popped context is not returned.
"""
function Base.pop!(::Type{CuContext})
    # the driver call needs somewhere to store the popped handle, but we discard it:
    # we don't return the context here, because it may be unused
    # (and constructing the unique object is expensive)
    popped = Ref{CUcontext}()
    return cuCtxPopCurrent_v2(popped)
end
"""
    activate(ctx::CuContext)

Binds the specified CUDA context to the calling CPU thread.
"""
function activate(ctx::CuContext)
    return cuCtxSetCurrent(ctx)
end
# Do-block form: create a context on `dev`, run `f` on it, and always pop and destroy the
# context afterwards, also when `f` throws. Returns the value of `f(ctx)`.
function CuContext(f::Function, dev::CuDevice, args...)
    context = CuContext(dev, args...)    # implicitly pushes
    try
        return f(context)
    finally
        pop!(CuContext)
        unsafe_destroy!(context)
    end
end
## primary context management
"""
    CUDA.unsafe_release!(ctx::CuContext)

Lower the refcount of a context, possibly freeing up all resources associated with it. This
does not respect any users of the context, and might make other objects unusable.

Nothing happens if `ctx` has already been invalidated. After releasing, `ctx` is only
invalidated when the device's primary context is no longer reported as active.
"""
function unsafe_release!(ctx::CuContext)
    if !isvalid(ctx)
        return
    end

    dev = device(ctx)

    # the entry point to call depends on the driver version
    if driver_version() >= v"11"
        cuDevicePrimaryCtxRelease_v2(dev)
    else
        cuDevicePrimaryCtxRelease(dev)
    end

    # other references may keep the primary context alive
    if !isactive(CuPrimaryContext(dev))
        invalidate!(ctx)
    end
    return
end
# Do-block form for primary contexts: retain the primary context, run `f` on the resulting
# driver context, and always release it again, also when `f` throws. Returns `f(ctx)`.
function CuContext(f::Function, pctx::CuPrimaryContext)
    retained = CuContext(pctx)
    try
        return f(retained)
    finally
        unsafe_release!(retained)
    end
end
"""
    unsafe_reset!(pctx::CuPrimaryContext)

Explicitly destroys and cleans up all resources associated with a device's primary context
in the current process. Note that this forcibly invalidates all contexts derived from this
primary context, and as a result outstanding resources might become invalid.
"""
function unsafe_reset!(pctx::CuPrimaryContext)
    # mark the driver context object for this primary context as invalid before the
    # reset is issued
    invalidate!(CuContext(pctx))

    dev = pctx.dev
    if driver_version() >= v"11"
        cuDevicePrimaryCtxReset_v2(dev)
    else
        cuDevicePrimaryCtxReset(dev)
    end

    return
end
# Query the driver for the state of a primary context. Returns a tuple `(flags, active)`
# with the raw `Cuint` flags and a `Bool` that is `true` when the driver reports 1.
function state(pctx::CuPrimaryContext)
    flags_ref = Ref{Cuint}()
    active_ref = Ref{Cint}()
    cuDevicePrimaryCtxGetState(pctx.dev, flags_ref, active_ref)

    is_active = active_ref[] == one(Cint)
    return (flags_ref[], is_active)
end
"""
    isactive(pctx::CuPrimaryContext)

Query whether a primary context is active.
"""
function isactive(pctx::CuPrimaryContext)
    _, active = state(pctx)
    return active
end
"""
    flags(pctx::CuPrimaryContext)

Query the flags of a primary context.
"""
function flags(pctx::CuPrimaryContext)
    return first(state(pctx))
end
"""
    setflags!(pctx::CuPrimaryContext, flags)

Set the flags of a primary context.
"""
function setflags!(pctx::CuPrimaryContext, flags)
    dev = pctx.dev
    # the entry point to call depends on the driver version
    if driver_version() >= v"11"
        return cuDevicePrimaryCtxSetFlags_v2(dev, flags)
    end
    return cuDevicePrimaryCtxSetFlags(dev, flags)
end
## context properties
"""
    device(::CuContext)

Returns the device for a context.

The context is temporarily pushed on the current CPU thread so that `current_device()` can
be queried, and is popped again afterwards, even if that query throws.
"""
function device(ctx::CuContext)
    # push outside of the `try`: if pushing fails, there is nothing to pop
    push!(CuContext, ctx)
    try
        return current_device()
    finally
        # always restore the context stack, mirroring `synchronize(ctx)`
        pop!(CuContext)
    end
end
"""
    synchronize(ctx::CuContext)

Block until all operations on `ctx` have completed. This is a heavyweight operation,
typically you only need to call [`synchronize`](@ref) which only synchronizes the stream
associated with the current task.
"""
function synchronize(ctx::CuContext)
    # make `ctx` current for the duration of the synchronization
    push!(CuContext, ctx)
    try
        return device_synchronize()
    finally
        pop!(CuContext)
    end
end
# same, but without the context switch
"""
    device_synchronize()

Block until all operations on the currently active context have completed. This is a
heavyweight operation, typically you only need to call [`synchronize`](@ref) which only
synchronizes the stream associated with the current task.

On the device, `device_synchronize` acts as a synchronization point for child grids in the
context of dynamic parallelism.
"""
device_synchronize()
# XXX: can we put the device docstring in dynamic_parallelism.jl?
## cache config
export cache_config, cache_config!
# NOTE(review): `@enum_without_prefix` is defined outside this file; presumably it makes the
# `CUfunc_cache` values available without their `CU_` prefix -- confirm.
@enum_without_prefix CUfunc_cache CU_
# Return the `CUfunc_cache` value reported by `cuCtxGetCacheConfig`.
function cache_config()
    current = Ref{CUfunc_cache}()
    cuCtxGetCacheConfig(current)
    return current[]
end
# Pass `config` on to `cuCtxSetCacheConfig`.
cache_config!(config::CUfunc_cache) = cuCtxSetCacheConfig(config)
## shared memory config
export shmem_config, shmem_config!
# NOTE(review): `@enum_without_prefix` is defined outside this file; presumably it makes the
# `CUsharedconfig` values available without their `CU_` prefix -- confirm.
@enum_without_prefix CUsharedconfig CU_
# Return the `CUsharedconfig` value reported by `cuCtxGetSharedMemConfig`.
function shmem_config()
    current = Ref{CUsharedconfig}()
    cuCtxGetSharedMemConfig(current)
    return current[]
end
# Pass `config` on to `cuCtxSetSharedMemConfig`.
shmem_config!(config::CUsharedconfig) = cuCtxSetSharedMemConfig(config)
## limits
export limit, limit!
# NOTE(review): `@enum_without_prefix` is defined outside this file; presumably it makes the
# `CUlimit` values available without their `CU_` prefix -- confirm.
@enum_without_prefix CUlimit CU_
# Return the value that `cuCtxGetLimit` reports for `lim`, converted to an `Int`.
function limit(lim::CUlimit)
    value = Ref{Csize_t}()
    cuCtxGetLimit(value, lim)
    return Int(value[])
end
# Pass the new value `val` for `lim` on to `cuCtxSetLimit`.
function limit!(lim::CUlimit, val)
    return cuCtxSetLimit(lim, val)
end
## p2p
export enable_peer_access, disable_peer_access
# Call `cuCtxEnablePeerAccess` for the context `peer`; `flags` defaults to 0.
function enable_peer_access(peer::CuContext, flags=0)
    return cuCtxEnablePeerAccess(peer, flags)
end
# Call `cuCtxDisablePeerAccess` for the context `peer`.
function disable_peer_access(peer::CuContext)
    return cuCtxDisablePeerAccess(peer)
end