/
initialization.jl
246 lines (209 loc) · 9.86 KB
/
initialization.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# initialization

# XXX: we currently allow loading CUDA.jl even if the package is not functional, because
#      downstream packages can only unconditionally depend on CUDA.jl. that's why we have
#      the errors be non-fatal, sometimes even silencing them, and why we have the
#      `functional()` API that allows checking for successful initialization.
# TODO: once we have conditional dependencies, remove this complexity and have __init__ fail

# set to true once __init__ has completed without recording an error
const _initialized = Ref{Bool}(false)
# human-readable reason for a failed initialization; unset when none was recorded
const _initialization_error = Ref{String}()

"""
    functional(show_reason=false)

Check if the package has been configured successfully and is ready to use.

This call is intended for packages that support conditionally using an available GPU. If you
fail to check whether CUDA is functional, actual use of functionality might warn and error.
"""
function functional(show_reason::Bool=false)
    if _initialized[]
        return true
    end
    if show_reason
        # surface the recorded failure reason, or a generic message if none was stored
        reason = isassigned(_initialization_error) ? _initialization_error[] :
                 "unknown initialization error"
        error(reason)
    end
    return false
end
# Module initializer: locate the CUDA driver and runtime, validate their versions, and
# initialize the CUDA API. Failures are recorded in `_initialization_error` and reported
# through `functional()` rather than thrown, so CUDA.jl stays loadable on systems
# without a usable GPU (see the XXX note above the `_initialized` constant).
function __init__()
    # NOTE(review): `precompiling` is unused in this function as visible here — confirm
    # whether downstream code relies on it before removing.
    precompiling = ccall(:jl_generating_output, Cint, ()) != 0

    # TODO: make errors here (and in submodules/subpackages like cuBLAS and cuDNN) fatal,
    #       and remove functional(), once people sufficiently use weak dependencies.

    # check that we have a driver
    global libcuda
    if CUDA_Driver_jll.is_available()
        if isnothing(CUDA_Driver_jll.libcuda)
            _initialization_error[] = "CUDA driver not found"
            return
        end
        libcuda = CUDA_Driver_jll.libcuda
    else
        # CUDA_Driver_jll only kicks in for supported platforms, so fall back to
        # a system search if the artifact isn't available (JLLWrappers.jl#50)
        library = if Sys.iswindows()
            Libdl.find_library("nvcuda")
        else
            Libdl.find_library(["libcuda.so.1", "libcuda.so"])
        end
        if library != ""
            libcuda = library
        else
            _initialization_error[] = "CUDA driver not found"
            return
        end
    end

    # a driver library that cannot report a version is unusable; record and bail out
    driver = try
        driver_version()
    catch err
        @debug "CUDA driver failed to report a version" exception=(err, catch_backtrace())
        _initialization_error[] = "CUDA driver not functional"
        return
    end

    if driver < v"11"
        @error "This version of CUDA.jl only supports NVIDIA drivers for CUDA 11.x or higher (yours is for CUDA $driver)"
        _initialization_error[] = "CUDA driver too old"
        return
    end

    if driver < v"11.2"
        @warn """The NVIDIA driver on this system only supports up to CUDA $driver.
                 For performance reasons, it is recommended to upgrade to a driver that supports CUDA 11.2 or higher."""
    end

    # check that we have a runtime
    if !CUDA_Runtime.is_available()
        @error """CUDA.jl could not find an appropriate CUDA runtime to use.

                  This can have several reasons:
                  * you are using an unsupported platform: this version of CUDA.jl
                    only supports Linux (x86_64, aarch64, ppc64le) and Windows (x86_64),
                    while your platform was identified as $(Base.BinaryPlatforms.triplet(CUDA_Runtime_jll.host_platform));
                  * you precompiled CUDA.jl in an environment where the CUDA driver
                    was not available (i.e., a container, or an HPC login node).
                    in that case, you need to specify which CUDA version to use
                    by calling `CUDA.set_runtime_version!`;
                  * you requested use of a local CUDA toolkit, but not all
                    required components were discovered. try running with
                    JULIA_DEBUG=all in your environment for more details.

                  For more details, refer to the CUDA.jl documentation at
                  https://cuda.juliagpu.org/stable/installation/overview/"""
        _initialization_error[] = "CUDA runtime not found"
        return
    end

    runtime = try
        runtime_version()
    catch err
        # querying the runtime version initializes the API, which fails without a device
        if err isa CuError && err.code == ERROR_NO_DEVICE
            _initialization_error[] = "No CUDA-capable device found"
            return
        end
        rethrow()
    end

    # ensure the loaded runtime is supported
    # BUGFIX: the threshold was `v"10.2"`, contradicting the message below which states
    # that CUDA 11 is the minimum supported runtime.
    if runtime < v"11"
        @error "This version of CUDA.jl only supports CUDA 11 or higher (your toolkit provides CUDA $runtime)"
    end

    if runtime.major > driver.major
        @warn """You are using CUDA $runtime with a driver that only supports up to $(driver.major).x.
                 It is recommended to upgrade your driver, or switch to automatic installation of CUDA."""
    end

    # ensure the loaded runtime matches what we precompiled for.
    if toolkit_version === nothing
        @error """CUDA.jl was precompiled without knowing the CUDA toolkit version. This is unsupported.
                  You should either precompile CUDA.jl in an environment where the CUDA toolkit is available,
                  or call `CUDA.set_runtime_version!` to specify which CUDA version to use."""
    elseif Base.thisminor(runtime) != Base.thisminor(toolkit_version)
        # this can only happen with a local toolkit, but let's always check to be sure
        if local_toolkit
            @error """You are using a local CUDA $(Base.thisminor(runtime)) toolkit, but CUDA.jl was precompiled for CUDA $(Base.thisminor(toolkit_version)). This is unsupported.
                      Call `CUDA.set_runtime_version!` to update the CUDA version to match your local installation."""
        else
            @error """You are using CUDA $(Base.thisminor(runtime)), but CUDA.jl was precompiled for CUDA $(Base.thisminor(toolkit_version)).
                      This is unexpected; please file an issue."""
        end
    end

    # if we're not running under an external profiler, let CUPTI handle NVTX events
    # XXX: JuliaGPU/NVTX.jl#37
    if !NVTX.isactive() && !Sys.iswindows()
        ENV["NVTX_INJECTION64_PATH"] = CUDA_Runtime.libcupti
        NVTX.activate()
    end

    # finally, initialize CUDA
    try
        cuInit(0)
    catch err
        _initialization_error[] = "CUDA initialization failed: " * sprint(showerror, err)
        return
    end

    # on Julia versions without package extensions, load them via Requires.jl instead
    @static if !isdefined(Base, :get_extension)
        @require ChainRulesCore="d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" begin
            include("../ext/ChainRulesCoreExt.jl")
        end
        @require SpecialFunctions="276daf66-3868-5448-9aa4-cd146d93841b" begin
            include("../ext/SpecialFunctionsExt.jl")
        end
    end

    # ensure that operations executed by the REPL back-end finish before returning,
    # because displaying values happens on a different task (CUDA.jl#831)
    if isdefined(Base, :active_repl_backend)
        push!(Base.active_repl_backend.ast_transforms, emit_cuda_synchronizer)
    end

    # enable generation of FMA instructions to mimic behavior of nvcc
    LLVM.clopts("-nvptx-fma-level=1")

    # warn about old, deprecated environment variables
    # BUGFIX: these messages said `CUDA.jl.set_runtime_version!`, which is not a valid
    # name; the rest of this function consistently uses `CUDA.set_runtime_version!`.
    if haskey(ENV, "JULIA_CUDA_USE_BINARYBUILDER") && !local_toolkit
        @error """JULIA_CUDA_USE_BINARYBUILDER is deprecated. Call `CUDA.set_runtime_version!` to use a local toolkit."""
        # we do not warn about this when we're already using the new preference,
        # because during the transition clusters will be deploying both mechanisms.
    end
    if haskey(ENV, "JULIA_CUDA_VERSION")
        @error """JULIA_CUDA_VERSION is deprecated. Call `CUDA.set_runtime_version!` to use a different version."""
    end

    if !local_toolkit
        # scan for CUDA libraries that may have been loaded from system paths
        runtime_libraries = ["cudart",
                             "nvperf", "nvvm", "nvrtc", "nvJitLink",
                             "cublas", "cupti", "cusparse", "cufft", "curand", "cusolver"]
        for lib in Libdl.dllist()
            # libraries loaded from our artifacts are fine; only flag system copies
            contains(lib, "artifacts") && continue
            if any(rtlib -> contains(lib, rtlib), runtime_libraries)
                @warn """CUDA runtime library $(basename(lib)) was loaded from a system path. This may cause errors.
                         Ensure that you have not set the LD_LIBRARY_PATH environment variable, or that it does not contain paths to CUDA libraries."""
            end
        end

        # warn about Tegra being incompatible with our artifacts
        if is_tegra()
            @warn """You are using a Tegra device, which is currently not supported by the CUDA.jl artifacts.
                     Please install the CUDA toolkit, and call `CUDA.set_runtime_version!("local")` to use it.
                     For more information, see JuliaGPU/CUDA.jl#1978."""
        end
    end

    _initialized[] = true
end
# REPL AST transform: wrap the user's expression so any outstanding CUDA work is
# synchronized after it executes, even when the expression throws. Installed into
# `Base.active_repl_backend.ast_transforms` by `__init__`.
function emit_cuda_synchronizer(expr)
    return quote
        try
            $(expr)
        finally
            $maybe_synchronize_cuda()
        end
    end
end
# Synchronize the device, but only when the current task already has a valid CUDA
# context; otherwise do nothing. Used by the REPL synchronizer so that merely
# displaying a value never triggers CUDA initialization.
@inline function maybe_synchronize_cuda()
    # NOTE: this does not use the validating `task_local_state!` (or the helpers built on
    # top of it, like `context()`) because we don't want to initialize CUDA here.
    # look up the task-local state once instead of twice
    state = task_local_state()
    if state !== nothing && isvalid(state.context)
        device_synchronize()
    end
    return
end
## convenience functions

# TODO: update docstrings

export has_cuda, has_cuda_gpu

"""
    has_cuda(show_reason::Bool=false)::Bool

Check whether the local system provides an installation of the CUDA driver and runtime.
Use this function if your code loads packages that require CUDA.jl.

If `show_reason` is `true` and CUDA is not functional, an error describing the cause is
thrown instead of returning `false`.
"""
has_cuda(show_reason::Bool=false) = functional(show_reason)
"""
has_cuda_gpu()::Bool
Check whether the local system provides an installation of the CUDA driver and runtime, and
if it contains a CUDA-capable GPU. See [`has_cuda`](@ref) for more details.
Note that this function initializes the CUDA API in order to check for the number of GPUs.
"""
has_cuda_gpu(show_reason::Bool=false) = has_cuda(show_reason) && length(devices()) > 0