-
Notifications
You must be signed in to change notification settings - Fork 64
/
_utils.pyx
269 lines (227 loc) · 7.49 KB
/
_utils.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES
#
# SPDX-License-Identifier: BSD-3-Clause
from libcpp.utility cimport move
from cython.operator cimport dereference as deref
from enum import IntEnum
from numpy import ndarray as _np_ndarray
cdef bint is_nested_sequence(data):
    """Tell whether ``data`` is a sequence made up only of sequences.

    Returns False when ``data`` itself fails ``PySequence_Check`` or when any
    of its items does; returns True otherwise (including for an empty
    sequence, which has no offending item).
    """
    if not cpython.PySequence_Check(data):
        return False
    for item in data:
        if not cpython.PySequence_Check(item):
            return False
    return True
cdef int cuqnt_alloc_wrapper(void* ctx, void** ptr, size_t size, Stream stream) with gil:
    """Allocation callback that forwards to a user-provided Python routine.

    ``ctx`` is read as a Python tuple whose first item is the user's alloc
    routine, invoked as ``ptr = alloc(size, stream)`` with the stream passed
    as an integer. The integer it returns is stored in ``ptr[0]`` as an
    address.

    Returns 0 on success, or 1 if anything in that sequence raised (the
    exception itself is discarded).

    Note: this function holds the Python GIL.
    """
    cdef tuple handlers
    try:
        handlers = <object>(ctx)
        allocate = handlers[0]
        address = allocate(size, <intptr_t>stream)
        ptr[0] = <void*>(<intptr_t>address)
    except:
        # TODO: logging?
        return 1
    return 0
cdef int cuqnt_free_wrapper(void* ctx, void* ptr, size_t size, Stream stream) with gil:
    """Deallocation callback that forwards to a user-provided Python routine.

    ``ctx`` is read as a Python tuple whose second item is the user's free
    routine, invoked as ``free(ptr, size, stream)`` with the pointer and the
    stream passed as integers.

    Returns 0 on success, or 1 if anything in that sequence raised (the
    exception itself is discarded).

    Note: this function holds the Python GIL.
    """
    cdef tuple handlers
    try:
        handlers = <object>(ctx)
        release = handlers[1]
        release(<intptr_t>ptr, size, <intptr_t>stream)
    except:
        # TODO: logging?
        return 1
    return 0
cdef void logger_callback_with_data(
        int32_t log_level, const char* func_name, const char* message,
        void* func_arg) with gil:
    """Logger callback that relays one log record to a Python callable.

    ``func_arg`` is unpacked as a ``(func, args, kwargs)`` triple. The C
    strings are copied into ``bytes``, decoded with the default codec, and
    passed on as ``func(log_level, name, message, *args, **kwargs)``.

    Note: this function holds the Python GIL.
    """
    callback, extra_args, extra_kwargs = <object>func_arg
    cdef bytes raw_name = func_name
    cdef bytes raw_message = message
    name_text = raw_name.decode()
    message_text = raw_message.decode()
    callback(log_level, name_text, message_text, *extra_args, **extra_kwargs)
cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=True) except*:
    """Return the raw address behind ``buf``.

    ``buf`` may be:

    - a Python int, which is taken verbatim as the pointer address (``size``
      is not checked in this case), or
    - an object exporting the buffer protocol as a 1D contiguous buffer of
      exactly ``size`` bytes; it must be writable when ``readonly`` is false.

    The buffer view is released before returning, so the caller must ensure
    ``buf`` is alive when the returned pointer is in use.

    Raises:
        ValueError: ``buf`` is not an int and does not expose a buffer that
            meets the requirements above. The underlying error is chained as
            ``__cause__``.
    """
    cdef void* bufPtr
    cdef int flags = cpython.PyBUF_ANY_CONTIGUOUS
    cdef int status = -1
    cdef cpython.Py_buffer view

    if not readonly:
        flags |= cpython.PyBUF_WRITABLE

    if isinstance(buf, int):
        bufPtr = <void*><intptr_t>buf
    else:  # try buffer protocol
        try:
            status = cpython.PyObject_GetBuffer(buf, &view, flags)
            # Validate with explicit raises instead of ``assert``: assertions
            # are skipped under ``python -O`` (and when the extension is built
            # with CYTHON_WITHOUT_ASSERTIONS), which would silently accept a
            # buffer of the wrong size or dimensionality.
            if view.len != size:
                raise ValueError(
                    f"buffer holds {view.len} bytes, expected {size}")
            if view.ndim != 1:
                raise ValueError(
                    f"buffer has {view.ndim} dimensions, expected 1")
        except Exception as e:
            adj = "writable " if not readonly else ""
            raise ValueError(
                "buf must be either a Python int representing the pointer "
                f"address to a valid buffer, or a 1D contiguous {adj}"
                "buffer, of size bytes") from e
        else:
            bufPtr = view.buf
        finally:
            # status stays -1 unless PyObject_GetBuffer succeeded, so the view
            # is released only when it was actually acquired.
            if status == 0:
                cpython.PyBuffer_Release(&view)
    return bufPtr
# The (subset of) compute types below are shared by cuStateVec and cuTensorNet
class ComputeType(IntEnum):
    """An enumeration of CUDA compute types.

    ``COMPUTE_DEFAULT`` is zero; every other member is a distinct
    single-bit value.
    """
    COMPUTE_DEFAULT = 0x0000
    COMPUTE_16F = 0x0001   # bit 0
    COMPUTE_32F = 0x0004   # bit 2
    COMPUTE_64F = 0x0010   # bit 4
    COMPUTE_8U = 0x0040    # bit 6
    COMPUTE_8I = 0x0100    # bit 8
    COMPUTE_32U = 0x0080   # bit 7
    COMPUTE_32I = 0x0200   # bit 9
    COMPUTE_16BF = 0x0400  # bit 10
    COMPUTE_TF32 = 0x1000  # bit 12
# TODO: use those exposed by CUDA Python instead, but before removing these
# duplicates, check if they are fixed to inherit IntEnum instead of Enum.
class cudaDataType(IntEnum):
    """An enumeration of `cudaDataType_t`.

    Members are listed grouped by name rather than by numeric value, so the
    integers below are intentionally not in ascending order.

    NOTE(review): the integer values are expected to mirror the C
    ``cudaDataType_t`` definition one-to-one; confirm against the CUDA
    headers before adding or changing a member.
    """
    CUDA_R_16F = 2
    CUDA_C_16F = 6
    CUDA_R_16BF = 14
    CUDA_C_16BF = 15
    CUDA_R_32F = 0
    CUDA_C_32F = 4
    CUDA_R_64F = 1
    CUDA_C_64F = 5
    CUDA_R_4I = 16
    CUDA_C_4I = 17
    CUDA_R_4U = 18
    CUDA_C_4U = 19
    CUDA_R_8I = 3
    CUDA_C_8I = 7
    CUDA_R_8U = 8
    CUDA_C_8U = 9
    CUDA_R_16I = 20
    CUDA_C_16I = 21
    CUDA_R_16U = 22
    CUDA_C_16U = 23
    CUDA_R_32I = 10
    CUDA_C_32I = 11
    CUDA_R_32U = 12
    CUDA_C_32U = 13
    CUDA_R_64I = 24
    CUDA_C_64I = 25
    CUDA_R_64U = 26
    CUDA_C_64U = 27
class libraryPropertyType(IntEnum):
    """An enumeration of library version information.

    Each member names one component of a version number: major (0),
    minor (1) or patch level (2).
    """
    MAJOR_VERSION = 0
    MINOR_VERSION = 1
    PATCH_LEVEL = 2
# Drop the imported base class from this module's namespace; the enum classes
# defined above keep their own reference to it.
del IntEnum
# Code points of the whitespace characters, kept as a fixed-size C array of
# 29 entries.
# Defined in CPython:
# https://github.com/python/cpython/blob/26bc2cc06128890ac89492eca20e83abe0789c1c/Objects/unicodetype_db.h#L6311-L6349
cdef int[29] _WHITESPACE_UNICODE_INTS = [
    0x0009,
    0x000A,
    0x000B,
    0x000C,
    0x000D,
    0x001C,
    0x001D,
    0x001E,
    0x001F,
    0x0020,
    0x0085,
    0x00A0,
    0x1680,
    0x2000,
    0x2001,
    0x2002,
    0x2003,
    0x2004,
    0x2005,
    0x2006,
    0x2007,
    0x2008,
    0x2009,
    0x200A,
    0x2028,
    0x2029,
    0x202F,
    0x205F,
    0x3000,
]
# Python-visible string holding one character per code point above, in the
# same order.
WHITESPACE_UNICODE = ''.join(chr(s) for s in _WHITESPACE_UNICODE_INTS)
# Cython can't infer the overload by return type alone, so we need a dummy
# input argument to help it
cdef nullable_unique_ptr[ vector[ResT] ] get_resource_ptr(object obj, ResT* __unused):
    """Wrap ``obj`` in a ``nullable_unique_ptr`` to a ``vector[ResT]``.

    Three kinds of input are handled:

    - a NumPy ndarray: its data address is stored with the second ``reset``
      argument False, after asserting that the item size equals
      ``sizeof(ResT)``;
    - any other Python sequence: its items are copied into a newly allocated
      vector, stored with the second ``reset`` argument True;
    - anything else: converted to an integer address and stored with the
      second ``reset`` argument False.

    ``__unused`` is never read; it only lets Cython pick the ``ResT``
    specialization.

    NOTE(review): the second ``reset`` argument is assumed to be the
    "owns the pointer" flag -- confirm against the declaration of
    ``nullable_unique_ptr``.
    """
    cdef nullable_unique_ptr[ vector[ResT] ] ptr
    cdef vector[ResT]* vec
    cdef size_t i, length
    if isinstance(obj, _np_ndarray):
        # TODO: can we do "assert obj.dtype == some_dtype" here? it seems we have no
        # way to check the dtype...
        # TODO: how about buffer protocol?
        assert <size_t>(obj.dtype.itemsize) == sizeof(ResT)
        ptr.reset(<vector[ResT]*><intptr_t>(obj.ctypes.data), False)
    elif cpython.PySequence_Check(obj):
        length = len(obj)
        vec = new vector[ResT](length)
        # Hand the vector to ``ptr`` before filling it: converting an item to
        # ResT can raise, and the allocation would otherwise be leaked.
        ptr.reset(vec, True)
        for i in range(length):
            deref(vec)[i] = obj[i]
    else:
        ptr.reset(<vector[ResT]*><intptr_t>obj, False)
    return move(ptr)
cdef nullable_unique_ptr[ vector[PtrT*] ] get_resource_ptrs(object obj, PtrT* __unused):
    """Wrap ``obj`` in a ``nullable_unique_ptr`` to a ``vector[PtrT*]``.

    If ``obj`` is a Python sequence, each item is converted to an integer
    address and stored as a ``PtrT*`` in a newly allocated vector, stored
    with the second ``reset`` argument True. Otherwise ``obj`` itself is
    converted to an integer address and stored with the second ``reset``
    argument False.

    ``__unused`` is never read; it only lets Cython pick the ``PtrT``
    specialization.

    NOTE(review): the second ``reset`` argument is assumed to be the
    "owns the pointer" flag -- confirm against the declaration of
    ``nullable_unique_ptr``.
    """
    cdef nullable_unique_ptr[ vector[PtrT*] ] ptr
    cdef vector[PtrT*]* vec
    cdef size_t i, length
    if cpython.PySequence_Check(obj):
        length = len(obj)
        vec = new vector[PtrT*](length)
        # Hand the vector to ``ptr`` before filling it: converting an item to
        # an address can raise, and the allocation would otherwise be leaked.
        ptr.reset(vec, True)
        for i in range(length):
            deref(vec)[i] = <PtrT*><intptr_t>(obj[i])
    else:
        ptr.reset(<vector[PtrT*]*><intptr_t>obj, False)
    return move(ptr)
cdef nested_resource[ResT] get_nested_resource_ptr(object obj, ResT* __unused):
    """Build a ``nested_resource[ResT]`` (pointer table plus backing storage) from ``obj``.

    Three kinds of input are handled:

    - a sequence of sequences: every inner sequence is copied into a
      ``vector[ResT]``; ``nested_resource_ptr`` holds those vectors and
      ``ptrs`` holds the address of each inner vector's data;
    - a flat sequence: its items are converted to integer addresses and
      stored in ``ptrs``; ``nested_resource_ptr`` is reset to NULL;
    - anything else: ``obj`` is converted to an integer address and stored
      in ``ptrs`` with the second ``reset`` argument False;
      ``nested_resource_ptr`` is reset to NULL.

    ``__unused`` is never read; it only lets Cython pick the ``ResT``
    specialization.

    NOTE(review): the second ``reset`` argument is assumed to be the
    "owns the pointer" flag -- confirm against the declaration of
    ``nullable_unique_ptr``.
    """
    cdef nested_resource[ResT] res
    cdef nullable_unique_ptr[ vector[intptr_t] ] nested_ptr
    cdef nullable_unique_ptr[ vector[vector[ResT]] ] nested_res_ptr
    cdef vector[intptr_t]* nested_vec = NULL
    cdef vector[vector[ResT]]* nested_res_vec = NULL
    cdef size_t i = 0, length = 0
    cdef intptr_t addr
    if is_nested_sequence(obj):
        length = len(obj)
        # Each allocation is handed to its smart pointer right away:
        # converting the items below can raise, and the vectors would
        # otherwise be leaked.
        nested_res_vec = new vector[vector[ResT]](length)
        nested_res_ptr.reset(nested_res_vec, True)
        nested_vec = new vector[intptr_t](length)
        nested_ptr.reset(nested_vec, True)
        for i, obj_i in enumerate(obj):
            deref(nested_res_vec)[i] = obj_i
            # Record where the freshly copied inner vector keeps its data.
            deref(nested_vec)[i] = <intptr_t>(deref(nested_res_vec)[i].data())
    elif cpython.PySequence_Check(obj):
        length = len(obj)
        nested_vec = new vector[intptr_t](length)
        # Same reasoning as above: transfer the vector before filling it.
        nested_ptr.reset(nested_vec, True)
        nested_res_ptr.reset(NULL, False)
        for i, addr in enumerate(obj):
            deref(nested_vec)[i] = addr
    else:
        # obj is an int (ResT**)
        nested_res_ptr.reset(NULL, False)
        nested_ptr.reset(<vector[intptr_t]*><intptr_t>obj, False)
    res.ptrs = move(nested_ptr)
    res.nested_resource_ptr = move(nested_res_ptr)
    return move(res)
class FunctionNotFoundError(RuntimeError):
    """RuntimeError subclass; presumably signals that a requested function could not be found."""
class NotSupportedError(RuntimeError):
    """RuntimeError subclass; presumably signals that a requested feature is not supported."""