-
-
Notifications
You must be signed in to change notification settings - Fork 17.8k
/
base.py
294 lines (234 loc) · 8.54 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
"""Extend pandas with custom array types"""
import numpy as np
from pandas.errors import AbstractMethodError
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas import compat
class _DtypeOpsMixin(object):
# Not all of pandas' extension dtypes are compatibile with
# the new ExtensionArray interface. This means PandasExtensionDtype
# can't subclass ExtensionDtype yet, as is_extension_array_dtype would
# incorrectly say that these types are extension types.
#
# In the interim, we put methods that are shared between the two base
# classes ExtensionDtype and PandasExtensionDtype here. Both those base
# classes will inherit from this Mixin. Once everything is compatible, this
# class's methods can be moved to ExtensionDtype and removed.
# na_value is the default NA value to use for this type. This is used in
# e.g. ExtensionArray.take. This should be the user-facing "boxed" version
# of the NA value, not the physical NA vaalue for storage.
# e.g. for JSONArray, this is an empty dictionary.
na_value = np.nan
_metadata = ()
def __eq__(self, other):
"""Check whether 'other' is equal to self.
By default, 'other' is considered equal if either
* it's a string matching 'self.name'.
* it's an instance of this type and all of the
the attributes in ``self._metadata`` are equal between
`self` and `other`.
Parameters
----------
other : Any
Returns
-------
bool
"""
if isinstance(other, compat.string_types):
try:
other = self.construct_from_string(other)
except TypeError:
return False
if isinstance(other, type(self)):
return all(
getattr(self, attr) == getattr(other, attr)
for attr in self._metadata
)
return False
def __hash__(self):
return hash(tuple(getattr(self, attr) for attr in self._metadata))
def __ne__(self, other):
return not self.__eq__(other)
@property
def names(self):
# type: () -> Optional[List[str]]
"""Ordered list of field names, or None if there are no fields.
This is for compatibility with NumPy arrays, and may be removed in the
future.
"""
return None
@classmethod
def is_dtype(cls, dtype):
"""Check if we match 'dtype'.
Parameters
----------
dtype : object
The object to check.
Returns
-------
is_dtype : bool
Notes
-----
The default implementation is True if
1. ``cls.construct_from_string(dtype)`` is an instance
of ``cls``.
2. ``dtype`` is an object and is an instance of ``cls``
3. ``dtype`` has a ``dtype`` attribute, and any of the above
conditions is true for ``dtype.dtype``.
"""
dtype = getattr(dtype, 'dtype', dtype)
if isinstance(dtype, (ABCSeries, ABCIndexClass,
ABCDataFrame, np.dtype)):
# https://github.com/pandas-dev/pandas/issues/22960
# avoid passing data to `construct_from_string`. This could
# cause a FutureWarning from numpy about failing elementwise
# comparison from, e.g., comparing DataFrame == 'category'.
return False
elif dtype is None:
return False
elif isinstance(dtype, cls):
return True
try:
return cls.construct_from_string(dtype) is not None
except TypeError:
return False
@property
def _is_numeric(self):
# type: () -> bool
"""
Whether columns with this dtype should be considered numeric.
By default ExtensionDtypes are assumed to be non-numeric.
They'll be excluded from operations that exclude non-numeric
columns, like (groupby) reductions, plotting, etc.
"""
return False
@property
def _is_boolean(self):
# type: () -> bool
"""
Whether this dtype should be considered boolean.
By default, ExtensionDtypes are assumed to be non-numeric.
Setting this to True will affect the behavior of several places,
e.g.
* is_bool
* boolean indexing
Returns
-------
bool
"""
return False
class ExtensionDtype(_DtypeOpsMixin):
"""
A custom data type, to be paired with an ExtensionArray.
.. versionadded:: 0.23.0
See Also
--------
pandas.api.extensions.register_extension_dtype
pandas.api.extensions.ExtensionArray
Notes
-----
The interface includes the following abstract methods that must
be implemented by subclasses:
* type
* name
* construct_from_string
The following attributes influence the behavior of the dtype in
pandas operations
* _is_numeric
* _is_boolean
Optionally one can override construct_array_type for construction
with the name of this dtype via the Registry. See
:meth:`pandas.api.extensions.register_extension_dtype`.
* construct_array_type
The `na_value` class attribute can be used to set the default NA value
for this type. :attr:`numpy.nan` is used by default.
ExtensionDtypes are required to be hashable. The base class provides
a default implementation, which relies on the ``_metadata`` class
attribute. ``_metadata`` should be a tuple containing the strings
that define your data type. For example, with ``PeriodDtype`` that's
the ``freq`` attribute.
**If you have a parametrized dtype you should set the ``_metadata``
class property**.
Ideally, the attributes in ``_metadata`` will match the
parameters to your ``ExtensionDtype.__init__`` (if any). If any of
the attributes in ``_metadata`` don't implement the standard
``__eq__`` or ``__hash__``, the default implementations here will not
work.
.. versionchanged:: 0.24.0
Added ``_metadata``, ``__hash__``, and changed the default definition
of ``__eq__``.
This class does not inherit from 'abc.ABCMeta' for performance reasons.
Methods and properties required by the interface raise
``pandas.errors.AbstractMethodError`` and no ``register`` method is
provided for registering virtual subclasses.
"""
def __str__(self):
return self.name
@property
def type(self):
# type: () -> type
"""
The scalar type for the array, e.g. ``int``
It's expected ``ExtensionArray[item]`` returns an instance
of ``ExtensionDtype.type`` for scalar ``item``, assuming
that value is valid (not NA). NA values do not need to be
instances of `type`.
"""
raise AbstractMethodError(self)
@property
def kind(self):
# type () -> str
"""
A character code (one of 'biufcmMOSUV'), default 'O'
This should match the NumPy dtype used when the array is
converted to an ndarray, which is probably 'O' for object if
the extension type cannot be represented as a built-in NumPy
type.
See Also
--------
numpy.dtype.kind
"""
return 'O'
@property
def name(self):
# type: () -> str
"""
A string identifying the data type.
Will be used for display in, e.g. ``Series.dtype``
"""
raise AbstractMethodError(self)
@classmethod
def construct_array_type(cls):
"""
Return the array type associated with this dtype
Returns
-------
type
"""
raise NotImplementedError
@classmethod
def construct_from_string(cls, string):
"""
Attempt to construct this type from a string.
Parameters
----------
string : str
Returns
-------
self : instance of 'cls'
Raises
------
TypeError
If a class cannot be constructed from this 'string'.
Examples
--------
If the extension dtype can be constructed without any arguments,
the following may be an adequate implementation.
>>> @classmethod
... def construct_from_string(cls, string)
... if string == cls.name:
... return cls()
... else:
... raise TypeError("Cannot construct a '{}' from "
... "'{}'".format(cls, string))
"""
raise AbstractMethodError(cls)