PERF: faster access to the dtype for masked numeric arrays (pandas-de…

…v#52998)
Rylie-W · May 19, 2023 · 96b6a6f · 96b6a6f
1 parent 564c0fa
commit 96b6a6f
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 34 deletions.
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -280,6 +280,7 @@ Performance improvements
 - Performance improvement when parsing strings to ``boolean[pyarrow]`` dtype (:issue:`51730`)
 - Performance improvement when searching an :class:`Index` sliced from other indexes (:issue:`51738`)
 - Performance improvement in :func:`concat` (:issue:`52291`, :issue:`52290`)
+- Performance improvement accessing :attr:`arrays.IntegerArrays.dtype` & :attr:`arrays.FloatingArray.dtype` (:issue:`52998`)
 - Performance improvement in :class:`Series` reductions (:issue:`52341`)
 - Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`)
 - Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)

diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -36,8 +36,8 @@ def construct_array_type(cls) -> type[FloatingArray]:
         return FloatingArray
 
     @classmethod
-    def _str_to_dtype_mapping(cls):
-        return FLOAT_STR_TO_DTYPE
+    def _get_dtype_mapping(cls) -> dict[np.dtype, FloatingDtype]:
+        return NUMPY_FLOAT_TO_DTYPE
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -153,7 +153,7 @@ class Float64Dtype(FloatingDtype):
     __doc__ = _dtype_docstring.format(dtype="float64")
 
 
-FLOAT_STR_TO_DTYPE = {
-    "float32": Float32Dtype(),
-    "float64": Float64Dtype(),
+NUMPY_FLOAT_TO_DTYPE: dict[np.dtype, FloatingDtype] = {
+    np.dtype(np.float32): Float32Dtype(),
+    np.dtype(np.float64): Float64Dtype(),
 }
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -36,8 +36,8 @@ def construct_array_type(cls) -> type[IntegerArray]:
         return IntegerArray
 
     @classmethod
-    def _str_to_dtype_mapping(cls):
-        return INT_STR_TO_DTYPE
+    def _get_dtype_mapping(cls) -> dict[np.dtype, IntegerDtype]:
+        return NUMPY_INT_TO_DTYPE
 
     @classmethod
     def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
@@ -208,13 +208,13 @@ class UInt64Dtype(IntegerDtype):
     __doc__ = _dtype_docstring.format(dtype="uint64")
 
 
-INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
-    "int8": Int8Dtype(),
-    "int16": Int16Dtype(),
-    "int32": Int32Dtype(),
-    "int64": Int64Dtype(),
-    "uint8": UInt8Dtype(),
-    "uint16": UInt16Dtype(),
-    "uint32": UInt32Dtype(),
-    "uint64": UInt64Dtype(),
+NUMPY_INT_TO_DTYPE: dict[np.dtype, IntegerDtype] = {
+    np.dtype(np.int8): Int8Dtype(),
+    np.dtype(np.int16): Int16Dtype(),
+    np.dtype(np.int32): Int32Dtype(),
+    np.dtype(np.int64): Int64Dtype(),
+    np.dtype(np.uint8): UInt8Dtype(),
+    np.dtype(np.uint16): UInt16Dtype(),
+    np.dtype(np.uint32): UInt32Dtype(),
+    np.dtype(np.uint64): UInt64Dtype(),
 }
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
@@ -100,7 +100,7 @@ def __from_arrow__(
         return array_class(data.copy(), ~mask, copy=False)
 
     @classmethod
-    def _str_to_dtype_mapping(cls) -> Mapping[str, NumericDtype]:
+    def _get_dtype_mapping(cls) -> Mapping[np.dtype, NumericDtype]:
         raise AbstractMethodError(cls)
 
     @classmethod
@@ -114,9 +114,9 @@ def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtyp
             dtype = dtype.lower()
 
         if not isinstance(dtype, NumericDtype):
-            mapping = cls._str_to_dtype_mapping()
+            mapping = cls._get_dtype_mapping()
             try:
-                dtype = mapping[str(np.dtype(dtype))]
+                dtype = mapping[np.dtype(dtype)]
             except KeyError as err:
                 raise ValueError(f"invalid dtype specified {dtype}") from err
         return dtype
@@ -250,8 +250,8 @@ def __init__(
 
     @cache_readonly
     def dtype(self) -> NumericDtype:
-        mapping = self._dtype_cls._str_to_dtype_mapping()
-        return mapping[str(self._data.dtype)]
+        mapping = self._dtype_cls._get_dtype_mapping()
+        return mapping[self._data.dtype]
 
     @classmethod
     def _coerce_to_array(

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1038,10 +1038,10 @@ def convert_dtypes(
             target_int_dtype = pandas_dtype_func("Int64")
 
             if input_array.dtype.kind in "iu":
-                from pandas.core.arrays.integer import INT_STR_TO_DTYPE
+                from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
 
-                inferred_dtype = INT_STR_TO_DTYPE.get(
-                    input_array.dtype.name, target_int_dtype
+                inferred_dtype = NUMPY_INT_TO_DTYPE.get(
+                    input_array.dtype, target_int_dtype
                 )
             elif input_array.dtype.kind in "fcb":
                 # TODO: de-dup with maybe_cast_to_integer_array?
@@ -1060,10 +1060,10 @@ def convert_dtypes(
         if convert_floating:
             if input_array.dtype.kind in "fcb":
                 # i.e. numeric but not integer
-                from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
+                from pandas.core.arrays.floating import NUMPY_FLOAT_TO_DTYPE
 
-                inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get(
-                    input_array.dtype.name, pandas_dtype_func("Float64")
+                inferred_float_dtype: DtypeObj = NUMPY_FLOAT_TO_DTYPE.get(
+                    input_array.dtype, pandas_dtype_func("Float64")
                 )
                 # if we could also convert to integer, check if all floats
                 # are actually integers

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -1471,13 +1471,13 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype:
 
             return BooleanDtype()
         elif dtype.kind in "iu":
-            from pandas.core.arrays.integer import INT_STR_TO_DTYPE
+            from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
 
-            return INT_STR_TO_DTYPE[dtype.name]
+            return NUMPY_INT_TO_DTYPE[dtype]
         elif dtype.kind == "f":
-            from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
+            from pandas.core.arrays.floating import NUMPY_FLOAT_TO_DTYPE
 
-            return FLOAT_STR_TO_DTYPE[dtype.name]
+            return NUMPY_FLOAT_TO_DTYPE[dtype]
         else:
             raise NotImplementedError(dtype)
 

diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
@@ -12,7 +12,7 @@
 )
 
 import pandas as pd
-from pandas.core.arrays.integer import INT_STR_TO_DTYPE
+from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE
 from pandas.tests.extension.base.base import BaseExtensionTests
 
 
@@ -215,10 +215,10 @@ def get_reduction_result_dtype(dtype):
             if dtype.itemsize == 8:
                 return dtype
             elif dtype.kind in "ib":
-                return INT_STR_TO_DTYPE[np.dtype(int).name]
+                return NUMPY_INT_TO_DTYPE[np.dtype(int)]
             else:
                 # i.e. dtype.kind == "u"
-                return INT_STR_TO_DTYPE[np.dtype(np.uint).name]
+                return NUMPY_INT_TO_DTYPE[np.dtype(np.uint)]
 
         if method in ["median", "sum", "prod"]:
             # std and var are not dtype-preserving