From da9d3a4a54c6063869ad59b8ae2698fe6aaa63a1 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 17 Sep 2025 16:03:46 -0700 Subject: [PATCH] Fix Use-After-Free in NumPy/df For AoS and SoA `.to_numpy()` and dependent logic like `.to_df()`, the lifetime of the temporary copied variable is only handled by NumPy if we first create a named variable. The `to_host()` returned object is a temporary, and `np.array` using the `__array_interface__` protocol does not keep it alive automatically unless it is stored in an actual variable (e.g., `tmp`). --- src/amrex/extensions/ArrayOfStructs.py | 12 ++++++++---- src/amrex/extensions/PODVector.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/amrex/extensions/ArrayOfStructs.py b/src/amrex/extensions/ArrayOfStructs.py index 54afb0d4..401550d3 100644 --- a/src/amrex/extensions/ArrayOfStructs.py +++ b/src/amrex/extensions/ArrayOfStructs.py @@ -32,10 +32,14 @@ def aos_to_numpy(self, copy=False): if copy: # This supports a device-to-host copy. # - # todo: validate of the to_host() returned object - # lifetime is always managed correctly by - # Python's GC - otherwise copy twice via copy=True - return np.array(self.to_host(), copy=False) + # The to_host() returned object is a temporary, and + # np.array using the __array_interface__ protocol does + # not keep it alive automatically unless it is stored + # in an actual variable (tmp). + tmp = self.to_host() + ret = np.array(tmp, copy=False) + assert ret.base is tmp + return ret else: return np.array(self, copy=False) diff --git a/src/amrex/extensions/PODVector.py b/src/amrex/extensions/PODVector.py index 667b52c9..06ac3824 100644 --- a/src/amrex/extensions/PODVector.py +++ b/src/amrex/extensions/PODVector.py @@ -29,10 +29,14 @@ def podvector_to_numpy(self, copy=False): if copy: # This supports a device-to-host copy. 
# - # todo: validate of the to_host() returned object - # lifetime is always managed correctly by - # Python's GC - otherwise copy twice via copy=True - return np.array(self.to_host(), copy=False) + # The to_host() returned object is a temporary, and + # np.array using the __array_interface__ protocol does + # not keep it alive automatically unless it is stored + # in an actual variable (tmp). + tmp = self.to_host() + ret = np.array(tmp, copy=False) + assert ret.base is tmp + return ret else: return np.array(self, copy=False) else: