From 87624a0177dd9d27c3372bedcf2d74a12fdc97dd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:22:14 +0000
Subject: [PATCH 1/9] Initial plan


From e48812e72625e5f5c89effb89c9ddc04ba5e824d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:30:59 +0000
Subject: [PATCH 2/9] Implement release threshold configuration for
 DeviceMemoryResource performance optimization

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_memory.py | 24 +++++++++++++++++++
 cuda_core/tests/test_memory.py              | 26 ++++++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/experimental/_memory.py b/cuda_core/cuda/core/experimental/_memory.py
index 190ba3e04a..d70a6ba762 100644
--- a/cuda_core/cuda/core/experimental/_memory.py
+++ b/cuda_core/cuda/core/experimental/_memory.py
@@ -325,6 +325,30 @@ class DeviceMemoryResource(MemoryResource):
     def __init__(self, device_id: int):
         self._handle = handle_return(driver.cuDeviceGetMemPool(device_id))
         self._dev_id = device_id
+        
+        # Set a higher release threshold to improve performance when there are no active allocations.
+        # By default, the release threshold is 0, which means memory is immediately released back 
+        # to the OS when there are no active suballocations, causing performance issues.
+        try:
+            # Check current release threshold
+            current_threshold = handle_return(
+                driver.cuMemPoolGetAttribute(
+                    self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD
+                )
+            )
+            # If threshold is 0 (default), set it to maximum to retain memory in the pool
+            if int(current_threshold) == 0:
+                handle_return(
+                    driver.cuMemPoolSetAttribute(
+                        self._handle,
+                        driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
+                        driver.cuuint64_t(0xFFFFFFFFFFFFFFFF),
+                    )
+                )
+        except Exception:
+            # If setting the attribute fails (e.g., on older drivers), continue without error
+            # to maintain backward compatibility
+            pass
 
     def allocate(self, size: int, stream: Stream = None) -> Buffer:
         """Allocate a buffer of the requested size.
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 2454046465..a2465b8e78 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -10,7 +10,7 @@
 
 import pytest
 
-from cuda.core.experimental import Buffer, Device, MemoryResource
+from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, MemoryResource
 from cuda.core.experimental._memory import DLDeviceType
 from cuda.core.experimental._utils.cuda_utils import handle_return
 
@@ -257,3 +257,27 @@ def test_buffer_dunder_dlpack_device_failure():
     buffer = dummy_mr.allocate(size=1024)
     with pytest.raises(BufferError, match=r"^buffer is neither device-accessible nor host-accessible$"):
         buffer.__dlpack_device__()
+
+
+def test_device_memory_resource_initialization():
+    """Test that DeviceMemoryResource can be initialized successfully.
+    
+    This test verifies that the DeviceMemoryResource initializes properly,
+    including the release threshold configuration for performance optimization.
+    """
+    device = Device()
+    device.set_current()
+    
+    # This should succeed and configure the memory pool release threshold
+    mr = DeviceMemoryResource(device.device_id)
+    
+    # Verify basic properties
+    assert mr.device_id == device.device_id
+    assert mr.is_device_accessible is True
+    assert mr.is_host_accessible is False
+    
+    # Test allocation/deallocation works
+    buffer = mr.allocate(1024)
+    assert buffer.size == 1024
+    assert buffer.device_id == device.device_id
+    buffer.close()

From 39ea29c5e644f5e144d7a569c0ed733a442b8a60 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:33:13 +0000
Subject: [PATCH 3/9] Add performance demo for DeviceMemoryResource release
 threshold optimization

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/examples/memory_performance_demo.py | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 cuda_core/examples/memory_performance_demo.py

diff --git a/cuda_core/examples/memory_performance_demo.py b/cuda_core/examples/memory_performance_demo.py
new file mode 100644
index 0000000000..2480a7ee18
--- /dev/null
+++ b/cuda_core/examples/memory_performance_demo.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+"""
+Performance demonstration for DeviceMemoryResource release threshold optimization.
+
+This script demonstrates the performance improvement achieved by setting a higher 
+release threshold for the memory pool used by DeviceMemoryResource.
+
+The optimization prevents the memory pool from immediately releasing memory back 
+to the OS when there are no active allocations, which can cause significant 
+performance overhead for subsequent allocations.
+"""
+
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import time
+from cuda.core.experimental import Device, DeviceMemoryResource
+
+def benchmark_allocations(mr, num_allocations=1000, size=1024):
+    """Benchmark allocation/deallocation performance."""
+    print(f"Benchmarking {num_allocations} allocations of {size} bytes...")
+    
+    start_time = time.perf_counter()
+    
+    for _ in range(num_allocations):
+        buffer = mr.allocate(size)
+        buffer.close()  # Immediate deallocation
+    
+    end_time = time.perf_counter()
+    total_time = end_time - start_time
+    avg_time = total_time / num_allocations * 1_000_000  # microseconds
+    
+    print(f"Total time: {total_time:.4f} seconds")
+    print(f"Average time per allocation: {avg_time:.2f} μs")
+    return total_time
+
+def main():
+    """Demonstrate the performance benefit of release threshold optimization."""
+    print("=== DeviceMemoryResource Performance Demo ===")
+    print()
+    
+    device = Device()
+    device.set_current()
+    
+    print(f"Using device: {device.device_id}")
+    print()
+    
+    # Create DeviceMemoryResource (with release threshold optimization)
+    mr = DeviceMemoryResource(device.device_id)
+    print("Created DeviceMemoryResource with release threshold optimization")
+    
+    # Warm up
+    print("Warming up...")
+    for _ in range(100):
+        buffer = mr.allocate(1024)
+        buffer.close()
+    
+    # Benchmark
+    print("\nBenchmarking allocation performance...")
+    benchmark_allocations(mr, num_allocations=1000, size=1024)
+    
+    print("\nNote: With the release threshold optimization, subsequent allocations")
+    print("should be significantly faster as memory is retained in the pool rather")
+    print("than being released back to the OS and re-allocated from the OS.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 547129b6dbcfeb2a5dc32a856cb1d1d14395765a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:34:54 +0000
Subject: [PATCH 4/9] Fix linting issues and format code with ruff

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_memory.py   |  8 ++---
 cuda_core/examples/memory_performance_demo.py | 32 +++++++++++--------
 cuda_core/tests/test_memory.py                |  8 ++---
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_memory.py b/cuda_core/cuda/core/experimental/_memory.py
index d70a6ba762..4df8725a2f 100644
--- a/cuda_core/cuda/core/experimental/_memory.py
+++ b/cuda_core/cuda/core/experimental/_memory.py
@@ -325,16 +325,14 @@ class DeviceMemoryResource(MemoryResource):
     def __init__(self, device_id: int):
         self._handle = handle_return(driver.cuDeviceGetMemPool(device_id))
         self._dev_id = device_id
-        
+
         # Set a higher release threshold to improve performance when there are no active allocations.
-        # By default, the release threshold is 0, which means memory is immediately released back 
+        # By default, the release threshold is 0, which means memory is immediately released back
         # to the OS when there are no active suballocations, causing performance issues.
         try:
             # Check current release threshold
             current_threshold = handle_return(
-                driver.cuMemPoolGetAttribute(
-                    self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD
-                )
+                driver.cuMemPoolGetAttribute(self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD)
             )
             # If threshold is 0 (default), set it to maximum to retain memory in the pool
             if int(current_threshold) == 0:
diff --git a/cuda_core/examples/memory_performance_demo.py b/cuda_core/examples/memory_performance_demo.py
index 2480a7ee18..5da7986951 100644
--- a/cuda_core/examples/memory_performance_demo.py
+++ b/cuda_core/examples/memory_performance_demo.py
@@ -3,11 +3,11 @@
 """
 Performance demonstration for DeviceMemoryResource release threshold optimization.
 
-This script demonstrates the performance improvement achieved by setting a higher 
+This script demonstrates the performance improvement achieved by setting a higher
 release threshold for the memory pool used by DeviceMemoryResource.
 
-The optimization prevents the memory pool from immediately releasing memory back 
-to the OS when there are no active allocations, which can cause significant 
+The optimization prevents the memory pool from immediately releasing memory back
+to the OS when there are no active allocations, which can cause significant
 performance overhead for subsequent allocations.
 """
 
@@ -15,54 +15,58 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import time
+
 from cuda.core.experimental import Device, DeviceMemoryResource
 
+
 def benchmark_allocations(mr, num_allocations=1000, size=1024):
     """Benchmark allocation/deallocation performance."""
     print(f"Benchmarking {num_allocations} allocations of {size} bytes...")
-    
+
     start_time = time.perf_counter()
-    
+
     for _ in range(num_allocations):
         buffer = mr.allocate(size)
         buffer.close()  # Immediate deallocation
-    
+
     end_time = time.perf_counter()
     total_time = end_time - start_time
     avg_time = total_time / num_allocations * 1_000_000  # microseconds
-    
+
     print(f"Total time: {total_time:.4f} seconds")
     print(f"Average time per allocation: {avg_time:.2f} μs")
     return total_time
 
+
 def main():
     """Demonstrate the performance benefit of release threshold optimization."""
     print("=== DeviceMemoryResource Performance Demo ===")
     print()
-    
+
     device = Device()
     device.set_current()
-    
+
     print(f"Using device: {device.device_id}")
     print()
-    
+
     # Create DeviceMemoryResource (with release threshold optimization)
     mr = DeviceMemoryResource(device.device_id)
     print("Created DeviceMemoryResource with release threshold optimization")
-    
+
     # Warm up
     print("Warming up...")
     for _ in range(100):
         buffer = mr.allocate(1024)
         buffer.close()
-    
+
     # Benchmark
     print("\nBenchmarking allocation performance...")
     benchmark_allocations(mr, num_allocations=1000, size=1024)
-    
+
     print("\nNote: With the release threshold optimization, subsequent allocations")
     print("should be significantly faster as memory is retained in the pool rather")
     print("than being released back to the OS and re-allocated from the OS.")
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index a2465b8e78..9f380ff11b 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -261,21 +261,21 @@ def test_buffer_dunder_dlpack_device_failure():
 
 def test_device_memory_resource_initialization():
     """Test that DeviceMemoryResource can be initialized successfully.
-    
+
     This test verifies that the DeviceMemoryResource initializes properly,
     including the release threshold configuration for performance optimization.
     """
     device = Device()
     device.set_current()
-    
+
     # This should succeed and configure the memory pool release threshold
     mr = DeviceMemoryResource(device.device_id)
-    
+
     # Verify basic properties
     assert mr.device_id == device.device_id
     assert mr.is_device_accessible is True
     assert mr.is_host_accessible is False
-    
+
     # Test allocation/deallocation works
     buffer = mr.allocate(1024)
     assert buffer.size == 1024

From b343a50beba10d60f8b2916dc916a45d46d0111a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:49:27 +0000
Subject: [PATCH 5/9] Remove try-except wrapper and performance demo per code
 review feedback

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_memory.py   | 29 ++++----
 cuda_core/examples/memory_performance_demo.py | 72 -------------------
 2 files changed, 12 insertions(+), 89 deletions(-)
 delete mode 100644 cuda_core/examples/memory_performance_demo.py

diff --git a/cuda_core/cuda/core/experimental/_memory.py b/cuda_core/cuda/core/experimental/_memory.py
index 4df8725a2f..c8e7a41974 100644
--- a/cuda_core/cuda/core/experimental/_memory.py
+++ b/cuda_core/cuda/core/experimental/_memory.py
@@ -329,24 +329,19 @@ def __init__(self, device_id: int):
         # Set a higher release threshold to improve performance when there are no active allocations.
         # By default, the release threshold is 0, which means memory is immediately released back
         # to the OS when there are no active suballocations, causing performance issues.
-        try:
-            # Check current release threshold
-            current_threshold = handle_return(
-                driver.cuMemPoolGetAttribute(self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD)
-            )
-            # If threshold is 0 (default), set it to maximum to retain memory in the pool
-            if int(current_threshold) == 0:
-                handle_return(
-                    driver.cuMemPoolSetAttribute(
-                        self._handle,
-                        driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
-                        driver.cuuint64_t(0xFFFFFFFFFFFFFFFF),
-                    )
+        # Read the current release threshold; a non-zero value already set on the pool is left untouched
+        current_threshold = handle_return(
+            driver.cuMemPoolGetAttribute(self._handle, driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD)
+        )
+        # If threshold is 0 (default), set it to maximum to retain memory in the pool
+        if int(current_threshold) == 0:
+            handle_return(
+                driver.cuMemPoolSetAttribute(
+                    self._handle,
+                    driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
+                    driver.cuuint64_t(0xFFFFFFFFFFFFFFFF),
                 )
-        except Exception:
-            # If setting the attribute fails (e.g., on older drivers), continue without error
-            # to maintain backward compatibility
-            pass
+            )
 
     def allocate(self, size: int, stream: Stream = None) -> Buffer:
         """Allocate a buffer of the requested size.
diff --git a/cuda_core/examples/memory_performance_demo.py b/cuda_core/examples/memory_performance_demo.py
deleted file mode 100644
index 5da7986951..0000000000
--- a/cuda_core/examples/memory_performance_demo.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Performance demonstration for DeviceMemoryResource release threshold optimization.
-
-This script demonstrates the performance improvement achieved by setting a higher
-release threshold for the memory pool used by DeviceMemoryResource.
-
-The optimization prevents the memory pool from immediately releasing memory back
-to the OS when there are no active allocations, which can cause significant
-performance overhead for subsequent allocations.
-"""
-
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-import time
-
-from cuda.core.experimental import Device, DeviceMemoryResource
-
-
-def benchmark_allocations(mr, num_allocations=1000, size=1024):
-    """Benchmark allocation/deallocation performance."""
-    print(f"Benchmarking {num_allocations} allocations of {size} bytes...")
-
-    start_time = time.perf_counter()
-
-    for _ in range(num_allocations):
-        buffer = mr.allocate(size)
-        buffer.close()  # Immediate deallocation
-
-    end_time = time.perf_counter()
-    total_time = end_time - start_time
-    avg_time = total_time / num_allocations * 1_000_000  # microseconds
-
-    print(f"Total time: {total_time:.4f} seconds")
-    print(f"Average time per allocation: {avg_time:.2f} μs")
-    return total_time
-
-
-def main():
-    """Demonstrate the performance benefit of release threshold optimization."""
-    print("=== DeviceMemoryResource Performance Demo ===")
-    print()
-
-    device = Device()
-    device.set_current()
-
-    print(f"Using device: {device.device_id}")
-    print()
-
-    # Create DeviceMemoryResource (with release threshold optimization)
-    mr = DeviceMemoryResource(device.device_id)
-    print("Created DeviceMemoryResource with release threshold optimization")
-
-    # Warm up
-    print("Warming up...")
-    for _ in range(100):
-        buffer = mr.allocate(1024)
-        buffer.close()
-
-    # Benchmark
-    print("\nBenchmarking allocation performance...")
-    benchmark_allocations(mr, num_allocations=1000, size=1024)
-
-    print("\nNote: With the release threshold optimization, subsequent allocations")
-    print("should be significantly faster as memory is retained in the pool rather")
-    print("than being released back to the OS and re-allocated from the OS.")
-
-
-if __name__ == "__main__":
-    main()

From 6a2c4f4d8cab95285a12033ec0f485eff9f9ec5c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 02:56:33 +0000
Subject: [PATCH 6/9] Add release note for DeviceMemoryResource performance
 optimization

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/docs/source/release/0.X.Y-notes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst
index 3a9c7076a7..cac94f780d 100644
--- a/cuda_core/docs/source/release/0.X.Y-notes.rst
+++ b/cuda_core/docs/source/release/0.X.Y-notes.rst
@@ -36,4 +36,5 @@ None.
 Fixes and enhancements
 ----------------------
 
+- Improved :class:`DeviceMemoryResource` allocation performance when there are no active allocations (addresses issue #771).
 - Fix :class:`LaunchConfig` grid unit conversion when cluster is set (addresses issue #867).
\ No newline at end of file

From 3e50d075aad587932d1cc96b2c9314ea66932b72 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 03:47:23 +0000
Subject: [PATCH 7/9] Add skip decorator for mempool support check in device
 memory test

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/tests/test_memory.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 9f380ff11b..6b6515ac2a 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -259,6 +259,7 @@ def test_buffer_dunder_dlpack_device_failure():
         buffer.__dlpack_device__()
 
 
+@pytest.mark.skipif(not Device().properties.memory_pools_supported, reason="memory pools not supported")
 def test_device_memory_resource_initialization():
     """Test that DeviceMemoryResource can be initialized successfully.
 

From 2a8b536136ad762c77cd9fcb068e05f44af0bb31 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 04:29:18 +0000
Subject: [PATCH 8/9] Address code review feedback: move skip logic, add
 docstring note, update release note

Co-authored-by: kkraus14 <3665167+kkraus14@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_memory.py   | 7 +++++++
 cuda_core/docs/source/release/0.X.Y-notes.rst | 2 +-
 cuda_core/tests/test_memory.py                | 3 ++-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_memory.py b/cuda_core/cuda/core/experimental/_memory.py
index c8e7a41974..0a4d617e41 100644
--- a/cuda_core/cuda/core/experimental/_memory.py
+++ b/cuda_core/cuda/core/experimental/_memory.py
@@ -318,6 +318,13 @@ class DeviceMemoryResource(MemoryResource):
         Device ordinal for which a memory resource is constructed. The mempool that is
         set to *current* on ``device_id`` is used. If no mempool is set to current yet,
         the driver would use the *default* mempool on the device.
+
+    Notes
+    -----
+    During initialization, this class automatically configures the memory pool's release
+    threshold for improved performance. If the current threshold is 0 (default), it is
+    set to the maximum value to prevent immediate memory release when there are no active
+    allocations, which can cause performance degradation.
     """
 
     __slots__ = ("_dev_id",)
diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst
index cac94f780d..56fd8ac700 100644
--- a/cuda_core/docs/source/release/0.X.Y-notes.rst
+++ b/cuda_core/docs/source/release/0.X.Y-notes.rst
@@ -36,5 +36,5 @@ None.
 Fixes and enhancements
 ----------------------
 
-- Improved :class:`DeviceMemoryResource` allocation performance when there are no active allocations (addresses issue #771).
+- Improved :class:`DeviceMemoryResource` allocation performance when there are no active allocations by raising the memory pool's release threshold from its default of 0 to the maximum value (addresses issue #771).
 - Fix :class:`LaunchConfig` grid unit conversion when cluster is set (addresses issue #867).
\ No newline at end of file
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 6b6515ac2a..2ba7b418fe 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -259,7 +259,6 @@ def test_buffer_dunder_dlpack_device_failure():
         buffer.__dlpack_device__()
 
 
-@pytest.mark.skipif(not Device().properties.memory_pools_supported, reason="memory pools not supported")
 def test_device_memory_resource_initialization():
     """Test that DeviceMemoryResource can be initialized successfully.
 
@@ -267,6 +266,8 @@ def test_device_memory_resource_initialization():
     including the release threshold configuration for performance optimization.
     """
     device = Device()
+    if not device.properties.memory_pools_supported:
+        pytest.skip("memory pools not supported")
     device.set_current()
 
     # This should succeed and configure the memory pool release threshold

From e801d5ec6329ee33d744137904748164ea0400ed Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 21 Aug 2025 13:13:36 +0000
Subject: [PATCH 9/9] Remove verbose docstring Notes section per code review
 feedback

Co-authored-by: leofang <5534781+leofang@users.noreply.github.com>
---
 cuda_core/cuda/core/experimental/_memory.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/cuda_core/cuda/core/experimental/_memory.py b/cuda_core/cuda/core/experimental/_memory.py
index 0a4d617e41..c8e7a41974 100644
--- a/cuda_core/cuda/core/experimental/_memory.py
+++ b/cuda_core/cuda/core/experimental/_memory.py
@@ -318,13 +318,6 @@ class DeviceMemoryResource(MemoryResource):
         Device ordinal for which a memory resource is constructed. The mempool that is
         set to *current* on ``device_id`` is used. If no mempool is set to current yet,
         the driver would use the *default* mempool on the device.
-
-    Notes
-    -----
-    During initialization, this class automatically configures the memory pool's release
-    threshold for improved performance. If the current threshold is 0 (default), it is
-    set to the maximum value to prevent immediate memory release when there are no active
-    allocations, which can cause performance degradation.
     """
 
     __slots__ = ("_dev_id",)