diff --git a/cuda_core/cuda/core/experimental/__init__.py b/cuda_core/cuda/core/experimental/__init__.py
index dc4d20222..94fb0aa08 100644
--- a/cuda_core/cuda/core/experimental/__init__.py
+++ b/cuda_core/cuda/core/experimental/__init__.py
@@ -55,6 +55,8 @@
     DeviceMemoryResourceOptions,
     LegacyPinnedMemoryResource,
     MemoryResource,
+    VirtualMemoryResource,
+    VirtualMemoryResourceOptions,
 )
 from cuda.core.experimental._module import Kernel, ObjectCode  # noqa: E402
 from cuda.core.experimental._program import Program, ProgramOptions  # noqa: E402
diff --git a/cuda_core/cuda/core/experimental/_memory.pyx b/cuda_core/cuda/core/experimental/_memory.pyx
index 39afa6723..3786f066b 100644
--- a/cuda_core/cuda/core/experimental/_memory.pyx
+++ b/cuda_core/cuda/core/experimental/_memory.pyx
@@ -18,19 +18,20 @@ from cuda.core.experimental._utils.cuda_utils cimport (
     HANDLE_RETURN,
 )
 
-from dataclasses import dataclass
-from typing import Optional, TypeVar, Union, TYPE_CHECKING
 import abc
 import array
 import contextlib
 import cython
+from dataclasses import dataclass, field
+from typing import Iterable, Literal, Optional, TYPE_CHECKING, TypeVar, Union
 import multiprocessing
 import os
 import platform
 import weakref
+
 from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
 from cuda.core.experimental._stream import Stream, default_stream
-from cuda.core.experimental._utils.cuda_utils import driver
+from cuda.core.experimental._utils.cuda_utils import ( driver, Transaction, get_binding_version )
 
 if platform.system() == "Linux":
     import socket
@@ -1085,3 +1086,499 @@ class _SynchronousMemoryResource(MemoryResource):
     @property
     def device_id(self) -> int:
         return self._dev_id
+
+
+VirtualMemoryHandleTypeT = Literal["posix_fd", "generic", "none", "win32", "win32_kmt", "fabric"]  # NOTE(review): "generic" has no entry in VirtualMemoryResourceOptions._handle_types -- confirm
+VirtualMemoryLocationTypeT = Literal["device", "host", "host_numa", "host_numa_current"]  # values accepted for location_type
+VirtualMemoryGranularityT = Literal["minimum", "recommended"]  # values accepted for granularity
+VirtualMemoryAccessTypeT = Literal["rw", "r", "none"]  # values accepted for self_access / peer_access
+VirtualMemoryAllocationTypeT = Literal["pinned", "managed"]  # "managed" is only mapped when the bindings report major version >= 13
+
+
+@dataclass
+class VirtualMemoryResourceOptions:
+    """A configuration object for the VirtualMemoryResource
+       Stores configuration information which tells the resource how to use the CUDA VMM APIs
+
+    Args:
+        handle_type: Export handle type for the physical allocation. Use
+            CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR on Linux if you plan to
+            import/export the allocation (required for cuMemRetainAllocationHandle).
+            Use CU_MEM_HANDLE_TYPE_NONE if you don't need an exportable handle.
+        gpu_direct_rdma: Hint that the allocation should be GDR-capable (if supported).
+        granularity: 'recommended' or 'minimum'. Controls granularity query and size rounding.
+        addr_hint: A (optional) virtual address hint to try to reserve at. 0 -> let CUDA choose.
+        addr_align: Alignment for the VA reservation. If None, use the queried granularity.
+        peers: Extra device IDs that should be granted access in addition to `device`.
+        self_access: Access flags for the owning device ('rw', 'r', or 'none').
+        peer_access: Access flags for peers ('rw' or 'r').
+    """
+    # Human-friendly strings; normalized in __post_init__
+    allocation_type: VirtualMemoryAllocationTypeT = "pinned"
+    location_type: VirtualMemoryLocationTypeT = "device"
+    handle_type: VirtualMemoryHandleTypeT = "posix_fd"
+    granularity: VirtualMemoryGranularityT = "recommended"
+    gpu_direct_rdma: bool = True
+    addr_hint: Optional[int] = 0
+    addr_align: Optional[int] = None
+    peers: Iterable[int] = field(default_factory=tuple)
+    self_access: VirtualMemoryAccessTypeT = "rw"
+    peer_access: VirtualMemoryAccessTypeT = "rw"
+
+    _a = driver.CUmemAccess_flags
+    _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, "none": 0}
+    _h = driver.CUmemAllocationHandleType
+    _handle_types = {"none": _h.CU_MEM_HANDLE_TYPE_NONE, "posix_fd": _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, "win32": _h.CU_MEM_HANDLE_TYPE_WIN32, "win32_kmt": _h.CU_MEM_HANDLE_TYPE_WIN32_KMT, "fabric": _h.CU_MEM_HANDLE_TYPE_FABRIC}
+    _g = driver.CUmemAllocationGranularity_flags
+    _granularity = {"recommended": _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED, "minimum": _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM}
+    _l = driver.CUmemLocationType
+    _location_type = {"device": _l.CU_MEM_LOCATION_TYPE_DEVICE, "host": _l.CU_MEM_LOCATION_TYPE_HOST, "host_numa": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA, "host_numa_current": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT}
+    # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
+    _a = driver.CUmemAllocationType
+    _allocation_type = {"pinned": _a.CU_MEM_ALLOCATION_TYPE_PINNED}
+    ver_major, ver_minor = get_binding_version()
+    if ver_major >= 13:
+        _allocation_type["managed"] = _a.CU_MEM_ALLOCATION_TYPE_MANAGED
+
+    @staticmethod
+    def _access_to_flags(spec: str):
+        flags = VirtualMemoryResourceOptions._access_flags.get(spec)
+        if flags is None:
+            raise ValueError(f"Unknown access spec: {spec!r}")
+        return flags
+
+    @staticmethod
+    def _allocation_type_to_driver(spec: str):
+        alloc_type = VirtualMemoryResourceOptions._allocation_type.get(spec)
+        if alloc_type is None:
+            raise ValueError(f"Unsupported allocation_type: {spec!r}")
+        return alloc_type
+
+    @staticmethod
+    def _location_type_to_driver(spec: str):
+        loc_type = VirtualMemoryResourceOptions._location_type.get(spec)
+        if loc_type is None:
+            raise ValueError(f"Unsupported location_type: {spec!r}")
+        return loc_type
+
+    @staticmethod
+    def _handle_type_to_driver(spec: str):
+        handle_type = VirtualMemoryResourceOptions._handle_types.get(spec)
+        if handle_type is None:
+            raise ValueError(f"Unsupported handle_type: {spec!r}")
+        return handle_type
+
+    @staticmethod
+    def _granularity_to_driver(spec: str):
+        granularity = VirtualMemoryResourceOptions._granularity.get(spec)
+        if granularity is None:
+            raise ValueError(f"Unsupported granularity: {spec!r}")
+        return granularity
+
+
+class VirtualMemoryResource(MemoryResource):
+    """Create a device memory resource that uses the CUDA VMM APIs to allocate memory.
+
+    Parameters
+    ----------
+    device : Device
+        Device object for which a memory resource is constructed.
+
+    config : VirtualMemoryResourceOptions
+        A configuration object for the VirtualMemoryResource
+    """
+    def __init__(self, device, config: VirtualMemoryResourceOptions = None):
+        self.device = device
+        self.config = check_or_create_options(
+            VirtualMemoryResourceOptions, config, "VirtualMemoryResource options", keep_none=False
+        )
+        if self.config.location_type == "host":
+            self.device = None
+        if platform.system() == "Windows":
+            raise NotImplementedError("VirtualMemoryResource is not supported on Windows")
+
+    @staticmethod
+    def _align_up(size: int, gran: int) -> int:
+        """
+        Align a size up to the nearest multiple of a granularity.
+        """
+        return (size + gran - 1) & ~(gran - 1)
+
+    def modify_allocation(self, buf: Buffer, new_size: int, config: VirtualMemoryResourceOptions = None) -> Buffer:
+        """
+        Grow an existing allocation using CUDA VMM, with a configurable policy.
+
+        This implements true growing allocations that preserve the base pointer
+        by extending the virtual address range and mapping additional physical memory.
+
+        This function uses transactional allocation: if any step fails, the original buffer is not modified and
+        all steps the function took are rolled back so a new allocation is not created.
+
+        Parameters
+        ----------
+        buf : Buffer
+            The existing buffer to grow
+        new_size : int
+            The new total size for the allocation
+        config : VirtualMemoryResourceOptions, optional
+            Configuration for the new physical memory chunks. If None, uses current config.
+
+        Returns
+        -------
+        Buffer
+            The same buffer with updated size and properties, preserving the original pointer
+        """
+        if config is not None:
+            self.config = config
+
+        # Build allocation properties for new chunks
+        prop = driver.CUmemAllocationProp()
+        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(self.config.allocation_type)
+        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(self.config.location_type)
+        prop.location.id = self.device.device_id
+        prop.allocFlags.gpuDirectRDMACapable = 1 if self.config.gpu_direct_rdma else 0
+        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(self.config.handle_type)
+
+        # Query granularity
+        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(self.config.granularity)
+        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
+        raise_if_driver_error(res)
+
+        # Calculate sizes
+        additional_size = new_size - buf.size
+        if additional_size <= 0:
+            # Same size: only update access policy if needed; avoid zero-sized driver calls
+            descs = self._build_access_descriptors(prop)
+            if descs:
+                res, = driver.cuMemSetAccess(int(buf.handle), buf.size, descs, len(descs))
+                raise_if_driver_error(res)
+            return buf
+
+        aligned_additional_size = VirtualMemoryResource._align_up(additional_size, gran)
+        total_aligned_size = VirtualMemoryResource._align_up(new_size, gran)
+        aligned_prev_size = total_aligned_size - aligned_additional_size
+        addr_align = self.config.addr_align or gran
+
+        # Try to extend the existing VA range first
+        res, new_ptr = driver.cuMemAddressReserve(
+            aligned_additional_size,
+            addr_align,
+            int(buf.handle) + aligned_prev_size,  # fixedAddr hint - aligned end of current range
+            0
+        )
+
+        if res != driver.CUresult.CUDA_SUCCESS or new_ptr != (int(buf.handle) + aligned_prev_size):
+            # Check for specific errors that are not recoverable with the slow path
+            if res in (driver.CUresult.CUDA_ERROR_INVALID_VALUE, driver.CUresult.CUDA_ERROR_NOT_PERMITTED, driver.CUresult.CUDA_ERROR_NOT_INITIALIZED, driver.CUresult.CUDA_ERROR_NOT_SUPPORTED):
+                raise_if_driver_error(res)
+            res2, = driver.cuMemAddressFree(new_ptr, aligned_additional_size)
+            raise_if_driver_error(res2)
+            # Fallback: couldn't extend contiguously, need full remapping
+            return self._grow_allocation_slow_path(buf, new_size, prop, aligned_additional_size, total_aligned_size, addr_align)
+        else:
+            # Success! We can extend the VA range contiguously
+            return self._grow_allocation_fast_path(buf, new_size, prop, aligned_additional_size, new_ptr)
+
+    def _grow_allocation_fast_path(self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp,
+                                   aligned_additional_size: int, new_ptr: int) -> Buffer:
+        """
+        Fast path for growing a virtual memory allocation when the new region can be
+        reserved contiguously after the existing buffer.
+
+        This function creates and maps new physical memory for the additional size,
+        sets access permissions, and updates the buffer size in place (the pointer
+        remains unchanged).
+
+        Args:
+            buf (Buffer): The buffer to grow.
+            new_size (int): The new total size in bytes.
+            prop (driver.CUmemAllocationProp): Allocation properties for the new memory.
+            aligned_additional_size (int): The size of the new region to allocate, aligned to granularity.
+            new_ptr (int): The address of the newly reserved contiguous VA region (should be at the end of the current buffer).
+
+        Returns:
+            Buffer: The same buffer object with its size updated to `new_size`.
+        """
+        with Transaction() as trans:
+            # Create new physical memory for the additional size
+            trans.append(lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0]))
+            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
+            raise_if_driver_error(res)
+            # Register undo for creation
+            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))
+
+            # Map the new physical memory to the extended VA range
+            res, = driver.cuMemMap(new_ptr, aligned_additional_size, 0, new_handle, 0)
+            raise_if_driver_error(res)
+            # Register undo for mapping
+            trans.append(lambda np=new_ptr, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))
+
+            # Set access permissions for the new portion
+            descs = self._build_access_descriptors(prop)
+            if descs:
+                res, = driver.cuMemSetAccess(new_ptr, aligned_additional_size, descs, len(descs))
+                raise_if_driver_error(res)
+
+            # All succeeded, cancel undo actions
+            trans.commit()
+
+        # Update the buffer size (pointer stays the same)
+        buf._size = new_size
+        return buf
+
+    def _grow_allocation_slow_path(self, buf: Buffer, new_size: int, prop: driver.CUmemAllocationProp,
+                                   aligned_additional_size: int, total_aligned_size: int, addr_align: int) -> Buffer:
+        """
+        Slow path for growing a virtual memory allocation when the new region cannot be
+        reserved contiguously after the existing buffer.
+
+        This function reserves a new, larger virtual address (VA) range, remaps the old
+        physical memory to the beginning of the new VA range, creates and maps new physical
+        memory for the additional size, sets access permissions, and updates the buffer's
+        pointer and size.
+
+        Args:
+            buf (Buffer): The buffer to grow.
+            new_size (int): The new total size in bytes.
+            prop (driver.CUmemAllocationProp): Allocation properties for the new memory.
+            aligned_additional_size (int): The size of the new region to allocate, aligned to granularity.
+            total_aligned_size (int): The total new size to reserve, aligned to granularity.
+            addr_align (int): The required address alignment for the new VA range.
+
+        Returns:
+            Buffer: The buffer object updated with the new pointer and size.
+        """
+        with Transaction() as trans:
+            # Reserve a completely new, larger VA range
+            res, new_ptr = driver.cuMemAddressReserve(total_aligned_size, addr_align, 0, 0)
+            raise_if_driver_error(res)
+            # Register undo for VA reservation
+            trans.append(lambda np=new_ptr, s=total_aligned_size: raise_if_driver_error(driver.cuMemAddressFree(np, s)[0]))
+
+            # Get the old allocation handle for remapping
+            result, old_handle = driver.cuMemRetainAllocationHandle(buf.handle)
+            raise_if_driver_error(result)
+            # Register undo for old_handle
+            trans.append(lambda h=old_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))
+
+            # Unmap the old VA range (aligned previous size)
+            aligned_prev_size = total_aligned_size - aligned_additional_size
+            result, = driver.cuMemUnmap(int(buf.handle), aligned_prev_size)
+            raise_if_driver_error(result)
+
+            def _remap_old():
+                # Try to remap the old physical memory back to the original VA range
+                try:
+                    res, = driver.cuMemMap(int(buf.handle), aligned_prev_size, 0, old_handle, 0)
+                    raise_if_driver_error(res)
+                except Exception:
+                    pass
+            trans.append(_remap_old)
+
+            # Remap the old physical memory to the new VA range (aligned previous size)
+            res, = driver.cuMemMap(int(new_ptr), aligned_prev_size, 0, old_handle, 0)
+            raise_if_driver_error(res)
+
+            # Register undo for mapping
+            trans.append(lambda np=new_ptr, s=aligned_prev_size: raise_if_driver_error(driver.cuMemUnmap(np, s)[0]))
+
+            # Create new physical memory for the additional size
+            res, new_handle = driver.cuMemCreate(aligned_additional_size, prop, 0)
+            raise_if_driver_error(res)
+
+            # Register undo for new physical memory
+            trans.append(lambda h=new_handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))
+
+            # Map the new physical memory to the extended portion (aligned offset)
+            res, = driver.cuMemMap(int(new_ptr) + aligned_prev_size, aligned_additional_size, 0, new_handle, 0)
+            raise_if_driver_error(res)
+
+            # Register undo for mapping
+            trans.append(lambda base=int(new_ptr), offs=aligned_prev_size, s=aligned_additional_size: raise_if_driver_error(driver.cuMemUnmap(base + offs, s)[0]))
+
+            # Set access permissions for the entire new range
+            descs = self._build_access_descriptors(prop)
+            if descs:
+                res, = driver.cuMemSetAccess(new_ptr, total_aligned_size, descs, len(descs))
+                raise_if_driver_error(res)
+
+            # All succeeded, cancel undo actions
+            trans.commit()
+
+        # Free the old VA range (aligned previous size)
+        res2, = driver.cuMemAddressFree(int(buf.handle), aligned_prev_size)
+        raise_if_driver_error(res2)
+
+        # Invalidate the old buffer so its destructor won't try to free again
+        buf._ptr = 0
+        buf._ptr_obj = None
+        buf._size = 0
+        buf._mr = None
+
+        # Return a new Buffer for the new mapping
+        return Buffer.from_handle(ptr=new_ptr, size=new_size, mr=self)
+
+
+    def _build_access_descriptors(self, prop: driver.CUmemAllocationProp) -> list:
+        """
+        Build access descriptors for memory access permissions.
+
+        Returns
+        -------
+        list
+            List of CUmemAccessDesc objects for setting memory access
+        """
+        descs = []
+
+        # Owner access
+        owner_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.self_access)
+        if owner_flags:
+            d = driver.CUmemAccessDesc()
+            d.location.type = prop.location.type
+            d.location.id = prop.location.id
+            d.flags = owner_flags
+            descs.append(d)
+
+        # Peer device access
+        peer_flags = VirtualMemoryResourceOptions._access_to_flags(self.config.peer_access)
+        if peer_flags:
+            for peer_dev in self.config.peers:
+                d = driver.CUmemAccessDesc()
+                d.location.type = driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
+                d.location.id = int(peer_dev)
+                d.flags = peer_flags
+                descs.append(d)
+
+        return descs
+
+
+    def allocate(self, size: int, stream: Stream = None) -> Buffer:
+        """
+        Allocate a buffer of the given size using CUDA virtual memory.
+
+        Parameters
+        ----------
+        size : int
+            The size in bytes of the buffer to allocate.
+        stream : Stream, optional
+            CUDA stream to associate with the allocation (not currently supported).
+
+        Returns
+        -------
+        Buffer
+            A Buffer object representing the allocated virtual memory.
+
+        Raises
+        ------
+        NotImplementedError
+            If a stream is provided or if the location type is not device memory.
+        CUDAError
+            If any CUDA driver API call fails during allocation.
+
+        Notes
+        -----
+        This method uses transactional allocation: if any step fails, all resources
+        allocated so far are automatically cleaned up. The allocation is performed
+        with the configured granularity, access permissions, and peer access as
+        specified in the resource's configuration.
+        """
+        if stream is not None:
+            raise NotImplementedError("Stream is not supported with VirtualMemoryResource")
+
+        config = self.config
+        # ---- Build allocation properties ----
+        prop = driver.CUmemAllocationProp()
+        prop.type = VirtualMemoryResourceOptions._allocation_type_to_driver(config.allocation_type)
+
+        prop.location.type = VirtualMemoryResourceOptions._location_type_to_driver(config.location_type)
+        prop.location.id = self.device.device_id if config.location_type == "device" else -1
+        prop.allocFlags.gpuDirectRDMACapable = 1 if config.gpu_direct_rdma else 0
+        prop.requestedHandleTypes = VirtualMemoryResourceOptions._handle_type_to_driver(config.handle_type)
+
+        # ---- Query and apply granularity ----
+        # Choose min vs recommended granularity per config
+        gran_flag = VirtualMemoryResourceOptions._granularity_to_driver(config.granularity)
+        res, gran = driver.cuMemGetAllocationGranularity(prop, gran_flag)
+        raise_if_driver_error(res)
+
+        aligned_size = VirtualMemoryResource._align_up(size, gran)
+        addr_align = config.addr_align or gran
+
+        # ---- Transactional allocation ----
+        with Transaction() as trans:
+            # ---- Create physical memory ----
+            res, handle = driver.cuMemCreate(aligned_size, prop, 0)
+            raise_if_driver_error(res)
+            # Register undo for physical memory
+            trans.append(lambda h=handle: raise_if_driver_error(driver.cuMemRelease(h)[0]))
+
+            # ---- Reserve VA space ----
+            # Potentially, use a separate size for the VA reservation from the physical allocation size
+            res, ptr = driver.cuMemAddressReserve(aligned_size, addr_align, config.addr_hint, 0)
+            raise_if_driver_error(res)
+            # Register undo for VA reservation
+            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemAddressFree(p, s)[0]))
+
+            # ---- Map physical memory into VA ----
+            res, = driver.cuMemMap(ptr, aligned_size, 0, handle, 0)
+            trans.append(lambda p=ptr, s=aligned_size: raise_if_driver_error(driver.cuMemUnmap(p, s)[0]))
+            raise_if_driver_error(res)
+
+            # ---- Set access for owner + peers ----
+            descs = self._build_access_descriptors(prop)
+            if descs:
+                res, = driver.cuMemSetAccess(ptr, aligned_size, descs, len(descs))
+                raise_if_driver_error(res)
+
+            trans.commit()
+
+        # Done — return a Buffer that tracks this VA range
+        buf = Buffer.from_handle(ptr=ptr, size=aligned_size, mr=self)
+        return buf
+
+    def deallocate(self, ptr: int, size: int, stream: Stream=None) -> None:
+        """
+        Deallocate memory on the device using CUDA VMM APIs.
+        """
+        result, handle = driver.cuMemRetainAllocationHandle(ptr)
+        raise_if_driver_error(result)
+        result, = driver.cuMemUnmap(ptr, size)
+        raise_if_driver_error(result)
+        result, = driver.cuMemAddressFree(ptr, size)
+        raise_if_driver_error(result)
+        result, = driver.cuMemRelease(handle)
+        raise_if_driver_error(result)
+
+
+    @property
+    def is_device_accessible(self) -> bool:
+        """
+        Indicates whether the allocated memory is accessible from the device.
+        """
+        return self.config.location_type == "device"
+
+    @property
+    def is_host_accessible(self) -> bool:
+        """
+        Indicates whether the allocated memory is accessible from the host.
+        """
+        return self.config.location_type == "host"
+
+    @property
+    def device_id(self) -> int:
+        """
+        Get the device ID associated with this memory resource.
+
+        Returns:
+            int: CUDA device ID. -1 if the memory resource allocates host memory
+        """
+        return self.device.device_id if self.config.location_type == "device" else -1
+
+    def __repr__(self) -> str:
+        """
+        Return a string representation of the VirtualMemoryResource.
+
+        Returns:
+            str: A string describing the object
+        """
+        return f"<VirtualMemoryResource device={self.device}>"
diff --git a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
index ddb7683bc..d57a77753 100644
--- a/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
+++ b/cuda_core/cuda/core/experimental/_utils/cuda_utils.pyx
@@ -3,9 +3,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import functools
+from functools import partial
 import importlib.metadata
 from collections import namedtuple
 from collections.abc import Sequence
+from contextlib import ExitStack
 from typing import Callable
 
 try:
@@ -232,3 +234,52 @@ def get_binding_version():
     except importlib.metadata.PackageNotFoundError:
         major_minor = importlib.metadata.version("cuda-python").split(".")[:2]
     return tuple(int(v) for v in major_minor)
+
+
+class Transaction:
+    """
+    A context manager for transactional operations with undo capability.
+
+    The Transaction class allows you to register undo actions (callbacks) that will be executed
+    if the transaction is not committed before exiting the context. This is useful for managing
+    resources or operations that need to be rolled back in case of errors or early exits.
+
+    Usage:
+        with Transaction() as txn:
+            txn.append(some_cleanup_function, arg1, arg2)
+            # ... perform operations ...
+            txn.commit()  # Disarm undo actions; nothing will be rolled back on exit
+
+    Methods:
+        append(fn, *args, **kwargs): Register an undo action to be called on rollback.
+        commit(): Disarm all undo actions; nothing will be rolled back on exit.
+    """
+    def __init__(self):
+        self._stack = ExitStack()
+        self._entered = False
+
+    def __enter__(self):
+        self._stack.__enter__()
+        self._entered = True
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        # If exit callbacks remain, they'll run in LIFO order.
+        self._entered = False
+        return self._stack.__exit__(exc_type, exc, tb)
+
+    def append(self, fn, /, *args, **kwargs):
+        """
+        Register an undo action (runs if the with-block exits without commit()).
+        Values are bound now via partial so late mutations don't bite you.
+        """
+        if not self._entered:
+            raise RuntimeError("Transaction must be entered before append()")
+        self._stack.callback(partial(fn, *args, **kwargs))
+
+    def commit(self):
+        """
+        Disarm all undo actions. After this, exiting the with-block does nothing.
+        """
+        # pop_all() empties this stack so no callbacks are triggered on exit.
+        self._stack.pop_all()
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
index f239c69cd..d7f4d3642 100644
--- a/cuda_core/docs/source/api.rst
+++ b/cuda_core/docs/source/api.rst
@@ -27,6 +27,7 @@ CUDA runtime
    MemoryResource
    DeviceMemoryResource
    LegacyPinnedMemoryResource
+   VirtualMemoryResource
 
    :template: dataclass.rst
 
@@ -36,6 +37,7 @@ CUDA runtime
    GraphDebugPrintOptions
    StreamOptions
    LaunchConfig
+   VirtualMemoryResourceOptions
 
 
 CUDA compilation toolchain
diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst
index 7c1487329..7907839e8 100644
--- a/cuda_core/docs/source/release/0.X.Y-notes.rst
+++ b/cuda_core/docs/source/release/0.X.Y-notes.rst
@@ -32,6 +32,7 @@ New features
 - Stream-ordered memory allocation can now be shared on Linux via :class:`DeviceMemoryResource`.
 - Added NVVM IR support to :class:`Program`. NVVM IR is now understood with ``code_type="nvvm"``.
 - Added an :attr:`ObjectCode.code_type` attribute for querying the code type.
+- Added :class:`VirtualMemoryResource` for low-level virtual memory management.
 
 
 New examples
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 26cd2a139..8c980837e 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -15,7 +15,15 @@
 import platform
 
 import pytest
-from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions, MemoryResource
+from cuda.core.experimental import (
+    Buffer,
+    Device,
+    DeviceMemoryResource,
+    DeviceMemoryResourceOptions,
+    MemoryResource,
+    VirtualMemoryResource,
+    VirtualMemoryResourceOptions,
+)
 from cuda.core.experimental._memory import DLDeviceType, IPCBufferDescriptor
 from cuda.core.experimental._utils.cuda_utils import handle_return
 from cuda.core.experimental.utils import StridedMemoryView
@@ -306,6 +314,141 @@ def test_device_memory_resource_initialization(mempool_device, use_device_object
     buffer.close()
 
 
+def test_vmm_allocator_basic_allocation():
+    """Test basic VMM allocation functionality.
+
+    This test verifies that VirtualMemoryResource can allocate memory
+    using CUDA VMM APIs with default configuration.
+    """
+    if platform.system() == "Windows":
+        pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
+    device = Device()
+    device.set_current()
+    options = VirtualMemoryResourceOptions()
+    # Create VMM allocator with default config
+    vmm_mr = VirtualMemoryResource(device, config=options)
+
+    # Test basic allocation
+    buffer = vmm_mr.allocate(4096)
+    assert buffer.size >= 4096  # May be aligned up
+    assert buffer.device_id == device.device_id
+    assert buffer.memory_resource == vmm_mr
+
+    # Test deallocation
+    buffer.close()
+
+    # Test multiple allocations
+    buffers = []
+    for i in range(5):
+        buf = vmm_mr.allocate(1024 * (i + 1))
+        buffers.append(buf)
+        assert buf.size >= 1024 * (i + 1)
+
+    # Clean up
+    for buf in buffers:
+        buf.close()
+
+
+def test_vmm_allocator_policy_configuration():
+    """Test VMM allocator with different policy configurations.
+
+    This test verifies that VirtualMemoryResource can be configured
+    with different allocation policies and that the configuration affects
+    the allocation behavior.
+    """
+    if platform.system() == "Windows":
+        pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
+    device = Device()
+    device.set_current()
+
+    # Test with custom VMM config
+    custom_config = VirtualMemoryResourceOptions(
+        allocation_type="pinned",
+        location_type="device",
+        granularity="minimum",
+        gpu_direct_rdma=True,
+        handle_type="posix_fd" if platform.system() != "Windows" else "win32",
+        peers=(),
+        self_access="rw",
+        peer_access="rw",
+    )
+
+    vmm_mr = VirtualMemoryResource(device, config=custom_config)
+
+    # Verify configuration is applied
+    assert vmm_mr.config == custom_config
+    assert vmm_mr.config.gpu_direct_rdma is True
+    assert vmm_mr.config.granularity == "minimum"
+
+    # Test allocation with custom config
+    buffer = vmm_mr.allocate(8192)
+    assert buffer.size >= 8192
+    assert buffer.device_id == device.device_id
+
+    # Test policy modification
+    new_config = VirtualMemoryResourceOptions(
+        allocation_type="pinned",
+        location_type="device",
+        granularity="recommended",
+        gpu_direct_rdma=False,
+        handle_type="posix_fd",
+        peers=(),
+        self_access="r",  # Read-only access
+        peer_access="r",
+    )
+
+    # Modify allocation policy
+    modified_buffer = vmm_mr.modify_allocation(buffer, 16384, config=new_config)
+    assert modified_buffer.size >= 16384
+    assert vmm_mr.config == new_config
+    assert vmm_mr.config.self_access == "r"
+
+    # Clean up
+    modified_buffer.close()
+
+
+def test_vmm_allocator_grow_allocation():
+    """Test VMM allocator's ability to grow existing allocations.
+
+    This test verifies that VirtualMemoryResource can grow existing
+    allocations while preserving the base pointer when possible.
+    """
+    if platform.system() == "Windows":
+        pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
+    device = Device()
+    device.set_current()
+
+    options = VirtualMemoryResourceOptions()
+
+    vmm_mr = VirtualMemoryResource(device, config=options)
+
+    # Create initial allocation
+    buffer = vmm_mr.allocate(2 * 1024 * 1024)
+    original_size = buffer.size
+
+    # Grow the allocation
+    grown_buffer = vmm_mr.modify_allocation(buffer, 4 * 1024 * 1024)
+
+    # Verify growth
+    assert grown_buffer.size >= 4 * 1024 * 1024
+    assert grown_buffer.size > original_size
+    # Because of the slow path, the pointer may change
+    # We cannot assert that the new pointer is the same,
+    # but we can assert that a new pointer was assigned
+    assert grown_buffer.handle is not None
+
+    # Test growing to same size (should return original buffer)
+    same_buffer = vmm_mr.modify_allocation(grown_buffer, 4 * 1024 * 1024)
+    assert same_buffer.size == grown_buffer.size
+
+    # Test growing to smaller size (should return original buffer)
+    smaller_buffer = vmm_mr.modify_allocation(grown_buffer, 2 * 1024 * 1024)
+    assert smaller_buffer.size == grown_buffer.size
+
+    # Clean up
+    grown_buffer.close()
+
+
 def test_mempool(mempool_device):
     device = mempool_device