NVIDIA · cpcloud · Feb 27, 2026 · Feb 25, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/cuda_core/cuda/core/_program.pxd b/cuda_core/cuda/core/_program.pxd
@@ -16,3 +16,5 @@ cdef class Program:
         object _compile_lock  # Per-instance lock for compile-time mutation
         bint _use_libdevice      # Flag for libdevice loading
         bint _libdevice_added
+        bytes _nvrtc_code       # Source code for NVRTC retry (PCH auto-resize)
+        str _pch_status         # PCH creation outcome after compile
diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx
@@ -105,6 +105,32 @@ cdef class Program:
         """
         return Program_compile(self, target_type, name_expressions, logs)
 
+    @property
+    def pch_status(self) -> str | None:
+        """PCH creation outcome from the most recent :meth:`compile` call.
+
+        Possible values:
+
+        * ``"created"`` — PCH file was written successfully.
+        * ``"not_attempted"`` — PCH creation was not attempted (e.g. the
+          compiler decided not to, or automatic PCH processing skipped it).
+        * ``"failed"`` — an error prevented PCH creation.
+        * ``None`` — PCH was not requested, the program has not been
+          compiled yet, the backend is not NVRTC (e.g. PTX or NVVM),
+          or the NVRTC bindings are too old to report status.
+
+        When ``create_pch`` or ``pch`` is set in :class:`ProgramOptions` and
+        the PCH heap is too small, :meth:`compile` resizes the heap and retries
+        once; ``"failed"`` is reported if the retry still exhausts the heap.
+
+        .. note::
+
+           PCH is only supported for ``code_type="c++"`` programs that
+           use the NVRTC backend. For PTX and NVVM programs this property
+           always returns ``None``.
+        """
+        return self._pch_status
+
     @property
     def backend(self) -> str:
         """Return this Program instance's underlying backend."""
@@ -477,6 +503,8 @@ def _find_libdevice_path():
     return find_bitcode_lib("device")
 
 
+
+
 cdef inline bint _process_define_macro_inner(list options, object macro) except? -1:
     """Process a single define macro, returning True if successful."""
     if isinstance(macro, str):
@@ -548,6 +576,8 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
     self._use_libdevice = False
     self._libdevice_added = False
 
+    self._pch_status = None
+
     if code_type == "c++":
         assert_type(code, str)
         if options.extra_sources is not None:
@@ -562,6 +592,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
             HANDLE_RETURN_NVRTC(NULL, cynvrtc.nvrtcCreateProgram(
                 &nvrtc_prog, code_ptr, name_ptr, 0, NULL, NULL))
         self._h_nvrtc = create_nvrtc_program_handle(nvrtc_prog)
+        self._nvrtc_code = code_bytes
         self._backend = "NVRTC"
         self._linker = None
 
@@ -649,9 +680,15 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
     return 0
 
 
-cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs):
-    """Compile using NVRTC backend and return ObjectCode."""
-    cdef cynvrtc.nvrtcProgram prog = as_cu(self._h_nvrtc)
+cdef object _nvrtc_compile_and_extract(
+    cynvrtc.nvrtcProgram prog, str target_type, object name_expressions,
+    object logs, list options_list, str name,
+):
+    """Run nvrtcCompileProgram on *prog* and extract the output.
+
+    This is the inner compile+extract loop, factored out so the PCH
+    auto-retry path can call it on a fresh program handle.
+    """
     cdef size_t output_size = 0
     cdef size_t logsize = 0
     cdef vector[const char*] options_vec
@@ -669,7 +706,6 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp
             HANDLE_RETURN_NVRTC(prog, cynvrtc.nvrtcAddNameExpression(prog, name_ptr))
 
     # Build options array
-    options_list = self._options.as_bytes("nvrtc", target_type)
     options_vec.resize(len(options_list))
     for i in range(len(options_list)):
         options_vec[i] = <const char*>(<bytes>options_list[i])
@@ -716,7 +752,108 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp
                 HANDLE_RETURN_NVRTC(prog, cynvrtc.nvrtcGetProgramLog(prog, data_ptr))
             logs.write(log.decode("utf-8", errors="backslashreplace"))
 
-    return ObjectCode._init(bytes(data), target_type, symbol_mapping=symbol_mapping, name=self._options.name)
+    return ObjectCode._init(bytes(data), target_type, symbol_mapping=symbol_mapping, name=name)
+
+
+cdef int _nvrtc_pch_apis_cached = -1  # -1 = unchecked, otherwise 0 or 1
+
+cdef bint _has_nvrtc_pch_apis():
+    """Return True if the nvrtc bindings expose every PCH API used here.
+
+    The result of the first check is cached in ``_nvrtc_pch_apis_cached``.
+    """
+    global _nvrtc_pch_apis_cached
+    if _nvrtc_pch_apis_cached < 0:
+        # The heap-resize retry needs all three entry points, not only the
+        # status query, so require all of them before enabling the PCH path.
+        _nvrtc_pch_apis_cached = (
+            hasattr(nvrtc, "nvrtcGetPCHCreateStatus")
+            and hasattr(nvrtc, "nvrtcGetPCHHeapSizeRequired")
+            and hasattr(nvrtc, "nvrtcSetPCHHeapSize")
+        )
+    return _nvrtc_pch_apis_cached
+
+
+cdef str _PCH_STATUS_CREATED = "created"
+cdef str _PCH_STATUS_NOT_ATTEMPTED = "not_attempted"
+cdef str _PCH_STATUS_FAILED = "failed"
+
+
+cdef str _read_pch_status(cynvrtc.nvrtcProgram prog):
+    """Query nvrtcGetPCHCreateStatus and translate to a high-level string.
+
+    Returns one of the ``_PCH_STATUS_*`` strings, or ``None`` when NVRTC
+    reports PCH heap exhaustion so the caller can resize the heap and retry.
+    """
+    cdef cynvrtc.nvrtcResult err
+    with nogil:
+        err = cynvrtc.nvrtcGetPCHCreateStatus(prog)
+    if err == cynvrtc.nvrtcResult.NVRTC_SUCCESS:
+        return _PCH_STATUS_CREATED
+    if err == cynvrtc.nvrtcResult.NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED:
+        return None  # sentinel: caller should auto-retry
+    if err == cynvrtc.nvrtcResult.NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED:
+        return _PCH_STATUS_NOT_ATTEMPTED
+    return _PCH_STATUS_FAILED
+
+
+cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs):
+    """Compile using NVRTC backend and return ObjectCode.
+
+    When PCH is requested and the first compile reports PCH heap exhaustion,
+    the heap is resized to the size NVRTC asks for and the source is
+    recompiled once on a fresh program handle. ``self._pch_status`` records
+    the outcome.
+    """
+    cdef cynvrtc.nvrtcProgram prog = as_cu(self._h_nvrtc)
+    cdef list options_list = self._options.as_bytes("nvrtc", target_type)
+
+    # Drop the status of any earlier compile() up front so an exception below
+    # cannot leave a stale value visible through ``pch_status``.
+    self._pch_status = None
+
+    result = _nvrtc_compile_and_extract(
+        prog, target_type, name_expressions, logs, options_list, self._options.name,
+    )
+
+    cdef bint pch_creation_possible = self._options.create_pch or self._options.pch
+    if not pch_creation_possible or not _has_nvrtc_pch_apis():
+        return result
+
+    try:
+        status = _read_pch_status(prog)
+    except RuntimeError as e:
+        raise RuntimeError(
+            "PCH was requested but the runtime libnvrtc does not support "
+            "PCH APIs. Update to CUDA toolkit 12.8 or newer."
+        ) from e
+
+    if status is not None:
+        self._pch_status = status
+        return result
+
+    # Heap exhausted — auto-resize and retry with a fresh program
+    cdef size_t required = 0
+    with nogil:
+        HANDLE_RETURN_NVRTC(prog, cynvrtc.nvrtcGetPCHHeapSizeRequired(prog, &required))
+        HANDLE_RETURN_NVRTC(NULL, cynvrtc.nvrtcSetPCHHeapSize(required))
+
+    cdef cynvrtc.nvrtcProgram retry_prog
+    cdef const char* code_ptr = <const char*>self._nvrtc_code
+    cdef const char* name_ptr = <const char*>self._options._name
+    with nogil:
+        HANDLE_RETURN_NVRTC(NULL, cynvrtc.nvrtcCreateProgram(
+            &retry_prog, code_ptr, name_ptr, 0, NULL, NULL))
+    self._h_nvrtc = create_nvrtc_program_handle(retry_prog)
+
+    result = _nvrtc_compile_and_extract(
+        retry_prog, target_type, name_expressions, logs, options_list, self._options.name,
+    )
+
+    status = _read_pch_status(retry_prog)
+    # A second heap-exhausted report (None) is surfaced as a plain failure.
+    self._pch_status = status if status is not None else _PCH_STATUS_FAILED
+    return result
 
 
 cdef object Program_compile_nvvm(Program self, str target_type, object logs):

diff --git a/cuda_core/docs/source/release/0.6.0-notes.rst b/cuda_core/docs/source/release/0.6.0-notes.rst
@@ -54,6 +54,11 @@ New features
 - Added CUDA version compatibility check at import time to detect mismatches between
   ``cuda.core`` and the installed ``cuda-bindings`` version.
 
+- ``Program.compile()`` now automatically resizes the NVRTC PCH heap and
+  retries once when precompiled header (PCH) creation fails due to heap
+  exhaustion. The new ``Program.pch_status`` property reports the PCH
+  creation outcome (``"created"``, ``"not_attempted"``, ``"failed"``, or ``None``).
+
 
 Fixes and enhancements
 ----------------------

diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
@@ -57,6 +57,22 @@ def _get_nvrtc_version_for_tests():
         return None
 
 
+def _has_nvrtc_pch_apis_for_tests():
+    required = (
+        "nvrtcGetPCHHeapSize",
+        "nvrtcSetPCHHeapSize",
+        "nvrtcGetPCHCreateStatus",
+        "nvrtcGetPCHHeapSizeRequired",
+    )
+    return all(hasattr(nvrtc, name) for name in required)
+
+
+nvrtc_pch_available = pytest.mark.skipif(
+    (_get_nvrtc_version_for_tests() or 0) < 12800 or not _has_nvrtc_pch_apis_for_tests(),
+    reason="PCH runtime APIs require NVRTC >= 12.8 bindings",
+)
+
+
 _libnvvm_version = None
 _libnvvm_version_attempted = False
 
@@ -316,6 +332,32 @@ def test_cpp_program_with_pch_options(init_cuda, tmp_path):
         program.close()
 
 
+@nvrtc_pch_available
+def test_cpp_program_pch_auto_creates(init_cuda, tmp_path):
+    """compile() with create_pch must leave a concrete pch_status behind."""
+    code = 'extern "C" __global__ void my_kernel() {}'
+    pch_path = str(tmp_path / "test.pch")
+    program = Program(code, "c++", ProgramOptions(create_pch=pch_path))
+    try:
+        assert program.pch_status is None  # not compiled yet
+        program.compile("ptx")
+        assert program.pch_status in ("created", "not_attempted", "failed")
+    finally:
+        # Release the program even when an assertion above fails.
+        program.close()
+
+
+def test_cpp_program_pch_status_none_without_pch(init_cuda):
+    """Without any PCH option, pch_status stays None after compile()."""
+    code = 'extern "C" __global__ void my_kernel() {}'
+    program = Program(code, "c++")
+    try:
+        program.compile("ptx")
+        assert program.pch_status is None
+    finally:
+        program.close()
+
+
 options = [
     ProgramOptions(max_register_count=32),
     ProgramOptions(debug=True),