From 3c5ae7587c191e213dfaad30f924802739645477 Mon Sep 17 00:00:00 2001 From: juan-g-bonilla Date: Thu, 20 Nov 2025 20:01:46 -0800 Subject: [PATCH 1/5] Make spice kernel (un)loading faster and safe in parallel sims --- .../spiceInterface/spiceInterface.cpp | 185 +++++++++++++----- .../spiceInterface/spiceInterface.h | 99 +++++++++- .../spiceInterface/spiceInterface.i | 60 ++++++ 3 files changed, 290 insertions(+), 54 deletions(-) diff --git a/src/simulation/environment/spiceInterface/spiceInterface.cpp b/src/simulation/environment/spiceInterface/spiceInterface.cpp index bdcff1241c..f261d895ee 100755 --- a/src/simulation/environment/spiceInterface/spiceInterface.cpp +++ b/src/simulation/environment/spiceInterface/spiceInterface.cpp @@ -23,6 +23,49 @@ #include "architecture/utilities/simDefinitions.h" #include "architecture/utilities/macroDefinitions.h" #include "architecture/utilities/rigidBodyKinematics.h" +#include "spiceInterface.h" + +namespace { + /** + * RAII guard for SPICE error mode. + * + * Sets SPICE error action to RETURN while the guard is alive so that + * calls report failures via failed_c() instead of aborting. Restores + * the previous error action and print settings on destruction. + */ + struct SpiceErrorModeGuard + { + char oldAction[32]; + char oldPrint[32]; + + SpiceErrorModeGuard() + { + erract_c("GET", sizeof(oldAction), oldAction); + errprt_c("GET", sizeof(oldPrint), oldPrint); + + // Only override the abort behavior + erract_c("SET", 0, const_cast("RETURN")); + // DO NOT suppress printing: errprt is left untouched + } + + ~SpiceErrorModeGuard() + { + erract_c("SET", 0, oldAction); + errprt_c("SET", 0, oldPrint); + } + }; + + /** + * Normalize a file system path to a canonical absolute string. + * + * Used to key kernels so that one physical file maps to a single + * cache entry even if referenced through different relative paths. 
+ */ + std::string absolutize(const std::filesystem::path& path) + { + return std::filesystem::absolute(path).lexically_normal().string(); + } +} /*! This constructor initializes the variables that spice uses. Most of them are not intended to be changed, but a couple are user configurable. @@ -74,11 +117,6 @@ SpiceInterface::~SpiceInterface() delete this->transRefStateOutMsgs.at(c); } delete [] this->spiceBuffer; -// if(this->SPICELoaded) -// { -// this->clearKeeper(); -// } - return; } void SpiceInterface::clearKeeper() @@ -426,63 +464,46 @@ void SpiceInterface::pullSpiceData(std::vector *spic } } -/*! This method loads a requested SPICE kernel into the system memory. It is - its own method because we have to load several SPICE kernels in for our - application. Note that they are stored in the SPICE library and are not - held locally in this object. - @return int Zero for success one for failure - @param kernelName The name of the kernel we are loading - @param dataPath The path to the data area on the filesystem +/** + * Load a SPICE kernel for use by this interface. + * + * This function takes a kernel file name and a base directory and + * ensures that the corresponding SPICE kernel is available to the + * simulation. Internally the module keeps track of which kernels it + * has already loaded so that the same file is not loaded multiple + * times. + * + * @param kernelName File name of the kernel inside dataPath. + * @param dataPath Directory where the kernel is located. + * @return 0 on success, 1 if loading the kernel failed. */ int SpiceInterface::loadSpiceKernel(char *kernelName, const char *dataPath) { - char *fileName = new char[this->charBufferSize]; - SpiceChar *name = new SpiceChar[this->charBufferSize]; - - //! - The required calls come from the SPICE documentation. - //! 
- The most critical call is furnsh_c - strcpy(name, "REPORT"); - erract_c("SET", this->charBufferSize, name); - strcpy(fileName, dataPath); - strcat(fileName, kernelName); - furnsh_c(fileName); - - //! - Check to see if we had trouble loading a kernel and alert user if so - strcpy(name, "DEFAULT"); - erract_c("SET", this->charBufferSize, name); - delete[] fileName; - delete[] name; - if(failed_c()) { - return 1; - } + std::filesystem::path base(dataPath); + std::filesystem::path fullPath = base / kernelName; + auto kernel = SpiceKernel::request(fullPath.string()); + if (!kernel->wasLoadSuccesful()) return 1; + this->loadedKernels[kernel->getPath()] = kernel; return 0; } -/*! This method unloads a requested SPICE kernel into the system memory. It is - its own method because we have to load several SPICE kernels in for our - application. Note that they are stored in the SPICE library and are not - held locally in this object. - @return int Zero for success one for failure - @param kernelName The name of the kernel we are unloading - @param dataPath The path to the data area on the filesystem +/** + * Tell this interface that a SPICE kernel is no longer needed. + * + * This function removes the kernel from the set of kernels managed + * by this interface. Once no users remain, the underlying kernel is + * also removed from SPICE so it no longer affects future queries. + * + * @param kernelName File name of the kernel inside dataPath. + * @param dataPath Directory where the kernel is located. + * @return always 0. */ int SpiceInterface::unloadSpiceKernel(char *kernelName, const char *dataPath) { - char *fileName = new char[this->charBufferSize]; - SpiceChar *name = new SpiceChar[this->charBufferSize]; - - //! - The required calls come from the SPICE documentation. - //! 
- The most critical call is furnsh_c - strcpy(name, "REPORT"); - erract_c("SET", this->charBufferSize, name); - strcpy(fileName, dataPath); - strcat(fileName, kernelName); - unload_c(fileName); - delete[] fileName; - delete[] name; - if(failed_c()) { - return 1; - } + std::filesystem::path base(dataPath); + std::filesystem::path fullPath = base / kernelName; + auto key = absolutize(fullPath); + this->loadedKernels.erase(key); return 0; } @@ -506,3 +527,61 @@ std::string SpiceInterface::getCurrentTimeString() delete[] spiceOutputBuffer; return(returnTimeString); } + +std::mutex SpiceKernel::mutex; +std::unordered_map> SpiceKernel::cache; + +std::shared_ptr +SpiceKernel::request(const std::filesystem::path& path) +{ + const std::string key = absolutize(path); + + std::lock_guard lock(mutex); + + auto it = cache.find(key); + if (it != cache.end()) + { + if (auto existing = it->second.lock()) + { + // Already have a live handle to this kernel + return existing; + } + // Weak pointer expired - fall through and create a new one + } + + // First live handle for this absolute path in this process + auto handle = std::shared_ptr(new SpiceKernel(key)); + + if (handle->loadSucceeded) cache[key] = handle; + + return handle; +} + +SpiceKernel::~SpiceKernel() noexcept +{ + if (!loadSucceeded) return; + + SpiceErrorModeGuard guard; + unload_c(path.c_str()); + if (failed_c()) + { + reset_c(); // SPICE printed its own messages already + } +} + +SpiceKernel::SpiceKernel(std::string path_) + : path(std::move(path_)) +{ + SpiceErrorModeGuard guard; + furnsh_c(path.c_str()); + + if (failed_c()) + { + reset_c(); // SPICE already printed diagnostics + loadSucceeded = false; // destructor will not unload + } + else + { + loadSucceeded = true; + } +} diff --git a/src/simulation/environment/spiceInterface/spiceInterface.h b/src/simulation/environment/spiceInterface/spiceInterface.h index e162ae1f6b..22d9573739 100755 --- a/src/simulation/environment/spiceInterface/spiceInterface.h +++ 
b/src/simulation/environment/spiceInterface/spiceInterface.h @@ -22,6 +22,11 @@ #include #include +#include +#include +#include +#include +#include #include "architecture/_GeneralModuleFiles/sys_model.h" #include "architecture/utilities/linearAlgebra.h" #include "architecture/utilities/bskLogging.h" @@ -35,6 +40,79 @@ #include "architecture/msgPayloadDefC/TransRefMsgPayload.h" #include "architecture/messaging/messaging.h" +/** + * Thin RAII wrapper around a single SPICE kernel. + * + * The class furnishes a kernel on construction and unloads it on + * destruction, and provides a static request function that caches + * instances by canonical absolute path so that a given kernel file is + * not furnished multiple times. + */ +class SpiceKernel +{ +public: + /** + * Request a shared handle for the kernel at the given path. + * + * The first call for a canonical path constructs a SpiceKernel, which + * furnishes the kernel once. Later calls for the same path reuse the + * existing instance as long as it is still alive. + */ + static std::shared_ptr request(const std::filesystem::path& path); + + /** + * Destructor unloads the kernel from SPICE if the load succeeded. + * + * This runs once when the last shared_ptr owning this SpiceKernel + * instance is destroyed. + */ + ~SpiceKernel(); + + /// Canonical absolute path used as the cache key and SPICE file name. + const std::string& getPath() const { return path; } + + /// True if furnsh_c succeeded for this kernel. + bool wasLoadSuccesful() const {return loadSucceeded; }; + + // avoid copy operations + SpiceKernel(const SpiceKernel&) = delete; + SpiceKernel& operator=(const SpiceKernel&) = delete; + +private: + /** + * Construct a SpiceKernel by furnishing the given canonical path. + * + * The constructor switches SPICE into RETURN mode, calls furnsh_c, + * checks failed_c, and records the load status. The destructor will + * unload only if loadSucceeded is true. 
+ */ + explicit SpiceKernel(std::string path); + + /// Canonical absolute path used as the cache key and SPICE file name. + std::string path; + + /// True if furnsh_c succeeded for this kernel. + bool loadSucceeded; + + /** + * Static mutex guarding the shared kernel cache. + * + * All access to SpiceKernel::cache must take this lock so that repeated + * calls to request from different threads do not race. + */ + static std::mutex mutex; + + /** + * Global cache mapping canonical absolute paths to weak pointers. + * + * A non expired weak pointer means there is already a live SpiceKernel + * instance owning that kernel, so request can reuse it instead of + * calling furnsh_c again. + */ + static std::unordered_map> cache; +}; + + /*! @brief spice interface class */ class SpiceInterface: public SysModel { public: @@ -50,7 +128,17 @@ class SpiceInterface: public SysModel { void computeGPSData(); void pullSpiceData(std::vector *spiceData); void writeOutputMessages(uint64_t CurrentClock); - void clearKeeper(); //!< class method + + /** Resets all data loaded to SPICE. + * + * Calls `kclear_c`, which resets all loaded kernels for all simulations + * in this process. Avoid using this, as it can affect other simulations + * running in parallel. Kernels loaded with `loadSpiceKernel` will be + * automatically cleared when all simulations that need it have closed. + * + * Deprecated, pending removal 11/20/2026. + */ + void clearKeeper(); void addPlanetNames(std::vector planetNames); void addSpacecraftNames(std::vector spacecraftNames); @@ -90,6 +178,15 @@ class SpiceInterface: public SysModel { std::vector planetData; std::vector scData; + /** + * Map of loaded kernel paths to their RAII handles. + * + * As long as an entry is present, the corresponding kernel remains + * furnished in SPICE. Removing an entry allows the SpiceKernel + * destructor to unload the kernel when all shared_ptr copies are + * gone. 
+ */ + std::unordered_map> loadedKernels; }; diff --git a/src/simulation/environment/spiceInterface/spiceInterface.i b/src/simulation/environment/spiceInterface/spiceInterface.i index 5ab522e955..16cfdbc3ec 100755 --- a/src/simulation/environment/spiceInterface/spiceInterface.i +++ b/src/simulation/environment/spiceInterface/spiceInterface.i @@ -23,6 +23,7 @@ %{ #include "spiceInterface.h" + #include "SpiceUsr.h" %} %pythoncode %{ @@ -31,6 +32,13 @@ from Basilisk.architecture.swig_common_model import * %include "swig_conly_data.i" %include "std_string.i" %include "std_vector.i" +%include "swig_deprecated.i" + +%deprecated_function( + SpiceInterface::clearKeeper, + "2026/11/20", + "This method will delete kernels that other simulations running in parallel may be using. spiceInterface will clear automatically the kernels that it has loaded." +) %template() std::vector; @@ -44,6 +52,9 @@ from Basilisk.architecture.swig_common_model import * // this raises an error because mySpiceInterface.planetFrames is returned by value %naturalvar SpiceInterface::planetFrames; +// utility class, not needed in the Python layer +%ignore SpiceKernel; + %include "sys_model.i" %include "spiceInterface.h" @@ -61,6 +72,55 @@ struct AttRefMsg_C; %include "architecture/msgPayloadDefC/TransRefMsgPayload.h" struct TransRefMsg_C; +%inline %{ + +/** + * Lightweight helper to query SPICE for a loaded kernel by file name. + * + * This function directly walks the SPICE kernel list using ktotal_c and + * kdata_c and returns true if a kernel with the given file name string + * is currently loaded. The comparison is done on the raw path string as + * stored inside SPICE. 
+ */ +bool isKernelLoaded(const std::string &path) +{ + SpiceInt count = 0; + ktotal_c("ALL", &count); + + for (SpiceInt i = 0; i < count; ++i) + { + SpiceChar file[512]; + SpiceChar type[32]; + SpiceChar source[512]; + SpiceInt handle; + SpiceBoolean found; + + kdata_c(i, + "ALL", + (SpiceInt)sizeof(file), + (SpiceInt)sizeof(type), + (SpiceInt)sizeof(source), + file, + type, + source, + &handle, + &found); + + if (found && path == std::string(file)) + { + return true; + } + } + return false; +} + +size_t countKernelsLoaded() +{ + SpiceInt count = 0; + ktotal_c("ALL", &count); + return static_cast(count); +} +%} %pythoncode %{ import sys From d0c9d8613dfd489c31c899cf0c95f728e6935ed2 Mon Sep 17 00:00:00 2001 From: juan-g-bonilla Date: Thu, 20 Nov 2025 20:05:11 -0800 Subject: [PATCH 2/5] Add test to check spice kernels are only loaded once and cleared --- .../_UnitTest/test_multipleInterfaces.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 src/simulation/environment/spiceInterface/_UnitTest/test_multipleInterfaces.py diff --git a/src/simulation/environment/spiceInterface/_UnitTest/test_multipleInterfaces.py b/src/simulation/environment/spiceInterface/_UnitTest/test_multipleInterfaces.py new file mode 100644 index 0000000000..0d78a40e0a --- /dev/null +++ b/src/simulation/environment/spiceInterface/_UnitTest/test_multipleInterfaces.py @@ -0,0 +1,98 @@ +from Basilisk.utilities import SimulationBaseClass +from Basilisk.simulation import spiceInterface + +from Basilisk import __path__ +bskPath = __path__[0] + + +def createOneSim(): + """ + Create a minimal Basilisk simulation containing a single SpiceInterface. + + Returns + ------- + TotalSim : Basilisk SimulationBaseClass instance + The newly created simulation object. + SpiceObject : SpiceInterface + The SpiceInterface instance inside the created simulation. 
+ """ + TotalSim = SimulationBaseClass.SimBaseClass() + DynUnitTestProc = TotalSim.CreateNewProcess("process") + DynUnitTestProc.addTask(TotalSim.CreateNewTask("task", 1)) + + # Create and register the SpiceInterface + SpiceObject = spiceInterface.SpiceInterface() + SpiceObject.SPICEDataPath = bskPath + '/supportData/EphemerisData/' + TotalSim.AddModelToTask("task", SpiceObject) + + # Run long enough for the SpiceInterface to furnish its kernels + TotalSim.ConfigureStopTime(2) + TotalSim.InitializeSimulation() + TotalSim.ExecuteSimulation() + + return TotalSim, SpiceObject + + +def test_multipleInterfaces(): + """ + Verify that SPICE kernels loaded through SpiceInterface are correctly + reference-counted and unloaded when all SpiceInterface instances are gone. + + The test performs the following high-level checks: + + 1. Before creating any SpiceInterface objects, the target kernel must not + be loaded in SPICE. + + 2. Creating the first simulation should cause the kernel to be furnished. + + 3. Creating many additional simulations must *not* load the kernel again. + + 4. After all simulations have loaded, the number of loaded kernels should + be the same as after loading one sim. + + 5. After all simulations fall out of scope and Python's garbage collector + runs, the kernel must be fully unloaded from SPICE. 
+ + This guarantees that: + - furnsh_c() is only called once per unique kernel file + - unload_c() is only called when the last user disappears + - the shared-pointer-based lifetime system works correctly + """ + kernel = f"{bskPath}/supportData/EphemerisData/de430.bsp" + + # Step 1 - Kernel not yet loaded + assert not spiceInterface.isKernelLoaded(kernel) + + def smallScope(): + # Step 2 - First SpiceInterface loads the kernel + firstSim, firstSpice = createOneSim() + assert spiceInterface.isKernelLoaded(kernel) + + kernelsLoadedWithOneSim = spiceInterface.countKernelsLoaded() + + # Step 3 - Many more SpiceInterfaces do NOT reload the kernel + cacheSims = [] + N = 20 + for _ in range(N): + cacheSims.append(createOneSim()) + + kernelsLoadedWithNSims = spiceInterface.countKernelsLoaded() + + # Step 4 - check kernels are not being loaded again + assert kernelsLoadedWithOneSim == kernelsLoadedWithNSims + + # sanity check kernel is still loaded + assert spiceInterface.isKernelLoaded(kernel) + + # Everything in smallScope is destroyed once we leave the function + smallScope() + + import gc + gc.collect() + + # Step 5 - Kernel must now be fully unloaded + assert not spiceInterface.isKernelLoaded(kernel) + + +if __name__ == "__main__": + test_multipleInterfaces() From 6bdb3bdb67a40466112571b66a905ac00d8b5a55 Mon Sep 17 00:00:00 2001 From: juan-g-bonilla Date: Thu, 20 Nov 2025 20:05:39 -0800 Subject: [PATCH 3/5] Add test to load many kernels concurrently and check for errors --- .../_UnitTest/test_spiceThreadSafety.py | 343 ++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 src/simulation/environment/spiceInterface/_UnitTest/test_spiceThreadSafety.py diff --git a/src/simulation/environment/spiceInterface/_UnitTest/test_spiceThreadSafety.py b/src/simulation/environment/spiceInterface/_UnitTest/test_spiceThreadSafety.py new file mode 100644 index 0000000000..c0dd15b00f --- /dev/null +++ 
b/src/simulation/environment/spiceInterface/_UnitTest/test_spiceThreadSafety.py @@ -0,0 +1,343 @@ +# +# ISC License +# +# Copyright (c) 2025, Autonomous Vehicle +# Systems Lab, University of Colorado at Boulder +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +# + +import os +import sys +import time +import multiprocessing as mp +import pytest +import traceback + +from Basilisk import __path__ +from Basilisk.simulation import spiceInterface + +r""" +Unit Test for SPICE Interface Thread Safety +=========================================== + +This script stress-tests the SPICE interface in parallel, reproducing +the conditions of GitHub issue #220 where parallel simulations using +SPICE could deadlock or corrupt data. + +Multiple worker processes repeatedly create and destroy SpiceInterface +instances, forcing concurrent kernel load/unload operations. The test +passes if all workers complete without hangs or unhandled exceptions. +""" + +bskPath = __path__[0] + +def createLoadDestroySpice(workerId, iterations, dataPath): + """ + Repeatedly create, reset, and destroy SpiceInterface objects. + + This function is run in parallel by multiple processes. Each worker + performs `iterations` cycles of: + 1. Constructing a SpiceInterface + 2. Configuring planet names and SPICE data path + 3. 
Calling Reset (which triggers kernel loads) + 4. Brief sleep to increase contention + 5. Deleting the interface (allowing kernels to be released) + + Parameters + ---------- + workerId : int + Identifier for this worker process. + iterations : int + Number of create/reset/destroy cycles to perform. + dataPath : str + Directory containing SPICE kernel data. + + Returns + ------- + dict + Summary for this worker with counts of successes, failures, and + a list of captured exception details. + """ + print(f"Worker {workerId} starting with {iterations} iterations") + + successCount = 0 + failureCount = 0 + exceptionList = [] + + try: + for iteration in range(iterations): + try: + # Create a new SpiceInterface + spiceObj = spiceInterface.SpiceInterface() + + # Use a fixed planet set to avoid random differences + planets = ["earth", "sun"] + spiceObj.addPlanetNames(planets) + + # Configure SPICE data path and trigger kernel loads + spiceObj.SPICEDataPath = dataPath + spiceObj.Reset(0) + + # Short sleep to encourage overlap among workers + time.sleep(0.001) + + # Drop reference so the object can be destroyed + del spiceObj + + successCount += 1 + print( + f"Worker {workerId} completed iteration " + f"{iteration + 1}/{iterations}" + ) + except Exception as exc: + failureCount += 1 + errorInfo = { + "workerId": workerId, + "iteration": iteration, + "error": str(exc), + "traceback": traceback.format_exc(), + } + exceptionList.append(errorInfo) + print( + f"Worker {workerId} failed at iteration {iteration} " + f"with error: {exc}" + ) + # Continue with next iteration + continue + + except Exception as exc: + # Catch any exception outside the main loop + failureCount += 1 + errorInfo = { + "workerId": workerId, + "iteration": -1, # Outside the loop + "error": str(exc), + "traceback": traceback.format_exc(), + } + exceptionList.append(errorInfo) + print( + f"Worker {workerId} failed with error outside iteration loop: {exc}" + ) + + return { + "workerId": workerId, + 
"successCount": successCount, + "failureCount": failureCount, + "exceptions": exceptionList, + } + + +def runThreadSafetyTest(numWorkers=2, iterationsPerWorker=5): + """ + Run the SPICE thread-safety stress test. + + Parameters + ---------- + numWorkers : int + Number of parallel worker processes to launch. + iterationsPerWorker : int + Number of create/reset/destroy cycles per worker. + + Returns + ------- + results : dict + Aggregate statistics over all workers. + success : bool + True if all iterations completed without failure, False otherwise. + """ + print(f"Starting SPICE Thread Safety Test with {numWorkers} workers") + print(f"Each worker will perform {iterationsPerWorker} iterations") + + dataPath = bskPath + "/supportData/EphemerisData/" + + startTime = time.time() + + workerArgs = [ + (workerId, iterationsPerWorker, dataPath) + for workerId in range(numWorkers) + ] + + with mp.Pool(processes=numWorkers) as pool: + workerResults = list(pool.starmap(createLoadDestroySpice, workerArgs)) + + endTime = time.time() + executionTime = endTime - startTime + + totalSuccess = sum(r["successCount"] for r in workerResults) + totalFailure = sum(r["failureCount"] for r in workerResults) + allExceptions = [e for r in workerResults for e in r["exceptions"]] + + results = { + "executionTime": executionTime, + "totalIterations": numWorkers * iterationsPerWorker, + "successfulIterations": totalSuccess, + "failedIterations": totalFailure, + "exceptions": allExceptions, + } + + print("\n--- SPICE Thread Safety Test Report ---") + print(f"Total execution time: {executionTime:.2f} seconds") + print(f"Total iterations: {numWorkers * iterationsPerWorker}") + print(f"Successful iterations: {totalSuccess}") + print(f"Failed iterations: {totalFailure}") + print(f"Exceptions encountered: {len(allExceptions)}") + print("--------------------------------------\n") + + if totalSuccess == 0: + print("TEST FAILED: No successful iterations completed") + if len(allExceptions) > 0: + 
print("\nFirst exception details:") + print(allExceptions[0]["traceback"]) + success = False + else: + success = (totalFailure == 0) + if success: + print("TEST PASSED: SPICE interface thread safety looks robust") + else: + print("TEST FAILED: Issues detected with SPICE interface thread safety") + if len(allExceptions) > 0: + print("\nFirst exception details:") + print(allExceptions[0]["traceback"]) + + return results, success + + +def _runTestWithTimeout(resultQueue, numWorkers, iterationsPerWorker): + """ + Helper used as a process entry point to run the test with a timeout. + + This is defined at module level so that it is picklable by + multiprocessing on all supported platforms. + """ + try: + results, success = runThreadSafetyTest(numWorkers, iterationsPerWorker) + resultQueue.put((results, success)) + except Exception as exc: + resultQueue.put( + ( + { + "error": str(exc), + "traceback": traceback.format_exc(), + }, + False, + ) + ) + + +@pytest.mark.parametrize( + "numWorkers, iterationsPerWorker", + [ + (50, 3), + ], +) +def testSpiceThreadSafety(numWorkers, iterationsPerWorker): + """ + Pytest entry point for the SPICE thread-safety test. + + Parameters + ---------- + numWorkers : int + Number of parallel worker processes. + iterationsPerWorker : int + Number of load/unload cycles per worker. 
+ """ + from multiprocessing import Process, Queue + import queue + + resultQueue = Queue() + testProcess = Process( + target=_runTestWithTimeout, + args=(resultQueue, numWorkers, iterationsPerWorker), + ) + testProcess.start() + + timeoutSeconds = 60 + testProcess.join(timeoutSeconds) + + if testProcess.is_alive(): + # Hard timeout: kill the worker process and fail the test + testProcess.terminate() + testProcess.join(1) + if testProcess.is_alive(): + os.kill(testProcess.pid, 9) + pytest.fail( + f"Thread safety test timed out after {timeoutSeconds} seconds" + ) + + try: + results, success = resultQueue.get(block=False) + + if isinstance(results, dict) and "error" in results: + pytest.fail( + "Thread safety test failed with error: " + f"{results['error']}\n{results.get('traceback')}" + ) + + assert success, "Thread safety test reported thread-safety issues" + assert ( + results["failedIterations"] == 0 + ), "Some iterations failed in the thread-safety test" + except queue.Empty: + pytest.fail( + "Thread safety test completed but did not return any results" + ) + + +if __name__ == "__main__": + from multiprocessing import Process, Queue + import queue + + numWorkers = 50 + iterationsPerWorker = 3 + + if len(sys.argv) > 1: + numWorkers = int(sys.argv[1]) + if len(sys.argv) > 2: + iterationsPerWorker = int(sys.argv[2]) + + resultQueue = Queue() + testProcess = Process( + target=_runTestWithTimeout, + args=(resultQueue, numWorkers, iterationsPerWorker), + ) + testProcess.start() + + timeoutSeconds = 60 + testProcess.join(timeoutSeconds) + + if testProcess.is_alive(): + testProcess.terminate() + testProcess.join(1) + if testProcess.is_alive(): + os.kill(testProcess.pid, 9) + print( + f"ERROR: Thread safety test timed out after {timeoutSeconds} seconds" + ) + sys.exit(2) + + try: + results, success = resultQueue.get(block=False) + + if isinstance(results, dict) and "error" in results: + print( + "ERROR: Thread safety test failed with error: " + f"{results['error']}" + ) 
+ print(results.get("traceback")) + sys.exit(1) + + sys.exit(0 if success else 1) + except queue.Empty: + print("ERROR: Thread safety test completed but did not return results") + sys.exit(1) From 69a2114a9f72ae5554887f3dbdcd22342b75280d Mon Sep 17 00:00:00 2001 From: juan-g-bonilla Date: Thu, 20 Nov 2025 20:05:49 -0800 Subject: [PATCH 4/5] Update release notes --- docs/source/Support/bskReleaseNotes.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/Support/bskReleaseNotes.rst b/docs/source/Support/bskReleaseNotes.rst index 7d0b169a1c..9823e335ad 100644 --- a/docs/source/Support/bskReleaseNotes.rst +++ b/docs/source/Support/bskReleaseNotes.rst @@ -87,6 +87,10 @@ Version |release| :ref:`scenarioPrescribedMotionWithTranslationBranching` and :ref:`scenarioPrescribedMotionWithRotationBranching`. - Fixed a bug where :ref:`spinningBodyOneDOFStateEffector` and :ref:`spinningBodyNDOFStateEffector` both registered their states under the same name, resulting in overwriting and a ``BSK_ERROR``. +- Avoid reloading and re-unloading SPICE kernels when multiple simulations run in the same process. This + fixes the problem with SPICE becoming very slow when multiple simulations are run in parallel, addresses + the SPICE kernel load limit of 5000 kernels, and prevents a rare bug where kernels are corrupted when + loaded from multiple simulations at the same time. 
Version 2.8.0 (August 30, 2025) From 4a5779a550aeb5b87eec1e00c2dadf3039998f6b Mon Sep 17 00:00:00 2001 From: juan-g-bonilla Date: Fri, 21 Nov 2025 11:11:09 -0800 Subject: [PATCH 5/5] Check if mutex-guarding unload solves concurrent spice errors --- src/simulation/environment/spiceInterface/spiceInterface.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/simulation/environment/spiceInterface/spiceInterface.cpp b/src/simulation/environment/spiceInterface/spiceInterface.cpp index f261d895ee..8f8e13eabf 100755 --- a/src/simulation/environment/spiceInterface/spiceInterface.cpp +++ b/src/simulation/environment/spiceInterface/spiceInterface.cpp @@ -561,6 +561,8 @@ SpiceKernel::~SpiceKernel() noexcept { if (!loadSucceeded) return; + std::lock_guard lock(mutex); + SpiceErrorModeGuard guard; unload_c(path.c_str()); if (failed_c())