4 changes: 2 additions & 2 deletions README.md
@@ -157,8 +157,8 @@ bytes.
| Env var | Effect (per-call, `pattern.match("fo")`) |
|--------------------------------|------------------------------------------|
| _(baseline)_ | 0.60 µs |
| `PCRE2_DISABLE_CONTEXT_CACHE=1`| 0.60 µs |
| `PCRE2_FORCE_JIT_LOCK=1` | 0.60 µs |
| `PYPCRE_DISABLE_CONTEXT_CACHE=1` *(was `PCRE2_DISABLE_CONTEXT_CACHE`)* | 0.60 µs |
| `PYPCRE_FORCE_JIT_LOCK=1` *(was `PCRE2_FORCE_JIT_LOCK`)* | 0.60 µs |
| `pcre.match()` helper | 4.43 µs |

The toggles reintroduce the legacy GIL hand-off, per-call match-context
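A quick way to reproduce the per-call numbers in the table is a timeit loop with the toggle exported before the module is imported (the safe assumption for an env-driven switch). A minimal sketch, assuming the package imports as pcre and exposes compile() the way the README's pattern.match("fo") example implies:

import os
os.environ["PYPCRE_DISABLE_CONTEXT_CACHE"] = "1"  # set before import so the toggle is seen at init

import timeit
import pcre  # assumed top-level module name

pattern = pcre.compile("foo?")  # hypothetical pattern; any small regex works
per_call = timeit.timeit(lambda: pattern.match("fo"), number=100_000) / 100_000
print(f"{per_call * 1e6:.2f} µs/call")  # compare against the baseline row of the table
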
8 changes: 8 additions & 0 deletions conftest.py
@@ -0,0 +1,8 @@
"""Project-wide pytest configuration bridging vendored test suites."""
from _setuptools import conftest as _setuptools_conftest

_plugins = getattr(_setuptools_conftest, "pytest_plugins", ())
if isinstance(_plugins, str):
pytest_plugins = [_plugins]
else:
pytest_plugins = list(_plugins)
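
pytest only honours pytest_plugins when it is declared in the top-level conftest, and the vendored _setuptools conftest may export it either as a single string or as a sequence, so the shim above re-exports it normalised to a list. A minimal sketch of that normalisation, with hypothetical plugin names:

for plugins in ("some_plugin", ("some_plugin", "another_plugin")):  # both shapes pytest accepts
    normalised = [plugins] if isinstance(plugins, str) else list(plugins)
    print(normalised)  # ['some_plugin'], then ['some_plugin', 'another_plugin']
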
81 changes: 33 additions & 48 deletions pcre/cache.py
@@ -8,10 +8,9 @@
from __future__ import annotations

import os
from collections import OrderedDict
from enum import Enum
from threading import RLock, local
from typing import Any, Callable, Tuple, TypeVar, cast
from typing import Any, Callable, Dict, Tuple, TypeVar, cast

import pcre_ext_c as _pcre2

@@ -32,7 +31,7 @@ class _ThreadCacheState:

def __init__(self) -> None:
self.cache_limit: int | None = _DEFAULT_THREAD_CACHE_LIMIT
self.pattern_cache: OrderedDict[Tuple[Any, int, bool], Any] = OrderedDict()
self.pattern_cache: Dict[Tuple[Any, int, bool], Any] = {}


class _GlobalCacheState:
@@ -42,7 +41,7 @@ class _GlobalCacheState:

def __init__(self) -> None:
self.cache_limit: int | None = _DEFAULT_GLOBAL_CACHE_LIMIT
self.pattern_cache: OrderedDict[Tuple[Any, int, bool], Any] = OrderedDict()
self.pattern_cache: Dict[Tuple[Any, int, bool], Any] = {}
self.lock = RLock()


@@ -112,35 +111,22 @@ def _cached_compile_thread_local(
if cache_limit == 0:
return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))

key = (pattern, flags, bool(jit))
cache = _THREAD_LOCAL.pattern_cache
try:
key = (pattern, flags, bool(jit))
hash(key)
cached = cache[key]
except KeyError:
compiled = wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))
if cache_limit != 0:
if cache_limit is not None and len(cache) >= cache_limit:
cache.pop(next(iter(cache)))
cache[key] = compiled
return compiled
except TypeError:
return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))

cache = _THREAD_LOCAL.pattern_cache
cached = cache.get(key)
if cached is not None:
cache.move_to_end(key)
else:
return cast(T, cached)

compiled = wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))

cache_limit = _THREAD_LOCAL.cache_limit
if cache_limit == 0:
return compiled

cache = _THREAD_LOCAL.pattern_cache
existing = cache.get(key)
if existing is not None:
cache.move_to_end(key)
return cast(T, existing)

cache[key] = compiled
if (cache_limit is not None) and len(cache) > cache_limit:
cache.popitem(last=False)
return compiled


def _cached_compile_global(
pattern: Any,
@@ -153,34 +139,33 @@ def _cached_compile_global(
if cache_limit == 0:
return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))

try:
key = (pattern, flags, bool(jit))
hash(key)
except TypeError:
return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))

key = (pattern, flags, bool(jit))
lock = _GLOBAL_STATE.lock
with lock:
cached = _GLOBAL_STATE.pattern_cache.get(key)
if cached is not None:
_GLOBAL_STATE.pattern_cache.move_to_end(key)
try:
cached = _GLOBAL_STATE.pattern_cache[key]
except KeyError:
pass
except TypeError:
return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))
else:
return cast(T, cached)

compiled = wrapper(_pcre2.compile(pattern, flags=flags, jit=jit))

with lock:
if _GLOBAL_STATE.cache_limit == 0:
return compiled
existing = _GLOBAL_STATE.pattern_cache.get(key)
if existing is not None:
_GLOBAL_STATE.pattern_cache.move_to_end(key)
try:
existing = _GLOBAL_STATE.pattern_cache[key]
except KeyError:
if _GLOBAL_STATE.cache_limit is not None and len(_GLOBAL_STATE.pattern_cache) >= _GLOBAL_STATE.cache_limit:
_GLOBAL_STATE.pattern_cache.pop(next(iter(_GLOBAL_STATE.pattern_cache)))
_GLOBAL_STATE.pattern_cache[key] = compiled
except TypeError:
return compiled
else:
return cast(T, existing)
_GLOBAL_STATE.pattern_cache[key] = compiled
if (
_GLOBAL_STATE.cache_limit is not None
and len(_GLOBAL_STATE.pattern_cache) > _GLOBAL_STATE.cache_limit
):
_GLOBAL_STATE.pattern_cache.popitem(last=False)
return compiled


@@ -233,7 +218,7 @@ def set_cache_limit(limit: int | None) -> None:
cache.clear()
elif new_limit is not None:
while len(cache) > new_limit:
cache.popitem(last=False)
cache.pop(next(iter(cache)))
else:
with _GLOBAL_STATE.lock:
_GLOBAL_STATE.cache_limit = new_limit
@@ -242,7 +227,7 @@ def set_cache_limit(limit: int | None) -> None:
cache.clear()
elif new_limit is not None:
while len(cache) > new_limit:
cache.popitem(last=False)
cache.pop(next(iter(cache)))


def get_cache_limit() -> int | None:
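The pcre/cache.py rewrite above trades the OrderedDict LRU (move_to_end on every hit) for a plain dict evicted in insertion order: cache.pop(next(iter(cache))) drops the oldest entry, relying on the insertion-order guarantee dicts have had since Python 3.7. Hits get cheaper because nothing is reordered, at the cost of evicting by insertion age rather than recency. A minimal sketch of the resulting FIFO behaviour, with hypothetical keys:

cache: dict[str, int] = {}
limit = 2

def put(key: str, value: int) -> None:
    if key in cache:
        return  # hits do not refresh position: eviction is FIFO, not LRU
    if len(cache) >= limit:
        cache.pop(next(iter(cache)))  # next(iter(...)) yields the oldest inserted key
    cache[key] = value

put("a", 1); put("b", 2); put("c", 3)
assert list(cache) == ["b", "c"]  # "a" was evicted first-in, first-out
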
130 changes: 127 additions & 3 deletions pcre_ext/cache.c
@@ -19,8 +19,11 @@ typedef struct ThreadCacheState {

pcre2_match_context *match_context;
pcre2_match_context *offset_match_context;
PyObject *cleanup_token;
} ThreadCacheState;

static void thread_cache_state_clear(ThreadCacheState *state);

typedef enum CacheStrategy {
CACHE_STRATEGY_THREAD_LOCAL = 0,
CACHE_STRATEGY_GLOBAL = 1
@@ -43,6 +46,14 @@ static _Atomic uint32_t global_jit_capacity = ATOMIC_VAR_INIT(1);
static _Atomic size_t global_jit_start_size = ATOMIC_VAR_INIT(32 * 1024);
static _Atomic size_t global_jit_max_size = ATOMIC_VAR_INIT(1024 * 1024);

static _Atomic int debug_thread_cache_count = ATOMIC_VAR_INIT(0);
static int debug_thread_cache_enabled = 0;

static PyObject *thread_cache_cleanup_key = NULL;
#define THREAD_CACHE_CAPSULE_NAME "pcre.cache.thread_state"

static void thread_cache_capsule_destructor(PyObject *capsule);

static inline uint32_t
clamp_cache_capacity(unsigned long value)
{
@@ -59,6 +70,24 @@
return required;
}

static int
env_flag_is_true(const char *value)
{
if (value == NULL || value[0] == '\0') {
return 0;
}
switch (value[0]) {
case '0':
case 'f':
case 'F':
case 'n':
case 'N':
return 0;
default:
return 1;
}
}

static inline ThreadCacheState *
thread_cache_state_get(void)
{
@@ -98,6 +127,34 @@ thread_cache_state_get_or_create(void)
return NULL;
}

if (debug_thread_cache_enabled) {
atomic_fetch_add_explicit(&debug_thread_cache_count, 1, memory_order_relaxed);
}

PyObject *dict = PyThreadState_GetDict();
if (dict != NULL) {
PyObject *key = thread_cache_cleanup_key;
if (key == NULL) {
key = PyUnicode_FromString("_pcre2_cache_state");
if (key == NULL) {
PyThread_tss_set(&cache_tss, NULL);
thread_cache_state_clear(state);
PyMem_Free(state);
return NULL;
}
thread_cache_cleanup_key = key;
}
PyObject *capsule = PyCapsule_New(state, THREAD_CACHE_CAPSULE_NAME, thread_cache_capsule_destructor);
if (capsule != NULL) {
if (PyDict_SetItem(dict, key, capsule) == 0) {
state->cleanup_token = capsule;
} else {
PyErr_Clear();
}
Py_DECREF(capsule);
}
}

return state;
}

@@ -140,6 +197,40 @@ thread_cache_state_clear(ThreadCacheState *state)
}
}

static inline void
thread_cache_state_free(ThreadCacheState *state)
{
if (state == NULL) {
return;
}
thread_cache_state_clear(state);
if (debug_thread_cache_enabled) {
atomic_fetch_sub_explicit(&debug_thread_cache_count, 1, memory_order_relaxed);
}
PyMem_Free(state);
}

static void
thread_cache_capsule_destructor(PyObject *capsule)
{
ThreadCacheState *state = PyCapsule_GetPointer(capsule, THREAD_CACHE_CAPSULE_NAME);
if (state == NULL) {
PyErr_Clear();
return;
}
if (state->cleanup_token != capsule) {
return;
}
state->cleanup_token = NULL;
if (atomic_load_explicit(&cache_tss_ready, memory_order_acquire)) {
ThreadCacheState *current = (ThreadCacheState *)PyThread_tss_get(&cache_tss);
if (current == state) {
(void)PyThread_tss_set(&cache_tss, NULL);
}
}
thread_cache_state_free(state);
}

static void
thread_cache_teardown(void)
{
@@ -149,9 +240,19 @@ thread_cache_teardown(void)

ThreadCacheState *state = thread_cache_state_get();
if (state != NULL) {
thread_cache_state_clear(state);
PyMem_Free(state);
PyThread_tss_set(&cache_tss, NULL);
if (state->cleanup_token != NULL) {
PyObject *dict = PyThreadState_GetDict();
if (dict != NULL && thread_cache_cleanup_key != NULL) {
if (PyDict_DelItem(dict, thread_cache_cleanup_key) < 0) {
PyErr_Clear();
}
}
PyThread_tss_set(&cache_tss, NULL);
} else {
thread_cache_state_free(state);
PyThread_tss_set(&cache_tss, NULL);
state = NULL;
}
}

PyThread_tss_delete(&cache_tss);
@@ -412,6 +513,18 @@ cache_initialize(void)
atomic_store_explicit(&cache_tss_ready, 1, memory_order_release);
}

if (thread_cache_cleanup_key == NULL) {
thread_cache_cleanup_key = PyUnicode_FromString("_pcre2_cache_state");
if (thread_cache_cleanup_key == NULL) {
return -1;
}
}

debug_thread_cache_enabled = env_flag_is_true(Py_GETENV("PYPCRE_DEBUG"));
if (!debug_thread_cache_enabled) {
atomic_store_explicit(&debug_thread_cache_count, 0, memory_order_relaxed);
}

cache_strategy_set(CACHE_STRATEGY_THREAD_LOCAL);
cache_strategy_set_locked(0);
atomic_store_explicit(&context_cache_enabled, 1, memory_order_release);
@@ -432,6 +545,7 @@
global_cache_teardown();
cache_strategy_set_locked(0);
cache_strategy_set(CACHE_STRATEGY_THREAD_LOCAL);
Py_CLEAR(thread_cache_cleanup_key);
}

pcre2_match_data *
Expand Down Expand Up @@ -520,6 +634,16 @@ cache_set_context_cache_enabled(int enabled)
atomic_store_explicit(&context_cache_enabled, enabled ? 1 : 0, memory_order_release);
}

PyObject *
module_debug_thread_cache_count(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
{
if (!debug_thread_cache_enabled) {
return PyLong_FromLong(-1);
}
int value = atomic_load_explicit(&debug_thread_cache_count, memory_order_relaxed);
return PyLong_FromLong(value);
}

pcre2_jit_stack *
jit_stack_cache_acquire(void)
{
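The capsule machinery added above ties each thread's native cache state to the interpreter's per-thread dict (PyThreadState_GetDict), so thread_cache_capsule_destructor reclaims the state when the thread state is destroyed even if the TSS destructor never fires. A rough Python analogue of that ownership pattern (names are illustrative, not the extension's API):

import threading

class _State:
    def __init__(self) -> None:
        self.contexts: list = []  # stands in for the cached pcre2 match contexts

    def close(self) -> None:
        self.contexts.clear()  # stands in for thread_cache_state_clear()

class _Anchor:
    # plays the PyCapsule's role: owns the state and frees it when the thread dies
    def __init__(self, state: _State) -> None:
        self._state = state

    def __del__(self) -> None:
        self._state.close()

_tls = threading.local()

def state_get_or_create() -> _State:
    state = getattr(_tls, "state", None)
    if state is None:
        state = _State()
        _tls.state = state
        _tls.anchor = _Anchor(state)  # released with the thread's locals, triggering cleanup
    return state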