diff --git a/README.md b/README.md index e57d275..feed7a7 100644 --- a/README.md +++ b/README.md @@ -157,8 +157,8 @@ bytes. | Env var | Effect (per-call, `pattern.match("fo")`) | |--------------------------------|------------------------------------------| | _(baseline)_ | 0.60 µs | - | `PCRE2_DISABLE_CONTEXT_CACHE=1`| 0.60 µs | - | `PCRE2_FORCE_JIT_LOCK=1` | 0.60 µs | + | `PYPCRE_DISABLE_CONTEXT_CACHE=1` *(was `PCRE2_DISABLE_CONTEXT_CACHE`)* | 0.60 µs | + | `PYPCRE_FORCE_JIT_LOCK=1` *(was `PCRE2_FORCE_JIT_LOCK`)* | 0.60 µs | | `pcre.match()` helper | 4.43 µs | The toggles reintroduce the legacy GIL hand-off, per-call match-context diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..a5d2719 --- /dev/null +++ b/conftest.py @@ -0,0 +1,8 @@ +"""Project-wide pytest configuration bridging vendored test suites.""" +from _setuptools import conftest as _setuptools_conftest + +_plugins = getattr(_setuptools_conftest, "pytest_plugins", ()) +if isinstance(_plugins, str): + pytest_plugins = [_plugins] +else: + pytest_plugins = list(_plugins) diff --git a/pcre/cache.py b/pcre/cache.py index dbb9106..0ef8713 100644 --- a/pcre/cache.py +++ b/pcre/cache.py @@ -8,10 +8,9 @@ from __future__ import annotations import os -from collections import OrderedDict from enum import Enum from threading import RLock, local -from typing import Any, Callable, Tuple, TypeVar, cast +from typing import Any, Callable, Dict, Tuple, TypeVar, cast import pcre_ext_c as _pcre2 @@ -32,7 +31,7 @@ class _ThreadCacheState(local): def __init__(self) -> None: self.cache_limit: int | None = _DEFAULT_THREAD_CACHE_LIMIT - self.pattern_cache: OrderedDict[Tuple[Any, int, bool], Any] = OrderedDict() + self.pattern_cache: Dict[Tuple[Any, int, bool], Any] = {} class _GlobalCacheState: @@ -42,7 +41,7 @@ class _GlobalCacheState: def __init__(self) -> None: self.cache_limit: int | None = _DEFAULT_GLOBAL_CACHE_LIMIT - self.pattern_cache: OrderedDict[Tuple[Any, int, bool], Any] = OrderedDict() + self.pattern_cache: Dict[Tuple[Any, int, bool], Any] = {} self.lock = RLock() @@ -112,35 +111,22 @@ def _cached_compile_thread_local( if cache_limit == 0: return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) + key = (pattern, flags, bool(jit)) + cache = _THREAD_LOCAL.pattern_cache try: - key = (pattern, flags, bool(jit)) - hash(key) + cached = cache[key] + except KeyError: + compiled = wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) + if cache_limit != 0: + if cache_limit is not None and len(cache) >= cache_limit: + cache.pop(next(iter(cache))) + cache[key] = compiled + return compiled except TypeError: return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) - - cache = _THREAD_LOCAL.pattern_cache - cached = cache.get(key) - if cached is not None: - cache.move_to_end(key) + else: return cast(T, cached) - compiled = wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) - - cache_limit = _THREAD_LOCAL.cache_limit - if cache_limit == 0: - return compiled - - cache = _THREAD_LOCAL.pattern_cache - existing = cache.get(key) - if existing is not None: - cache.move_to_end(key) - return cast(T, existing) - - cache[key] = compiled - if (cache_limit is not None) and len(cache) > cache_limit: - cache.popitem(last=False) - return compiled - def _cached_compile_global( pattern: Any, @@ -153,17 +139,16 @@ def _cached_compile_global( if cache_limit == 0: return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) - try: - key = (pattern, flags, bool(jit)) - hash(key) - except TypeError: - return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) - + key = (pattern, flags, bool(jit)) lock = _GLOBAL_STATE.lock with lock: - cached = _GLOBAL_STATE.pattern_cache.get(key) - if cached is not None: - _GLOBAL_STATE.pattern_cache.move_to_end(key) + try: + cached = _GLOBAL_STATE.pattern_cache[key] + except KeyError: + pass + except TypeError: + return wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) + else: return cast(T, cached) compiled = wrapper(_pcre2.compile(pattern, flags=flags, jit=jit)) @@ -171,16 +156,16 @@ def _cached_compile_global( with lock: if _GLOBAL_STATE.cache_limit == 0: return compiled - existing = _GLOBAL_STATE.pattern_cache.get(key) - if existing is not None: - _GLOBAL_STATE.pattern_cache.move_to_end(key) + try: + existing = _GLOBAL_STATE.pattern_cache[key] + except KeyError: + if _GLOBAL_STATE.cache_limit is not None and len(_GLOBAL_STATE.pattern_cache) >= _GLOBAL_STATE.cache_limit: + _GLOBAL_STATE.pattern_cache.pop(next(iter(_GLOBAL_STATE.pattern_cache))) + _GLOBAL_STATE.pattern_cache[key] = compiled + except TypeError: + return compiled + else: return cast(T, existing) - _GLOBAL_STATE.pattern_cache[key] = compiled - if ( - _GLOBAL_STATE.cache_limit is not None - and len(_GLOBAL_STATE.pattern_cache) > _GLOBAL_STATE.cache_limit - ): - _GLOBAL_STATE.pattern_cache.popitem(last=False) return compiled @@ -233,7 +218,7 @@ def set_cache_limit(limit: int | None) -> None: cache.clear() elif new_limit is not None: while len(cache) > new_limit: - cache.popitem(last=False) + cache.pop(next(iter(cache))) else: with _GLOBAL_STATE.lock: _GLOBAL_STATE.cache_limit = new_limit @@ -242,7 +227,7 @@ def set_cache_limit(limit: int | None) -> None: cache.clear() elif new_limit is not None: while len(cache) > new_limit: - cache.popitem(last=False) + cache.pop(next(iter(cache))) def get_cache_limit() -> int | None: diff --git a/pcre_ext/cache.c b/pcre_ext/cache.c index 922e1c4..c1cb3a9 100644 --- a/pcre_ext/cache.c +++ b/pcre_ext/cache.c @@ -19,8 +19,11 @@ typedef struct ThreadCacheState { pcre2_match_context *match_context; pcre2_match_context *offset_match_context; + PyObject *cleanup_token; } ThreadCacheState; +static void thread_cache_state_clear(ThreadCacheState *state); + typedef enum CacheStrategy { CACHE_STRATEGY_THREAD_LOCAL = 0, CACHE_STRATEGY_GLOBAL = 1 @@ -43,6 +46,14 @@ static _Atomic uint32_t global_jit_capacity = ATOMIC_VAR_INIT(1); static _Atomic size_t global_jit_start_size = ATOMIC_VAR_INIT(32 * 1024); static _Atomic size_t global_jit_max_size = ATOMIC_VAR_INIT(1024 * 1024); +static _Atomic int debug_thread_cache_count = ATOMIC_VAR_INIT(0); +static int debug_thread_cache_enabled = 0; + +static PyObject *thread_cache_cleanup_key = NULL; +#define THREAD_CACHE_CAPSULE_NAME "pcre.cache.thread_state" + +static void thread_cache_capsule_destructor(PyObject *capsule); + static inline uint32_t clamp_cache_capacity(unsigned long value) { @@ -59,6 +70,24 @@ required_ovector_pairs(PatternObject *self) return required; } +static int +env_flag_is_true(const char *value) +{ + if (value == NULL || value[0] == '\0') { + return 0; + } + switch (value[0]) { + case '0': + case 'f': + case 'F': + case 'n': + case 'N': + return 0; + default: + return 1; + } +} + static inline ThreadCacheState * thread_cache_state_get(void) { @@ -98,6 +127,34 @@ thread_cache_state_get_or_create(void) return NULL; } + if (debug_thread_cache_enabled) { + atomic_fetch_add_explicit(&debug_thread_cache_count, 1, memory_order_relaxed); + } + + PyObject *dict = PyThreadState_GetDict(); + if (dict != NULL) { + PyObject *key = thread_cache_cleanup_key; + if (key == NULL) { + key = PyUnicode_FromString("_pcre2_cache_state"); + if (key == NULL) { + PyThread_tss_set(&cache_tss, NULL); + thread_cache_state_clear(state); + PyMem_Free(state); + return NULL; + } + thread_cache_cleanup_key = key; + } + PyObject *capsule = PyCapsule_New(state, THREAD_CACHE_CAPSULE_NAME, thread_cache_capsule_destructor); + if (capsule != NULL) { + if (PyDict_SetItem(dict, key, capsule) == 0) { + state->cleanup_token = capsule; + } else { + PyErr_Clear(); + } + Py_DECREF(capsule); + } + } + return state; } @@ -140,6 +197,40 @@ thread_cache_state_clear(ThreadCacheState *state) } } +static inline void +thread_cache_state_free(ThreadCacheState *state) +{ + if (state == NULL) { + return; + } + thread_cache_state_clear(state); + if (debug_thread_cache_enabled) { + atomic_fetch_sub_explicit(&debug_thread_cache_count, 1, memory_order_relaxed); + } + PyMem_Free(state); +} + +static void +thread_cache_capsule_destructor(PyObject *capsule) +{ + ThreadCacheState *state = PyCapsule_GetPointer(capsule, THREAD_CACHE_CAPSULE_NAME); + if (state == NULL) { + PyErr_Clear(); + return; + } + if (state->cleanup_token != capsule) { + return; + } + state->cleanup_token = NULL; + if (atomic_load_explicit(&cache_tss_ready, memory_order_acquire)) { + ThreadCacheState *current = (ThreadCacheState *)PyThread_tss_get(&cache_tss); + if (current == state) { + (void)PyThread_tss_set(&cache_tss, NULL); + } + } + thread_cache_state_free(state); +} + static void thread_cache_teardown(void) { @@ -149,9 +240,19 @@ thread_cache_teardown(void) ThreadCacheState *state = thread_cache_state_get(); if (state != NULL) { - thread_cache_state_clear(state); - PyMem_Free(state); - PyThread_tss_set(&cache_tss, NULL); + if (state->cleanup_token != NULL) { + PyObject *dict = PyThreadState_GetDict(); + if (dict != NULL && thread_cache_cleanup_key != NULL) { + if (PyDict_DelItem(dict, thread_cache_cleanup_key) < 0) { + PyErr_Clear(); + } + } + PyThread_tss_set(&cache_tss, NULL); + } else { + thread_cache_state_free(state); + PyThread_tss_set(&cache_tss, NULL); + state = NULL; + } } PyThread_tss_delete(&cache_tss); @@ -412,6 +513,18 @@ cache_initialize(void) atomic_store_explicit(&cache_tss_ready, 1, memory_order_release); } + if (thread_cache_cleanup_key == NULL) { + thread_cache_cleanup_key = PyUnicode_FromString("_pcre2_cache_state"); + if (thread_cache_cleanup_key == NULL) { + return -1; + } + } + + debug_thread_cache_enabled = env_flag_is_true(Py_GETENV("PYPCRE_DEBUG")); + if (!debug_thread_cache_enabled) { + atomic_store_explicit(&debug_thread_cache_count, 0, memory_order_relaxed); + } + cache_strategy_set(CACHE_STRATEGY_THREAD_LOCAL); cache_strategy_set_locked(0); atomic_store_explicit(&context_cache_enabled, 1, memory_order_release); @@ -432,6 +545,7 @@ cache_teardown(void) global_cache_teardown(); cache_strategy_set_locked(0); cache_strategy_set(CACHE_STRATEGY_THREAD_LOCAL); + Py_CLEAR(thread_cache_cleanup_key); } pcre2_match_data * @@ -520,6 +634,16 @@ cache_set_context_cache_enabled(int enabled) atomic_store_explicit(&context_cache_enabled, enabled ? 1 : 0, memory_order_release); } +PyObject * +module_debug_thread_cache_count(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + if (!debug_thread_cache_enabled) { + return PyLong_FromLong(-1); + } + int value = atomic_load_explicit(&debug_thread_cache_count, memory_order_relaxed); + return PyLong_FromLong(value); +} + pcre2_jit_stack * jit_stack_cache_acquire(void) { diff --git a/pcre_ext/pcre2.c b/pcre_ext/pcre2.c index 725e9f8..243f665 100644 --- a/pcre_ext/pcre2.c +++ b/pcre_ext/pcre2.c @@ -121,6 +121,112 @@ jit_guard_release(void) } } +static inline pcre2_match_data * +pattern_match_data_acquire(PatternObject *pattern, int *from_pattern_cache) +{ + *from_pattern_cache = 0; +#if defined(PCRE_EXT_HAVE_ATOMICS) + pcre2_match_data *cached = atomic_exchange_explicit( + &pattern->cached_match_data, + NULL, + memory_order_acq_rel + ); + if (cached != NULL) { + *from_pattern_cache = 1; + return cached; + } +#else + (void)pattern; +#endif + return match_data_cache_acquire(pattern); +} + +static inline void +pattern_match_data_release(PatternObject *pattern, + pcre2_match_data *match_data, + int from_pattern_cache) +{ + if (match_data == NULL) { + return; + } +#if defined(PCRE_EXT_HAVE_ATOMICS) + if (from_pattern_cache) { + pcre2_match_data *expected = NULL; + if (!atomic_compare_exchange_strong_explicit( + &pattern->cached_match_data, + &expected, + match_data, + memory_order_release, + memory_order_relaxed)) { + match_data_cache_release(match_data); + } + return; + } +#else + (void)pattern; + (void)from_pattern_cache; +#endif + match_data_cache_release(match_data); +} + +static inline pcre2_match_context * +pattern_match_context_acquire(PatternObject *pattern, + int use_offset_limit, + int *from_pattern_cache) +{ + *from_pattern_cache = 0; +#if defined(PCRE_EXT_HAVE_ATOMICS) + pcre2_match_context *cached = atomic_exchange_explicit( + &pattern->cached_match_context, + NULL, + memory_order_acq_rel + ); + if (cached != NULL) { + *from_pattern_cache = 1; + return cached; + } +#else + (void)pattern; +#endif + return match_context_cache_acquire(use_offset_limit); +} + +static inline void +pattern_match_context_release(PatternObject *pattern, + pcre2_match_context *context, + int had_offset_limit, + int from_pattern_cache) +{ + if (context == NULL) { + return; + } +#if defined(PCRE_EXT_HAVE_ATOMICS) + if (from_pattern_cache) { + pcre2_jit_stack_assign(context, NULL, NULL); +#if defined(PCRE2_USE_OFFSET_LIMIT) + if (had_offset_limit) { + (void)pcre2_set_offset_limit(context, PCRE2_UNSET); + } +#else + (void)had_offset_limit; +#endif + pcre2_match_context *expected = NULL; + if (!atomic_compare_exchange_strong_explicit( + &pattern->cached_match_context, + &expected, + context, + memory_order_release, + memory_order_relaxed)) { + match_context_cache_release(context, 0); + } + return; + } +#else + (void)pattern; +#endif + match_context_cache_release(context, had_offset_limit); +} + static inline int pattern_cache_is_global(void) { @@ -1358,19 +1464,22 @@ Pattern_create_finditer(PatternObject *pattern, if (PyUnicode_READY(subject_obj) < 0) { goto error; } - Py_ssize_t utf8_length = 0; - const char *utf8_data = PyUnicode_AsUTF8AndSize(subject_obj, &utf8_length); - if (utf8_data == NULL) { - goto error; - } + Py_INCREF(subject_obj); iter->subject_is_bytes = 0; - iter->subject_length_bytes = utf8_length; iter->logical_length = PyUnicode_GET_LENGTH(subject_obj); - iter->utf8_data = utf8_data; - Py_INCREF(subject_obj); iter->utf8_owner = subject_obj; if (PyUnicode_IS_ASCII(subject_obj)) { + iter->subject_length_bytes = iter->logical_length; + iter->utf8_data = (const char *)PyUnicode_1BYTE_DATA(subject_obj); iter->utf8_is_ascii = 1; + } else { + Py_ssize_t utf8_length = 0; + const char *utf8_data = PyUnicode_AsUTF8AndSize(subject_obj, &utf8_length); + if (utf8_data == NULL) { + goto error; + } + iter->subject_length_bytes = utf8_length; + iter->utf8_data = utf8_data; } } else { PyErr_SetString(PyExc_TypeError, "subject must be str or bytes"); @@ -1518,6 +1627,33 @@ Pattern_dealloc(PatternObject *self) PyThread_free_lock(self->jit_lock); self->jit_lock = NULL; } +#endif +#if defined(PCRE_EXT_HAVE_ATOMICS) + pcre2_match_data *cached_match = atomic_exchange_explicit( + &self->cached_match_data, + NULL, + memory_order_acq_rel + ); + if (cached_match != NULL) { + pcre2_match_data_free(cached_match); + } + pcre2_match_context *cached_context = atomic_exchange_explicit( + &self->cached_match_context, + NULL, + memory_order_acq_rel + ); + if (cached_context != NULL) { + pcre2_match_context_free(cached_context); + } +#else + if (self->cached_match_data != NULL) { + pcre2_match_data_free(self->cached_match_data); + self->cached_match_data = NULL; + } + if (self->cached_match_context != NULL) { + pcre2_match_context_free(self->cached_match_context); + self->cached_match_context = NULL; + } #endif if (self->code != NULL) { pcre2_code_free(self->code); @@ -1609,18 +1745,22 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, if (PyUnicode_READY(subject_obj) < 0) { return NULL; } - Py_ssize_t utf8_length = 0; - const char *utf8_data = PyUnicode_AsUTF8AndSize(subject_obj, &utf8_length); - if (utf8_data == NULL) { - return NULL; - } Py_INCREF(subject_obj); utf8_owner = subject_obj; - buffer = utf8_data; - subject_length_bytes = utf8_length; logical_length = PyUnicode_GET_LENGTH(subject_obj); if (PyUnicode_IS_ASCII(subject_obj)) { + buffer = (const char *)PyUnicode_1BYTE_DATA(subject_obj); + subject_length_bytes = logical_length; ascii_text = 1; + } else { + Py_ssize_t utf8_length = 0; + const char *utf8_data = PyUnicode_AsUTF8AndSize(subject_obj, &utf8_length); + if (utf8_data == NULL) { + Py_DECREF(utf8_owner); + return NULL; + } + buffer = utf8_data; + subject_length_bytes = utf8_length; } } else { PyErr_SetString(PyExc_TypeError, "expected str or bytes"); @@ -1686,6 +1826,32 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, return NULL; } + if (mode == EXEC_MODE_SEARCH && self->has_first_literal) { + if (byte_start >= byte_end) { + Py_DECREF(utf8_owner); + Py_RETURN_NONE; + } + const unsigned char *scan_start = (const unsigned char *)(buffer + byte_start); + size_t span = (size_t)(byte_end - byte_start); + if (memchr(scan_start, (unsigned char)self->first_literal, span) == NULL) { + Py_DECREF(utf8_owner); + Py_RETURN_NONE; + } + } + + if ((mode == EXEC_MODE_MATCH || mode == EXEC_MODE_FULLMATCH) && + self->has_first_literal) { + if (byte_start >= byte_end) { + Py_DECREF(utf8_owner); + Py_RETURN_NONE; + } + unsigned char leading = (unsigned char)buffer[byte_start]; + if (leading != (unsigned char)self->first_literal) { + Py_DECREF(utf8_owner); + Py_RETURN_NONE; + } + } + PCRE2_SIZE offset_limit = (PCRE2_SIZE)byte_end; uint32_t match_options = options; @@ -1698,7 +1864,8 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, match_options |= PCRE2_NO_UTF_CHECK; } - pcre2_match_data *match_data = match_data_cache_acquire(self); + int match_data_from_pattern = 0; + pcre2_match_data *match_data = pattern_match_data_acquire(self, &match_data_from_pattern); if (match_data == NULL) { Py_DECREF(utf8_owner); PyErr_NoMemory(); @@ -1708,7 +1875,7 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, int rc = 0; int attempt_jit = pattern_jit_get(self); pcre2_match_context *match_context = NULL; - int match_context_acquired = 0; + int match_context_from_pattern = 0; int match_context_used_offset_limit = 0; pcre2_jit_stack *jit_stack = NULL; PCRE2_SIZE exec_length = (PCRE2_SIZE)subject_length_bytes; @@ -1720,21 +1887,30 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, #endif if (use_offset_limit_option || attempt_jit) { - match_context = match_context_cache_acquire(use_offset_limit_option); + match_context = pattern_match_context_acquire( + self, + use_offset_limit_option, + &match_context_from_pattern + ); if (match_context == NULL) { - match_data_cache_release(match_data); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); + PyErr_NoMemory(); return NULL; } - match_context_acquired = 1; } #if defined(PCRE2_USE_OFFSET_LIMIT) if (use_offset_limit_option) { int ctx_rc = pcre2_set_offset_limit(match_context, offset_limit); if (ctx_rc < 0) { - match_context_cache_release(match_context, /*had_offset_limit=*/0); - match_data_cache_release(match_data); + pattern_match_context_release( + self, + match_context, + /*had_offset_limit=*/0, + match_context_from_pattern + ); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); raise_pcre_error("set_offset_limit", ctx_rc, 0); return NULL; @@ -1753,10 +1929,15 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, if (attempt_jit) { jit_stack = jit_stack_cache_acquire(); if (jit_stack == NULL) { - if (match_context_acquired) { - match_context_cache_release(match_context, match_context_used_offset_limit); + if (match_context != NULL) { + pattern_match_context_release( + self, + match_context, + match_context_used_offset_limit, + match_context_from_pattern + ); } - match_data_cache_release(match_data); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); PyErr_NoMemory(); return NULL; @@ -1780,8 +1961,13 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, pattern_jit_set(self, 0); } else if (rc != PCRE2_ERROR_NOMATCH && rc < 0) { PCRE2_SIZE error_offset = pcre2_get_startchar(match_data); - match_context_cache_release(match_context, match_context_used_offset_limit); - match_data_cache_release(match_data); + pattern_match_context_release( + self, + match_context, + match_context_used_offset_limit, + match_context_from_pattern + ); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); raise_pcre_error("jit_match", rc, error_offset); return NULL; @@ -1799,16 +1985,26 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, } if (rc == PCRE2_ERROR_NOMATCH) { - match_context_cache_release(match_context, match_context_used_offset_limit); - match_data_cache_release(match_data); + pattern_match_context_release( + self, + match_context, + match_context_used_offset_limit, + match_context_from_pattern + ); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); Py_RETURN_NONE; } if (rc < 0) { PCRE2_SIZE error_offset = pcre2_get_startchar(match_data); - match_context_cache_release(match_context, match_context_used_offset_limit); - match_data_cache_release(match_data); + pattern_match_context_release( + self, + match_context, + match_context_used_offset_limit, + match_context_from_pattern + ); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); raise_pcre_error("match", rc, error_offset); return NULL; @@ -1817,8 +2013,13 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, uint32_t available_ovector_pairs = pcre2_get_ovector_count(match_data); PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); if (ovector == NULL || available_ovector_pairs == 0) { - match_context_cache_release(match_context, match_context_used_offset_limit); - match_data_cache_release(match_data); + pattern_match_context_release( + self, + match_context, + match_context_used_offset_limit, + match_context_from_pattern + ); + pattern_match_data_release(self, match_data, match_data_from_pattern); Py_DECREF(utf8_owner); PyErr_SetString(PyExc_RuntimeError, "PCRE2 returned empty match data"); return NULL; @@ -1838,8 +2039,13 @@ Pattern_execute(PatternObject *self, PyObject *subject_obj, Py_ssize_t pos, (uint32_t)expected_pairs, ovector); - match_context_cache_release(match_context, match_context_used_offset_limit); - match_data_cache_release(match_data); + pattern_match_context_release( + self, + match_context, + match_context_used_offset_limit, + match_context_from_pattern + ); + pattern_match_data_release(self, match_data, match_data_from_pattern); if (match == NULL) { Py_DECREF(utf8_owner); @@ -1970,6 +2176,8 @@ Pattern_create(PyObject *pattern_obj, uint32_t options, int jit, int jit_explici pattern->groupindex = NULL; #if defined(PCRE_EXT_HAVE_ATOMICS) atomic_store_explicit(&pattern->jit_enabled, 0, memory_order_relaxed); + atomic_store_explicit(&pattern->cached_match_data, NULL, memory_order_relaxed); + atomic_store_explicit(&pattern->cached_match_context, NULL, memory_order_relaxed); #else pattern->jit_lock = PyThread_allocate_lock(); if (pattern->jit_lock == NULL) { @@ -1980,6 +2188,8 @@ Pattern_create(PyObject *pattern_obj, uint32_t options, int jit, int jit_explici return NULL; } pattern->jit_enabled = 0; + pattern->cached_match_data = NULL; + pattern->cached_match_context = NULL; #endif pattern->code = code; @@ -1989,6 +2199,9 @@ Pattern_create(PyObject *pattern_obj, uint32_t options, int jit, int jit_explici pattern->pattern_is_bytes = is_bytes; pattern->compile_options = compile_options; pattern_jit_set(pattern, 0); + pattern->has_first_literal = 0; + pattern->first_literal = 0; + pattern->first_literal_caseless = (compile_options & PCRE2_CASELESS) != 0; uint32_t capture_count = 0; if (pcre2_pattern_info(code, PCRE2_INFO_CAPTURECOUNT, &capture_count) != 0) { @@ -1996,6 +2209,17 @@ Pattern_create(PyObject *pattern_obj, uint32_t options, int jit, int jit_explici } pattern->capture_count = capture_count; + uint32_t first_code_type = 0; + if (!pattern->first_literal_caseless && + pcre2_pattern_info(code, PCRE2_INFO_FIRSTCODETYPE, &first_code_type) == 0 && + first_code_type == 1u) { + uint32_t first_code_unit = 0; + if (pcre2_pattern_info(code, PCRE2_INFO_FIRSTCODEUNIT, &first_code_unit) == 0) { + pattern->has_first_literal = 1; + pattern->first_literal = first_code_unit & 0xFFu; + } + } + pattern->groupindex = create_groupindex_dict(code); if (pattern->groupindex == NULL) { Py_DECREF(pattern); @@ -2156,18 +2380,6 @@ module_compile(PyObject *Py_UNUSED(module), PyObject *args, PyObject *kwargs) return (PyObject *)compiled; } -static PyObject * -call_pattern_method(PatternObject *pattern, PyObject *callable, PyObject *subject) -{ - PyObject *args = PyTuple_Pack(1, subject); - if (args == NULL) { - return NULL; - } - PyObject *result = PyObject_Call(callable, args, NULL); - Py_DECREF(args); - return result; -} - static PyObject * module_match(PyObject *Py_UNUSED(module), PyObject *args, PyObject *kwargs) { @@ -2192,13 +2404,7 @@ module_match(PyObject *Py_UNUSED(module), PyObject *args, PyObject *kwargs) return NULL; } - PyObject *callable = PyObject_GetAttrString((PyObject *)pattern, "match"); - if (callable == NULL) { - Py_DECREF(pattern); - return NULL; - } - PyObject *result = call_pattern_method(pattern, callable, subject); - Py_DECREF(callable); + PyObject *result = Pattern_execute(pattern, subject, 0, -1, 0, EXEC_MODE_MATCH); Py_DECREF(pattern); return result; } @@ -2227,13 +2433,7 @@ module_search(PyObject *Py_UNUSED(module), PyObject *args, PyObject *kwargs) return NULL; } - PyObject *callable = PyObject_GetAttrString((PyObject *)pattern, "search"); - if (callable == NULL) { - Py_DECREF(pattern); - return NULL; - } - PyObject *result = call_pattern_method(pattern, callable, subject); - Py_DECREF(callable); + PyObject *result = Pattern_execute(pattern, subject, 0, -1, 0, EXEC_MODE_SEARCH); Py_DECREF(pattern); return result; } @@ -2262,13 +2462,7 @@ module_fullmatch(PyObject *Py_UNUSED(module), PyObject *args, PyObject *kwargs) return NULL; } - PyObject *callable = PyObject_GetAttrString((PyObject *)pattern, "fullmatch"); - if (callable == NULL) { - Py_DECREF(pattern); - return NULL; - } - PyObject *result = call_pattern_method(pattern, callable, subject); - Py_DECREF(callable); + PyObject *result = Pattern_execute(pattern, subject, 0, -1, 0, EXEC_MODE_FULLMATCH); Py_DECREF(pattern); return result; } @@ -2413,6 +2607,7 @@ static PyMethodDef module_methods[] = { {"get_library_version", (PyCFunction)module_get_pcre2_version, METH_NOARGS, PyDoc_STR("Return the PCRE2 library version string." )}, {"get_allocator", (PyCFunction)module_memory_allocator, METH_NOARGS, PyDoc_STR("Return the name of the active heap allocator (tcmalloc/jemalloc/malloc)." )}, {"_cpu_ascii_vector_mode", (PyCFunction)module_cpu_ascii_vector_mode, METH_NOARGS, PyDoc_STR("Return the active ASCII vector width (0=scalar,1=SSE2,2=AVX2,3=AVX512)." )}, + {"_debug_thread_cache_count", (PyCFunction)module_debug_thread_cache_count, METH_NOARGS, PyDoc_STR("Return the number of live thread cache states (requires PYPCRE_DEBUG=1)." )}, {"translate_unicode_escapes", (PyCFunction)module_translate_unicode_escapes, METH_O, PyDoc_STR("Translate literal \\uXXXX/\\UXXXXXXXX escapes to PCRE2-compatible \\x{...} sequences." )}, {NULL, NULL, 0, NULL}, }; @@ -2502,7 +2697,10 @@ PyInit_pcre_ext_c(void) } #endif - force_lock_env = Py_GETENV("PCRE2_FORCE_JIT_LOCK"); + force_lock_env = Py_GETENV("PYPCRE_FORCE_JIT_LOCK"); + if (force_lock_env == NULL) { + force_lock_env = Py_GETENV("PCRE2_FORCE_JIT_LOCK"); + } if (env_flag_is_true(force_lock_env)) { pcre_force_jit_lock = 1; if (jit_serial_lock == NULL) { @@ -2516,10 +2714,16 @@ PyInit_pcre_ext_c(void) pcre_force_jit_lock = 0; } - context_cache_env = Py_GETENV("PCRE2_DISABLE_CONTEXT_CACHE"); + context_cache_env = Py_GETENV("PYPCRE_DISABLE_CONTEXT_CACHE"); + if (context_cache_env == NULL) { + context_cache_env = Py_GETENV("PCRE2_DISABLE_CONTEXT_CACHE"); + } cache_set_context_cache_enabled(env_flag_is_true(context_cache_env) ? 0 : 1); pattern_cache_env = Py_GETENV("PYPCRE_CACHE_PATTERN_GLOBAL"); + if (pattern_cache_env == NULL) { + pattern_cache_env = Py_GETENV("PCRE2_CACHE_PATTERN_GLOBAL"); + } pattern_cache_global = env_flag_is_true(pattern_cache_env); if (pattern_cache_initialize(pattern_cache_global) < 0) { goto error_locks; diff --git a/pcre_ext/pcre2_module.h b/pcre_ext/pcre2_module.h index 8b66939..5501031 100644 --- a/pcre_ext/pcre2_module.h +++ b/pcre_ext/pcre2_module.h @@ -45,10 +45,17 @@ typedef struct { int pattern_is_bytes; #if defined(PCRE_EXT_HAVE_ATOMICS) _Atomic int jit_enabled; + _Atomic(pcre2_match_data *) cached_match_data; + _Atomic(pcre2_match_context *) cached_match_context; #else PyThread_type_lock jit_lock; int jit_enabled; + pcre2_match_data *cached_match_data; + pcre2_match_context *cached_match_context; #endif + int has_first_literal; + uint32_t first_literal; + int first_literal_caseless; } PatternObject; typedef struct { @@ -97,6 +104,7 @@ PyObject *module_get_jit_stack_limits(PyObject *module, PyObject *args); PyObject *module_set_jit_stack_limits(PyObject *module, PyObject *args); PyObject *module_get_cache_strategy(PyObject *module, PyObject *args); PyObject *module_set_cache_strategy(PyObject *module, PyObject *args); +PyObject *module_debug_thread_cache_count(PyObject *module, PyObject *args); /* Utilities */ PyObject *bytes_from_text(PyObject *obj); diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 6e44e86..8829036 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -27,10 +27,10 @@ from tabulate import tabulate -RUN_BENCHMARKS = os.getenv("PCRE2_RUN_BENCHMARKS") == "1" -THREAD_COUNT = int(os.getenv("PCRE2_BENCH_THREADS", "16")) -SINGLE_ITERATIONS = int(os.getenv("PCRE2_BENCH_ITERS", "5000")) -THREAD_ITERATIONS = int(os.getenv("PCRE2_BENCH_THREAD_ITERS", "40")) +RUN_BENCHMARKS = os.getenv("PYPCRE_RUN_BENCHMARKS") == "1" +THREAD_COUNT = int(os.getenv("PYPCRE_BENCH_THREADS", "16")) +SINGLE_ITERATIONS = int(os.getenv("PYPCRE_BENCH_ITERS", "5000")) +THREAD_ITERATIONS = int(os.getenv("PYPCRE_BENCH_THREAD_ITERS", "40")) UNICODE_SAMPLE_LENGTH = 128 @@ -98,7 +98,7 @@ def _build_module_operations(module): return operations -@unittest.skipUnless(RUN_BENCHMARKS, "Set PCRE2_RUN_BENCHMARKS=1 to enable benchmark tests") +@unittest.skipUnless(RUN_BENCHMARKS, "Set PYPCRE_RUN_BENCHMARKS=1 to enable benchmark tests") class TestRegexBenchmarks(unittest.TestCase): @classmethod def setUpClass(cls): diff --git a/tests/test_cache.py b/tests/test_cache.py index 13bbb25..96fcb6d 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -378,6 +378,32 @@ def wrapper(raw: Any) -> Any: cache_mod.cache_strategy("global") +def test_thread_cache_cleanup_no_leak() -> None: + script = textwrap.dedent( + """ + import gc + import json + import threading + import pcre_ext_c as ext + + def worker() -> None: + for _ in range(200): + ext.match("foo", "foo") + + threads = [threading.Thread(target=worker) for _ in range(10)] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + gc.collect() + print(json.dumps({"count": ext._debug_thread_cache_count()})) + """ + ) + data = _run_cache_script(script, env_overrides={"PYPCRE_DEBUG": "1"}) + assert data["count"] == 0 + + def test_cache_strategy_global_shares_cache_across_threads() -> None: script = textwrap.dedent( """