diff --git a/CHANGELOG.md b/CHANGELOG.md index 0daaa22..bf01853 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,20 @@ All notable changes to this project will be documented in this file. ## [Unreleased] -## [0.10.7] - 2025-09-22 +## [0.10.7] - 2025-10-24 + +### Tests +- Added regression coverage for long classifier needles to exercise the + heap-allocation fallback. ### Changed - Added an owning `From` conversion for `ErrorResponse` and updated the Axum adapter to use it, eliminating redundant clones when building HTTP error bodies. + - Precomputed lowercase Turnkey classifier needles with a stack-backed buffer + to remove repeated transformations while keeping the common zero-allocation + path for short patterns. + ## [0.10.6] - 2025-09-21 diff --git a/src/turnkey/classifier.rs b/src/turnkey/classifier.rs index f9a2d93..747f76e 100644 --- a/src/turnkey/classifier.rs +++ b/src/turnkey/classifier.rs @@ -1,9 +1,13 @@ use super::domain::TurnkeyErrorKind; +const STACK_NEEDLE_INLINE_CAP: usize = 64; + /// Heuristic classifier for raw SDK/provider messages (ASCII case-insensitive). /// -/// This helper **does not allocate**; it performs case-insensitive `contains` -/// checks over the input string to map common upstream texts to stable kinds. +/// This helper keeps allocations to a minimum; it performs case-insensitive +/// `contains` checks over the input string to map common upstream texts to +/// stable kinds while reusing stack buffers for the short ASCII patterns we +/// match. /// /// The classifier is intentionally minimal; providers can and will change /// messages. Prefer returning structured errors from adapters whenever @@ -55,20 +59,41 @@ pub fn classify_turnkey_error(msg: &str) -> TurnkeyErrorKind { } /// Returns true if `haystack` contains `needle` ignoring ASCII case. -/// Performs the search without allocating. +/// +/// The search avoids heap allocations for needles up to +/// `STACK_NEEDLE_INLINE_CAP` bytes by reusing a stack buffer. Longer needles +/// allocate once to store their lowercased representation. #[inline] fn contains_nocase(haystack: &str, needle: &str) -> bool { // Fast path: empty needle always matches. if needle.is_empty() { return true; } - // Walk haystack windows and compare ASCII case-insensitively. - haystack.as_bytes().windows(needle.len()).any(|w| { - w.iter() - .copied() - .map(ascii_lower) - .eq(needle.as_bytes().iter().copied().map(ascii_lower)) - }) + let haystack_bytes = haystack.as_bytes(); + let needle_bytes = needle.as_bytes(); + + let search = |needle_lower: &[u8]| { + haystack_bytes.windows(needle_lower.len()).any(|window| { + window + .iter() + .zip(needle_lower.iter()) + .all(|(hay, lower_needle)| ascii_lower(*hay) == *lower_needle) + }) + }; + + if needle_bytes.len() <= STACK_NEEDLE_INLINE_CAP { + let mut inline = [0u8; STACK_NEEDLE_INLINE_CAP]; + for (idx, byte) in needle_bytes.iter().enumerate() { + inline[idx] = ascii_lower(*byte); + } + search(&inline[..needle_bytes.len()]) + } else { + let mut lowercased = Vec::with_capacity(needle_bytes.len()); + for byte in needle_bytes { + lowercased.push(ascii_lower(*byte)); + } + search(lowercased.as_slice()) + } } /// Check whether `haystack` contains any of the `needles` (ASCII @@ -90,10 +115,17 @@ pub(super) mod internal_tests { use super::*; #[test] - fn contains_nocase_works_without_alloc() { + fn contains_nocase_matches_ascii_case_insensitively() { assert!(contains_nocase("ABCdef", "cDe")); assert!(contains_any_nocase("hello world", &["nope", "WORLD"])); assert!(!contains_nocase("rustacean", "python")); assert!(contains_nocase("", "")); } + + #[test] + fn contains_nocase_handles_long_needles() { + let haystack = "prefixed".to_owned() + &"A".repeat(128) + "suffix"; + let needle = "a".repeat(128); + assert!(contains_nocase(&haystack, &needle)); + } }