From 1ac00785b089350b78781d75a27bb499f2957562 Mon Sep 17 00:00:00 2001 From: RA <70325462+RAprogramm@users.noreply.github.com> Date: Sun, 21 Sep 2025 06:41:48 +0700 Subject: [PATCH] Optimize Turnkey classifier case-insensitive search --- CHANGELOG.md | 11 ++++++++ Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 14 +++++----- src/turnkey/classifier.rs | 54 +++++++++++++++++++++++++++++++-------- 5 files changed, 63 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b9d410..2886b2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +## [0.10.7] - 2025-10-24 + +### Changed +- Precomputed lowercase Turnkey classifier needles with a stack-backed buffer + to remove repeated transformations while keeping the common zero-allocation + path for short patterns. + +### Tests +- Added regression coverage for long classifier needles to exercise the + heap-allocation fallback. + ## [0.10.6] - 2025-09-21 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 20c236e..3ad3692 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1606,7 +1606,7 @@ dependencies = [ [[package]] name = "masterror" -version = "0.10.6" +version = "0.10.7" dependencies = [ "actix-web", "axum", diff --git a/Cargo.toml b/Cargo.toml index 509485f..21c8f5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "masterror" -version = "0.10.6" +version = "0.10.7" rust-version = "1.90" edition = "2024" license = "MIT OR Apache-2.0" diff --git a/README.md b/README.md index eb6568f..b376d9a 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,9 @@ Stable categories, conservative HTTP mapping, no `unsafe`. ~~~toml [dependencies] -masterror = { version = "0.10.6", default-features = false } +masterror = { version = "0.10.7", default-features = false } # or with features: -# masterror = { version = "0.10.6", features = [ +# masterror = { version = "0.10.7", features = [ # "axum", "actix", "openapi", "serde_json", # "sqlx", "sqlx-migrate", "reqwest", "redis", # "validator", "config", "tokio", "multipart", @@ -66,10 +66,10 @@ masterror = { version = "0.10.6", default-features = false } ~~~toml [dependencies] # lean core -masterror = { version = "0.10.6", default-features = false } +masterror = { version = "0.10.7", default-features = false } # with Axum/Actix + JSON + integrations -# masterror = { version = "0.10.6", features = [ +# masterror = { version = "0.10.7", features = [ # "axum", "actix", "openapi", "serde_json", # "sqlx", "sqlx-migrate", "reqwest", "redis", # "validator", "config", "tokio", "multipart", @@ -623,13 +623,13 @@ assert_eq!(resp.status, 401); Minimal core: ~~~toml -masterror = { version = "0.10.6", default-features = false } +masterror = { version = "0.10.7", default-features = false } ~~~ API (Axum + JSON + deps): ~~~toml -masterror = { version = "0.10.6", features = [ +masterror = { version = "0.10.7", features = [ "axum", "serde_json", "openapi", "sqlx", "reqwest", "redis", "validator", "config", "tokio" ] } @@ -638,7 +638,7 @@ masterror = { version = "0.10.6", features = [ API (Actix + JSON + deps): ~~~toml -masterror = { version = "0.10.6", features = [ +masterror = { version = "0.10.7", features = [ "actix", "serde_json", "openapi", "sqlx", "reqwest", "redis", "validator", "config", "tokio" ] } diff --git a/src/turnkey/classifier.rs b/src/turnkey/classifier.rs index f9a2d93..747f76e 100644 --- a/src/turnkey/classifier.rs +++ b/src/turnkey/classifier.rs @@ -1,9 +1,13 @@ use super::domain::TurnkeyErrorKind; +const STACK_NEEDLE_INLINE_CAP: usize = 64; + /// Heuristic classifier for raw SDK/provider messages (ASCII case-insensitive). /// -/// This helper **does not allocate**; it performs case-insensitive `contains` -/// checks over the input string to map common upstream texts to stable kinds. +/// This helper keeps allocations to a minimum; it performs case-insensitive +/// `contains` checks over the input string to map common upstream texts to +/// stable kinds while reusing stack buffers for the short ASCII patterns we +/// match. /// /// The classifier is intentionally minimal; providers can and will change /// messages. Prefer returning structured errors from adapters whenever @@ -55,20 +59,41 @@ pub fn classify_turnkey_error(msg: &str) -> TurnkeyErrorKind { } /// Returns true if `haystack` contains `needle` ignoring ASCII case. -/// Performs the search without allocating. +/// +/// The search avoids heap allocations for needles up to +/// `STACK_NEEDLE_INLINE_CAP` bytes by reusing a stack buffer. Longer needles +/// allocate once to store their lowercased representation. #[inline] fn contains_nocase(haystack: &str, needle: &str) -> bool { // Fast path: empty needle always matches. if needle.is_empty() { return true; } - // Walk haystack windows and compare ASCII case-insensitively. - haystack.as_bytes().windows(needle.len()).any(|w| { - w.iter() - .copied() - .map(ascii_lower) - .eq(needle.as_bytes().iter().copied().map(ascii_lower)) - }) + let haystack_bytes = haystack.as_bytes(); + let needle_bytes = needle.as_bytes(); + + let search = |needle_lower: &[u8]| { + haystack_bytes.windows(needle_lower.len()).any(|window| { + window + .iter() + .zip(needle_lower.iter()) + .all(|(hay, lower_needle)| ascii_lower(*hay) == *lower_needle) + }) + }; + + if needle_bytes.len() <= STACK_NEEDLE_INLINE_CAP { + let mut inline = [0u8; STACK_NEEDLE_INLINE_CAP]; + for (idx, byte) in needle_bytes.iter().enumerate() { + inline[idx] = ascii_lower(*byte); + } + search(&inline[..needle_bytes.len()]) + } else { + let mut lowercased = Vec::with_capacity(needle_bytes.len()); + for byte in needle_bytes { + lowercased.push(ascii_lower(*byte)); + } + search(lowercased.as_slice()) + } } /// Check whether `haystack` contains any of the `needles` (ASCII @@ -90,10 +115,17 @@ pub(super) mod internal_tests { use super::*; #[test] - fn contains_nocase_works_without_alloc() { + fn contains_nocase_matches_ascii_case_insensitively() { assert!(contains_nocase("ABCdef", "cDe")); assert!(contains_any_nocase("hello world", &["nope", "WORLD"])); assert!(!contains_nocase("rustacean", "python")); assert!(contains_nocase("", "")); } + + #[test] + fn contains_nocase_handles_long_needles() { + let haystack = "prefixed".to_owned() + &"A".repeat(128) + "suffix"; + let needle = "a".repeat(128); + assert!(contains_nocase(&haystack, &needle)); + } }