-
Notifications
You must be signed in to change notification settings - Fork 7
Harden BrainLayer search validation and backfill coverage #79
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| """Search and retrieval methods for VectorStore (mixin).""" | ||
|
|
||
| import copy | ||
| import hashlib | ||
| import json | ||
| import math | ||
| import os | ||
|
|
@@ -21,15 +22,36 @@ | |
| # - Filter-scoped: all query-affecting filters belong in the cache key. | ||
| # - Copy-on-read: callers enrich and mutate result metadata after search. | ||
| _HYBRID_CACHE_TTL = 60.0 # seconds | ||
| _HYBRID_CACHE_MAX = 128 # max entries (LRU eviction) | ||
| _HYBRID_CACHE_MAX = 128 # max entries (LRU eviction) | ||
|
|
||
| # Module-level LRU cache: {cache_key: (result, timestamp)} | ||
| _hybrid_cache: "OrderedDict[tuple, tuple[dict, float]]" = OrderedDict() | ||
|
|
||
|
|
||
def clear_hybrid_search_cache(store_key: Any = None) -> None:
    """Drop cached hybrid search results.

    When ``store_key`` is ``None`` the entire module-level cache is emptied.
    Otherwise ``store_key`` is converted with ``os.fspath`` and only the
    entries whose cache key starts with that value are removed.
    """
    if store_key is not None:
        target = os.fspath(store_key)

        # Collect matches first so the cache is never mutated mid-iteration.
        doomed = []
        for cache_key in _hybrid_cache:
            if cache_key and cache_key[0] == target:
                doomed.append(cache_key)

        for cache_key in doomed:
            _hybrid_cache.pop(cache_key, None)
        return

    _hybrid_cache.clear()
|
|
||
|
|
||
def _hybrid_embedding_key(query_embedding: Optional[List[float]]) -> bytes:
    """Return a fixed-size digest of ``query_embedding`` for use in cache keys.

    ``None`` maps to empty bytes. Any other input has each value coerced to
    ``float``, is packed with ``serialize_f32``, and is then SHA-256 hashed,
    so iterables holding equal values yield the same key component.
    """
    if query_embedding is not None:
        as_floats = list(map(float, query_embedding))
        packed = serialize_f32(as_floats)
        return hashlib.sha256(packed).digest()
    return b""
|
|
||
|
|
||
| def _hybrid_cache_key( | ||
| store_key: str, | ||
| query_text: str, | ||
| query_embedding: Optional[List[float]], | ||
| n_results: int, | ||
| project_filter: Optional[str], | ||
| content_type_filter: Optional[str], | ||
|
|
@@ -48,6 +70,7 @@ def _hybrid_cache_key( | |
| return ( | ||
| store_key, | ||
| query_text, | ||
| _hybrid_embedding_key(query_embedding), | ||
| n_results, | ||
| project_filter, | ||
| content_type_filter, | ||
|
|
@@ -424,6 +447,7 @@ def hybrid_search( | |
| cache_key = _hybrid_cache_key( | ||
| store_key, | ||
| query_text, | ||
| query_embedding, | ||
| n_results, | ||
|
Comment on lines
+447
to
+451
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The cache key excludes … [the remainder of this review comment was truncated during extraction]. Useful? React with 👍 / 👎. |
||
| project_filter, | ||
| content_type_filter, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.