From ab24f7b95eb4f4b53c58341342ca569855375327 Mon Sep 17 00:00:00 2001 From: smunini Date: Mon, 27 Apr 2026 12:58:25 -0400 Subject: [PATCH 1/5] refactor(search): make search-param type resolution registry-driven MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The REST extractor's hardcoded "well-known param name → SearchParamType" match silently misclassified anything outside the list (e.g. Goal.target-date fell through to a value-shape heuristic, which mongodb's strict typed query builder then rejected with HTTP 400). Three other call sites carried similar duplicate tables. Replace the lot with a single deterministic resolver: resolve_param_type(registry, resource_type, name, values) 1. registry.get_param(resource_type, name) 2. registry.get_param("Resource", name) ← global params 3. value-shape heuristic (unregistered custom only) Plumbed via a new SearchProvider::search_param_registry() trait method that every backend already had as a concrete accessor; REST handlers acquire it from state.storage(). Hardcoded match tables in mongodb's build_search_params and sqlite's chain_builder are gone; sqlite's dead-code infer_target_type also removed. Effect on the failing inferno mongodb test: Goal?patient&target-date now resolves target-date as Date deterministically, build_date_filter parses the value correctly, returns 200. The two DiagnosticReport patient&status 400s were never caused by type inference (status was already Token) and need separate triage. Net: −150 lines of redundant hardcoded mappings, +30 lines of resolver, 1 trait method, 8 new unit tests covering registry hit / Resource-base fallback / value heuristic / target lookup. All 642 persistence and 184 rest lib tests pass; clippy clean with project CI flags. --- .../src/backends/elasticsearch/search_impl.rs | 6 + .../src/backends/mongodb/search_impl.rs | 40 +-- .../src/backends/postgres/search_impl.rs | 6 + crates/persistence/src/backends/s3/storage.rs | 18 ++ .../backends/sqlite/search/chain_builder.rs | 32 +- .../src/backends/sqlite/search_impl.rs | 65 ++--- crates/persistence/src/composite/storage.rs | 63 ++++ crates/persistence/src/core/search.rs | 12 + crates/persistence/src/search/mod.rs | 2 +- crates/persistence/src/search/registry.rs | 179 +++++++++++- .../src/extractors/search_query_builder.rs | 276 ++++++------------ crates/rest/src/handlers/compartment.rs | 8 +- crates/rest/src/handlers/search.rs | 17 +- 13 files changed, 435 insertions(+), 289 deletions(-) diff --git a/crates/persistence/src/backends/elasticsearch/search_impl.rs b/crates/persistence/src/backends/elasticsearch/search_impl.rs index b4b4b1540..e2014c466 100644 --- a/crates/persistence/src/backends/elasticsearch/search_impl.rs +++ b/crates/persistence/src/backends/elasticsearch/search_impl.rs @@ -209,6 +209,12 @@ impl SearchProvider for ElasticsearchBackend { _ => Ok(0), } } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + self.search_registry() + } } #[async_trait] diff --git a/crates/persistence/src/backends/mongodb/search_impl.rs b/crates/persistence/src/backends/mongodb/search_impl.rs index 1f9f19be5..8f627a656 100644 --- a/crates/persistence/src/backends/mongodb/search_impl.rs +++ b/crates/persistence/src/backends/mongodb/search_impl.rs @@ -280,6 +280,12 @@ impl SearchProvider for MongoBackend { .await .map_err(|e| internal_error(format!("Failed to count MongoDB search results: {}", e))) } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + self.search_registry() + } } #[async_trait] @@ -1023,24 +1029,15 @@ impl MongoBackend { params .iter() .map(|(name, value)| { - let param_type = self - .lookup_param_type(®istry, resource_type, name) - .unwrap_or(match name.as_str() { - "_id" => SearchParamType::Token, - "_lastUpdated" => SearchParamType::Date, - "_tag" | "_profile" | "_security" => SearchParamType::Token, - "identifier" => SearchParamType::Token, - "patient" | "subject" | "encounter" | "performer" | "author" - | "requester" | "recorder" | "asserter" | "practitioner" - | "organization" | "location" | "device" => SearchParamType::Reference, - _ => SearchParamType::String, - }); + let values = vec![SearchValue::parse(value)]; + let param_type = + crate::search::resolve_param_type(®istry, resource_type, name, &values); SearchParameter { name: name.clone(), param_type, modifier: None, - values: vec![SearchValue::parse(value)], + values, chain: vec![], components: vec![], } @@ -1048,23 +1045,6 @@ impl MongoBackend { .collect() } - fn lookup_param_type( - &self, - registry: &crate::search::SearchParameterRegistry, - resource_type: &str, - param_name: &str, - ) -> Option { - if let Some(def) = registry.get_param(resource_type, param_name) { - return Some(def.param_type); - } - - if let Some(def) = registry.get_param("Resource", param_name) { - return Some(def.param_type); - } - - None - } - fn merge_unique(target: &mut Vec, additions: Vec) { let mut seen: HashSet = target .iter() diff --git a/crates/persistence/src/backends/postgres/search_impl.rs b/crates/persistence/src/backends/postgres/search_impl.rs index f079f17cb..548b57065 100644 --- a/crates/persistence/src/backends/postgres/search_impl.rs +++ b/crates/persistence/src/backends/postgres/search_impl.rs @@ -389,6 +389,12 @@ impl SearchProvider for PostgresBackend { let count: i64 = row.get(0); Ok(count as u64) } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + self.search_registry() + } } #[async_trait] diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs index 07deb5126..914b09604 100644 --- a/crates/persistence/src/backends/s3/storage.rs +++ b/crates/persistence/src/backends/s3/storage.rs @@ -1035,6 +1035,24 @@ impl SearchProvider for S3Backend { capability: "search_count".to_string(), })) } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + // S3 standalone does not implement search; an empty registry is + // required only to satisfy the trait. In real deployments S3 is + // composed with a search backend (e.g., Elasticsearch) and the + // composite forwards to that backend's registry. + use std::sync::OnceLock; + static EMPTY: OnceLock< + std::sync::Arc>, + > = OnceLock::new(); + EMPTY.get_or_init(|| { + std::sync::Arc::new(parking_lot::RwLock::new( + crate::search::SearchParameterRegistry::new(), + )) + }) + } } #[async_trait] diff --git a/crates/persistence/src/backends/sqlite/search/chain_builder.rs b/crates/persistence/src/backends/sqlite/search/chain_builder.rs index d728a6935..2eb331277 100644 --- a/crates/persistence/src/backends/sqlite/search/chain_builder.rs +++ b/crates/persistence/src/backends/sqlite/search/chain_builder.rs @@ -668,13 +668,16 @@ impl ChainQueryBuilder { depth: usize, param_num: usize, ) -> StorageResult<(String, SqlParam)> { - // Determine the parameter type from the registry + // Determine the parameter type from the registry. Falls back to the + // shared value-shape heuristic for unregistered custom params. let param_type = { let registry = self.registry.read(); - registry - .get_param(resource_type, param_name) - .map(|p| p.param_type) - .unwrap_or_else(|| self.infer_param_type(param_name)) + crate::search::resolve_param_type( + ®istry, + resource_type, + param_name, + std::slice::from_ref(value), + ) }; let alias = format!("si{}", depth); @@ -745,25 +748,6 @@ impl ChainQueryBuilder { Ok((condition, param)) } - - /// Infers parameter type based on common parameter names. - fn infer_param_type(&self, param_name: &str) -> SearchParamType { - match param_name { - "name" | "family" | "given" | "text" | "display" | "description" | "address" - | "city" | "state" | "country" => SearchParamType::String, - "identifier" | "code" | "status" | "type" | "category" | "class" | "gender" - | "language" => SearchParamType::Token, - "date" | "birthdate" | "issued" | "effective" | "period" | "authored" => { - SearchParamType::Date - } - "patient" | "subject" | "performer" | "author" | "encounter" | "organization" - | "practitioner" | "location" => SearchParamType::Reference, - "value-quantity" | "dose" | "quantity" => SearchParamType::Quantity, - "length" | "count" | "value" => SearchParamType::Number, - "url" | "source" => SearchParamType::Uri, - _ => SearchParamType::String, // Default fallback - } - } } /// Builds a date comparison condition. diff --git a/crates/persistence/src/backends/sqlite/search_impl.rs b/crates/persistence/src/backends/sqlite/search_impl.rs index d0997f193..13a04f849 100644 --- a/crates/persistence/src/backends/sqlite/search_impl.rs +++ b/crates/persistence/src/backends/sqlite/search_impl.rs @@ -413,6 +413,12 @@ impl SearchProvider for SqliteBackend { Ok(count as u64) } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + self.search_registry() + } } #[async_trait] @@ -1012,29 +1018,6 @@ impl SqliteBackend { false } - /// Infer the target resource type for a reference parameter. - #[allow(dead_code)] - fn infer_target_type(&self, _base_type: &str, reference_param: &str) -> String { - // This is a simplified mapping - a real implementation would use - // search parameter definitions from the FHIR specification - match reference_param { - "patient" | "subject" => "Patient".to_string(), - "practitioner" | "performer" => "Practitioner".to_string(), - "organization" => "Organization".to_string(), - "encounter" => "Encounter".to_string(), - "location" => "Location".to_string(), - "device" => "Device".to_string(), - _ => { - // Default: capitalize first letter - let mut chars = reference_param.chars(); - match chars.next() { - Some(c) => c.to_uppercase().chain(chars).collect(), - None => reference_param.to_string(), - } - } - } - } - /// Find resources matching a simple field value search using the search index. #[allow(dead_code)] fn find_resources_by_value( @@ -1174,7 +1157,17 @@ mod tests { use serde_json::json; fn create_test_backend() -> SqliteBackend { - let backend = SqliteBackend::in_memory().unwrap(); + // Point at the workspace's data directory so the search-parameter + // registry loads the full FHIR spec (otherwise only the 5 minimal + // embedded params are available and chained-search tests fail to + // resolve param types like Observation.code → Token). + let data_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("data"); + let mut config = crate::backends::sqlite::backend::SqliteBackendConfig::default(); + config.data_dir = Some(data_dir); + let backend = SqliteBackend::with_config(":memory:", config).unwrap(); backend.init_schema().unwrap(); backend } @@ -2196,30 +2189,6 @@ mod tests { assert_eq!(result, Some(("Patient".to_string(), "456".to_string()))); } - #[test] - fn test_infer_target_type() { - let backend = SqliteBackend::in_memory().unwrap(); - - assert_eq!( - backend.infer_target_type("Observation", "patient"), - "Patient" - ); - assert_eq!( - backend.infer_target_type("Observation", "subject"), - "Patient" - ); - assert_eq!( - backend.infer_target_type("Encounter", "practitioner"), - "Practitioner" - ); - assert_eq!( - backend.infer_target_type("Patient", "organization"), - "Organization" - ); - // Unknown param - capitalize first letter - assert_eq!(backend.infer_target_type("Observation", "custom"), "Custom"); - } - // ======================================================================== // Token Search with system|code Tests // ======================================================================== diff --git a/crates/persistence/src/composite/storage.rs b/crates/persistence/src/composite/storage.rs index 5c735ac4d..3960472f8 100644 --- a/crates/persistence/src/composite/storage.rs +++ b/crates/persistence/src/composite/storage.rs @@ -923,6 +923,41 @@ impl SearchProvider for CompositeStorage { })) } } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + // Same routing as `search`: prefer the dedicated Search backend's + // registry, fall back to primary, otherwise an empty registry. The + // returned reference outlives `&self` because both providers are + // owned by `self.search_providers` for the lifetime of the composite. + if let Some(search_backend) = self + .config + .backends_with_role(super::config::BackendRole::Search) + .next() + { + if let Some(provider) = self.search_providers.get(&search_backend.id) { + return provider.search_param_registry(); + } + } + + if let Some(provider) = self + .search_providers + .get(self.config.primary_id().unwrap_or("primary")) + { + return provider.search_param_registry(); + } + + use std::sync::OnceLock; + static EMPTY: OnceLock< + std::sync::Arc>, + > = OnceLock::new(); + EMPTY.get_or_init(|| { + std::sync::Arc::new(parking_lot::RwLock::new( + crate::search::SearchParameterRegistry::new(), + )) + }) + } } #[async_trait] @@ -1934,6 +1969,20 @@ mod tests { message: self.error_message.to_string(), })) } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + use std::sync::OnceLock; + static EMPTY: OnceLock< + std::sync::Arc>, + > = OnceLock::new(); + EMPTY.get_or_init(|| { + std::sync::Arc::new(parking_lot::RwLock::new( + crate::search::SearchParameterRegistry::new(), + )) + }) + } } /// Minimal mock storage for unit testing CompositeStorage. @@ -2810,5 +2859,19 @@ mod tests { ) -> StorageResult { Ok(0) } + + fn search_param_registry( + &self, + ) -> &std::sync::Arc> { + use std::sync::OnceLock; + static EMPTY: OnceLock< + std::sync::Arc>, + > = OnceLock::new(); + EMPTY.get_or_init(|| { + std::sync::Arc::new(parking_lot::RwLock::new( + crate::search::SearchParameterRegistry::new(), + )) + }) + } } } diff --git a/crates/persistence/src/core/search.rs b/crates/persistence/src/core/search.rs index 8d20dbcd7..fadd085dc 100644 --- a/crates/persistence/src/core/search.rs +++ b/crates/persistence/src/core/search.rs @@ -9,9 +9,13 @@ //! - [`TerminologySearchProvider`] - :above, :below, :in, :not-in //! - [`TextSearchProvider`] - Full-text search (_text, _content, :text) +use std::sync::Arc; + use async_trait::async_trait; +use parking_lot::RwLock; use crate::error::StorageResult; +use crate::search::SearchParameterRegistry; use crate::tenant::TenantContext; use crate::types::{ IncludeDirective, Page, ReverseChainedParameter, SearchBundle, SearchQuery, StoredResource, @@ -227,6 +231,14 @@ pub trait SearchProvider: ResourceStorage { /// This is more efficient than search when you only need the count. async fn search_count(&self, tenant: &TenantContext, query: &SearchQuery) -> StorageResult; + + /// Returns the backend's search parameter registry. + /// + /// The registry is the single source of truth for search parameter type + /// resolution (see [`crate::search::resolve_param_type`]). REST extractors + /// and chained-search builders both consult it so they cannot disagree on + /// whether a given param is a Date vs. Token vs. Reference, etc. + fn search_param_registry(&self) -> &Arc>; } /// Search provider that supports searching across multiple resource types. diff --git a/crates/persistence/src/search/mod.rs b/crates/persistence/src/search/mod.rs index 6ec7fd276..f1180d950 100644 --- a/crates/persistence/src/search/mod.rs +++ b/crates/persistence/src/search/mod.rs @@ -76,7 +76,7 @@ pub use extractor::{ExtractedValue, SearchParameterExtractor}; pub use loader::SearchParameterLoader; pub use registry::{ RegistryUpdate, SearchParameterDefinition, SearchParameterRegistry, SearchParameterSource, - SearchParameterStatus, + SearchParameterStatus, resolve_param_targets, resolve_param_type, }; pub use reindex::{ ReindexOperation, ReindexProgress, ReindexRequest, ReindexStatus, ReindexableStorage, diff --git a/crates/persistence/src/search/registry.rs b/crates/persistence/src/search/registry.rs index 5dbe5b929..07f438723 100644 --- a/crates/persistence/src/search/registry.rs +++ b/crates/persistence/src/search/registry.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use tokio::sync::broadcast; -use crate::types::SearchParamType; +use crate::types::{SearchParamType, SearchValue}; use super::errors::RegistryError; use super::loader::SearchParameterLoader; @@ -426,6 +426,89 @@ impl Default for SearchParameterRegistry { } } +/// Deterministically resolves a search parameter to its `SearchParamType`. +/// +/// Resolution order: +/// 1. Registry lookup by `(resource_type, name)`. +/// 2. Registry lookup by `("Resource", name)` for global params (`_id`, `_lastUpdated`, etc.). +/// 3. Value-shape heuristic — only reached for params that aren't in the registry at all +/// (e.g., user-defined custom params not yet registered). +/// +/// This is the single source of truth for type resolution. REST extractors and +/// storage backends both call this so they cannot disagree. +pub fn resolve_param_type( + registry: &SearchParameterRegistry, + resource_type: &str, + name: &str, + values: &[SearchValue], +) -> SearchParamType { + if let Some(def) = registry.get_param(resource_type, name) { + return def.param_type; + } + if let Some(def) = registry.get_param("Resource", name) { + return def.param_type; + } + infer_param_type_from_value(values) +} + +/// Resolves the allowed target resource types for a reference search parameter. +/// +/// Returns the registry-declared targets (e.g., `["Patient", "Group"]` for +/// `Encounter.subject`). Returns an empty `Vec` when the parameter is unknown +/// or has no declared targets — callers should treat that as "don't filter by +/// target type." +pub fn resolve_param_targets( + registry: &SearchParameterRegistry, + resource_type: &str, + name: &str, +) -> Vec { + let lookup = registry + .get_param(resource_type, name) + .or_else(|| registry.get_param("Resource", name)); + lookup + .and_then(|def| def.target.clone()) + .unwrap_or_default() +} + +/// Last-resort value-shape heuristic for parameters not present in the registry. +/// +/// Kept intentionally conservative — recognizes only the unambiguous shapes +/// (FHIR date, quantity with unit, token with system, reference) and otherwise +/// returns `String`. +fn infer_param_type_from_value(values: &[SearchValue]) -> SearchParamType { + let Some(first) = values.first() else { + return SearchParamType::String; + }; + let value = &first.value; + + // FHIR date: YYYY or YYYY-MM-DD or full instant. Optional comparator prefix + // (gt/lt/ge/le/sa/eb/ap/eq/ne) is stripped by SearchValue::parse before + // we get here, so we only inspect the literal value. + if value.len() >= 4 && value.as_bytes()[..4].iter().all(u8::is_ascii_digit) { + let rest = &value.as_bytes()[4..]; + if rest.is_empty() || rest[0] == b'-' || rest[0] == b'T' { + return SearchParamType::Date; + } + } + + // Quantity: number|system|code (FHIR token-style separator). + if value.contains('|') && value.chars().next().is_some_and(|c| c.is_ascii_digit()) { + return SearchParamType::Quantity; + } + + // Reference: ResourceType/id form. + if value.contains('/') && value.chars().next().is_some_and(|c| c.is_ascii_uppercase()) { + return SearchParamType::Reference; + } + + // Token: system|code. + if value.contains('|') { + return SearchParamType::Token; + } + + SearchParamType::String +} + impl std::fmt::Debug for SearchParameterRegistry { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SearchParameterRegistry") @@ -511,6 +594,100 @@ mod tests { assert_eq!(registry.len(), 0); } + fn registry_with(defs: Vec) -> SearchParameterRegistry { + let mut r = SearchParameterRegistry::new(); + for d in defs { + r.register(d).unwrap(); + } + r + } + + #[test] + fn resolve_param_type_hits_resource_specific_definition() { + let registry = registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Goal-target-date", + "target-date", + SearchParamType::Date, + "Goal.target.dueDate", + ) + .with_base(vec!["Goal"]), + ]); + + assert_eq!( + resolve_param_type(®istry, "Goal", "target-date", &[]), + SearchParamType::Date, + ); + } + + #[test] + fn resolve_param_type_falls_back_to_resource_base_for_global_params() { + let registry = registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Resource-lastUpdated", + "_lastUpdated", + SearchParamType::Date, + "Resource.meta.lastUpdated", + ) + .with_base(vec!["Resource"]), + ]); + + assert_eq!( + resolve_param_type(®istry, "Patient", "_lastUpdated", &[]), + SearchParamType::Date, + ); + } + + #[test] + fn resolve_param_type_uses_value_heuristic_when_unregistered() { + let registry = SearchParameterRegistry::new(); + let date_value = vec![SearchValue::eq("2020-01-15")]; + let ref_value = vec![SearchValue::eq("Patient/123")]; + let token_value = vec![SearchValue::eq("http://loinc.org|1234-5")]; + let plain = vec![SearchValue::eq("hello")]; + + assert_eq!( + resolve_param_type(®istry, "Custom", "x", &date_value), + SearchParamType::Date, + ); + assert_eq!( + resolve_param_type(®istry, "Custom", "x", &ref_value), + SearchParamType::Reference, + ); + assert_eq!( + resolve_param_type(®istry, "Custom", "x", &token_value), + SearchParamType::Token, + ); + assert_eq!( + resolve_param_type(®istry, "Custom", "x", &plain), + SearchParamType::String, + ); + assert_eq!( + resolve_param_type(®istry, "Custom", "x", &[]), + SearchParamType::String, + ); + } + + #[test] + fn resolve_param_targets_returns_declared_targets() { + let registry = registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Encounter-subject", + "subject", + SearchParamType::Reference, + "Encounter.subject", + ) + .with_base(vec!["Encounter"]) + .with_targets(vec!["Patient", "Group"]), + ]); + + assert_eq!( + resolve_param_targets(®istry, "Encounter", "subject"), + vec!["Patient".to_string(), "Group".to_string()], + ); + assert!(resolve_param_targets(®istry, "Encounter", "missing").is_empty()); + } + #[test] fn test_duplicate_url_error() { let mut registry = SearchParameterRegistry::new(); diff --git a/crates/rest/src/extractors/search_query_builder.rs b/crates/rest/src/extractors/search_query_builder.rs index c720b8cb3..caaa205f8 100644 --- a/crates/rest/src/extractors/search_query_builder.rs +++ b/crates/rest/src/extractors/search_query_builder.rs @@ -4,9 +4,10 @@ use std::collections::HashMap; +use helios_persistence::search::{SearchParameterRegistry, resolve_param_type}; use helios_persistence::types::{ - IncludeDirective, IncludeType, ReverseChainedParameter, SearchModifier, SearchParamType, - SearchParameter, SearchQuery, SearchValue, SortDirective, SummaryMode, TotalMode, + IncludeDirective, IncludeType, ReverseChainedParameter, SearchModifier, SearchParameter, + SearchQuery, SearchValue, SortDirective, SummaryMode, TotalMode, }; use super::SearchParams; @@ -25,6 +26,7 @@ use crate::error::RestError; pub fn build_search_query( resource_type: &str, params: &SearchParams, + registry: &SearchParameterRegistry, ) -> Result { let mut query = SearchQuery::new(resource_type); @@ -100,7 +102,7 @@ pub fn build_search_query( } // Parse the parameter - let param = parse_search_parameter(name, value)?; + let param = parse_search_parameter(resource_type, name, value, registry)?; query.parameters.push(param); } @@ -113,13 +115,19 @@ pub fn build_search_query( pub fn build_search_query_from_map( resource_type: &str, params: &HashMap, + registry: &SearchParameterRegistry, ) -> Result { let search_params = SearchParams::from_map(params.clone()); - build_search_query(resource_type, &search_params) + build_search_query(resource_type, &search_params, registry) } /// Parses a single search parameter with potential modifiers. -fn parse_search_parameter(name: &str, value: &str) -> Result { +fn parse_search_parameter( + resource_type: &str, + name: &str, + value: &str, + registry: &SearchParameterRegistry, +) -> Result { let (param_name, modifier) = parse_parameter_name(name); // Check for chained parameters (e.g., "patient.name" or "subject:Patient.name") @@ -131,8 +139,11 @@ fn parse_search_parameter(name: &str, value: &str) -> Result Option { } } -/// Infers parameter type based on heuristics. -/// -/// In a full implementation, this would look up the SearchParameterRegistry -/// to get the actual type. For now, we use heuristics based on: -/// - Known common parameters -/// - Modifier hints -/// - Value format -fn infer_param_type( - name: &str, - modifier: &Option, - values: &[SearchValue], -) -> SearchParamType { - // Special parameters - match name { - "_id" | "_lastUpdated" | "_tag" | "_profile" | "_security" | "_source" | "_list" - | "_has" | "_type" | "_filter" | "_query" | "_text" | "_content" => { - return SearchParamType::Special; - } - _ => {} - } - - // Infer from modifier - if let Some(mod_) = modifier { - match mod_ { - SearchModifier::Exact | SearchModifier::Contains => return SearchParamType::String, - SearchModifier::Text - | SearchModifier::In - | SearchModifier::NotIn - | SearchModifier::Above - | SearchModifier::Below - | SearchModifier::OfType - | SearchModifier::CodeOnly - | SearchModifier::CodeText => return SearchParamType::Token, - SearchModifier::Identifier | SearchModifier::Type(_) => { - return SearchParamType::Reference; - } - _ => {} - } - } - - // Infer from common parameter names - match name { - // String parameters - "name" | "family" | "given" | "address" | "address-city" | "address-country" - | "address-postalcode" | "address-state" | "phonetic" | "text" => SearchParamType::String, - - // Token parameters - "identifier" | "code" | "status" | "type" | "category" | "gender" | "language" - | "active" | "deceased" | "class" | "priority" | "intent" | "severity" => { - SearchParamType::Token - } - - // Date parameters - "birthdate" | "date" | "issued" | "onset" | "recorded" | "authored" | "effective" - | "period" | "when" | "_lastUpdated" => SearchParamType::Date, - - // Reference parameters - "patient" - | "subject" - | "encounter" - | "practitioner" - | "organization" - | "location" - | "device" - | "performer" - | "requester" - | "author" - | "recorder" - | "asserter" - | "source" - | "target" - | "agent" - | "entity" - | "focus" - | "based-on" - | "part-of" - | "derived-from" - | "specimen" - | "context" - | "service-provider" - | "general-practitioner" - | "link" - | "managing-organization" => SearchParamType::Reference, - - // Number parameters - "probability" | "age" => SearchParamType::Number, - - // Quantity parameters - "value-quantity" | "quantity" | "dose-quantity" | "component-value-quantity" => { - SearchParamType::Quantity - } - - // URI parameters - "url" | "system" | "definition" | "derived-from-uri" => SearchParamType::Uri, - - // Try to infer from value format - _ => infer_from_value(values), - } -} - -/// Infers parameter type from value format. -fn infer_from_value(values: &[SearchValue]) -> SearchParamType { - if values.is_empty() { - return SearchParamType::String; - } - - let value = &values[0].value; - - // Check for date format - if value.len() >= 4 && value.chars().take(4).all(|c| c.is_ascii_digit()) { - if value.len() >= 10 - && value - .chars() - .enumerate() - .all(|(i, c)| (i == 4 || i == 7) && c == '-' || c.is_ascii_digit()) - { - return SearchParamType::Date; - } - } - - // Check for quantity format (number with units) - if value.contains('|') && value.split('|').count() >= 2 { - let parts: Vec<&str> = value.split('|').collect(); - if !parts[0].is_empty() - && parts[0] - .chars() - .all(|c| c.is_ascii_digit() || c == '.' || c == '-') - { - return SearchParamType::Quantity; - } - // Could also be a token with system|code - return SearchParamType::Token; - } - - // Check for reference format - if value.contains('/') { - return SearchParamType::Reference; - } - - // Default to string - SearchParamType::String -} - #[cfg(test)] mod tests { use super::*; + use helios_persistence::search::SearchParameterDefinition; + use helios_persistence::types::SearchParamType; + + /// Builds a small registry covering just the params the tests exercise. + /// Keeps tests hermetic without depending on the embedded data set. + fn test_registry() -> SearchParameterRegistry { + let mut r = SearchParameterRegistry::new(); + let entries = [ + ("Patient-name", "name", SearchParamType::String, "Patient"), + ( + "Patient-birthdate", + "birthdate", + SearchParamType::Date, + "Patient", + ), + ( + "Observation-code", + "code", + SearchParamType::Token, + "Observation", + ), + ( + "Observation-patient", + "patient", + SearchParamType::Reference, + "Observation", + ), + ( + "Observation-subject", + "subject", + SearchParamType::Reference, + "Observation", + ), + ( + "Observation-date", + "date", + SearchParamType::Date, + "Observation", + ), + ]; + for (id, code, ty, base) in entries { + r.register( + SearchParameterDefinition::new( + format!("http://hl7.org/fhir/SearchParameter/{}", id), + code, + ty, + "ignored", + ) + .with_base(vec![base]), + ) + .unwrap(); + } + r + } #[test] fn test_parse_parameter_name_simple() { @@ -652,7 +574,7 @@ mod tests { params.insert("_count".to_string(), "10".to_string()); let search_params = SearchParams::from_map(params); - let query = build_search_query("Patient", &search_params).unwrap(); + let query = build_search_query("Patient", &search_params, &test_registry()).unwrap(); assert_eq!(query.resource_type, "Patient"); assert_eq!(query.count, Some(10)); @@ -666,7 +588,7 @@ mod tests { params.insert("name:exact".to_string(), "Smith".to_string()); let search_params = SearchParams::from_map(params); - let query = build_search_query("Patient", &search_params).unwrap(); + let query = build_search_query("Patient", &search_params, &test_registry()).unwrap(); assert_eq!(query.parameters.len(), 1); assert_eq!(query.parameters[0].name, "name"); @@ -679,7 +601,7 @@ mod tests { params.insert("birthdate".to_string(), "gt2000-01-01".to_string()); let search_params = SearchParams::from_map(params); - let query = build_search_query("Patient", &search_params).unwrap(); + let query = build_search_query("Patient", &search_params, &test_registry()).unwrap(); assert_eq!(query.parameters.len(), 1); assert_eq!( @@ -695,7 +617,7 @@ mod tests { params.insert("_sort".to_string(), "-date,name".to_string()); let search_params = SearchParams::from_map(params); - let query = build_search_query("Observation", &search_params).unwrap(); + let query = build_search_query("Observation", &search_params, &test_registry()).unwrap(); assert_eq!(query.sort.len(), 2); assert_eq!(query.sort[0].parameter, "date"); @@ -716,7 +638,7 @@ mod tests { params.insert("_include".to_string(), "Observation:patient".to_string()); let search_params = SearchParams::from_map(params); - let query = build_search_query("Observation", &search_params).unwrap(); + let query = build_search_query("Observation", &search_params, &test_registry()).unwrap(); assert_eq!(query.includes.len(), 1); assert_eq!(query.includes[0].source_type, "Observation"); @@ -724,32 +646,28 @@ mod tests { } #[test] - fn test_infer_param_type() { - // Known string params - assert_eq!( - infer_param_type("name", &None, &[]), - SearchParamType::String - ); + fn test_param_type_resolved_from_registry() { + let registry = test_registry(); + let name = parse_search_parameter("Patient", "name", "Smith", ®istry).unwrap(); + assert_eq!(name.param_type, SearchParamType::String); - // Known token params - assert_eq!(infer_param_type("code", &None, &[]), SearchParamType::Token); - - // Known reference params - assert_eq!( - infer_param_type("patient", &None, &[]), - SearchParamType::Reference - ); + let code = parse_search_parameter("Observation", "code", "1234-5", ®istry).unwrap(); + assert_eq!(code.param_type, SearchParamType::Token); - // Modifier hints - assert_eq!( - infer_param_type("custom", &Some(SearchModifier::Exact), &[]), - SearchParamType::String - ); + let patient = + parse_search_parameter("Observation", "patient", "Patient/1", ®istry).unwrap(); + assert_eq!(patient.param_type, SearchParamType::Reference); + } - // Special params - assert_eq!( - infer_param_type("_id", &None, &[]), - SearchParamType::Special - ); + #[test] + fn test_param_type_unregistered_falls_back_to_value_shape() { + // No entry for "made-up" anywhere — value heuristic kicks in. + let registry = SearchParameterRegistry::new(); + let date_param = + parse_search_parameter("Custom", "made-up", "2020-01-01", ®istry).unwrap(); + assert_eq!(date_param.param_type, SearchParamType::Date); + + let plain = parse_search_parameter("Custom", "made-up", "hello", ®istry).unwrap(); + assert_eq!(plain.param_type, SearchParamType::String); } } diff --git a/crates/rest/src/handlers/compartment.rs b/crates/rest/src/handlers/compartment.rs index b5f90f746..44e2808ec 100644 --- a/crates/rest/src/handlers/compartment.rs +++ b/crates/rest/src/handlers/compartment.rs @@ -138,8 +138,12 @@ where state.max_page_size(), ); - // Convert REST params to persistence SearchQuery - let query = build_search_query_from_map(&target_type, ¶ms)?; + // Convert REST params to persistence SearchQuery. Scope the registry read + // guard tightly so it doesn't span any await. + let query = { + let registry = state.storage().search_param_registry().read(); + build_search_query_from_map(&target_type, ¶ms, ®istry)? + }; // Execute the search let result = state diff --git a/crates/rest/src/handlers/search.rs b/crates/rest/src/handlers/search.rs index b74f89448..b218138c6 100644 --- a/crates/rest/src/handlers/search.rs +++ b/crates/rest/src/handlers/search.rs @@ -184,8 +184,13 @@ where params = expand_terminology_params(params, ts_url).await?; } - // Convert REST params to persistence SearchQuery - let query = build_search_query_from_map(resource_type, ¶ms)?; + // Convert REST params to persistence SearchQuery. Scope the registry read + // guard tightly so it doesn't span any await — parking_lot guards aren't + // Send by default, which would make this async fn !Send. + let query = { + let registry = state.storage().search_param_registry().read(); + build_search_query_from_map(resource_type, ¶ms, ®istry)? + }; // Execute the search // Note: The search provider is responsible for resolving _include/_revinclude @@ -258,8 +263,12 @@ where .map(|t| t.split(',').collect()) .unwrap_or_default(); - // Build a search query (resource type doesn't matter much for system search) - let query = build_search_query_from_map("Resource", ¶ms)?; + // Build a search query (resource type doesn't matter much for system search). + // Scope the registry read guard tightly so it doesn't span any await. + let query = { + let registry = state.storage().search_param_registry().read(); + build_search_query_from_map("Resource", ¶ms, ®istry)? + }; // Execute the multi-type search let result = state From 6b6eb1825fc31d281c01b807a66eb48dec65bc4b Mon Sep 17 00:00:00 2001 From: Steve Munini Date: Mon, 27 Apr 2026 14:40:01 -0500 Subject: [PATCH 2/5] feat(persistence/composite): registry-driven extract_references for _include (#82) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Composite's extract_references for _include / _revinclude was doing hardcoded JSON field-name matching (with a small `patient/subject/encounter/performer` alias map). Custom search parameters and standard ones whose JSON field name doesn't match the param name silently returned zero references — the same "in a real implementation, FHIRPath would..." stub pattern PR #80 set out to find. Replace the body with a registry-first lookup: 1. registry.get_param(resource_type, name) 2. registry.get_param("Resource", name) 3. SearchParameterExtractor::extract_for_param — runs the canonical FHIRPath expression and filters to IndexValue::Reference The hardcoded alias map is retained but only fires for params that aren't in the registry at all, so unregistered custom params keep working. Verification: - New test composite::storage::tests::test_extract_references_uses_registry_expression proves the registry's FHIRPath expression is what gets evaluated - 643 helios-persistence lib tests pass (was 642) - Clippy clean --- crates/persistence/src/composite/storage.rs | 199 +++++++++++++++++++- 1 file changed, 190 insertions(+), 9 deletions(-) diff --git a/crates/persistence/src/composite/storage.rs b/crates/persistence/src/composite/storage.rs index 3960472f8..8f2bd87f2 100644 --- a/crates/persistence/src/composite/storage.rs +++ b/crates/persistence/src/composite/storage.rs @@ -1507,30 +1507,65 @@ impl CompositeStorage { } /// Extracts references from a resource for a given search parameter. + /// + /// Resolution order: + /// 1. Look up the search parameter in the registry and evaluate its + /// FHIRPath `expression` via `SearchParameterExtractor` — the canonical + /// FHIR source of truth for every standard parameter and any custom + /// parameter the user has registered. + /// 2. Fall back to the prior heuristic (look for the search-param name as + /// a JSON field, plus a small alias map for `patient`/`subject`/etc.) + /// only when the registry doesn't know about the parameter at all. + /// That keeps unregistered custom parameters working as they did + /// before this change. fn extract_references(&self, resource: &StoredResource, search_param: &str) -> Vec { let content = resource.content(); - let mut refs = Vec::new(); + let resource_type = resource.resource_type(); + + let registered = { + let registry = self.search_param_registry().read(); + registry + .get_param(resource_type, search_param) + .or_else(|| registry.get_param("Resource", search_param)) + }; + + if let Some(param_def) = registered { + let extractor = crate::search::SearchParameterExtractor::new(Arc::clone( + self.search_param_registry(), + )); + if let Ok(values) = extractor.extract_for_param(content, ¶m_def) { + // Trust the registry: if the param is registered, return what + // the FHIRPath expression yields (even if empty) rather than + // also running the heuristic — otherwise an _include against + // a resource that genuinely has no matching reference would + // accidentally match an unrelated JSON field with the same + // name. + return values + .into_iter() + .filter_map(|v| match v.value { + crate::search::IndexValue::Reference { reference, .. } => Some(reference), + _ => None, + }) + .collect(); + } + } - // Simple extraction - looks for the search param as a field - // A real implementation would use FHIRPath or search parameter definitions + // Heuristic fallback for unregistered custom parameters. + let mut refs = Vec::new(); if let Some(value) = content.get(search_param) { Self::extract_reference_values(value, &mut refs); } - - // Also check common reference field names - let field_name = match search_param { + let alias = match search_param { "patient" | "subject" => Some("subject"), "encounter" => Some("encounter"), "performer" => Some("performer"), _ => None, }; - - if let Some(field) = field_name { + if let Some(field) = alias { if let Some(value) = content.get(field) { Self::extract_reference_values(value, &mut refs); } } - refs } @@ -2783,6 +2818,152 @@ mod tests { assert!(refs.is_empty()); } + /// `extract_references` should consult the registry first and use the + /// FHIRPath `expression` from the registered SearchParameter, not the + /// hardcoded JSON-field-name heuristic. This exercises the new wiring + /// from PR #2 of the load-bearing-stub-fixes plan. + #[test] + fn test_extract_references_uses_registry_expression() { + use crate::search::{SearchParameterDefinition, SearchParameterRegistry}; + use crate::tenant::TenantId; + use crate::types::SearchParamType; + + // Composite wrapped around a backend whose registry has been seeded + // with an Encounter.subject param whose FHIRPath expression points at + // a JSON field name that is *different* from the search-param name — + // proving the registry's expression is what's evaluated, not the + // hardcoded "patient" -> "subject" alias. + let registry = Arc::new(parking_lot::RwLock::new(SearchParameterRegistry::new())); + registry + .write() + .register( + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Encounter-subject", + "subject", + SearchParamType::Reference, + "Encounter.subject", + ) + .with_base(vec!["Encounter"]) + .with_targets(vec!["Patient", "Group"]), + ) + .unwrap(); + + struct MockWithRegistry { + registry: Arc>, + } + + #[async_trait::async_trait] + impl ResourceStorage for MockWithRegistry { + fn backend_name(&self) -> &'static str { + "mock-with-registry" + } + async fn create( + &self, + _tenant: &TenantContext, + _resource_type: &str, + _resource: serde_json::Value, + _fhir_version: FhirVersion, + ) -> StorageResult { + unimplemented!() + } + async fn create_or_update( + &self, + _tenant: &TenantContext, + _resource_type: &str, + _id: &str, + _resource: serde_json::Value, + _fhir_version: FhirVersion, + ) -> StorageResult<(crate::types::StoredResource, bool)> { + unimplemented!() + } + async fn read( + &self, + _tenant: &TenantContext, + _resource_type: &str, + _id: &str, + ) -> StorageResult> { + Ok(None) + } + async fn update( + &self, + _tenant: &TenantContext, + _current: &crate::types::StoredResource, + _resource: serde_json::Value, + ) -> StorageResult { + unimplemented!() + } + async fn delete( + &self, + _tenant: &TenantContext, + _resource_type: &str, + _id: &str, + ) -> StorageResult<()> { + Ok(()) + } + async fn count( + &self, + _tenant: &TenantContext, + _resource_type: Option<&str>, + ) -> StorageResult { + Ok(0) + } + } + + #[async_trait::async_trait] + impl SearchProvider for MockWithRegistry { + async fn search( + &self, + _tenant: &TenantContext, + _query: &crate::types::SearchQuery, + ) -> StorageResult { + use crate::types::Page; + Ok(SearchResult::new(Page::empty())) + } + async fn search_count( + &self, + _tenant: &TenantContext, + _query: &crate::types::SearchQuery, + ) -> StorageResult { + Ok(0) + } + fn search_param_registry(&self) -> &Arc> { + &self.registry + } + } + + let config = CompositeConfig::builder() + .primary("primary", BackendKind::Sqlite) + .build() + .unwrap(); + let backend = Arc::new(MockWithRegistry { + registry: Arc::clone(®istry), + }); + let mut backends = HashMap::new(); + backends.insert("primary".to_string(), backend.clone() as DynStorage); + let mut providers = HashMap::new(); + providers.insert("primary".to_string(), backend.clone() as DynSearchProvider); + let composite = CompositeStorage::new(config, backends) + .unwrap() + .with_search_providers(providers); + + // Encounter resource referencing Patient/p1 via subject. + let content = serde_json::json!({ + "resourceType": "Encounter", + "id": "e1", + "subject": {"reference": "Patient/p1"}, + }); + let resource = crate::types::StoredResource::new( + "Encounter", + "e1", + TenantId::new("t"), + content, + FhirVersion::default(), + ); + + let refs = composite.extract_references(&resource, "subject"); + assert_eq!(refs, vec!["Patient/p1".to_string()]); + } + #[test] fn test_extract_reference_values_null_ignored() { let val = serde_json::Value::Null; From 55fcd56270a58ddb1185eaedcbc79879cafed2e6 Mon Sep 17 00:00:00 2001 From: Steve Munini Date: Mon, 27 Apr 2026 14:54:57 -0500 Subject: [PATCH 3/5] fix(hts,subscriptions): make testcontainers watchdog feature non-Windows only Extends the fix from #79 to two crates that were missed. The `watchdog` feature on `testcontainers` 0.27 pulls in `signal_hook::consts::SIGQUIT` and `signal_hook::iterator`, both of which are gated to non-Windows. Under `cargo clippy --all-features` on Windows, feature unification across the workspace re-activated `watchdog` on `testcontainers` because `helios-hts` and `helios-subscriptions` still requested it unconditionally, causing E0432 "unresolved imports" on the Windows linting job. Move the `watchdog`-enabled testcontainers dependency under `[target.'cfg(not(windows))'.dev-dependencies]` in both crates and add a plain testcontainers entry for Windows, mirroring the existing split in `helios-persistence`. --- crates/hts/Cargo.toml | 9 ++++++++- crates/subscriptions/Cargo.toml | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/crates/hts/Cargo.toml b/crates/hts/Cargo.toml index 372377e2f..b9fd0cbbd 100644 --- a/crates/hts/Cargo.toml +++ b/crates/hts/Cargo.toml @@ -66,6 +66,13 @@ deadpool-postgres = { version = "0.14", optional = true } tokio = { version = "1", features = ["full", "test-util"] } tempfile = "3" zip = { version = "0.6", default-features = false, features = ["deflate"] } -testcontainers = { version = "0.27", features = ["watchdog"] } testcontainers-modules = { version = "0.15", features = ["postgres"] } ctor = "0.2" + +# `watchdog` pulls in `signal_hook::iterator`/`SIGQUIT`, which only compile on Unix. +# Windows test runs don't need SIGTERM cleanup, so drop the feature there. +[target.'cfg(not(windows))'.dev-dependencies] +testcontainers = { version = "0.27", features = ["watchdog"] } + +[target.'cfg(windows)'.dev-dependencies] +testcontainers = { version = "0.27" } diff --git a/crates/subscriptions/Cargo.toml b/crates/subscriptions/Cargo.toml index 1a8f0dda7..ec444873a 100644 --- a/crates/subscriptions/Cargo.toml +++ b/crates/subscriptions/Cargo.toml @@ -54,5 +54,12 @@ dashmap = "6" [dev-dependencies] tokio = { version = "1", features = ["full", "test-util"] } wiremock = "0.6" -testcontainers = { version = "0.27", features = ["watchdog"] } reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } + +# `watchdog` pulls in `signal_hook::iterator`/`SIGQUIT`, which only compile on Unix. +# Windows test runs don't need SIGTERM cleanup, so drop the feature there. +[target.'cfg(not(windows))'.dev-dependencies] +testcontainers = { version = "0.27", features = ["watchdog"] } + +[target.'cfg(windows)'.dev-dependencies] +testcontainers = { version = "0.27" } From 558b132f93847891e265b82feb94bde625b7ef99 Mon Sep 17 00:00:00 2001 From: Steve Munini Date: Mon, 27 Apr 2026 15:22:29 -0500 Subject: [PATCH 4/5] fix(rest): restore SearchParamType import dropped in merge The merge of `main` into `feat/registry-driven-search-param-types` combined two changes that touched adjacent code: #80 (registry-driven type resolution, removed `SearchParamType` from imports) and #f9196dc1 (fix prefix-only-for-date/number/quantity, used `SearchParamType` in the body). The merge resolved by keeping main's import list but the branch's body, leaving the type referenced in `parse_search_parameter` without a declaration. CI hit E0433 on both Linux and Windows. Re-add `SearchParamType` to the `helios_persistence::types` use list. --- crates/rest/src/extractors/search_query_builder.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/rest/src/extractors/search_query_builder.rs b/crates/rest/src/extractors/search_query_builder.rs index 48f6c6272..48bfa4143 100644 --- a/crates/rest/src/extractors/search_query_builder.rs +++ b/crates/rest/src/extractors/search_query_builder.rs @@ -6,8 +6,8 @@ use std::collections::HashMap; use helios_persistence::search::{SearchParameterRegistry, resolve_param_type}; use helios_persistence::types::{ - IncludeDirective, IncludeType, ReverseChainedParameter, SearchModifier, SearchParameter, - SearchQuery, SearchValue, SortDirective, SummaryMode, TotalMode, + IncludeDirective, IncludeType, ReverseChainedParameter, SearchModifier, SearchParamType, + SearchParameter, SearchQuery, SearchValue, SortDirective, SummaryMode, TotalMode, }; use super::SearchParams; From d0076b94904374cbfb8435fc216b91ce75f352ed Mon Sep 17 00:00:00 2001 From: Steve Munini Date: Mon, 27 Apr 2026 16:55:29 -0500 Subject: [PATCH 5/5] feat(persistence/postgres): support multi-step forward & reverse chains (#81) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Postgres' ChainedSearchProvider was returning Ok(Vec::new()) for any forward chain longer than 2 segments and for nested _has reverse chains, silently producing empty result sets where SQLite returned correct matches. Backend- behavior divergence with no error signal — the kind of "simplified for now" stub PR #80 set out to find. Port the proven SQLite ChainQueryBuilder pattern to Postgres: nested SELECTs over search_index, registry-driven target-type resolution via resolve_param_targets(), the same Patient-default for ambiguous multi-target references SQLite uses. Postgres-specific syntax: $N placeholders, ILIKE for case-insensitive match, SUBSTRING(... FROM POSITION('/' IN ...) + 1) in place of SUBSTR + INSTR for the reverse-chain reference-id extraction. Verification: - 6 new unit tests in chain_builder covering parsing, SQL shape, explicit type modifier, ambiguous-target inference, empty-chain error, and reverse-chain SUBSTRING/POSITION emission - 2 new testcontainers-backed integration tests porting the SQLite three- level chain and reverse-chain terminal cases — both pass against a real Postgres instance - All 609 helios-persistence lib tests still green; clippy clean --- .../backends/postgres/search/chain_builder.rs | 883 ++++++++++++++++++ .../src/backends/postgres/search/mod.rs | 1 + .../src/backends/postgres/search_impl.rs | 148 ++- crates/persistence/tests/postgres_tests.rs | 221 +++++ 4 files changed, 1176 insertions(+), 77 deletions(-) create mode 100644 crates/persistence/src/backends/postgres/search/chain_builder.rs diff --git a/crates/persistence/src/backends/postgres/search/chain_builder.rs b/crates/persistence/src/backends/postgres/search/chain_builder.rs new file mode 100644 index 000000000..2f21c3914 --- /dev/null +++ b/crates/persistence/src/backends/postgres/search/chain_builder.rs @@ -0,0 +1,883 @@ +//! Chain Query Builder for FHIR Search (PostgreSQL backend). +//! +//! Generates efficient SQL subqueries for: +//! - Forward chained parameters (e.g., `Observation?subject.organization.name=Hospital`) +//! - Reverse chained parameters (`_has`) (e.g., `Patient?_has:Observation:subject:code=1234-5`) +//! +//! Uses the `search_index` table to resolve chains via SQL subqueries instead +//! of in-memory iteration. Mirrors the SQLite implementation in +//! `crates/persistence/src/backends/sqlite/search/chain_builder.rs` with +//! Postgres syntax adaptations: `$N` placeholders, `ILIKE`, `POSITION(... in ...)` +//! for substring index, and `LIKE ESCAPE '\'`. + +#![allow(missing_docs)] + +use std::sync::Arc; + +use parking_lot::RwLock; + +use crate::error::{BackendError, StorageResult}; +use crate::search::SearchParameterRegistry; +use crate::types::{ChainConfig, ReverseChainedParameter, SearchParamType, SearchValue}; + +use super::query_builder::{SqlFragment, SqlParam}; + +/// A single link in a forward chain. +#[derive(Debug, Clone)] +pub struct ChainLink { + pub reference_param: String, + pub target_type: String, +} + +/// A parsed forward chain with resolved types. +#[derive(Debug, Clone)] +pub struct ParsedChain { + pub links: Vec, + pub terminal_param: String, + pub terminal_type: SearchParamType, +} + +/// Errors specific to chain parsing. +#[derive(Debug, Clone)] +pub enum ChainError { + MaxDepthExceeded { + depth: usize, + max: usize, + }, + UnknownReferenceParam { + resource_type: String, + param: String, + }, + UnknownTerminalParam { + resource_type: String, + param: String, + }, + EmptyChain, + InvalidSyntax { + message: String, + }, +} + +impl std::fmt::Display for ChainError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ChainError::MaxDepthExceeded { depth, max } => { + write!( + f, + "Chain depth {} exceeds maximum allowed depth {}", + depth, max + ) + } + ChainError::UnknownReferenceParam { + resource_type, + param, + } => write!( + f, + "Unknown reference parameter '{}' for resource type '{}'", + param, resource_type + ), + ChainError::UnknownTerminalParam { + resource_type, + param, + } => write!( + f, + "Unknown terminal parameter '{}' for resource type '{}'", + param, resource_type + ), + ChainError::EmptyChain => write!(f, "Empty chain"), + ChainError::InvalidSyntax { message } => write!(f, "Invalid chain syntax: {}", message), + } + } +} + +impl From for BackendError { + fn from(e: ChainError) -> Self { + BackendError::Internal { + backend_name: "postgres".to_string(), + message: e.to_string(), + source: None, + } + } +} + +/// Builder for chain SQL queries. +pub struct ChainQueryBuilder { + #[allow(dead_code)] + tenant_id: String, + base_type: String, + registry: Arc>, + config: ChainConfig, + /// Parameter offset for `$N` placeholders. + /// + /// Callers typically reserve `$1` for `tenant_id`, so the default offset + /// of `1` makes the first chain-supplied param `$2`. + param_offset: usize, +} + +impl ChainQueryBuilder { + pub fn new( + tenant_id: impl Into, + base_type: impl Into, + registry: Arc>, + ) -> Self { + Self { + tenant_id: tenant_id.into(), + base_type: base_type.into(), + registry, + config: ChainConfig::default(), + param_offset: 1, + } + } + + pub fn with_config(mut self, config: ChainConfig) -> Self { + self.config = config; + self + } + + pub fn with_param_offset(mut self, offset: usize) -> Self { + self.param_offset = offset; + self + } + + /// Parses a chain string (e.g., `"subject.organization.name"`) into + /// resolved `ChainLink`s plus the terminal parameter. + pub fn parse_chain(&self, chain_str: &str) -> Result { + if chain_str.is_empty() { + return Err(ChainError::EmptyChain); + } + + let parts: Vec<&str> = chain_str.split('.').collect(); + if parts.len() < 2 { + return Err(ChainError::InvalidSyntax { + message: "Chain must have at least two parts (reference.param)".to_string(), + }); + } + + let chain_depth = parts.len() - 1; + if !self.config.validate_forward_depth(chain_depth) { + return Err(ChainError::MaxDepthExceeded { + depth: chain_depth, + max: self.config.max_forward_depth, + }); + } + + let mut links = Vec::new(); + let mut current_type = self.base_type.clone(); + + for part in parts.iter().take(parts.len() - 1) { + let (ref_param, explicit_type) = parse_chain_part(part); + let target_type = self.resolve_target_type(¤t_type, &ref_param, explicit_type)?; + links.push(ChainLink { + reference_param: ref_param, + target_type: target_type.clone(), + }); + current_type = target_type; + } + + let terminal_param = parts[parts.len() - 1].to_string(); + let terminal_type = self.resolve_terminal_type(¤t_type, &terminal_param)?; + + Ok(ParsedChain { + links, + terminal_param, + terminal_type, + }) + } + + fn resolve_target_type( + &self, + resource_type: &str, + ref_param: &str, + explicit_type: Option, + ) -> Result { + if let Some(t) = explicit_type { + return Ok(t); + } + + let registry = self.registry.read(); + if let Some(param_def) = registry.get_param(resource_type, ref_param) { + if param_def.param_type != SearchParamType::Reference { + return Err(ChainError::UnknownReferenceParam { + resource_type: resource_type.to_string(), + param: ref_param.to_string(), + }); + } + if let Some(ref targets) = param_def.target { + if targets.len() == 1 { + return Ok(targets[0].clone()); + } + // Empty or multiple targets — fall through to inference, + // matching SQLite's behavior so chained queries against + // ambiguous references (e.g. `subject` -> Patient|Group|...) + // pick the same default both backends agree on. + } + } + + Ok(infer_target_type(ref_param)) + } + + fn resolve_terminal_type( + &self, + resource_type: &str, + param_name: &str, + ) -> Result { + let registry = self.registry.read(); + if let Some(param_def) = registry.get_param(resource_type, param_name) { + return Ok(param_def.param_type); + } + // Last-resort heuristic for params not in the registry, matching SQLite. + match param_name { + "_id" | "id" => Ok(SearchParamType::Token), + "name" | "family" | "given" | "text" | "display" => Ok(SearchParamType::String), + "identifier" | "code" | "status" | "type" | "category" => Ok(SearchParamType::Token), + _ => Err(ChainError::UnknownTerminalParam { + resource_type: resource_type.to_string(), + param: param_name.to_string(), + }), + } + } + + /// Builds SQL for a forward chain query as nested subqueries. + /// + /// For `Observation?subject.organization.name=Hospital` (assuming + /// `param_offset = 1`, so `$1` is `tenant_id`): + /// + /// ```sql + /// r.id IN ( + /// SELECT si1.resource_id FROM search_index si1 + /// WHERE si1.tenant_id = $1 AND si1.resource_type = 'Observation' + /// AND si1.param_name = 'subject' + /// AND si1.value_reference IN ( + /// SELECT 'Patient/' || si2.resource_id FROM search_index si2 + /// WHERE si2.tenant_id = $1 AND si2.resource_type = 'Patient' + /// AND si2.param_name = 'organization' + /// AND si2.value_reference IN ( + /// SELECT 'Organization/' || si3.resource_id FROM search_index si3 + /// WHERE si3.tenant_id = $1 AND si3.resource_type = 'Organization' + /// AND si3.param_name = 'name' + /// AND si3.value_string ILIKE $2 ESCAPE '\' + /// ) + /// ) + /// ) + /// ``` + pub fn build_forward_chain_sql( + &self, + chain: &ParsedChain, + value: &SearchValue, + ) -> StorageResult { + if chain.links.is_empty() { + return Err(BackendError::Internal { + backend_name: "postgres".to_string(), + message: "Empty chain".to_string(), + source: None, + } + .into()); + } + + let param_num = self.param_offset + 1; + let (terminal_sql, terminal_param) = + self.build_terminal_condition(chain, value, param_num)?; + let terminal_type = &chain.links[chain.links.len() - 1].target_type; + + // Innermost (terminal) query. + let mut current_sql = format!( + "SELECT '{tt}/' || si{n}.resource_id FROM search_index si{n} \ + WHERE si{n}.tenant_id = $1 AND si{n}.resource_type = '{tt}' \ + AND si{n}.param_name = '{tp}' AND {cond}", + tt = terminal_type, + n = chain.links.len(), + tp = chain.terminal_param, + cond = terminal_sql, + ); + + // Wrap with each chain link from innermost to outermost. + for (i, link) in chain.links.iter().enumerate().rev() { + let link_num = i + 1; + let current_type = if i == 0 { + &self.base_type + } else { + &chain.links[i - 1].target_type + }; + + current_sql = if i == 0 { + // Outermost link: return resource_id for `r.id IN (...)`. + format!( + "SELECT si{ln}.resource_id FROM search_index si{ln} \ + WHERE si{ln}.tenant_id = $1 AND si{ln}.resource_type = '{ct}' \ + AND si{ln}.param_name = '{rp}' \ + AND si{ln}.value_reference IN ({inner})", + ln = link_num, + ct = current_type, + rp = link.reference_param, + inner = current_sql, + ) + } else { + // Intermediate link: return '{type}/' || resource_id for value_reference matching. + format!( + "SELECT '{ct}/' || si{ln}.resource_id FROM search_index si{ln} \ + WHERE si{ln}.tenant_id = $1 AND si{ln}.resource_type = '{ct}' \ + AND si{ln}.param_name = '{rp}' \ + AND si{ln}.value_reference IN ({inner})", + ct = current_type, + ln = link_num, + rp = link.reference_param, + inner = current_sql, + ) + }; + } + + Ok(SqlFragment::with_params( + format!("r.id IN ({})", current_sql), + vec![terminal_param], + )) + } + + fn build_terminal_condition( + &self, + chain: &ParsedChain, + value: &SearchValue, + param_num: usize, + ) -> StorageResult<(String, SqlParam)> { + let alias = format!("si{}", chain.links.len()); + + let (condition, param) = match chain.terminal_type { + SearchParamType::String => { + let escaped = value.value.replace('%', "\\%").replace('_', "\\_"); + ( + format!("{}.value_string ILIKE ${} ESCAPE '\\'", alias, param_num), + SqlParam::Text(format!("%{}%", escaped)), + ) + } + SearchParamType::Token => { + if let Some((system, code)) = value.value.split_once('|') { + if system.is_empty() { + ( + format!( + "({alias}.value_token_system IS NULL OR {alias}.value_token_system = '') \ + AND {alias}.value_token_code = ${pn}", + alias = alias, + pn = param_num, + ), + SqlParam::Text(code.to_string()), + ) + } else { + ( + format!( + "{alias}.value_token_system = '{sys}' AND {alias}.value_token_code = ${pn}", + alias = alias, + sys = system.replace('\'', "''"), + pn = param_num, + ), + SqlParam::Text(code.to_string()), + ) + } + } else { + ( + format!("{}.value_token_code = ${}", alias, param_num), + SqlParam::Text(value.value.clone()), + ) + } + } + SearchParamType::Reference => ( + format!("{}.value_reference ILIKE ${}", alias, param_num), + SqlParam::Text(format!("%{}%", value.value)), + ), + SearchParamType::Date => { + let date_col = format!("{}.value_date", alias); + build_date_condition(&date_col, value, param_num) + } + SearchParamType::Number => { + let num_col = format!("{}.value_number", alias); + build_number_condition(&num_col, value, param_num) + } + SearchParamType::Quantity => { + let qty_col = format!("{}.value_quantity_value", alias); + build_number_condition(&qty_col, value, param_num) + } + SearchParamType::Uri => ( + format!("{}.value_uri = ${}", alias, param_num), + SqlParam::Text(value.value.clone()), + ), + _ => ( + format!("{}.value_string ILIKE ${}", alias, param_num), + SqlParam::Text(format!("%{}%", value.value)), + ), + }; + + Ok((condition, param)) + } + + /// Builds SQL for a reverse chain (`_has`) query. + /// + /// For `Patient?_has:Observation:subject:code=1234-5`: + /// + /// ```sql + /// r.id IN ( + /// SELECT SUBSTRING(si1.value_reference FROM POSITION('/' IN si1.value_reference) + 1) + /// FROM search_index si1 + /// WHERE si1.tenant_id = $1 AND si1.resource_type = 'Observation' + /// AND si1.param_name = 'subject' + /// AND si1.value_reference LIKE 'Patient/%' + /// AND si1.resource_id IN ( + /// SELECT si2.resource_id FROM search_index si2 + /// WHERE si2.tenant_id = $1 AND si2.resource_type = 'Observation' + /// AND si2.param_name = 'code' + /// AND si2.value_token_code = $2 + /// ) + /// ) + /// ``` + pub fn build_reverse_chain_sql( + &self, + reverse_chain: &ReverseChainedParameter, + ) -> StorageResult { + let depth = reverse_chain.depth(); + if !self.config.validate_reverse_depth(depth) { + return Err(BackendError::Internal { + backend_name: "postgres".to_string(), + message: format!( + "Reverse chain depth {} exceeds maximum {}", + depth, self.config.max_reverse_depth + ), + source: None, + } + .into()); + } + + let param_num = self.param_offset + 1; + let (sql, params) = self.build_reverse_chain_recursive(reverse_chain, 1, param_num)?; + + Ok(SqlFragment::with_params( + format!("r.id IN ({})", sql), + params, + )) + } + + fn build_reverse_chain_recursive( + &self, + rc: &ReverseChainedParameter, + depth: usize, + param_num: usize, + ) -> StorageResult<(String, Vec)> { + let alias = format!("si{}", depth); + + if rc.is_terminal() { + let value = rc.value.as_ref().ok_or_else(|| BackendError::Internal { + backend_name: "postgres".to_string(), + message: "Terminal reverse chain must have a value".to_string(), + source: None, + })?; + + let (search_condition, search_param) = self.build_reverse_terminal_condition( + &rc.source_type, + &rc.search_param, + value, + depth + 1, + param_num, + )?; + + let depth2 = depth + 1; + let sql = format!( + "SELECT SUBSTRING({alias}.value_reference FROM POSITION('/' IN {alias}.value_reference) + 1) \ + FROM search_index {alias} \ + WHERE {alias}.tenant_id = $1 AND {alias}.resource_type = '{src_type}' \ + AND {alias}.param_name = '{ref_param}' \ + AND {alias}.value_reference LIKE '{base_type}/%' \ + AND {alias}.resource_id IN (\ + SELECT si{depth2}.resource_id FROM search_index si{depth2} \ + WHERE si{depth2}.tenant_id = $1 AND si{depth2}.resource_type = '{src_type}' \ + AND si{depth2}.param_name = '{search_param_name}' AND {search_condition}\ + )", + alias = alias, + src_type = rc.source_type, + ref_param = rc.reference_param, + base_type = self.base_type, + depth2 = depth2, + search_param_name = rc.search_param, + search_condition = search_condition, + ); + + Ok((sql, vec![search_param])) + } else { + let inner = rc.nested.as_ref().ok_or_else(|| BackendError::Internal { + backend_name: "postgres".to_string(), + message: "Non-terminal reverse chain must have nested chain".to_string(), + source: None, + })?; + + let inner_builder = ChainQueryBuilder::new( + &self.tenant_id, + &rc.source_type, + Arc::clone(&self.registry), + ) + .with_config(self.config.clone()) + .with_param_offset(param_num - 1); + + let (inner_sql, inner_params) = + inner_builder.build_reverse_chain_recursive(inner, depth + 1, param_num)?; + + let sql = format!( + "SELECT SUBSTRING({alias}.value_reference FROM POSITION('/' IN {alias}.value_reference) + 1) \ + FROM search_index {alias} \ + WHERE {alias}.tenant_id = $1 AND {alias}.resource_type = '{}' \ + AND {alias}.param_name = '{}' \ + AND {alias}.value_reference LIKE '{}/%' \ + AND {alias}.resource_id IN ({inner_sql})", + rc.source_type, + rc.reference_param, + self.base_type, + alias = alias, + ); + + Ok((sql, inner_params)) + } + } + + fn build_reverse_terminal_condition( + &self, + resource_type: &str, + param_name: &str, + value: &SearchValue, + depth: usize, + param_num: usize, + ) -> StorageResult<(String, SqlParam)> { + let param_type = { + let registry = self.registry.read(); + crate::search::resolve_param_type( + ®istry, + resource_type, + param_name, + std::slice::from_ref(value), + ) + }; + + let alias = format!("si{}", depth); + + let (condition, param) = match param_type { + SearchParamType::String => { + let escaped = value.value.replace('%', "\\%").replace('_', "\\_"); + ( + format!("{}.value_string ILIKE ${} ESCAPE '\\'", alias, param_num), + SqlParam::Text(format!("%{}%", escaped)), + ) + } + SearchParamType::Token => { + if let Some((system, code)) = value.value.split_once('|') { + if system.is_empty() { + ( + format!( + "({alias}.value_token_system IS NULL OR {alias}.value_token_system = '') \ + AND {alias}.value_token_code = ${pn}", + alias = alias, + pn = param_num, + ), + SqlParam::Text(code.to_string()), + ) + } else { + ( + format!( + "{alias}.value_token_system = '{sys}' AND {alias}.value_token_code = ${pn}", + alias = alias, + sys = system.replace('\'', "''"), + pn = param_num, + ), + SqlParam::Text(code.to_string()), + ) + } + } else { + ( + format!("{}.value_token_code = ${}", alias, param_num), + SqlParam::Text(value.value.clone()), + ) + } + } + SearchParamType::Reference => ( + format!("{}.value_reference ILIKE ${}", alias, param_num), + SqlParam::Text(format!("%{}%", value.value)), + ), + SearchParamType::Date => { + let date_col = format!("{}.value_date", alias); + build_date_condition(&date_col, value, param_num) + } + SearchParamType::Number => { + let num_col = format!("{}.value_number", alias); + build_number_condition(&num_col, value, param_num) + } + SearchParamType::Quantity => { + let qty_col = format!("{}.value_quantity_value", alias); + build_number_condition(&qty_col, value, param_num) + } + SearchParamType::Uri => ( + format!("{}.value_uri = ${}", alias, param_num), + SqlParam::Text(value.value.clone()), + ), + _ => ( + format!("{}.value_string ILIKE ${}", alias, param_num), + SqlParam::Text(format!("%{}%", value.value)), + ), + }; + + Ok((condition, param)) + } +} + +fn parse_chain_part(part: &str) -> (String, Option) { + if let Some((param, type_mod)) = part.split_once(':') { + (param.to_string(), Some(type_mod.to_string())) + } else { + (part.to_string(), None) + } +} + +/// Hardcoded fallback for ambiguous reference targets, matching SQLite's +/// `chain_builder::infer_target_type` so chained queries pick the same +/// default on both backends. See the open-question note in the plan about +/// migrating this to a registry-driven first-target pick later. +fn infer_target_type(ref_param: &str) -> String { + match ref_param { + "patient" | "subject" => "Patient".to_string(), + "practitioner" | "performer" | "requester" | "author" => "Practitioner".to_string(), + "organization" | "managingOrganization" | "custodian" => "Organization".to_string(), + "encounter" | "context" => "Encounter".to_string(), + "location" => "Location".to_string(), + "device" => "Device".to_string(), + "specimen" => "Specimen".to_string(), + "medication" => "Medication".to_string(), + "condition" => "Condition".to_string(), + _ => { + let mut chars = ref_param.chars(); + match chars.next() { + Some(c) => c.to_uppercase().chain(chars).collect(), + None => ref_param.to_string(), + } + } + } +} + +fn build_date_condition(column: &str, value: &SearchValue, param_num: usize) -> (String, SqlParam) { + use crate::types::SearchPrefix; + + let (op, val) = match value.prefix { + SearchPrefix::Eq => ("=", &value.value), + SearchPrefix::Ne => ("!=", &value.value), + SearchPrefix::Gt => (">", &value.value), + SearchPrefix::Lt => ("<", &value.value), + SearchPrefix::Ge => (">=", &value.value), + SearchPrefix::Le => ("<=", &value.value), + SearchPrefix::Sa => (">", &value.value), + SearchPrefix::Eb => ("<", &value.value), + SearchPrefix::Ap => { + return ( + format!("DATE({}) = DATE(${})", column, param_num), + SqlParam::Text(value.value.clone()), + ); + } + }; + + ( + format!("{} {} ${}", column, op, param_num), + SqlParam::Text(val.clone()), + ) +} + +fn build_number_condition( + column: &str, + value: &SearchValue, + param_num: usize, +) -> (String, SqlParam) { + use crate::types::SearchPrefix; + + let num_value = value.value.parse::().unwrap_or(0.0); + + let (op, val) = match value.prefix { + SearchPrefix::Eq => ("=", num_value), + SearchPrefix::Ne => ("!=", num_value), + SearchPrefix::Gt => (">", num_value), + SearchPrefix::Lt => ("<", num_value), + SearchPrefix::Ge => (">=", num_value), + SearchPrefix::Le => ("<=", num_value), + SearchPrefix::Sa => (">", num_value), + SearchPrefix::Eb => ("<", num_value), + SearchPrefix::Ap => { + let lower = num_value * 0.9; + let upper = num_value * 1.1; + return ( + format!("{} BETWEEN {} AND {}", column, lower, upper), + SqlParam::Float(num_value), + ); + } + }; + + ( + format!("{} {} ${}", column, op, param_num), + SqlParam::Float(val), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::search::SearchParameterDefinition; + + fn registry_with(defs: Vec) -> Arc> { + let mut r = SearchParameterRegistry::new(); + for d in defs { + r.register(d).unwrap(); + } + Arc::new(RwLock::new(r)) + } + + fn obs_subject_patient_org_name() -> Arc> { + registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Observation-subject", + "subject", + SearchParamType::Reference, + "Observation.subject", + ) + .with_base(vec!["Observation"]) + .with_targets(vec!["Patient"]), + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Patient-organization", + "organization", + SearchParamType::Reference, + "Patient.managingOrganization", + ) + .with_base(vec!["Patient"]) + .with_targets(vec!["Organization"]), + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Organization-name", + "name", + SearchParamType::String, + "Organization.name", + ) + .with_base(vec!["Organization"]), + ]) + } + + #[test] + fn parses_three_link_chain() { + let registry = obs_subject_patient_org_name(); + let builder = ChainQueryBuilder::new("t", "Observation", registry); + let parsed = builder.parse_chain("subject.organization.name").unwrap(); + assert_eq!(parsed.links.len(), 2); + assert_eq!(parsed.links[0].reference_param, "subject"); + assert_eq!(parsed.links[0].target_type, "Patient"); + assert_eq!(parsed.links[1].reference_param, "organization"); + assert_eq!(parsed.links[1].target_type, "Organization"); + assert_eq!(parsed.terminal_param, "name"); + assert_eq!(parsed.terminal_type, SearchParamType::String); + } + + #[test] + fn builds_three_link_chain_sql() { + let registry = obs_subject_patient_org_name(); + let builder = ChainQueryBuilder::new("t", "Observation", registry); + let parsed = builder.parse_chain("subject.organization.name").unwrap(); + let value = SearchValue::eq("Hospital"); + let frag = builder.build_forward_chain_sql(&parsed, &value).unwrap(); + + assert!(frag.sql.contains("r.id IN (")); + // Three nested SELECTs (outermost link, intermediate link, terminal). + // Aliases are si{i+1} per link plus si{links.len()} for the terminal — + // for a 2-link chain that is si1 (subject), si2 (organization), si2 + // (terminal name); the inner si2 lexically shadows the outer si2. + assert_eq!(frag.sql.matches("FROM search_index").count(), 3); + assert!(frag.sql.contains("SELECT si1.resource_id")); + assert!(frag.sql.contains("'Patient/' || si2.resource_id")); + assert!(frag.sql.contains("'Organization/' || si2.resource_id")); + assert!(frag.sql.contains("ILIKE $2 ESCAPE '\\'")); + assert_eq!(frag.params.len(), 1); + assert!(matches!(&frag.params[0], SqlParam::Text(s) if s == "%Hospital%")); + } + + #[test] + fn explicit_type_modifier_is_honored() { + // subject:Patient.name picks Patient even if registry has multiple targets. + let registry = registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Observation-subject", + "subject", + SearchParamType::Reference, + "Observation.subject", + ) + .with_base(vec!["Observation"]) + .with_targets(vec!["Patient", "Group", "Device", "Location"]), + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Patient-name", + "name", + SearchParamType::String, + "Patient.name", + ) + .with_base(vec!["Patient"]), + ]); + let builder = ChainQueryBuilder::new("t", "Observation", registry); + let parsed = builder.parse_chain("subject:Patient.name").unwrap(); + assert_eq!(parsed.links[0].target_type, "Patient"); + } + + #[test] + fn ambiguous_target_falls_back_to_inference() { + let registry = registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Observation-subject", + "subject", + SearchParamType::Reference, + "Observation.subject", + ) + .with_base(vec!["Observation"]) + .with_targets(vec!["Patient", "Group", "Device", "Location"]), + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Patient-name", + "name", + SearchParamType::String, + "Patient.name", + ) + .with_base(vec!["Patient"]), + ]); + let builder = ChainQueryBuilder::new("t", "Observation", registry); + let parsed = builder.parse_chain("subject.name").unwrap(); + assert_eq!(parsed.links[0].target_type, "Patient"); // inferred default + } + + #[test] + fn empty_chain_errors() { + let registry = obs_subject_patient_org_name(); + let builder = ChainQueryBuilder::new("t", "Observation", registry); + assert!(matches!( + builder.parse_chain(""), + Err(ChainError::EmptyChain) + )); + assert!(matches!( + builder.parse_chain("just_one_part"), + Err(ChainError::InvalidSyntax { .. }) + )); + } + + #[test] + fn reverse_chain_terminal_sql_uses_substring_position() { + // Patient?_has:Observation:subject:code=1234-5 + let rc = ReverseChainedParameter { + source_type: "Observation".to_string(), + reference_param: "subject".to_string(), + search_param: "code".to_string(), + value: Some(SearchValue::eq("1234-5")), + nested: None, + }; + let registry = registry_with(vec![ + SearchParameterDefinition::new( + "http://hl7.org/fhir/SearchParameter/Observation-code", + "code", + SearchParamType::Token, + "Observation.code", + ) + .with_base(vec!["Observation"]), + ]); + let builder = ChainQueryBuilder::new("t", "Patient", registry); + let frag = builder.build_reverse_chain_sql(&rc).unwrap(); + assert!(frag.sql.contains( + "SUBSTRING(si1.value_reference FROM POSITION('/' IN si1.value_reference) + 1)" + )); + assert!(frag.sql.contains("LIKE 'Patient/%'")); + assert!(frag.sql.contains("value_token_code = $2")); + } +} diff --git a/crates/persistence/src/backends/postgres/search/mod.rs b/crates/persistence/src/backends/postgres/search/mod.rs index f007ecab6..34d463a57 100644 --- a/crates/persistence/src/backends/postgres/search/mod.rs +++ b/crates/persistence/src/backends/postgres/search/mod.rs @@ -4,5 +4,6 @@ //! for the PostgreSQL backend, using $N parameter placeholders, //! ILIKE for case-insensitive matching, and native TIMESTAMPTZ comparisons. +pub mod chain_builder; pub mod query_builder; pub mod writer; diff --git a/crates/persistence/src/backends/postgres/search_impl.rs b/crates/persistence/src/backends/postgres/search_impl.rs index 548b57065..9bb5bdd0f 100644 --- a/crates/persistence/src/backends/postgres/search_impl.rs +++ b/crates/persistence/src/backends/postgres/search_impl.rs @@ -25,6 +25,7 @@ use crate::types::{ }; use super::PostgresBackend; +use super::search::chain_builder::ChainQueryBuilder; use super::search::query_builder::{PostgresQueryBuilder, SqlParam}; fn internal_error(message: String) -> StorageError { @@ -646,55 +647,55 @@ impl ChainedSearchProvider for PostgresBackend { chain: &str, value: &str, ) -> StorageResult> { - let client = self.get_client().await?; - let tenant_id = tenant.tenant_id().as_str(); - if chain.is_empty() { return Ok(Vec::new()); } - // Parse the chain path (e.g., "patient.organization.name") - let parts: Vec<&str> = chain.split('.').collect(); - if parts.is_empty() { - return Ok(Vec::new()); - } - - // Simple single-step chain: param_name.target_param = value - // For multi-step chains, build nested subqueries - if parts.len() == 2 { - // Single step: e.g., patient.name=Smith - // Find resources of the target type matching the value, - // then find base resources referencing them - let ref_param = parts[0]; - let target_param = parts[1]; + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); - let sql = format!( - "SELECT DISTINCT si_ref.resource_id - FROM search_index si_ref - WHERE si_ref.tenant_id = $1 - AND si_ref.resource_type = $2 - AND si_ref.param_name = '{}' - AND si_ref.value_reference IN ( - SELECT resource_type || '/' || resource_id - FROM search_index si_target - WHERE si_target.tenant_id = $1 - AND si_target.param_name = '{}' - AND si_target.value_string ILIKE $3 - )", - ref_param, target_param - ); + // Build a multi-step chain query via the registry-driven builder. + // The builder produces a `r.id IN (... nested SELECTs ...)` fragment + // that handles arbitrary chain depth (was previously stubbed for >2 + // segments). + let builder = ChainQueryBuilder::new(tenant_id, base_type, self.search_registry().clone()) + .with_param_offset(1); + let parsed = builder + .parse_chain(chain) + .map_err(|e| internal_error(format!("Failed to parse chain: {}", e)))?; + let parsed_value = crate::types::SearchValue::parse(value); + let fragment = builder.build_forward_chain_sql(&parsed, &parsed_value)?; - let rows = client - .query(&sql, &[&tenant_id, &base_type, &format!("{}%", value)]) - .await - .map_err(|e| internal_error(format!("Failed to execute chain query: {}", e)))?; + let sql = format!( + "SELECT r.id FROM resources r WHERE r.tenant_id = $1 \ + AND r.resource_type = '{base}' AND r.is_deleted = FALSE AND {clause}", + base = base_type, + clause = fragment.sql, + ); - let ids: Vec = rows.iter().map(|r| r.get(0)).collect(); - Ok(ids) - } else { - // Multi-step or single parameter chain - simplified implementation - Ok(Vec::new()) + let mut params: Vec> = + vec![Box::new(tenant_id.to_string())]; + for p in &fragment.params { + match p { + SqlParam::Text(s) => params.push(Box::new(s.clone())), + SqlParam::Float(f) => params.push(Box::new(*f)), + SqlParam::Integer(i) => params.push(Box::new(*i)), + SqlParam::Bool(b) => params.push(Box::new(*b)), + SqlParam::Timestamp(dt) => params.push(Box::new(*dt)), + SqlParam::Null => params.push(Box::new(Option::::None)), + } } + let param_refs: Vec<&(dyn tokio_postgres::types::ToSql + Sync)> = params + .iter() + .map(|p| p.as_ref() as &(dyn tokio_postgres::types::ToSql + Sync)) + .collect(); + + let rows = client + .query(&sql, ¶m_refs) + .await + .map_err(|e| internal_error(format!("Failed to execute chain query: {}", e)))?; + + Ok(rows.iter().map(|r| r.get(0)).collect()) } async fn resolve_reverse_chain( @@ -706,50 +707,43 @@ impl ChainedSearchProvider for PostgresBackend { let client = self.get_client().await?; let tenant_id = tenant.tenant_id().as_str(); - // _has:Observation:patient:code=1234-5 - // Find Observations with code=1234-5, then find the Patient IDs they reference - let value_str = reverse_chain - .value - .as_ref() - .map(|v| v.value.clone()) - .unwrap_or_default(); + // Use the registry-driven builder so we handle nested `_has` chains + // and any param type (was previously single-level only with hardcoded + // token-or-string-or-empty fallback). + let builder = ChainQueryBuilder::new(tenant_id, base_type, self.search_registry().clone()) + .with_param_offset(1); + let fragment = builder.build_reverse_chain_sql(reverse_chain)?; let sql = format!( - "SELECT DISTINCT si_ref.value_reference - FROM search_index si_ref - INNER JOIN search_index si_val - ON si_ref.tenant_id = si_val.tenant_id - AND si_ref.resource_type = si_val.resource_type - AND si_ref.resource_id = si_val.resource_id - WHERE si_ref.tenant_id = $1 - AND si_ref.resource_type = '{}' - AND si_ref.param_name = '{}' - AND si_val.param_name = '{}' - AND (si_val.value_token_code = $2 - OR si_val.value_string ILIKE $3)", - reverse_chain.source_type, reverse_chain.reference_param, reverse_chain.search_param + "SELECT r.id FROM resources r WHERE r.tenant_id = $1 \ + AND r.resource_type = '{base}' AND r.is_deleted = FALSE AND {clause}", + base = base_type, + clause = fragment.sql, ); - let like_value = format!("{}%", value_str); + let mut params: Vec> = + vec![Box::new(tenant_id.to_string())]; + for p in &fragment.params { + match p { + SqlParam::Text(s) => params.push(Box::new(s.clone())), + SqlParam::Float(f) => params.push(Box::new(*f)), + SqlParam::Integer(i) => params.push(Box::new(*i)), + SqlParam::Bool(b) => params.push(Box::new(*b)), + SqlParam::Timestamp(dt) => params.push(Box::new(*dt)), + SqlParam::Null => params.push(Box::new(Option::::None)), + } + } + let param_refs: Vec<&(dyn tokio_postgres::types::ToSql + Sync)> = params + .iter() + .map(|p| p.as_ref() as &(dyn tokio_postgres::types::ToSql + Sync)) + .collect(); + let rows = client - .query( - &sql, - &[&tenant_id, &value_str.as_str(), &like_value.as_str()], - ) + .query(&sql, ¶m_refs) .await .map_err(|e| internal_error(format!("Failed to execute reverse chain query: {}", e)))?; - let mut ids = Vec::new(); - for row in &rows { - let reference: String = row.get(0); - // Extract ID from "ResourceType/ID" reference - let expected_prefix = format!("{}/", base_type); - if let Some(id) = reference.strip_prefix(&expected_prefix) { - ids.push(id.to_string()); - } - } - - Ok(ids) + Ok(rows.iter().map(|r| r.get(0)).collect()) } } diff --git a/crates/persistence/tests/postgres_tests.rs b/crates/persistence/tests/postgres_tests.rs index 7dfb8e5db..35c76965c 100644 --- a/crates/persistence/tests/postgres_tests.rs +++ b/crates/persistence/tests/postgres_tests.rs @@ -2604,4 +2604,225 @@ mod postgres_integration { .unwrap(); assert!(page3.resources.is_empty() || page3.next_cursor.is_none()); } + + /// Inserts a row directly into the search_index table. Mirrors what the + /// SQLite chain tests do for the same purpose — exercises the chain SQL + /// without depending on the FHIRPath extractor's full coverage. Connects + /// to the shared testcontainer with its own tokio-postgres client because + /// `PostgresBackend::get_client` is crate-private. + async fn insert_search_index( + tenant_id: &str, + resource_type: &str, + resource_id: &str, + param_name: &str, + column: &str, + value: &str, + ) { + let pg = shared_pg().await; + let conn_str = format!( + "host={} port={} user=postgres password=postgres dbname=postgres", + pg.host, pg.port, + ); + let (client, connection) = tokio_postgres::connect(&conn_str, tokio_postgres::NoTls) + .await + .expect("connect to shared pg"); + tokio::spawn(async move { + let _ = connection.await; + }); + let sql = format!( + "INSERT INTO search_index (tenant_id, resource_type, resource_id, param_name, {col}) \ + VALUES ($1, $2, $3, $4, $5)", + col = column, + ); + client + .execute( + &sql, + &[ + &tenant_id, + &resource_type, + &resource_id, + ¶m_name, + &value, + ], + ) + .await + .unwrap(); + } + + #[tokio::test] + async fn postgres_integration_resolve_chain_multi_level() { + use helios_persistence::core::ChainedSearchProvider; + + // Mirror sqlite/search_impl.rs::test_resolve_chain_multi_level for + // Postgres. Three-level chain: Observation?subject.organization.name=Hospital. + let backend = create_backend().await; + let tenant = create_tenant("chain-multi"); + let tenant_id = tenant.tenant_id().as_str(); + + backend + .create( + &tenant, + "Organization", + json!({"id": "org1", "name": "General Hospital"}), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Patient", + json!({"id": "p1", "managingOrganization": {"reference": "Organization/org1"}}), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Observation", + json!({"id": "o1", "subject": {"reference": "Patient/p1"}}), + FhirVersion::default(), + ) + .await + .unwrap(); + + insert_search_index( + tenant_id, + "Organization", + "org1", + "name", + "value_string", + "General Hospital", + ) + .await; + insert_search_index( + tenant_id, + "Patient", + "p1", + "organization", + "value_reference", + "Organization/org1", + ) + .await; + insert_search_index( + tenant_id, + "Observation", + "o1", + "subject", + "value_reference", + "Patient/p1", + ) + .await; + + let ids = backend + .resolve_chain( + &tenant, + "Observation", + "subject.organization.name", + "Hospital", + ) + .await + .unwrap(); + + assert_eq!(ids, vec!["o1".to_string()]); + } + + #[tokio::test] + async fn postgres_integration_resolve_reverse_chain_terminal() { + use helios_persistence::core::ChainedSearchProvider; + use helios_persistence::types::{ReverseChainedParameter, SearchValue}; + + // _has:Observation:subject:code=8867-4 — find patients referenced by + // Observations whose code matches. + let backend = create_backend().await; + let tenant = create_tenant("reverse-chain"); + let tenant_id = tenant.tenant_id().as_str(); + + backend + .create( + &tenant, + "Patient", + json!({"id": "p1"}), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Patient", + json!({"id": "p2"}), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Observation", + json!({"id": "o1", "subject": {"reference": "Patient/p1"}}), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Observation", + json!({"id": "o2", "subject": {"reference": "Patient/p2"}}), + FhirVersion::default(), + ) + .await + .unwrap(); + + insert_search_index( + tenant_id, + "Observation", + "o1", + "subject", + "value_reference", + "Patient/p1", + ) + .await; + insert_search_index( + tenant_id, + "Observation", + "o2", + "subject", + "value_reference", + "Patient/p2", + ) + .await; + insert_search_index( + tenant_id, + "Observation", + "o1", + "code", + "value_token_code", + "8867-4", + ) + .await; + insert_search_index( + tenant_id, + "Observation", + "o2", + "code", + "value_token_code", + "other", + ) + .await; + + let rc = ReverseChainedParameter::terminal( + "Observation", + "subject", + "code", + SearchValue::eq("8867-4"), + ); + let ids = backend + .resolve_reverse_chain(&tenant, "Patient", &rc) + .await + .unwrap(); + assert_eq!(ids, vec!["p1".to_string()]); + } }