diff --git a/.typos.toml b/.typos.toml index 375991d026fd..7927fae6e56a 100644 --- a/.typos.toml +++ b/.typos.toml @@ -22,3 +22,6 @@ wee = "wee" flate = "flate" # distinct_ons is correct (SQL DISTINCT ON) ons = "ons" +# SQL UNIONs - typos incorrectly thinks UNIO should be UNION +# This happens because typos tokenizes UNIONs and finds UNIO +UNIO = "UNIO" diff --git a/prqlc/prqlc/src/semantic/resolver/inference.rs b/prqlc/prqlc/src/semantic/resolver/inference.rs index 336d5ade30b6..36530f3378c8 100644 --- a/prqlc/prqlc/src/semantic/resolver/inference.rs +++ b/prqlc/prqlc/src/semantic/resolver/inference.rs @@ -71,17 +71,36 @@ impl Resolver<'_> { input_id: usize, ) -> Lineage { let table_decl = self.root_mod.module.get(table_fq).unwrap(); - let TableDecl { ty, .. } = table_decl.kind.as_table_decl().unwrap(); + let TableDecl { ty, expr } = table_decl.kind.as_table_decl().unwrap(); - // TODO: can this panic? - let columns = ty.as_ref().unwrap().as_relation().unwrap(); + // For CTEs (RelationVar), trace lineage back to the underlying source tables. + // For UNIONs and JOINs, this includes all underlying source tables. + let underlying_inputs = match expr { + TableExpr::RelationVar(rel) => rel.lineage.as_ref().map(|l| &l.inputs), + _ => None, + }; - let mut instance_frame = Lineage { - inputs: vec![LineageInput { + let inputs = match underlying_inputs { + Some(inputs) if !inputs.is_empty() => inputs + .iter() + .map(|inp| LineageInput { + id: input_id, + name: input_name.clone(), + table: inp.table.clone(), + }) + .collect(), + _ => vec![LineageInput { id: input_id, name: input_name.clone(), table: table_fq.clone(), }], + }; + + // TODO: can this panic? + let columns = ty.as_ref().unwrap().as_relation().unwrap(); + + let mut instance_frame = Lineage { + inputs, columns: Vec::new(), ..Default::default() }; diff --git a/prqlc/prqlc/src/semantic/resolver/mod.rs b/prqlc/prqlc/src/semantic/resolver/mod.rs index 5c774a567829..3453dd7cb13e 100644 --- a/prqlc/prqlc/src/semantic/resolver/mod.rs +++ b/prqlc/prqlc/src/semantic/resolver/mod.rs @@ -481,4 +481,117 @@ pub(super) mod test { _ => panic!("Expected All column"), } } + + #[test] + fn test_cte_lineage_traces_to_source_table() { + // This test verifies that simple CTEs trace lineage back to + // the underlying source table instead of showing the CTE name. + use crate::internal::pl_to_lineage; + + let query = r#" + let employees_usa = (from employees | filter country == "USA") + from employees_usa + select {name, salary} + "#; + + let pl = crate::prql_to_pl(query).unwrap(); + let fc = pl_to_lineage(pl).unwrap(); + let final_lineage = &fc.frames.last().unwrap().1; + + assert_eq!( + final_lineage.inputs.len(), + 1, + "Simple CTE should have 1 input, got {:?}", + final_lineage.inputs + ); + + let input = &final_lineage.inputs[0]; + assert_eq!( + input.name, "employees_usa", + "Input name should be the CTE alias" + ); + assert_eq!( + input.table.name, "employees", + "Table should trace back to source table 'employees', got {:?}", + input.table + ); + } + + #[test] + fn test_direct_table_lineage_uses_table_itself() { + // This test verifies that direct table references (non-CTEs) + // use the table itself as the lineage input, exercising the + // fallback path in lineage_of_table_decl. + use crate::internal::pl_to_lineage; + + let query = r#" + from employees + select {name, salary} + "#; + + let pl = crate::prql_to_pl(query).unwrap(); + let fc = pl_to_lineage(pl).unwrap(); + let final_lineage = &fc.frames.last().unwrap().1; + + assert_eq!( + final_lineage.inputs.len(), + 1, + "Direct table should have 1 input" + ); + + let input = &final_lineage.inputs[0]; + assert_eq!( + input.table.name, "employees", + "Table should be 'employees' directly" + ); + } + + #[test] + fn test_cte_lineage_with_union_traces_to_all_source_tables() { + // This test verifies that CTEs with UNIONs trace lineage + // back to ALL underlying source tables. + use crate::internal::pl_to_lineage; + + let query = r#" + let combined = ( + from employees + select {name, dept} + append ( + from contractors + select {name, dept} + ) + ) + from combined + select {name} + "#; + + let pl = crate::prql_to_pl(query).unwrap(); + let fc = pl_to_lineage(pl).unwrap(); + let final_lineage = &fc.frames.last().unwrap().1; + + // Should have inputs from both employees and contractors + assert_eq!( + final_lineage.inputs.len(), + 2, + "CTE with UNION should have 2 inputs, got {:?}", + final_lineage.inputs + ); + + let tables: Vec<_> = final_lineage + .inputs + .iter() + .map(|inp| inp.table.name.as_str()) + .collect(); + + assert!( + tables.contains(&"employees"), + "Should contain employees table, got {:?}", + tables + ); + assert!( + tables.contains(&"contractors"), + "Should contain contractors table, got {:?}", + tables + ); + } } diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap index 8ba9ac7871bd..b7f88e09376a 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap @@ -16,7 +16,8 @@ frames: - id: 133 name: genre_count table: - - genre_count + - default_db + - genres - - 1:217-230 - columns: - !Single @@ -28,7 +29,8 @@ frames: - id: 133 name: genre_count table: - - genre_count + - default_db + - genres nodes: - id: 133 kind: Ident