From 17f64417833d480471c1988eb22737800d37d20e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 17 Sep 2025 15:21:30 -0400 Subject: [PATCH 1/2] remove cross join from EXPLAIN ANALYZE --- src/sql/src/plan/statement/dml.rs | 33 +-- .../singlereplica_attribution_sources.slt | 224 ++++++------------ 2 files changed, 98 insertions(+), 159 deletions(-) diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index bad0da8f0a348..7382abcd1db2b 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -851,9 +851,10 @@ pub fn plan_explain_analyze( SUM(mas.records) AS total_records, CASE WHEN COUNT(DISTINCT mas.worker_id) <> 0 THEN SUM(mas.size) / COUNT(DISTINCT mas.worker_id) ELSE NULL END AS avg_memory, CASE WHEN COUNT(DISTINCT mas.worker_id) <> 0 THEN SUM(mas.records) / COUNT(DISTINCT mas.worker_id) ELSE NULL END AS avg_records - FROM mz_introspection.mz_lir_mapping mlm - JOIN mz_introspection.mz_arrangement_sizes_per_worker mas - ON (mlm.operator_id_start <= mas.operator_id AND mas.operator_id < mlm.operator_id_end) + FROM mz_introspection.mz_lir_mapping mlm + CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id + JOIN mz_introspection.mz_arrangement_sizes_per_worker mas + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id"#, )); from.push("LEFT JOIN summary_memory sm USING (global_id, lir_id)"); @@ -867,9 +868,10 @@ GROUP BY mlm.global_id, mlm.lir_id"#, mas.worker_id AS worker_id, SUM(mas.size) AS worker_memory, SUM(mas.records) AS worker_records - FROM mz_introspection.mz_lir_mapping mlm - JOIN mz_introspection.mz_arrangement_sizes_per_worker mas - ON (mlm.operator_id_start <= mas.operator_id AND mas.operator_id < mlm.operator_id_end) + FROM mz_introspection.mz_lir_mapping mlm + CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id + JOIN 
mz_introspection.mz_arrangement_sizes_per_worker mas + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mas.worker_id"#, )); from.push("LEFT JOIN per_worker_memory pwm USING (global_id, lir_id)"); @@ -907,9 +909,10 @@ GROUP BY mlm.global_id, mlm.lir_id, mas.worker_id"#, mlm.lir_id AS lir_id, SUM(mse.elapsed_ns) AS total_ns, CASE WHEN COUNT(DISTINCT mse.worker_id) <> 0 THEN SUM(mse.elapsed_ns) / COUNT(DISTINCT mse.worker_id) ELSE NULL END AS avg_ns - FROM mz_introspection.mz_lir_mapping mlm - JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse - ON (mlm.operator_id_start <= mse.id AND mse.id < mlm.operator_id_end) + FROM mz_introspection.mz_lir_mapping mlm + CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id + JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id"#, )); from.push("LEFT JOIN summary_cpu sc USING (global_id, lir_id)"); @@ -922,9 +925,10 @@ GROUP BY mlm.global_id, mlm.lir_id"#, mlm.lir_id AS lir_id, mse.worker_id AS worker_id, SUM(mse.elapsed_ns) AS worker_ns - FROM mz_introspection.mz_lir_mapping mlm - JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse - ON (mlm.operator_id_start <= mse.id AND mse.id < mlm.operator_id_end) + FROM mz_introspection.mz_lir_mapping mlm + CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id + JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id"#, )); from.push("LEFT JOIN per_worker_cpu pwc USING (global_id, lir_id)"); @@ -955,10 +959,11 @@ GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id"#, "megsa.levels AS levels", "megsa.to_cut AS to_cut", "megsa.hint AS hint", - "pg_size_pretty(savings) AS savings", + "pg_size_pretty(megsa.savings) AS savings", ]); from.extend(["JOIN mz_introspection.mz_dataflow_global_ids mdgi ON (mlm.global_id = 
mdgi.global_id)", - "LEFT JOIN mz_introspection.mz_expected_group_size_advice megsa ON (megsa.dataflow_id = mdgi.id AND mlm.operator_id_start <= megsa.region_id AND megsa.region_id < mlm.operator_id_end)"]); + "LEFT JOIN (generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id JOIN \ + mz_introspection.mz_expected_group_size_advice megsa ON (megsa.region_id = valid_id)) ON (megsa.dataflow_id = mdgi.id)"]); } } diff --git a/test/sqllogictest/introspection/singlereplica_attribution_sources.slt b/test/sqllogictest/introspection/singlereplica_attribution_sources.slt index 5219151b04b16..cb4c3f7e6ab41 100644 --- a/test/sqllogictest/introspection/singlereplica_attribution_sources.slt +++ b/test/sqllogictest/introspection/singlereplica_attribution_sources.slt @@ -34,8 +34,8 @@ SELECT mz_unsafe.mz_sleep(8) query IT SELECT id, global_id FROM mz_internal.mz_dataflow_global_ids ORDER BY id, global_id; ---- -4 u2 -4 u3 +5 u2 +5 u3 query TI SELECT global_id, lir_id FROM mz_internal.mz_lir_mapping ORDER BY global_id, lir_id DESC; @@ -119,17 +119,17 @@ SELECT mz_unsafe.mz_sleep(8) query IT SELECT id, global_id FROM mz_internal.mz_dataflow_global_ids ORDER BY id, global_id; ---- -9 t44 +10 t52 query TI SELECT global_id, lir_id FROM mz_internal.mz_lir_mapping ORDER BY global_id, lir_id DESC; ---- -t44 5 -t44 4 -t44 3 -t44 2 -t44 1 +t52 5 +t52 4 +t52 3 +t52 2 +t52 1 ## attribution queries @@ -142,11 +142,11 @@ SELECT global_id, lir_id, parent_lir_id, REPEAT(' ', nesting * 2) || operator AS GROUP BY global_id, lir_id, operator, parent_lir_id, nesting ORDER BY global_id, lir_id DESC; ---- -t44 5 NULL Differential␠Join␠%0␠»␠%1 -t44 4 5 ␠␠Arrange␠(#0{y}) -t44 3 4 ␠␠␠␠Read␠u4 -t44 2 5 ␠␠Arrange␠(#0{x}) -t44 1 2 ␠␠␠␠Read␠u4 +t52 5 NULL Differential␠Join␠%0␠»␠%1 +t52 4 5 ␠␠Arrange␠(#0{y}) +t52 3 4 ␠␠␠␠Read␠u4 +t52 2 5 ␠␠Arrange␠(#0{x}) +t52 1 2 ␠␠␠␠Read␠u4 # omitting pg_size_pretty(sum(size)) as size query TIIT @@ -157,11 +157,11 @@ SELECT global_id, 
lir_id, parent_lir_id, REPEAT(' ', nesting * 2) || operator AS GROUP BY global_id, lir_id, operator, parent_lir_id, nesting ORDER BY global_id, lir_id DESC; ---- -t44 5 NULL Differential␠Join␠%0␠»␠%1 -t44 4 5 ␠␠Arrange␠(#0{y}) -t44 3 4 ␠␠␠␠Read␠u4 -t44 2 5 ␠␠Arrange␠(#0{x}) -t44 1 2 ␠␠␠␠Read␠u4 +t52 5 NULL Differential␠Join␠%0␠»␠%1 +t52 4 5 ␠␠Arrange␠(#0{y}) +t52 3 4 ␠␠␠␠Read␠u4 +t52 2 5 ␠␠Arrange␠(#0{x}) +t52 1 2 ␠␠␠␠Read␠u4 statement ok DROP TABLE u CASCADE; @@ -417,26 +417,26 @@ SELECT global_id, REPEAT(' ', nesting * 2) || operator AS operator FROM mz_internal.mz_lir_mapping mlm ORDER BY global_id, lir_id DESC; ---- -t69 Returning␠Distinct␠GroupAggregate -t69 ␠␠Union -t69 ␠␠␠␠Differential␠Join␠%0␠»␠%1 -t69 ␠␠␠␠␠␠Arrange␠(#0{messageid}) -t69 ␠␠␠␠␠␠␠␠Stream␠l0 -t69 ␠␠␠␠␠␠Arranged␠u15 -t69 ␠␠␠␠Arranged␠u13 -t69 With␠Recursive␠l0␠=␠Unarranged␠Raw␠Stream -t69 ␠␠Distinct␠GroupAggregate -t69 ␠␠␠␠Union -t69 ␠␠␠␠␠␠Differential␠Join␠%0␠»␠%1 -t69 ␠␠␠␠␠␠␠␠Arrange␠(#0{messageid}) -t69 ␠␠␠␠␠␠␠␠␠␠Read␠l0 -t69 ␠␠␠␠␠␠␠␠Arrange␠(#1{parentcommentid}) -t69 ␠␠␠␠␠␠␠␠␠␠Arranged␠u15 -t69 ␠␠␠␠␠␠Differential␠Join␠%1␠»␠%0 -t69 ␠␠␠␠␠␠␠␠Arranged␠u13 -t69 ␠␠␠␠␠␠␠␠Arrange␠(#1{parentpostid}) -t69 ␠␠␠␠␠␠␠␠␠␠Arranged␠u15 -t69 ␠␠␠␠␠␠Arranged␠u13 +t77 Returning␠Distinct␠GroupAggregate +t77 ␠␠Union +t77 ␠␠␠␠Differential␠Join␠%0␠»␠%1 +t77 ␠␠␠␠␠␠Arrange␠(#0{messageid}) +t77 ␠␠␠␠␠␠␠␠Stream␠l0 +t77 ␠␠␠␠␠␠Arranged␠u15 +t77 ␠␠␠␠Arranged␠u13 +t77 With␠Recursive␠l0␠=␠Unarranged␠Raw␠Stream +t77 ␠␠Distinct␠GroupAggregate +t77 ␠␠␠␠Union +t77 ␠␠␠␠␠␠Differential␠Join␠%0␠»␠%1 +t77 ␠␠␠␠␠␠␠␠Arrange␠(#0{messageid}) +t77 ␠␠␠␠␠␠␠␠␠␠Read␠l0 +t77 ␠␠␠␠␠␠␠␠Arrange␠(#1{parentcommentid}) +t77 ␠␠␠␠␠␠␠␠␠␠Arranged␠u15 +t77 ␠␠␠␠␠␠Differential␠Join␠%1␠»␠%0 +t77 ␠␠␠␠␠␠␠␠Arranged␠u13 +t77 ␠␠␠␠␠␠␠␠Arrange␠(#1{parentpostid}) +t77 ␠␠␠␠␠␠␠␠␠␠Arranged␠u15 +t77 ␠␠␠␠␠␠Arranged␠u13 u10 Arrange␠(#1{person1id}) u10 ␠␠Stream␠u9 u11 Arrange␠(#2{person2id}) @@ -592,7 +592,7 @@ materialize.public.v2_idx_x u26 2 materialize.public.v2_idx_x u27 
2 materialize.public.v_idx_x u28 5 materialize.public.v_idx_x u29 2 -materialize.public.w t82 5 +materialize.public.w t90 5 # explain analyze SQL generate @@ -622,14 +622,10 @@ WITH AS avg_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ) SELECT @@ -666,14 +662,10 @@ WITH AS avg_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ) SELECT @@ -715,14 +707,10 @@ WITH AS avg_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ), summary_cpu AS @@ -741,14 +729,10 @@ WITH AS avg_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ) SELECT @@ -793,14 +777,10 @@ WITH AS avg_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 
1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ), per_worker_memory AS @@ -813,14 +793,10 @@ WITH sum(mas.records) AS worker_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mas.worker_id ), summary_cpu AS @@ -839,14 +815,10 @@ WITH AS avg_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ), per_worker_cpu AS @@ -858,14 +830,10 @@ WITH sum(mse.elapsed_ns) AS worker_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id ) SELECT @@ -931,14 +899,10 @@ WITH AS avg_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id 
), per_worker_cpu AS @@ -950,14 +914,10 @@ WITH sum(mse.elapsed_ns) AS worker_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id ), summary_memory AS @@ -982,14 +942,10 @@ WITH AS avg_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ), per_worker_memory AS @@ -1002,14 +958,10 @@ WITH sum(mas.records) AS worker_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mas.worker_id ) SELECT @@ -1072,22 +1024,20 @@ SELECT megsa.levels AS levels, megsa.to_cut AS to_cut, megsa.hint AS hint, - pg_size_pretty(savings) AS savings + pg_size_pretty(megsa.savings) AS savings FROM mz_introspection.mz_lir_mapping AS mlm JOIN mz_introspection.mz_dataflow_global_ids AS mdgi ON (mlm.global_id = mdgi.global_id) LEFT JOIN - mz_introspection.mz_expected_group_size_advice AS megsa - ON - ( - megsa.dataflow_id = mdgi.id - AND - mlm.operator_id_start <= megsa.region_id - AND - megsa.region_id < mlm.operator_id_end - ) + ( + generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS 
valid_id + JOIN + mz_introspection.mz_expected_group_size_advice AS megsa + ON (megsa.region_id = valid_id) + ) + ON (megsa.dataflow_id = mdgi.id) JOIN mz_introspection.mz_mappable_objects AS mo ON (mlm.global_id = mo.global_id) @@ -1115,14 +1065,10 @@ WITH AS avg_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ), per_worker_cpu AS @@ -1134,14 +1080,10 @@ WITH sum(mse.elapsed_ns) AS worker_ns FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_scheduling_elapsed_per_worker AS mse - ON - ( - mlm.operator_id_start <= mse.id - AND - mse.id < mlm.operator_id_end - ) + ON (mse.id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id ), summary_memory AS @@ -1166,14 +1108,10 @@ WITH AS avg_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id ), per_worker_memory AS @@ -1186,14 +1124,10 @@ WITH sum(mas.records) AS worker_records FROM mz_introspection.mz_lir_mapping AS mlm + CROSS JOIN generate_series((mlm.operator_id_start)::int8, (mlm.operator_id_end - 1)::int8) AS valid_id JOIN mz_introspection.mz_arrangement_sizes_per_worker AS mas - ON - ( - mlm.operator_id_start <= mas.operator_id - AND - mas.operator_id < mlm.operator_id_end - ) + ON (mas.operator_id = valid_id) GROUP BY mlm.global_id, mlm.lir_id, 
mas.worker_id ) SELECT From 095769b2d4d81727d2c172a3fed1f3a5e064469e Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 24 Sep 2025 13:41:25 -0400 Subject: [PATCH 2/2] drop unnecessary dump of old SQL output --- doc/user/content/sql/explain-analyze.md | 31 +------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/doc/user/content/sql/explain-analyze.md b/doc/user/content/sql/explain-analyze.md index e3e6961bf14f7..ae585420af906 100644 --- a/doc/user/content/sql/explain-analyze.md +++ b/doc/user/content/sql/explain-analyze.md @@ -313,35 +313,6 @@ EXPLAIN ANALYZE HINTS FOR INDEX wins_by_item AS SQL; ``` The results show the SQL that `EXPLAIN ANALYZE` would run to get the TopK hints -for the `wins_by_items` index: - -```none -SELECT - repeat(' ', nesting * 2) || operator AS operator, - megsa.levels AS levels, - megsa.to_cut AS to_cut, - megsa.hint AS hint, - pg_size_pretty(savings) AS savings -FROM - mz_introspection.mz_lir_mapping AS mlm - JOIN - mz_introspection.mz_dataflow_global_ids AS mdgi - ON (mlm.global_id = mdgi.global_id) - LEFT JOIN - mz_introspection.mz_expected_group_size_advice AS megsa - ON - ( - megsa.dataflow_id = mdgi.id - AND - mlm.operator_id_start <= megsa.region_id - AND - megsa.region_id < mlm.operator_id_end - ) - JOIN - mz_introspection.mz_mappable_objects AS mo - ON (mlm.global_id = mo.global_id) -WHERE mo.name = 'materialize.public.wins_by_item' -ORDER BY mlm.lir_id DESC; -``` +for the `wins_by_item` index. [TopK hints]: /transform-data/idiomatic-materialize-sql/top-k/#query-hints-1