Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 1 addition & 30 deletions doc/user/content/sql/explain-analyze.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,35 +313,6 @@ EXPLAIN ANALYZE HINTS FOR INDEX wins_by_item AS SQL;
```

The results show the SQL that `EXPLAIN ANALYZE` would run to get the TopK hints
for the `wins_by_items` index:

```none
SELECT
repeat(' ', nesting * 2) || operator AS operator,
megsa.levels AS levels,
megsa.to_cut AS to_cut,
megsa.hint AS hint,
pg_size_pretty(savings) AS savings
FROM
mz_introspection.mz_lir_mapping AS mlm
JOIN
mz_introspection.mz_dataflow_global_ids AS mdgi
ON (mlm.global_id = mdgi.global_id)
LEFT JOIN
mz_introspection.mz_expected_group_size_advice AS megsa
ON
(
megsa.dataflow_id = mdgi.id
AND
mlm.operator_id_start <= megsa.region_id
AND
megsa.region_id < mlm.operator_id_end
)
JOIN
mz_introspection.mz_mappable_objects AS mo
ON (mlm.global_id = mo.global_id)
WHERE mo.name = 'materialize.public.wins_by_item'
ORDER BY mlm.lir_id DESC;
```
for the `wins_by_items` index.

[TopK hints]: /transform-data/idiomatic-materialize-sql/top-k/#query-hints-1
33 changes: 19 additions & 14 deletions src/sql/src/plan/statement/dml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -851,9 +851,10 @@ pub fn plan_explain_analyze(
SUM(mas.records) AS total_records,
CASE WHEN COUNT(DISTINCT mas.worker_id) <> 0 THEN SUM(mas.size) / COUNT(DISTINCT mas.worker_id) ELSE NULL END AS avg_memory,
CASE WHEN COUNT(DISTINCT mas.worker_id) <> 0 THEN SUM(mas.records) / COUNT(DISTINCT mas.worker_id) ELSE NULL END AS avg_records
FROM mz_introspection.mz_lir_mapping mlm
JOIN mz_introspection.mz_arrangement_sizes_per_worker mas
ON (mlm.operator_id_start <= mas.operator_id AND mas.operator_id < mlm.operator_id_end)
FROM mz_introspection.mz_lir_mapping mlm
CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id
JOIN mz_introspection.mz_arrangement_sizes_per_worker mas
ON (mas.operator_id = valid_id)
GROUP BY mlm.global_id, mlm.lir_id"#,
));
from.push("LEFT JOIN summary_memory sm USING (global_id, lir_id)");
Expand All @@ -867,9 +868,10 @@ GROUP BY mlm.global_id, mlm.lir_id"#,
mas.worker_id AS worker_id,
SUM(mas.size) AS worker_memory,
SUM(mas.records) AS worker_records
FROM mz_introspection.mz_lir_mapping mlm
JOIN mz_introspection.mz_arrangement_sizes_per_worker mas
ON (mlm.operator_id_start <= mas.operator_id AND mas.operator_id < mlm.operator_id_end)
FROM mz_introspection.mz_lir_mapping mlm
CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id
JOIN mz_introspection.mz_arrangement_sizes_per_worker mas
ON (mas.operator_id = valid_id)
GROUP BY mlm.global_id, mlm.lir_id, mas.worker_id"#,
));
from.push("LEFT JOIN per_worker_memory pwm USING (global_id, lir_id)");
Expand Down Expand Up @@ -907,9 +909,10 @@ GROUP BY mlm.global_id, mlm.lir_id, mas.worker_id"#,
mlm.lir_id AS lir_id,
SUM(mse.elapsed_ns) AS total_ns,
CASE WHEN COUNT(DISTINCT mse.worker_id) <> 0 THEN SUM(mse.elapsed_ns) / COUNT(DISTINCT mse.worker_id) ELSE NULL END AS avg_ns
FROM mz_introspection.mz_lir_mapping mlm
JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse
ON (mlm.operator_id_start <= mse.id AND mse.id < mlm.operator_id_end)
FROM mz_introspection.mz_lir_mapping mlm
CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id
JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse
ON (mse.id = valid_id)
GROUP BY mlm.global_id, mlm.lir_id"#,
));
from.push("LEFT JOIN summary_cpu sc USING (global_id, lir_id)");
Expand All @@ -922,9 +925,10 @@ GROUP BY mlm.global_id, mlm.lir_id"#,
mlm.lir_id AS lir_id,
mse.worker_id AS worker_id,
SUM(mse.elapsed_ns) AS worker_ns
FROM mz_introspection.mz_lir_mapping mlm
JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse
ON (mlm.operator_id_start <= mse.id AND mse.id < mlm.operator_id_end)
FROM mz_introspection.mz_lir_mapping mlm
CROSS JOIN generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id
JOIN mz_introspection.mz_scheduling_elapsed_per_worker mse
ON (mse.id = valid_id)
GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id"#,
));
from.push("LEFT JOIN per_worker_cpu pwc USING (global_id, lir_id)");
Expand Down Expand Up @@ -955,10 +959,11 @@ GROUP BY mlm.global_id, mlm.lir_id, mse.worker_id"#,
"megsa.levels AS levels",
"megsa.to_cut AS to_cut",
"megsa.hint AS hint",
"pg_size_pretty(savings) AS savings",
"pg_size_pretty(megsa.savings) AS savings",
]);
from.extend(["JOIN mz_introspection.mz_dataflow_global_ids mdgi ON (mlm.global_id = mdgi.global_id)",
"LEFT JOIN mz_introspection.mz_expected_group_size_advice megsa ON (megsa.dataflow_id = mdgi.id AND mlm.operator_id_start <= megsa.region_id AND megsa.region_id < mlm.operator_id_end)"]);
"LEFT JOIN (generate_series((mlm.operator_id_start) :: int8, (mlm.operator_id_end - 1) :: int8) AS valid_id JOIN \
Copy link
Contributor

@ggevay ggevay Sep 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if this generate_series could be on the left side of the left join. Having it on the right side means that the right side is correlated with the left side, which is a complicated left join lowering case, with its own code path.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started with it on the left side, but I got redundant rows. (There may be a good way to hide those, but my SQL skill wasn't sufficient to the task.) The resulting plan is not small, but there is no cross join...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to merge after a tiny doc update, but I made https://github.com/MaterializeInc/database-issues/issues/9730 to record your idea.

mz_introspection.mz_expected_group_size_advice megsa ON (megsa.region_id = valid_id)) ON (megsa.dataflow_id = mdgi.id)"]);
}
}

Expand Down
Loading