Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions datajunction-clients/python/tests/examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,7 @@
"JOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_idLEFTOUTERJOIN"
"default_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.state)SELECT"
"repair_order_details_0.stateASstate,\tSUM(repair_order_details_0.price_sum_252381cf)"
"/SUM(repair_order_details_0.price_count_252381cf)ASavg_repair_priceFROM"
"/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)ASavg_repair_priceFROM"
"repair_order_details_0GROUPBYrepair_order_details_0.state": QueryWithResults(
**{
"id": "bd98d6be-e2d2-413e-94c7-96d9411ddee2",
Expand Down Expand Up @@ -1399,7 +1399,7 @@
"LEFTOUTERJOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_id"
"LEFTOUTERJOINdefault_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.city)"
"SELECTCOALESCE(repair_order_details_0.city)AScity,\tSUM(repair_order_details_0."
"price_sum_252381cf)/SUM(repair_order_details_0.price_count_252381cf)AS"
"price_sum_252381cf)/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)AS"
"avg_repair_priceFROMrepair_order_details_0GROUPBYrepair_order_details_0.city": QueryWithResults(
id="v3-avg-repair-price-city",
submitted_query="...",
Expand Down Expand Up @@ -1437,7 +1437,7 @@
"LEFTOUTERJOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_id"
"LEFTOUTERJOINdefault_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.city)"
"SELECTrepair_order_details_0.cityAScity,\tSUM(repair_order_details_0."
"price_sum_252381cf)/SUM(repair_order_details_0.price_count_252381cf)AS"
"price_sum_252381cf)/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)AS"
"avg_repair_priceFROMrepair_order_details_0GROUPBYrepair_order_details_0.city": QueryWithResults(
id="v3-avg-repair-price-city",
submitted_query="...",
Expand Down Expand Up @@ -1465,7 +1465,7 @@
"LEFTOUTERJOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_id"
"LEFTOUTERJOINdefault_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.state)"
"SELECTCOALESCE(repair_order_details_0.state)ASstate,\tSUM(repair_order_details_0."
"price_sum_252381cf)/SUM(repair_order_details_0.price_count_252381cf)AS"
"price_sum_252381cf)/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)AS"
"avg_repair_priceFROMrepair_order_details_0GROUPBYrepair_order_details_0.state": QueryWithResults(
id="v3-avg-repair-price-state-no-data",
submitted_query="...",
Expand All @@ -1481,7 +1481,7 @@
"LEFTOUTERJOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_id"
"LEFTOUTERJOINdefault_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.state)"
"SELECTrepair_order_details_0.stateASstate,\tSUM(repair_order_details_0."
"price_sum_252381cf)/SUM(repair_order_details_0.price_count_252381cf)AS"
"price_sum_252381cf)/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)AS"
"avg_repair_priceFROMrepair_order_details_0GROUPBYrepair_order_details_0.state": QueryWithResults(
id="v3-avg-repair-price-state-no-data-no-coalesce",
submitted_query="...",
Expand All @@ -1498,7 +1498,7 @@
"LEFTOUTERJOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_id"
"LEFTOUTERJOINdefault_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.city)"
"SELECTrepair_order_details_0.cityAScity,\tSUM(repair_order_details_0."
"price_sum_252381cf)/SUM(repair_order_details_0.price_count_252381cf)AS"
"price_sum_252381cf)/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)AS"
"avg_repair_priceFROMrepair_order_details_0GROUPBYrepair_order_details_0.city": QueryWithResults(
id="v3-avg-repair-price-city-with-join-key",
submitted_query="...",
Expand Down Expand Up @@ -1526,7 +1526,7 @@
"LEFTOUTERJOINdefault_repair_ordert2ONt1.repair_order_id=t2.repair_order_id"
"LEFTOUTERJOINdefault_hard_hatt3ONt2.hard_hat_id=t3.hard_hat_idGROUPBYt3.state)"
"SELECTrepair_order_details_0.stateASstate,\tSUM(repair_order_details_0."
"price_sum_252381cf)/SUM(repair_order_details_0.price_count_252381cf)AS"
"price_sum_252381cf)/NULLIF(SUM(repair_order_details_0.price_count_252381cf),0)AS"
"avg_repair_priceFROMrepair_order_details_0GROUPBYrepair_order_details_0.state": QueryWithResults(
id="v3-avg-repair-price-state-no-data-with-join-key",
submitted_query="...",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from datajunction_server.construction.build_v3.utils import get_cte_name
from datajunction_server.database.node import Node
from datajunction_server.models.node_type import NodeType
from datajunction_server.sql.decompose import wrap_divisions_in_nullif
from datajunction_server.sql.parsing import ast
from datajunction_server.utils import SEPARATOR

Expand Down Expand Up @@ -1593,4 +1594,8 @@ def process_metric_combiner_expression(
partition_cte_alias=cte_alias,
)

# Wrap denominators so user-authored ratio metrics don't blow up on
# zero (NaN/Infinity/error → NULL). Idempotent.
wrap_divisions_in_nullif(expr_ast)

return expr_ast
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
from copy import deepcopy
from dataclasses import dataclass
from typing import Any, Optional
from typing import Any, Optional, cast

from datajunction_server.construction.build_v3.cte import (
build_alias_to_dimension_node,
Expand Down Expand Up @@ -50,6 +50,7 @@
from datajunction_server.errors import DJInvalidInputException
from datajunction_server.models.decompose import Aggregability
from datajunction_server.models.node_type import NodeType
from datajunction_server.sql.decompose import wrap_divisions_in_nullif
from datajunction_server.sql.parsing import ast

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -686,6 +687,10 @@ def build_intermediate_metric_expr(
# The dependency hasn't been built, so defer this metric
return None # pragma: no cover

# Intermediate derived metrics like avg_order_value =
# total_revenue / order_count inline raw aggregations on both
# sides — wrap denominators to avoid NaN/Infinity/error on 0.
wrap_divisions_in_nullif(cast(ast.Expression, expr_ast))
return expr_ast # type: ignore


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,14 @@ def decompose_expression(
args=[ast.Column(name=numerator_measure_name)],
),
right=ast.Function(
ast.Name("count"),
args=[ast.Column(name=denominator_measure_name)],
ast.Name("NULLIF"),
args=[
ast.Function(
ast.Name("count"),
args=[ast.Column(name=denominator_measure_name)],
),
ast.Number(value=0),
],
),
op=ast.BinaryOpKind.Divide,
)
Expand All @@ -455,6 +461,14 @@ def decompose_expression(
if expr.op in acceptable_binary_ops: # pragma: no cover
measures_combiner_left, measures_left = decompose_expression(expr.left)
measures_combiner_right, measures_right = decompose_expression(expr.right)
if expr.op == ast.BinaryOpKind.Divide and not (
isinstance(measures_combiner_right, ast.Function)
and measures_combiner_right.alias_or_name.name.lower() == "nullif"
):
measures_combiner_right = ast.Function(
ast.Name("NULLIF"),
args=[measures_combiner_right, ast.Number(value=0)],
)
combiner = ast.BinaryOp(
left=measures_combiner_left,
right=measures_combiner_right,
Expand Down
53 changes: 53 additions & 0 deletions datajunction-server/datajunction_server/sql/decompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,45 @@ def make_func(name: str, *args: ast.Expression | str) -> ast.Function:
)


def safe_denominator(expr: ast.Expression) -> ast.Expression:
"""Wrap expr in NULLIF(expr, 0) to make it safe as a divisor.

Idempotent: if expr is already NULLIF(_, 0) or a numeric literal,
returns it unchanged. Caller passes the RHS of a Divide.
"""
# Numeric literals: x / 100 doesn't need wrapping.
if isinstance(expr, ast.Number):
return expr
# Already wrapped.
if (
isinstance(expr, ast.Function)
and expr.name.name.upper() == "NULLIF"
and len(expr.args) == 2
and isinstance(expr.args[1], ast.Number)
and expr.args[1].value == 0
):
return expr
return ast.Function(
ast.Name("NULLIF"),
args=[expr, ast.Number(value=0)],
)


def wrap_divisions_in_nullif(expr: ast.Expression) -> ast.Expression:
"""Walk expr and wrap the RHS of every Divide BinaryOp in NULLIF(_, 0)
so division-by-zero produces NULL instead of NaN/Infinity/error.

Mutates and returns expr. Idempotent via :func:`safe_denominator`.
"""
for node in expr.find_all(ast.BinaryOp):
if node.op != ast.BinaryOpKind.Divide:
continue
wrapped = safe_denominator(node.right)
if wrapped is not node.right:
node.right = wrapped
return expr


# =============================================================================
# Decomposition Framework
# =============================================================================
Expand Down Expand Up @@ -999,6 +1038,15 @@ def _extract_base(
components_tracker.add(comp.name)
components.append(comp)

# Wrap user-authored divisions in the outer expression too.
# _decompose only wraps the small combiners it builds for AVG /
# variance / stddev / covariance; the user's top-level
# expression (e.g. CAST(SUM(...)) / COUNT(*)) is unchanged
# after sub-aggregation replacements and would otherwise emit
# bare divisions.
for proj in query_ast.select.projection:
wrap_divisions_in_nullif(cast(ast.Expression, proj))

return components, query_ast

def _substitute_metric_references(
Expand Down Expand Up @@ -1080,6 +1128,11 @@ def _decompose(
)
else:
combiner_ast = decomposition.combine(components)
# Decomposed AVG / variance / stddev / covariance all build
# SUM(...) / SUM(count)-style combiners where the denominator
# can legitimately be 0. Wrap to produce NULL rather than
# NaN/Infinity/error.
combiner_ast = wrap_divisions_in_nullif(combiner_ast)

return DecompositionResult(components, combiner_ast)

Expand Down
26 changes: 13 additions & 13 deletions datajunction-server/tests/api/cubes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,9 +947,9 @@ async def test_create_cube(
COALESCE(repair_order_details_0.company_name, repair_orders_fact_0.company_name) AS company_name,
COALESCE(repair_order_details_0.local_region, repair_orders_fact_0.local_region) AS local_region,
COALESCE(repair_order_details_0.hire_date, repair_orders_fact_0.hire_date) AS hire_date,
CAST(SUM(repair_orders_fact_0.discount_sum_30b84e6c) AS DOUBLE) / SUM(repair_orders_fact_0.count_c8e42e74) AS discounted_orders_rate,
CAST(SUM(repair_orders_fact_0.discount_sum_30b84e6c) AS DOUBLE) / NULLIF(SUM(repair_orders_fact_0.count_c8e42e74), 0) AS discounted_orders_rate,
SUM(repair_orders_fact_0.repair_order_id_count_bd241964) AS num_repair_orders,
SUM(repair_orders_fact_0.price_sum_935e7117) / SUM(repair_orders_fact_0.price_count_935e7117) AS avg_repair_price,
SUM(repair_orders_fact_0.price_sum_935e7117) / NULLIF(SUM(repair_orders_fact_0.price_count_935e7117), 0) AS avg_repair_price,
SUM(repair_orders_fact_0.total_repair_cost_sum_67874507) AS total_repair_cost,
SUM(repair_orders_fact_0.price_discount_sum_e4ba5456) AS total_repair_order_discounts,
SUM(repair_order_details_0.price_sum_252381cf) + SUM(repair_order_details_0.price_sum_252381cf) AS double_total_repair_cost
Expand Down Expand Up @@ -1082,9 +1082,9 @@ async def test_cube_filters_merged_with_request_filters(
COALESCE(repair_order_details_0.company_name, repair_orders_fact_0.company_name) AS company_name,
COALESCE(repair_order_details_0.local_region, repair_orders_fact_0.local_region) AS local_region,
COALESCE(repair_order_details_0.hire_date, repair_orders_fact_0.hire_date) AS hire_date,
CAST(SUM(repair_orders_fact_0.discount_sum_30b84e6c) AS DOUBLE) / SUM(repair_orders_fact_0.count_c8e42e74) AS discounted_orders_rate,
CAST(SUM(repair_orders_fact_0.discount_sum_30b84e6c) AS DOUBLE) / NULLIF(SUM(repair_orders_fact_0.count_c8e42e74), 0) AS discounted_orders_rate,
SUM(repair_orders_fact_0.repair_order_id_count_bd241964) AS num_repair_orders,
SUM(repair_orders_fact_0.price_sum_935e7117) / SUM(repair_orders_fact_0.price_count_935e7117) AS avg_repair_price,
SUM(repair_orders_fact_0.price_sum_935e7117) / NULLIF(SUM(repair_orders_fact_0.price_count_935e7117), 0) AS avg_repair_price,
SUM(repair_orders_fact_0.total_repair_cost_sum_67874507) AS total_repair_cost,
SUM(repair_orders_fact_0.price_discount_sum_e4ba5456) AS total_repair_order_discounts,
SUM(repair_order_details_0.price_sum_252381cf) + SUM(repair_order_details_0.price_sum_252381cf) AS double_total_repair_cost
Expand Down Expand Up @@ -1163,7 +1163,7 @@ async def test_cube_filters_applied_in_v3_sql_via_cube_param(
SELECT repair_orders_fact_0.state AS state,
repair_orders_fact_0.company_name AS company_name,
SUM(repair_orders_fact_0.repair_order_id_count_bd241964) AS num_repair_orders,
SUM(repair_orders_fact_0.price_sum_935e7117) / SUM(repair_orders_fact_0.price_count_935e7117) AS avg_repair_price,
SUM(repair_orders_fact_0.price_sum_935e7117) / NULLIF(SUM(repair_orders_fact_0.price_count_935e7117), 0) AS avg_repair_price,
SUM(repair_orders_fact_0.total_repair_cost_sum_67874507) AS total_repair_cost
FROM repair_orders_fact_0
WHERE repair_orders_fact_0.state = 'AZ'
Expand Down Expand Up @@ -1247,7 +1247,7 @@ async def test_cube_filters_applied_in_v3_sql_via_cube_param(
SELECT repair_orders_fact_0.state AS state,
repair_orders_fact_0.company_name AS company_name,
SUM(repair_orders_fact_0.repair_order_id_count_bd241964) AS num_repair_orders,
SUM(repair_orders_fact_0.price_sum_935e7117) / SUM(repair_orders_fact_0.price_count_935e7117) AS avg_repair_price,
SUM(repair_orders_fact_0.price_sum_935e7117) / NULLIF(SUM(repair_orders_fact_0.price_count_935e7117), 0) AS avg_repair_price,
SUM(repair_orders_fact_0.total_repair_cost_sum_67874507) AS total_repair_cost
FROM repair_orders_fact_0
GROUP BY repair_orders_fact_0.state, repair_orders_fact_0.company_name
Expand Down Expand Up @@ -1296,7 +1296,7 @@ async def test_cube_filters_applied_in_v3_sql_via_cube_param(
SELECT repair_orders_fact_0.state AS state,
repair_orders_fact_0.company_name AS company_name,
SUM(repair_orders_fact_0.repair_order_id_count_bd241964) AS num_repair_orders,
SUM(repair_orders_fact_0.price_sum_935e7117) / SUM(repair_orders_fact_0.price_count_935e7117) AS avg_repair_price,
SUM(repair_orders_fact_0.price_sum_935e7117) / NULLIF(SUM(repair_orders_fact_0.price_count_935e7117), 0) AS avg_repair_price,
SUM(repair_orders_fact_0.total_repair_cost_sum_67874507) AS total_repair_cost
FROM repair_orders_fact_0
WHERE repair_orders_fact_0.state = 'AZ' AND repair_orders_fact_0.company_name = 'Potts LLC'
Expand Down Expand Up @@ -1398,9 +1398,9 @@ async def test_cube_only_no_metrics_no_dims(client_with_repairs_cube: AsyncClien
COALESCE(repair_order_details_0.company_name, repair_orders_fact_0.company_name) AS company_name,
COALESCE(repair_order_details_0.local_region, repair_orders_fact_0.local_region) AS local_region,
COALESCE(repair_order_details_0.hire_date, repair_orders_fact_0.hire_date) AS hire_date,
CAST(SUM(repair_orders_fact_0.discount_sum_30b84e6c) AS DOUBLE) / SUM(repair_orders_fact_0.count_c8e42e74) AS discounted_orders_rate,
CAST(SUM(repair_orders_fact_0.discount_sum_30b84e6c) AS DOUBLE) / NULLIF(SUM(repair_orders_fact_0.count_c8e42e74), 0) AS discounted_orders_rate,
SUM(repair_orders_fact_0.repair_order_id_count_bd241964) AS num_repair_orders,
SUM(repair_orders_fact_0.price_sum_935e7117) / SUM(repair_orders_fact_0.price_count_935e7117) AS avg_repair_price,
SUM(repair_orders_fact_0.price_sum_935e7117) / NULLIF(SUM(repair_orders_fact_0.price_count_935e7117), 0) AS avg_repair_price,
SUM(repair_orders_fact_0.total_repair_cost_sum_67874507) AS total_repair_cost,
SUM(repair_orders_fact_0.price_discount_sum_e4ba5456) AS total_repair_order_discounts,
SUM(repair_order_details_0.price_sum_252381cf) + SUM(repair_order_details_0.price_sum_252381cf) AS double_total_repair_cost
Expand Down Expand Up @@ -3000,10 +3000,10 @@ async def test_cube_materialization_metadata(
assert results["metrics"] == [
{
"derived_expression": "SELECT CAST(SUM(discount_sum_30b84e6c) AS DOUBLE) / "
"SUM(count_c8e42e74) AS default_DOT_discounted_orders_rate FROM "
"NULLIF(SUM(count_c8e42e74), 0) AS default_DOT_discounted_orders_rate FROM "
"default.repair_orders_fact",
"metric_expression": "CAST(SUM(discount_sum_30b84e6c) AS DOUBLE) / "
"SUM(count_c8e42e74)",
"NULLIF(SUM(count_c8e42e74), 0)",
"metric": {
"name": "default.discounted_orders_rate",
"version": mock.ANY,
Expand Down Expand Up @@ -3049,9 +3049,9 @@ async def test_cube_materialization_metadata(
],
},
{
"derived_expression": "SELECT SUM(price_sum_935e7117) / SUM(price_count_935e7117) FROM "
"derived_expression": "SELECT SUM(price_sum_935e7117) / NULLIF(SUM(price_count_935e7117), 0) FROM "
"default.repair_orders_fact",
"metric_expression": "SUM(price_sum_935e7117) / SUM(price_count_935e7117)",
"metric_expression": "SUM(price_sum_935e7117) / NULLIF(SUM(price_count_935e7117), 0)",
"metric": {
"name": "default.avg_repair_price",
"version": mock.ANY,
Expand Down
4 changes: 2 additions & 2 deletions datajunction-server/tests/api/deployments_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,8 +1064,8 @@ def default_regional_repair_efficiency():
"Total Repair Amount in Region" is the total amount spent on repairs in a given region.
"Total Repair Amount Nationwide" is the total amount spent on all repairs nationwide.""",
query="""SELECT
(SUM(rm.completed_repairs) * 1.0 / SUM(rm.total_repairs_dispatched)) *
(SUM(rm.total_amount_in_region) * 1.0 / SUM(na.total_amount_nationwide)) * 100
(SUM(rm.completed_repairs) * 1.0 / NULLIF(SUM(rm.total_repairs_dispatched), 0)) *
(SUM(rm.total_amount_in_region) * 1.0 / NULLIF(SUM(na.total_amount_nationwide), 0)) * 100
FROM
${prefix}default.regional_level_agg rm
CROSS JOIN
Expand Down
2 changes: 1 addition & 1 deletion datajunction-server/tests/api/djql_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ async def test_get_djsql_with_orderby_and_limit(
)
SELECT
repair_orders_fact_0.country AS country,
SUM(repair_orders_fact_0.price_sum_HASH) / SUM(repair_orders_fact_0.price_count_HASH) AS avg_repair_price
SUM(repair_orders_fact_0.price_sum_HASH) / NULLIF(SUM(repair_orders_fact_0.price_count_HASH), 0) AS avg_repair_price
FROM repair_orders_fact_0
GROUP BY repair_orders_fact_0.country
ORDER BY country DESC
Expand Down
Loading
Loading