Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
### Unreleased

#### New Features
* Add `clickhouse__safe_cast` macro that automatically provides default values for ClickHouse types when casting null values. This eliminates the need to specify all non-nullable columns in unit test fixtures.


### Release [1.9.5], 2025-10-20

#### Bugs
Expand Down
12 changes: 12 additions & 0 deletions dbt/include/clickhouse/macros/utils/safe_cast.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- This macro provides type-safe casting with automatic default values for ClickHouse types.
-- When the literal string 'null' is passed as the field parameter, it returns the ClickHouse
-- default value for the specified type. This is primarily used in unit test fixtures to avoid
-- having to specify all non-nullable columns.

-- Parameters:
--   field: SQL expression text to cast. The exact string 'null' selects the default-value branch;
--          anything else is rendered verbatim inside CAST(... AS dtype).
--   dtype: ClickHouse type name, rendered as the CAST target in both branches.
-- In the default-value branch, single quotes inside dtype are backslash-escaped so that a
-- parameterised type name can be embedded in the string argument of defaultValueOfTypeName.
{% macro clickhouse__safe_cast(field, dtype) %}
{%- if field == 'null' -%}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition field == 'null' checks if the field is literally the string 'null', not if it's a SQL NULL value. For SQL NULL values, you should use {{ field }} is null instead. The current implementation will only work if 'null' is passed as a string literal.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're correct that the condition field == 'null' checks for the literal string 'null' rather than a SQL NULL value. This is actually intentional for this macro's specific use case.
The safe_cast macro is designed specifically to solve issue #315 - handling non-nullable ClickHouse columns in dbt unit tests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @MaelleBaillet5 ! Thanks for contributing this. This PR is looking nice and I'd like to merge it ASAP.

The only thing I'm missing now is a proper test for the unit-testing part. As you mention, this is specifically designed to solve the issue in #315, so it would be great to add a test to ensure #315 is not happening again. I think creating an additional test and adding there the unit-test listed in the "Steps to reproduce" section would be enough. Would you add it?

CAST(defaultValueOfTypeName('{{ dtype | replace("'", "\\'") }}') AS {{ dtype }})
{%- else -%}
CAST({{ field }} AS {{ dtype }})
{%- endif -%}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
Test that unit tests work correctly when column values are omitted from input rows.
The safe_cast macro should provide default values instead of NULL for missing columns.
"""
import pytest
from dbt.tests.util import run_dbt


# First model: two literal rows exposing the columns `id` and `foo`.
my_first_dbt_model_sql = """
select 1 as id, 'a' AS foo
union all
select 2 as id, 'b' AS foo
"""

# Second model: selects every column from the first model, keeping only the row where id = 1.
my_second_dbt_model_sql = """
select *
from {{ ref('my_first_dbt_model') }}
where id = 1
"""

# Schema + unit-test definition for the two models.
# The unit test feeds my_first_dbt_model with rows that only set `id`; the
# `foo` column is deliberately left out of every input row.
# NOTE: YAML nesting is significant here - each `columns`, `given`, `rows` and
# `expect` entry must stay indented under its parent item.
test_my_model_yml = """
version: 2

models:
  - name: my_first_dbt_model
    description: "A starter dbt model"
    columns:
      - name: id
        data_type: uint64
      - name: foo
        data_type: string
  - name: my_second_dbt_model
    description: "A starter dbt model"
    columns:
      - name: id
        data_type: uint64
      - name: foo
        data_type: string
unit_tests:
  - name: test_not_null
    model: my_second_dbt_model
    given:
      - input: ref('my_first_dbt_model')
        rows:
          - {id: 1}
          - {id: 2}
    expect:
      rows:
        - {id: 1}
"""


class TestMissingColumnValues:
    """
    Run a dbt unit test whose input rows leave a column out.

    The fixture rows only provide ``id``; the unit test is expected to run
    anyway, with safe_cast supplying a default for the omitted column.
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the project files (two models plus the unit-test YAML)."""
        project_files = {}
        project_files["my_first_dbt_model.sql"] = my_first_dbt_model_sql
        project_files["my_second_dbt_model.sql"] = my_second_dbt_model_sql
        project_files["unit_tests.yml"] = test_my_model_yml
        return project_files

    def test_missing_column_values(self, project):
        """
        Build both models, then execute the unit test that omits a column.

        Both dbt invocations go through ``run_dbt``; the assertions only pin
        how many results each invocation reports.
        """
        # Materialise the two models first.
        model_results = run_dbt(["run"])
        assert len(model_results) == 2

        # Then run only the unit tests; exactly one is defined.
        unit_test_results = run_dbt(["test", "--select", "test_type:unit"])
        assert len(unit_test_results) == 1
160 changes: 160 additions & 0 deletions tests/integration/adapter/utils/test_safe_cast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import pytest
from datetime import datetime, date, timezone
from uuid import UUID
from dbt.tests.util import run_dbt


# Model that renders safe_cast once per ClickHouse type under test.
# Most columns pass the literal string "null" as the field; the last three pass
# real SQL expressions. TestSafeCast.test_safe_cast_defaults reads the resulting
# row by position, so the column order here must stay in sync with that test.

safe_cast_model_sql = """
select
-- String types
{{ safe_cast("null", "String") }} as string_default,
{{ safe_cast("null", "FixedString(10)") }} as fixedstring_default,

-- Integer types
{{ safe_cast("null", "Int32") }} as int_default,
{{ safe_cast("null", "UInt32") }} as uint_default,

-- Floating point types
{{ safe_cast("null", "Float32") }} as float_default,
{{ safe_cast("null", "Decimal(10, 2)") }} as decimal_default,

-- Date/Time types
{{ safe_cast("null", "Date") }} as date_default,
{{ safe_cast("null", "DateTime") }} as datetime_default,
{{ safe_cast("null", "DateTime64(3)") }} as datetime64_default,
{{ safe_cast("null", "DateTime('Europe/Paris')") }} as datetime_tz_default,

-- Other types
{{ safe_cast("null", "UUID") }} as uuid_default,
{{ safe_cast("null", "Bool") }} as bool_default,

-- Complex types
{{ safe_cast("null", "Array(String)") }} as array_default,
{{ safe_cast("null", "Map(String, Int32)") }} as map_default,
{{ safe_cast("null", "Tuple(String, Int32)") }} as tuple_default,

-- Nullable
{{ safe_cast("null", "Nullable(String)") }} as nullable_default,

-- Provided values (non-null)
{{ safe_cast("'Alice'", "String") }} as provided_string,
{{ safe_cast("42", "Int32") }} as provided_int,
{{ safe_cast("toUUID('00000000-0000-0000-0000-000000000001')", "UUID") }} as provided_uuid
"""


class TestSafeCast:
    """Integration checks for the ClickHouse safe_cast macro."""

    @pytest.fixture(scope="class")
    def models(self):
        """Return the single model file used by this class."""
        model_files = {"safe_cast_test.sql": safe_cast_model_sql}
        return model_files

    @pytest.fixture(scope="class", autouse=True)
    def setup(self, project):
        """Build the model a single time before the tests of this class run."""
        run_results = run_dbt(["run", "--select", "safe_cast_test"])
        assert len(run_results) == 1
        yield

    def test_safe_cast_defaults(self, project):
        """Check, column by column, the values produced by safe_cast."""
        row = project.run_sql("select * from safe_cast_test", fetch="one")

        epoch = datetime(1970, 1, 1, 0, 0, 0)

        # String default
        assert row[0] == ''

        # FixedString(10) default: depending on the driver this comes back as
        # raw null bytes or as a str (possibly with the nulls stripped).
        fixed_value = row[1]
        if isinstance(fixed_value, (bytes, bytearray)):
            assert fixed_value == b'\x00' * 10
        else:
            assert fixed_value in ('', '\x00' * 10)

        # Int32, UInt32, Float32, Decimal, Date, DateTime, DateTime64 defaults
        for position, expected in (
            (2, 0),
            (3, 0),
            (4, 0.0),
            (5, 0.0),
            (6, date(1970, 1, 1)),
            (7, epoch),
            (8, epoch),
        ):
            assert row[position] == expected

        # Timezone-aware DateTime: normalise to UTC before comparing so the
        # column's own zone does not shift the expected instant.
        assert row[9].astimezone(timezone.utc) == epoch.replace(tzinfo=timezone.utc)

        # UUID and Bool defaults
        assert row[10] == UUID('00000000-0000-0000-0000-000000000000')
        assert row[11] is False

        # Array, Map, Tuple defaults
        for position, expected in (
            (12, []),
            (13, {}),
            (14, ('', 0)),
        ):
            assert row[position] == expected

        # Nullable default
        assert row[15] is None

        # Explicitly provided values must come back unchanged
        for position, expected in (
            (16, 'Alice'),
            (17, 42),
            (18, UUID('00000000-0000-0000-0000-000000000001')),
        ):
            assert row[position] == expected

    def test_safe_cast_types(self, project):
        """Check the ClickHouse column type reported for every model column."""
        rows = project.run_sql(
            "SELECT name, type FROM system.columns "
            "WHERE table = 'safe_cast_test' AND database = currentDatabase() "
            "ORDER BY name",
            fetch="all"
        )

        # name -> type lookup built from the (name, type) rows
        actual_types = dict((entry[0], entry[1]) for entry in rows)

        expected_types = (
            # String types
            ('string_default', 'String'),
            ('fixedstring_default', 'FixedString(10)'),
            # Integer types
            ('int_default', 'Int32'),
            ('uint_default', 'UInt32'),
            # Floating point types
            ('float_default', 'Float32'),
            ('decimal_default', 'Decimal(10, 2)'),
            # Date/Time types
            ('date_default', 'Date'),
            ('datetime_default', 'DateTime'),
            ('datetime64_default', 'DateTime64(3)'),
            ('datetime_tz_default', "DateTime('Europe/Paris')"),
            # Other types
            ('uuid_default', 'UUID'),
            ('bool_default', 'Bool'),
            # Complex types
            ('array_default', 'Array(String)'),
            ('map_default', 'Map(String, Int32)'),
            ('tuple_default', 'Tuple(String, Int32)'),
            # Nullable
            ('nullable_default', 'Nullable(String)'),
            # Provided values
            ('provided_string', 'String'),
            ('provided_int', 'Int32'),
            ('provided_uuid', 'UUID'),
        )

        for column_name, expected_type in expected_types:
            assert actual_types[column_name] == expected_type