From d8b03886616f42c77458ee4447db55227008f0b9 Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Fri, 17 Feb 2023 14:31:18 -0800 Subject: [PATCH] Include columns provided by a user as part of the seed model definition into the snapshot fingerprint --- docs/concepts/audits.md | 2 +- sqlmesh/core/snapshot/definition.py | 3 +++ tests/core/test_snapshot.py | 39 +++++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/docs/concepts/audits.md b/docs/concepts/audits.md index e645fa89f7..c0c7a4c6f6 100644 --- a/docs/concepts/audits.md +++ b/docs/concepts/audits.md @@ -1,5 +1,5 @@ # Auditing -Audits are one of the tools SQLMesh provides to validate your models. Along with [tests](tests.md), they are a great way to ensure the quality of your data and to build trust in it across your organization. +Audits are one of the tools SQLMesh provides to validate your models. Along with [tests](tests.md), they are a great way to ensure the quality of your data and to build trust in it across your organization. Unlike tests, audits are used to validate the output of a model after every evaluation. A comprehensive suite of audits can identify data issues upstream, whether they are from your vendors or other teams. Audits also empower your data engineers and analysts to work with confidence by catching problems early as they work on new features or make updates to your models. diff --git a/sqlmesh/core/snapshot/definition.py b/sqlmesh/core/snapshot/definition.py index 0198c40e37..b65fadc17f 100644 --- a/sqlmesh/core/snapshot/definition.py +++ b/sqlmesh/core/snapshot/definition.py @@ -721,6 +721,9 @@ def _model_data_hash(model: Model, physical_schema: str) -> str: elif isinstance(model, SeedModel): data.append(str(model.kind.batch_size)) data.append(model.seed.content) + for column_name, column_type in (model.columns_to_types_ or {}).items(): + data.append(column_name) + data.append(column_type.sql()) return _hash(data) diff --git a/tests/core/test_snapshot.py b/tests/core/test_snapshot.py index 803ee51eea..e01c3cfeba 100644 --- a/tests/core/test_snapshot.py +++ b/tests/core/test_snapshot.py @@ -1,12 +1,13 @@ import json +from pathlib import Path import pytest from _pytest.monkeypatch import MonkeyPatch from pytest_mock.plugin import MockerFixture -from sqlglot import parse_one +from sqlglot import exp, parse, parse_one from sqlmesh.core.macros import macro -from sqlmesh.core.model import Model, SqlModel +from sqlmesh.core.model import Model, SqlModel, load_model from sqlmesh.core.snapshot import ( Snapshot, SnapshotChangeCategory, @@ -275,6 +276,40 @@ def test_fingerprint(model: Model, parent_model: Model): assert new_fingerprint != fingerprint_from_model(model, models={}) +def test_fingerprint_seed_model(): + expressions = parse( + """ + MODEL ( + name db.seed, + kind SEED ( + path '../seeds/waiter_names.csv' + ) + ); + """ + ) + + expected_fingerprint = SnapshotFingerprint( + data_hash="941582290", + metadata_hash="2750000337", + ) + + model = load_model(expressions, path=Path("./examples/sushi/models/test_model.sql")) + actual_fingerprint = fingerprint_from_model(model, models={}) + assert actual_fingerprint == expected_fingerprint + + updated_model = model.copy( + update={ + "columns_to_types_": { + "id": exp.DataType.build("int"), + "name": exp.DataType.build("text"), + } + } + ) + updated_actual_fingerprint = fingerprint_from_model(updated_model, models={}) + assert updated_actual_fingerprint.data_hash != expected_fingerprint.data_hash + assert updated_actual_fingerprint.metadata_hash == expected_fingerprint.metadata_hash + + def test_stamp(model: Model): original_fingerprint = fingerprint_from_model(model, models={})