diff --git a/sqlmesh/core/model/seed.py b/sqlmesh/core/model/seed.py index 3202d8e848..c6dbb7245f 100644 --- a/sqlmesh/core/model/seed.py +++ b/sqlmesh/core/model/seed.py @@ -7,6 +7,7 @@ import pandas as pd from sqlglot import exp +from sqlglot.dialects.dialect import UNESCAPED_SEQUENCES from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlmesh.core.model.common import parse_bool @@ -39,7 +40,11 @@ def _bool_validator(cls, v: t.Any) -> t.Optional[bool]: def _str_validator(cls, v: t.Any) -> t.Optional[str]: if v is None or not isinstance(v, exp.Expression): return v - return v.this + + # SQLGlot parses escape sequences like \t as \\t for dialects that don't treat \ as + # an escape character, so we map them back to the corresponding escaped sequence + v = v.this + return UNESCAPED_SEQUENCES.get(v, v) class CsvSeedReader: diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 630ce641e0..c7277141c5 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -895,14 +895,29 @@ def test_seed_with_special_characters_in_column(tmp_path, assert_exp_eq): model_csv_path = (tmp_path / "model.csv").absolute() with open(model_csv_path, "w", encoding="utf-8") as fd: - fd.write("col.\n123") + fd.write("col.\tcol!@#$\n123\tfoo") - model = create_seed_model("memory.test_db.test_model", SeedKind(path=str(model_csv_path))) - context.upsert_model(model) + expressions = d.parse( + f""" + MODEL ( + name memory.test_db.test_model, + kind SEED ( + path '{model_csv_path}', + csv_settings ( + delimiter = '\\t' + ) + ), + ); + """ + ) + context.upsert_model(load_sql_based_model(expressions)) assert_exp_eq( context.render("memory.test_db.test_model").sql(), - 'SELECT CAST("col." AS BIGINT) AS "col." FROM (VALUES (123)) AS t("col.")', + "SELECT " + 'CAST("col." AS BIGINT) AS "col.", ' + 'CAST("col!@#$" AS TEXT) AS "col!@#$" ' + """FROM (VALUES (123, 'foo')) AS t("col.", "col!@#$")""", )