In [0]:
from pathlib import Path

# Ask the notebook context for the path of the notebook that is currently running.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
nb_path = notebook_context.notebookPath().get()

# parents[1] is the parent of the directory that contains this notebook; the
# validation configs are looked up relative to that directory.
repo_root = Path("/Workspace" + nb_path).parents[1]
rules_path = repo_root.joinpath("configs", "validation", "string_rules.yaml")

# Stop immediately, reporting the resolved location, when the rules file is absent.
if rules_path.exists():
    print(f"Using validation rules from: {rules_path}")
else:
    raise FileNotFoundError(f"Validation rules file not found at {rules_path}")

In [0]:
import yaml

# Parse the string-validation rules from the YAML file at rules_path.
# The encoding is pinned to UTF-8 so decoding does not depend on the locale
# default of whatever machine runs the notebook; safe_load is used so the file
# can only produce plain Python objects.
with open(rules_path, "r", encoding="utf-8") as f:
    string_rules = yaml.safe_load(f)

print(string_rules)

In [0]:
from utils.validation.validation_orchestrator import run_validation
from utils.config_loader import load_config

def silver_validate_pipeline(
    silver_df,
    config_path="configs/validation/products_validation.yaml",
    id_cols=None,
):
    """Load a validation config and run it against a silver DataFrame.

    Args:
        silver_df: DataFrame to validate; handed to ``run_validation`` unchanged.
        config_path: Path passed to ``load_config``. Defaults to the products
            validation config. NOTE(review): the default is a relative path;
            confirm how ``load_config`` resolves it.
        id_cols: Column names forwarded to ``run_validation`` as ``id_cols``.
            Defaults to ``["sku"]`` when omitted.

    Returns:
        The value returned by ``run_validation`` (the validation report).
    """
    validation_config = load_config(config_path)

    # Build the default list per call so no mutable default is shared between calls.
    if id_cols is None:
        id_cols = ["sku"]

    return run_validation(silver_df, validation_config, id_cols=id_cols)

In [0]:
# The target environment is read from the "env" notebook widget (default "dev").
dbutils.widgets.text("env", "dev")

# Strip stray whitespace so a padded widget value cannot end up inside the
# catalog name, and fail fast on an empty value instead of building "supply_"
# and hitting an obscure table-lookup error later.
env = dbutils.widgets.get("env").strip()
if not env:
    raise ValueError("Widget 'env' must not be empty")

catalog = f"supply_{env}"

In [0]:
# Fully qualified name of the source table in the selected catalog's bronze schema.
bronze_table = f"{catalog}.bronze.makeup_supply_chain_dirty"
df_bronze = spark.table(bronze_table)

# Show only the first 10 rows as a preview.
display(df_bronze.limit(10))

In [0]:
# Apply the loaded string rules to the bronze DataFrame and display the result.
# NOTE(review): validate_strings is not imported or defined in the cells visible
# here — confirm it is brought into scope elsewhere before this cell runs.
violations = validate_strings(df_bronze, string_rules)
display(violations)
