In [0]:
# Register a text widget called "env" (default value "dev") and read back its
# current value; the catalog name used by later cells is derived from it.
env_widget = "env"
dbutils.widgets.text(env_widget, "dev")
env = dbutils.widgets.get(env_widget)
catalog = f"supply_{env}"

In [0]:
# Echo the resolved catalog name as the cell output so the target of this run
# is visible before any table is read or written.
catalog

In [0]:
# Load the transactions table that the validation below runs against.
# NOTE(review): the variable is named `df_bronze` but the table lives in the
# `silver` schema — confirm which layer is intended before renaming either.
transactions_table = f"{catalog}.silver.transactions"
df_bronze = spark.table(transactions_table)



In [0]:
# df_bronze_dirty = spark.table(f"{catalog}.bronze.makeup_supply_chain_dirty")
# display(df_bronze_dirty.limit(10))

In [0]:
from utils.config_loader import load_config
# Look up the path of the currently running notebook from the dbutils context.
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
nb_path = notebook_context.notebookPath().get()

# Load the "string_rules" entry of the "validation" config.
# NOTE(review): presumably nb_path is used by load_config to locate the config
# files relative to this notebook — confirm against utils.config_loader.
string_rules = load_config("string_rules", "validation", nb_path)

In [0]:
# Run this cell after modifying any utils modules.
# It evicts every cached `utils` module from sys.modules so that the import
# below re-executes the package's current source instead of reusing the
# modules already loaded in this Python session.
import sys

# Remove all cached utils modules.
# list() snapshots the keys because entries are deleted during iteration.
# Only the `utils` package itself and its dotted submodules are matched: a
# bare startswith('utils') would also evict unrelated top-level modules whose
# names merely begin with "utils" (e.g. `utils_extra`).
for module in list(sys.modules):
    if module == 'utils' or module.startswith('utils.'):
        del sys.modules[module]

# Re-import so `run_validation` is bound to the freshly loaded code.
# Names imported from `utils` before this cell ran (such as `load_config`)
# are not re-imported here and keep pointing at the previously loaded module.
from utils.validation.validation_orchestrator import run_validation

print("✓ Modules reloaded successfully")

In [0]:
# Run the validation over the loaded transactions with the configured rules.
report = run_validation(df_bronze, string_rules)

# Pull the issues object out of the nested report: report -> "field" ->
# "result" -> "issues". A later cell treats a value of None as "nothing to
# persist".
field_section = report["field"]
failed_df = field_section["result"]["issues"]

In [0]:
# Echo the full validation report as the cell output for inspection.
report

In [0]:
# Persist the validation issues whenever an issues DataFrame is present.
# The guard only checks for None, so an empty DataFrame is still written and
# replaces the previous contents of the table (mode "overwrite").
if failed_df is not None:
    issues_table = f"{catalog}.silver.silver_validation_issues"
    failed_df.write.mode("overwrite").saveAsTable(issues_table)
    # Print up to 200 rows with column values left untruncated.
    failed_df.show(n=200, truncate=False)

# Enforce the gate AFTER persistence, so the issues table is written before
# the notebook is failed. The gate only applies to the environments listed
# below; for any other `env` value the report status is not inspected at all.
gated_envs = ("dev", "prod")
blocking_statuses = ("VALIDATION_FAILED", "VALIDATION_SYSTEM_ERROR")
if env in gated_envs and report["final_status"] in blocking_statuses:
    raise Exception(
        f"Validation failed with status: {report['final_status']}"
    )
