# V1 Critical Seed (Databricks)

Seeds only POC-critical data:
- dropdown/default lookup options
- Help Center content

This notebook intentionally skips the full synthetic test seed.

In [None]:
# Notebook parameters: target catalog/schema and the folder holding the seed
# SQL scripts. The second argument of each widget is its default value.
dbutils.widgets.text('catalog', 'vendorcat_dev')
dbutils.widgets.text('schema', 'vendorcat_v1')
dbutils.widgets.text('seed_sql_root', '/Workspace/Repos/PrideRock-CoPilot/VendorCat/setup/v1_schema/databricks')

# Surrounding whitespace is stripped so a blank-only value counts as missing.
catalog = dbutils.widgets.get('catalog').strip()
schema = dbutils.widgets.get('schema').strip()
seed_sql_root = dbutils.widgets.get('seed_sql_root').strip()

# Validate with explicit raises rather than `assert`: assertions are removed
# when Python runs with -O, which would let an empty value reach the USE
# statements below.
for _param_name, _param_value in (
    ('catalog', catalog),
    ('schema', schema),
    ('seed_sql_root', seed_sql_root),
):
    if not _param_value:
        raise ValueError(f'{_param_name} parameter is required')

# Make the target catalog/schema current for the rest of the session.
spark.sql(f"USE CATALOG `{catalog}`")
spark.sql(f"USE SCHEMA `{schema}`")
print(f'Seeding critical data for catalog={catalog} schema={schema}')
print(f'SQL root: {seed_sql_root}')

In [None]:
import re
from pathlib import Path

# Matches the two supported placeholders: ${CATALOG} and ${SCHEMA}.
token_pattern = re.compile(r'\$\{(CATALOG|SCHEMA)\}')


def render_sql(sql_text: str, catalog_name: str, schema_name: str) -> str:
    """Return *sql_text* with ``${CATALOG}`` / ``${SCHEMA}`` placeholders filled in.

    Args:
        sql_text: SQL text that may contain the placeholders.
        catalog_name: Value substituted for every ``${CATALOG}``.
        schema_name: Value substituted for every ``${SCHEMA}``.

    Returns:
        The text with both placeholders replaced; any other ``${...}`` token
        is left untouched because the pattern does not match it.
    """
    replacements = {'CATALOG': catalog_name, 'SCHEMA': schema_name}

    def _lookup(match):
        # A callable replacement is inserted verbatim, so backslashes in the
        # substituted value are not interpreted as group references.
        return replacements[match.group(1)]

    return token_pattern.sub(_lookup, sql_text)

def _split_sql_statements(sql_text: str) -> list:
    """Split SQL text into statements on top-level semicolons only.

    A semicolon inside a '...' or "..." literal, a `...` quoted identifier,
    a ``--`` line comment or a ``/* */`` block comment does not end a
    statement. Fragments containing nothing but whitespace and comments are
    dropped. Comments are kept inside the statement they belong to.
    """
    statements = []
    fragment = []      # pieces of the statement currently being assembled
    has_code = False   # True once the fragment holds more than whitespace/comments
    pos = 0
    end_of_text = len(sql_text)
    while pos < end_of_text:
        char = sql_text[pos]
        if sql_text.startswith('--', pos):
            # Line comment: copy up to (not including) the newline.
            stop = sql_text.find('\n', pos)
            stop = end_of_text if stop == -1 else stop
            fragment.append(sql_text[pos:stop])
            pos = stop
        elif sql_text.startswith('/*', pos):
            # Block comment: copy through the closing marker (or to the end
            # of the text when it is unterminated).
            stop = sql_text.find('*/', pos + 2)
            stop = end_of_text if stop == -1 else stop + 2
            fragment.append(sql_text[pos:stop])
            pos = stop
        elif char in '\'"`':
            # Quoted literal/identifier: copy through the matching quote.
            # A doubled quote simply closes and immediately reopens, which
            # yields the same split points as treating it as an escape.
            stop = pos + 1
            while stop < end_of_text and sql_text[stop] != char:
                # Inside '...' and "..." a backslash escapes the next char.
                escaped = sql_text[stop] == '\\' and char != '`'
                stop += 2 if escaped else 1
            stop = min(stop + 1, end_of_text)
            fragment.append(sql_text[pos:stop])
            has_code = True
            pos = stop
        elif char == ';':
            if has_code:
                statements.append(''.join(fragment).strip())
            fragment = []
            has_code = False
            pos += 1
        else:
            has_code = has_code or not char.isspace()
            fragment.append(char)
            pos += 1
    if has_code:
        # Final statement without a trailing semicolon.
        statements.append(''.join(fragment).strip())
    return statements


def execute_sql_script(file_path: str) -> None:
    """Render a SQL script and run each of its statements through ``spark.sql``.

    The file is read as UTF-8, its ``${CATALOG}`` / ``${SCHEMA}`` placeholders
    are filled from the notebook-level ``catalog`` and ``schema`` values, and
    the result is split into statements that are executed in file order.

    Args:
        file_path: Path of the SQL script to apply.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f'SQL file not found: {file_path}')
    raw = path.read_text(encoding='utf-8')
    rendered = render_sql(raw, catalog, schema)
    # A plain split(';') would cut statements at semicolons inside string
    # literals or comments, so use the quote/comment-aware splitter.
    statements = _split_sql_statements(rendered)
    for statement in statements:
        spark.sql(statement)
    print(f'Applied {path.name} ({len(statements)} statements)')

In [None]:
# Seed scripts, applied in the order listed: reference/lookup data first,
# then the Help Center content.
reference_data_sql = f'{seed_sql_root}/94_seed_critical_reference_data.sql'
help_center_sql = f'{seed_sql_root}/96_seed_help_center.sql'
seed_files = [reference_data_sql, help_center_sql]

for sql_file in seed_files:
    execute_sql_script(sql_file)

print('Critical seed completed.')

In [None]:
# Row-count checks for the seeded tables, keyed by the label printed for each.
validation_queries = dict(
    lookup_options='SELECT COUNT(*) AS c FROM app_lookup_option',
    help_articles='SELECT COUNT(*) AS c FROM vendor_help_article',
    help_feedback='SELECT COUNT(*) AS c FROM vendor_help_feedback',
    help_issues='SELECT COUNT(*) AS c FROM vendor_help_issue',
)

for name, sql_text in validation_queries.items():
    # Each query returns a single row whose first column is the count.
    result_rows = spark.sql(sql_text).collect()
    count = result_rows[0][0]
    print(f'{name}: {count}')