# V1 Data Seed (Databricks)

Seeds the full synthetic functional test dataset into the V1 schema.
Use this only for deep testing. For a POC/demo-only seed, use `v1_seed_critical_data.ipynb` instead.
This notebook executes the SQL assets in `setup/v1_schema/databricks/95_seed_reference_data.sql` and `96_seed_help_center.sql`.

In [None]:
# Declare the notebook widgets together with their default values.
dbutils.widgets.text('catalog', 'vendorcat_dev')
dbutils.widgets.text('schema', 'vendorcat_v1')
dbutils.widgets.text('seed_sql_root', '/Workspace/Repos/PrideRock-CoPilot/VendorCat/setup/v1_schema/databricks')

# Read the widget values, trimming surrounding whitespace.
catalog = dbutils.widgets.get('catalog').strip()
schema = dbutils.widgets.get('schema').strip()
seed_sql_root = dbutils.widgets.get('seed_sql_root').strip()

# Validate with explicit exceptions rather than `assert`: assert statements are
# removed when Python runs with -O, which would silently skip these checks.
if not catalog:
    raise ValueError('catalog parameter is required')
if not schema:
    raise ValueError('schema parameter is required')
if not seed_sql_root:
    raise ValueError('seed_sql_root parameter is required')

# Make the target catalog/schema the session default. A backtick inside the
# value is doubled so it cannot terminate the delimited identifier early.
spark.sql(f"USE CATALOG `{catalog.replace('`', '``')}`")
spark.sql(f"USE SCHEMA `{schema.replace('`', '``')}`")
print(f'Seeding catalog={catalog} schema={schema}')
print(f'SQL root: {seed_sql_root}')

In [None]:
import re
from pathlib import Path

# Placeholder tokens of the form ${CATALOG} or ${SCHEMA}.
token_pattern = re.compile(r'\$\{(CATALOG|SCHEMA)\}')


def render_sql(sql_text: str, catalog_name: str, schema_name: str) -> str:
    """Return *sql_text* with its placeholder tokens filled in.

    Every ``${CATALOG}`` is replaced by *catalog_name* and every ``${SCHEMA}``
    by *schema_name*. Any other text, including other ``${...}`` tokens, is
    left untouched.
    """

    def substitute(match):
        # The pattern only ever captures 'CATALOG' or 'SCHEMA'.
        if match.group(1) == 'CATALOG':
            return catalog_name
        return schema_name

    # A callable replacement inserts the value verbatim, so backslashes in the
    # names are not interpreted as regex replacement escapes.
    return token_pattern.sub(substitute, sql_text)

def split_sql_statements(sql_text: str) -> list:
    """Split a SQL script into statements on top-level semicolons.

    A semicolon only ends a statement when it is outside single-quoted,
    double-quoted and backtick-quoted text, ``--`` line comments and
    ``/* ... */`` block comments. Comments are kept in the statement text
    (so optimizer hints survive), but a chunk holding nothing except
    whitespace and comments is dropped instead of being returned as a
    statement.

    Returns the statements as a list of stripped strings, in script order.
    """
    statements = []
    buffer = []
    has_code = False  # True once the current chunk holds non-comment text
    quote_char = ''   # the quote character we are inside of, if any
    pos = 0
    length = len(sql_text)

    while pos < length:
        char = sql_text[pos]
        pair = sql_text[pos:pos + 2]

        if quote_char:
            # Inside a string literal a backslash escapes the next character;
            # backtick-quoted identifiers have no backslash escapes.
            if char == '\\' and quote_char != '`':
                buffer.append(pair)
                pos += 2
                continue
            # A doubled quote simply closes and immediately reopens the quoted
            # run, which gives the same split points as treating it as an escape.
            if char == quote_char:
                quote_char = ''
            buffer.append(char)
            pos += 1
            continue

        if pair == '--' or pair == '/*':
            if pair == '--':
                end = sql_text.find('\n', pos)
            else:
                close = sql_text.find('*/', pos + 2)
                end = -1 if close == -1 else close + 2
            if end == -1:
                # Unterminated comment: it runs to the end of the script.
                end = length
            buffer.append(sql_text[pos:end])
            pos = end
            continue

        if char == ';':
            if has_code:
                statements.append(''.join(buffer).strip())
            buffer = []
            has_code = False
            pos += 1
            continue

        if char in '\'"`':
            quote_char = char
        if not char.isspace():
            has_code = True
        buffer.append(char)
        pos += 1

    # The last statement may not be terminated by a semicolon.
    if has_code:
        statements.append(''.join(buffer).strip())
    return statements


def execute_sql_script(file_path: str) -> None:
    """Render and execute every statement of the SQL script at *file_path*.

    The file is read as UTF-8, its placeholder tokens are filled in through
    ``render_sql`` using the notebook-level ``catalog`` and ``schema`` values,
    and each resulting statement is passed to ``spark.sql`` in script order.
    A one-line summary is printed once all statements have run.

    Raises:
        FileNotFoundError: if *file_path* does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f'SQL file not found: {file_path}')
    raw = path.read_text(encoding='utf-8')
    rendered = render_sql(raw, catalog, schema)
    # A plain split(';') would cut statements at semicolons that appear inside
    # string literals or comments, so use the quote/comment-aware splitter.
    statements = split_sql_statements(rendered)
    for statement in statements:
        spark.sql(statement)
    print(f'Applied {path.name} ({len(statements)} statements)')

In [None]:
# Seed scripts to apply, in execution order, resolved against the SQL root.
seed_file_names = ('95_seed_reference_data.sql', '96_seed_help_center.sql')
seed_files = [f'{seed_sql_root}/{file_name}' for file_name in seed_file_names]

# Apply the scripts one after another; an exception from any of them
# propagates and stops the run.
for sql_file in seed_files:
    execute_sql_script(sql_file)

print('V1 seed completed.')

In [None]:
# Row-count checks, keyed by the label printed next to each count.
validation_queries = dict(
    vendors='SELECT COUNT(*) AS c FROM core_vendor',
    offerings='SELECT COUNT(*) AS c FROM core_vendor_offering',
    contracts='SELECT COUNT(*) AS c FROM core_contract',
    projects='SELECT COUNT(*) AS c FROM app_project',
    help_articles='SELECT COUNT(*) AS c FROM vendor_help_article',
    role_grants='SELECT COUNT(*) AS c FROM sec_user_role_map WHERE active_flag = true',
)

# Run each query and print the first column of its first result row.
for name, sql_text in validation_queries.items():
    first_row = spark.sql(sql_text).collect()[0]
    count = first_row[0]
    print(f'{name}: {count}')