# Postgres Dictionary Validation

Checks dictionary tables against source-of-truth enums/maps and verifies ID -> normalized name mappings.

In [2]:
import sys
from pathlib import Path

sys.path.insert(0, str(Path().resolve().parent.parent))

from db.postgres import pool
from implementation.misc.helpers import normalize_string
from implementation.classes.enums import Genre, MaturityRating, StreamingAccessType
from implementation.classes.languages import Language
from implementation.classes.watch_providers import FILTERABLE_WATCH_PROVIDERS_MAP

In [11]:
if pool.closed:
    await pool.open()
await pool.check()

async def fetch_dictionary_rows(table: str, id_col: str, name_col: str) -> dict[int, str]:
    """Load an ID -> name mapping from a dictionary table.

    Args:
        table: Table to read from (the callers in this file pass
            schema-qualified names such as ``lex.genre_dictionary``).
        id_col: Column holding the numeric ID.
        name_col: Column holding the name stored for that ID.

    Returns:
        A dict keyed by ``int(id)`` with the stored name as value. If the
        table contains the same ID more than once, the last fetched row wins.

    Note:
        ``table``, ``id_col`` and ``name_col`` are interpolated into the SQL
        text verbatim, so they must be trusted identifiers, never user input.
    """
    sql = f"SELECT {id_col}, {name_col} FROM {table};"
    async with pool.connection() as connection, connection.cursor() as cursor:
        await cursor.execute(sql)
        records = await cursor.fetchall()

    # Coerce IDs to int so they compare equal to the integer keys of the
    # expected maps regardless of the type the driver hands back.
    id_to_name: dict[int, str] = {}
    for raw_id, stored_name in records:
        id_to_name[int(raw_id)] = stored_name
    return id_to_name

async def assert_dictionary_matches(
    *,
    table: str,
    id_col: str,
    name_col: str,
    expected: dict[int, str],
) -> None:
    """Verify that a dictionary table contains every expected ID -> name pair.

    The table is read with ``fetch_dictionary_rows`` and compared against
    ``expected``. IDs that exist only in the database do not fail the check;
    they are reported on a separate ``INFO`` line after the ``PASS`` line.

    Args:
        table: Table to validate.
        id_col: Column holding the numeric ID.
        name_col: Column holding the name stored for that ID.
        expected: Source-of-truth mapping of ID to name.

    Raises:
        AssertionError: If any expected ID is absent from the table, or if an
            ID present on both sides maps to a different name. Missing IDs
            are checked first, so only that failure is reported when both
            problems exist.
    """
    actual = await fetch_dictionary_rows(table, id_col, name_col)

    # The failures are raised explicitly instead of through `assert`
    # statements: `assert` is removed when Python runs with -O or
    # PYTHONOPTIMIZE, which would turn this validation into a no-op that
    # still prints PASS.
    missing_ids = sorted(set(expected) - set(actual))
    if missing_ids:
        raise AssertionError(f"{table}: missing IDs from DB: {missing_ids}")

    # Maps each disagreeing ID to (expected name, name found in the DB);
    # keys are visited in sorted order so the message is stable.
    mismatched = {
        key: (expected[key], actual[key])
        for key in sorted(set(expected) & set(actual))
        if expected[key] != actual[key]
    }
    if mismatched:
        raise AssertionError(f"{table}: ID/name mismatches: {mismatched}")

    unexpected_ids = sorted(set(actual) - set(expected))
    print(f"PASS {table}: {len(expected)} expected rows matched")
    if unexpected_ids:
        # Extra rows are informational only and never fail the check.
        print(f"INFO {table}: unexpected extra IDs in DB: {unexpected_ids}")

In [12]:
expected_genres = {genre.genre_id: genre.normalized_name for genre in Genre}
expected_languages = {language.language_id: normalize_string(language.value) for language in Language}
expected_providers = {provider_id: normalize_string(name) for provider_id, name in FILTERABLE_WATCH_PROVIDERS_MAP.items()}
expected_watch_methods = {method.type_id: normalize_string(method.value) for method in StreamingAccessType}
# UNRATED (999) is intentionally excluded because batch_upsert_maturity_dictionary excludes it.
expected_maturity = {
    maturity.maturity_rank: maturity.value
    for maturity in MaturityRating
    if maturity != MaturityRating.UNRATED
}

await assert_dictionary_matches(
    table="lex.genre_dictionary",
    id_col="genre_id",
    name_col="name",
    expected=expected_genres,
)

await assert_dictionary_matches(
    table="lex.language_dictionary",
    id_col="language_id",
    name_col="name",
    expected=expected_languages,
)

await assert_dictionary_matches(
    table="lex.provider_dictionary",
    id_col="provider_id",
    name_col="name",
    expected=expected_providers,
)

await assert_dictionary_matches(
    table="lex.watch_method_dictionary",
    id_col="method_id",
    name_col="name",
    expected=expected_watch_methods,
)

await assert_dictionary_matches(
    table="lex.maturity_dictionary",
    id_col="maturity_rank",
    name_col="label",
    expected=expected_maturity,
)

PASS lex.genre_dictionary: 27 expected rows matched
PASS lex.language_dictionary: 334 expected rows matched
PASS lex.provider_dictionary: 19 expected rows matched
PASS lex.watch_method_dictionary: 3 expected rows matched
PASS lex.maturity_dictionary: 5 expected rows matched


In [None]:
# Optional cleanup when done:
# await pool.close()