# Test Suite: bronze.load_log DDL Validation

**Purpose:** Validate the structure and configuration of the `bronze.load_log` table.

**Scope:**
- Table existence and schema placement
- Column definitions (names, types, nullability)
- Index creation and naming
- Constraint definitions
- Extension requirements (pgcrypto)

**Usage:**
```bash
# Run all tests
pytest tests/tests_bronze/test_ddl_bronze_log.ipynb --nbmake

# Or run interactively in this notebook
```

**Prerequisites:**
- Database connection configured
- `scripts/bronze/ddl_bronze_log.sql` has been executed
- Required packages: psycopg2, pytest, ipytest, pandas (plus nbmake for the `pytest --nbmake` command above)

## Setup: Import Dependencies & Configure Connection

In [None]:
import os
import psycopg2
from psycopg2 import sql
import pytest
import ipytest
import pandas as pd

# Configure ipytest for notebook usage. raise_on_error=True makes a failing
# test run raise inside the cell, so executing the notebook non-interactively
# (e.g. via `pytest --nbmake`) reports the failure instead of passing silently.
ipytest.autoconfig(raise_on_error=True)

# Database connection parameters, passed as keyword arguments to
# psycopg2.connect(). Every value can be overridden through an environment
# variable so the notebook runs unchanged against another server (CI, Docker);
# the fallbacks are the original local-development values.
DB_CONFIG = {
    'host': os.getenv('POSTGRES_HOST', 'localhost'),
    'database': os.getenv('POSTGRES_DB', 'sql_retail_analytics_warehouse'),
    'user': os.getenv('POSTGRES_USER', 'postgres'),
    # 'your_password_here' is only a placeholder: set POSTGRES_PASSWORD.
    'password': os.getenv('POSTGRES_PASSWORD', 'your_password_here'),
}

print("✅ Dependencies imported successfully")

## Fixture: Database Connection

In [None]:
@pytest.fixture(scope='module')
def db_connection():
    """Yield one psycopg2 connection shared by the module's tests.

    The connection is opened from DB_CONFIG with autocommit switched on and
    is closed once the fixture is torn down.
    """
    connection = psycopg2.connect(**DB_CONFIG)
    # Autocommit: every statement is committed on its own, no explicit commit.
    connection.autocommit = True
    yield connection
    connection.close()

@pytest.fixture(scope='module')
def db_cursor(db_connection):
    """Yield a cursor opened on ``db_connection``; close it on teardown."""
    cur = db_connection.cursor()
    yield cur
    cur.close()

print("✅ Fixtures defined")

## Test 1: Extension and Schema Validation

In [None]:
%%ipytest -vv

def test_pgcrypto_extension_exists(db_cursor):
    """Check that exactly one pg_extension row is named 'pgcrypto'."""
    extension_query = """
        SELECT COUNT(*) 
        FROM pg_extension 
        WHERE extname = 'pgcrypto'
    """
    db_cursor.execute(extension_query)
    row = db_cursor.fetchone()
    installed = row[0]
    assert installed == 1, "pgcrypto extension must be installed"

def test_bronze_schema_exists(db_cursor):
    """Check that information_schema lists exactly one schema named 'bronze'."""
    schema_query = """
        SELECT COUNT(*) 
        FROM information_schema.schemata 
        WHERE schema_name = 'bronze'
    """
    db_cursor.execute(schema_query)
    row = db_cursor.fetchone()
    schema_total = row[0]
    assert schema_total == 1, "bronze schema must exist"

## Test 2: Table Existence and Basic Structure

In [None]:
%%ipytest -vv

def test_load_log_table_exists(db_cursor):
    """Check that information_schema.tables has one row for bronze.load_log."""
    table_query = """
        SELECT COUNT(*) 
        FROM information_schema.tables 
        WHERE table_schema = 'bronze' 
          AND table_name = 'load_log'
    """
    db_cursor.execute(table_query)
    row = db_cursor.fetchone()
    table_total = row[0]
    assert table_total == 1, "bronze.load_log table must exist"

def test_load_log_column_count(db_cursor):
    """Check that bronze.load_log exposes exactly 11 columns."""
    column_count_query = """
        SELECT COUNT(*) 
        FROM information_schema.columns 
        WHERE table_schema = 'bronze' 
          AND table_name = 'load_log'
    """
    db_cursor.execute(column_count_query)
    row = db_cursor.fetchone()
    column_total = row[0]
    assert column_total == 11, "bronze.load_log should have 11 columns"

## Test 3: Column Definitions

In [None]:
%%ipytest -vv

def test_load_log_column_definitions(db_cursor):
    """Compare each expected column's data type and nullability with the catalog.

    Columns are read from information_schema.columns. Every expected column
    must be present, report the expected ``data_type`` and the expected
    ``is_nullable`` flag ('YES' / 'NO').
    """
    # (column name, data_type, is_nullable) as reported by information_schema.
    expected_columns = {
        name: {'type': data_type, 'nullable': nullable}
        for name, data_type, nullable in (
            ('id', 'bigint', 'NO'),
            ('run_id', 'uuid', 'NO'),
            ('phase', 'text', 'NO'),
            ('table_name', 'text', 'YES'),
            ('file_path', 'text', 'YES'),
            ('status', 'text', 'NO'),
            ('rows_loaded', 'bigint', 'YES'),
            ('started_at', 'timestamp with time zone', 'NO'),
            ('finished_at', 'timestamp with time zone', 'YES'),
            ('duration_sec', 'integer', 'YES'),
            ('message', 'text', 'YES'),
        )
    }

    db_cursor.execute("""
        SELECT 
            column_name,
            data_type,
            is_nullable
        FROM information_schema.columns
        WHERE table_schema = 'bronze'
          AND table_name = 'load_log'
        ORDER BY ordinal_position
    """)

    # Index the catalog rows by column name for direct lookup.
    column_dict = {}
    for row in db_cursor.fetchall():
        column_dict[row[0]] = {'type': row[1], 'nullable': row[2]}

    for col_name, expected in expected_columns.items():
        assert col_name in column_dict, f"Column '{col_name}' must exist"
        actual = column_dict[col_name]
        assert actual['type'] == expected['type'], \
            f"Column '{col_name}' should be {expected['type']}, got {actual['type']}"
        assert actual['nullable'] == expected['nullable'], \
            f"Column '{col_name}' nullable mismatch"

def test_load_log_primary_key(db_cursor):
    """Check that the primary-key index of bronze.load_log covers only 'id'."""
    primary_key_query = """
        SELECT a.attname
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = 'bronze.load_log'::regclass
          AND i.indisprimary
    """
    db_cursor.execute(primary_key_query)
    rows = db_cursor.fetchall()
    # Each row holds a single value: the name of one primary-key column.
    pk_columns = [row[0] for row in rows]
    assert pk_columns == ['id'], "Primary key should be 'id' column only"

## Test 4: Index Validation

In [None]:
%%ipytest -vv

def test_load_log_indexes_exist(db_cursor):
    """Check that every expected index on bronze.load_log is listed in pg_indexes."""
    # Explicitly created indexes; the primary-key index is not listed here.
    expected_indexes = (
        'idx_load_log_duration',
        'idx_load_log_file_path',
        'idx_load_log_finished_at',
        'idx_load_log_phase',
        'idx_load_log_rows_loaded',
        'idx_load_log_run_id',
        'idx_load_log_started_at',
        'idx_load_log_status',
        'idx_load_log_table_name',
    )

    index_query = """
        SELECT indexname
        FROM pg_indexes
        WHERE schemaname = 'bronze'
          AND tablename = 'load_log'
        ORDER BY indexname
    """
    db_cursor.execute(index_query)
    rows = db_cursor.fetchall()
    indexes = [row[0] for row in rows]

    for idx_name in expected_indexes:
        assert idx_name in indexes, f"Index '{idx_name}' must exist"

    # 9 explicit indexes plus the primary-key index gives a minimum of 10.
    assert len(indexes) >= 10, f"Expected at least 10 indexes, found {len(indexes)}"

## Test 5: Constraint Validation

In [None]:
%%ipytest -vv

def test_load_log_check_constraints(db_cursor):
    """Check the CHECK constraints on status and phase by name and allowed values.

    Each allowed value is matched as a substring of the constraint definition
    returned by pg_get_constraintdef().
    """
    constraint_query = """
        SELECT conname, pg_get_constraintdef(oid) AS definition
        FROM pg_constraint
        WHERE conrelid = 'bronze.load_log'::regclass
          AND contype = 'c'
        ORDER BY conname
    """
    db_cursor.execute(constraint_query)

    # Rows are (constraint name, definition) pairs, so they map straight to a dict.
    constraints = dict(db_cursor.fetchall())

    # status: constraint must exist and mention both allowed values.
    assert 'load_log_status_chk' in constraints, "status CHECK constraint must exist"
    status_definition = constraints['load_log_status_chk']
    for status in ('OK', 'ERROR'):
        assert status in status_definition, f"status constraint should include '{status}'"

    # phase: constraint must exist and mention every load phase.
    assert 'load_log_phase_chk' in constraints, "phase CHECK constraint must exist"
    phase_definition = constraints['load_log_phase_chk']
    for phase in ('START', 'TRUNCATE', 'COPY', 'SEPARATOR', 'FINISH', 'ERROR'):
        assert phase in phase_definition, \
            f"phase constraint should include '{phase}'"

## Test 6: Default Values and Sequences

In [None]:
%%ipytest -vv

def test_load_log_id_sequence(db_cursor):
    """Verify id column uses a sequence (BIGSERIAL).

    Reads ``column_default`` for bronze.load_log.id from
    information_schema.columns and expects it to contain a ``nextval`` call.
    """
    db_cursor.execute("""
        SELECT column_default
        FROM information_schema.columns
        WHERE table_schema = 'bronze'
          AND table_name = 'load_log'
          AND column_name = 'id'
    """)

    # fetchone() returns None when the column (or table) is missing; fail with
    # a readable assertion instead of a TypeError from indexing None.
    row = db_cursor.fetchone()
    assert row is not None, "bronze.load_log must have an 'id' column"

    default_value = row[0]
    assert default_value is not None, "id column should have a default value"
    assert 'nextval' in default_value, "id should use a sequence"

def test_load_log_started_at_default(db_cursor):
    """Verify started_at has clock_timestamp() default.

    Reads ``column_default`` for bronze.load_log.started_at from
    information_schema.columns; the match on ``clock_timestamp`` is
    case-insensitive.
    """
    db_cursor.execute("""
        SELECT column_default
        FROM information_schema.columns
        WHERE table_schema = 'bronze'
          AND table_name = 'load_log'
          AND column_name = 'started_at'
    """)

    # fetchone() returns None when the column (or table) is missing; fail with
    # a readable assertion instead of a TypeError from indexing None.
    row = db_cursor.fetchone()
    assert row is not None, "bronze.load_log must have a 'started_at' column"

    default_value = row[0]
    assert default_value is not None, "started_at should have a default value"
    assert 'clock_timestamp' in default_value.lower(), "started_at should default to clock_timestamp()"

## Test 7: Integration Test - Insert and Verify

In [None]:
%%ipytest -vv

def test_load_log_insert_and_query(db_cursor):
    """Test inserting a record and querying it back.

    Inserts one START/OK row tagged with a fresh run_id, checks the values
    echoed by RETURNING, and always deletes the row again, even when an
    assertion fails.
    """
    import uuid

    # Generate a unique run_id for this test. It is bound as text because
    # psycopg2 only adapts uuid.UUID objects after
    # psycopg2.extras.register_uuid() has been called; PostgreSQL casts the
    # literal to uuid on insert.
    test_run_id = str(uuid.uuid4())

    insert_sql = """
        INSERT INTO bronze.load_log (
            run_id, phase, status, message
        ) VALUES (
            %s, 'START', 'OK', 'Test run from pytest'
        )
        RETURNING id, run_id, phase, status
    """
    delete_sql = """
        DELETE FROM bronze.load_log 
        WHERE run_id = %s
    """

    try:
        # Insert a test record
        db_cursor.execute(insert_sql, (test_run_id,))

        result = db_cursor.fetchone()
        assert result is not None, "Insert should return a row"
        # The driver returns uuid columns as str unless register_uuid() is
        # active; str() makes the comparison correct in both cases.
        assert str(result[1]) == test_run_id, "run_id should match"
        assert result[2] == 'START', "phase should be START"
        assert result[3] == 'OK', "status should be OK"
    finally:
        # Clean up test data. This must run on failure too: the shared
        # connection is in autocommit mode, so the insert is already durable.
        db_cursor.execute(delete_sql, (test_run_id,))

    print(f"✅ Successfully inserted and deleted test record with run_id: {test_run_id}")

## Summary: Run All Tests

In [None]:
# Run all tests in this notebook: one more ipytest invocation, passing
# pytest's '-vv' (extra verbose) flag, so the results of the test functions
# defined in the cells above are reported together.
ipytest.run('-vv')

## Manual Inspection Queries

Use these queries to manually inspect the table structure:

In [None]:
# Connect and display table structure.
# The three catalog queries are defined first so the connection is held only
# while they run, and the try/finally guarantees it is closed even if a query
# or display call raises.

# View all columns
columns_query = """
    SELECT 
        column_name,
        data_type,
        is_nullable,
        column_default
    FROM information_schema.columns
    WHERE table_schema = 'bronze'
      AND table_name = 'load_log'
    ORDER BY ordinal_position
"""

# View all indexes
indexes_query = """
    SELECT 
        indexname,
        indexdef
    FROM pg_indexes
    WHERE schemaname = 'bronze'
      AND tablename = 'load_log'
    ORDER BY indexname
"""

# View constraints
constraints_query = """
    SELECT 
        conname AS constraint_name,
        contype AS constraint_type,
        pg_get_constraintdef(oid) AS definition
    FROM pg_constraint
    WHERE conrelid = 'bronze.load_log'::regclass
    ORDER BY conname
"""

conn = psycopg2.connect(**DB_CONFIG)
try:
    df_columns = pd.read_sql(columns_query, conn)
    print("\n📊 bronze.load_log Columns:")
    display(df_columns)

    df_indexes = pd.read_sql(indexes_query, conn)
    print("\n📇 bronze.load_log Indexes:")
    display(df_indexes)

    df_constraints = pd.read_sql(constraints_query, conn)
    print("\n🔒 bronze.load_log Constraints:")
    display(df_constraints)
finally:
    conn.close()