# Test Suite: public.etl_config Configuration Table

**Purpose:** Validate the structure, seeding, and behavior of the `public.etl_config` table

**Scope:**
- Table existence and schema placement
- Column definitions (names, types, constraints)
- Required configuration keys are seeded
- Path convention compliance (no trailing slashes)
- Idempotency of re-runs
- Primary key constraint enforcement

**Testing Strategy:**
- Structural validation (table, columns, constraints)
- Data validation (required keys, value formats)
- Behavioral testing (idempotency, conflict handling)
- Integration testing (path resolution)

**Prerequisites:**
- Database connection configured
- `setup/seed/01_etl_config.sql` has been executed
- Required packages: psycopg2, pytest, ipytest, pandas

## Setup: Import Dependencies & Configure Connection

In [None]:
import os
import psycopg2
from psycopg2 import sql
import pytest
import ipytest
import pandas as pd

# Configure ipytest for notebook usage: autoconfig() applies ipytest's
# default settings, which the `%%ipytest` cells further down rely on.
ipytest.autoconfig()

# Database connection parameters.
# Every value can be overridden through a POSTGRES_* environment variable so
# the notebook can run against a non-local database (CI, containers) without
# editing this cell. With no variables set, the defaults are unchanged.
DB_CONFIG = {
    'host': os.getenv('POSTGRES_HOST', 'localhost'),
    'database': os.getenv('POSTGRES_DB', 'sql_retail_analytics_warehouse'),
    'user': os.getenv('POSTGRES_USER', 'postgres'),
    # Placeholder fallback only: export POSTGRES_PASSWORD instead of editing it.
    'password': os.getenv('POSTGRES_PASSWORD', 'your_password_here'),
}

print("✅ Dependencies imported successfully")

## Fixtures: Database Connection & Cleanup

In [None]:
@pytest.fixture(scope='module')
def db_connection():
    """Yield one autocommit psycopg2 connection shared across the module.

    The connection is opened from ``DB_CONFIG`` and closed during fixture
    teardown.
    """
    connection = psycopg2.connect(**DB_CONFIG)
    # Autocommit: each statement takes effect immediately, no explicit commit.
    connection.autocommit = True
    yield connection
    connection.close()

@pytest.fixture(scope='module')
def db_cursor(db_connection):
    """Yield a cursor on the shared connection; close it during teardown."""
    shared_cursor = db_connection.cursor()
    yield shared_cursor
    shared_cursor.close()

# Visible confirmation that the fixture cell ran.
print("✅ Fixtures defined")

## Test Suite 1: Table Structure Validation

In [None]:
%%ipytest

def test_etl_config_table_exists(db_cursor):
    """The catalog must list exactly one `etl_config` table in `public`."""
    existence_query = """
        SELECT COUNT(*) 
        FROM information_schema.tables 
        WHERE table_schema = 'public' 
          AND table_name = 'etl_config'
    """
    db_cursor.execute(existence_query)
    (table_count,) = db_cursor.fetchone()
    assert table_count == 1, "public.etl_config table must exist"

def test_etl_config_in_public_schema(db_cursor):
    """Verify `etl_config` exists in the public schema and in no other schema.

    Every schema that contains a table named `etl_config` is collected.
    Looking only at the first returned row would let a stray copy in another
    schema pass or fail depending on which row the database returned first.
    """
    db_cursor.execute("""
        SELECT table_schema
        FROM information_schema.tables 
        WHERE table_name = 'etl_config'
        ORDER BY table_schema
    """)
    schemas = [row[0] for row in db_cursor.fetchall()]

    assert schemas, "etl_config table not found"
    assert schemas == ['public'], \
        f"etl_config must be in public schema only, found in: {schemas}"

def test_etl_config_column_count(db_cursor):
    """The table must expose two columns and nothing more."""
    column_count_query = """
        SELECT COUNT(*) 
        FROM information_schema.columns 
        WHERE table_schema = 'public' 
          AND table_name = 'etl_config'
    """
    db_cursor.execute(column_count_query)
    (column_total,) = db_cursor.fetchone()
    assert column_total == 2, "etl_config should have exactly 2 columns (config_key, config_value)"

## Test Suite 2: Column Definitions

In [None]:
%%ipytest

def test_etl_config_column_definitions(db_cursor):
    """Check name, data type, maximum length and nullability of both columns."""
    db_cursor.execute("""
        SELECT 
            column_name,
            data_type,
            character_maximum_length,
            is_nullable
        FROM information_schema.columns
        WHERE table_schema = 'public'
          AND table_name = 'etl_config'
        ORDER BY ordinal_position
    """)

    # Index the catalog rows by column name for direct lookup below.
    specs = {}
    for name, data_type, max_length, is_nullable in db_cursor.fetchall():
        specs[name] = {
            'type': data_type,
            'max_length': max_length,
            'nullable': is_nullable,
        }

    # config_key is expected to be VARCHAR(100) and NOT NULL (primary key).
    assert 'config_key' in specs, "config_key column must exist"
    key_spec = specs['config_key']
    assert key_spec['type'] == 'character varying', (
        "config_key should be VARCHAR"
    )
    assert key_spec['max_length'] == 100, (
        "config_key should have max length 100"
    )
    assert key_spec['nullable'] == 'NO', (
        "config_key should be NOT NULL (primary key)"
    )

    # config_value is expected to be VARCHAR(200) and NOT NULL.
    assert 'config_value' in specs, "config_value column must exist"
    value_spec = specs['config_value']
    assert value_spec['type'] == 'character varying', (
        "config_value should be VARCHAR"
    )
    assert value_spec['max_length'] == 200, (
        "config_value should have max length 200"
    )
    assert value_spec['nullable'] == 'NO', (
        "config_value should be NOT NULL"
    )

def test_etl_config_primary_key(db_cursor):
    """The primary-key index must cover `config_key` and no other column."""
    primary_key_query = """
        SELECT a.attname
        FROM pg_index i
        JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
        WHERE i.indrelid = 'public.etl_config'::regclass
          AND i.indisprimary
    """
    db_cursor.execute(primary_key_query)
    key_rows = db_cursor.fetchall()
    pk_columns = [record[0] for record in key_rows]
    assert pk_columns == ['config_key'], "Primary key should be 'config_key' only"

## Test Suite 3: Seeded Configuration Keys

In [None]:
%%ipytest

def test_required_config_keys_exist(db_cursor):
    """Both base-path keys must be present in the seeded table."""
    required_keys_query = """
        SELECT config_key
        FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
        ORDER BY config_key
    """
    db_cursor.execute(required_keys_query)
    found_rows = db_cursor.fetchall()
    seeded = [record[0] for record in found_rows]

    assert 'base_path_crm' in seeded, "base_path_crm must be seeded"
    assert 'base_path_erp' in seeded, "base_path_erp must be seeded"
    assert len(seeded) == 2, "Exactly 2 required keys should be present"

def test_config_values_not_null_or_empty(db_cursor):
    """No row may carry a NULL or whitespace-only `config_value`."""
    blank_value_query = """
        SELECT config_key, config_value
        FROM public.etl_config
        WHERE config_value IS NULL 
           OR TRIM(config_value) = ''
    """
    db_cursor.execute(blank_value_query)

    # Any row returned here is a violation, so the result must be empty.
    offenders = db_cursor.fetchall()
    assert not offenders, (
        f"Found config keys with NULL or empty values: {offenders}"
    )

def test_config_values_are_valid_paths(db_cursor):
    """Each base-path value must contain a separator and a real path part."""
    base_paths_query = """
        SELECT config_key, config_value
        FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
    """
    db_cursor.execute(base_paths_query)
    path_rows = db_cursor.fetchall()

    for key, value in path_rows:
        # A path needs at least one forward slash or backslash.
        has_separator = '/' in value or '\\' in value
        assert has_separator, (
            f"{key} value '{value}' doesn't look like a file path"
        )
        # Something must remain once the surrounding separators are removed.
        remainder = value.strip('/\\')
        assert len(remainder) > 0, (
            f"{key} value '{value}' is invalid"
        )

## Test Suite 4: Path Convention Compliance

In [None]:
%%ipytest

def test_no_trailing_slashes_in_paths(db_cursor):
    """CRITICAL rule: a base path must not end with `/` or a backslash."""
    base_paths_query = """
        SELECT config_key, config_value
        FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
    """
    db_cursor.execute(base_paths_query)
    path_rows = db_cursor.fetchall()

    for key, value in path_rows:
        ends_with_forward = value.endswith('/')
        assert not ends_with_forward, (
            f"{key} has trailing forward slash: '{value}' (VIOLATION: must not end with /)"
        )
        ends_with_backward = value.endswith('\\')
        assert not ends_with_backward, (
            f"{key} has trailing backslash: '{value}' (VIOLATION: must not end with \\)"
        )

def test_paths_use_forward_slashes(db_cursor):
    """Base paths must contain a forward slash; backslashes only warn."""
    base_paths_query = """
        SELECT config_key, config_value
        FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
    """
    db_cursor.execute(base_paths_query)
    path_rows = db_cursor.fetchall()

    for key, value in path_rows:
        # Hard requirement: at least one forward slash.
        assert '/' in value, f"{key} should use forward slashes: '{value}'"
        # Soft check: a backslash is reported but does not fail the test.
        if '\\' not in value:
            continue
        print(f"⚠️  Warning: {key} contains backslashes: '{value}'")

## Test Suite 5: Idempotency & Conflict Handling

In [None]:
%%ipytest

def test_idempotent_rerun_no_duplicates(db_cursor):
    """Verify re-running INSERT with ON CONFLICT doesn't create duplicates.

    The probe INSERT runs inside an explicit transaction that is always
    rolled back. The shared connection is in autocommit mode, so without
    this guard a run against an unseeded table would permanently store the
    dummy values in the real configuration table.
    """
    count_query = """
        SELECT COUNT(*) FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
    """

    db_cursor.execute("BEGIN")
    try:
        # Count before
        db_cursor.execute(count_query)
        count_before = db_cursor.fetchone()[0]

        # Re-run the INSERT (should do nothing due to ON CONFLICT)
        db_cursor.execute("""
        INSERT INTO public.etl_config (config_key, config_value)
        VALUES
          ('base_path_crm', 'dummy_path_1'),
          ('base_path_erp', 'dummy_path_2')
        ON CONFLICT (config_key) DO NOTHING
    """)

        # Count after
        db_cursor.execute(count_query)
        count_after = db_cursor.fetchone()[0]
    finally:
        # Never leave probe rows behind, whatever happened above.
        db_cursor.execute("ROLLBACK")

    # Without both seeded rows the INSERT is a first run, not a re-run.
    assert count_before == 2, \
        f"Both required keys must be seeded before testing idempotency (found {count_before})"
    assert count_before == count_after, \
        f"Idempotency violated: count changed from {count_before} to {count_after}"

def test_existing_values_preserved_on_conflict(db_cursor):
    """Verify ON CONFLICT DO NOTHING preserves existing values.

    The conflicting INSERT runs inside an explicit transaction that is always
    rolled back. The shared connection is in autocommit mode, so if a key
    were missing the placeholder value would otherwise be stored permanently.
    """
    values_query = """
        SELECT config_key, config_value
        FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
        ORDER BY config_key
    """

    db_cursor.execute("BEGIN")
    try:
        # Get original values
        db_cursor.execute(values_query)
        original_values = {row[0]: row[1] for row in db_cursor.fetchall()}

        # Attempt to insert different values (should be ignored)
        db_cursor.execute("""
        INSERT INTO public.etl_config (config_key, config_value)
        VALUES
          ('base_path_crm', 'THIS_SHOULD_BE_IGNORED'),
          ('base_path_erp', 'THIS_SHOULD_ALSO_BE_IGNORED')
        ON CONFLICT (config_key) DO NOTHING
    """)

        # Read the values back after the conflicting INSERT
        db_cursor.execute(values_query)
        current_values = {row[0]: row[1] for row in db_cursor.fetchall()}
    finally:
        # Never leave placeholder rows behind, whatever happened above.
        db_cursor.execute("ROLLBACK")

    # With a key missing there is no existing value to preserve.
    assert set(original_values) == {'base_path_crm', 'base_path_erp'}, \
        f"Both required keys must be seeded before testing conflicts (found {sorted(original_values)})"
    assert original_values == current_values, \
        "ON CONFLICT DO NOTHING failed to preserve existing values"

def test_primary_key_constraint_enforced(db_cursor):
    """Verify primary key prevents duplicate keys without ON CONFLICT.

    The INSERT runs inside an explicit transaction that is always rolled
    back. If `base_path_crm` were not seeded the INSERT would succeed, and on
    the autocommit connection the `duplicate_attempt` row would otherwise
    stay in the real configuration table.
    """
    from psycopg2 import errors

    db_cursor.execute("BEGIN")
    try:
        with pytest.raises(errors.UniqueViolation):
            db_cursor.execute("""
            INSERT INTO public.etl_config (config_key, config_value)
            VALUES ('base_path_crm', 'duplicate_attempt')
        """)
    finally:
        # Discards a row inserted by mistake and clears the failed transaction.
        db_cursor.execute("ROLLBACK")

## Test Suite 6: Integration - Path Construction

In [None]:
%%ipytest

def test_path_construction_no_double_slashes(db_cursor):
    """Joining a base path and a file name in SQL must not produce `//`."""
    join_query = """
        SELECT 
            config_key,
            config_value,
            config_value || '/' || 'test_file.csv' AS full_path
        FROM public.etl_config
        WHERE config_key IN ('base_path_crm', 'base_path_erp')
    """
    db_cursor.execute(join_query)
    joined_rows = db_cursor.fetchall()

    # The middle column (the raw base path) is selected but not asserted on.
    for key, _base_path, full_path in joined_rows:
        assert '//' not in full_path, (
            f"{key}: Path construction created double slash: '{full_path}'"
        )
        assert full_path.endswith('test_file.csv'), (
            f"{key}: Path construction failed: '{full_path}'"
        )

def test_realistic_file_path_construction(db_cursor):
    """Test path construction with realistic filenames.

    Reads the `base_path_crm` value, joins it with each sample file name using
    a single `/`, and checks that the result contains no double slash and
    still ends with the file name.
    """
    test_files = ['cust_info.csv', 'prd_info.csv', 'CUST_AZ12.csv']

    db_cursor.execute("""
        SELECT config_value
        FROM public.etl_config
        WHERE config_key = 'base_path_crm'
    """)
    row = db_cursor.fetchone()
    # Fail with a clear message instead of a TypeError from indexing None.
    assert row is not None, "base_path_crm must be seeded"
    base_path = row[0]

    for filename in test_files:
        full_path = f"{base_path}/{filename}"
        assert '//' not in full_path, f"Double slash in: {full_path}"
        assert full_path.endswith(filename), f"Filename lost: {full_path}"

## Test Suite 7: Data Integrity

In [None]:
%%ipytest

def test_no_duplicate_keys(db_cursor):
    """Each `config_key` must occur at most once in the table."""
    duplicate_query = """
        SELECT config_key, COUNT(*)
        FROM public.etl_config
        GROUP BY config_key
        HAVING COUNT(*) > 1
    """
    db_cursor.execute(duplicate_query)

    # The query returns only keys that occur more than once.
    repeated = db_cursor.fetchall()
    assert not repeated, f"Found duplicate keys: {repeated}"

def test_config_key_case_sensitivity(db_cursor):
    """Verify config keys use consistent casing convention.

    Every key in the table is checked. Filtering on the exact lowercase
    literals would return only keys that are already lowercase, so the
    assertions below could never fail.
    """
    db_cursor.execute("""
        SELECT config_key
        FROM public.etl_config
        ORDER BY config_key
    """)

    keys = [row[0] for row in db_cursor.fetchall()]
    # An empty table would make the loop below pass without checking anything.
    assert keys, "etl_config contains no keys to check"
    for key in keys:
        # Verify lowercase with underscores (snake_case)
        assert key == key.lower(), f"Key '{key}' should be lowercase"
        assert ' ' not in key, f"Key '{key}' should not contain spaces"

## Summary: Run All Tests

In [None]:
# Run all tests in this notebook; '-v' is passed through as a pytest
# argument (verbose output).
ipytest.run('-v')

## Manual Inspection: View Configuration

In [None]:
# Manual inspection: show the configuration rows, the table structure and the
# constraints of public.etl_config as DataFrames.

# All configuration entries. path_check flags a value whose last character is
# '/' or a backslash (CHR(92)), the same two endings the trailing-slash test
# in this notebook rejects.
config_query = """
    SELECT 
        config_key,
        config_value,
        LENGTH(config_value) AS value_length,
        CASE 
            WHEN RIGHT(config_value, 1) IN ('/', CHR(92)) THEN '❌ Has trailing slash'
            ELSE '✅ No trailing slash'
        END AS path_check
    FROM public.etl_config
    ORDER BY config_key
"""

# Column definitions as reported by the information schema.
structure_query = """
    SELECT 
        column_name,
        data_type,
        character_maximum_length,
        is_nullable,
        column_default
    FROM information_schema.columns
    WHERE table_schema = 'public'
      AND table_name = 'etl_config'
    ORDER BY ordinal_position
"""

# Constraints attached to the table, with readable type names.
constraints_query = """
    SELECT 
        conname AS constraint_name,
        CASE contype
            WHEN 'p' THEN 'PRIMARY KEY'
            WHEN 'f' THEN 'FOREIGN KEY'
            WHEN 'u' THEN 'UNIQUE'
            WHEN 'c' THEN 'CHECK'
            ELSE contype::text
        END AS constraint_type,
        pg_get_constraintdef(oid) AS definition
    FROM pg_constraint
    WHERE conrelid = 'public.etl_config'::regclass
    ORDER BY conname
"""

# Connect and display current configuration
conn = psycopg2.connect(**DB_CONFIG)
try:
    df_config = pd.read_sql(config_query, conn)
    print("\n📋 Current Configuration:")
    display(df_config)

    df_structure = pd.read_sql(structure_query, conn)
    print("\n🏗️  Table Structure:")
    display(df_structure)

    df_constraints = pd.read_sql(constraints_query, conn)
    print("\n🔒 Constraints:")
    display(df_constraints)
finally:
    # Close the connection even when a query or display call fails.
    conn.close()

print("\n✅ Inspection complete")