## Python 3.14.2 Installation
This notebook requires Python 3.14.2. Run the cell below to install and verify Python 3.14.2.

In [None]:
# ============================================================================
# PYTHON 3.14.2 INSTALLATION FOR GOOGLE COLAB
# ============================================================================
# Compares the running interpreter with the target version. When they differ
# the cell tries conda, then the deadsnakes PPA, then pyenv, and stops at the
# first method whose commands all exit with status 0. The running kernel is
# never swapped in place; a restart is always required afterwards.
import os
import shutil
import subprocess
import sys

print("="*80)
print("PYTHON 3.14.2 INSTALLATION")
print("="*80)

# Check current Python version
current_version = sys.version_info
print(f"\nCurrent Python version: {current_version.major}.{current_version.minor}.{current_version.micro}")
print(f"Python executable: {sys.executable}")

# Target version
TARGET_MAJOR = 3
TARGET_MINOR = 14
TARGET_MICRO = 2


def _running_in_colab():
    """Return True when the kernel looks like a Google Colab runtime.

    Detection is done locally so this cell does not depend on a flag that is
    only defined by a cell further down the notebook.
    """
    try:
        import google.colab  # noqa: F401
        return True
    except ImportError:
        return os.path.exists('/content') and 'COLAB' in str(os.environ)


def _run_ok(argv, **kwargs):
    """Run ``argv`` and return True only when it exits with status 0.

    A missing executable or a timeout counts as failure instead of raising.
    """
    try:
        return subprocess.run(argv, **kwargs).returncode == 0
    except (OSError, subprocess.TimeoutExpired):
        return False


def _install_via_conda(version):
    """Install ``version`` with conda; return True on success."""
    if shutil.which('conda') is None:
        print("   ‚ö†Ô∏è  Conda not available")
        return False
    print(f"   ‚úÖ Conda found, installing Python {version}...")
    if not _run_ok(['conda', 'install', '-y', f'python={version}']):
        print("   ‚ö†Ô∏è  Command failed: conda install")
        return False
    return True


def _install_via_deadsnakes(version):
    """Install the matching major.minor series from the deadsnakes PPA.

    The PPA ships the latest patch release of a series, so the exact micro
    version is confirmed later by the verification step.
    """
    package = 'python' + '.'.join(version.split('.')[:2])
    steps = (
        ['apt-get', 'update', '-qq'],
        ['apt-get', 'install', '-y', 'software-properties-common'],
        ['add-apt-repository', '-y', 'ppa:deadsnakes/ppa'],
        ['apt-get', 'update', '-qq'],
        ['apt-get', 'install', '-y', package, f'{package}-venv', f'{package}-dev'],
    )
    for step in steps:
        if not _run_ok(step):
            print(f"   ‚ö†Ô∏è  Command failed: {' '.join(step)}")
            return False
    return True


def _install_via_pyenv(version):
    """Install ``version`` with pyenv; return True on success.

    pyenv is invoked by absolute path with an explicit environment, because
    shell ``export``/``eval`` lines do not persist between separate commands.
    """
    pyenv_root = os.path.join(os.path.expanduser('~'), '.pyenv')
    pyenv_bin = os.path.join(pyenv_root, 'bin', 'pyenv')
    if not os.path.exists(pyenv_bin):
        # NOTE(review): this pipes a remote script into bash, as the original
        # cell did. pipefail makes a failed download count as a failure.
        bootstrap = 'set -o pipefail; curl -fsSL https://pyenv.run | bash'
        if not _run_ok(['bash', '-c', bootstrap]):
            print("   ‚ö†Ô∏è  Command failed: pyenv bootstrap")
            return False
    env = dict(os.environ)
    env['PYENV_ROOT'] = pyenv_root
    env['PATH'] = os.pathsep.join([
        os.path.join(pyenv_root, 'bin'),
        os.path.join(pyenv_root, 'shims'),
        env.get('PATH', ''),
    ])
    for step in (
        [pyenv_bin, 'install', '--skip-existing', version],
        [pyenv_bin, 'global', version],
    ):
        if not _run_ok(step, env=env):
            print(f"   ‚ö†Ô∏è  Command failed: {' '.join(step)}")
            return False
    return True


def _locate_target_python(version):
    """Return the path of an installed interpreter for ``version``, or None."""
    executable = 'python' + '.'.join(version.split('.')[:2])
    candidates = (
        shutil.which(executable),
        os.path.join(os.path.expanduser('~'), '.pyenv', 'versions', version, 'bin', executable),
    )
    for candidate in candidates:
        if candidate and os.path.exists(candidate):
            return candidate
    return None


_target_version = f"{TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO}"

if current_version.major == TARGET_MAJOR and current_version.minor == TARGET_MINOR and current_version.micro == TARGET_MICRO:
    print(f"\n‚úÖ Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is already installed!")
else:
    print(f"\n‚ö†Ô∏è  Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is required")
    print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")
    print(f"\nInstalling Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO}...")

    if not _running_in_colab():
        raise RuntimeError("Python 3.14.2 installation requires Google Colab")

    try:
        _methods = (
            ("Trying conda...", "conda", _install_via_conda),
            ("Installing via deadsnakes PPA...", "deadsnakes PPA", _install_via_deadsnakes),
            ("Installing via pyenv...", "pyenv", _install_via_pyenv),
        )
        # Stop at the first method that succeeds instead of running all three.
        for _number, (_banner, _label, _installer) in enumerate(_methods, start=1):
            print(f"\nMethod {_number}: {_banner}")
            if _installer(_target_version):
                print(f"   ‚úÖ Python {_target_version} installed via {_label}")
                if _label == "conda":
                    print(f"   ‚ö†Ô∏è  Restart kernel and re-run this cell to use Python {_target_version}")
                break

        # Verify installation
        print(f"\nVerifying Python {_target_version} installation...")
        _interpreter = _locate_target_python(_target_version)
        result = None
        if _interpreter is not None:
            result = subprocess.run([_interpreter, '--version'], capture_output=True, text=True, timeout=5)
        if result is not None and result.returncode == 0:
            version_output = result.stdout.strip()
            print(f"   ‚úÖ Python {TARGET_MAJOR}.{TARGET_MINOR} found: {version_output}")
            # Compare the reported version token exactly so 3.14.20 is not
            # mistaken for 3.14.2.
            if version_output.split()[-1:] == [_target_version]:
                print(f"   ‚úÖ Python {_target_version} is installed!")
            print(f"\n‚ö†Ô∏è  IMPORTANT: Restart kernel and select Python {_target_version} as kernel")
            print(f"   Or use: !{_interpreter} your_script.py")
        else:
            print(f"   ‚ö†Ô∏è  Python {_target_version} installation may have failed")
            print("   Current Python version will be used")

    except Exception as e:
        print(f"\n‚ùå Error installing Python {_target_version}: {e}")
        print("\n‚ö†Ô∏è  Continuing with current Python version")
        print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")

# Verify Python version
print("\n" + "="*80)
print("PYTHON VERSION VERIFICATION")
print("="*80)
final_version = sys.version_info
print(f"Python version: {final_version.major}.{final_version.minor}.{final_version.micro}")
print(f"Python executable: {sys.executable}")
if final_version.major == TARGET_MAJOR and final_version.minor == TARGET_MINOR and final_version.micro == TARGET_MICRO:
    print(f"\n‚úÖ Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is active!")
else:
    print(f"\n‚ö†Ô∏è  Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is not active")
    print(f"   Current version: {final_version.major}.{final_version.minor}.{final_version.micro}")
    print("   If Python 3.14.2 was installed, restart kernel and select Python 3.14.2")
print("="*80)


# DB-6: Weather Forecasting & Insurance Database - End-to-End Query Testing

This notebook provides **complete end-to-end setup and testing** from scratch:

1. **Environment Setup**: Install all required Python packages automatically
2. **Database Initialization**: Create database, load schema, load data
3. **Query Execution**: Execute all 30 queries with metrics
4. **Visualization**: Performance charts and data analysis
5. **Documentation**: Comprehensive query documentation

## Database Overview

**Database Name:** Weather Forecasting & Insurance Database  
**Database ID:** db-6  
**Domain:** Weather Forecasting & Insurance  
**Total Queries:** 30  

## Prerequisites

- PostgreSQL server running (localhost or configured via environment variables)
- Python 3.14.2 installed
- Google Colab runtime (the environment check cell below raises an error when run in plain Jupyter Notebook or JupyterLab)

**Note:** All Python packages will be installed automatically when you run the package installation cell.

In [None]:
# ============================================================================
# GOOGLE COLAB ONLY - ENVIRONMENT CHECK
# ============================================================================
import sys
import os

# Decide whether this kernel is a Google Colab runtime. The importable
# google.colab module is the primary signal; the /content directory combined
# with COLAB-related environment variables is the fallback.
IS_COLAB = False
try:
    import google.colab
except ImportError:
    _has_content_dir = os.path.exists('/content')
    if _has_content_dir and os.environ.get('COLAB_GPU'):
        IS_COLAB = True
        print("‚úÖ Running in Google Colab (detected via COLAB_GPU)")
    elif _has_content_dir and 'COLAB' in str(os.environ):
        IS_COLAB = True
        print("‚úÖ Running in Google Colab (detected via COLAB env)")
else:
    IS_COLAB = True
    print("‚úÖ Running in Google Colab")

# Refuse to continue anywhere else.
if not IS_COLAB:
    raise RuntimeError(
        "‚ùå ERROR: This notebook is designed to run ONLY in Google Colab.\n"
        "Please open this notebook in Google Colab: https://colab.research.google.com/"
    )

print("="*80)
print("GOOGLE COLAB ENVIRONMENT CONFIRMED")
print("="*80)

## PostgreSQL Setup for Google Colab

This notebook requires PostgreSQL. Run the PostgreSQL setup cell (the code cell after the data directory detection cell below) to install and start PostgreSQL in Colab.

## Data Directory Detection

This notebook automatically detects the `data/` directory containing `schema.sql` and `data.sql` files.
It works when uploaded to Google Colab or run locally.

In [None]:
# ============================================================================
# SELF-AWARE DATA DIRECTORY DETECTION
# ============================================================================
import os
import sys
from pathlib import Path

print("="*80)
print("DATA DIRECTORY DETECTION")
print("="*80)


def _walk_data_dirs(root, max_depth):
    """Yield ``data`` directories found at most ``max_depth`` levels below ``root``.

    os.walk is used so the descent can be pruned; unreadable directories are
    skipped by os.walk's default error handling.
    """
    root = Path(root)
    for dirpath, dirnames, _filenames in os.walk(str(root)):
        dirnames.sort()  # deterministic visiting order
        level = len(Path(dirpath).relative_to(root).parts)
        if 'data' in dirnames:
            yield Path(dirpath) / 'data'
        if level + 1 >= max_depth:
            # Do not descend further: children would exceed the depth limit.
            dirnames[:] = []


def find_data_directory(max_depth=4):
    """Locate the ``data/`` directory that holds the SQL files.

    Each search root is checked first for a direct ``data/`` child, then
    searched recursively (at most ``max_depth`` levels deep) for a ``data/``
    directory containing ``schema.sql`` or ``data.sql``. As a last resort a
    ``<cwd-name>/data`` directory is tried when the working directory is
    named ``db-*``.

    Args:
        max_depth: Maximum number of directory levels below each search root
            that the recursive search descends.

    Returns:
        Path of the directory that was found, or None.
    """
    if IS_COLAB:
        # In Colab, check common locations
        search_paths = [
            Path('/content'),
            Path('/content/drive/MyDrive'),
            Path.cwd(),
        ]
    else:
        # Local execution
        search_paths = [
            Path.cwd(),
            Path(__file__).parent if '__file__' in globals() else Path.cwd(),
            Path.cwd().parent,
        ]

    # Also check up to 5 ancestors of the working directory
    current = Path.cwd()
    for _ in range(5):
        search_paths.append(current)
        current = current.parent

    # Drop repeated roots (order preserved) so no tree is walked twice.
    search_paths = list(dict.fromkeys(search_paths))

    print("\nSearching for data/ directory...")
    print(f"Current working directory: {Path.cwd()}")

    data_dir = None
    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Check if data/ exists directly under this root
        potential_data = search_path / 'data'
        if potential_data.exists() and potential_data.is_dir():
            data_dir = potential_data
            print(f"‚úÖ Found data/ directory: {data_dir}")
            break

        # Depth-limited recursive search
        expected_files = ['schema.sql', 'data.sql']
        try:
            for item in _walk_data_dirs(search_path, max_depth):
                # Only accept a candidate that holds at least one SQL file
                if any((item / f).exists() for f in expected_files):
                    data_dir = item
                    print(f"‚úÖ Found data/ directory (recursive): {data_dir}")
                    break
        except (PermissionError, OSError):
            continue
        if data_dir:
            break

    if not data_dir:
        # Try finding by database name pattern (db-N/data)
        db_name = Path.cwd().name
        if db_name.startswith('db-'):
            for search_path in search_paths:
                potential_db = search_path / db_name / 'data'
                if potential_db.exists() and potential_db.is_dir():
                    data_dir = potential_db
                    print(f"‚úÖ Found data/ directory by DB name: {data_dir}")
                    break

    return data_dir


def verify_data_directory(data_dir: Path):
    """Report which expected files ``data_dir`` contains.

    ``schema.sql`` is required; ``data.sql`` is optional.

    Returns:
        True when ``data_dir`` exists and every required file is present,
        otherwise False.
    """
    if not data_dir or not data_dir.exists():
        return False

    expected_files = ['schema.sql']
    optional_files = ['data.sql']

    print("\nVerifying data/ directory contents...")
    print(f"Location: {data_dir}")

    missing_files = []

    for file_name in expected_files:
        if (data_dir / file_name).exists():
            print(f"  ‚úÖ {file_name}")
        else:
            missing_files.append(file_name)
            print(f"  ‚ùå {file_name} (missing)")

    for file_name in optional_files:
        if (data_dir / file_name).exists():
            print(f"  ‚úÖ {file_name} (optional)")
        else:
            print(f"  ‚ö†Ô∏è  {file_name} (optional, not found)")

    if missing_files:
        print(f"\n‚ö†Ô∏è  Warning: Missing required files: {missing_files}")
        return False

    return True


# Detect data directory
DATA_DIR = find_data_directory()

if DATA_DIR:
    if verify_data_directory(DATA_DIR):
        print("\n‚úÖ Data directory verified and ready!")
        print(f"   Schema file: {DATA_DIR / 'schema.sql'}")
        if (DATA_DIR / 'data.sql').exists():
            print(f"   Data file: {DATA_DIR / 'data.sql'}")

        # Set global variables for use in other cells
        SCHEMA_FILE = DATA_DIR / 'schema.sql'
        DATA_FILE = DATA_DIR / 'data.sql' if (DATA_DIR / 'data.sql').exists() else None

        print("\n‚úÖ Global variables set:")
        print(f"   DATA_DIR = {DATA_DIR}")
        print(f"   SCHEMA_FILE = {SCHEMA_FILE}")
        if DATA_FILE:
            print(f"   DATA_FILE = {DATA_FILE}")
    else:
        print("\n‚ö†Ô∏è  Data directory found but verification failed")
        print(f"   Location: {DATA_DIR}")
        print("   Please ensure schema.sql exists in this directory")
else:
    print("\n‚ùå Data directory not found!")
    print("\nTroubleshooting:")
    print("1. Ensure data/ directory is uploaded to Colab")
    print("2. Check that data/ contains schema.sql")
    print("3. Verify notebook is in same directory structure as data/")
    print(f"\nCurrent directory: {Path.cwd()}")
    print("Contents:")
    try:
        for item in sorted(Path.cwd().iterdir()):
            print(f"  - {item.name} ({'dir' if item.is_dir() else 'file'})")
    except PermissionError:
        print("  (Permission denied)")

print("="*80)

In [None]:
# ============================================================================
# POSTGRESQL SETUP FOR GOOGLE COLAB
# ============================================================================
# Installs PostgreSQL with apt when psql is missing, starts the service, and
# polls pg_isready until the server accepts connections. Every command's exit
# status is checked, so a success line is printed only for a step that
# actually succeeded.
import subprocess
import time
import os

print("="*80)
print("POSTGRESQL SETUP FOR GOOGLE COLAB")
print("="*80)

if not IS_COLAB:
    raise RuntimeError("This notebook requires Google Colab")

# Seconds to keep polling pg_isready before giving up.
_READY_ATTEMPTS = 15


def _run_checked(argv):
    """Run ``argv`` quietly; raise CalledProcessError on a non-zero exit."""
    return subprocess.run(argv, check=True, capture_output=True, text=True)


def _pg_isready():
    """Run pg_isready once and return its CompletedProcess."""
    return subprocess.run(['pg_isready'], capture_output=True, text=True, timeout=5)


# Check if PostgreSQL is already installed
postgres_installed = False
try:
    result = subprocess.run(['psql', '--version'],
                            capture_output=True,
                            text=True,
                            timeout=5)
    if result.returncode == 0:
        print(f"‚úÖ PostgreSQL already installed: {result.stdout.strip()}")
        postgres_installed = True
except (FileNotFoundError, subprocess.TimeoutExpired):
    pass

if not postgres_installed:
    print("\nInstalling PostgreSQL using magic commands...")
    print("(Run these commands if automatic installation fails)")
    print("  !apt-get update")
    print("  !apt-get install -y postgresql postgresql-contrib")
    print("  !service postgresql start")

    try:
        # Update package list
        print("\n   Updating package list...")
        _run_checked(['apt-get', 'update', '-qq'])
        print("   ‚úÖ Package list updated")

        # Install PostgreSQL
        print("   Installing PostgreSQL...")
        _run_checked(['apt-get', 'install', '-y', '-qq', 'postgresql', 'postgresql-contrib'])
        print("   ‚úÖ PostgreSQL installed")

        # Start PostgreSQL service
        print("   Starting PostgreSQL service...")
        _run_checked(['service', 'postgresql', 'start'])
        print("   ‚úÖ PostgreSQL service started")

        print("   Waiting for PostgreSQL to be ready...")

    except Exception as e:
        print(f"   ‚ùå Error: {e}")
        print("   Please run manually:")
        print("   !apt-get update")
        print("   !apt-get install -y postgresql postgresql-contrib")
        print("   !service postgresql start")

# Verify PostgreSQL is running
print("\nVerifying PostgreSQL is ready...")
try:
    result = _pg_isready()
    if result.returncode != 0:
        # The server may be installed but stopped (for example after a runtime
        # restart): try to start it, then poll instead of sleeping blindly.
        subprocess.run(['service', 'postgresql', 'start'], capture_output=True, text=True)
        for _attempt in range(_READY_ATTEMPTS):
            time.sleep(1)
            result = _pg_isready()
            if result.returncode == 0:
                break
    if result.returncode == 0:
        print("‚úÖ PostgreSQL is ready")
        print(f"   {result.stdout.strip()}")
    else:
        print("‚ö†Ô∏è  PostgreSQL may not be ready yet")
        print("   Try: !service postgresql restart")
except Exception as e:
    print(f"‚ö†Ô∏è  Could not verify PostgreSQL: {e}")

print("\n" + "="*80)
print("POSTGRESQL SETUP COMPLETE")
print("="*80)

In [None]:
# ============================================================================
# STREAMLIT DASHBOARD EXECUTION
# ============================================================================
import subprocess
import sys
import os
from pathlib import Path
import webbrowser
import time
import threading


def find_dashboard_file():
    """Find the ``<DB_NAME>_dashboard.py`` file.

    Each known root is checked for the file directly and then searched
    recursively; roots that do not exist are skipped.

    Returns:
        Path of the first match, or None when no root contains the file.
    """
    search_paths = [
        Path.cwd(),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/workspace'),
        Path('/content/drive/MyDrive/db'),
        Path('/content/db'),
        Path('/content'),
    ]

    dashboard_name = f'{DB_NAME}_dashboard.py'

    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / dashboard_name
        if candidate.exists():
            return candidate

        # Try recursive search; an unreadable subtree only skips this root
        try:
            for found_path in search_path.rglob(dashboard_name):
                if found_path.is_file():
                    return found_path
        except OSError:
            continue

    return None


def run_streamlit_dashboard(method='notebook', port=8501, open_browser=True):
    """Run the Streamlit dashboard from the notebook.

    Args:
        method: ``'notebook'`` starts a headless background server and prints
            its URL; ``'subprocess'`` starts a plain background server;
            ``'magic'`` only prints the ``!streamlit run`` command.
        port: TCP port passed to ``--server.port``.
        open_browser: In ``'notebook'`` mode, try to open the URL with
            ``webbrowser``.

    Returns:
        The ``subprocess.Popen`` handle of the started server, or None when
        the dashboard file is missing, the server exits right after launch,
        ``method`` is ``'magic'``, or ``method`` is not recognised.
    """
    dashboard_path = find_dashboard_file()

    if not dashboard_path:
        print("‚ùå Dashboard file not found")
        print(f"   Looking for: {DB_NAME}_dashboard.py")
        return None

    print(f"‚úÖ Found dashboard: {dashboard_path}")

    if method == 'notebook':
        # Streamlit has no native notebook mode, so a headless server is
        # started and the user opens its URL in another tab.
        print("\n" + "="*80)
        print("STREAMLIT DASHBOARD - NOTEBOOK MODE")
        print("="*80)
        print(f"\nDashboard: {dashboard_path.name}")
        print("\nTo run dashboard:")
        print("  1. Run this cell to start the server")
        print("  2. Open the URL shown below in a new tab")
        print(f"  3. Or use: !streamlit run {dashboard_path} --server.port={port}")
        print("\n" + "="*80)

        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port),
            '--server.headless', 'true',
            '--server.runOnSave', 'false',
            '--browser.gatherUsageStats', 'false'
        ]

        # Output is discarded rather than piped: nothing ever reads the pipes,
        # and a full pipe buffer would block the server.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )

        # Wait a moment for server to start
        time.sleep(2)

        # A server that already exited (e.g. streamlit not installed) must not
        # be reported as running.
        if process.poll() is not None:
            print(f"‚ùå Streamlit exited immediately (exit code {process.returncode})")
            return None

        url = f"http://localhost:{port}"
        print(f"\nüåê Dashboard URL: {url}")
        print(f"\nServer started in background (PID: {process.pid})")
        print("\nTo stop: process.terminate() or run stop_streamlit()")

        # Store process for later termination
        globals()['_streamlit_process'] = process

        # Try to open browser (best effort)
        if open_browser:
            try:
                webbrowser.open(url)
            except Exception:
                pass

        return process

    elif method == 'subprocess':
        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port)
        ]

        process = subprocess.Popen(cmd)
        # Remember the handle so stop_streamlit() can stop this server too.
        globals()['_streamlit_process'] = process
        print(f"‚úÖ Streamlit started (PID: {process.pid})")
        print(f"üåê Dashboard: http://localhost:{port}")
        return process

    elif method == 'magic':
        # Print magic command for the user to run
        print("Run this command in a new cell:")
        print(f"!streamlit run {dashboard_path} --server.port={port}")
        return None

    print(f"‚ö†Ô∏è  Unknown method: {method!r} (expected 'notebook', 'subprocess' or 'magic')")
    return None


def stop_streamlit():
    """Stop the Streamlit process started by ``run_streamlit_dashboard``."""
    process = globals().pop('_streamlit_process', None)
    if process is None:
        print("‚ö†Ô∏è  No Streamlit process found")
        return

    if process.poll() is None:
        process.terminate()
        try:
            # Reap the child so it does not linger as a zombie.
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            process.kill()
    print("‚úÖ Streamlit stopped")


# Auto-detect DB_NAME if not set
if 'DB_NAME' not in globals():
    # Try to detect from the current directory name
    cwd = Path.cwd()
    for db_num in range(6, 16):
        if f'db-{db_num}' in str(cwd) or f'db{db_num}' in str(cwd):
            DB_NAME = f'db-{db_num}'
            break
    else:
        DB_NAME = 'db-6'  # Default
        print(f"‚ö†Ô∏è  Could not detect DB_NAME, using default: {DB_NAME}")

print("\n" + "="*80)
print("STREAMLIT DASHBOARD INTEGRATION")
print("="*80)
print(f"Database: {DB_NAME}")
print("\nAvailable methods:")
print("  1. run_streamlit_dashboard(method='notebook') - Run in notebook mode")
print("  2. run_streamlit_dashboard(method='subprocess') - Run as background process")
print("  3. run_streamlit_dashboard(method='magic') - Get magic command")
print("  4. stop_streamlit() - Stop running dashboard")
print("\n" + "="*80)

## Streamlit Dashboard

Run the Streamlit dashboard using one of these methods:

**Method 1: Notebook Mode** (Recommended)
```python
run_streamlit_dashboard(method='notebook', port=8501)
```

**Method 2: Magic Command**
```bash
!streamlit run db-6_dashboard.py --server.port=8501
```

**Method 3: Background Process**
```python
run_streamlit_dashboard(method='subprocess', port=8501)
```


## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import sys
import os
import platform
import subprocess
import json
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)


def detect_environment(environ=None, path_exists=os.path.exists, has_colab_module=None):
    """Classify the runtime as ``'docker'``, ``'colab'`` or ``'local'``.

    A Docker marker is recorded first; any Colab signal then takes precedence
    over it. ``'local'`` is chosen only when neither is detected, so the
    result is never None.

    Args:
        environ: Mapping of environment variables; defaults to ``os.environ``.
        path_exists: Callable taking a path string and returning a bool;
            defaults to ``os.path.exists``.
        has_colab_module: Whether ``google.colab`` is importable; probed with
            a real import when None.

    Returns:
        ``(env_type, details)`` where ``details`` is a dict describing which
        signals were seen.
    """
    if environ is None:
        environ = os.environ
    if has_colab_module is None:
        try:
            import google.colab  # noqa: F401
            has_colab_module = True
        except ImportError:
            has_colab_module = False

    env_type = None
    details = {}

    # Check for Docker
    if path_exists('/.dockerenv'):
        env_type = 'docker'
        details['container'] = 'docker'
        if path_exists('/workspace'):
            details['workspace'] = '/workspace'
        print("‚úÖ Detected: Docker container")

    # Check for Google Colab
    if has_colab_module:
        env_type = 'colab'
        details['platform'] = 'google_colab'
        details['colab_module'] = True
        print("‚úÖ Detected: Google Colab (via google.colab module)")
    elif path_exists('/content'):
        env_type = 'colab'
        details['platform'] = 'google_colab'
        details['content_dir'] = True
        if environ.get('COLAB_GPU'):
            print("‚úÖ Detected: Google Colab (by /content + COLAB_GPU)")
        elif 'COLAB' in str(environ):
            print("‚úÖ Detected: Google Colab (by /content + COLAB env)")
        elif path_exists('/content/sample_data') or path_exists('/content/drive'):
            print("‚úÖ Detected: Google Colab (by /content structure)")
        else:
            print("‚ö†Ô∏è  Detected: Possible Google Colab (by /content)")
    elif env_type is None:
        # Neither Docker nor Colab: local environment
        env_type = 'local'
        details['platform'] = platform.system().lower()
        print("‚úÖ Detected: Local environment")

    return env_type, details


def _walk_db_dirs(root, name, max_depth):
    """Yield directories called ``name`` at most ``max_depth`` levels below ``root``."""
    root = Path(root)
    for dirpath, dirnames, _filenames in os.walk(str(root)):
        dirnames.sort()  # deterministic visiting order
        level = len(Path(dirpath).relative_to(root).parts)
        if name in dirnames:
            yield Path(dirpath) / name
        if level + 1 >= max_depth:
            # Do not descend further: children would exceed the depth limit.
            dirnames[:] = []


def find_base_directory(db_name='db-6', max_depth=4):
    """Find the directory that contains the ``db_name`` database folder.

    Args:
        db_name: Name of the database directory to look for.
        max_depth: Maximum number of levels searched below each start path.

    Returns:
        The parent of the first ``db_name`` directory that has a ``queries``
        child; otherwise a start path that has ``client/db/<db_name>``;
        otherwise the current working directory.
    """
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in dict.fromkeys(start_paths):
        if not start_path.exists():
            continue

        # Look for the database directory
        for db_dir in _walk_db_dirs(start_path, db_name, max_depth):
            if (db_dir / 'queries').exists():
                return db_dir.parent

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and (client_db / db_name).exists():
            return start_path

    return Path.cwd()


# Detect environment type
ENV_TYPE, ENV_DETAILS = detect_environment()

# Detect base directory
BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


def update_notebook_paths():
    """Return the detected environment settings as a single dict."""
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Colab Setup (Run this after Step 0 if using Google Colab)

If you're running this notebook in Google Colab:
1. **Mount Google Drive** (if your database files are in Drive)
2. **Upload database files** to `/content/db` or your Drive folder


In [None]:
# ============================================================================
# GOOGLE COLAB SETUP
# ============================================================================
# Runs only when the environment was classified as Colab: makes sure Google
# Drive is mounted, then reports whether the database folder is present under
# /content/db and under the Drive root.

if ENV_TYPE != 'colab':
    print("Not running in Colab - skipping Colab setup")
else:
    print("\n".join(["="*80, "GOOGLE COLAB SETUP", "="*80]))

    # Mount Google Drive if not already mounted
    drive_path = Path('/content/drive/MyDrive')
    if drive_path.exists():
        print("‚úÖ Google Drive is already mounted")
    else:
        print("\n".join([
            "‚ö†Ô∏è  Google Drive not mounted.",
            "   Run this command to mount:",
            "   from google.colab import drive",
            "   drive.mount('/content/drive')",
        ]))
        try:
            from google.colab import drive
            drive.mount('/content/drive')
            print("‚úÖ Google Drive mounted")
        except Exception as e:
            print(f"‚ö†Ô∏è  Could not auto-mount Drive: {e}")
            print("   Please mount manually using the command above")

    # Check for database files
    print("\nChecking for database files...")

    # Runtime-local copy
    content_db = Path('/content/db')
    if not content_db.exists():
        print(f"‚ö†Ô∏è  Not found: {content_db}")
        print("   Upload your database folder to /content/db")
    else:
        print(f"‚úÖ Found: {content_db}")

    # Copy stored in Drive
    drive_db = drive_path / 'db'
    if not drive_db.exists():
        print(f"‚ö†Ô∏è  Not found in Drive: {drive_db}")
        print("   Upload your database folder to Google Drive/db")
    else:
        print(f"‚úÖ Found in Drive: {drive_db}")

    print("\n".join([
        "\n" + "="*80,
        "Some PostgreSQL-specific features may not work",
        "="*80,
    ]))

In [None]:
# ============================================================================
# FAILSAFE: Force Path Correction and Package Installation
# ============================================================================
import importlib
import os
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path


def _try_install(command, import_name):
    """Run one installer command and report whether ``import_name`` then imports.

    Args:
        command: argv list passed to ``subprocess.check_call``.
        import_name: module name to import after the command succeeds.

    Returns:
        True when the command exits with status 0 and the import succeeds,
        False otherwise. Failures are swallowed on purpose so the caller can
        move on to its next installation method.
    """
    try:
        subprocess.check_call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # A package installed while the interpreter is running may be missed
        # by the import system's cached directory listings.
        importlib.invalidate_caches()
        __import__(import_name)
        return True
    except Exception:
        # Covers a non-zero exit status, a missing executable (e.g. conda)
        # and an import that still fails; KeyboardInterrupt is not caught.
        return False


def force_install_package(package_name, import_name=None):
    """Force install package using multiple methods.

    Args:
        package_name: requirement string handed to the installers
            (may carry extras or a ``==`` / ``>=`` version specifier).
        import_name: module name used to verify the install. Defaults to
            ``package_name`` with extras and version specifier stripped.

    Returns:
        True if the module is importable (already, or after one of the
        attempts), False if every attempt failed.
    """
    if import_name is None:
        import_name = package_name.split('[')[0].split('==')[0].split('>=')[0]

    # Try import first
    try:
        __import__(import_name)
        return True
    except ImportError:
        pass

    pip_install = [sys.executable, '-m', 'pip', 'install']
    attempts = [
        # Method 1: pip install --user
        pip_install + ['--user', '--quiet', package_name],
        # Method 2: pip install --break-system-packages (Python 3.12+)
        pip_install + ['--break-system-packages', '--quiet', package_name],
        # Method 3: pip install system-wide
        pip_install + ['--quiet', package_name],
        # Method 4: conda install (if conda available)
        ['conda', 'install', '-y', '--quiet', package_name],
    ]
    # Method 5: apt-get install (Linux/Docker)
    if os.path.exists('/usr/bin/apt-get'):
        apt_package = f'python3-{import_name.replace("_", "-")}'
        attempts.append(['apt-get', 'install', '-y', '--quiet', apt_package])
    # Method 6: Direct pip install with --force-reinstall
    attempts.append(pip_install + ['--force-reinstall', '--quiet', package_name])

    for command in attempts:
        if _try_install(command, import_name):
            return True

    print(f"‚ö†Ô∏è  Warning: Could not install {package_name}, continuing anyway...")
    return False


def correct_file_path(file_path, search_paths=None):
    """Correct file path by searching multiple locations.

    Args:
        file_path: ``str`` or ``Path`` to locate.
        search_paths: optional list of ``Path`` directories to search. When
            omitted, a built-in list of workspace/Colab directories is used,
            plus the global ``BASE_DIR`` if one is defined.

    Returns:
        ``file_path`` itself if it exists; otherwise the first entry with the
        same name found directly in, or as a file below, a search directory;
        otherwise the original (non-existent) path.
    """
    if isinstance(file_path, str):
        file_path = Path(file_path)

    # If path exists, return it
    if file_path.exists():
        return file_path

    # Default search paths
    if search_paths is None:
        search_paths = [
            Path.cwd(),
            Path('/workspace/client/db'),
            Path('/workspace/db'),
            Path('/workspace'),
            Path('/content/drive/MyDrive/db'),
            Path('/content/db'),
            Path('/content'),
        ]
        base_dir = globals().get('BASE_DIR')
        if base_dir:
            search_paths.append(Path(base_dir))

    target_name = file_path.name
    if not target_name:
        # Nothing to search for (rglob rejects an empty pattern).
        return file_path

    # Search recursively
    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / target_name
        if candidate.exists():
            return candidate

        # Try recursive search
        try:
            for found_path in search_path.rglob(target_name):
                if found_path.is_file():
                    return found_path
        except OSError:
            # Unreadable directory somewhere below search_path: try the next root.
            continue

    # Return original path (will fail later, but at least we tried)
    return file_path


def create_notebook_backup(notebook_path=None):
    """Create backup of current notebook automatically.

    Args:
        notebook_path: ``str`` or ``Path`` of the notebook. When omitted,
            ``__file__`` is used if defined, else
            ``<cwd>/current_notebook.ipynb``.

    Returns:
        Path of the timestamped ``*.backup.ipynb`` copy, or None when the
        notebook file was not found, is not an ``.ipynb``, or copying failed.
    """
    try:
        # Try to detect notebook path from various sources
        if notebook_path is None:
            try:
                notebook_path = Path(__file__)
            except NameError:
                # __file__ is not defined when this code runs as a notebook cell.
                notebook_path = Path.cwd() / 'current_notebook.ipynb'

        if isinstance(notebook_path, str):
            notebook_path = Path(notebook_path)

        # Only create backup if file exists
        if notebook_path.exists() and notebook_path.suffix == '.ipynb':
            timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
            backup_path = notebook_path.parent / f"{notebook_path.stem}_{timestamp}.backup.ipynb"

            # Create backup
            shutil.copy2(notebook_path, backup_path)
            print(f"‚úÖ Backup created: {backup_path.name}")
            return backup_path
        else:
            print("‚ö†Ô∏è  Could not determine notebook path for backup")
            return None
    except Exception as e:
        print(f"‚ö†Ô∏è  Backup creation failed (non-critical): {e}")
        return None


# Create backup at startup
try:
    create_notebook_backup()
except Exception as e:
    print(f"‚ö†Ô∏è  Backup skipped: {e}")


def ensure_packages_installed():
    """Ensure all required packages are installed.

    Calls ``force_install_package`` for each (package, import name) pair and
    prints one status line per package; failures are reported, not raised.
    """
    required_packages = [
        ('psycopg2-binary', 'psycopg2'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
        ('matplotlib', 'matplotlib'),
        ('seaborn', 'seaborn'),
        ('ipython', 'IPython'),
        ('jupyter', 'jupyter'),
    ]

    print("\n" + "="*80)
    print("FAILSAFE: Ensuring all packages are installed...")
    print("="*80)

    for package, import_name in required_packages:
        if force_install_package(package, import_name):
            print(f"‚úÖ {package} installed")
        else:
            print(f"‚ö†Ô∏è  {package} installation failed, but continuing...")

    print("="*80 + "\n")


def ensure_paths_correct():
    """Ensure all file paths are correct.

    Validates the globals ``BASE_DIR`` and ``DB_DIR``. A missing or
    non-existent ``BASE_DIR`` is replaced; a non-existent ``DB_DIR`` is
    replaced with whatever ``correct_file_path`` finds for it. An undefined
    or empty ``DB_DIR`` is left untouched.
    """
    print("\n" + "="*80)
    print("FAILSAFE: Correcting file paths...")
    print("="*80)

    # Correct BASE_DIR if needed
    base_dir_value = globals().get('BASE_DIR')
    base_dir_path = None
    base_dir_valid = False

    if base_dir_value:
        try:
            base_dir_path = Path(base_dir_value) if isinstance(base_dir_value, str) else base_dir_value
            base_dir_valid = base_dir_path.exists()
        except Exception:
            base_dir_path = None
            base_dir_valid = False

    if not base_dir_valid:
        # Search for a stale BASE_DIR by name; with no usable value at all,
        # fall back to the current working directory.
        if base_dir_path is not None:
            corrected_base_dir = correct_file_path(base_dir_path)
        else:
            corrected_base_dir = Path.cwd()
        globals()['BASE_DIR'] = corrected_base_dir
        print(f"‚úÖ BASE_DIR corrected: {corrected_base_dir}")
    else:
        print(f"‚úÖ BASE_DIR valid: {globals()['BASE_DIR']}")

    # Correct DB_DIR if needed
    db_dir_value = globals().get('DB_DIR')
    db_dir_path = None
    db_dir_valid = False

    if db_dir_value:
        try:
            db_dir_path = Path(db_dir_value) if isinstance(db_dir_value, str) else db_dir_value
            db_dir_valid = db_dir_path.exists()
        except Exception:
            db_dir_valid = False

    if db_dir_value and not db_dir_valid:
        # Re-derive the path here: the attempt above may have failed midway.
        db_dir_path = Path(db_dir_value) if isinstance(db_dir_value, str) else db_dir_value
        corrected_db_dir = correct_file_path(db_dir_path)
        globals()['DB_DIR'] = corrected_db_dir
        print(f"‚úÖ DB_DIR corrected: {corrected_db_dir}")
    elif db_dir_value:
        print(f"‚úÖ DB_DIR valid: {globals()['DB_DIR']}")

    print("="*80 + "\n")


# Run failsafe checks
ensure_packages_installed()
ensure_paths_correct()
print("‚úÖ Failsafe checks complete")

## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import sys
import os
import platform
import subprocess
import json
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)

# Detect environment type
ENV_TYPE = None      # one of 'docker', 'colab', 'local' once detection has run
ENV_DETAILS = {}     # free-form facts collected while detecting

# Check for Docker
if os.path.exists('/.dockerenv'):
    ENV_TYPE = 'docker'
    ENV_DETAILS['container'] = 'docker'
    if os.path.exists('/workspace'):
        ENV_DETAILS['workspace'] = '/workspace'
    print("‚úÖ Detected: Docker container")

# Check for Google Colab. A positive Colab signal overrides a Docker result.
try:
    import google.colab
    ENV_TYPE = 'colab'
    ENV_DETAILS['platform'] = 'google_colab'
    ENV_DETAILS['colab_module'] = True
    print("‚úÖ Detected: Google Colab (via google.colab module)")
except ImportError:
    # No google.colab module: fall back to filesystem / environment heuristics.
    if os.path.exists('/content') and os.environ.get('COLAB_GPU'):
        ENV_TYPE = 'colab'
        ENV_DETAILS['platform'] = 'google_colab'
        ENV_DETAILS['content_dir'] = True
        print("‚úÖ Detected: Google Colab (by /content + COLAB_GPU)")
    elif os.path.exists('/content') and 'COLAB' in str(os.environ):
        ENV_TYPE = 'colab'
        ENV_DETAILS['platform'] = 'google_colab'
        ENV_DETAILS['content_dir'] = True
        print("‚úÖ Detected: Google Colab (by /content + COLAB env)")
    elif os.path.exists('/content'):
        # Check if it looks like Colab
        if (Path('/content/sample_data').exists() or
                Path('/content/drive').exists()):
            ENV_TYPE = 'colab'
            ENV_DETAILS['platform'] = 'google_colab'
            ENV_DETAILS['content_dir'] = True
            print("‚úÖ Detected: Google Colab (by /content structure)")
        else:
            ENV_TYPE = 'colab'
            ENV_DETAILS['platform'] = 'google_colab'
            ENV_DETAILS['content_dir'] = True
            print("‚ö†Ô∏è  Detected: Possible Google Colab (by /content)")
    elif ENV_TYPE is None:
        # Check for local environment. Only reached when nothing else matched,
        # so an earlier Docker detection is not overwritten.
        ENV_TYPE = 'local'
        ENV_DETAILS['platform'] = platform.system().lower()
        print("‚úÖ Detected: Local environment")


# Detect base directories recursively
def find_base_directory():
    """Find base database directory recursively.

    Walks a fixed list of candidate roots in order. For each existing root,
    returns the parent of the first ``db-6`` directory below it that contains
    a ``queries`` entry; failing that, returns the root itself if it has a
    ``client/db/db-6`` entry.

    Returns:
        The matching ``Path``, or ``Path.cwd()`` when no root matches.
    """
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for db-6 directory
        try:
            for db_dir in start_path.rglob('db-6'):
                if db_dir.is_dir() and (db_dir / 'queries').exists():
                    return db_dir.parent
        except OSError:
            # An unreadable directory must not abort detection; the
            # client/db check below and the remaining roots still run.
            pass

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and (client_db / 'db-6').exists():
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Metaprogrammatically update notebook cell paths based on detected environment.

    Returns:
        dict with keys ``'env_type'`` (``ENV_TYPE``), ``'base_dir'``
        (``BASE_DIR``) and ``'details'`` (``ENV_DETAILS``). It only reports
        the detected values and modifies nothing.
    """
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Step 1: Environment Setup & Package Installation

In [None]:
# ============================================================================
# END-TO-END SETUP: Install all required packages and configure environment
# ============================================================================
import importlib
import os
import platform
import shutil
import subprocess
import sys
from pathlib import Path


def _run_and_import(commands, import_name, timeout=300):
    """Run installer commands in order, then check that ``import_name`` imports.

    Args:
        commands: list of argv lists; each must exit with status 0.
        import_name: module to import once all commands succeeded.
        timeout: per-command timeout in seconds.

    Returns:
        True on success, False if a command failed, timed out, could not be
        started, or the module still cannot be imported.
    """
    try:
        for command in commands:
            # Output goes to DEVNULL: check_call never reads a PIPE, so a
            # chatty installer could fill the pipe buffer and block.
            subprocess.check_call(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=timeout,
            )
        # Make a freshly installed package visible to the import system.
        importlib.invalidate_caches()
        __import__(import_name)
        return True
    except (subprocess.SubprocessError, ImportError, OSError):
        return False


def install_package_multiple_methods(package_spec: str, import_name: str) -> bool:
    """Install package using multiple methods with fallbacks.

    Order of attempts: already importable, ``pip --user``, plain ``pip``,
    ``pip --break-system-packages`` (local Linux only), ``conda`` (if on
    PATH), ``apt-get`` (Docker/Colab, mapped packages only). Reads the global
    ``ENV_TYPE`` set by the environment detection cell.

    Args:
        package_spec: requirement string, e.g. ``'pandas>=2.0.0'``.
        import_name: module name used to verify the installation.

    Returns:
        True if the module is importable at the end, False otherwise.
    """
    package_name = package_spec.split('>=')[0]

    # Method 1: Check if already installed
    try:
        __import__(import_name)
        print(f"‚úÖ {package_name}: Already installed")
        return True
    except ImportError:
        pass

    print(f"‚ö†Ô∏è  {package_name}: Installing...")

    pip_install = [sys.executable, '-m', 'pip', 'install', package_spec]
    attempts = [
        # Method 2: pip install --user
        ('pip --user', [pip_install + ['--quiet', '--user']]),
        # Method 3: pip install (system-wide)
        ('pip (system-wide)', [pip_install + ['--quiet']]),
    ]

    # Method 4: pip install --break-system-packages
    if ENV_TYPE == 'local' and platform.system() == 'Linux':
        attempts.append((
            'pip --break-system-packages',
            [pip_install + ['--break-system-packages', '--quiet']],
        ))

    # Method 5: conda install
    if shutil.which('conda'):
        conda_pkg = package_name.replace('-binary', '')
        attempts.append(('conda', [['conda', 'install', '-y', conda_pkg]]))

    # Method 6: apt-get (Docker/Colab)
    if ENV_TYPE in ['docker', 'colab']:
        system_pkg_map = {
            'psycopg2-binary': 'python3-psycopg2',
            'pandas': 'python3-pandas',
            'numpy': 'python3-numpy',
            'matplotlib': 'python3-matplotlib',
        }
        if package_name in system_pkg_map:
            attempts.append((
                'apt-get',
                [
                    ['apt-get', 'update'],
                    ['apt-get', 'install', '-y', system_pkg_map[package_name]],
                ],
            ))

    for label, commands in attempts:
        if _run_and_import(commands, import_name):
            print(f"   ‚úÖ Installed via {label}")
            return True

    print(f"   ‚ùå Failed to install {package_name} via all methods")
    return False


print("="*80)
print("ENVIRONMENT SETUP - END-TO-END INSTALLATION")
print("="*80)

# Display Python environment
print(f"\nPython Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")
print(f"Architecture: {platform.architecture()[0]}")

# Required packages with versions
required_packages = [
    'psycopg2-binary>=2.9.0',
    'pandas>=2.0.0',
    'numpy>=1.24.0',
    'matplotlib>=3.7.0',
    'seaborn>=0.12.0'
]

# Map package names to import names
package_import_map = {
    'psycopg2-binary': 'psycopg2',
    'pandas': 'pandas',
    'numpy': 'numpy',
    'matplotlib': 'matplotlib',
    'seaborn': 'seaborn'
}

print("\n" + "="*80)
print("CHECKING AND INSTALLING REQUIRED PACKAGES")
print("="*80)

missing_packages = []    # specs that were not importable before this cell ran
installed_packages = []  # package names importable after this cell ran
failed_packages = []     # specs that no installation method could provide

for package_spec in required_packages:
    package_name = package_spec.split('>=')[0]
    import_name = package_import_map.get(package_name, package_name.replace('-', '_'))

    # Record what was missing up front; the helper prints the status lines.
    try:
        __import__(import_name)
    except ImportError:
        missing_packages.append(package_spec)

    if install_package_multiple_methods(package_spec, import_name):
        installed_packages.append(package_name)
    else:
        failed_packages.append(package_spec)
        print(f"      Manual install: pip install {package_spec}")

print("\n" + "="*80)
if failed_packages:
    # List only the packages that are still unavailable.
    print("‚ö†Ô∏è  Some packages failed to install. Please install manually:")
    for pkg in failed_packages:
        print(f"   pip install {pkg}")
    print("\n   Then restart the kernel and re-run this cell.")
else:
    print("‚úÖ All required packages are installed!")
    print("\n‚ö†Ô∏è  If packages were just installed, restart the kernel and re-run this cell.")
print("="*80)

# Now import all packages
print("\n" + "="*80)
print("IMPORTING PACKAGES")
print("="*80)

try:
    import psycopg2
    print("‚úÖ psycopg2 imported")
except ImportError as e:
    print(f"‚ùå Failed to import psycopg2: {e}")
    print("   Please restart kernel after installation")

try:
    import pandas as pd
    print("‚úÖ pandas imported")
except ImportError as e:
    print(f"‚ùå Failed to import pandas: {e}")

try:
    import numpy as np
    print("‚úÖ numpy imported")
except ImportError as e:
    print(f"‚ùå Failed to import numpy: {e}")

try:
    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.use('Agg')  # Non-interactive backend for notebooks
    print("‚úÖ matplotlib imported")
except ImportError as e:
    print(f"‚ùå Failed to import matplotlib: {e}")

try:
    import seaborn as sns
    print("‚úÖ seaborn imported")
except ImportError as e:
    print(f"‚ùå Failed to import seaborn: {e}")

try:
    from IPython.display import display, HTML, Markdown
    print("‚úÖ IPython.display imported")
except ImportError as e:
    print(f"‚ö†Ô∏è  IPython.display not available: {e}")

import json
from datetime import datetime
import warnings

warnings.filterwarnings('ignore')

# Set visualization style
try:
    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")
except Exception:
    # Styling is cosmetic: skip it if matplotlib/seaborn failed to import
    # or the named style is unknown to this matplotlib.
    pass

print("\n" + "="*80)
print("ENVIRONMENT SETUP COMPLETE")
print("="*80)

## Step 2: Database Configuration

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import os

import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-6"

# Connection settings. The defaults are the values this notebook has always
# used; the standard PG* environment variables override them so credentials
# need not be edited into the notebook.
PG_SETTINGS = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': int(os.environ.get('PGPORT', '5432')),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
    'dbname': 'postgres',  # Connect to default database first
}


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the ``postgres`` maintenance database using ``PG_SETTINGS``.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: if the global ``IS_COLAB`` is falsy.
        Exception: whatever ``psycopg2.connect`` raised, re-raised after
            printing troubleshooting hints.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    try:
        connection = psycopg2.connect(**PG_SETTINGS)
        print("‚úÖ Connected to PostgreSQL")
        return connection
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection
conn = create_postgresql_connection()

print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: {PG_SETTINGS['host']}")
print(f"Port: {PG_SETTINGS['port']}")
print(f"User: {PG_SETTINGS['user']}")

## Step 3: Database Initialization (Create Database, Load Schema, Load Data)

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import os

import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-6"

# Connection settings. The defaults are the values this notebook has always
# used; the standard PG* environment variables override them so credentials
# need not be edited into the notebook.
PG_SETTINGS = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': int(os.environ.get('PGPORT', '5432')),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
    'dbname': 'postgres',  # Connect to default database first
}


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the ``postgres`` maintenance database using ``PG_SETTINGS``.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: if the global ``IS_COLAB`` is falsy.
        Exception: whatever ``psycopg2.connect`` raised, re-raised after
            printing troubleshooting hints.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    try:
        connection = psycopg2.connect(**PG_SETTINGS)
        print("‚úÖ Connected to PostgreSQL")
        return connection
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection, unless an earlier cell (or an earlier run of this one)
# already left an open connection in `conn`: rebinding the name without
# closing it would leak that server session.
_previous_conn = globals().get('conn')
if _previous_conn is not None and getattr(_previous_conn, 'closed', 1) == 0:
    conn = _previous_conn
    print("Reusing existing PostgreSQL connection")
else:
    conn = create_postgresql_connection()

print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: {PG_SETTINGS['host']}")
print(f"Port: {PG_SETTINGS['port']}")
print(f"User: {PG_SETTINGS['user']}")

## Step 4: Load Query Metadata

## Embedded SQL Files and Queries

The following cells contain the complete database schema, data, and queries embedded directly in this notebook.
No external file dependencies required - everything is self-contained.

In [None]:
# ============================================================================
# EMBEDDED SCHEMA.SQL - DB-6
# ============================================================================
# This cell contains the complete database schema
# Execute this cell to load the schema into PostgreSQL
import psycopg2
# Schema SQL (embedded directly in notebook)
SCHEMA_SQL = """
-- Weather Data Pipeline Database Schema
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production schema for weather data pipeline system
-- GRIB2 Forecasts Table
-- Stores gridded forecast data from NDFD (National Digital Forecast Database)
CREATE TABLE grib2_forecasts (
    forecast_id VARCHAR(255) PRIMARY KEY,
    parameter_name VARCHAR(100) NOT NULL,
    forecast_time TIMESTAMP_NTZ NOT NULL,
    grid_cell_latitude NUMERIC(10, 7) NOT NULL,
    grid_cell_longitude NUMERIC(10, 7) NOT NULL,
    grid_cell_geom GEOGRAPHY,  -- Point geometry for grid cell center (PostgreSQL/Snowflake)
    parameter_value NUMERIC(10, 2),
    source_file VARCHAR(500),
    source_crs VARCHAR(50),
    target_crs VARCHAR(50),
    grid_resolution_x NUMERIC(10, 6),
    grid_resolution_y NUMERIC(10, 6),
    spatial_extent_west NUMERIC(10, 6),
    spatial_extent_south NUMERIC(10, 6),
    spatial_extent_east NUMERIC(10, 6),
    spatial_extent_north NUMERIC(10, 6),
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    transformation_status VARCHAR(50)
);
-- Shapefile Boundaries Table
-- Stores geographic boundaries (CWA, Fire Zones, Marine Zones, River Basins)
CREATE TABLE shapefile_boundaries (
    boundary_id VARCHAR(255) PRIMARY KEY,
    feature_type VARCHAR(50) NOT NULL,  -- 'CWA', 'FireZone', 'MarineZone', 'RiverBasin', 'County'
    feature_name VARCHAR(255),
    feature_identifier VARCHAR(100),
    boundary_geom GEOGRAPHY,  -- Polygon geometry
    source_shapefile VARCHAR(500),
    source_crs VARCHAR(50),
    target_crs VARCHAR(50),
    feature_count INTEGER,
    spatial_extent_west NUMERIC(10, 6),
    spatial_extent_south NUMERIC(10, 6),
    spatial_extent_east NUMERIC(10, 6),
    spatial_extent_north NUMERIC(10, 6),
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    transformation_status VARCHAR(50),
    state_code VARCHAR(2),
    office_code VARCHAR(10)
);
-- Real-Time Weather Observations Table
-- Stores point observations from NWS API
CREATE TABLE weather_observations (
    observation_id VARCHAR(255) PRIMARY KEY,
    station_id VARCHAR(50) NOT NULL,
    station_name VARCHAR(255),
    observation_time TIMESTAMP_NTZ NOT NULL,
    station_latitude NUMERIC(10, 7) NOT NULL,
    station_longitude NUMERIC(10, 7) NOT NULL,
    station_geom GEOGRAPHY,  -- Point geometry
    temperature NUMERIC(6, 2),
    dewpoint NUMERIC(6, 2),
    humidity NUMERIC(5, 2),
    wind_speed NUMERIC(6, 2),
    wind_direction INTEGER,
    pressure NUMERIC(8, 2),
    visibility NUMERIC(6, 2),
    sky_cover VARCHAR(50),
    precipitation_amount NUMERIC(8, 2),
    data_freshness_minutes INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    data_source VARCHAR(50) DEFAULT 'NWS_API'
);
-- GRIB2 Transformation Log Table
-- Tracks GRIB2 file processing and transformation operations
CREATE TABLE grib2_transformation_log (
    log_id VARCHAR(255) PRIMARY KEY,
    file_name VARCHAR(500) NOT NULL,
    source_path VARCHAR(1000),
    parameter_name VARCHAR(100) NOT NULL,
    forecast_time TIMESTAMP_NTZ,
    source_crs VARCHAR(50),
    target_crs VARCHAR(50),
    gdal_command VARCHAR(2000),
    output_file VARCHAR(1000),
    grid_resolution_x NUMERIC(10, 6),
    grid_resolution_y NUMERIC(10, 6),
    spatial_extent_west NUMERIC(10, 6),
    spatial_extent_south NUMERIC(10, 6),
    spatial_extent_east NUMERIC(10, 6),
    spatial_extent_north NUMERIC(10, 6),
    transformation_status VARCHAR(50),
    snowflake_table VARCHAR(255),
    load_timestamp TIMESTAMP_NTZ,
    processing_duration_seconds INTEGER,
    records_processed INTEGER,
    error_message VARCHAR(2000)
);
-- Shapefile Integration Log Table
-- Tracks shapefile processing and coordinate transformations
CREATE TABLE shapefile_integration_log (
    log_id VARCHAR(255) PRIMARY KEY,
    shapefile_name VARCHAR(500) NOT NULL,
    source_path VARCHAR(1000),
    feature_type VARCHAR(50) NOT NULL,
    feature_count INTEGER,
    source_crs VARCHAR(50),
    target_crs VARCHAR(50),
    ogr2ogr_command VARCHAR(2000),
    transformed_path VARCHAR(1000),
    spatial_extent_west NUMERIC(10, 6),
    spatial_extent_south NUMERIC(10, 6),
    spatial_extent_east NUMERIC(10, 6),
    spatial_extent_north NUMERIC(10, 6),
    transformation_status VARCHAR(50),
    snowflake_table VARCHAR(255),
    load_timestamp TIMESTAMP_NTZ,
    processing_duration_seconds INTEGER,
    error_message VARCHAR(2000)
);
-- Spatial Join Results Table
-- Documents spatial join operations between GRIB2 grid cells and shapefile boundaries
CREATE TABLE spatial_join_results (
    join_id VARCHAR(255) PRIMARY KEY,
    grib_file VARCHAR(500),
    shapefile_name VARCHAR(500),
    join_type VARCHAR(50),  -- 'Point-in-Polygon', 'Raster-to-Vector', 'Clip'
    gdal_command VARCHAR(2000),
    features_matched INTEGER,
    features_total INTEGER,
    match_percentage NUMERIC(5, 2),
    output_file VARCHAR(1000),
    join_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    forecast_id VARCHAR(255),
    boundary_id VARCHAR(255)
);
-- CRS Transformation Parameters Table
-- Documents coordinate reference system transformations and parameters
CREATE TABLE crs_transformation_parameters (
    transformation_id VARCHAR(255) PRIMARY KEY,
    source_crs VARCHAR(50) NOT NULL,
    target_crs VARCHAR(50) NOT NULL,
    source_crs_name VARCHAR(255),
    target_crs_name VARCHAR(255),
    transformation_method VARCHAR(50),  -- 'GDAL', 'PROJ', 'Custom'
    central_meridian NUMERIC(10, 6),
    false_easting NUMERIC(12, 2),
    false_northing NUMERIC(12, 2),
    scale_factor NUMERIC(10, 8),
    latitude_of_origin NUMERIC(10, 6),
    units VARCHAR(50),  -- 'degrees', 'meters', 'feet'
    accuracy_meters NUMERIC(10, 2),
    usage_count INTEGER DEFAULT 0
);
-- Data Quality Metrics Table
-- Tracks data quality metrics for weather products
CREATE TABLE data_quality_metrics (
    metric_id VARCHAR(255) PRIMARY KEY,
    metric_date DATE NOT NULL,
    data_source VARCHAR(50) NOT NULL,  -- 'GRIB2', 'Shapefile', 'API'
    files_processed INTEGER DEFAULT 0,
    files_successful INTEGER DEFAULT 0,
    files_failed INTEGER DEFAULT 0,
    success_rate NUMERIC(5, 2),
    total_records INTEGER DEFAULT 0,
    records_with_errors INTEGER DEFAULT 0,
    error_rate NUMERIC(5, 2),
    spatial_coverage_km2 NUMERIC(15, 2),
    temporal_coverage_hours INTEGER,
    data_freshness_minutes INTEGER,
    calculation_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Snowflake Load Status Table
-- Tracks data loading operations to Snowflake
CREATE TABLE snowflake_load_status (
    load_id VARCHAR(255) PRIMARY KEY,
    source_file VARCHAR(1000),
    snowflake_table VARCHAR(255) NOT NULL,
    load_start_time TIMESTAMP_NTZ NOT NULL,
    load_end_time TIMESTAMP_NTZ,
    load_duration_seconds INTEGER,
    records_loaded INTEGER DEFAULT 0,
    file_size_mb NUMERIC(10, 2),
    load_rate_mb_per_sec NUMERIC(10, 2),
    load_status VARCHAR(50),  -- 'Success', 'Failed', 'Partial'
    error_message VARCHAR(2000),
    snowflake_warehouse VARCHAR(255),
    data_source_type VARCHAR(50)
);
-- Weather Forecast Aggregations Table
-- Pre-aggregated forecast data for performance
CREATE TABLE weather_forecast_aggregations (
    aggregation_id VARCHAR(255) PRIMARY KEY,
    parameter_name VARCHAR(100) NOT NULL,
    forecast_time TIMESTAMP_NTZ NOT NULL,
    boundary_id VARCHAR(255),
    feature_type VARCHAR(50),
    feature_name VARCHAR(255),
    min_value NUMERIC(10, 2),
    max_value NUMERIC(10, 2),
    avg_value NUMERIC(10, 2),
    median_value NUMERIC(10, 2),
    std_dev_value NUMERIC(10, 2),
    grid_cells_count INTEGER,
    aggregation_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Weather Station Metadata Table
-- Metadata about weather observation stations
CREATE TABLE weather_stations (
    station_id VARCHAR(50) PRIMARY KEY,
    station_name VARCHAR(255),
    station_latitude NUMERIC(10, 7) NOT NULL,
    station_longitude NUMERIC(10, 7) NOT NULL,
    station_geom GEOGRAPHY,
    elevation_meters NUMERIC(8, 2),
    state_code VARCHAR(2),
    county_name VARCHAR(100),
    cwa_code VARCHAR(10),
    station_type VARCHAR(50),
    active_status BOOLEAN DEFAULT TRUE,
    first_observation_date DATE,
    last_observation_date DATE,
    update_frequency_minutes INTEGER
);
-- Create indexes for performance
CREATE INDEX idx_grib2_forecasts_parameter_time ON grib2_forecasts(parameter_name, forecast_time);
CREATE INDEX idx_grib2_forecasts_geom ON grib2_forecasts USING GIST(grid_cell_geom);
CREATE INDEX idx_shapefile_boundaries_type ON shapefile_boundaries(feature_type);
CREATE INDEX idx_shapefile_boundaries_geom ON shapefile_boundaries USING GIST(boundary_geom);
CREATE INDEX idx_weather_observations_station_time ON weather_observations(station_id, observation_time);
CREATE INDEX idx_weather_observations_geom ON weather_observations USING GIST(station_geom);
CREATE INDEX idx_spatial_join_forecast_boundary ON spatial_join_results(forecast_id, boundary_id);
CREATE INDEX idx_forecast_aggregations_time ON weather_forecast_aggregations(forecast_time, parameter_name);
-- Insurance Policy Areas Table
-- Production schema for insurance policy area definitions
CREATE TABLE insurance_policy_areas (
    policy_area_id VARCHAR(255) PRIMARY KEY,
    policy_type VARCHAR(100),
    coverage_type VARCHAR(100),
    policy_area_name VARCHAR(255),
    state_code VARCHAR(2),
    risk_zone VARCHAR(50),
    boundary_id VARCHAR(255),
    is_active BOOLEAN DEFAULT TRUE,
    base_rate_factor NUMERIC(10, 4),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insurance Rate Tables
-- Production schema for insurance rate calculations by forecast period
CREATE TABLE insurance_rate_tables (
    rate_table_id VARCHAR(255) PRIMARY KEY,
    policy_area_id VARCHAR(255),
    policy_type VARCHAR(100),
    coverage_type VARCHAR(100),
    forecast_period_start DATE,
    forecast_period_end DATE,
    forecast_day INTEGER,
    forecast_date DATE,
    base_rate NUMERIC(10, 4),
    risk_adjusted_rate NUMERIC(10, 4),
    risk_multiplier NUMERIC(10, 4),
    rate_tier VARCHAR(50),
    rate_category VARCHAR(50),
    confidence_level NUMERIC(5, 2),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Forecast Rate Mapping Table
-- Production schema for mapping forecasts to insurance rates
CREATE TABLE forecast_rate_mapping (
    mapping_id VARCHAR(255) PRIMARY KEY,
    forecast_id VARCHAR(255),
    rate_table_id VARCHAR(255),
    risk_factor_id VARCHAR(255),
    policy_area_id VARCHAR(255),
    parameter_name VARCHAR(100),
    forecast_date DATE,
    forecast_day INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- NEXRAD Radar Sites Table
-- Production schema for NEXRAD radar site locations
CREATE TABLE nexrad_radar_sites (
    site_id VARCHAR(50) PRIMARY KEY,
    site_name VARCHAR(255),
    operational_status VARCHAR(50),
    site_geom GEOGRAPHY,
    latitude NUMERIC(10, 7),
    longitude NUMERIC(10, 7),
    elevation_meters NUMERIC(8, 2),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Satellite Imagery Sources Table
-- Production schema for satellite imagery source metadata
CREATE TABLE satellite_imagery_sources (
    source_id VARCHAR(255) PRIMARY KEY,
    source_name VARCHAR(255),
    operational_status VARCHAR(50),
    coverage_area VARCHAR(100),
    satellite_type VARCHAR(100),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Satellite Imagery Products Table
-- Production schema for satellite imagery product data
CREATE TABLE satellite_imagery_products (
    product_id VARCHAR(255) PRIMARY KEY,
    source_id VARCHAR(255),
    scan_start_time TIMESTAMP,
    product_type VARCHAR(100),
    decompression_status VARCHAR(50),
    precipitation_rate_mmh NUMERIC(10, 4),
    grid_geom GEOGRAPHY,
    fire_detection_confidence INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create indexes for new tables
CREATE INDEX idx_insurance_policy_areas_boundary ON insurance_policy_areas(boundary_id);
CREATE INDEX idx_insurance_rate_tables_policy_area ON insurance_rate_tables(policy_area_id);
CREATE INDEX idx_forecast_rate_mapping_forecast ON forecast_rate_mapping(forecast_id);
CREATE INDEX idx_nexrad_radar_sites_geom ON nexrad_radar_sites USING GIST(site_geom);
CREATE INDEX idx_satellite_imagery_products_source ON satellite_imagery_products(source_id);
CREATE INDEX idx_satellite_imagery_products_geom ON satellite_imagery_products USING GIST(grid_geom);
-- Insurance Claims History Table
-- Production schema for insurance claims historical data
CREATE TABLE insurance_claims_history (
    claim_id VARCHAR(255) PRIMARY KEY,
    policy_area_id VARCHAR(255),
    claim_date DATE,
    loss_date DATE,
    forecast_available BOOLEAN,
    forecast_day INTEGER,
    claim_amount NUMERIC(12, 2),
    claim_type VARCHAR(100),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insurance Risk Factors Table
-- Production schema for insurance risk factor calculations
CREATE TABLE insurance_risk_factors (
    risk_factor_id VARCHAR(255) PRIMARY KEY,
    policy_area_id VARCHAR(255),
    forecast_period_start DATE,
    forecast_period_end DATE,
    forecast_day INTEGER,
    forecast_date DATE,
    parameter_name VARCHAR(100),
    overall_risk_score NUMERIC(10, 4),
    risk_category VARCHAR(50),
    cumulative_precipitation_risk NUMERIC(10, 4),
    temperature_extreme_risk NUMERIC(10, 4),
    wind_damage_risk NUMERIC(10, 4),
    freeze_risk NUMERIC(10, 4),
    flood_risk NUMERIC(10, 4),
    extreme_event_probability NUMERIC(5, 4),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- NEXRAD Storm Cells Table
-- Production schema for NEXRAD storm cell tracking
CREATE TABLE nexrad_storm_cells (
    storm_cell_id VARCHAR(255) PRIMARY KEY,
    site_id VARCHAR(50),
    scan_time TIMESTAMP,
    first_detection_time TIMESTAMP,
    last_detection_time TIMESTAMP,
    storm_center_geom GEOGRAPHY,
    storm_center_latitude NUMERIC(10, 7),
    storm_center_longitude NUMERIC(10, 7),
    max_reflectivity NUMERIC(6, 2),
    storm_severity VARCHAR(50),
    storm_type VARCHAR(50),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- NEXRAD Reflectivity Grid Table
-- Production schema for NEXRAD reflectivity grid data
CREATE TABLE nexrad_reflectivity_grid (
    grid_id VARCHAR(255) PRIMARY KEY,
    site_id VARCHAR(50),
    scan_time TIMESTAMP,
    grid_geom GEOGRAPHY,
    grid_latitude NUMERIC(10, 7),
    grid_longitude NUMERIC(10, 7),
    reflectivity_value NUMERIC(6, 2),
    elevation_angle NUMERIC(5, 2),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Add overall_risk_score to insurance_rate_tables
ALTER TABLE insurance_rate_tables ADD COLUMN IF NOT EXISTS overall_risk_score NUMERIC(10, 4);
-- Rate Table Comparison Table
-- Stores rate comparison results for different forecast days
CREATE TABLE IF NOT EXISTS rate_table_comparison (
    comparison_id VARCHAR(255) PRIMARY KEY,
    policy_area_id VARCHAR(255),
    policy_type VARCHAR(100),
    coverage_type VARCHAR(100),
    forecast_period_start DATE,
    forecast_period_end DATE,
    rate_volatility_percent NUMERIC(10, 4),
    avg_confidence_level NUMERIC(10, 4),
    recommendation_status VARCHAR(50)
);
-- Create indexes for new tables
CREATE INDEX idx_insurance_claims_history_policy_area ON insurance_claims_history(policy_area_id);
CREATE INDEX idx_insurance_risk_factors_policy_area ON insurance_risk_factors(policy_area_id);
CREATE INDEX idx_nexrad_storm_cells_site ON nexrad_storm_cells(site_id);
CREATE INDEX idx_nexrad_storm_cells_geom ON nexrad_storm_cells USING GIST(storm_center_geom);
CREATE INDEX idx_nexrad_reflectivity_grid_site ON nexrad_reflectivity_grid(site_id);
CREATE INDEX idx_nexrad_reflectivity_grid_geom ON nexrad_reflectivity_grid USING GIST(grid_geom);
"""
def execute_schema_sql(connection, sql=None):
    """Execute the embedded schema SQL, one statement at a time.

    The script is split on ';' and every non-empty chunk is sent to the
    database as a separate statement. Each statement is committed when it
    succeeds and rolled back when it fails, so a single failing statement
    cannot leave the transaction in an aborted state that makes the
    database reject every statement after it. Failures are reported as
    warnings and execution continues with the next statement.

    Args:
        connection: Open DB-API connection providing cursor(), commit()
            and rollback().
        sql: Optional SQL script to run instead of the module-level
            SCHEMA_SQL. Defaults to SCHEMA_SQL.

    Returns:
        True if the run reached the end (individual statement failures are
        only reported as warnings); False if an unexpected error
        interrupted the run.
    """
    if sql is None:
        sql = SCHEMA_SQL
    # Naive split: assumes no ';' occurs inside string literals or comments.
    statements = [chunk.strip() for chunk in sql.split(';') if chunk.strip()]
    total = len(statements)
    failed = 0
    cursor = connection.cursor()
    try:
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                connection.commit()
            except Exception as e:
                # Clear the failed transaction; without this rollback every
                # following statement would be rejected by PostgreSQL.
                connection.rollback()
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ⚠️  Statement {idx} warning: {error_msg}")
            else:
                print(f"  ✅ Executed statement {idx}/{total}")
        # Flush anything still pending on the connection.
        connection.commit()
        if failed:
            print(f"\n⚠️  Schema load finished: {failed} of {total} statements failed (see warnings above)")
        else:
            print("\n✅ Schema loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n❌ Error loading schema: {e}")
        return False
    finally:
        cursor.close()
# Auto-execute only when an earlier cell left a usable connection behind.
# globals().get(...) also rejects a `conn` that exists but is None, which
# would otherwise crash inside execute_schema_sql on connection.cursor().
if globals().get('conn') is not None:
    print("="*80)
    print("LOADING EMBEDDED SCHEMA")
    print("="*80)
    execute_schema_sql(conn)
else:
    print("⚠️  Database connection not found. Run connection cell first.")
    print("   Schema SQL is available in SCHEMA_SQL variable")


In [None]:
# ============================================================================
# EMBEDDED DATA.SQL - DB-6
# ============================================================================
# This cell contains sample data for the database
# Execute this cell to load data into PostgreSQL
import psycopg2
# Data SQL (embedded directly in notebook)
DATA_SQL = """
-- Sample Data for Weather Data Pipeline Database
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production sample data for weather data pipeline system
-- Insert sample CRS transformation parameters
INSERT INTO crs_transformation_parameters (transformation_id, source_crs, target_crs, source_crs_name, target_crs_name, transformation_method, units, accuracy_meters, usage_count) VALUES
('tran_001', 'EPSG:4326', 'EPSG:3857', 'WGS84 Geographic', 'Web Mercator', 'GDAL', 'meters', 0.5, 150),
('tran_002', 'EPSG:2227', 'EPSG:4326', 'California State Plane Zone 3', 'WGS84 Geographic', 'PROJ', 'degrees', 1.2, 85),
('tran_003', 'EPSG:4326', 'EPSG:4326', 'WGS84 Geographic', 'WGS84 Geographic', 'GDAL', 'degrees', 0.0, 200);
-- Insert sample shapefile boundaries (CWA - County Warning Areas)
INSERT INTO shapefile_boundaries (boundary_id, feature_type, feature_name, feature_identifier, source_shapefile, source_crs, target_crs, feature_count, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status, state_code, office_code) VALUES
('cwa_001', 'CWA', 'New York City', 'OKX', 'w_18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -74.5, 40.4, -73.5, 41.0, 'Success', 'NY', 'OKX'),
('cwa_002', 'CWA', 'Los Angeles', 'LOX', 'w_18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -118.8, 33.7, -117.5, 34.5, 'Success', 'CA', 'LOX'),
('cwa_003', 'CWA', 'Chicago', 'LOT', 'w_18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -88.2, 41.5, -87.3, 42.1, 'Success', 'IL', 'LOT'),
('cwa_004', 'CWA', 'Miami', 'MFL', 'w_18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -80.5, 25.3, -79.8, 26.2, 'Success', 'FL', 'MFL'),
('cwa_005', 'CWA', 'Seattle', 'SEW', 'w_18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -122.6, 47.3, -121.8, 47.8, 'Success', 'WA', 'SEW');
-- Insert sample shapefile boundaries (Fire Zones)
INSERT INTO shapefile_boundaries (boundary_id, feature_type, feature_name, feature_identifier, source_shapefile, source_crs, target_crs, feature_count, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status, state_code) VALUES
('fz_001', 'FireZone', 'Southern California Fire Zone 1', 'CAZ241', 'fz18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -118.5, 33.8, -117.2, 34.2, 'Success', 'CA'),
('fz_002', 'FireZone', 'Arizona Fire Zone 5', 'AZZ005', 'fz18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -112.2, 33.2, -111.5, 33.8, 'Success', 'AZ'),
('fz_003', 'FireZone', 'Colorado Fire Zone 12', 'COZ012', 'fz18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -105.1, 39.5, -104.5, 40.0, 'Success', 'CO');
-- Insert sample shapefile boundaries (Marine Zones)
INSERT INTO shapefile_boundaries (boundary_id, feature_type, feature_name, feature_identifier, source_shapefile, source_crs, target_crs, feature_count, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status, state_code) VALUES
('mz_001', 'MarineZone', 'New York Harbor', 'ANZ330', 'mz18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -74.2, 40.5, -73.8, 40.8, 'Success', 'NY'),
('mz_002', 'MarineZone', 'San Francisco Bay', 'PZZ530', 'mz18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -122.6, 37.6, -122.2, 37.9, 'Success', 'CA'),
('mz_003', 'MarineZone', 'Puget Sound', 'PZZ131', 'mz18mr25.shp', 'EPSG:4326', 'EPSG:4326', 1, -122.8, 47.4, -122.2, 47.8, 'Success', 'WA');
-- Insert sample weather stations
INSERT INTO weather_stations (station_id, station_name, station_latitude, station_longitude, elevation_meters, state_code, county_name, cwa_code, station_type, active_status, first_observation_date, last_observation_date, update_frequency_minutes) VALUES
('KNYC', 'New York Central Park', 40.785, -73.969, 40.0, 'NY', 'New York', 'OKX', 'ASOS', TRUE, '2020-01-01', '2026-02-03', 5),
('KLAX', 'Los Angeles International', 33.942, -118.408, 38.0, 'CA', 'Los Angeles', 'LOX', 'ASOS', TRUE, '2020-01-01', '2026-02-03', 5),
('KORD', 'Chicago O''Hare', 41.979, -87.907, 203.0, 'IL', 'Cook', 'LOT', 'ASOS', TRUE, '2020-01-01', '2026-02-03', 5),
('KMIA', 'Miami International', 25.795, -80.290, 2.0, 'FL', 'Miami-Dade', 'MFL', 'ASOS', TRUE, '2020-01-01', '2026-02-03', 5),
('KSEA', 'Seattle-Tacoma', 47.449, -122.309, 137.0, 'WA', 'King', 'SEW', 'ASOS', TRUE, '2020-01-01', '2026-02-03', 5);
-- Insert sample GRIB2 forecasts (temperature)
INSERT INTO grib2_forecasts (forecast_id, parameter_name, forecast_time, grid_cell_latitude, grid_cell_longitude, parameter_value, source_file, source_crs, target_crs, grid_resolution_x, grid_resolution_y, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status) VALUES
('grib_temp_001', 'Temperature', '2026-02-03 12:00:00', 40.785, -73.969, 45.5, 'ds.temp.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_temp_002', 'Temperature', '2026-02-03 12:00:00', 33.942, -118.408, 72.3, 'ds.temp.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_temp_003', 'Temperature', '2026-02-03 12:00:00', 41.979, -87.907, 38.2, 'ds.temp.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_temp_004', 'Temperature', '2026-02-03 12:00:00', 25.795, -80.290, 78.9, 'ds.temp.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_temp_005', 'Temperature', '2026-02-03 12:00:00', 47.449, -122.309, 52.1, 'ds.temp.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success');
-- Insert sample GRIB2 forecasts (precipitation)
INSERT INTO grib2_forecasts (forecast_id, parameter_name, forecast_time, grid_cell_latitude, grid_cell_longitude, parameter_value, source_file, source_crs, target_crs, grid_resolution_x, grid_resolution_y, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status) VALUES
('grib_qpf_001', 'Precipitation', '2026-02-03 12:00:00', 40.785, -73.969, 0.15, 'ds.qpf.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_qpf_002', 'Precipitation', '2026-02-03 12:00:00', 33.942, -118.408, 0.0, 'ds.qpf.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_qpf_003', 'Precipitation', '2026-02-03 12:00:00', 41.979, -87.907, 0.08, 'ds.qpf.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_qpf_004', 'Precipitation', '2026-02-03 12:00:00', 25.795, -80.290, 0.25, 'ds.qpf.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_qpf_005', 'Precipitation', '2026-02-03 12:00:00', 47.449, -122.309, 0.12, 'ds.qpf.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success');
-- Insert sample GRIB2 forecasts (wind speed)
INSERT INTO grib2_forecasts (forecast_id, parameter_name, forecast_time, grid_cell_latitude, grid_cell_longitude, parameter_value, source_file, source_crs, target_crs, grid_resolution_x, grid_resolution_y, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status) VALUES
('grib_wspd_001', 'WindSpeed', '2026-02-03 12:00:00', 40.785, -73.969, 12.5, 'ds.wspd.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_wspd_002', 'WindSpeed', '2026-02-03 12:00:00', 33.942, -118.408, 8.3, 'ds.wspd.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_wspd_003', 'WindSpeed', '2026-02-03 12:00:00', 41.979, -87.907, 15.2, 'ds.wspd.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_wspd_004', 'WindSpeed', '2026-02-03 12:00:00', 25.795, -80.290, 10.7, 'ds.wspd.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success'),
('grib_wspd_005', 'WindSpeed', '2026-02-03 12:00:00', 47.449, -122.309, 18.4, 'ds.wspd.bin', 'EPSG:4326', 'EPSG:3857', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success');
-- Insert sample weather observations
INSERT INTO weather_observations (observation_id, station_id, station_name, observation_time, station_latitude, station_longitude, temperature, dewpoint, humidity, wind_speed, wind_direction, pressure, visibility, sky_cover, precipitation_amount, data_freshness_minutes, data_source) VALUES
('obs_001', 'KNYC', 'New York Central Park', '2026-02-03 12:00:00', 40.785, -73.969, 45.2, 42.1, 88.5, 12.3, 270, 1013.2, 10.0, 'Overcast', 0.15, 5, 'NWS_API'),
('obs_002', 'KLAX', 'Los Angeles International', '2026-02-03 12:00:00', 33.942, -118.408, 72.1, 65.3, 78.2, 8.1, 180, 1015.8, 10.0, 'Clear', 0.0, 5, 'NWS_API'),
('obs_003', 'KORD', 'Chicago O''Hare', '2026-02-03 12:00:00', 41.979, -87.907, 38.5, 35.2, 85.1, 15.5, 320, 1012.5, 8.0, 'Partly Cloudy', 0.08, 5, 'NWS_API'),
('obs_004', 'KMIA', 'Miami International', '2026-02-03 12:00:00', 25.795, -80.290, 78.5, 74.2, 90.3, 10.5, 120, 1014.3, 10.0, 'Scattered Clouds', 0.25, 5, 'NWS_API'),
('obs_005', 'KSEA', 'Seattle-Tacoma', '2026-02-03 12:00:00', 47.449, -122.309, 52.3, 48.1, 92.7, 18.2, 250, 1011.9, 7.0, 'Overcast', 0.12, 5, 'NWS_API');
-- Insert sample GRIB2 transformation log entries
INSERT INTO grib2_transformation_log (log_id, file_name, source_path, parameter_name, forecast_time, source_crs, target_crs, gdal_command, output_file, grid_resolution_x, grid_resolution_y, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status, snowflake_table, load_timestamp, processing_duration_seconds, records_processed) VALUES
('log_grib_001', 'ds.temp.bin', '/data/raw/grib2/ds.temp.bin', 'Temperature', '2026-02-03 12:00:00', 'EPSG:4326', 'EPSG:3857', 'gdal_translate -of GTiff -a_srs EPSG:3857', '/data/transformed/grib2/ds.temp_transformed.tif', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success', 'grib2_forecasts', '2026-02-03 12:05:00', 45, 50000),
('log_grib_002', 'ds.qpf.bin', '/data/raw/grib2/ds.qpf.bin', 'Precipitation', '2026-02-03 12:00:00', 'EPSG:4326', 'EPSG:3857', 'gdal_translate -of GTiff -a_srs EPSG:3857', '/data/transformed/grib2/ds.qpf_transformed.tif', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success', 'grib2_forecasts', '2026-02-03 12:05:00', 42, 50000),
('log_grib_003', 'ds.wspd.bin', '/data/raw/grib2/ds.wspd.bin', 'WindSpeed', '2026-02-03 12:00:00', 'EPSG:4326', 'EPSG:3857', 'gdal_translate -of GTiff -a_srs EPSG:3857', '/data/transformed/grib2/ds.wspd_transformed.tif', 0.025, 0.025, -125.0, 24.0, -66.0, 49.0, 'Success', 'grib2_forecasts', '2026-02-03 12:05:00', 38, 50000);
-- Insert sample shapefile integration log entries
INSERT INTO shapefile_integration_log (log_id, shapefile_name, source_path, feature_type, feature_count, source_crs, target_crs, ogr2ogr_command, transformed_path, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, transformation_status, snowflake_table, load_timestamp, processing_duration_seconds) VALUES
('log_shp_001', 'w_18mr25.shp', '/data/raw/shapefiles/cwa/w_18mr25.shp', 'CWA', 122, 'EPSG:4326', 'EPSG:4326', 'ogr2ogr -t_srs EPSG:4326', '/data/transformed/shapefiles/cwa/w_18mr25_wgs84.shp', -125.0, 24.0, -66.0, 49.0, 'Success', 'shapefile_boundaries', '2026-02-03 11:00:00', 120),
('log_shp_002', 'fz18mr25.shp', '/data/raw/shapefiles/firezones/fz18mr25.shp', 'FireZone', 350, 'EPSG:4326', 'EPSG:4326', 'ogr2ogr -t_srs EPSG:4326', '/data/transformed/shapefiles/firezones/fz18mr25_wgs84.shp', -125.0, 24.0, -66.0, 49.0, 'Success', 'shapefile_boundaries', '2026-02-03 11:00:00', 95),
('log_shp_003', 'mz18mr25.shp', '/data/raw/shapefiles/marine/mz18mr25.shp', 'MarineZone', 85, 'EPSG:4326', 'EPSG:4326', 'ogr2ogr -t_srs EPSG:4326', '/data/transformed/shapefiles/marine/mz18mr25_wgs84.shp', -125.0, 24.0, -66.0, 49.0, 'Success', 'shapefile_boundaries', '2026-02-03 11:00:00', 65);
-- Insert sample spatial join results
INSERT INTO spatial_join_results (join_id, grib_file, shapefile_name, join_type, gdal_command, features_matched, features_total, match_percentage, output_file, forecast_id, boundary_id) VALUES
('join_001', 'ds.temp.bin', 'w_18mr25.shp', 'Point-in-Polygon', 'gdalwarp -cutline', 45000, 50000, 90.00, '/data/transformed/spatial_joins/temp_cwa_clipped.tif', 'grib_temp_001', 'cwa_001'),
('join_002', 'ds.qpf.bin', 'fz18mr25.shp', 'Raster-to-Vector', 'gdalwarp -cutline', 32000, 50000, 64.00, '/data/transformed/spatial_joins/qpf_firezone_clipped.tif', 'grib_qpf_001', 'fz_001'),
('join_003', 'ds.wspd.bin', 'mz18mr25.shp', 'Clip', 'gdalwarp -cutline -crop_to_cutline', 15000, 50000, 30.00, '/data/transformed/spatial_joins/wspd_marine_clipped.tif', 'grib_wspd_001', 'mz_001');
-- Insert sample data quality metrics
INSERT INTO data_quality_metrics (metric_id, metric_date, data_source, files_processed, files_successful, files_failed, success_rate, total_records, records_with_errors, error_rate, spatial_coverage_km2, temporal_coverage_hours, data_freshness_minutes) VALUES
('metric_001', '2026-02-03', 'GRIB2', 15, 14, 1, 93.33, 750000, 2500, 0.33, 7850000.00, 168, 5),
('metric_002', '2026-02-03', 'Shapefile', 8, 8, 0, 100.00, 557, 0, 0.00, 7850000.00, 0, 60),
('metric_003', '2026-02-03', 'API', 0, 0, 0, 0.00, 5000, 25, 0.50, 0.00, 24, 5);
-- Insert sample Snowflake load status entries
INSERT INTO snowflake_load_status (load_id, source_file, snowflake_table, load_start_time, load_end_time, load_duration_seconds, records_loaded, file_size_mb, load_rate_mb_per_sec, load_status, snowflake_warehouse, data_source_type) VALUES
('load_001', '/data/transformed/grib2/ds.temp_transformed.tif', 'grib2_forecasts', '2026-02-03 12:05:00', '2026-02-03 12:05:45', 45, 50000, 125.5, 2.79, 'Success', 'WEATHER_WH', 'GRIB2'),
('load_002', '/data/transformed/shapefiles/cwa/w_18mr25_wgs84.shp', 'shapefile_boundaries', '2026-02-03 11:00:00', '2026-02-03 11:02:00', 120, 122, 2.3, 0.02, 'Success', 'WEATHER_WH', 'Shapefile'),
('load_003', '/data/api/observations_20260203.json', 'weather_observations', '2026-02-03 12:00:00', '2026-02-03 12:00:15', 15, 5000, 0.5, 0.03, 'Success', 'WEATHER_WH', 'API');
"""
def execute_data_sql(connection, sql=None):
    """Execute the embedded sample-data SQL, one statement at a time.

    The script is split on ';' and every non-empty chunk is sent to the
    database as a separate statement. Each statement is committed when it
    succeeds and rolled back when it fails, so a single failing INSERT
    cannot leave the transaction in an aborted state that makes the
    database reject every statement after it. Failures are reported as
    warnings and execution continues with the next statement.

    Args:
        connection: Open DB-API connection providing cursor(), commit()
            and rollback().
        sql: Optional SQL script to run instead of the module-level
            DATA_SQL. Defaults to DATA_SQL.

    Returns:
        True if the run reached the end (individual statement failures are
        only reported as warnings); False if an unexpected error
        interrupted the run.
    """
    if sql is None:
        sql = DATA_SQL
    # Naive split: assumes no ';' occurs inside string literals or comments.
    statements = [chunk.strip() for chunk in sql.split(';') if chunk.strip()]
    total = len(statements)
    failed = 0
    cursor = connection.cursor()
    try:
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                connection.commit()
            except Exception as e:
                # Clear the failed transaction; without this rollback every
                # following statement would be rejected by PostgreSQL.
                connection.rollback()
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ⚠️  Statement {idx} warning: {error_msg}")
            else:
                print(f"  ✅ Executed statement {idx}/{total}")
        # Flush anything still pending on the connection.
        connection.commit()
        if failed:
            print(f"\n⚠️  Data load finished: {failed} of {total} statements failed (see warnings above)")
        else:
            print("\n✅ Data loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n❌ Error loading data: {e}")
        return False
    finally:
        cursor.close()
# Auto-execute only when an earlier cell left a usable connection behind.
# globals().get(...) also rejects a `conn` that exists but is None, which
# would otherwise crash inside execute_data_sql on connection.cursor().
if globals().get('conn') is not None:
    print("="*80)
    print("LOADING EMBEDDED DATA")
    print("="*80)
    execute_data_sql(conn)
else:
    print("⚠️  Database connection not found. Run connection cell first.")
    print("   Data SQL is available in DATA_SQL variable")


In [None]:
# ============================================================================
# EMBEDDED QUERIES.JSON - DB-6
# ============================================================================
# This cell contains all query metadata embedded directly in the notebook
# No external file dependencies required
import json
# Queries data (embedded directly in notebook)
QUERIES_DATA = {
  "source_file": "/Users/machine/Documents/AQ/db/db-6/queries/queries.md",
  "extraction_timestamp": "20260208-2109",
  "total_queries": 30,
  "queries": [
    {
      "number": 1,
      "title": "Production-Grade Spatial Weather Forecast Analysis with Multi-Level CTE Nesting and Geospatial Aggregations",
      "description": "Use Case: Custom Weather Impact Modeling - Regional Forecast Accuracy Assessment for Insurance Risk Modeling Description:
    Enterprise-level spatial weather forecast analysis with multi-level CTE nesting, spatial aggregations within boundaries, forecast accuracy metrics, temporal analysis, and advanced window functions. Demonstrates production patterns used by NOAA and weather forecasting platforms. Business Value: Forecast accuracy report by geographic boundary (CWA, Fire Zones) showing forecast ",
      "complexity": "Deep nested CTEs (7+ levels), spatial operations (ST_WITHIN, ST_DISTANCE), complex aggregations, window functions with multiple frame clauses, percentile calculations, time-series analysis, correlated subqueries",
      "expected_output": "Query results",
      "sql": "WITH forecast_parameter_cohorts AS (\n    -- First CTE: Identify forecast parameter cohorts and time windows\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        gf.grid_cell_latitude,\n        gf.grid_cell_longitude,\n        gf.grid_cell_geom,\n        gf.parameter_value,\n        gf.source_file,\n        DATE_TRUNC('hour', gf.forecast_time) AS forecast_hour,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_date,\n        EXTRACT(HOUR FROM gf.forecast_time) AS forecast_hour_num,\n        EXTRACT(EPOCH FROM (gf.forecast_time - CURRENT_TIMESTAMP)) / 3600 AS hours_until_forecast\n    FROM grib2_forecasts gf\n    WHERE gf.transformation_status = 'Success'\n),\nspatial_boundary_matching AS (\n    -- Second CTE: Match forecasts to spatial boundaries using spatial operations\n    SELECT\n        fpc.forecast_id,\n        fpc.parameter_name,\n        fpc.forecast_time,\n        fpc.forecast_hour,\n        fpc.forecast_date,\n        fpc.grid_cell_latitude,\n        fpc.grid_cell_longitude,\n        fpc.parameter_value,\n        fpc.source_file,\n        fpc.hours_until_forecast,\n        sb.boundary_id,\n        sb.feature_type,\n        sb.feature_name,\n        sb.feature_identifier,\n        sb.state_code,\n        sb.office_code,\n        -- Spatial distance calculation (compatible across databases)\n        CASE\n            WHEN sb.boundary_geom IS NOT NULL AND fpc.grid_cell_geom IS NOT NULL THEN\n                ST_DISTANCE(sb.boundary_geom::geography, fpc.grid_cell_geom::geography)\n            ELSE NULL\n        END AS spatial_distance_meters,\n        -- Check if point is within boundary (using standard spatial functions)\n        CASE\n            WHEN sb.boundary_geom IS NOT NULL AND fpc.grid_cell_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(fpc.grid_cell_geom::geometry, sb.boundary_geom::geometry) THEN TRUE\n                    ELSE FALSE\n                
END\n            ELSE NULL\n        END AS is_within_boundary\n    FROM forecast_parameter_cohorts fpc\n    LEFT JOIN shapefile_boundaries sb ON (\n        sb.boundary_geom IS NOT NULL\n        AND fpc.grid_cell_geom IS NOT NULL\n        AND ST_DISTANCE(sb.boundary_geom::geography, fpc.grid_cell_geom::geography) < 50000\n    )\n),\nboundary_forecast_aggregations AS (\n    -- Third CTE: Aggregate forecasts by boundary with spatial filtering\n    SELECT\n        sbm.boundary_id,\n        sbm.feature_type,\n        sbm.feature_name,\n        sbm.feature_identifier,\n        sbm.state_code,\n        sbm.office_code,\n        sbm.parameter_name,\n        sbm.forecast_time,\n        sbm.forecast_hour,\n        sbm.forecast_date,\n        COUNT(DISTINCT sbm.forecast_id) AS grid_cells_count,\n        COUNT(CASE WHEN sbm.is_within_boundary = TRUE THEN 1 END) AS cells_within_boundary,\n        COUNT(CASE WHEN sbm.is_within_boundary = FALSE THEN 1 END) AS cells_near_boundary,\n        MIN(sbm.parameter_value) AS min_forecast_value,\n        MAX(sbm.parameter_value) AS max_forecast_value,\n        AVG(sbm.parameter_value) AS avg_forecast_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sbm.parameter_value) AS median_forecast_value,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY sbm.parameter_value) AS q1_forecast_value,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY sbm.parameter_value) AS q3_forecast_value,\n        STDDEV(sbm.parameter_value) AS stddev_forecast_value,\n        AVG(sbm.spatial_distance_meters) AS avg_distance_to_boundary,\n        MIN(sbm.spatial_distance_meters) AS min_distance_to_boundary\n    FROM spatial_boundary_matching sbm\n    WHERE sbm.is_within_boundary IS NOT NULL\n    GROUP BY\n        sbm.boundary_id,\n        sbm.feature_type,\n        sbm.feature_name,\n        sbm.feature_identifier,\n        sbm.state_code,\n        sbm.office_code,\n        sbm.parameter_name,\n        sbm.forecast_time,\n        sbm.forecast_hour,\n 
       sbm.forecast_date\n),\nobservation_forecast_comparison AS (\n    -- Fourth CTE: Compare forecasts with actual observations for accuracy analysis\n    SELECT\n        bfa.boundary_id,\n        bfa.feature_type,\n        bfa.feature_name,\n        bfa.feature_identifier,\n        bfa.state_code,\n        bfa.parameter_name,\n        bfa.forecast_time,\n        bfa.forecast_hour,\n        bfa.forecast_date,\n        bfa.grid_cells_count,\n        bfa.cells_within_boundary,\n        bfa.min_forecast_value,\n        bfa.max_forecast_value,\n        bfa.avg_forecast_value,\n        bfa.median_forecast_value,\n        bfa.q1_forecast_value,\n        bfa.q3_forecast_value,\n        bfa.stddev_forecast_value,\n        -- Find nearest observation station\n        (\n            SELECT wo.station_id\n            FROM weather_observations wo\n            INNER JOIN weather_stations ws ON wo.station_id = ws.station_id\n            WHERE ws.cwa_code = bfa.office_code\n                AND wo.observation_time BETWEEN bfa.forecast_time - INTERVAL '1 hour' AND bfa.forecast_time + INTERVAL '1 hour'\n            ORDER BY\n                CASE\n                    WHEN bfa.parameter_name = 'Temperature' THEN ABS(wo.temperature - bfa.avg_forecast_value)\n                    WHEN bfa.parameter_name = 'Precipitation' THEN ABS(COALESCE(wo.precipitation_amount, 0) - bfa.avg_forecast_value)\n                    WHEN bfa.parameter_name = 'WindSpeed' THEN ABS(wo.wind_speed - bfa.avg_forecast_value)\n                    ELSE 999999\n                END\n            LIMIT 1\n        ) AS nearest_station_id,\n        -- Get actual observation value\n        (\n            SELECT\n                CASE\n                    WHEN bfa.parameter_name = 'Temperature' THEN wo.temperature\n                    WHEN bfa.parameter_name = 'Precipitation' THEN COALESCE(wo.precipitation_amount, 0)\n                    WHEN bfa.parameter_name = 'WindSpeed' THEN wo.wind_speed\n                    ELSE 
NULL\n                END\n            FROM weather_observations wo\n            INNER JOIN weather_stations ws ON wo.station_id = ws.station_id\n            WHERE ws.cwa_code = bfa.office_code\n                AND wo.observation_time BETWEEN bfa.forecast_time - INTERVAL '1 hour' AND bfa.forecast_time + INTERVAL '1 hour'\n            ORDER BY\n                CASE\n                    WHEN bfa.parameter_name = 'Temperature' THEN ABS(wo.temperature - bfa.avg_forecast_value)\n                    WHEN bfa.parameter_name = 'Precipitation' THEN ABS(COALESCE(wo.precipitation_amount, 0) - bfa.avg_forecast_value)\n                    WHEN bfa.parameter_name = 'WindSpeed' THEN ABS(wo.wind_speed - bfa.avg_forecast_value)\n                    ELSE 999999\n                END\n            LIMIT 1\n        ) AS actual_observation_value\n    FROM boundary_forecast_aggregations bfa\n),\nforecast_accuracy_metrics AS (\n    -- Fifth CTE:
    Calculate forecast accuracy metrics with window functions\n    SELECT\n        ofc.boundary_id,\n        ofc.feature_type,\n        ofc.feature_name,\n        ofc.feature_identifier,\n        ofc.state_code,\n        ofc.parameter_name,\n        ofc.forecast_time,\n        ofc.forecast_hour,\n        ofc.forecast_date,\n        ofc.grid_cells_count,\n        ofc.cells_within_boundary,\n        ofc.min_forecast_value,\n        ofc.max_forecast_value,\n        ROUND(CAST(CAST(ofc.avg_forecast_value AS NUMERIC) AS NUMERIC), 2) AS avg_forecast_value,\n        ROUND(CAST(CAST(ofc.median_forecast_value AS NUMERIC) AS NUMERIC), 2) AS median_forecast_value,\n        ROUND(CAST(CAST(ofc.q1_forecast_value AS NUMERIC) AS NUMERIC), 2) AS q1_forecast_value,\n        ROUND(CAST(CAST(ofc.q3_forecast_value AS NUMERIC) AS NUMERIC), 2) AS q3_forecast_value,\n        ROUND(CAST(CAST(ofc.stddev_forecast_value AS NUMERIC) AS NUMERIC), 2) AS stddev_forecast_value,\n        ofc.nearest_station_id,\n        ROUND(CAST(CAST(ofc.actual_observation_value AS NUMERIC) AS NUMERIC), 2) AS actual_observation_value,\n        -- Forecast error calculations\n        CASE\n            WHEN ofc.actual_observation_value IS NOT NULL THEN\n                ABS(ofc.avg_forecast_value - ofc.actual_observation_value)\n            ELSE NULL\n        END AS absolute_error,\n        CASE\n            WHEN ofc.actual_observation_value IS NOT NULL AND ofc.actual_observation_value != 0 THEN\n                ABS((ofc.avg_forecast_value - ofc.actual_observation_value) / ofc.actual_observation_value) * 100\n            ELSE NULL\n        END AS percentage_error,\n        -- Window functions for accuracy trends\n        AVG(CASE WHEN ofc.actual_observation_value IS NOT NULL THEN ABS(ofc.avg_forecast_value - ofc.actual_observation_value) ELSE NULL END)\n            OVER (\n                PARTITION BY ofc.boundary_id, ofc.parameter_name\n                ORDER BY ofc.forecast_time\n                ROWS BETWEEN 9 
PRECEDING AND CURRENT ROW\n            ) AS moving_avg_error_10,\n        STDDEV(CASE WHEN ofc.actual_observation_value IS NOT NULL THEN ABS(ofc.avg_forecast_value - ofc.actual_observation_value) ELSE NULL END)\n            OVER (\n                PARTITION BY ofc.boundary_id, ofc.parameter_name\n                ORDER BY ofc.forecast_time\n                ROWS BETWEEN 19 PRECEDING AND CURRENT ROW\n            ) AS moving_stddev_error_20\n    FROM observation_forecast_comparison ofc\n),\ntemporal_forecast_analysis AS (\n    -- Sixth CTE: Temporal analysis with multiple window function patterns\n    SELECT\n        fam.boundary_id,\n        fam.feature_type,\n        fam.feature_name,\n        fam.feature_identifier,\n        fam.state_code,\n        fam.parameter_name,\n        fam.forecast_time,\n        fam.forecast_hour,\n        fam.forecast_date,\n        fam.grid_cells_count,\n        fam.avg_forecast_value,\n        fam.median_forecast_value,\n        fam.actual_observation_value,\n        ROUND(CAST(CAST(fam.absolute_error AS NUMERIC) AS NUMERIC), 2) AS absolute_error,\n        ROUND(CAST(CAST(fam.percentage_error AS NUMERIC) AS NUMERIC), 2) AS percentage_error,\n        ROUND(CAST(CAST(fam.moving_avg_error_10 AS NUMERIC) AS NUMERIC), 2) AS moving_avg_error_10,\n        ROUND(CAST(CAST(fam.moving_stddev_error_20 AS NUMERIC) AS NUMERIC), 2) AS moving_stddev_error_20,\n        -- Time-series window functions\n        LAG(fam.avg_forecast_value, 1) OVER (\n            PARTITION BY fam.boundary_id, fam.parameter_name\n            ORDER BY fam.forecast_time\n        ) AS prev_forecast_value,\n        LEAD(fam.avg_forecast_value, 1) OVER (\n            PARTITION BY fam.boundary_id, fam.parameter_name\n            ORDER BY fam.forecast_time\n        ) AS next_forecast_value,\n        -- Running totals and cumulative metrics\n        SUM(fam.avg_forecast_value) OVER (\n            PARTITION BY fam.boundary_id, fam.parameter_name\n            ORDER BY 
fam.forecast_time\n            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n        ) AS cumulative_forecast_sum,\n        AVG(fam.avg_forecast_value) OVER (\n            PARTITION BY fam.boundary_id, fam.parameter_name\n            ORDER BY fam.forecast_time\n            RANGE BETWEEN INTERVAL '24 hours' PRECEDING AND CURRENT ROW\n        ) AS avg_forecast_24h_range,\n        -- Ranking functions\n        ROW_NUMBER() OVER (\n            PARTITION BY fam.boundary_id, fam.parameter_name\n            ORDER BY fam.forecast_time DESC\n        ) AS forecast_recency_rank,\n        PERCENT_RANK() OVER (\n            PARTITION BY fam.feature_type\n            ORDER BY fam.avg_forecast_value DESC\n        ) AS forecast_value_percentile,\n        NTILE(5) OVER (\n            PARTITION BY fam.feature_type\n            ORDER BY fam.avg_forecast_value DESC\n        ) AS forecast_value_quintile\n    FROM forecast_accuracy_metrics fam\n),\nfinal_forecast_analytics AS (\n    -- Seventh CTE: Final analytics with comprehensive metrics and classifications\n    SELECT\n        tfa.boundary_id,\n        tfa.feature_type,\n        tfa.feature_name,\n        tfa.feature_identifier,\n        tfa.state_code,\n        tfa.parameter_name,\n        tfa.forecast_time,\n        tfa.forecast_hour,\n        tfa.forecast_date,\n        tfa.grid_cells_count,\n        tfa.avg_forecast_value,\n        tfa.median_forecast_value,\n        tfa.actual_observation_value,\n        tfa.absolute_error,\n        tfa.percentage_error,\n        tfa.moving_avg_error_10,\n        tfa.moving_stddev_error_20,\n        tfa.prev_forecast_value,\n        tfa.next_forecast_value,\n        tfa.cumulative_forecast_sum,\n        ROUND(CAST(CAST(tfa.avg_forecast_24h_range AS NUMERIC) AS NUMERIC), 2) AS avg_forecast_24h_range,\n        tfa.forecast_recency_rank,\n        ROUND(CAST(CAST(tfa.forecast_value_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS forecast_value_percentile,\n        tfa.forecast_value_quintile,\n 
       -- Forecast trend analysis\n        CASE\n            WHEN tfa.prev_forecast_value IS NOT NULL THEN\n                tfa.avg_forecast_value - tfa.prev_forecast_value\n            ELSE NULL\n        END AS forecast_change_from_previous,\n        CASE\n            WHEN tfa.prev_forecast_value IS NOT NULL AND tfa.prev_forecast_value != 0 THEN\n                ((tfa.avg_forecast_value - tfa.prev_forecast_value) / ABS(tfa.prev_forecast_value)) * 100\n            ELSE NULL\n        END AS forecast_change_percentage,\n        -- Accuracy classification\n        CASE\n            WHEN tfa.absolute_error IS NULL THEN 'No Observation'\n            WHEN tfa.absolute_error <= 2.0 THEN 'Highly Accurate'\n            WHEN tfa.absolute_error <= 5.0 THEN 'Accurate'\n            WHEN tfa.absolute_error <= 10.0 THEN 'Moderate Accuracy'\n            ELSE 'Low Accuracy'\n        END AS accuracy_classification,\n        -- Forecast value classification\n        CASE\n            WHEN tfa.parameter_name = 'Temperature' THEN\n                CASE\n                    WHEN tfa.avg_forecast_value < 32 THEN 'Freezing'\n                    WHEN tfa.avg_forecast_value < 50 THEN 'Cold'\n                    WHEN tfa.avg_forecast_value < 70 THEN 'Moderate'\n                    WHEN tfa.avg_forecast_value < 85 THEN 'Warm'\n                    ELSE 'Hot'\n                END\n            WHEN tfa.parameter_name = 'Precipitation' THEN\n                CASE\n                    WHEN tfa.avg_forecast_value = 0 THEN 'No Precipitation'\n                    WHEN tfa.avg_forecast_value < 0.1 THEN 'Light'\n                    WHEN tfa.avg_forecast_value < 0.5 THEN 'Moderate'\n                    ELSE 'Heavy'\n                END\n            WHEN tfa.parameter_name = 'WindSpeed' THEN\n                CASE\n                    WHEN tfa.avg_forecast_value < 10 THEN 'Calm'\n                    WHEN tfa.avg_forecast_value < 20 THEN 'Light Breeze'\n                    WHEN tfa.avg_forecast_value < 30 
THEN 'Moderate Wind'\n                    ELSE 'Strong Wind'\n                END\n            ELSE 'Unknown'\n        END AS forecast_category\n    FROM temporal_forecast_analysis tfa\n)\nSELECT\n    boundary_id,\n    feature_type,\n    feature_name,\n    feature_identifier,\n    state_code,\n    parameter_name,\n    forecast_time,\n    forecast_hour,\n    forecast_date,\n    grid_cells_count,\n    avg_forecast_value,\n    median_forecast_value,\n    actual_observation_value,\n    absolute_error,\n    percentage_error,\n    moving_avg_error_10,\n    moving_stddev_error_20,\n    ROUND(CAST(CAST(forecast_change_from_previous AS NUMERIC) AS NUMERIC), 2) AS forecast_change_from_previous,\n    ROUND(CAST(CAST(forecast_change_percentage AS NUMERIC) AS NUMERIC), 2) AS forecast_change_percentage,\n    accuracy_classification,\n    forecast_category,\n    forecast_value_percentile,\n    forecast_value_quintile,\n    forecast_recency_rank\nFROM final_forecast_analytics\nWHERE forecast_recency_rank <= 100\nORDER BY forecast_time DESC, boundary_id, parameter_name;",
      "line_number": 194,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.028849,
        "row_count": 0,
        "column_count": 24,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 2,
      "title": "Recursive Spatial Boundary Hierarchy Analysis with Multi-Hop Geospatial Traversal",
      "description": "Use Case: Custom Map Development - Multi-Level Geographic Hierarchy Visualization for Agriculture Insurance Description:
    Enterprise-level recursive spatial analysis using recursive CTE for multi-level boundary relationships, spatial hierarchy traversal, boundary intersection detection, and geospatial path discovery. Implements production patterns for analyzing nested geographic boundaries (e.g., counties within states, fire zones within CWAs). Business Value: Spatial hierarchy relationships show",
      "complexity": "Advanced recursive CTE with spatial operations, multi-hop traversal, cycle detection, path weight calculations, spatial intersection analysis, multiple CTE nesting levels (6+)",
      "expected_output": "Query results",
      "sql": "WITH RECURSIVE boundary_spatial_hierarchy AS (\n    -- Anchor CTE: Direct spatial relationships between boundaries\n    SELECT DISTINCT\n        sb1.boundary_id AS parent_boundary_id,\n        sb1.feature_type AS parent_feature_type,\n        sb1.feature_name AS parent_feature_name,\n        sb2.boundary_id AS child_boundary_id,\n        sb2.feature_type AS child_feature_type,\n        sb2.feature_name AS child_feature_name,\n        1 AS hierarchy_level,\n        ARRAY[sb1.boundary_id::VARCHAR(255), sb2.boundary_id::VARCHAR(255)]::VARCHAR[] AS boundary_path,\n        CASE\n            WHEN sb1.boundary_geom IS NOT NULL AND sb2.boundary_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(sb2.boundary_geom::geometry, sb1.boundary_geom::geometry) THEN 'Contains'\n                    WHEN ST_Within(sb1.boundary_geom::geometry, sb2.boundary_geom::geometry) THEN 'Contained By'\n                    WHEN ST_INTERSECTS(CAST(sb1.boundary_geom AS geometry), CAST(sb2.boundary_geom AS geometry)) THEN 'Intersects'\n                    ELSE 'Near'\n                END\n            ELSE NULL\n        END AS spatial_relationship,\n        CASE\n            WHEN sb1.boundary_geom IS NOT NULL AND sb2.boundary_geom IS NOT NULL THEN\n                ST_DISTANCE(sb1.boundary_geom::geography, sb2.boundary_geom::geography)\n            ELSE NULL\n        END AS spatial_distance,\n        CASE\n            WHEN sb1.boundary_geom IS NOT NULL AND sb2.boundary_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(sb2.boundary_geom::geometry, sb1.boundary_geom::geometry) THEN\n                        (ST_AREA(CAST(sb2.boundary_geom AS geometry))::NUMERIC / NULLIF(ST_AREA(CAST(sb1.boundary_geom AS geometry))::NUMERIC, 0::NUMERIC)) * 100::NUMERIC\n                    ELSE NULL::NUMERIC\n                END\n            ELSE NULL::NUMERIC\n        END AS coverage_percentage\n    FROM shapefile_boundaries sb1\n    CROSS JOIN 
shapefile_boundaries sb2\n    WHERE sb1.boundary_id != sb2.boundary_id\n        AND sb1.boundary_geom IS NOT NULL\n        AND sb2.boundary_geom IS NOT NULL\n        AND (\n            ST_Within(sb2.boundary_geom::geometry, sb1.boundary_geom::geometry)\n            OR ST_INTERSECTS(CAST(sb1.boundary_geom AS geometry), CAST(sb2.boundary_geom AS geometry))\n            OR ST_DISTANCE(sb1.boundary_geom::geography, sb2.boundary_geom::geography) < 50000\n        )\n\n    UNION ALL\n\n    -- Recursive step: Multi-hop spatial relationships\n    SELECT\n        bsh.parent_boundary_id,\n        bsh.parent_feature_type,\n        bsh.parent_feature_name,\n        sb3.boundary_id AS child_boundary_id,\n        sb3.feature_type AS child_feature_type,\n        sb3.feature_name AS child_feature_name,\n        bsh.hierarchy_level + 1,\n        bsh.boundary_path || ARRAY[sb3.boundary_id::VARCHAR(255)],\n        CASE\n            WHEN bsh.spatial_relationship = 'Contains' AND sb3.boundary_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(CAST(sb3.boundary_geom AS geometry), CAST((SELECT boundary_geom FROM shapefile_boundaries WHERE boundary_id = bsh.child_boundary_id) AS geometry)) THEN 'Contains'\n                    WHEN ST_INTERSECTS(sb3.boundary_geom,\n                        (SELECT boundary_geom FROM shapefile_boundaries WHERE boundary_id = bsh.parent_boundary_id)\n                    ) THEN 'Intersects'\n                    ELSE 'Near'\n                END\n            ELSE bsh.spatial_relationship\n        END,\n        CASE\n            WHEN sb3.boundary_geom IS NOT NULL THEN\n                ST_DISTANCE(\n                    (SELECT boundary_geom FROM shapefile_boundaries WHERE boundary_id = bsh.parent_boundary_id)::geography,\n                    sb3.boundary_geom::geography\n                )\n            ELSE bsh.spatial_distance\n        END,\n        CASE\n            WHEN sb3.boundary_geom IS NOT NULL AND bsh.spatial_relationship = 
'Contains' THEN\n                (ST_AREA(CAST(sb3.boundary_geom AS geometry))::NUMERIC / NULLIF(\n                    ST_AREA(CAST((SELECT boundary_geom FROM shapefile_boundaries WHERE boundary_id = bsh.parent_boundary_id) AS geometry))::NUMERIC,\n                    0::NUMERIC\n                )) * 100::NUMERIC\n            ELSE bsh.coverage_percentage::NUMERIC\n        END\n    FROM boundary_spatial_hierarchy bsh\n    INNER JOIN shapefile_boundaries sb3 ON (\n        NOT sb3.boundary_id = ANY(bsh.boundary_path)\n        AND sb3.boundary_geom IS NOT NULL\n        AND (\n            ST_Within(CAST(sb3.boundary_geom AS geometry), CAST((SELECT boundary_geom FROM shapefile_boundaries WHERE boundary_id = bsh.child_boundary_id) AS geometry))\n            OR ST_INTERSECTS(CAST(sb3.boundary_geom AS geometry), CAST((SELECT boundary_geom FROM shapefile_boundaries WHERE boundary_id = bsh.child_boundary_id) AS geometry))\n        )\n    )\n    WHERE bsh.hierarchy_level < 5\n        AND array_length(bsh.boundary_path, 1) < 6\n),\nhierarchy_metrics AS (\n    -- Second CTE: Calculate hierarchy metrics with aggregations\n    SELECT\n        bsh.parent_boundary_id,\n        bsh.parent_feature_type,\n        bsh.parent_feature_name,\n        bsh.child_boundary_id,\n        bsh.child_feature_type,\n        bsh.child_feature_name,\n        bsh.hierarchy_level,\n        bsh.boundary_path,\n        array_length(bsh.boundary_path, 1) AS path_length,\n        bsh.spatial_relationship,\n        ROUND(CAST(CAST(bsh.spatial_distance AS NUMERIC) AS NUMERIC), 2) AS spatial_distance,\n        ROUND(CAST(CAST(bsh.coverage_percentage AS NUMERIC) AS NUMERIC), 2) AS coverage_percentage,\n        -- Count children at each level\n        COUNT(*) OVER (\n            PARTITION BY bsh.parent_boundary_id, bsh.hierarchy_level\n        ) AS children_count_at_level,\n        -- Total descendants count\n        COUNT(*) OVER (\n            PARTITION BY bsh.parent_boundary_id\n        ) AS 
total_descendants_count,\n        -- Path weight (inverse of hierarchy level)\n        1.0 / NULLIF(bsh.hierarchy_level, 0) AS path_weight\n    FROM boundary_spatial_hierarchy bsh\n),\nshortest_paths AS (\n    -- Third CTE: Find shortest paths between boundaries\n    SELECT\n        hm1.parent_boundary_id,\n        hm1.child_boundary_id AS intermediate_boundary_id,\n        hm2.child_boundary_id AS target_boundary_id,\n        hm1.hierarchy_level + hm2.hierarchy_level AS total_hops,\n        hm1.boundary_path || hm2.boundary_path[2:] AS combined_path,\n        hm1.path_weight + hm2.path_weight AS combined_path_weight,\n        hm1.spatial_distance + COALESCE(hm2.spatial_distance, 0) AS combined_distance\n    FROM hierarchy_metrics hm1\n    INNER JOIN hierarchy_metrics hm2 ON hm1.child_boundary_id = hm2.parent_boundary_id\n    WHERE hm1.child_boundary_id != hm2.child_boundary_id\n        AND NOT hm2.child_boundary_id = ANY(hm1.boundary_path)\n),\npath_optimization AS (\n    -- Fourth CTE: Optimize paths and find best routes\n    SELECT\n        sp.parent_boundary_id,\n        sp.target_boundary_id,\n        sp.total_hops,\n        sp.combined_path,\n        sp.combined_path_weight,\n        sp.combined_distance,\n        -- Window functions for path comparison\n        MIN(sp.combined_path_weight) OVER (\n            PARTITION BY sp.parent_boundary_id, sp.target_boundary_id\n        ) AS min_path_weight,\n        MIN(sp.total_hops) OVER (\n            PARTITION BY sp.parent_boundary_id, sp.target_boundary_id\n        ) AS min_hops,\n        ROW_NUMBER() OVER (\n            PARTITION BY sp.parent_boundary_id, sp.target_boundary_id\n            ORDER BY sp.combined_path_weight ASC, sp.total_hops ASC\n        ) AS path_rank\n    FROM shortest_paths sp\n),\nforecast_boundary_aggregations AS (\n    -- Fifth CTE:
    Aggregate forecasts by boundary hierarchy\n    SELECT\n        po.parent_boundary_id,\n        po.target_boundary_id,\n        po.total_hops,\n        po.combined_path,\n        po.path_rank,\n        sb_parent.feature_type AS parent_feature_type,\n        sb_parent.feature_name AS parent_feature_name,\n        sb_target.feature_type AS target_feature_type,\n        sb_target.feature_name AS target_feature_name,\n        COUNT(DISTINCT gf.forecast_id) AS forecast_count,\n        COUNT(DISTINCT gf.parameter_name) AS parameter_count,\n        AVG(gf.parameter_value) AS avg_forecast_value,\n        MIN(gf.parameter_value) AS min_forecast_value,\n        MAX(gf.parameter_value) AS max_forecast_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY gf.parameter_value) AS median_forecast_value\n    FROM path_optimization po\n    INNER JOIN shapefile_boundaries sb_parent ON po.parent_boundary_id = sb_parent.boundary_id\n    INNER JOIN shapefile_boundaries sb_target ON po.target_boundary_id = sb_target.boundary_id\n    LEFT JOIN grib2_forecasts gf ON (\n        gf.grid_cell_geom IS NOT NULL\n        AND (\n            ST_Within(gf.grid_cell_geom::geometry, sb_parent.boundary_geom::geometry)\n            OR ST_Within(gf.grid_cell_geom::geometry, sb_target.boundary_geom::geometry)\n        )\n    )\n    WHERE po.path_rank = 1\n    GROUP BY\n        po.parent_boundary_id,\n        po.target_boundary_id,\n        po.total_hops,\n        po.combined_path,\n        po.path_rank,\n        sb_parent.feature_type,\n        sb_parent.feature_name,\n        sb_target.feature_type,\n        sb_target.feature_name\n),\nfinal_hierarchy_analysis AS (\n    -- Sixth CTE: Final analysis with comprehensive metrics\n    SELECT\n        fba.parent_boundary_id,\n        fba.parent_feature_type,\n        fba.parent_feature_name,\n        fba.target_boundary_id,\n        fba.target_feature_type,\n        fba.target_feature_name,\n        fba.total_hops,\n        fba.combined_path,\n      
  fba.forecast_count,\n        fba.parameter_count,\n        ROUND(CAST(CAST(fba.avg_forecast_value AS NUMERIC) AS NUMERIC), 2) AS avg_forecast_value,\n        ROUND(CAST(CAST(fba.min_forecast_value AS NUMERIC) AS NUMERIC), 2) AS min_forecast_value,\n        ROUND(CAST(CAST(fba.max_forecast_value AS NUMERIC) AS NUMERIC), 2) AS max_forecast_value,\n        ROUND(CAST(CAST(fba.median_forecast_value AS NUMERIC) AS NUMERIC), 2) AS median_forecast_value,\n        -- Window functions for comparison\n        AVG(fba.avg_forecast_value) OVER (\n            PARTITION BY fba.parent_feature_type\n        ) AS avg_by_feature_type,\n        PERCENT_RANK() OVER (\n            PARTITION BY fba.parent_feature_type\n            ORDER BY fba.forecast_count DESC\n        ) AS forecast_count_percentile,\n        NTILE(4) OVER (\n            ORDER BY fba.total_hops ASC\n        ) AS hop_quartile\n    FROM forecast_boundary_aggregations fba\n)\nSELECT\n    parent_boundary_id,\n    parent_feature_type,\n    parent_feature_name,\n    target_boundary_id,\n    target_feature_type,\n    target_feature_name,\n    total_hops,\n    combined_path,\n    forecast_count,\n    parameter_count,\n    avg_forecast_value,\n    min_forecast_value,\n    max_forecast_value,\n    median_forecast_value,\n    ROUND(CAST(CAST(avg_by_feature_type AS NUMERIC) AS NUMERIC), 2) AS avg_by_feature_type,\n    ROUND(CAST(CAST(forecast_count_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS forecast_count_percentile,\n    hop_quartile\nFROM final_hierarchy_analysis\nWHERE forecast_count > 0\nORDER BY total_hops ASC, forecast_count DESC\nLIMIT 200;",
      "line_number": 580,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014234,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 3,
      "title": "Multi-Parameter Weather Correlation Analysis with Cross-Parameter Temporal Patterns",
      "description": "Use Case: Physical Climate Risk Assessment - Multi-Parameter Risk Correlation for Renewable Energy Planning Description:
    Enterprise-level multi-parameter weather correlation analysis with cross-parameter temporal pattern detection, correlation matrices, lag analysis, and predictive indicators. Implements production patterns for analyzing relationships between temperature, precipitation, wind speed, and other meteorological parameters. Business Value: Correlation analysis between temperature, pre",
      "complexity": "Multiple CTEs (8+ levels), cross-parameter joins, correlation calculations, lag/lead analysis, window functions with multiple frame clauses, temporal pattern detection, UNION operations",
      "expected_output": "Query results",
      "sql": "WITH parameter_time_series AS (\n    -- First CTE: Create unified time series for all parameters\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        DATE_TRUNC('hour', gf.forecast_time) AS forecast_hour,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_date,\n        gf.grid_cell_latitude,\n        gf.grid_cell_longitude,\n        gf.parameter_value,\n        -- Round coordinates to grid resolution for matching\n        ROUND(CAST(gf.grid_cell_latitude::NUMERIC AS NUMERIC), 2) AS rounded_lat,\n        ROUND(CAST(gf.grid_cell_longitude::NUMERIC AS NUMERIC), 2) AS rounded_lon\n    FROM grib2_forecasts gf\n    WHERE gf.transformation_status = 'Success'\n        AND gf.parameter_name IN ('Temperature', 'Precipitation', 'WindSpeed', 'Dewpoint', 'SkyCover')\n),\ntemperature_series AS (\n    -- Second CTE: Temperature parameter series\n    SELECT\n        pts.forecast_id,\n        pts.forecast_time,\n        pts.forecast_hour,\n        pts.forecast_date,\n        pts.rounded_lat,\n        pts.rounded_lon,\n        pts.parameter_value AS temperature_value\n    FROM parameter_time_series pts\n    WHERE pts.parameter_name = 'Temperature'\n),\nprecipitation_series AS (\n    -- Third CTE: Precipitation parameter series\n    SELECT\n        pts.forecast_id,\n        pts.forecast_time,\n        pts.forecast_hour,\n        pts.forecast_date,\n        pts.rounded_lat,\n        pts.rounded_lon,\n        pts.parameter_value AS precipitation_value\n    FROM parameter_time_series pts\n    WHERE pts.parameter_name = 'Precipitation'\n),\nwindspeed_series AS (\n    -- Fourth CTE: Wind speed parameter series\n    SELECT\n        pts.forecast_id,\n        pts.forecast_time,\n        pts.forecast_hour,\n        pts.forecast_date,\n        pts.rounded_lat,\n        pts.rounded_lon,\n        pts.parameter_value AS windspeed_value\n    FROM parameter_time_series pts\n    WHERE pts.parameter_name = 
'WindSpeed'\n),\nmulti_parameter_join AS (\n    -- Fifth CTE: Join all parameters by location and time\n    SELECT\n        COALESCE(ts.forecast_time, ps.forecast_time, ws.forecast_time) AS forecast_time,\n        COALESCE(ts.forecast_hour, ps.forecast_hour, ws.forecast_hour) AS forecast_hour,\n        COALESCE(ts.forecast_date, ps.forecast_date, ws.forecast_date) AS forecast_date,\n        COALESCE(ts.rounded_lat, ps.rounded_lat, ws.rounded_lat) AS rounded_lat,\n        COALESCE(ts.rounded_lon, ps.rounded_lon, ws.rounded_lon) AS rounded_lon,\n        ts.temperature_value,\n        ps.precipitation_value,\n        ws.windspeed_value,\n        -- Calculate derived metrics\n        CASE\n            WHEN ts.temperature_value IS NOT NULL AND ps.precipitation_value IS NOT NULL THEN\n                ts.temperature_value - (ps.precipitation_value * 5.0)\n            ELSE NULL\n        END AS apparent_temperature,\n        CASE\n            WHEN ts.temperature_value IS NOT NULL AND ws.windspeed_value IS NOT NULL THEN\n                CASE\n                    WHEN ws.windspeed_value > 0 THEN\n                        35.74 + (0.6215 * ts.temperature_value) -\n                        (35.75 * POWER(ws.windspeed_value, 0.16)) +\n                        (0.4275 * ts.temperature_value * POWER(ws.windspeed_value, 0.16))\n                    ELSE ts.temperature_value\n                END\n            ELSE NULL\n        END AS wind_chill_temperature\n    FROM temperature_series ts\n    FULL OUTER JOIN precipitation_series ps ON (\n        ts.rounded_lat = ps.rounded_lat\n        AND ts.rounded_lon = ps.rounded_lon\n        AND ts.forecast_hour = ps.forecast_hour\n    )\n    FULL OUTER JOIN windspeed_series ws ON (\n        COALESCE(ts.rounded_lat, ps.rounded_lat) = ws.rounded_lat\n        AND COALESCE(ts.rounded_lon, ps.rounded_lon) = ws.rounded_lon\n        AND COALESCE(ts.forecast_hour, ps.forecast_hour) = ws.forecast_hour\n    )\n),\ntemporal_lag_analysis AS (\n    -- Sixth 
CTE: Temporal lag analysis for correlation detection\n    SELECT\n        mpj.forecast_time,\n        mpj.forecast_hour,\n        mpj.forecast_date,\n        mpj.rounded_lat,\n        mpj.rounded_lon,\n        mpj.temperature_value,\n        mpj.precipitation_value,\n        mpj.windspeed_value,\n        mpj.apparent_temperature,\n        mpj.wind_chill_temperature,\n        -- Lag values for correlation analysis\n        LAG(mpj.temperature_value, 1) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n        ) AS temp_lag_1h,\n        LAG(mpj.temperature_value, 3) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n        ) AS temp_lag_3h,\n        LAG(mpj.precipitation_value, 1) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n        ) AS precip_lag_1h,\n        LEAD(mpj.temperature_value, 1) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n        ) AS temp_lead_1h,\n        LEAD(mpj.precipitation_value, 1) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n        ) AS precip_lead_1h,\n        -- Moving averages for trend detection\n        AVG(mpj.temperature_value) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS temp_moving_avg_6h,\n        AVG(mpj.precipitation_value) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n            ROWS BETWEEN 11 PRECEDING AND CURRENT ROW\n        ) AS precip_moving_avg_12h,\n        AVG(mpj.windspeed_value) OVER (\n            PARTITION BY mpj.rounded_lat, mpj.rounded_lon\n            ORDER BY mpj.forecast_time\n            ROWS BETWEEN 23 PRECEDING AND CURRENT ROW\n 
       ) AS windspeed_moving_avg_24h\n    FROM multi_parameter_join mpj\n),\ncorrelation_calculations AS (\n    -- Seventh CTE: Calculate correlation metrics\n    SELECT\n        tla.forecast_time,\n        tla.forecast_hour,\n        tla.forecast_date,\n        tla.rounded_lat,\n        tla.rounded_lon,\n        ROUND(CAST(CAST(tla.temperature_value AS NUMERIC) AS NUMERIC), 2) AS temperature_value,\n        ROUND(CAST(CAST(tla.precipitation_value AS NUMERIC) AS NUMERIC), 2) AS precipitation_value,\n        ROUND(CAST(CAST(tla.windspeed_value AS NUMERIC) AS NUMERIC), 2) AS windspeed_value,\n        ROUND(CAST(CAST(tla.apparent_temperature AS NUMERIC) AS NUMERIC), 2) AS apparent_temperature,\n        ROUND(CAST(CAST(tla.wind_chill_temperature AS NUMERIC) AS NUMERIC), 2) AS wind_chill_temperature,\n        ROUND(CAST(CAST(tla.temp_lag_1h AS NUMERIC) AS NUMERIC), 2) AS temp_lag_1h,\n        ROUND(CAST(CAST(tla.temp_lag_3h AS NUMERIC) AS NUMERIC), 2) AS temp_lag_3h,\n        ROUND(CAST(CAST(tla.precip_lag_1h AS NUMERIC) AS NUMERIC), 2) AS precip_lag_1h,\n        ROUND(CAST(CAST(tla.temp_lead_1h AS NUMERIC) AS NUMERIC), 2) AS temp_lead_1h,\n        ROUND(CAST(CAST(tla.precip_lead_1h AS NUMERIC) AS NUMERIC), 2) AS precip_lead_1h,\n        ROUND(CAST(CAST(tla.temp_moving_avg_6h AS NUMERIC) AS NUMERIC), 2) AS temp_moving_avg_6h,\n        ROUND(CAST(CAST(tla.precip_moving_avg_12h AS NUMERIC) AS NUMERIC), 2) AS precip_moving_avg_12h,\n        ROUND(CAST(CAST(tla.windspeed_moving_avg_24h AS NUMERIC) AS NUMERIC), 2) AS windspeed_moving_avg_24h,\n        -- Temperature change indicators\n        CASE\n            WHEN tla.temp_lag_1h IS NOT NULL THEN\n                tla.temperature_value - tla.temp_lag_1h\n            ELSE NULL\n        END AS temp_change_1h,\n        CASE\n            WHEN tla.temp_lag_3h IS NOT NULL THEN\n                tla.temperature_value - tla.temp_lag_3h\n            ELSE NULL\n        END AS temp_change_3h,\n        -- Precipitation change indicators\n        CASE\n            WHEN tla.precip_lag_1h IS NOT NULL THEN\n                tla.precipitation_value - 
tla.precip_lag_1h\n            ELSE NULL\n        END AS precip_change_1h,\n        -- Correlation indicators (simplified correlation coefficients)\n        CASE\n            WHEN tla.temperature_value IS NOT NULL AND tla.precipitation_value IS NOT NULL THEN\n                CASE\n                    WHEN tla.temperature_value > tla.temp_moving_avg_6h\n                         AND tla.precipitation_value > tla.precip_moving_avg_12h THEN 'Both Above Average'\n                    WHEN tla.temperature_value < tla.temp_moving_avg_6h\n                         AND tla.precipitation_value < tla.precip_moving_avg_12h THEN 'Both Below Average'\n                    WHEN tla.temperature_value > tla.temp_moving_avg_6h\n                         AND tla.precipitation_value < tla.precip_moving_avg_12h THEN 'Temp High, Precip Low'\n                    WHEN tla.temperature_value < tla.temp_moving_avg_6h\n                         AND tla.precipitation_value > tla.precip_moving_avg_12h THEN 'Temp Low, Precip High'\n                    ELSE 'Mixed'\n                END\n            ELSE NULL\n        END AS correlation_pattern\n    FROM temporal_lag_analysis tla\n),\npattern_classification AS (\n    -- Eighth CTE: Classify weather patterns\n    SELECT\n        cc.forecast_time,\n        cc.forecast_hour,\n        cc.forecast_date,\n        cc.rounded_lat,\n        cc.rounded_lon,\n        cc.temperature_value,\n        cc.precipitation_value,\n        cc.windspeed_value,\n        cc.apparent_temperature,\n        cc.wind_chill_temperature,\n        cc.temp_change_1h,\n        cc.temp_change_3h,\n        cc.precip_change_1h,\n        cc.correlation_pattern,\n        -- Weather pattern classification\n        CASE\n            WHEN cc.temperature_value < 32 AND cc.precipitation_value > 0 THEN 'Freezing Precipitation'\n            WHEN cc.temperature_value >= 32 AND cc.temperature_value < 50\n                 AND cc.precipitation_value > 0.1 THEN 'Cold Rain'\n            WHEN 
cc.temperature_value >= 50 AND cc.temperature_value < 70\n                 AND cc.precipitation_value > 0.1 THEN 'Moderate Rain'\n            WHEN cc.temperature_value >= 70 AND cc.precipitation_value > 0.1 THEN 'Warm Rain'\n            WHEN cc.temperature_value >= 85 AND cc.windspeed_value < 5 THEN 'Hot Calm'\n            WHEN cc.windspeed_value > 30 THEN 'High Wind'\n            WHEN cc.precipitation_value = 0 AND cc.temperature_value BETWEEN 60 AND 80 THEN 'Pleasant'\n            ELSE 'Other'\n        END AS weather_pattern,\n        -- Window functions for pattern frequency\n        COUNT(*) OVER (\n            PARTITION BY cc.rounded_lat, cc.rounded_lon, cc.correlation_pattern\n        ) AS pattern_frequency,\n        PERCENT_RANK() OVER (\n            PARTITION BY cc.rounded_lat, cc.rounded_lon\n            ORDER BY cc.temperature_value DESC\n        ) AS temp_percentile,\n        NTILE(5) OVER (\n            PARTITION BY cc.rounded_lat, cc.rounded_lon\n            ORDER BY cc.precipitation_value DESC\n        ) AS precip_quintile\n    FROM correlation_calculations cc\n)\nSELECT\n    forecast_time,\n    forecast_hour,\n    forecast_date,\n    rounded_lat,\n    rounded_lon,\n    temperature_value,\n    precipitation_value,\n    windspeed_value,\n    apparent_temperature,\n    wind_chill_temperature,\n    ROUND(CAST(CAST(temp_change_1h AS NUMERIC) AS NUMERIC), 2) AS temp_change_1h,\n    ROUND(CAST(CAST(temp_change_3h AS NUMERIC) AS NUMERIC), 2) AS temp_change_3h,\n    ROUND(CAST(CAST(precip_change_1h AS NUMERIC) AS NUMERIC), 2) AS precip_change_1h,\n    correlation_pattern,\n    weather_pattern,\n    pattern_frequency,\n    ROUND(CAST(CAST(temp_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS temp_percentile,\n    precip_quintile\nFROM pattern_classification\nWHERE forecast_time >= CURRENT_TIMESTAMP - INTERVAL '7 days'\nORDER BY forecast_time DESC, rounded_lat, rounded_lon\nLIMIT 1000;",
      "line_number": 854,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005914,
        "row_count": 5,
        "column_count": 18,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 4,
      "title": "Spatial Join Optimization Analysis with Boundary-Forecast Matching Efficiency Metrics",
      "description": "Use Case: Custom Weather Impact Modeling - Boundary-Forecast Matching Efficiency for Logistics Optimization Description: Enterprise-level spatial join optimization analysis evaluating boundary-forecast matching efficiency, spatial index utilization, join performance metrics, and optimization opportunities. Implements production patterns for optimizing geospatial data joins. Purpose: Analysis of how efficiently forecasts match to client-defined boundaries with optimization recommendations. Helps ",
      "complexity": "Multiple CTEs (6 levels), spatial join analysis, performance metrics, optimization scoring, window functions",
      "expected_output": "Query results",
      "sql": "WITH spatial_join_base_metrics AS (\n    -- First CTE: Base spatial join metrics\n    SELECT\n        sjr.join_id,\n        sjr.grib_file,\n        sjr.shapefile_name,\n        sjr.join_type,\n        sjr.features_matched,\n        sjr.features_total,\n        sjr.match_percentage,\n        sjr.join_timestamp,\n        sjr.forecast_id,\n        sjr.boundary_id,\n        -- Calculate join efficiency\n        CASE\n            WHEN sjr.features_total > 0 THEN\n                (sjr.features_matched::NUMERIC / sjr.features_total::NUMERIC) * 100\n            ELSE 0\n        END AS calculated_match_percentage,\n        CASE\n            WHEN sjr.features_matched > 0 THEN\n                sjr.features_total::NUMERIC / sjr.features_matched::NUMERIC\n            ELSE NULL\n        END AS features_per_match\n    FROM spatial_join_results sjr\n),\nboundary_forecast_join_analysis AS (\n    -- Second CTE: Analyze joins between boundaries and forecasts\n    SELECT\n        sjbm.join_id,\n        sjbm.join_type,\n        sjbm.match_percentage,\n        sjbm.calculated_match_percentage,\n        sb.feature_type,\n        sb.feature_name,\n        sb.feature_identifier,\n        gf.parameter_name,\n        gf.forecast_time,\n        sjbm.features_matched,\n        sjbm.features_total,\n        sjbm.features_per_match,\n        -- Spatial relationship metrics\n        CASE\n            WHEN sb.boundary_geom IS NOT NULL AND gf.grid_cell_geom IS NOT NULL THEN\n                ST_DISTANCE(sb.boundary_geom::geography, gf.grid_cell_geom::geography)\n            ELSE NULL\n        END AS spatial_distance,\n        CASE\n            WHEN sb.boundary_geom IS NOT NULL AND gf.grid_cell_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(gf.grid_cell_geom::geometry, sb.boundary_geom::geometry) THEN 'Within'\n                    WHEN ST_INTERSECTS(gf.grid_cell_geom, sb.boundary_geom) THEN 'Intersects'\n                    ELSE 'Near'\n                
END\n            ELSE NULL\n        END AS spatial_relationship\n    FROM spatial_join_base_metrics sjbm\n    LEFT JOIN shapefile_boundaries sb ON sjbm.boundary_id = sb.boundary_id\n    LEFT JOIN grib2_forecasts gf ON sjbm.forecast_id = gf.forecast_id\n),\njoin_type_performance AS (\n    -- Third CTE: Performance by join type\n    SELECT\n        bfja.join_type,\n        bfja.feature_type,\n        COUNT(*) AS join_count,\n        AVG(bfja.match_percentage) AS avg_match_percentage,\n        AVG(bfja.features_matched) AS avg_features_matched,\n        AVG(bfja.features_total) AS avg_features_total,\n        AVG(bfja.features_per_match) AS avg_features_per_match,\n        COUNT(CASE WHEN bfja.match_percentage > 80 THEN 1 END) AS high_match_joins,\n        COUNT(CASE WHEN bfja.match_percentage < 20 THEN 1 END) AS low_match_joins,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY bfja.match_percentage) AS median_match_percentage\n    FROM boundary_forecast_join_analysis bfja\n    GROUP BY bfja.join_type, bfja.feature_type\n),\nspatial_relationship_analysis AS (\n    -- Fourth CTE: Analyze spatial relationships\n    SELECT\n        bfja.join_id,\n        bfja.join_type,\n        bfja.feature_type,\n        bfja.parameter_name,\n        bfja.match_percentage,\n        bfja.spatial_distance,\n        bfja.spatial_relationship,\n        -- Window functions for spatial analysis\n        AVG(bfja.match_percentage) OVER (\n            PARTITION BY bfja.spatial_relationship\n        ) AS avg_match_by_relationship,\n        COUNT(*) OVER (\n            PARTITION BY bfja.spatial_relationship\n        ) AS count_by_relationship,\n        PERCENT_RANK() OVER (\n            PARTITION BY bfja.join_type\n            ORDER BY bfja.match_percentage DESC\n        ) AS match_percentile\n    FROM boundary_forecast_join_analysis bfja\n    WHERE bfja.spatial_relationship IS NOT NULL\n),\njoin_optimization_scoring AS (\n    -- Fifth CTE: Calculate optimization scores\n    SELECT\n        sra.join_id,\n        sra.join_type,\n        sra.feature_type,\n        sra.parameter_name,\n        sra.match_percentage,\n        sra.spatial_distance,\n        sra.spatial_relationship,\n        sra.avg_match_by_relationship,\n        sra.count_by_relationship,\n        ROUND(CAST(CAST(sra.match_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS match_percentile,\n        -- Optimization score (higher is better)\n        (\n            -- Match percentage component (40% weight)\n            (sra.match_percentage / 100.0 * 40) +\n            -- Relationship quality component (30% weight)\n            (CASE\n                WHEN sra.spatial_relationship = 'Within' THEN 30\n                WHEN sra.spatial_relationship = 'Intersects' THEN 20\n                ELSE 10\n            END) +\n            -- Distance component (30% weight) - closer is better\n            (CASE\n                WHEN sra.spatial_distance IS NOT NULL THEN\n                    GREATEST(0, 30 - (sra.spatial_distance / 1000.0))\n                ELSE 15\n            END)\n        ) AS optimization_score\n    FROM spatial_relationship_analysis sra\n),\nfinal_join_optimization AS (\n    -- Sixth CTE: Final optimization analysis\n    SELECT\n        jos.join_id,\n        jos.join_type,\n        jos.feature_type,\n        jos.parameter_name,\n        jos.match_percentage,\n        ROUND(CAST(CAST(jos.spatial_distance AS NUMERIC) AS NUMERIC), 2) AS spatial_distance,\n        jos.spatial_relationship,\n        ROUND(CAST(CAST(jos.optimization_score AS NUMERIC) AS NUMERIC), 2) AS optimization_score,\n        jos.match_percentile,\n        -- Optimization recommendations\n        CASE\n            WHEN jos.match_percentage < 50 THEN 'Low Match - Consider Different Join Type'\n            WHEN jos.spatial_distance > 10000 THEN 'High Distance - Check Spatial Index'\n            WHEN jos.optimization_score < 50 THEN 'Poor Optimization - Review Join 
Strategy'\n            ELSE 'Well Optimized'\n        END AS optimization_recommendation,\n        -- Rankings\n        ROW_NUMBER() OVER (\n            ORDER BY jos.optimization_score DESC\n        ) AS optimization_rank,\n        NTILE(5) OVER (\n            ORDER BY jos.optimization_score DESC\n        ) AS optimization_quintile\n    FROM join_optimization_scoring jos\n)\nSELECT\n    join_id,\n    join_type,\n    feature_type,\n    parameter_name,\n    match_percentage,\n    spatial_distance,\n    spatial_relationship,\n    optimization_score,\n    match_percentile,\n    optimization_recommendation,\n    optimization_rank,\n    optimization_quintile\nFROM final_join_optimization\nORDER BY optimization_score DESC\nLIMIT 200;",
      "line_number": 1147,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.012429,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 5,
      "title": "Weather Station Network Coverage Analysis with Spatial Gap Detection and Coverage Optimization",
      "description": "Use Case: Supply Chain and Fleet Management - Station Coverage Gap Analysis for Route Planning Description: Enterprise-level weather station network analysis identifying coverage gaps, station density metrics, spatial interpolation opportunities, and network optimization recommendations. Implements production patterns for analyzing observation network coverage. Business Value: Map showing gaps in weather station coverage along routes with coverage density metrics. Fleet management companies can ",
      "complexity": "Multiple CTEs (7 levels), spatial coverage analysis, gap detection algorithms, density calculations, interpolation analysis, window functions, correlated subqueries, spatial operations",
      "expected_output": "Query results",
      "sql": "WITH station_coverage_base AS (\n    -- First CTE: Base station coverage metrics\n    SELECT\n        ws.station_id,\n        ws.station_name,\n        ws.station_latitude,\n        ws.station_longitude,\n        ws.station_geom,\n        ws.state_code,\n        ws.county_name,\n        ws.cwa_code,\n        ws.station_type,\n        ws.active_status,\n        ws.elevation_meters,\n        -- Count recent observations\n        (\n            SELECT COUNT(*)\n            FROM weather_observations wo\n            WHERE wo.station_id = ws.station_id\n                AND wo.observation_time >= CURRENT_TIMESTAMP - INTERVAL '7 days'\n        ) AS recent_observations_count,\n        -- Latest observation time\n        (\n            SELECT MAX(wo.observation_time)\n            FROM weather_observations wo\n            WHERE wo.station_id = ws.station_id\n        ) AS latest_observation_time\n    FROM weather_stations ws\n    WHERE ws.active_status = TRUE\n),\nstation_density_analysis AS (\n    -- Second CTE: Calculate station density metrics\n    SELECT\n        scb.station_id,\n        scb.station_name,\n        scb.station_latitude,\n        scb.station_longitude,\n        scb.station_geom,\n        scb.state_code,\n        scb.cwa_code,\n        scb.recent_observations_count,\n        scb.latest_observation_time,\n        -- Count nearby stations within 50km\n        (\n            SELECT COUNT(*)\n            FROM station_coverage_base scb2\n            WHERE scb2.station_id != scb.station_id\n                AND scb2.station_geom IS NOT NULL\n                AND scb.station_geom IS NOT NULL\n                AND ST_DISTANCE(scb.station_geom::geography, scb2.station_geom::geography) < 50000\n        ) AS nearby_stations_50km,\n        -- Count nearby stations within 100km\n        (\n            SELECT COUNT(*)\n            FROM station_coverage_base scb2\n            WHERE scb2.station_id != scb.station_id\n                AND scb2.station_geom IS NOT 
NULL\n                AND scb.station_geom IS NOT NULL\n                AND ST_DISTANCE(scb.station_geom::geography, scb2.station_geom::geography) < 100000\n        ) AS nearby_stations_100km,\n        -- Minimum distance to nearest station\n        (\n            SELECT MIN(ST_DISTANCE(scb.station_geom::geography, scb2.station_geom::geography))\n            FROM station_coverage_base scb2\n            WHERE scb2.station_id != scb.station_id\n                AND scb2.station_geom IS NOT NULL\n                AND scb.station_geom IS NOT NULL\n        ) AS min_distance_to_nearest_station\n    FROM station_coverage_base scb\n),\ncoverage_gap_analysis AS (\n    -- Third CTE: Identify coverage gaps\n    SELECT\n        sda.station_id,\n        sda.station_name,\n        sda.station_latitude,\n        sda.station_longitude,\n        sda.state_code,\n        sda.cwa_code,\n        sda.recent_observations_count,\n        sda.nearby_stations_50km,\n        sda.nearby_stations_100km,\n        ROUND(CAST(CAST(sda.min_distance_to_nearest_station AS NUMERIC) AS NUMERIC), 2) AS min_distance_to_nearest_station,\n        -- Coverage classification\n        CASE\n            WHEN sda.nearby_stations_50km >= 5 THEN 'High Density'\n            WHEN sda.nearby_stations_50km >= 2 THEN 'Medium Density'\n            WHEN sda.nearby_stations_50km >= 1 THEN 'Low Density'\n            ELSE 'Isolated'\n        END AS density_classification,\n        -- Gap indicators\n        CASE\n            WHEN sda.min_distance_to_nearest_station > 100000 THEN 'Large Gap'\n            WHEN sda.min_distance_to_nearest_station > 50000 THEN 'Medium Gap'\n            WHEN sda.min_distance_to_nearest_station > 25000 THEN 'Small Gap'\n            ELSE 'No Gap'\n        END AS gap_classification\n    FROM station_density_analysis sda\n),\nboundary_coverage_analysis AS (\n    -- Fourth CTE: Analyze coverage by boundary\n    SELECT\n        cga.station_id,\n        cga.station_name,\n        cga.state_code,\n     
   cga.cwa_code,\n        cga.density_classification,\n        cga.gap_classification,\n        sb.boundary_id,\n        sb.feature_type,\n        sb.feature_name,\n        -- Check if station is within boundary\n        CASE\n            WHEN sda.station_geom IS NOT NULL AND sb.boundary_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(sda.station_geom::geometry, sb.boundary_geom::geometry) THEN TRUE\n                    ELSE FALSE\n                END\n            ELSE NULL\n        END AS is_within_boundary,\n        -- Count stations in same boundary\n        (\n            SELECT COUNT(*)\n            FROM station_coverage_base scb2\n            WHERE scb2.station_geom IS NOT NULL\n                AND sb.boundary_geom IS NOT NULL\n                AND ST_Within(scb2.station_geom::geometry, sb.boundary_geom::geometry)\n        ) AS stations_in_boundary\n    FROM coverage_gap_analysis cga\n    INNER JOIN station_density_analysis sda ON cga.station_id = sda.station_id\n    LEFT JOIN shapefile_boundaries sb ON (\n        sb.feature_type = 'CWA'\n        AND sda.station_geom IS NOT NULL\n        AND sb.boundary_geom IS NOT NULL\n        AND ST_DISTANCE(sda.station_geom::geography, sb.boundary_geom::geography) < 100000\n    )\n),\nboundary_coverage_summary AS (\n    -- Fifth CTE: Summarize coverage by boundary\n    SELECT\n        bca.boundary_id,\n        bca.feature_type,\n        bca.feature_name,\n        COUNT(DISTINCT bca.station_id) AS station_count,\n        COUNT(CASE WHEN bca.is_within_boundary = TRUE THEN 1 END) AS stations_within,\n        COUNT(CASE WHEN bca.gap_classification = 'Large Gap' THEN 1 END) AS large_gap_stations,\n        COUNT(CASE WHEN bca.gap_classification = 'No Gap' THEN 1 END) AS no_gap_stations,\n        AVG(CASE WHEN bca.is_within_boundary = TRUE THEN 1 ELSE 0 END) * 100 AS coverage_percentage,\n        -- Window functions for comparison\n        AVG(bca.stations_in_boundary) OVER (\n            PARTITION 
BY bca.feature_type\n        ) AS avg_stations_per_boundary_type\n    FROM boundary_coverage_analysis bca\n    GROUP BY\n        bca.boundary_id,\n        bca.feature_type,\n        bca.feature_name,\n        bca.stations_in_boundary\n),\ninterpolation_opportunity_analysis AS (\n    -- Sixth CTE: Identify interpolation opportunities\n    SELECT\n        cga.station_id,\n        cga.station_name,\n        cga.state_code,\n        cga.cwa_code,\n        cga.density_classification,\n        cga.gap_classification,\n        cga.min_distance_to_nearest_station,\n        cga.nearby_stations_50km,\n        -- Count forecast grid cells near station\n        (\n            SELECT COUNT(*)\n            FROM grib2_forecasts gf\n            WHERE gf.grid_cell_geom IS NOT NULL\n                AND sda.station_geom IS NOT NULL\n                AND ST_DISTANCE(gf.grid_cell_geom::geography, sda.station_geom::geography) < 25000\n        ) AS nearby_forecast_cells,\n        -- Interpolation quality score\n        CASE\n            WHEN cga.nearby_stations_50km >= 3 AND cga.min_distance_to_nearest_station < 25000 THEN 'Excellent'\n            WHEN cga.nearby_stations_50km >= 2 AND cga.min_distance_to_nearest_station < 50000 THEN 'Good'\n            WHEN cga.nearby_stations_50km >= 1 THEN 'Fair'\n            ELSE 'Poor'\n        END AS interpolation_quality\n    FROM coverage_gap_analysis cga\n    INNER JOIN station_density_analysis sda ON cga.station_id = sda.station_id\n    WHERE sda.station_geom IS NOT NULL\n),\nfinal_coverage_report AS (\n    -- Seventh CTE: Final coverage report\n    SELECT\n        ioa.station_id,\n        ioa.station_name,\n        ioa.state_code,\n        ioa.cwa_code,\n        ioa.density_classification,\n        ioa.gap_classification,\n        ioa.min_distance_to_nearest_station,\n        ioa.nearby_forecast_cells,\n        ioa.interpolation_quality,\n        -- Coverage recommendations\n        CASE\n            WHEN ioa.gap_classification = 'Large Gap' THEN 'Add Station Recommended'\n            WHEN ioa.interpolation_quality = 'Poor' THEN 'Improve Station Density'\n            WHEN ioa.nearby_forecast_cells = 0 THEN 'No Forecast Coverage'\n            ELSE 'Adequate Coverage'\n        END AS coverage_recommendation,\n        -- Rankings\n     
   ROW_NUMBER() OVER (\n            ORDER BY ioa.min_distance_to_nearest_station DESC\n        ) AS isolation_rank,\n        PERCENT_RANK() OVER (\n            ORDER BY ioa.nearby_forecast_cells DESC\n        ) AS forecast_coverage_percentile\n    FROM interpolation_opportunity_analysis ioa\n)\nSELECT\n    station_id,\n    station_name,\n    state_code,\n    cwa_code,\n    density_classification,\n    gap_classification,\n    min_distance_to_nearest_station,\n    nearby_forecast_cells,\n    interpolation_quality,\n    coverage_recommendation,\n    isolation_rank,\n    ROUND(CAST(CAST(forecast_coverage_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS forecast_coverage_percentile\nFROM final_coverage_report\nORDER BY isolation_rank\nLIMIT 200;",
      "line_number": 1344,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.012626,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 6,
      "title": "Forecast Accuracy Trend Analysis with Temporal Error Pattern Detection",
      "description": "Use Case: Forensic Meteorology - Historical Forecast Accuracy Assessment for Legal Cases Description: Enterprise-level forecast accuracy trend analysis identifying temporal error patterns, accuracy degradation over time, seasonal variations, and forecast model performance trends. Implements production patterns for monitoring forecast model accuracy. Purpose: Trend analysis showing forecast accuracy over time with error pattern detection. Provides evidence of forecast reliability for insurance cl",
      "complexity": "Multiple CTEs (7+ levels), temporal trend analysis, error pattern detection, seasonal analysis, window functions with multiple frame clauses, time-series decomposition",
      "expected_output": "Query results",
      "sql": "WITH forecast_observation_pairs AS (\n    -- First CTE: Match forecasts with observations\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        DATE_TRUNC('hour', gf.forecast_time) AS forecast_hour,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_date,\n        EXTRACT(HOUR FROM gf.forecast_time) AS hour_of_day,\n        EXTRACT(DOW FROM gf.forecast_time) AS day_of_week,\n        EXTRACT(MONTH FROM gf.forecast_time) AS month_of_year,\n        gf.parameter_value AS forecast_value,\n        wo.observation_id,\n        wo.observation_time,\n        CASE\n            WHEN gf.parameter_name = 'Temperature' THEN wo.temperature\n            WHEN gf.parameter_name = 'Precipitation' THEN COALESCE(wo.precipitation_amount, 0)\n            WHEN gf.parameter_name = 'WindSpeed' THEN wo.wind_speed\n            ELSE NULL\n        END AS observation_value,\n        ABS(gf.parameter_value - CASE\n            WHEN gf.parameter_name = 'Temperature' THEN wo.temperature\n            WHEN gf.parameter_name = 'Precipitation' THEN COALESCE(wo.precipitation_amount, 0)\n            WHEN gf.parameter_name = 'WindSpeed' THEN wo.wind_speed\n            ELSE NULL\n        END) AS absolute_error\n    FROM grib2_forecasts gf\n    INNER JOIN weather_observations wo ON (\n        wo.observation_time BETWEEN gf.forecast_time - INTERVAL '1 hour' AND gf.forecast_time + INTERVAL '1 hour'\n        AND ST_DISTANCE(gf.grid_cell_geom::geography, wo.station_geom::geography) < 25000\n    )\n    WHERE gf.transformation_status = 'Success'\n        AND (\n            (gf.parameter_name = 'Temperature' AND wo.temperature IS NOT NULL)\n            OR (gf.parameter_name = 'Precipitation' AND wo.precipitation_amount IS NOT NULL)\n            OR (gf.parameter_name = 'WindSpeed' AND wo.wind_speed IS NOT NULL)\n        )\n),\ntemporal_error_aggregation AS (\n    -- Second CTE: Aggregate errors by time periods\n    SELECT\n        fop.forecast_hour,\n 
       fop.forecast_date,\n        fop.hour_of_day,\n        fop.day_of_week,\n        fop.month_of_year,\n        fop.parameter_name,\n        COUNT(*) AS forecast_count,\n        AVG(fop.absolute_error) AS avg_absolute_error,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY fop.absolute_error) AS median_absolute_error,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY fop.absolute_error) AS q1_absolute_error,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY fop.absolute_error) AS q3_absolute_error,\n        STDDEV(fop.absolute_error) AS stddev_absolute_error,\n        MAX(fop.absolute_error) AS max_absolute_error,\n        MIN(fop.absolute_error) AS min_absolute_error\n    FROM forecast_observation_pairs fop\n    GROUP BY\n        fop.forecast_hour,\n        fop.forecast_date,\n        fop.hour_of_day,\n        fop.day_of_week,\n        fop.month_of_year,\n        fop.parameter_name\n),\ntrend_analysis AS (\n    -- Third CTE: Trend analysis with window functions\n    SELECT\n        tea.forecast_hour,\n        tea.forecast_date,\n        tea.hour_of_day,\n        tea.day_of_week,\n        tea.month_of_year,\n        tea.parameter_name,\n        tea.forecast_count,\n        ROUND(CAST(CAST(tea.avg_absolute_error AS NUMERIC) AS NUMERIC), 2) AS avg_absolute_error,\n        ROUND(CAST(CAST(tea.median_absolute_error AS NUMERIC) AS NUMERIC), 2) AS median_absolute_error,\n        ROUND(CAST(CAST(tea.stddev_absolute_error AS NUMERIC) AS NUMERIC), 2) AS stddev_absolute_error,\n        -- Moving averages for trend detection\n        AVG(tea.avg_absolute_error) OVER (\n            PARTITION BY tea.parameter_name\n            ORDER BY tea.forecast_hour\n            ROWS BETWEEN 23 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_error_24h,\n        AVG(tea.avg_absolute_error) OVER (\n            PARTITION BY tea.parameter_name\n            ORDER BY tea.forecast_hour\n            ROWS BETWEEN 167 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_error_168h,\n  
      -- Lag/Lead for trend direction\n        LAG(tea.avg_absolute_error, 1) OVER (\n            PARTITION BY tea.parameter_name\n            ORDER BY tea.forecast_hour\n        ) AS prev_avg_error,\n        LEAD(tea.avg_absolute_error, 1) OVER (\n            PARTITION BY tea.parameter_name\n            ORDER BY tea.forecast_hour\n        ) AS next_avg_error,\n        -- Cumulative error\n        SUM(tea.avg_absolute_error) OVER (\n            PARTITION BY tea.parameter_name\n            ORDER BY tea.forecast_hour\n            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n        ) AS cumulative_error\n    FROM temporal_error_aggregation tea\n),\nerror_pattern_detection AS (\n    -- Fourth CTE: Detect error patterns\n    SELECT\n        ta.forecast_hour,\n        ta.forecast_date,\n        ta.hour_of_day,\n        ta.day_of_week,\n        ta.month_of_year,\n        ta.parameter_name,\n        ta.forecast_count,\n        ta.avg_absolute_error,\n        ta.median_absolute_error,\n        ta.stddev_absolute_error,\n        ROUND(CAST(CAST(ta.moving_avg_error_24h AS NUMERIC) AS NUMERIC), 2) AS moving_avg_error_24h,\n        ROUND(CAST(CAST(ta.moving_avg_error_168h AS NUMERIC) AS NUMERIC), 2) AS moving_avg_error_168h,\n        ROUND(CAST(CAST(ta.prev_avg_error AS NUMERIC) AS NUMERIC), 2) AS prev_avg_error,\n        -- Error trend direction\n        CASE\n            WHEN ta.prev_avg_error IS NOT NULL THEN\n                CASE\n                    WHEN ta.avg_absolute_error > ta.prev_avg_error * 1.1 THEN 'Increasing'\n                    WHEN ta.avg_absolute_error < ta.prev_avg_error * 0.9 THEN 'Decreasing'\n                    ELSE 'Stable'\n                END\n            ELSE NULL\n        END AS error_trend,\n        -- Seasonal pattern detection\n        CASE\n            WHEN ta.month_of_year IN (12, 1, 2) THEN 'Winter'\n            WHEN ta.month_of_year IN (3, 4, 5) THEN 'Spring'\n            WHEN ta.month_of_year IN (6, 7, 8) THEN 'Summer'\n            ELSE 'Fall'\n        END AS season,\n        -- Time of day pattern\n        CASE\n            WHEN ta.hour_of_day BETWEEN 6 AND 11 THEN 'Morning'\n            WHEN ta.hour_of_day BETWEEN 12 AND 17 THEN 'Afternoon'\n            WHEN ta.hour_of_day BETWEEN 18 AND 23 THEN 'Evening'\n            ELSE 'Night'\n        END AS time_of_day\n    FROM trend_analysis ta\n),\nseasonal_error_analysis AS (\n    -- Fifth CTE: Seasonal error analysis\n    SELECT\n        epd.forecast_hour,\n        epd.forecast_date,\n        epd.parameter_name,\n        epd.avg_absolute_error,\n        epd.error_trend,\n        epd.season,\n        epd.time_of_day,\n        -- Seasonal averages\n        
AVG(epd.avg_absolute_error) OVER (\n            PARTITION BY epd.parameter_name, epd.season\n        ) AS seasonal_avg_error,\n        AVG(epd.avg_absolute_error) OVER (\n            PARTITION BY epd.parameter_name, epd.time_of_day\n        ) AS time_of_day_avg_error,\n        -- Percentile rankings\n        PERCENT_RANK() OVER (\n            PARTITION BY epd.parameter_name\n            ORDER BY epd.avg_absolute_error DESC\n        ) AS error_percentile,\n        NTILE(5) OVER (\n            PARTITION BY epd.parameter_name, epd.season\n            ORDER BY epd.avg_absolute_error DESC\n        ) AS seasonal_error_quintile\n    FROM error_pattern_detection epd\n),\naccuracy_classification AS (\n    -- Sixth CTE: Classify accuracy levels\n    SELECT\n        sea.forecast_hour,\n        sea.forecast_date,\n        sea.parameter_name,\n        sea.avg_absolute_error,\n        sea.error_trend,\n        sea.season,\n        sea.time_of_day,\n        ROUND(CAST(CAST(sea.seasonal_avg_error AS NUMERIC) AS NUMERIC), 2) AS seasonal_avg_error,\n        ROUND(CAST(CAST(sea.time_of_day_avg_error AS NUMERIC) AS NUMERIC), 2) AS time_of_day_avg_error,\n        ROUND(CAST(CAST(sea.error_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS error_percentile,\n        sea.seasonal_error_quintile,\n        -- Accuracy classification\n        CASE\n            WHEN sea.avg_absolute_error <= sea.seasonal_avg_error * 0.8 THEN 'Excellent'\n            WHEN sea.avg_absolute_error <= sea.seasonal_avg_error THEN 'Good'\n            WHEN sea.avg_absolute_error <= sea.seasonal_avg_error * 1.2 THEN 'Fair'\n            ELSE 'Poor'\n        END AS accuracy_classification\n    FROM seasonal_error_analysis sea\n),\nfinal_accuracy_trends AS (\n    -- Seventh CTE: Final trend analysis\n    SELECT\n        ac.forecast_hour,\n        ac.forecast_date,\n        ac.parameter_name,\n        ac.avg_absolute_error,\n        ac.error_trend,\n        ac.season,\n        ac.time_of_day,\n        
ac.seasonal_avg_error,\n        ac.time_of_day_avg_error,\n        ac.error_percentile,\n        ac.accuracy_classification,\n        -- Trend recommendations\n        CASE\n            WHEN ac.error_trend = 'Increasing' AND ac.accuracy_classification IN ('Fair', 'Poor') THEN 'Review Model Parameters'\n            WHEN ac.error_trend = 'Decreasing' THEN 'Model Improving'\n            WHEN ac.accuracy_classification = 'Poor' THEN 'Investigate Root Cause'\n            ELSE 'Monitor'\n        END AS recommendation\n    FROM accuracy_classification ac\n)\nSELECT\n    forecast_hour,\n    forecast_date,\n    parameter_name,\n    avg_absolute_error,\n    error_trend,\n    season,\n    time_of_day,\n    seasonal_avg_error,\n    time_of_day_avg_error,\n    error_percentile,\n    accuracy_classification,\n    recommendation\nFROM final_accuracy_trends\nWHERE forecast_date >= CURRENT_DATE - INTERVAL '30 days'\nORDER BY forecast_hour DESC, parameter_name\nLIMIT 500;",
      "line_number": 1598,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.012648,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 7,
      "title": "Boundary Forecast Aggregation Analysis with Multi-Level Spatial Summarization",
      "description": "Use Case: Custom Weather Impact Modeling - Aggregated Forecasts by Boundary for Retail Operations Description: Enterprise-level boundary forecast aggregation analysis with multi-level spatial summarization, hierarchical aggregations, and comprehensive statistical summaries. Implements production patterns for aggregating gridded forecasts to geographic boundaries. Business Value: Summary forecasts aggregated by client-defined boundaries (counties, zones). Retail chains can get aggregated temperat",
      "complexity": "Multiple CTEs (7+ levels), spatial aggregations, hierarchical summarization, statistical calculations, window functions, UNION operations",
      "expected_output": "Query results",
      "sql": "WITH forecast_boundary_matching AS (\n    -- First CTE: Match forecasts to boundaries\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        gf.parameter_value,\n        gf.grid_cell_geom,\n        sb.boundary_id,\n        sb.feature_type,\n        sb.feature_name,\n        sb.feature_identifier,\n        sb.boundary_geom,\n        CASE\n            WHEN sb.boundary_geom IS NOT NULL AND gf.grid_cell_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(gf.grid_cell_geom::geometry, sb.boundary_geom::geometry) THEN TRUE\n                    ELSE FALSE\n                END\n            ELSE NULL\n        END AS is_within_boundary\n    FROM grib2_forecasts gf\n    CROSS JOIN shapefile_boundaries sb\n    WHERE gf.transformation_status = 'Success'\n        AND sb.boundary_geom IS NOT NULL\n        AND gf.grid_cell_geom IS NOT NULL\n        AND ST_DISTANCE(gf.grid_cell_geom::geography, sb.boundary_geom::geography) < 50000\n),\nboundary_forecast_aggregation AS (\n    -- Second CTE: Aggregate forecasts by boundary\n    SELECT\n        fbm.boundary_id,\n        fbm.feature_type,\n        fbm.feature_name,\n        fbm.feature_identifier,\n        fbm.parameter_name,\n        fbm.forecast_time,\n        COUNT(DISTINCT fbm.forecast_id) AS grid_cells_count,\n        COUNT(CASE WHEN fbm.is_within_boundary = TRUE THEN 1 END) AS cells_within_boundary,\n        AVG(fbm.parameter_value) AS avg_value,\n        MIN(fbm.parameter_value) AS min_value,\n        MAX(fbm.parameter_value) AS max_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY fbm.parameter_value) AS median_value,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY fbm.parameter_value) AS q1_value,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY fbm.parameter_value) AS q3_value,\n        STDDEV(fbm.parameter_value) AS stddev_value,\n        VARIANCE(fbm.parameter_value) AS variance_value\n    FROM 
forecast_boundary_matching fbm\n    WHERE fbm.is_within_boundary = TRUE\n    GROUP BY\n        fbm.boundary_id,\n        fbm.feature_type,\n        fbm.feature_name,\n        fbm.feature_identifier,\n        fbm.parameter_name,\n        fbm.forecast_time\n),\nfeature_type_aggregation AS (\n    -- Third CTE: Aggregate by feature type\n    SELECT\n        bfa.feature_type,\n        bfa.parameter_name,\n        bfa.forecast_time,\n        COUNT(DISTINCT bfa.boundary_id) AS boundary_count,\n        SUM(bfa.grid_cells_count) AS total_grid_cells,\n        AVG(bfa.avg_value) AS feature_type_avg_value,\n        AVG(bfa.min_value) AS feature_type_min_value,\n        AVG(bfa.max_value) AS feature_type_max_value,\n        AVG(bfa.median_value) AS feature_type_median_value,\n        AVG(bfa.stddev_value) AS feature_type_stddev_value\n    FROM boundary_forecast_aggregation bfa\n    GROUP BY\n        bfa.feature_type,\n        bfa.parameter_name,\n        bfa.forecast_time\n),\ntemporal_aggregation AS (\n    -- Fourth CTE: Temporal aggregation\n    SELECT\n        bfa.boundary_id,\n        bfa.feature_type,\n        bfa.feature_name,\n        bfa.parameter_name,\n        DATE_TRUNC('hour', bfa.forecast_time) AS forecast_hour,\n        DATE_TRUNC('day', bfa.forecast_time) AS forecast_date,\n        COUNT(*) AS forecast_count,\n        AVG(bfa.avg_value) AS hourly_avg_value,\n        AVG(bfa.min_value) AS hourly_min_value,\n        AVG(bfa.max_value) AS hourly_max_value,\n        AVG(bfa.median_value) AS hourly_median_value,\n        -- Window functions for temporal trends\n        AVG(bfa.avg_value) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY DATE_TRUNC('hour', bfa.forecast_time)\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_6h,\n        LAG(bfa.avg_value, 1) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY DATE_TRUNC('hour', bfa.forecast_time)\n        ) AS 
prev_hour_avg\n    FROM boundary_forecast_aggregation bfa\n    GROUP BY\n        bfa.boundary_id,\n        bfa.feature_type,\n        bfa.feature_name,\n        bfa.parameter_name,\n        DATE_TRUNC('hour', bfa.forecast_time),\n        DATE_TRUNC('day', bfa.forecast_time),\n        bfa.avg_value\n),\nstatistical_summary AS (\n    -- Fifth CTE: Statistical summary\n    SELECT\n        ta.boundary_id,\n        ta.feature_type,\n        ta.feature_name,\n        ta.parameter_name,\n        ta.forecast_hour,\n        ta.forecast_date,\n        ta.forecast_count,\n        ROUND(CAST(CAST(ta.hourly_avg_value AS NUMERIC) AS NUMERIC), 2) AS hourly_avg_value,\n        ROUND(CAST(CAST(ta.hourly_min_value AS NUMERIC) AS NUMERIC), 2) AS hourly_min_value,\n        ROUND(CAST(CAST(ta.hourly_max_value AS NUMERIC) AS NUMERIC), 2) AS hourly_max_value,\n        ROUND(CAST(CAST(ta.hourly_median_value AS NUMERIC) AS NUMERIC), 2) AS hourly_median_value,\n        ROUND(CAST(CAST(ta.moving_avg_6h AS NUMERIC) AS NUMERIC), 2) AS moving_avg_6h,\n        ROUND(CAST(CAST(ta.prev_hour_avg AS NUMERIC) AS NUMERIC), 2) AS prev_hour_avg,\n        -- Value range\n        ta.hourly_max_value - ta.hourly_min_value AS value_range,\n        -- Coefficient of variation\n        CASE\n            WHEN ta.hourly_avg_value != 0 THEN\n                ABS(ta.hourly_max_value - ta.hourly_min_value) / ABS(ta.hourly_avg_value)\n            ELSE NULL\n        END AS coefficient_of_variation,\n        -- Trend indicator\n        CASE\n            WHEN ta.prev_hour_avg IS NOT NULL THEN\n                CASE\n                    WHEN ta.hourly_avg_value > ta.prev_hour_avg * 1.05 THEN 'Increasing'\n                    WHEN ta.hourly_avg_value < ta.prev_hour_avg * 0.95 THEN 'Decreasing'\n                    ELSE 'Stable'\n                END\n            ELSE NULL\n        END AS trend_indicator\n    FROM temporal_aggregation ta\n),\nfinal_aggregation_report AS (\n    -- Sixth CTE: Final aggregation report\n    SELECT\n        ss.boundary_id,\n        ss.feature_type,\n        ss.feature_name,\n        ss.parameter_name,\n        ss.forecast_hour,\n        ss.forecast_date,\n        ss.forecast_count,\n        ss.hourly_avg_value,\n        ss.hourly_min_value,\n        ss.hourly_max_value,\n        ss.hourly_median_value,\n        ss.moving_avg_6h,\n   
     ROUND(CAST(CAST(ss.value_range AS NUMERIC) AS NUMERIC), 2) AS value_range,\n        ROUND(CAST(CAST(ss.coefficient_of_variation AS NUMERIC) AS NUMERIC), 4) AS coefficient_of_variation,\n        ss.trend_indicator,\n        -- Percentile rankings\n        PERCENT_RANK() OVER (\n            PARTITION BY ss.feature_type, ss.parameter_name\n            ORDER BY ss.hourly_avg_value DESC\n        ) AS value_percentile,\n        NTILE(5) OVER (\n            PARTITION BY ss.feature_type\n            ORDER BY ss.hourly_avg_value DESC\n        ) AS value_quintile\n    FROM statistical_summary ss\n)\nSELECT\n    boundary_id,\n    feature_type,\n    feature_name,\n    parameter_name,\n    forecast_hour,\n    forecast_date,\n    forecast_count,\n    hourly_avg_value,\n    hourly_min_value,\n    hourly_max_value,\n    hourly_median_value,\n    moving_avg_6h,\n    value_range,\n    coefficient_of_variation,\n    trend_indicator,\n    ROUND(CAST(CAST(value_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS value_percentile,\n    value_quintile\nFROM final_aggregation_report\nWHERE forecast_date >= CURRENT_DATE - INTERVAL '7 days'\nORDER BY forecast_hour DESC, boundary_id, parameter_name\nLIMIT 500;",
      "line_number": 1857,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.012771,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 8,
      "title": "Observation Forecast Validation Analysis with Accuracy Scoring",
      "description": "Use Case: Forensic Meteorology - Forecast vs. Observation Validation for Legal Evidence Description: Enterprise-level observation-forecast validation analysis comparing actual observations with forecasts, calculating accuracy scores, identifying systematic biases, and providing validation metrics. Implements production patterns for forecast validation and model evaluation. Purpose: Validation report comparing forecasts to actual observations with accuracy scoring. Legal cases require documentati",
      "complexity": "Multiple CTEs (8+ levels), observation-forecast matching, accuracy calculations, bias detection, validation scoring, window functions, statistical analysis",
      "expected_output": "Query results",
      "sql": "WITH observation_forecast_matching AS (\n    -- First CTE: Match observations with forecasts\n    SELECT\n        wo.observation_id,\n        wo.station_id,\n        wo.station_name,\n        wo.observation_time,\n        wo.station_geom,\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        gf.parameter_value AS forecast_value,\n        CASE\n            WHEN gf.parameter_name = 'Temperature' THEN wo.temperature\n            WHEN gf.parameter_name = 'Precipitation' THEN COALESCE(wo.precipitation_amount, 0)\n            WHEN gf.parameter_name = 'WindSpeed' THEN wo.wind_speed\n            ELSE NULL\n        END AS observation_value,\n        -- Time difference\n        EXTRACT(EPOCH FROM (wo.observation_time - gf.forecast_time)) / 3600 AS hours_difference,\n        -- Spatial distance\n        CASE\n            WHEN wo.station_geom IS NOT NULL AND gf.grid_cell_geom IS NOT NULL THEN\n                ST_DISTANCE(wo.station_geom::geography, gf.grid_cell_geom::geography)\n            ELSE NULL\n        END AS spatial_distance\n    FROM weather_observations wo\n    INNER JOIN grib2_forecasts gf ON (\n        gf.parameter_name IN ('Temperature', 'Precipitation', 'WindSpeed')\n        AND gf.forecast_time BETWEEN wo.observation_time - INTERVAL '2 hours' AND wo.observation_time + INTERVAL '2 hours'\n        AND wo.station_geom IS NOT NULL\n        AND gf.grid_cell_geom IS NOT NULL\n        AND ST_DISTANCE(wo.station_geom::geography, gf.grid_cell_geom::geography) < 50000\n    )\n    WHERE wo.observation_time >= CURRENT_TIMESTAMP - INTERVAL '30 days'\n),\nvalidation_metrics AS (\n    -- Second CTE: Calculate validation metrics\n    SELECT\n        ofm.observation_id,\n        ofm.station_id,\n        ofm.station_name,\n        ofm.parameter_name,\n        ofm.forecast_time,\n        ofm.observation_time,\n        ofm.forecast_value,\n        ofm.observation_value,\n        ofm.hours_difference,\n        
ROUND(CAST(CAST(ofm.spatial_distance AS NUMERIC) AS NUMERIC), 2) AS spatial_distance,\n        -- Error calculations\n        ofm.forecast_value - ofm.observation_value AS error,\n        ABS(ofm.forecast_value - ofm.observation_value) AS absolute_error,\n        CASE\n            WHEN ofm.observation_value != 0 THEN\n                ABS((ofm.forecast_value - ofm.observation_value) / ofm.observation_value) * 100\n            ELSE NULL\n        END AS percentage_error,\n        -- Squared error for RMSE calculation\n        POWER(ofm.forecast_value - ofm.observation_value, 2) AS squared_error\n    FROM observation_forecast_matching ofm\n    WHERE ofm.observation_value IS NOT NULL\n        AND ofm.forecast_value IS NOT NULL\n),\nstation_validation_summary AS (\n    -- Third CTE: Summarize by station\n    SELECT\n        vm.station_id,\n        vm.station_name,\n        vm.parameter_name,\n        COUNT(*) AS validation_count,\n        AVG(vm.error) AS mean_error,\n        AVG(vm.absolute_error) AS mean_absolute_error,\n        SQRT(AVG(vm.squared_error)) AS root_mean_squared_error,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY vm.absolute_error) AS median_absolute_error,\n        STDDEV(vm.error) AS error_stddev,\n        -- Bias indicators\n        CASE\n            WHEN AVG(vm.error) > 2 THEN 'Over-forecast Bias'\n            WHEN AVG(vm.error) < -2 THEN 'Under-forecast Bias'\n            ELSE 'No Significant Bias'\n        END AS bias_indicator,\n        -- Accuracy classification\n        CASE\n            WHEN AVG(vm.absolute_error) <= 2 THEN 'Excellent'\n            WHEN AVG(vm.absolute_error) <= 5 THEN 'Good'\n            WHEN AVG(vm.absolute_error) <= 10 THEN 'Fair'\n            ELSE 'Poor'\n        END AS accuracy_classification\n    FROM validation_metrics vm\n    GROUP BY\n        vm.station_id,\n        vm.station_name,\n        vm.parameter_name\n),\nparameter_validation_summary AS (\n    -- Fourth CTE: Summarize by parameter\n    SELECT\n        vm.parameter_name,\n        vm.forecast_time,\n        COUNT(*) AS total_validations,\n        COUNT(DISTINCT vm.station_id) AS unique_stations,\n        AVG(vm.error) AS overall_mean_error,\n        AVG(vm.absolute_error) AS overall_mean_absolute_error,\n        SQRT(AVG(vm.squared_error)) AS overall_rmse,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY vm.absolute_error) AS overall_median_error,\n        AVG(vm.absolute_error) AS overall_mean_absolute_error_for_window\n    FROM validation_metrics vm\n    GROUP BY vm.parameter_name, vm.forecast_time\n),\nparameter_validation_with_window AS (\n    -- Fifth CTE: Add window functions to parameter summary\n    SELECT\n        pvs.parameter_name,\n        pvs.forecast_time,\n        pvs.total_validations,\n        
pvs.unique_stations,\n        pvs.overall_mean_error,\n        pvs.overall_mean_absolute_error,\n        pvs.overall_rmse,\n        pvs.overall_median_error,\n        -- Window functions for comparison\n        AVG(pvs.overall_mean_absolute_error_for_window) OVER (\n            PARTITION BY pvs.parameter_name\n            ORDER BY pvs.forecast_time\n            ROWS BETWEEN 99 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_error_100,\n        PERCENT_RANK() OVER (\n            ORDER BY pvs.overall_mean_absolute_error DESC\n        ) AS error_percentile\n    FROM parameter_validation_summary pvs\n),\ntemporal_validation_analysis AS (\n    -- Sixth CTE: Temporal validation analysis\n    SELECT\n        vm.parameter_name,\n        DATE_TRUNC('day', vm.forecast_time) AS forecast_date,\n        COUNT(*) AS daily_validations,\n        AVG(vm.absolute_error) AS daily_mean_absolute_error,\n        AVG(vm.error) AS daily_mean_error,\n        STDDEV(vm.error) AS daily_error_stddev,\n        -- Lag for trend detection\n        LAG(AVG(vm.absolute_error), 1) OVER (\n            PARTITION BY vm.parameter_name\n            ORDER BY DATE_TRUNC('day', vm.forecast_time)\n        ) AS prev_day_mae\n    FROM validation_metrics vm\n    GROUP BY\n        vm.parameter_name,\n        DATE_TRUNC('day', vm.forecast_time)\n),\nvalidation_scoring AS (\n    -- Sixth CTE: Calculate validation scores\n    SELECT\n        svs.station_id,\n        svs.station_name,\n        svs.parameter_name,\n        svs.validation_count,\n        ROUND(CAST(CAST(svs.mean_error AS NUMERIC) AS NUMERIC), 2) AS mean_error,\n        ROUND(CAST(CAST(svs.mean_absolute_error AS NUMERIC) AS NUMERIC), 2) AS mean_absolute_error,\n        ROUND(CAST(CAST(svs.root_mean_squared_error AS NUMERIC) AS NUMERIC), 2) AS root_mean_squared_error,\n        ROUND(CAST(CAST(svs.median_absolute_error AS NUMERIC) AS NUMERIC), 2) AS median_absolute_error,\n        ROUND(CAST(CAST(svs.error_stddev AS NUMERIC) AS NUMERIC), 2) AS 
error_stddev,\n        svs.bias_indicator,\n        svs.accuracy_classification,\n        -- Validation score (higher is better, normalized to 0-100)\n        CASE\n            WHEN svs.mean_absolute_error <= 2 THEN 100\n            WHEN svs.mean_absolute_error <= 5 THEN 80\n            WHEN svs.mean_absolute_error <= 10 THEN 60\n            WHEN svs.mean_absolute_error <= 20 THEN 40\n            ELSE 20\n        END AS validation_score\n    FROM station_validation_summary svs\n),\nfinal_validation_report AS (\n    -- Seventh CTE: Final validation report\n    SELECT\n        vs.station_id,\n        vs.station_name,\n        vs.parameter_name,\n        vs.validation_count,\n        vs.mean_error,\n        vs.mean_absolute_error,\n        vs.root_mean_squared_error,\n        vs.median_absolute_error,\n        vs.error_stddev,\n        vs.bias_indicator,\n        vs.accuracy_classification,\n        vs.validation_score,\n        -- Rankings\n        ROW_NUMBER() OVER (\n            PARTITION BY vs.parameter_name\n            ORDER BY vs.validation_score DESC\n        ) AS accuracy_rank,\n        PERCENT_RANK() OVER (\n            PARTITION BY vs.parameter_name\n            ORDER BY vs.mean_absolute_error ASC\n        ) AS error_percentile,\n        NTILE(5) OVER (\n            PARTITION BY vs.parameter_name\n            ORDER BY vs.validation_score DESC\n        ) AS validation_quintile\n    FROM validation_scoring vs\n)\nSELECT\n    station_id,\n    station_name,\n    parameter_name,\n    validation_count,\n    mean_error,\n    mean_absolute_error,\n    root_mean_squared_error,\n    median_absolute_error,\n    error_stddev,\n    bias_indicator,\n    accuracy_classification,\n    validation_score,\n    accuracy_rank,\n    ROUND(CAST(CAST(error_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS error_percentile,\n    validation_quintile\nFROM final_validation_report\nORDER BY parameter_name, validation_score DESC\nLIMIT 300;",
      "line_number": 2075,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.015261,
        "row_count": 0,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 9,
      "title": "Multi-Boundary Spatial Intersection Analysis with Overlap Detection",
      "description": "Use Case: Custom Map Development - Boundary Overlap Detection for Real Estate Development Description: Enterprise-level multi-boundary spatial intersection analysis identifying overlapping boundaries, intersection areas, coverage gaps, and spatial relationships between different boundary types. Implements production patterns for analyzing complex geospatial boundary relationships. Business Value: Analysis of overlapping boundaries (e.g., fire zones overlapping counties) with intersection metrics",
      "complexity": "Multiple CTEs (7+ levels), spatial intersection operations, overlap calculations, area computations, relationship detection, window functions",
      "expected_output": "Query results",
      "sql": "WITH boundary_pairs AS (\n    -- First CTE: Create boundary pairs for intersection analysis\n    SELECT\n        sb1.boundary_id AS boundary1_id,\n        sb1.feature_type AS boundary1_type,\n        sb1.feature_name AS boundary1_name,\n        sb1.boundary_geom AS boundary1_geom,\n        sb2.boundary_id AS boundary2_id,\n        sb2.feature_type AS boundary2_type,\n        sb2.feature_name AS boundary2_name,\n        sb2.boundary_geom AS boundary2_geom,\n        -- Spatial relationships\n        CASE\n            WHEN sb1.boundary_geom IS NOT NULL AND sb2.boundary_geom IS NOT NULL THEN\n                CASE\n                    WHEN ST_Within(sb1.boundary_geom::geometry, sb2.boundary_geom::geometry) THEN 'Boundary1 Within Boundary2'\n                    WHEN ST_Within(sb2.boundary_geom::geometry, sb1.boundary_geom::geometry) THEN 'Boundary2 Within Boundary1'\n                    WHEN ST_INTERSECTS(CAST(sb1.boundary_geom AS geometry), CAST(sb2.boundary_geom AS geometry)) THEN 'Intersects'\n                    WHEN ST_TOUCHES(CAST(sb1.boundary_geom AS geometry), CAST(sb2.boundary_geom AS geometry)) THEN 'Touches'\n                    ELSE 'Disjoint'\n                END\n            ELSE NULL\n        END AS spatial_relationship,\n        -- Distance calculation\n        CASE\n            WHEN sb1.boundary_geom IS NOT NULL AND sb2.boundary_geom IS NOT NULL THEN\n                ST_DISTANCE(sb1.boundary_geom::geography, sb2.boundary_geom::geography)\n            ELSE NULL\n        END AS distance_between_boundaries\n    FROM shapefile_boundaries sb1\n    CROSS JOIN shapefile_boundaries sb2\n    WHERE sb1.boundary_id < sb2.boundary_id\n        AND sb1.boundary_geom IS NOT NULL\n        AND sb2.boundary_geom IS NOT NULL\n        AND ST_DISTANCE(sb1.boundary_geom::geography, sb2.boundary_geom::geography) < 100000\n),\nintersection_analysis AS (\n    -- Second CTE: Calculate intersection metrics\n    SELECT\n        bp.boundary1_id,\n        
bp.boundary1_type,\n        bp.boundary1_name,\n        bp.boundary2_id,\n        bp.boundary2_type,\n        bp.boundary2_name,\n        bp.spatial_relationship,\n        ROUND(CAST(CAST(bp.distance_between_boundaries AS NUMERIC) AS NUMERIC), 2) AS distance_between_boundaries,\n        -- Intersection area\n        CASE\n            WHEN bp.spatial_relationship IN ('Intersects', 'Boundary1 Within Boundary2', 'Boundary2 Within Boundary1') THEN\n                ST_AREA(ST_INTERSECTION(bp.boundary1_geom, bp.boundary2_geom))\n            ELSE NULL\n        END AS intersection_area,\n        -- Individual boundary areas\n        ST_AREA(bp.boundary1_geom) AS boundary1_area,\n        ST_AREA(bp.boundary2_geom) AS boundary2_area,\n        -- Union area\n        CASE\n            WHEN bp.spatial_relationship IN ('Intersects', 'Boundary1 Within Boundary2', 'Boundary2 Within Boundary1') THEN\n                ST_AREA(ST_UNION(CAST(bp.boundary1_geom AS geometry), CAST(bp.boundary2_geom AS geometry)))\n            ELSE NULL\n        END AS union_area\n    FROM boundary_pairs bp\n),\noverlap_metrics AS (\n    -- Third CTE: Calculate overlap metrics\n    SELECT\n        ia.boundary1_id,\n        ia.boundary1_type,\n        ia.boundary1_name,\n        ia.boundary2_id,\n        ia.boundary2_type,\n        ia.boundary2_name,\n        ia.spatial_relationship,\n        ia.distance_between_boundaries,\n        ROUND(CAST(CAST(ia.intersection_area AS NUMERIC) AS NUMERIC), 2) AS intersection_area,\n        ROUND(CAST(CAST(ia.boundary1_area AS NUMERIC) AS NUMERIC), 2) AS boundary1_area,\n        ROUND(CAST(CAST(ia.boundary2_area AS NUMERIC) AS NUMERIC), 2) AS boundary2_area,\n        ROUND(CAST(CAST(ia.union_area AS NUMERIC) AS NUMERIC), 2) AS union_area,\n        -- Overlap percentages\n        CASE\n            WHEN ia.boundary1_area > 0 AND ia.intersection_area IS NOT NULL THEN\n                (ia.intersection_area / ia.boundary1_area) * 100\n            ELSE NULL\n        END AS 
boundary1_overlap_percentage,\n        CASE\n            WHEN ia.boundary2_area > 0 AND ia.intersection_area IS NOT NULL THEN\n                (ia.intersection_area / ia.boundary2_area) * 100\n            ELSE NULL\n        END AS boundary2_overlap_percentage,\n        -- Jaccard similarity coefficient\n        CASE\n            WHEN ia.union_area > 0 AND ia.intersection_area IS NOT NULL THEN\n                ia.intersection_area / ia.union_area\n            ELSE NULL\n        END AS jaccard_similarity\n    FROM intersection_analysis ia\n),\nboundary_overlap_summary AS (\n    -- Fourth CTE: Summarize overlaps by boundary\n    SELECT\n        om.boundary1_id,\n        om.boundary1_type,\n        om.boundary1_name,\n        COUNT(*) AS total_intersections,\n        COUNT(CASE WHEN om.spatial_relationship = 'Intersects' THEN 1 END) AS intersection_count,\n        COUNT(CASE WHEN om.spatial_relationship = 'Boundary1 Within Boundary2' THEN 1 END) AS contained_count,\n        COUNT(CASE WHEN om.spatial_relationship = 'Boundary2 Within Boundary1' THEN 1 END) AS containing_count,\n        AVG(om.boundary1_overlap_percentage) AS avg_overlap_percentage,\n        MAX(om.boundary1_overlap_percentage) AS max_overlap_percentage,\n        SUM(om.intersection_area) AS total_intersection_area,\n        AVG(om.jaccard_similarity) AS avg_jaccard_similarity\n    FROM overlap_metrics om\n    GROUP BY\n        om.boundary1_id,\n        om.boundary1_type,\n        om.boundary1_name\n),\nfeature_type_overlap_analysis AS (\n    -- Fifth CTE: Analyze overlaps by feature type combinations\n    SELECT\n        om.boundary1_type,\n        om.boundary2_type,\n        COUNT(*) AS type_pair_count,\n        AVG(om.intersection_area) AS avg_intersection_area,\n        AVG(om.boundary1_overlap_percentage) AS avg_overlap_percentage,\n        AVG(om.jaccard_similarity) AS avg_jaccard_similarity,\n        COUNT(CASE WHEN om.spatial_relationship = 'Intersects' THEN 1 END) AS intersection_count,\n        
COUNT(CASE WHEN om.spatial_relationship IN ('Boundary1 Within Boundary2', 'Boundary2 Within Boundary1') THEN 1 END) AS containment_count\n    FROM overlap_metrics om\n    GROUP BY\n        om.boundary1_type,\n        om.boundary2_type\n),\nfinal_intersection_report AS (\n    -- Sixth CTE: Final intersection report\n    SELECT\n        om.boundary1_id,\n        om.boundary1_type,\n        om.boundary1_name,\n        om.boundary2_id,\n        om.boundary2_type,\n        om.boundary2_name,\n        om.spatial_relationship,\n        om.intersection_area,\n        ROUND(CAST(CAST(om.boundary1_overlap_percentage AS NUMERIC) AS NUMERIC), 2) AS boundary1_overlap_percentage,\n        ROUND(CAST(CAST(om.boundary2_overlap_percentage AS NUMERIC) AS NUMERIC), 2) AS boundary2_overlap_percentage,\n        ROUND(CAST(CAST(om.jaccard_similarity AS NUMERIC) AS NUMERIC), 4) AS jaccard_similarity,\n        -- Overlap classification\n        CASE\n            WHEN om.jaccard_similarity > 0.8 THEN 'High Overlap'\n            WHEN om.jaccard_similarity > 0.5 THEN 'Moderate Overlap'\n            WHEN om.jaccard_similarity > 0.2 THEN 'Low Overlap'\n            WHEN om.jaccard_similarity > 0 THEN 'Minimal Overlap'\n            ELSE 'No Overlap'\n        END AS overlap_classification,\n        -- Rankings\n        ROW_NUMBER() OVER (\n            ORDER BY om.jaccard_similarity DESC NULLS LAST\n        ) AS overlap_rank,\n        PERCENT_RANK() OVER (\n            ORDER BY om.intersection_area DESC NULLS LAST\n        ) AS intersection_area_percentile\n    FROM overlap_metrics om\n    WHERE om.intersection_area IS NOT NULL\n)\nSELECT\n    boundary1_id,\n    boundary1_type,\n    boundary1_name,\n    boundary2_id,\n    boundary2_type,\n    boundary2_name,\n    spatial_relationship,\n    intersection_area,\n    boundary1_overlap_percentage,\n    boundary2_overlap_percentage,\n    jaccard_similarity,\n    overlap_classification,\n    overlap_rank,\n    ROUND(CAST(CAST(intersection_area_percentile 
* 100 AS NUMERIC) AS NUMERIC), 2) AS intersection_area_percentile\nFROM final_intersection_report\nORDER BY jaccard_similarity DESC NULLS LAST\nLIMIT 200;",
      "line_number": 2319,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013932,
        "row_count": 0,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 10,
      "title": "Parameter Forecast Distribution Analysis with Statistical Profiling",
      "description": "Use Case: Physical Climate Risk Assessment - Statistical Weather Profiling for Insurance Underwriting Description:
    Enterprise-level parameter forecast distribution analysis with statistical profiling, distribution shape analysis, outlier detection, and distribution comparisons. Implements production patterns for statistical analysis of forecast distributions. Purpose: Statistical distribution analysis of forecast parameters with percentile rankings. Insurance companies need statistical profiles...",
      "complexity": "Multiple CTEs (6+ levels), statistical distribution analysis, percentile calculations, outlier detection, distribution comparisons, window functions",
      "expected_output": "Query results",
      "sql": "WITH parameter_distribution_base AS (\n    -- First CTE: Base parameter distribution data\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_date,\n        gf.parameter_value,\n        gf.grid_cell_latitude,\n        gf.grid_cell_longitude\n    FROM grib2_forecasts gf\n    WHERE gf.transformation_status = 'Success'\n        AND gf.parameter_value IS NOT NULL\n),\nparameter_statistics AS (\n    -- Second CTE: Calculate statistical measures\n    SELECT\n        pdb.parameter_name,\n        pdb.forecast_date,\n        COUNT(*) AS forecast_count,\n        AVG(pdb.parameter_value) AS mean_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pdb.parameter_value) AS median_value,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY pdb.parameter_value) AS q1_value,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pdb.parameter_value) AS q3_value,\n        PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY pdb.parameter_value) AS p10_value,\n        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY pdb.parameter_value) AS p90_value,\n        PERCENTILE_CONT(0.05) WITHIN GROUP (ORDER BY pdb.parameter_value) AS p5_value,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pdb.parameter_value) AS p95_value,\n        MIN(pdb.parameter_value) AS min_value,\n        MAX(pdb.parameter_value) AS max_value,\n        STDDEV(pdb.parameter_value) AS stddev_value,\n        VARIANCE(pdb.parameter_value) AS variance_value\n    FROM parameter_distribution_base pdb\n    GROUP BY\n        pdb.parameter_name,\n        pdb.forecast_date\n),\ndistribution_metrics AS (\n    -- Third CTE: Calculate distribution metrics\n    SELECT\n        ps.parameter_name,\n        ps.forecast_date,\n        ps.forecast_count,\n        ROUND(CAST(CAST(ps.mean_value AS NUMERIC) AS NUMERIC), 2) AS mean_value,\n        ROUND(CAST(CAST(ps.median_value AS NUMERIC) AS NUMERIC), 2) AS median_value,\n      
  ROUND(CAST(CAST(ps.q1_value AS NUMERIC) AS NUMERIC), 2) AS q1_value,\n        ROUND(CAST(CAST(ps.q3_value AS NUMERIC) AS NUMERIC), 2) AS q3_value,\n        ROUND(CAST(CAST(ps.p10_value AS NUMERIC) AS NUMERIC), 2) AS p10_value,\n        ROUND(CAST(CAST(ps.p90_value AS NUMERIC) AS NUMERIC), 2) AS p90_value,\n        ROUND(CAST(CAST(ps.p5_value AS NUMERIC) AS NUMERIC), 2) AS p5_value,\n        ROUND(CAST(CAST(ps.p95_value AS NUMERIC) AS NUMERIC), 2) AS p95_value,\n        ROUND(CAST(CAST(ps.min_value AS NUMERIC) AS NUMERIC), 2) AS min_value,\n        ROUND(CAST(CAST(ps.max_value AS NUMERIC) AS NUMERIC), 2) AS max_value,\n        ROUND(CAST(CAST(ps.stddev_value AS NUMERIC) AS NUMERIC), 2) AS stddev_value,\n        ROUND(CAST(CAST(ps.variance_value AS NUMERIC) AS NUMERIC), 2) AS variance_value,\n        -- Interquartile range\n        ps.q3_value - ps.q1_value AS iqr,\n        -- Range\n        ps.max_value - ps.min_value AS value_range,\n        -- Coefficient of variation\n        CASE\n            WHEN ps.mean_value != 0 THEN\n                ps.stddev_value / ABS(ps.mean_value)\n            ELSE NULL\n        END AS coefficient_of_variation,\n        -- Skewness indicator (simplified)\n        CASE\n            WHEN ps.mean_value > ps.median_value THEN 'Right Skewed'\n            WHEN ps.mean_value < ps.median_value THEN 'Left Skewed'\n            ELSE 'Symmetric'\n        END AS skewness_indicator\n    FROM parameter_statistics ps\n),\noutlier_detection AS (\n    -- Fourth CTE: Detect outliers\n    SELECT\n        pdb.forecast_id,\n        pdb.parameter_name,\n        pdb.forecast_date,\n        pdb.parameter_value,\n        dm.mean_value,\n        dm.median_value,\n        dm.stddev_value,\n        dm.q1_value,\n        dm.q3_value,\n        dm.iqr,\n        dm.p5_value,\n        dm.p95_value,\n        -- Outlier detection using IQR method\n        CASE\n            WHEN pdb.parameter_value < dm.q1_value - 1.5 * dm.iqr OR\n                 pdb.parameter_value > 
dm.q3_value + 1.5 * dm.iqr THEN TRUE\n            ELSE FALSE\n        END AS is_iqr_outlier,\n        -- Outlier detection using percentile method\n        CASE\n            WHEN pdb.parameter_value < dm.p5_value OR\n                 pdb.parameter_value > dm.p95_value THEN TRUE\n            ELSE FALSE\n        END AS is_percentile_outlier,\n        -- Outlier detection using z-score (simplified)\n        CASE\n            WHEN dm.stddev_value > 0 THEN\n                ABS((pdb.parameter_value - dm.mean_value) / dm.stddev_value) > 3\n            ELSE FALSE\n        END AS is_zscore_outlier\n    FROM parameter_distribution_base pdb\n    INNER JOIN distribution_metrics dm ON (\n        pdb.parameter_name = dm.parameter_name\n        AND pdb.forecast_date = dm.forecast_date\n    )\n),\noutlier_summary AS (\n    -- Fifth CTE: Summarize outliers\n    SELECT\n        od.parameter_name,\n        od.forecast_date,\n        COUNT(*) AS total_forecasts,\n        COUNT(CASE WHEN od.is_iqr_outlier = TRUE THEN 1 END) AS iqr_outlier_count,\n        COUNT(CASE WHEN od.is_percentile_outlier = TRUE THEN 1 END) AS percentile_outlier_count,\n        COUNT(CASE WHEN od.is_zscore_outlier = TRUE THEN 1 END) AS zscore_outlier_count,\n        AVG(CASE WHEN od.is_iqr_outlier = TRUE THEN od.parameter_value ELSE NULL END) AS avg_iqr_outlier_value,\n        AVG(CASE WHEN od.is_percentile_outlier = TRUE THEN od.parameter_value ELSE NULL END) AS avg_percentile_outlier_value\n    FROM outlier_detection od\n    GROUP BY\n        od.parameter_name,\n        od.forecast_date\n),\nfinal_distribution_report AS (\n    -- Sixth CTE: Final distribution report\n    SELECT\n        dm.parameter_name,\n        dm.forecast_date,\n        dm.forecast_count,\n        dm.mean_value,\n        dm.median_value,\n        dm.q1_value,\n        dm.q3_value,\n        dm.p10_value,\n        dm.p90_value,\n        dm.p5_value,\n        dm.p95_value,\n        dm.min_value,\n        dm.max_value,\n        
dm.stddev_value,\n        ROUND(CAST(CAST(dm.iqr AS NUMERIC) AS NUMERIC), 2) AS iqr,\n        ROUND(CAST(CAST(dm.value_range AS NUMERIC) AS NUMERIC), 2) AS value_range,\n        ROUND(CAST(CAST(dm.coefficient_of_variation AS NUMERIC) AS NUMERIC), 4) AS coefficient_of_variation,\n        dm.skewness_indicator,\n        os.iqr_outlier_count,\n        os.percentile_outlier_count,\n        os.zscore_outlier_count,\n        ROUND(CAST(CAST(os.avg_iqr_outlier_value AS NUMERIC) AS NUMERIC), 2) AS avg_iqr_outlier_value,\n        -- Outlier percentage\n        CASE\n            WHEN os.total_forecasts > 0 THEN\n                (os.iqr_outlier_count::NUMERIC / os.total_forecasts::NUMERIC) * 100\n            ELSE 0\n        END AS outlier_percentage,\n        -- Window functions for comparison\n        AVG(dm.mean_value) OVER (\n            PARTITION BY dm.parameter_name\n            ORDER BY dm.forecast_date\n            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_mean_7d,\n        PERCENT_RANK() OVER (\n            PARTITION BY dm.parameter_name\n            ORDER BY dm.stddev_value DESC\n        ) AS stddev_percentile\n    FROM distribution_metrics dm\n    INNER JOIN outlier_summary os ON (\n        dm.parameter_name = os.parameter_name\n        AND dm.forecast_date = os.forecast_date\n    )\n)\nSELECT\n    parameter_name,\n    forecast_date,\n    forecast_count,\n    mean_value,\n    median_value,\n    q1_value,\n    q3_value,\n    p10_value,\n    p90_value,\n    p5_value,\n    p95_value,\n    min_value,\n    max_value,\n    stddev_value,\n    iqr,\n    value_range,\n    coefficient_of_variation,\n    skewness_indicator,\n    iqr_outlier_count,\n    percentile_outlier_count,\n    zscore_outlier_count,\n    avg_iqr_outlier_value,\n    ROUND(CAST(CAST(outlier_percentage AS NUMERIC) AS NUMERIC), 2) AS outlier_percentage,\n    ROUND(CAST(CAST(moving_avg_mean_7d AS NUMERIC) AS NUMERIC), 2) AS moving_avg_mean_7d,\n    ROUND(CAST(CAST(stddev_percentile * 
100 AS NUMERIC) AS NUMERIC), 2) AS stddev_percentile\nFROM final_distribution_report\nWHERE forecast_date >= CURRENT_DATE - INTERVAL '30 days'\nORDER BY forecast_date DESC, parameter_name\nLIMIT 500;",
      "line_number":
    2524,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005478,
        "row_count": 3,
        "column_count": 25,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 11,
      "title": "Geospatial Forecast Interpolation Analysis with Spatial Gradient Detection",
      "description": "Use Case: Custom Weather Impact Modeling - Spatial Gradient Detection for Precision Agriculture Description:
    Enterprise-level geospatial forecast interpolation analysis identifying spatial gradients, interpolation opportunities, spatial patterns, and interpolation quality metrics. Implements production patterns for spatial interpolation and gradient analysis. Business Value: Analysis of how weather parameters change across space with gradient calculations. Agriculture companies can understand te...",
      "complexity": "Multiple CTEs (6 levels), spatial interpolation analysis, gradient calculations, spatial pattern detection, distance-based analysis, window functions",
      "expected_output": "Query results",
      "sql": "WITH spatial_forecast_grid AS (\n    -- First CTE: Create spatial forecast grid\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        gf.grid_cell_latitude,\n        gf.grid_cell_longitude,\n        gf.parameter_value,\n        gf.grid_cell_geom,\n        ROUND(CAST(gf.grid_cell_latitude::NUMERIC AS NUMERIC), 2) AS rounded_lat,\n        ROUND(CAST(gf.grid_cell_longitude::NUMERIC AS NUMERIC), 2) AS rounded_lon\n    FROM grib2_forecasts gf\n    WHERE gf.transformation_status = 'Success'\n        AND gf.grid_cell_geom IS NOT NULL\n),\nnearest_neighbor_analysis AS (\n    -- Second CTE: Find nearest neighbors for each grid cell\n    SELECT\n        sfg.forecast_id,\n        sfg.parameter_name,\n        sfg.forecast_time,\n        sfg.rounded_lat,\n        sfg.rounded_lon,\n        sfg.parameter_value,\n        -- Find nearest neighbor\n        (\n            SELECT sfg2.forecast_id\n            FROM spatial_forecast_grid sfg2\n            WHERE sfg2.forecast_id != sfg.forecast_id\n                AND sfg2.parameter_name = sfg.parameter_name\n                AND sfg2.forecast_time = sfg.forecast_time\n                AND sfg2.grid_cell_geom IS NOT NULL\n                AND sfg.grid_cell_geom IS NOT NULL\n            ORDER BY ST_DISTANCE(sfg.grid_cell_geom::geography, sfg2.grid_cell_geom::geography)\n            LIMIT 1\n        ) AS nearest_neighbor_id,\n        -- Distance to nearest neighbor\n        (\n            SELECT ST_DISTANCE(sfg.grid_cell_geom::geography, sfg2.grid_cell_geom::geography)\n            FROM spatial_forecast_grid sfg2\n            WHERE sfg2.forecast_id != sfg.forecast_id\n                AND sfg2.parameter_name = sfg.parameter_name\n                AND sfg2.forecast_time = sfg.forecast_time\n                AND sfg2.grid_cell_geom IS NOT NULL\n                AND sfg.grid_cell_geom IS NOT NULL\n            ORDER BY ST_DISTANCE(sfg.grid_cell_geom::geography, 
sfg2.grid_cell_geom::geography)\n            LIMIT 1\n        ) AS distance_to_nearest_neighbor,\n        -- Value of nearest neighbor\n        (\n            SELECT sfg2.parameter_value\n            FROM spatial_forecast_grid sfg2\n            WHERE sfg2.forecast_id != sfg.forecast_id\n                AND sfg2.parameter_name = sfg.parameter_name\n                AND sfg2.forecast_time = sfg.forecast_time\n                AND sfg2.grid_cell_geom IS NOT NULL\n                AND sfg.grid_cell_geom IS NOT NULL\n            ORDER BY ST_DISTANCE(sfg.grid_cell_geom::geography, sfg2.grid_cell_geom::geography)\n            LIMIT 1\n        ) AS nearest_neighbor_value\n    FROM spatial_forecast_grid sfg\n),\nspatial_gradient_calculation AS (\n    -- Third CTE: Calculate spatial gradients\n    SELECT\n        nna.forecast_id,\n        nna.parameter_name,\n        nna.forecast_time,\n        nna.rounded_lat,\n        nna.rounded_lon,\n        nna.parameter_value,\n        nna.nearest_neighbor_id,\n        ROUND(CAST(CAST(nna.distance_to_nearest_neighbor AS NUMERIC) AS NUMERIC), 2) AS distance_to_nearest_neighbor,\n        ROUND(CAST(CAST(nna.nearest_neighbor_value AS NUMERIC) AS NUMERIC), 2) AS nearest_neighbor_value,\n        -- Gradient calculation (value difference per unit distance)\n        CASE\n            WHEN nna.distance_to_nearest_neighbor > 0 AND nna.nearest_neighbor_value IS NOT NULL THEN\n                ABS(nna.parameter_value - nna.nearest_neighbor_value) / nna.distance_to_nearest_neighbor\n            ELSE NULL\n        END AS spatial_gradient,\n        -- Value difference\n        CASE\n            WHEN nna.nearest_neighbor_value IS NOT NULL THEN\n                nna.parameter_value - nna.nearest_neighbor_value\n            ELSE NULL\n        END AS value_difference\n    FROM nearest_neighbor_analysis nna\n),\ninterpolation_quality_metrics AS (\n    -- Fourth CTE: Calculate interpolation quality metrics\n    SELECT\n        sgc.forecast_id,\n        
sgc.parameter_name,\n        sgc.forecast_time,\n        sgc.rounded_lat,\n        sgc.rounded_lon,\n        sgc.parameter_value,\n        sgc.distance_to_nearest_neighbor,\n        sgc.spatial_gradient,\n        sgc.value_difference,\n        -- Count neighbors within different radii\n        (\n            SELECT COUNT(*)\n            FROM spatial_forecast_grid sfg2\n            WHERE sfg2.forecast_id != sgc.forecast_id\n                AND sfg2.parameter_name = sgc.parameter_name\n                AND sfg2.forecast_time = sgc.forecast_time\n                AND sfg2.grid_cell_geom IS NOT NULL\n                AND EXISTS (\n                    SELECT 1 FROM spatial_forecast_grid sfg3\n                    WHERE sfg3.forecast_id = sgc.forecast_id\n                        AND sfg3.grid_cell_geom IS NOT NULL\n                        AND ST_DISTANCE(sfg3.grid_cell_geom::geography, sfg2.grid_cell_geom::geography) < 10000\n                )\n        ) AS neighbors_within_10km,\n        (\n            SELECT COUNT(*)\n            FROM spatial_forecast_grid sfg2\n            WHERE sfg2.forecast_id != sgc.forecast_id\n                AND sfg2.parameter_name = sgc.parameter_name\n                AND sfg2.forecast_time = sgc.forecast_time\n                AND sfg2.grid_cell_geom IS NOT NULL\n                AND EXISTS (\n                    SELECT 1 FROM spatial_forecast_grid sfg3\n                    WHERE sfg3.forecast_id = sgc.forecast_id\n                        AND sfg3.grid_cell_geom IS NOT NULL\n                        AND ST_DISTANCE(sfg3.grid_cell_geom::geography, sfg2.grid_cell_geom::geography) < 25000\n                )\n        ) AS neighbors_within_25km,\n        -- Interpolation quality score\n        CASE\n            WHEN sgc.distance_to_nearest_neighbor < 5000 AND sgc.spatial_gradient < 0.001 THEN 'Excellent'\n            WHEN sgc.distance_to_nearest_neighbor < 10000 AND sgc.spatial_gradient < 0.002 THEN 'Good'\n            WHEN 
sgc.distance_to_nearest_neighbor < 25000 AND sgc.spatial_gradient < 0.005 THEN 'Fair'\n            ELSE 'Poor'\n        END AS interpolation_quality\n    FROM spatial_gradient_calculation sgc\n),\nspatial_pattern_analysis AS (\n    -- Fifth CTE: Analyze spatial patterns\n    SELECT\n        iqm.forecast_id,\n        iqm.parameter_name,\n        iqm.forecast_time,\n        iqm.rounded_lat,\n        iqm.rounded_lon,\n        iqm.parameter_value,\n        iqm.distance_to_nearest_neighbor,\n        ROUND(CAST(CAST(iqm.spatial_gradient AS NUMERIC) AS NUMERIC), 6) AS spatial_gradient,\n        iqm.neighbors_within_10km,\n        iqm.neighbors_within_25km,\n        iqm.interpolation_quality,\n        -- Window functions for spatial pattern detection\n        AVG(iqm.parameter_value) OVER (\n            PARTITION BY iqm.parameter_name, iqm.forecast_time\n            ORDER BY iqm.rounded_lat, iqm.rounded_lon\n            ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING\n        ) AS local_spatial_avg,\n        STDDEV(iqm.parameter_value) OVER (\n            PARTITION BY iqm.parameter_name, iqm.forecast_time\n            ORDER BY iqm.rounded_lat, iqm.rounded_lon\n            ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING\n        ) AS local_spatial_stddev,\n        -- Gradient classification\n        CASE\n            WHEN iqm.spatial_gradient < 0.0005 THEN 'Very Low Gradient'\n            WHEN iqm.spatial_gradient < 0.001 THEN 'Low Gradient'\n            WHEN iqm.spatial_gradient < 0.002 THEN 'Moderate Gradient'\n            WHEN iqm.spatial_gradient < 0.005 THEN 'High Gradient'\n            ELSE 'Very High Gradient'\n        END AS gradient_classification\n    FROM interpolation_quality_metrics iqm\n),\nfinal_interpolation_report AS (\n    -- Sixth CTE:
    Final interpolation report\n    SELECT\n        spa.forecast_id,\n        spa.parameter_name,\n        spa.forecast_time,\n        spa.rounded_lat,\n        spa.rounded_lon,\n        spa.parameter_value,\n        spa.distance_to_nearest_neighbor,\n        spa.spatial_gradient,\n        spa.neighbors_within_10km,\n        spa.neighbors_within_25km,\n        spa.interpolation_quality,\n        ROUND(CAST(CAST(spa.local_spatial_avg AS NUMERIC) AS NUMERIC), 2) AS local_spatial_avg,\n        ROUND(CAST(CAST(spa.local_spatial_stddev AS NUMERIC) AS NUMERIC), 2) AS local_spatial_stddev,\n        spa.gradient_classification,\n        -- Interpolation recommendations\n        CASE\n            WHEN spa.interpolation_quality = 'Poor' THEN 'Increase Grid Density'\n            WHEN spa.neighbors_within_10km < 3 THEN 'Add More Neighbors'\n            WHEN spa.spatial_gradient > 0.005 THEN 'High Variability - Use Advanced Interpolation'\n            ELSE 'Standard Interpolation Sufficient'\n        END AS interpolation_recommendation,\n        -- Rankings\n        PERCENT_RANK() OVER (\n            PARTITION BY spa.parameter_name\n            ORDER BY spa.spatial_gradient DESC\n        ) AS gradient_percentile,\n        NTILE(5) OVER (\n            PARTITION BY spa.parameter_name\n            ORDER BY spa.distance_to_nearest_neighbor ASC\n        ) AS neighbor_density_quintile\n    FROM spatial_pattern_analysis spa\n)\nSELECT\n    forecast_id,\n    parameter_name,\n    forecast_time,\n    rounded_lat,\n    rounded_lon,\n    parameter_value,\n    distance_to_nearest_neighbor,\n    spatial_gradient,\n    neighbors_within_10km,\n    neighbors_within_25km,\n    interpolation_quality,\n    local_spatial_avg,\n    local_spatial_stddev,\n    gradient_classification,\n    interpolation_recommendation,\n    ROUND(CAST(CAST(gradient_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS gradient_percentile,\n    neighbor_density_quintile\nFROM final_interpolation_report\nWHERE forecast_time 
>= CURRENT_TIMESTAMP - INTERVAL '7 days'\nORDER BY forecast_time DESC, spatial_gradient DESC\nLIMIT 1000;",
      "line_number": 2750,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013342,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 12,
      "title": "Weather Pattern Clustering Analysis with Spatial-Temporal Pattern Detection",
      "description": "Use Case: Physical Climate Risk Assessment - Pattern-Based Risk Identification for Renewable Energy Description:
    Enterprise-level weather pattern clustering analysis identifying spatial-temporal patterns, clustering similar weather conditions, and detecting pattern anomalies. Implements production patterns for weather pattern recognition and clustering. Purpose: Identification of recurring weather patterns with clustering metrics. Energy companies can identify patterns that affect renewable energy...",
      "complexity": "Multiple CTEs (6 levels), clustering analysis, pattern detection, spatial-temporal analysis, similarity calculations, window functions",
      "expected_output": "Query results",
      "sql": "WITH multi_parameter_forecast AS (\n    -- First CTE: Combine multiple parameters\n    SELECT\n        gf_temp.forecast_id AS temp_forecast_id,\n        gf_temp.forecast_time,\n        DATE_TRUNC('hour', gf_temp.forecast_time) AS forecast_hour,\n        gf_temp.grid_cell_latitude,\n        gf_temp.grid_cell_longitude,\n        gf_temp.parameter_value AS temperature,\n        gf_precip.parameter_value AS precipitation,\n        gf_wind.parameter_value AS wind_speed,\n        ROUND(CAST(gf_temp.grid_cell_latitude::NUMERIC AS NUMERIC), 2) AS rounded_lat,\n        ROUND(CAST(gf_temp.grid_cell_longitude::NUMERIC AS NUMERIC), 2) AS rounded_lon\n    FROM grib2_forecasts gf_temp\n    LEFT JOIN grib2_forecasts gf_precip ON (\n        gf_precip.parameter_name = 'Precipitation'\n        AND gf_precip.forecast_time = gf_temp.forecast_time\n        AND ROUND(CAST(gf_precip.grid_cell_latitude::NUMERIC AS NUMERIC), 2) = ROUND(CAST(gf_temp.grid_cell_latitude::NUMERIC AS NUMERIC), 2)\n        AND ROUND(CAST(gf_precip.grid_cell_longitude::NUMERIC AS NUMERIC), 2) = ROUND(CAST(gf_temp.grid_cell_longitude::NUMERIC AS NUMERIC), 2)\n    )\n    LEFT JOIN grib2_forecasts gf_wind ON (\n        gf_wind.parameter_name = 'WindSpeed'\n        AND gf_wind.forecast_time = gf_temp.forecast_time\n        AND ROUND(CAST(gf_wind.grid_cell_latitude::NUMERIC AS NUMERIC), 2) = ROUND(CAST(gf_temp.grid_cell_latitude::NUMERIC AS NUMERIC), 2)\n        AND ROUND(CAST(gf_wind.grid_cell_longitude::NUMERIC AS NUMERIC), 2) = ROUND(CAST(gf_temp.grid_cell_longitude::NUMERIC AS NUMERIC), 2)\n    )\n    WHERE gf_temp.parameter_name = 'Temperature'\n        AND gf_temp.transformation_status = 'Success'\n),\npattern_feature_extraction AS (\n    -- Second CTE: Extract pattern features\n    SELECT\n        mpf.temp_forecast_id,\n        mpf.forecast_time,\n        mpf.forecast_hour,\n        mpf.rounded_lat,\n        mpf.rounded_lon,\n        ROUND(CAST(CAST(mpf.temperature AS NUMERIC) AS NUMERIC), 2) AS 
temperature,\n        ROUND(CAST(CAST(COALESCE(CAST(mpf.precipitation AS NUMERIC), 0) AS NUMERIC) AS NUMERIC), 2) AS precipitation,\n        ROUND(CAST(CAST(COALESCE(CAST(mpf.wind_speed AS NUMERIC), 0) AS NUMERIC) AS NUMERIC), 2) AS wind_speed,\n        -- Normalized features for clustering\n        CASE\n            WHEN mpf.temperature BETWEEN 0 AND 100 THEN (mpf.temperature - 50) / 50.0\n            ELSE 0\n        END AS normalized_temp,\n        CASE\n            WHEN mpf.precipitation BETWEEN 0 AND 10 THEN mpf.precipitation / 10.0\n            ELSE 0\n        END AS normalized_precip,\n        CASE\n            WHEN mpf.wind_speed BETWEEN 0 AND 50 THEN mpf.wind_speed / 50.0\n            ELSE 0\n        END AS normalized_wind,\n        -- Weather pattern classification\n        CASE\n            WHEN mpf.temperature < 32 AND COALESCE(mpf.precipitation, 0) > 0 THEN 'Freezing Precipitation'\n            WHEN mpf.temperature < 50 AND COALESCE(mpf.precipitation, 0) > 0.1 THEN 'Cold Rain'\n            WHEN mpf.temperature BETWEEN 50 AND 70 AND COALESCE(mpf.precipitation, 0) > 0.1 THEN 'Moderate Rain'\n            WHEN mpf.temperature >= 70 AND COALESCE(mpf.precipitation, 0) > 0.1 THEN 'Warm Rain'\n            WHEN mpf.temperature >= 85 AND COALESCE(mpf.wind_speed, 0) < 5 THEN 'Hot Calm'\n            WHEN COALESCE(mpf.wind_speed, 0) > 30 THEN 'High Wind'\n            WHEN COALESCE(mpf.precipitation, 0) = 0 AND mpf.temperature BETWEEN 60 AND 80 THEN 'Pleasant'\n            ELSE 'Other'\n        END AS weather_pattern\n    FROM multi_parameter_forecast mpf\n),\nspatial_temporal_clustering AS (\n    -- Third CTE:
    Spatial-temporal clustering\n    SELECT\n        pfe.temp_forecast_id,\n        pfe.forecast_time,\n        pfe.forecast_hour,\n        pfe.rounded_lat,\n        pfe.rounded_lon,\n        pfe.temperature,\n        pfe.precipitation,\n        pfe.wind_speed,\n        pfe.weather_pattern,\n        -- Local pattern density\n        COUNT(*) OVER (\n            PARTITION BY pfe.weather_pattern, pfe.forecast_hour\n        ) AS pattern_count_per_hour,\n        COUNT(*) OVER (\n            PARTITION BY pfe.weather_pattern\n        ) AS total_pattern_count,\n        -- Spatial neighbors with same pattern\n        (\n            SELECT COUNT(*)\n            FROM pattern_feature_extraction pfe2\n            WHERE pfe2.weather_pattern = pfe.weather_pattern\n                AND pfe2.forecast_hour = pfe.forecast_hour\n                AND ABS(pfe2.rounded_lat - pfe.rounded_lat) < 0.5\n                AND ABS(pfe2.rounded_lon - pfe.rounded_lon) < 0.5\n                AND pfe2.temp_forecast_id != pfe.temp_forecast_id\n        ) AS spatial_neighbors_same_pattern,\n        -- Temporal neighbors with same pattern\n        (\n            SELECT COUNT(*)\n            FROM pattern_feature_extraction pfe2\n            WHERE pfe2.weather_pattern = pfe.weather_pattern\n                AND pfe2.rounded_lat = pfe.rounded_lat\n                AND pfe2.rounded_lon = pfe.rounded_lon\n                AND ABS(EXTRACT(EPOCH FROM (pfe2.forecast_time - pfe.forecast_time)) / 3600) <= 3\n                AND pfe2.temp_forecast_id != pfe.temp_forecast_id\n        ) AS temporal_neighbors_same_pattern\n    FROM pattern_feature_extraction pfe\n),\ncluster_metrics AS (\n    -- Fourth CTE: Calculate cluster metrics\n    SELECT\n        stc.temp_forecast_id,\n        stc.forecast_time,\n        stc.forecast_hour,\n        stc.rounded_lat,\n        stc.rounded_lon,\n        stc.temperature,\n        stc.precipitation,\n        stc.wind_speed,\n        stc.weather_pattern,\n        
stc.pattern_count_per_hour,\n        stc.total_pattern_count,\n        stc.spatial_neighbors_same_pattern,\n        stc.temporal_neighbors_same_pattern,\n        -- Cluster cohesion score\n        CASE\n            WHEN stc.spatial_neighbors_same_pattern >= 5 AND stc.temporal_neighbors_same_pattern >= 3 THEN 'High Cohesion'\n            WHEN stc.spatial_neighbors_same_pattern >= 3 AND stc.temporal_neighbors_same_pattern >= 2 THEN 'Medium Cohesion'\n            WHEN stc.spatial_neighbors_same_pattern >= 1 OR stc.temporal_neighbors_same_pattern >= 1 THEN 'Low Cohesion'\n            ELSE 'Isolated'\n        END AS cluster_cohesion,\n        -- Pattern frequency\n        CASE\n            WHEN stc.total_pattern_count > 1000 THEN 'Very Common'\n            WHEN stc.total_pattern_count > 500 THEN 'Common'\n            WHEN stc.total_pattern_count > 100 THEN 'Uncommon'\n            ELSE 'Rare'\n        END AS pattern_frequency\n    FROM spatial_temporal_clustering stc\n),\npattern_anomaly_detection AS (\n    -- Fifth CTE: Detect pattern anomalies\n    SELECT\n        cm.temp_forecast_id,\n        cm.forecast_time,\n        cm.forecast_hour,\n        cm.rounded_lat,\n        cm.rounded_lon,\n        cm.temperature,\n        cm.precipitation,\n        cm.wind_speed,\n        cm.weather_pattern,\n        cm.cluster_cohesion,\n        cm.pattern_frequency,\n        -- Anomaly detection\n        CASE\n            WHEN cm.cluster_cohesion = 'Isolated' AND cm.pattern_frequency = 'Rare' THEN 'Anomaly'\n            WHEN cm.cluster_cohesion = 'Low Cohesion' AND cm.pattern_frequency IN ('Uncommon', 'Rare') THEN 'Potential Anomaly'\n            ELSE 'Normal'\n        END AS anomaly_status,\n        -- Window functions for pattern comparison\n        AVG(cm.temperature) OVER (\n            PARTITION BY cm.weather_pattern\n        ) AS avg_temp_for_pattern,\n        AVG(cm.precipitation) OVER (\n            PARTITION BY cm.weather_pattern\n        ) AS avg_precip_for_pattern,\n        
AVG(cm.wind_speed) OVER (\n            PARTITION BY cm.weather_pattern\n        ) AS avg_wind_for_pattern\n    FROM cluster_metrics cm\n),\nfinal_pattern_report AS (\n    -- Sixth CTE: Final pattern report\n    SELECT\n        pad.temp_forecast_id,\n        pad.forecast_time,\n        pad.forecast_hour,\n        pad.rounded_lat,\n        pad.rounded_lon,\n        pad.temperature,\n        pad.precipitation,\n        pad.wind_speed,\n        pad.weather_pattern,\n        pad.cluster_cohesion,\n        pad.pattern_frequency,\n        pad.anomaly_status,\n        ROUND(CAST(CAST(pad.avg_temp_for_pattern AS NUMERIC) AS NUMERIC), 2) AS avg_temp_for_pattern,\n        ROUND(CAST(CAST(pad.avg_precip_for_pattern AS NUMERIC) AS NUMERIC), 2) AS avg_precip_for_pattern,\n        ROUND(CAST(CAST(pad.avg_wind_for_pattern AS NUMERIC) AS NUMERIC), 2) AS avg_wind_for_pattern,\n        -- Deviation from pattern average\n        ABS(pad.temperature - pad.avg_temp_for_pattern) AS temp_deviation_from_pattern,\n        ABS(pad.precipitation - pad.avg_precip_for_pattern) AS precip_deviation_from_pattern,\n        ABS(pad.wind_speed - pad.avg_wind_for_pattern) AS wind_deviation_from_pattern,\n        -- Rankings\n        PERCENT_RANK() OVER (\n            PARTITION BY pad.weather_pattern\n            ORDER BY pad.temperature DESC\n        ) AS temp_percentile_in_pattern,\n        NTILE(5) OVER (\n            PARTITION BY pad.weather_pattern\n            ORDER BY pad.temperature DESC\n        ) AS temp_quintile_in_pattern\n    FROM pattern_anomaly_detection pad\n)\nSELECT\n    temp_forecast_id,\n    forecast_time,\n    forecast_hour,\n    rounded_lat,\n    rounded_lon,\n    temperature,\n    precipitation,\n    wind_speed,\n    weather_pattern,\n    cluster_cohesion,\n    pattern_frequency,\n    anomaly_status,\n    avg_temp_for_pattern,\n    avg_precip_for_pattern,\n    avg_wind_for_pattern,\n    ROUND(CAST(CAST(temp_deviation_from_pattern AS NUMERIC) AS NUMERIC), 2) AS 
temp_deviation_from_pattern,\n    ROUND(CAST(CAST(precip_deviation_from_pattern AS NUMERIC) AS NUMERIC), 2) AS precip_deviation_from_pattern,\n    ROUND(CAST(CAST(wind_deviation_from_pattern AS NUMERIC) AS NUMERIC), 2) AS wind_deviation_from_pattern,\n    ROUND(CAST(CAST(temp_percentile_in_pattern * 100 AS NUMERIC) AS NUMERIC), 2) AS temp_percentile_in_pattern,\n    temp_quintile_in_pattern\nFROM final_pattern_report\nWHERE forecast_time >= CURRENT_TIMESTAMP - INTERVAL '7 days'\nORDER BY forecast_time DESC, anomaly_status DESC, weather_pattern\nLIMIT 1000;",
      "line_number": 3002,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005392,
        "row_count": 5,
        "column_count": 20,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 13,
      "title": "Forecast Model Performance Comparison with Multi-Model Analysis",
      "description": "Use Case: Forensic Meteorology - Multi-Model Analysis for Comprehensive Legal Evidence Description:
    This SQL query performs comprehensive forecast model performance comparison for a weather consulting firm. It identifies forecast models (NDFD, GFS, NAM, RAP) from source file patterns, spatially matches forecasts with weather observations within 25km and temporally within 1 hour, calculates accuracy metrics (mean absolute error, root mean squared error, bias, success rates), ranks models by perfo",
      "complexity": "Multiple CTEs (7+ levels), model comparison, performance metrics, accuracy calculations, ranking analysis, window functions",
      "expected_output": "Query results",
      "sql": "WITH forecast_model_identification AS (\n    -- First CTE: Identify forecast models (based on source file patterns)\n    SELECT\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        gf.parameter_value,\n        gf.source_file,\n        -- Extract model identifier from source file\n        CASE\n            WHEN gf.source_file LIKE '%ndfd%' THEN 'NDFD'\n            WHEN gf.source_file LIKE '%gfs%' THEN 'GFS'\n            WHEN gf.source_file LIKE '%nam%' THEN 'NAM'\n            WHEN gf.source_file LIKE '%rap%' THEN 'RAP'\n            ELSE 'Unknown'\n        END AS forecast_model,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_date\n    FROM grib2_forecasts gf\n    WHERE gf.transformation_status = 'Success'\n),\nobservation_forecast_matching AS (\n    -- Second CTE: Match forecasts with observations\n    SELECT\n        fmi.forecast_id,\n        fmi.parameter_name,\n        fmi.forecast_time,\n        fmi.forecast_model,\n        fmi.forecast_date,\n        fmi.parameter_value AS forecast_value,\n        wo.observation_id,\n        wo.observation_time,\n        CASE\n            WHEN fmi.parameter_name = 'Temperature' THEN wo.temperature\n            WHEN fmi.parameter_name = 'Precipitation' THEN COALESCE(wo.precipitation_amount, 0)\n            WHEN fmi.parameter_name = 'WindSpeed' THEN wo.wind_speed\n            ELSE NULL\n        END AS observation_value,\n        ABS(fmi.parameter_value - CASE\n            WHEN fmi.parameter_name = 'Temperature' THEN wo.temperature\n            WHEN fmi.parameter_name = 'Precipitation' THEN COALESCE(wo.precipitation_amount, 0)\n            WHEN fmi.parameter_name = 'WindSpeed' THEN wo.wind_speed\n            ELSE NULL\n        END) AS absolute_error\n    FROM forecast_model_identification fmi\n    INNER JOIN weather_observations wo ON (\n        wo.observation_time BETWEEN fmi.forecast_time - INTERVAL '1 hour' AND fmi.forecast_time + INTERVAL '1 hour'\n        AND EXISTS 
(\n            SELECT 1 FROM grib2_forecasts gf2\n            WHERE gf2.forecast_id = fmi.forecast_id\n                AND gf2.grid_cell_geom IS NOT NULL\n                AND wo.station_geom IS NOT NULL\n                AND ST_DISTANCE(gf2.grid_cell_geom::geography, wo.station_geom::geography) < 25000\n        )\n    )\n    WHERE (\n        (fmi.parameter_name = 'Temperature' AND wo.temperature IS NOT NULL)\n        OR (fmi.parameter_name = 'Precipitation' AND wo.precipitation_amount IS NOT NULL)\n        OR (fmi.parameter_name = 'WindSpeed' AND wo.wind_speed IS NOT NULL)\n    )\n),\nmodel_performance_metrics AS (\n    -- Third CTE: Calculate model performance metrics\n    SELECT\n        ofm.forecast_model,\n        ofm.parameter_name,\n        ofm.forecast_date,\n        COUNT(*) AS validation_count,\n        AVG(ofm.absolute_error) AS mean_absolute_error,\n        SQRT(AVG(POWER(ofm.absolute_error, 2))) AS root_mean_squared_error,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ofm.absolute_error) AS median_absolute_error,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY ofm.absolute_error) AS q1_error,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY ofm.absolute_error) AS q3_error,\n        STDDEV(ofm.absolute_error) AS error_stddev,\n        MIN(ofm.absolute_error) AS min_error,\n        MAX(ofm.absolute_error) AS max_error,\n        -- Bias calculation\n        AVG(ofm.forecast_value - ofm.observation_value) AS mean_bias,\n        -- Success rate (within acceptable error threshold)\n        COUNT(CASE WHEN ofm.absolute_error <= 5 THEN 1 END)::NUMERIC / COUNT(*)::NUMERIC * 100 AS success_rate_5deg,\n        COUNT(CASE WHEN ofm.absolute_error <= 10 THEN 1 END)::NUMERIC / COUNT(*)::NUMERIC * 100 AS success_rate_10deg\n    FROM observation_forecast_matching ofm\n    GROUP BY\n        ofm.forecast_model,\n        ofm.parameter_name,\n        ofm.forecast_date\n),\nmodel_comparison_analysis AS (\n    -- Fourth CTE: Compare models\n    SELECT\n       
 mpm.forecast_model,\n        mpm.parameter_name,\n        mpm.forecast_date,\n        mpm.validation_count,\n        ROUND(CAST(CAST(mpm.mean_absolute_error AS NUMERIC) AS NUMERIC), 2) AS mean_absolute_error,\n        ROUND(CAST(CAST(mpm.root_mean_squared_error AS NUMERIC) AS NUMERIC), 2) AS root_mean_squared_error,\n        ROUND(CAST(CAST(mpm.median_absolute_error AS NUMERIC) AS NUMERIC), 2) AS median_absolute_error,\n        ROUND(CAST(CAST(mpm.error_stddev AS NUMERIC) AS NUMERIC), 2) AS error_stddev,\n        ROUND(CAST(CAST(mpm.mean_bias AS NUMERIC) AS NUMERIC), 2) AS mean_bias,\n        ROUND(CAST(CAST(mpm.success_rate_5deg AS NUMERIC) AS NUMERIC), 2) AS success_rate_5deg,\n        ROUND(CAST(CAST(mpm.success_rate_10deg AS NUMERIC) AS NUMERIC), 2) AS success_rate_10deg,\n        -- Compare with overall average\n        AVG(mpm.mean_absolute_error) OVER (\n            PARTITION BY mpm.parameter_name, mpm.forecast_date\n        ) AS overall_avg_mae,\n        -- Model ranking\n        ROW_NUMBER() OVER (\n            PARTITION BY mpm.parameter_name, mpm.forecast_date\n            ORDER BY mpm.mean_absolute_error ASC\n        ) AS model_rank_by_mae,\n        ROW_NUMBER() OVER (\n            PARTITION BY mpm.parameter_name, mpm.forecast_date\n            ORDER BY mpm.success_rate_10deg DESC\n        ) AS model_rank_by_success_rate\n    FROM model_performance_metrics mpm\n),\nmodel_performance_scoring AS (\n    -- Fifth CTE: Calculate performance scores\n    SELECT\n        mca.forecast_model,\n        mca.parameter_name,\n        mca.forecast_date,\n        mca.validation_count,\n        mca.mean_absolute_error,\n        mca.root_mean_squared_error,\n        mca.median_absolute_error,\n        mca.error_stddev,\n        mca.mean_bias,\n        mca.success_rate_5deg,\n        mca.success_rate_10deg,\n        mca.overall_avg_mae,\n        mca.model_rank_by_mae,\n        mca.model_rank_by_success_rate,\n        -- Performance score (higher is better)\n        (\n    
        -- MAE component (40% weight) - lower is better\n            GREATEST(0, 1.0 - (mca.mean_absolute_error / NULLIF(mca.overall_avg_mae, 0))) * 40 +\n            -- Success rate component (40% weight)\n            (mca.success_rate_10deg / 100.0) * 40 +\n            -- Bias component (20% weight) - less bias is better\n            GREATEST(0, 1.0 - (ABS(mca.mean_bias) / 10.0)) * 20\n        ) AS performance_score,\n        -- Performance classification\n        CASE\n            WHEN mca.model_rank_by_mae = 1 AND mca.success_rate_10deg > 90 THEN 'Best Performer'\n            WHEN mca.model_rank_by_mae <= 2 AND mca.success_rate_10deg > 85 THEN 'Good Performer'\n            WHEN mca.model_rank_by_mae <= 3 AND mca.success_rate_10deg > 80 THEN 'Average Performer'\n            ELSE 'Below Average'\n        END AS performance_classification\n    FROM model_comparison_analysis mca\n),\nfinal_model_comparison AS (\n    -- Sixth CTE: Final model comparison\n    SELECT\n        mps.forecast_model,\n        mps.parameter_name,\n        mps.forecast_date,\n        mps.validation_count,\n        mps.mean_absolute_error,\n        mps.root_mean_squared_error,\n        mps.median_absolute_error,\n        mps.error_stddev,\n        mps.mean_bias,\n        mps.success_rate_5deg,\n        mps.success_rate_10deg,\n        mps.model_rank_by_mae,\n        mps.model_rank_by_success_rate,\n        ROUND(CAST(CAST(mps.performance_score AS NUMERIC) AS NUMERIC), 2) AS performance_score,\n        mps.performance_classification,\n        -- Window functions for trend analysis\n        AVG(mps.performance_score) OVER (\n            PARTITION BY mps.forecast_model, mps.parameter_name\n            ORDER BY mps.forecast_date\n            ROWS BETWEEN 6 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_performance_7d,\n        LAG(mps.performance_score, 1) OVER (\n            PARTITION BY mps.forecast_model, mps.parameter_name\n            ORDER BY mps.forecast_date\n        ) AS 
prev_day_performance_score,\n        -- Percentile rankings\n        PERCENT_RANK() OVER (\n            PARTITION BY mps.parameter_name\n            ORDER BY mps.performance_score DESC\n        ) AS performance_percentile,\n        NTILE(5) OVER (\n            PARTITION BY mps.parameter_name\n            ORDER BY mps.performance_score DESC\n        ) AS performance_quintile\n    FROM model_performance_scoring mps\n)\nSELECT\n    forecast_model,\n    parameter_name,\n    forecast_date,\n    validation_count,\n    mean_absolute_error,\n    root_mean_squared_error,\n    median_absolute_error,\n    error_stddev,\n    mean_bias,\n    success_rate_5deg,\n    success_rate_10deg,\n    model_rank_by_mae,\n    model_rank_by_success_rate,\n    performance_score,\n    performance_classification,\n    ROUND(CAST(CAST(moving_avg_performance_7d AS NUMERIC) AS NUMERIC), 2) AS moving_avg_performance_7d,\n    ROUND(CAST(CAST(prev_day_performance_score AS NUMERIC) AS NUMERIC), 2) AS prev_day_performance_score,\n    ROUND(CAST(CAST(performance_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS performance_percentile,\n    performance_quintile\nFROM final_model_comparison\nWHERE forecast_date >= CURRENT_DATE - INTERVAL '30 days'\nORDER BY forecast_date DESC, parameter_name, performance_score DESC\nLIMIT 500;",
      "line_number":
    3252,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.012815,
        "row_count": 0,
        "column_count": 19,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 14,
      "title": "Boundary Forecast Anomaly Detection with Statistical Outlier Identification",
      "description": "Use Case: Physical Climate Risk Assessment - Extreme Event Detection for Emergency Management Description:
    Enterprise-level boundary forecast anomaly detection identifying statistical outliers, anomalies in boundary aggregations, and unusual forecast patterns. Implements production patterns for anomaly detection in geospatial forecast data. Purpose: Identification of anomalous weather patterns within boundaries with outlier metrics. Emergency management needs anomaly detection for early warning ",
      "complexity": "Multiple CTEs (8+ levels), anomaly detection algorithms, statistical outlier identification, pattern analysis, window functions, UNION operations",
      "expected_output": "Query results",
      "sql": "WITH boundary_forecast_aggregation AS (\n    -- First CTE: Aggregate forecasts by boundary\n    SELECT\n        sb.boundary_id,\n        sb.feature_type,\n        sb.feature_name,\n        gf.parameter_name,\n        gf.forecast_time,\n        DATE_TRUNC('hour', gf.forecast_time) AS forecast_hour,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_date,\n        COUNT(DISTINCT gf.forecast_id) AS grid_cells_count,\n        AVG(gf.parameter_value) AS avg_value,\n        MIN(gf.parameter_value) AS min_value,\n        MAX(gf.parameter_value) AS max_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY gf.parameter_value) AS median_value,\n        STDDEV(gf.parameter_value) AS stddev_value\n    FROM shapefile_boundaries sb\n    INNER JOIN grib2_forecasts gf ON (\n        sb.boundary_geom IS NOT NULL\n        AND gf.grid_cell_geom IS NOT NULL\n        AND ST_Within(gf.grid_cell_geom::geometry, sb.boundary_geom::geometry)\n    )\n    WHERE gf.transformation_status = 'Success'\n    GROUP BY\n        sb.boundary_id,\n        sb.feature_type,\n        sb.feature_name,\n        gf.parameter_name,\n        gf.forecast_time\n),\ntemporal_statistics AS (\n    -- Second CTE: Calculate temporal statistics\n    SELECT\n        bfa.boundary_id,\n        bfa.feature_type,\n        bfa.feature_name,\n        bfa.parameter_name,\n        bfa.forecast_time,\n        bfa.forecast_hour,\n        bfa.forecast_date,\n        bfa.grid_cells_count,\n        ROUND(CAST(CAST(bfa.avg_value AS NUMERIC) AS NUMERIC), 2) AS avg_value,\n        ROUND(CAST(CAST(bfa.min_value AS NUMERIC) AS NUMERIC), 2) AS min_value,\n        ROUND(CAST(CAST(bfa.max_value AS NUMERIC) AS NUMERIC), 2) AS max_value,\n        ROUND(CAST(CAST(bfa.median_value AS NUMERIC) AS NUMERIC), 2) AS median_value,\n        ROUND(CAST(CAST(bfa.stddev_value AS NUMERIC) AS NUMERIC), 2) AS stddev_value,\n        -- Temporal averages\n        AVG(bfa.avg_value) OVER (\n            PARTITION BY bfa.boundary_id, 
bfa.parameter_name\n            ORDER BY bfa.forecast_time\n            ROWS BETWEEN 23 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_24h,\n        AVG(bfa.avg_value) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY bfa.forecast_time\n            ROWS BETWEEN 167 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_168h,\n        -- Temporal standard deviations\n        STDDEV(bfa.avg_value) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY bfa.forecast_time\n            ROWS BETWEEN 23 PRECEDING AND CURRENT ROW\n        ) AS moving_stddev_24h,\n        STDDEV(bfa.avg_value) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY bfa.forecast_time\n            ROWS BETWEEN 167 PRECEDING AND CURRENT ROW\n        ) AS moving_stddev_168h,\n        -- Lag values\n        LAG(bfa.avg_value, 1) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY bfa.forecast_time\n        ) AS prev_hour_avg,\n        LAG(bfa.avg_value, 24) OVER (\n            PARTITION BY bfa.boundary_id, bfa.parameter_name\n            ORDER BY bfa.forecast_time\n        ) AS prev_day_avg\n    FROM boundary_forecast_aggregation bfa\n),\nanomaly_detection_metrics AS (\n    -- Third CTE: Calculate anomaly detection metrics\n    SELECT\n        ts.boundary_id,\n        ts.feature_type,\n        ts.feature_name,\n        ts.parameter_name,\n        ts.forecast_time,\n        ts.forecast_hour,\n        ts.forecast_date,\n        ts.avg_value,\n        ts.min_value,\n        ts.max_value,\n        ts.median_value,\n        ts.stddev_value,\n        ROUND(CAST(CAST(ts.moving_avg_24h AS NUMERIC) AS NUMERIC), 2) AS moving_avg_24h,\n        ROUND(CAST(CAST(ts.moving_avg_168h AS NUMERIC) AS NUMERIC), 2) AS moving_avg_168h,\n        ROUND(CAST(CAST(ts.moving_stddev_24h AS NUMERIC) AS NUMERIC), 2) AS moving_stddev_24h,\n        
ROUND(CAST(CAST(ts.moving_stddev_168h AS NUMERIC) AS NUMERIC), 2) AS moving_stddev_168h,\n        ROUND(CAST(CAST(ts.prev_hour_avg AS NUMERIC) AS NUMERIC), 2) AS prev_hour_avg,\n        ROUND(CAST(CAST(ts.prev_day_avg AS NUMERIC) AS NUMERIC), 2) AS prev_day_avg,\n        -- Z-score calculation\n        CASE\n            WHEN ts.moving_stddev_24h > 0 THEN\n                (ts.avg_value - ts.moving_avg_24h) / ts.moving_stddev_24h\n            ELSE NULL\n        END AS z_score_24h,\n        -- Deviation from moving average\n        ts.avg_value - ts.moving_avg_24h AS deviation_from_24h_avg,\n        ts.avg_value - ts.moving_avg_168h AS deviation_from_168h_avg,\n        -- Change from previous\n        CASE\n            WHEN ts.prev_hour_avg IS NOT NULL THEN\n                ts.avg_value - ts.prev_hour_avg\n            ELSE NULL\n        END AS change_from_prev_hour,\n        CASE\n            WHEN ts.prev_day_avg IS NOT NULL THEN\n                ts.avg_value - ts.prev_day_avg\n            ELSE NULL\n        END AS change_from_prev_day\n    FROM temporal_statistics ts\n),\noutlier_classification AS (\n    -- Fourth CTE: Classify outliers\n    SELECT\n        adm.boundary_id,\n        adm.feature_type,\n        adm.feature_name,\n        adm.parameter_name,\n        adm.forecast_time,\n        adm.forecast_hour,\n        adm.forecast_date,\n        adm.avg_value,\n        ROUND(CAST(CAST(adm.z_score_24h AS NUMERIC) AS NUMERIC), 2) AS z_score_24h,\n        ROUND(CAST(CAST(adm.deviation_from_24h_avg AS NUMERIC) AS NUMERIC), 2) AS deviation_from_24h_avg,\n        ROUND(CAST(CAST(adm.deviation_from_168h_avg AS NUMERIC) AS NUMERIC), 2) AS deviation_from_168h_avg,\n        ROUND(CAST(CAST(adm.change_from_prev_hour AS NUMERIC) AS NUMERIC), 2) AS change_from_prev_hour,\n        ROUND(CAST(CAST(adm.change_from_prev_day AS NUMERIC) AS NUMERIC), 2) AS change_from_prev_day,\n        -- Z-score based anomaly\n        CASE\n            WHEN adm.z_score_24h IS NOT NULL THEN\n         
       CASE\n                    WHEN ABS(adm.z_score_24h) > 3 THEN 'Extreme Anomaly'\n                    WHEN ABS(adm.z_score_24h) > 2 THEN 'Significant Anomaly'\n                    WHEN ABS(adm.z_score_24h) > 1.5 THEN 'Moderate Anomaly'\n                    ELSE 'Normal'\n                END\n            ELSE NULL\n        END AS z_score_anomaly,\n        -- Deviation based anomaly\n        CASE\n            WHEN adm.moving_stddev_24h IS NOT NULL THEN\n                CASE\n                    WHEN ABS(adm.deviation_from_24h_avg) > 3 * adm.moving_stddev_24h THEN 'Extreme Deviation'\n                    WHEN ABS(adm.deviation_from_24h_avg) > 2 * adm.moving_stddev_24h THEN 'Significant Deviation'\n                    WHEN ABS(adm.deviation_from_24h_avg) > adm.moving_stddev_24h THEN 'Moderate Deviation'\n                    ELSE 'Normal Deviation'\n                END\n            ELSE NULL\n        END AS deviation_anomaly,\n        -- Change based anomaly\n        CASE\n            WHEN adm.change_from_prev_hour IS NOT NULL AND adm.moving_stddev_24h IS NOT NULL THEN\n                CASE\n                    WHEN ABS(adm.change_from_prev_hour) > 3 * adm.moving_stddev_24h THEN 'Extreme Change'\n                    WHEN ABS(adm.change_from_prev_hour) > 2 * adm.moving_stddev_24h THEN 'Significant Change'\n                    ELSE 'Normal Change'\n                END\n            ELSE NULL\n        END AS change_anomaly\n    FROM anomaly_detection_metrics adm\n),\nanomaly_severity_scoring AS (\n    -- Fifth CTE: Score anomaly severity\n    SELECT\n        oc.boundary_id,\n        oc.feature_type,\n        oc.feature_name,\n        oc.parameter_name,\n        oc.forecast_time,\n        oc.forecast_hour,\n        oc.forecast_date,\n        oc.avg_value,\n        oc.z_score_24h,\n        oc.deviation_from_24h_avg,\n        oc.change_from_prev_hour,\n        oc.z_score_anomaly,\n        oc.deviation_anomaly,\n        oc.change_anomaly,\n        -- Anomaly severity 
score\n        CASE\n            WHEN oc.z_score_anomaly = 'Extreme Anomaly' OR oc.deviation_anomaly = 'Extreme Deviation' THEN 5\n            WHEN oc.z_score_anomaly = 'Significant Anomaly' OR oc.deviation_anomaly = 'Significant Deviation' THEN 4\n            WHEN oc.z_score_anomaly = 'Moderate Anomaly' OR oc.deviation_anomaly = 'Moderate Deviation' THEN 3\n            WHEN oc.change_anomaly = 'Extreme Change' THEN 4\n            WHEN oc.change_anomaly = 'Significant Change' THEN 3\n            ELSE 1\n        END AS anomaly_severity_score,\n        -- Overall anomaly status\n        CASE\n            WHEN oc.z_score_anomaly IN ('Extreme Anomaly', 'Significant Anomaly') OR\n                 oc.deviation_anomaly IN ('Extreme Deviation', 'Significant Deviation') THEN 'Anomaly Detected'\n            WHEN oc.change_anomaly IN ('Extreme Change', 'Significant Change') THEN 'Rapid Change Detected'\n            ELSE 'Normal'\n        END AS overall_anomaly_status\n    FROM outlier_classification oc\n),\nfinal_anomaly_report AS (\n    -- Sixth CTE: Final anomaly report\n    SELECT\n        ass.boundary_id,\n        ass.feature_type,\n        ass.feature_name,\n        ass.parameter_name,\n        ass.forecast_time,\n        ass.forecast_hour,\n        ass.forecast_date,\n        ass.avg_value,\n        ass.z_score_24h,\n        ass.deviation_from_24h_avg,\n        ass.change_from_prev_hour,\n        ass.anomaly_severity_score,\n        ass.overall_anomaly_status,\n        -- Window functions for anomaly frequency\n        COUNT(CASE WHEN ass.overall_anomaly_status != 'Normal' THEN 1 END) OVER (\n            PARTITION BY ass.boundary_id, ass.parameter_name\n            ORDER BY ass.forecast_time\n            ROWS BETWEEN 23 PRECEDING AND CURRENT ROW\n        ) AS anomalies_in_last_24h,\n        -- Rankings\n        ROW_NUMBER() OVER (\n            PARTITION BY ass.parameter_name, ass.forecast_time\n            ORDER BY ass.anomaly_severity_score DESC\n        ) AS 
anomaly_rank,\n        PERCENT_RANK() OVER (\n            PARTITION BY ass.feature_type\n            ORDER BY ass.anomaly_severity_score DESC\n        ) AS anomaly_severity_percentile\n    FROM anomaly_severity_scoring ass\n)\nSELECT\n    boundary_id,\n    feature_type,\n    feature_name,\n    parameter_name,\n    forecast_time,\n    forecast_hour,\n    forecast_date,\n    avg_value,\n    z_score_24h,\n    deviation_from_24h_avg,\n    change_from_prev_hour,\n    anomaly_severity_score,\n    overall_anomaly_status,\n    anomalies_in_last_24h,\n    anomaly_rank,\n    ROUND(CAST(CAST(anomaly_severity_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS anomaly_severity_percentile\nFROM final_anomaly_report\nWHERE forecast_date >= CURRENT_DATE - INTERVAL '7 days'\n    AND overall_anomaly_status != 'Normal'\nORDER BY forecast_time DESC, anomaly_severity_score DESC\nLIMIT 500;",
      "line_number":
    3486,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013022,
        "row_count": 0,
        "column_count": 16,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 15,
      "title": "Insurance Risk Factor Calculation from 7-14 Day Forecasts",
      "description": "Use Case: Insurance Underwriting - Multi-Day Forecast Risk Assessment for Rate Determination Description:
    Calculates comprehensive risk factors for insurance policy areas based on 7-14 day forecasts from December 3-17, 2025. Analyzes multiple weather parameters to determine extreme event probabilities, precipitation risk, temperature extremes, wind damage risk, freeze risk, and flood risk. Business Value: Risk factor analysis report showing forecast-based risk scores for each policy area and for",
      "complexity": "Multiple CTEs (8+ levels), forecast period filtering, risk calculations, percentile analysis, window functions, spatial joins",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period (Dec 3-17, 2025)\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end,\n        DATE '2025-12-03' AS forecast_date_start,\n        DATE '2025-12-17' AS forecast_date_end\n),\npolicy_area_forecasts AS (\n    -- Second CTE: Join forecasts with policy areas\n    SELECT\n        ipa.policy_area_id,\n        ipa.policy_type,\n        ipa.coverage_type,\n        ipa.policy_area_name,\n        ipa.state_code,\n        ipa.risk_zone,\n        gf.forecast_id,\n        gf.parameter_name,\n        gf.forecast_time,\n        DATE_TRUNC('day', gf.forecast_time) AS forecast_day_date,\n        DATE_TRUNC('day', fp.period_start) AS period_start_date,\n        DATE_TRUNC('day', fp.period_end) AS period_end_date,\n        -- Calculate forecast day (days ahead)\n        (DATE_TRUNC('day', gf.forecast_time)::date - DATE_TRUNC('day', CURRENT_TIMESTAMP)::date) AS forecast_day,\n        gf.parameter_value,\n        gf.grid_cell_latitude,\n        gf.grid_cell_longitude\n    FROM insurance_policy_areas ipa\n    CROSS JOIN forecast_period fp\n    INNER JOIN shapefile_boundaries sb ON ipa.boundary_id = sb.boundary_id\n    INNER JOIN grib2_forecasts gf ON (\n        sb.boundary_geom IS NOT NULL\n        AND gf.grid_cell_geom IS NOT NULL\n        AND ST_Within(gf.grid_cell_geom::geometry, sb.boundary_geom::geometry)\n    )\n    WHERE ipa.is_active = TRUE\n        AND gf.transformation_status = 'Success'\n        AND DATE_TRUNC('day', gf.forecast_time) BETWEEN fp.period_start AND fp.period_end\n        AND (DATE_TRUNC('day', gf.forecast_time)::date - DATE_TRUNC('day', CURRENT_TIMESTAMP)::date) BETWEEN 7 AND 14\n),\nforecast_statistics AS (\n    -- Third CTE: Calculate forecast statistics by policy area and parameter\n    SELECT\n        paf.policy_area_id,\n        paf.policy_type,\n        paf.coverage_type,\n        paf.policy_area_name,\n        paf.state_code,\n     
   paf.risk_zone,\n        paf.parameter_name,\n        paf.forecast_day,\n        DATE_TRUNC('day', paf.forecast_time) AS forecast_date,\n        COUNT(*) AS forecast_count,\n        MIN(paf.parameter_value) AS min_value,\n        MAX(paf.parameter_value) AS max_value,\n        AVG(paf.parameter_value) AS avg_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY paf.parameter_value) AS median_value,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY paf.parameter_value) AS q1_value,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY paf.parameter_value) AS q3_value,\n        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY paf.parameter_value) AS p90_value,\n        PERCENTILE_CONT(0.05) WITHIN GROUP (ORDER BY paf.parameter_value) AS p5_value,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY paf.parameter_value) AS p95_value,\n        PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY paf.parameter_value) AS p99_value,\n        STDDEV(paf.parameter_value) AS stddev_value,\n        VARIANCE(paf.parameter_value) AS variance_value\n    FROM policy_area_forecasts paf\n    GROUP BY\n        paf.policy_area_id,\n        paf.policy_type,\n        paf.coverage_type,\n        paf.policy_area_name,\n        paf.state_code,\n        paf.risk_zone,\n        paf.parameter_name,\n        paf.forecast_day,\n        DATE_TRUNC('day', paf.forecast_time)\n),\nprecipitation_risk_calculation AS (\n    -- Fourth CTE: Calculate precipitation risk\n    SELECT\n        fs.policy_area_id,\n        fs.policy_type,\n        fs.forecast_day,\n        fs.forecast_date,\n        fs.avg_value AS avg_precipitation,\n        fs.max_value AS max_precipitation,\n        fs.p95_value AS p95_precipitation,\n        fs.p99_value AS p99_precipitation,\n        -- Cumulative precipitation risk (higher for more precipitation)\n        CASE\n            WHEN fs.avg_value > 50 THEN 100.0  -- Extreme risk\n            WHEN fs.avg_value > 25 THEN 75.0 + ((fs.avg_value - 25) / 25.0) * 25.0\n            
WHEN fs.avg_value > 10 THEN 50.0 + ((fs.avg_value - 10) / 15.0) * 25.0\n            WHEN fs.avg_value > 5 THEN 25.0 + ((fs.avg_value - 5) / 5.0) * 25.0\n            ELSE (fs.avg_value / 5.0) * 25.0\n        END AS cumulative_precipitation_risk,\n        -- Extreme event probability (based on percentile thresholds)\n        CASE\n            WHEN fs.p99_value > 50 THEN 0.95\n            WHEN fs.p95_value > 50 THEN 0.75\n            WHEN fs.p95_value > 25 THEN 0.50\n            WHEN fs.p90_value > 25 THEN 0.25\n            ELSE 0.10\n        END AS extreme_event_probability\n    FROM forecast_statistics fs\n    WHERE fs.parameter_name = 'Precipitation'\n),\ntemperature_risk_calculation AS (\n    -- Fifth CTE: Calculate temperature extreme risk\n    SELECT\n        fs.policy_area_id,\n        fs.policy_type,\n        fs.forecast_day,\n        fs.forecast_date,\n        fs.avg_value AS avg_temperature,\n        fs.min_value AS min_temperature,\n        fs.max_value AS max_temperature,\n        fs.p95_value AS p95_temperature,\n        fs.p5_value AS p5_temperature,\n        -- Freeze risk (temperature below 32°F)\n        CASE\n            WHEN fs.min_value < 20 THEN 100.0  -- Extreme freeze risk\n            WHEN fs.min_value < 28 THEN 75.0 + ((28 - fs.min_value) / 8.0) * 25.0\n            WHEN fs.min_value < 32 THEN 50.0 + ((32 - fs.min_value) / 4.0) * 25.0\n            WHEN fs.min_value < 35 THEN 25.0 + ((35 - fs.min_value) / 3.0) * 25.0\n            ELSE 0.0\n        END AS freeze_risk,\n        -- Heat risk (temperature above 90°F)\n        CASE\n            WHEN fs.max_value > 100 THEN 100.0  -- Extreme heat risk\n            WHEN fs.max_value > 95 THEN 75.0 + ((fs.max_value - 95) / 5.0) * 25.0\n            WHEN fs.max_value > 90 THEN 50.0 + ((fs.max_value - 90) / 5.0) * 25.0\n            WHEN fs.max_value > 85 THEN 25.0 + ((fs.max_value - 85) / 5.0) * 25.0\n            ELSE 0.0\n        END AS heat_risk,\n        -- Temperature extreme risk (combination)\n        CASE\n            WHEN fs.min_value < 32 THEN\n                CASE\n                    WHEN fs.min_value < 20 THEN 100.0\n                    WHEN fs.min_value < 28 THEN 75.0 + ((28 - fs.min_value) / 8.0) * 25.0\n                    WHEN fs.min_value < 32 THEN 50.0 + ((32 - fs.min_value) / 4.0) * 25.0\n                    ELSE 25.0\n                END\n            WHEN fs.max_value > 90 THEN\n                CASE\n                    WHEN fs.max_value > 100 THEN 100.0\n                    WHEN fs.max_value > 95 THEN 75.0 + ((fs.max_value - 95) / 5.0) * 25.0\n                    WHEN fs.max_value > 90 THEN 50.0 + ((fs.max_value - 90) / 5.0) * 25.0\n                    ELSE 25.0\n                
END\n            ELSE 0.0\n        END AS temperature_extreme_risk\n    FROM forecast_statistics fs\n    WHERE fs.parameter_name = 'Temperature'\n),\nwind_risk_calculation AS (\n    -- Sixth CTE: Calculate wind damage risk\n    SELECT\n        fs.policy_area_id,\n        fs.policy_type,\n        fs.forecast_day,\n        fs.forecast_date,\n        fs.avg_value AS avg_wind_speed,\n        fs.max_value AS max_wind_speed,\n        fs.p95_value AS p95_wind_speed,\n        fs.p99_value AS p99_wind_speed,\n        -- Wind damage risk (higher for stronger winds)\n        CASE\n            WHEN fs.max_value > 75 THEN 100.0  -- Hurricane-force winds\n            WHEN fs.max_value > 58 THEN 75.0 + ((fs.max_value - 58) / 17.0) * 25.0  -- Tropical storm\n            WHEN fs.max_value > 45 THEN 50.0 + ((fs.max_value - 45) / 13.0) * 25.0  -- Strong winds\n            WHEN fs.max_value > 30 THEN 25.0 + ((fs.max_value - 30) / 15.0) * 25.0  -- Moderate winds\n            ELSE (fs.max_value / 30.0) * 25.0\n        END AS wind_damage_risk\n    FROM forecast_statistics fs\n    WHERE fs.parameter_name = 'WindSpeed'\n),\nflood_risk_calculation AS (\n    -- Seventh CTE: Calculate flood risk (combination of precipitation and other factors)\n    SELECT\n        prc.policy_area_id,\n        prc.policy_type,\n        prc.forecast_day,\n        prc.forecast_date,\n        prc.avg_precipitation,\n        prc.cumulative_precipitation_risk,\n        -- Flood risk combines precipitation with other factors\n        CASE\n            WHEN prc.avg_precipitation > 50 THEN 100.0\n            WHEN prc.avg_precipitation > 25 THEN 75.0 + ((prc.avg_precipitation - 25) / 25.0) * 25.0\n            WHEN prc.avg_precipitation > 10 THEN 50.0 + ((prc.avg_precipitation - 10) / 15.0) * 25.0\n            WHEN prc.avg_precipitation > 5 THEN 25.0 + ((prc.avg_precipitation - 5) / 5.0) * 25.0\n            ELSE (prc.avg_precipitation / 5.0) * 25.0\n        END AS flood_risk\n    FROM precipitation_risk_calculation 
prc\n),\ncombined_risk_factors AS (\n    -- Eighth CTE: Combine all risk factors\n    SELECT\n        COALESCE(prc.policy_area_id, trc.policy_area_id, wrc.policy_area_id, frc.policy_area_id) AS policy_area_id,\n        COALESCE(prc.policy_type, trc.policy_type, wrc.policy_type, frc.policy_type) AS policy_type,\n        COALESCE(prc.forecast_day, trc.forecast_day, wrc.forecast_day, frc.forecast_day) AS forecast_day,\n        COALESCE(prc.forecast_date, trc.forecast_date, wrc.forecast_date, frc.forecast_date) AS forecast_date,\n        prc.cumulative_precipitation_risk,\n        prc.extreme_event_probability,\n        trc.freeze_risk,\n        trc.heat_risk,\n        trc.temperature_extreme_risk,\n        wrc.wind_damage_risk,\n        frc.flood_risk,\n        -- Overall risk score (weighted combination)\n        (\n            COALESCE(prc.cumulative_precipitation_risk, 0) * 0.30 +\n            COALESCE(trc.temperature_extreme_risk, 0) * 0.25 +\n            COALESCE(wrc.wind_damage_risk, 0) * 0.20 +\n            COALESCE(frc.flood_risk, 0) * 0.15 +\n            COALESCE(prc.extreme_event_probability, 0) * 100 * 0.10\n        ) AS overall_risk_score\n    FROM precipitation_risk_calculation prc\n    FULL OUTER JOIN temperature_risk_calculation trc ON (\n        prc.policy_area_id = trc.policy_area_id\n        AND prc.forecast_day = trc.forecast_day\n        AND prc.forecast_date = trc.forecast_date\n    )\n    FULL OUTER JOIN wind_risk_calculation wrc ON (\n        COALESCE(prc.policy_area_id, trc.policy_area_id) = wrc.policy_area_id\n        AND COALESCE(prc.forecast_day, trc.forecast_day) = wrc.forecast_day\n        AND COALESCE(prc.forecast_date, trc.forecast_date) = wrc.forecast_date\n    )\n    FULL OUTER JOIN flood_risk_calculation frc ON (\n        COALESCE(prc.policy_area_id, trc.policy_area_id) = frc.policy_area_id\n        AND COALESCE(prc.forecast_day, trc.forecast_day) = frc.forecast_day\n        AND COALESCE(prc.forecast_date, trc.forecast_date) = 
frc.forecast_date\n    )\n),\nrisk_category_assignment AS (\n    -- Ninth CTE: Assign risk categories\n    SELECT\n        crf.policy_area_id,\n        crf.policy_type,\n        crf.forecast_day,\n        crf.forecast_date,\n        ROUND(CAST(CAST(crf.cumulative_precipitation_risk AS NUMERIC) AS NUMERIC), 2) AS cumulative_precipitation_risk,\n        ROUND(CAST(CAST(crf.extreme_event_probability AS NUMERIC) AS NUMERIC), 4) AS extreme_event_probability,\n        ROUND(CAST(CAST(crf.freeze_risk AS NUMERIC) AS NUMERIC), 2) AS freeze_risk,\n        ROUND(CAST(CAST(crf.heat_risk AS NUMERIC) AS NUMERIC), 2) AS heat_risk,\n        ROUND(CAST(CAST(crf.temperature_extreme_risk AS NUMERIC) AS NUMERIC), 2) AS temperature_extreme_risk,\n        ROUND(CAST(CAST(crf.wind_damage_risk AS NUMERIC) AS NUMERIC), 2) AS wind_damage_risk,\n        ROUND(CAST(CAST(crf.flood_risk AS NUMERIC) AS NUMERIC), 2) AS flood_risk,\n        ROUND(CAST(CAST(crf.overall_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_risk_score,\n        -- Risk category classification\n        CASE\n            WHEN crf.overall_risk_score >= 75 THEN 'Extreme'\n            WHEN crf.overall_risk_score >= 50 THEN 'Very High'\n            WHEN crf.overall_risk_score >= 30 THEN 'High'\n            WHEN crf.overall_risk_score >= 15 THEN 'Moderate'\n            ELSE 'Low'\n        END AS risk_category\n    FROM combined_risk_factors crf\n)\nSELECT\n    rca.policy_area_id,\n    rca.policy_type,\n    rca.forecast_day,\n    rca.forecast_date,\n    rca.cumulative_precipitation_risk,\n    rca.extreme_event_probability,\n    rca.freeze_risk,\n    rca.heat_risk,\n    rca.temperature_extreme_risk,\n    rca.wind_damage_risk,\n    rca.flood_risk,\n    rca.overall_risk_score,\n    rca.risk_category\nFROM risk_category_assignment rca\nWHERE rca.forecast_day BETWEEN 7 AND 14\n    AND rca.forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\nORDER BY rca.policy_area_id, rca.forecast_day, rca.forecast_date\nLIMIT 5000;",
      "line_number": 3769,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014295,
        "row_count": 0,
        "column_count": 13,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 16,
      "title": "Insurance Rate Table Generation from Forecast Risk Factors",
      "description": "Use Case: Insurance Underwriting - Dynamic Rate Table Calculation Based on 7-14 Day Forecasts Description: Generates insurance rate tables for December 3-17, 2025 period using risk factors calculated from 7-14 day forecasts. Calculates base rates, risk-adjusted rates, rate components, and rate tiers based on forecast-based risk scores. Purpose:
    Complete rate table showing rates for each policy area, forecast day (7-14 days), and coverage type with risk-adjusted pricing. Business Value: Enables d",
      "complexity": "Multiple CTEs (9+ levels), rate calculations, risk factor integration, tier assignments, window functions, complex aggregations",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nrisk_factors_base AS (\n    -- Second CTE: Get risk factors from Query 31 results or calculate inline\n    SELECT\n        irf.risk_factor_id,\n        irf.policy_area_id,\n        irf.forecast_period_start,\n        irf.forecast_period_end,\n        irf.forecast_day,\n        irf.forecast_date,\n        irf.parameter_name,\n        irf.overall_risk_score,\n        irf.risk_category,\n        irf.cumulative_precipitation_risk,\n        irf.temperature_extreme_risk,\n        irf.wind_damage_risk,\n        irf.freeze_risk,\n        irf.flood_risk,\n        irf.extreme_event_probability,\n        ipa.policy_type,\n        ipa.coverage_type,\n        ipa.base_rate_factor,\n        ipa.risk_zone\n    FROM insurance_risk_factors irf\n    INNER JOIN insurance_policy_areas ipa ON irf.policy_area_id = ipa.policy_area_id\n    WHERE irf.forecast_period_start = DATE '2025-12-03'\n        AND irf.forecast_period_end = DATE '2025-12-17'\n        AND irf.forecast_day BETWEEN 7 AND 14\n        AND ipa.is_active = TRUE\n),\nbase_rate_calculation AS (\n    -- Third CTE: Calculate base rates by policy type\n    SELECT\n        rfb.policy_area_id,\n        rfb.policy_type,\n        rfb.coverage_type,\n        rfb.forecast_day,\n        rfb.forecast_date,\n        rfb.base_rate_factor,\n        rfb.risk_zone,\n        -- Base rate by policy type (example rates in USD)\n        CASE\n            WHEN rfb.policy_type = 'Property' THEN 500.00\n            WHEN rfb.policy_type = 'Crop' THEN 300.00\n            WHEN rfb.policy_type = 'Auto' THEN 800.00\n            WHEN rfb.policy_type = 'Marine' THEN 1200.00\n            WHEN rfb.policy_type = 'General Liability' THEN 1000.00\n            ELSE 600.00\n        END AS base_rate,\n        rfb.overall_risk_score,\n        rfb.risk_category,\n        
rfb.cumulative_precipitation_risk,\n        rfb.temperature_extreme_risk,\n        rfb.wind_damage_risk,\n        rfb.freeze_risk,\n        rfb.flood_risk,\n        rfb.extreme_event_probability\n    FROM risk_factors_base rfb\n),\nrisk_component_calculation AS (\n    -- Fourth CTE: Calculate individual risk components\n    SELECT\n        brc.policy_area_id,\n        brc.policy_type,\n        brc.coverage_type,\n        brc.forecast_day,\n        brc.forecast_date,\n        brc.base_rate,\n        brc.base_rate_factor,\n        brc.risk_zone,\n        brc.overall_risk_score,\n        brc.risk_category,\n        -- Risk components (as dollar amounts)\n        (brc.base_rate * brc.cumulative_precipitation_risk / 100.0) * 0.30 AS precipitation_risk_component,\n        (brc.base_rate * brc.temperature_extreme_risk / 100.0) * 0.25 AS temperature_risk_component,\n        (brc.base_rate * brc.wind_damage_risk / 100.0) * 0.20 AS wind_risk_component,\n        (brc.base_rate * brc.freeze_risk / 100.0) * 0.15 AS freeze_risk_component,\n        (brc.base_rate * brc.flood_risk / 100.0) * 0.10 AS flood_risk_component,\n        (brc.base_rate * brc.extreme_event_probability) * 0.10 AS extreme_event_component,\n        -- Risk multiplier (how much to adjust base rate)\n        CASE\n            WHEN brc.overall_risk_score >= 75 THEN 2.50  -- Extreme risk: 2.5x base rate\n            WHEN brc.overall_risk_score >= 50 THEN 2.00  -- Very High: 2.0x\n            WHEN brc.overall_risk_score >= 30 THEN 1.50  -- High: 1.5x\n            WHEN brc.overall_risk_score >= 15 THEN 1.25  -- Moderate: 1.25x\n            ELSE 1.00  -- Low: 1.0x (no adjustment)\n        END AS risk_multiplier\n    FROM base_rate_calculation brc\n),\nrate_tier_assignment AS (\n    -- Fifth CTE: Assign rate tiers based on risk\n    SELECT\n        rcc.policy_area_id,\n        rcc.policy_type,\n        rcc.coverage_type,\n        rcc.forecast_day,\n        rcc.forecast_date,\n        rcc.base_rate,\n        
rcc.base_rate_factor,\n        rcc.risk_zone,\n        rcc.overall_risk_score,\n        rcc.risk_category,\n        rcc.precipitation_risk_component,\n        rcc.temperature_risk_component,\n        rcc.wind_risk_component,\n        rcc.freeze_risk_component,\n        rcc.flood_risk_component,\n        rcc.extreme_event_component,\n        rcc.risk_multiplier,\n        -- Rate tier assignment\n        CASE\n            WHEN rcc.overall_risk_score >= 75 THEN 'High Risk'\n            WHEN rcc.overall_risk_score >= 50 THEN 'Substandard'\n            WHEN rcc.overall_risk_score >= 30 THEN 'Standard'\n            WHEN rcc.overall_risk_score >= 15 THEN 'Preferred'\n            ELSE 'Preferred Plus'\n        END AS rate_tier,\n        -- Rate category\n        CASE\n            WHEN rcc.overall_risk_score >= 75 THEN 'Very High'\n            WHEN rcc.overall_risk_score >= 50 THEN 'High'\n            WHEN rcc.overall_risk_score >= 30 THEN 'Moderate'\n            WHEN rcc.overall_risk_score >= 15 THEN 'Low'\n            ELSE 'Very Low'\n        END AS rate_category\n    FROM risk_component_calculation rcc\n),\nrisk_adjusted_rate_calculation AS (\n    -- Sixth CTE: Calculate risk-adjusted rates\n    SELECT\n        rta.policy_area_id,\n        rta.policy_type,\n        rta.coverage_type,\n        rta.forecast_day,\n        rta.forecast_date,\n        rta.base_rate,\n        rta.base_rate_factor,\n        rta.risk_zone,\n        rta.overall_risk_score,\n        rta.risk_category,\n        ROUND(CAST(CAST(rta.precipitation_risk_component AS NUMERIC) AS NUMERIC), 2) AS precipitation_risk_component,\n        ROUND(CAST(CAST(rta.temperature_risk_component AS NUMERIC) AS NUMERIC), 2) AS temperature_risk_component,\n        ROUND(CAST(CAST(rta.wind_risk_component AS NUMERIC) AS NUMERIC), 2) AS wind_risk_component,\n        ROUND(CAST(CAST(rta.freeze_risk_component AS NUMERIC) AS NUMERIC), 2) AS freeze_risk_component,\n        ROUND(CAST(CAST(rta.flood_risk_component AS NUMERIC) AS 
NUMERIC), 2) AS flood_risk_component,\n        ROUND(CAST(CAST(rta.extreme_event_component AS NUMERIC) AS NUMERIC), 2) AS extreme_event_component,\n        rta.risk_multiplier,\n        rta.rate_tier,\n        rta.rate_category,\n        -- Base component (always present)\n        rta.base_rate * rta.base_rate_factor AS base_component,\n        -- Total risk components\n        (\n            rta.precipitation_risk_component +\n            rta.temperature_risk_component +\n            rta.wind_risk_component +\n            rta.freeze_risk_component +\n            rta.flood_risk_component +\n            rta.extreme_event_component\n        ) AS total_risk_components,\n        -- Risk-adjusted rate\n        (\n            (rta.base_rate * rta.base_rate_factor) +\n            (\n                rta.precipitation_risk_component +\n                rta.temperature_risk_component +\n                rta.wind_risk_component +\n                rta.freeze_risk_component +\n                rta.flood_risk_component +\n                rta.extreme_event_component\n            )\n        ) * rta.risk_multiplier AS risk_adjusted_rate\n    FROM rate_tier_assignment rta\n),\nconfidence_calculation AS (\n    -- Seventh CTE: Calculate confidence levels based on forecast day\n    SELECT\n        rarc.policy_area_id,\n        rarc.policy_type,\n        rarc.coverage_type,\n        rarc.forecast_day,\n        rarc.forecast_date,\n        rarc.base_rate,\n        rarc.base_component,\n        rarc.precipitation_risk_component,\n        rarc.temperature_risk_component,\n        rarc.wind_risk_component,\n        rarc.freeze_risk_component,\n        rarc.flood_risk_component,\n        rarc.extreme_event_component,\n        rarc.total_risk_components,\n        rarc.risk_multiplier,\n        rarc.risk_adjusted_rate,\n        rarc.rate_tier,\n        rarc.rate_category,\n        rarc.overall_risk_score,\n        -- Confidence level decreases with forecast day (7 days = higher confidence, 14 
days = lower)\n        CASE\n            WHEN rarc.forecast_day <= 8 THEN 90.0  -- High confidence for 7-8 days\n            WHEN rarc.forecast_day <= 10 THEN 75.0  -- Moderate-high for 9-10 days\n            WHEN rarc.forecast_day <= 12 THEN 60.0  -- Moderate for 11-12 days\n            ELSE 45.0  -- Lower confidence for 13-14 days\n        END AS confidence_level\n    FROM risk_adjusted_rate_calculation rarc\n),\nfinal_rate_table AS (\n    -- Eighth CTE:
    Final rate table with all components\n    SELECT\n        cc.policy_area_id,\n        cc.policy_type,\n        cc.coverage_type,\n        cc.forecast_day,\n        cc.forecast_date,\n        DATE '2025-12-03' AS forecast_period_start,\n        DATE '2025-12-17' AS forecast_period_end,\n        cc.base_rate,\n        ROUND(CAST(CAST(cc.base_component AS NUMERIC) AS NUMERIC), 2) AS base_component,\n        cc.precipitation_risk_component,\n        cc.temperature_risk_component,\n        cc.wind_risk_component,\n        cc.freeze_risk_component,\n        cc.flood_risk_component,\n        cc.extreme_event_component,\n        ROUND(CAST(CAST(cc.total_risk_components AS NUMERIC) AS NUMERIC), 2) AS total_risk_components,\n        ROUND(CAST(CAST(cc.risk_multiplier AS NUMERIC) AS NUMERIC), 3) AS risk_multiplier,\n        ROUND(CAST(CAST(cc.risk_adjusted_rate AS NUMERIC) AS NUMERIC), 2) AS risk_adjusted_rate,\n        cc.rate_tier,\n        cc.rate_category,\n        ROUND(CAST(CAST(cc.overall_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_risk_score,\n        ROUND(CAST(CAST(cc.confidence_level AS NUMERIC) AS NUMERIC), 2) AS confidence_level,\n        'Forecast-Based' AS calculation_method,\n        DATE '2025-12-03' AS effective_date,\n        DATE '2025-12-17' AS expiration_date\n    FROM confidence_calculation cc\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    forecast_day,\n    forecast_date,\n    forecast_period_start,\n    forecast_period_end,\n    base_rate,\n    base_component,\n    precipitation_risk_component,\n    temperature_risk_component,\n    wind_risk_component,\n    freeze_risk_component,\n    flood_risk_component,\n    extreme_event_component,\n    total_risk_components,\n    risk_multiplier,\n    risk_adjusted_rate,\n    rate_tier,\n    rate_category,\n    overall_risk_score,\n    confidence_level,\n    calculation_method,\n    effective_date,\n    expiration_date\nFROM final_rate_table\nWHERE forecast_day BETWEEN 7 AND 
14\n    AND forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\nORDER BY policy_area_id, forecast_day, forecast_date\nLIMIT 10000;",
      "line_number": 4071,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005486,
        "row_count": 0,
        "column_count": 25,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 17,
      "title": "Rate Table Comparison Across 7-14 Day Forecasts",
      "description": "Use Case: Insurance Underwriting - Multi-Day Forecast Rate Comparison for Optimal Rate Selection Description:
    Compares insurance rates across all forecast days (7-14 days ahead) for December 3-17, 2025 period. Calculates rate statistics, volatility, trends, and recommends optimal forecast day for rate determination. Business Value: Rate comparison report showing rates by forecast day with volatility metrics and recommended rates. Purpose: Enables insurance companies to select optimal forecast da",
      "complexity": "Multiple CTEs (7+ levels), rate aggregation, volatility calculations, trend analysis, window functions, statistical analysis",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nrate_tables_by_day AS (\n    -- Second CTE: Get rates for each forecast day (7-14)\n    SELECT\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type,\n        irt.forecast_period_start,\n        irt.forecast_period_end,\n        irt.forecast_day,\n        irt.forecast_date,\n        irt.base_rate,\n        irt.risk_adjusted_rate,\n        irt.risk_multiplier,\n        irt.rate_tier,\n        irt.rate_category,\n        irt.confidence_level\n    FROM insurance_rate_tables irt\n    WHERE irt.forecast_period_start = DATE '2025-12-03'\n        AND irt.forecast_period_end = DATE '2025-12-17'\n        AND irt.forecast_day BETWEEN 7 AND 14\n        AND irt.forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n),\nrates_by_forecast_day AS (\n    -- Third CTE:
    Aggregate rates by forecast day\n    SELECT\n        rtbd.policy_area_id,\n        rtbd.policy_type,\n        rtbd.coverage_type,\n        rtbd.forecast_period_start,\n        rtbd.forecast_period_end,\n        rtbd.forecast_day,\n        COUNT(*) AS rate_count,\n        MIN(rtbd.risk_adjusted_rate) AS min_rate,\n        MAX(rtbd.risk_adjusted_rate) AS max_rate,\n        AVG(rtbd.risk_adjusted_rate) AS avg_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY rtbd.risk_adjusted_rate) AS median_rate,\n        STDDEV(rtbd.risk_adjusted_rate) AS rate_stddev,\n        VARIANCE(rtbd.risk_adjusted_rate) AS rate_variance,\n        AVG(rtbd.confidence_level) AS avg_confidence_level,\n        AVG(rtbd.risk_multiplier) AS avg_risk_multiplier\n    FROM rate_tables_by_day rtbd\n    GROUP BY\n        rtbd.policy_area_id,\n        rtbd.policy_type,\n        rtbd.coverage_type,\n        rtbd.forecast_period_start,\n        rtbd.forecast_period_end,\n        rtbd.forecast_day\n),\nrate_statistics AS (\n    -- Fourth CTE: Calculate comprehensive statistics\n    SELECT\n        rbfd.policy_area_id,\n        rbfd.policy_type,\n        rbfd.coverage_type,\n        rbfd.forecast_period_start,\n        rbfd.forecast_period_end,\n        rbfd.forecast_day,\n        rbfd.rate_count,\n        ROUND(CAST(CAST(rbfd.min_rate AS NUMERIC) AS NUMERIC), 2) AS min_rate,\n        ROUND(CAST(CAST(rbfd.max_rate AS NUMERIC) AS NUMERIC), 2) AS max_rate,\n        ROUND(CAST(CAST(rbfd.avg_rate AS NUMERIC) AS NUMERIC), 2) AS avg_rate,\n        ROUND(CAST(CAST(rbfd.median_rate AS NUMERIC) AS NUMERIC), 2) AS median_rate,\n        ROUND(CAST(CAST(rbfd.rate_stddev AS NUMERIC) AS NUMERIC), 2) AS rate_stddev,\n        ROUND(CAST(CAST(rbfd.rate_variance AS NUMERIC) AS NUMERIC), 2) AS rate_variance,\n        ROUND(CAST(CAST(rbfd.avg_confidence_level AS NUMERIC) AS NUMERIC), 2) AS avg_confidence_level,\n        ROUND(CAST(CAST(rbfd.avg_risk_multiplier AS NUMERIC) AS NUMERIC), 3) AS avg_risk_multiplier,\n  
      -- Rate range\n        rbfd.max_rate - rbfd.min_rate AS rate_range,\n        -- Coefficient of variation (volatility measure)\n        CASE\n            WHEN rbfd.avg_rate != 0 THEN\n                (rbfd.rate_stddev / ABS(rbfd.avg_rate)) * 100\n            ELSE NULL\n        END AS rate_volatility_percent\n    FROM rates_by_forecast_day rbfd\n),\nrate_trend_analysis AS (\n    -- Fifth CTE: Analyze rate trends across forecast days\n    SELECT\n        rs.policy_area_id,\n        rs.policy_type,\n        rs.coverage_type,\n        rs.forecast_period_start,\n        rs.forecast_period_end,\n        rs.forecast_day,\n        rs.rate_count,\n        rs.min_rate,\n        rs.max_rate,\n        rs.avg_rate,\n        rs.median_rate,\n        rs.rate_stddev,\n        rs.rate_range,\n        rs.rate_volatility_percent,\n        rs.avg_confidence_level,\n        rs.avg_risk_multiplier,\n        -- Compare with previous forecast day\n        LAG(rs.avg_rate, 1) OVER (\n            PARTITION BY rs.policy_area_id, rs.policy_type, rs.coverage_type\n            ORDER BY rs.forecast_day\n        ) AS prev_day_avg_rate,\n        -- Rate change from previous day\n        rs.avg_rate - LAG(rs.avg_rate, 1) OVER (\n            PARTITION BY rs.policy_area_id, rs.policy_type, rs.coverage_type\n            ORDER BY rs.forecast_day\n        ) AS rate_change_from_prev_day,\n        -- Rate trend\n        CASE\n            WHEN LAG(rs.avg_rate, 1) OVER (\n                PARTITION BY rs.policy_area_id, rs.policy_type, rs.coverage_type\n                ORDER BY rs.forecast_day\n            ) IS NOT NULL THEN\n                CASE\n                    WHEN rs.avg_rate > LAG(rs.avg_rate, 1) OVER (\n                        PARTITION BY rs.policy_area_id, rs.policy_type, rs.coverage_type\n                        ORDER BY rs.forecast_day\n                    ) * 1.05 THEN 'Increasing'\n                    WHEN rs.avg_rate < LAG(rs.avg_rate, 1) OVER (\n                        PARTITION BY 
rs.policy_area_id, rs.policy_type, rs.coverage_type\n                        ORDER BY rs.forecast_day\n                    ) * 0.95 THEN 'Decreasing'\n                    ELSE 'Stable'\n                END\n            ELSE NULL\n        END AS rate_trend,\n        -- Moving average (3-day)\n        AVG(rs.avg_rate) OVER (\n            PARTITION BY rs.policy_area_id, rs.policy_type, rs.coverage_type\n            ORDER BY rs.forecast_day\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_3day\n    FROM rate_statistics rs\n),\noptimal_rate_selection AS (\n    -- Sixth CTE: Determine optimal rate and forecast day\n    SELECT\n        rta.policy_area_id,\n        rta.policy_type,\n        rta.coverage_type,\n        rta.forecast_period_start,\n        rta.forecast_period_end,\n        rta.forecast_day,\n        rta.rate_count,\n        rta.min_rate,\n        rta.max_rate,\n        rta.avg_rate,\n        rta.median_rate,\n        rta.rate_stddev,\n        rta.rate_range,\n        ROUND(CAST(CAST(rta.rate_volatility_percent AS NUMERIC) AS NUMERIC), 2) AS rate_volatility_percent,\n        rta.avg_confidence_level,\n        rta.avg_risk_multiplier,\n        ROUND(CAST(CAST(rta.prev_day_avg_rate AS NUMERIC) AS NUMERIC), 2) AS prev_day_avg_rate,\n        ROUND(CAST(CAST(rta.rate_change_from_prev_day AS NUMERIC) AS NUMERIC), 2) AS rate_change_from_prev_day,\n        rta.rate_trend,\n        ROUND(CAST(CAST(rta.moving_avg_3day AS NUMERIC) AS NUMERIC), 2) AS moving_avg_3day,\n        -- Score for optimal rate selection (higher confidence + lower volatility = better)\n        (\n            (rta.avg_confidence_level / 100.0) * 50.0 +  -- Confidence component (50% weight)\n            GREATEST(0, 50.0 - (rta.rate_volatility_percent / 2.0))  -- Low volatility component (50% weight)\n        ) AS optimal_rate_score\n    FROM rate_trend_analysis rta\n),\nrecommended_rates AS (\n    -- Seventh CTE: Select recommended rates\n    SELECT\n        
ors.policy_area_id,\n        ors.policy_type,\n        ors.coverage_type,\n        ors.forecast_period_start,\n        ors.forecast_period_end,\n        ors.forecast_day,\n        ors.rate_count,\n        ors.min_rate,\n        ors.max_rate,\n        ors.avg_rate,\n        ors.median_rate,\n        ors.rate_stddev,\n        ors.rate_range,\n        ors.rate_volatility_percent,\n        ors.avg_confidence_level,\n        ors.avg_risk_multiplier,\n        ors.prev_day_avg_rate,\n        ors.rate_change_from_prev_day,\n        ors.rate_trend,\n        ors.moving_avg_3day,\n        ors.optimal_rate_score,\n        -- Recommended rate (use median for stability)\n        ors.median_rate AS recommended_rate,\n        -- Rank by optimal score\n        ROW_NUMBER() OVER (\n            PARTITION BY ors.policy_area_id, ors.policy_type, ors.coverage_type\n            ORDER BY ors.optimal_rate_score DESC\n        ) AS optimal_day_rank\n    FROM optimal_rate_selection ors\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    forecast_period_start,\n    forecast_period_end,\n    forecast_day,\n    rate_count,\n    min_rate,\n    max_rate,\n    avg_rate,\n    median_rate,\n    rate_stddev,\n    rate_range,\n    rate_volatility_percent,\n    avg_confidence_level,\n    avg_risk_multiplier,\n    prev_day_avg_rate,\n    rate_change_from_prev_day,\n    rate_trend,\n    moving_avg_3day,\n    ROUND(CAST(CAST(optimal_rate_score AS NUMERIC) AS NUMERIC), 2) AS optimal_rate_score,\n    recommended_rate,\n    optimal_day_rank,\n    CASE\n        WHEN optimal_day_rank = 1 THEN 'Recommended'\n        ELSE 'Alternative'\n    END AS recommendation_status\nFROM recommended_rates\nWHERE forecast_day BETWEEN 7 AND 14\nORDER BY policy_area_id, policy_type, coverage_type, forecast_day\nLIMIT 5000;",
      "line_number": 4363,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004576,
        "row_count": 0,
        "column_count": 24,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 18,
      "title": "Historical Claims Validation Against Forecast Risk Factors",
      "description": "Use Case: Insurance Underwriting - Forecast Accuracy Validation Using Historical Claims Data Description: Validates forecast-based risk factors against historical claims data for December 3-17, 2025 period. Compares forecast risk scores with actual claims to assess forecast accuracy and improve rate modeling. Purpose:
    Validation report showing forecast risk vs actual claims with accuracy metrics and improvement recommendations. Business Value: Enables insurance companies to validate and improve ",
      "complexity": "Multiple CTEs (8+ levels), historical data joins, forecast accuracy calculations, error analysis, window functions, statistical comparisons",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nhistorical_claims AS (\n    -- Second CTE: Get historical claims for validation period\n    SELECT\n        ich.claim_id,\n        ich.policy_area_id,\n        ich.loss_date AS claim_date,\n        ich.loss_date,\n        NULL::VARCHAR(100) AS policy_type,\n        NULL::VARCHAR(100) AS coverage_type,\n        ich.claim_type,\n        ich.claim_amount,\n        'Closed'::VARCHAR(50) AS claim_status,\n        NULL::VARCHAR(100) AS weather_event_type,\n        NULL::DATE AS weather_event_date,\n        NULL::NUMERIC AS temperature_at_loss,\n        NULL::NUMERIC AS precipitation_at_loss,\n        NULL::NUMERIC AS wind_speed_at_loss,\n        ich.forecast_available,\n        ich.forecast_day,\n        NULL::NUMERIC AS forecast_error\n    FROM insurance_claims_history ich\n    WHERE ich.loss_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n),\nforecast_risk_factors AS (\n    -- Third CTE: Get forecast risk factors for same period\n    SELECT\n        irf.risk_factor_id,\n        irf.policy_area_id,\n        irf.forecast_period_start,\n        irf.forecast_period_end,\n        irf.forecast_day,\n        irf.forecast_date,\n        irf.parameter_name,\n        irf.overall_risk_score,\n        irf.risk_category,\n        irf.cumulative_precipitation_risk,\n        irf.temperature_extreme_risk,\n        irf.wind_damage_risk,\n        irf.freeze_risk,\n        irf.flood_risk,\n        irf.extreme_event_probability\n    FROM insurance_risk_factors irf\n    WHERE irf.forecast_period_start = DATE '2025-12-03'\n        AND irf.forecast_period_end = DATE '2025-12-17'\n        AND irf.forecast_day BETWEEN 7 AND 14\n),\nclaims_risk_matching AS (\n    -- Fourth CTE:
    Match claims with forecast risk factors\n    SELECT\n        hc.claim_id,\n        hc.policy_area_id,\n        hc.loss_date,\n        hc.policy_type,\n        hc.coverage_type,\n        hc.claim_type,\n        hc.claim_amount AS loss_amount,\n        hc.weather_event_type,\n        hc.forecast_day,\n        frf.forecast_date,\n        frf.forecast_day AS forecast_day_ahead,\n        frf.overall_risk_score,\n        frf.risk_category,\n        frf.cumulative_precipitation_risk,\n        frf.temperature_extreme_risk,\n        frf.wind_damage_risk,\n        frf.freeze_risk,\n        frf.flood_risk,\n        frf.extreme_event_probability,\n        -- Days between forecast and loss\n        (hc.loss_date::date - frf.forecast_date::date) AS days_between_forecast_loss\n    FROM historical_claims hc\n    LEFT JOIN forecast_risk_factors frf ON (\n        hc.policy_area_id = frf.policy_area_id\n        AND hc.loss_date = frf.forecast_date\n    )\n),\nclaims_risk_analysis AS (\n    -- Fifth CTE: Analyze claims vs forecast risk\n    SELECT\n        crm.claim_id,\n        crm.policy_area_id,\n        crm.loss_date,\n        crm.policy_type,\n        crm.coverage_type,\n        crm.claim_type,\n        crm.loss_amount,\n        crm.weather_event_type,\n        crm.forecast_day_ahead,\n        crm.overall_risk_score,\n        crm.risk_category,\n        crm.cumulative_precipitation_risk,\n        crm.temperature_extreme_risk,\n        crm.wind_damage_risk,\n        crm.freeze_risk,\n        crm.flood_risk,\n        crm.extreme_event_probability,\n        crm.days_between_forecast_loss,\n        -- Expected risk category based on claim type\n        CASE\n            WHEN crm.weather_event_type IN ('Flood', 'Heavy Rain') THEN\n                CASE\n                    WHEN crm.loss_amount > 100000 THEN 'Extreme'\n                    WHEN crm.loss_amount > 50000 THEN 'Very High'\n                    WHEN crm.loss_amount > 25000 THEN 'High'\n                    WHEN 
crm.loss_amount > 10000 THEN 'Moderate'\n                    ELSE 'Low'\n                END\n            WHEN crm.weather_event_type IN ('Freeze', 'Frost') THEN\n                CASE\n                    WHEN crm.loss_amount > 100000 THEN 'Extreme'\n                    WHEN crm.loss_amount > 50000 THEN 'Very High'\n                    WHEN crm.loss_amount > 25000 THEN 'High'\n                    WHEN crm.loss_amount > 10000 THEN 'Moderate'\n                    ELSE 'Low'\n                END\n            WHEN crm.weather_event_type IN ('Wind', 'Hurricane', 'Tornado') THEN\n                CASE\n                    WHEN crm.loss_amount > 100000 THEN 'Extreme'\n                    WHEN crm.loss_amount > 50000 THEN 'Very High'\n                    WHEN crm.loss_amount > 25000 THEN 'High'\n                    WHEN crm.loss_amount > 10000 THEN 'Moderate'\n                    ELSE 'Low'\n                END\n            ELSE 'Moderate'\n        END AS expected_risk_category,\n        -- Forecast accuracy (risk category match)\n        CASE\n            WHEN crm.risk_category = CASE\n                WHEN crm.weather_event_type IN ('Flood', 'Heavy Rain') THEN\n                    CASE\n                        WHEN crm.loss_amount > 100000 THEN 'Extreme'\n                        WHEN crm.loss_amount > 50000 THEN 'Very High'\n                        WHEN crm.loss_amount > 25000 THEN 'High'\n                        WHEN crm.loss_amount > 10000 THEN 'Moderate'\n                        ELSE 'Low'\n                    END\n                WHEN crm.weather_event_type IN ('Freeze', 'Frost') THEN\n                    CASE\n                        WHEN crm.loss_amount > 100000 THEN 'Extreme'\n                        WHEN crm.loss_amount > 50000 THEN 'Very High'\n                        WHEN crm.loss_amount > 25000 THEN 'High'\n                        WHEN crm.loss_amount > 10000 THEN 'Moderate'\n                        ELSE 'Low'\n                    END\n                WHEN 
crm.weather_event_type IN ('Wind', 'Hurricane', 'Tornado') THEN\n                    CASE\n                        WHEN crm.loss_amount > 100000 THEN 'Extreme'\n                        WHEN crm.loss_amount > 50000 THEN 'Very High'\n                        WHEN crm.loss_amount > 25000 THEN 'High'\n                        WHEN crm.loss_amount > 10000 THEN 'Moderate'\n                        ELSE 'Low'\n                    END\n                ELSE 'Moderate'\n            END THEN 'Accurate'\n            ELSE 'Inaccurate'\n        END AS forecast_accuracy\n    FROM claims_risk_matching crm\n    WHERE crm.overall_risk_score IS NOT NULL\n),\naccuracy_statistics AS (\n    -- Sixth CTE: Calculate accuracy statistics\n    SELECT\n        cra.policy_area_id,\n        cra.policy_type,\n        cra.coverage_type,\n        cra.forecast_day_ahead,\n        COUNT(*) AS total_claims,\n        COUNT(CASE WHEN cra.forecast_accuracy = 'Accurate' THEN 1 END) AS accurate_forecasts,\n        COUNT(CASE WHEN cra.forecast_accuracy = 'Inaccurate' THEN 1 END) AS inaccurate_forecasts,\n        -- Accuracy rate\n        (COUNT(CASE WHEN cra.forecast_accuracy = 'Accurate' THEN 1 END)::NUMERIC / COUNT(*)::NUMERIC) * 100 AS accuracy_rate,\n        -- Average risk scores\n        AVG(cra.overall_risk_score) AS avg_forecast_risk_score,\n        AVG(cra.loss_amount) AS avg_loss_amount,\n        -- Risk category distribution\n        COUNT(CASE WHEN cra.risk_category = 'Extreme' THEN 1 END) AS extreme_risk_count,\n        COUNT(CASE WHEN cra.risk_category = 'Very High' THEN 1 END) AS very_high_risk_count,\n        COUNT(CASE WHEN cra.risk_category = 'High' THEN 1 END) AS high_risk_count,\n        COUNT(CASE WHEN cra.risk_category = 'Moderate' THEN 1 END) AS moderate_risk_count,\n        COUNT(CASE WHEN cra.risk_category = 'Low' THEN 1 END) AS low_risk_count\n    FROM claims_risk_analysis cra\n    GROUP BY\n        cra.policy_area_id,\n        cra.policy_type,\n        cra.coverage_type,\n        
cra.forecast_day_ahead\n),\nforecast_improvement_analysis AS (\n    -- Seventh CTE: Analyze forecast improvement opportunities\n    SELECT\n        ast.policy_area_id,\n        ast.policy_type,\n        ast.coverage_type,\n        ast.forecast_day_ahead,\n        ast.total_claims,\n        ast.accurate_forecasts,\n        ast.inaccurate_forecasts,\n        ROUND(CAST(CAST(ast.accuracy_rate AS NUMERIC) AS NUMERIC), 2) AS accuracy_rate,\n        ROUND(CAST(CAST(ast.avg_forecast_risk_score AS NUMERIC) AS NUMERIC), 2) AS avg_forecast_risk_score,\n        ROUND(CAST(CAST(ast.avg_loss_amount AS NUMERIC) AS NUMERIC), 2) AS avg_loss_amount,\n        ast.extreme_risk_count,\n        ast.very_high_risk_count,\n        ast.high_risk_count,\n        ast.moderate_risk_count,\n        ast.low_risk_count,\n        -- Improvement recommendation\n        CASE\n            WHEN ast.accuracy_rate < 50 THEN 'High Priority - Significant Improvement Needed'\n            WHEN ast.accuracy_rate < 70 THEN 'Medium Priority - Moderate Improvement Needed'\n            WHEN ast.accuracy_rate < 85 THEN 'Low Priority - Minor Improvement Needed'\n            ELSE 'Acceptable - Monitor Performance'\n        END AS improvement_priority,\n        -- Forecast day performance\n        CASE\n            WHEN ast.forecast_day_ahead <= 8 AND ast.accuracy_rate >= 80 THEN 'Optimal Forecast Day'\n            WHEN ast.forecast_day_ahead <= 10 AND ast.accuracy_rate >= 70 THEN 'Good Forecast Day'\n            WHEN ast.forecast_day_ahead <= 12 AND ast.accuracy_rate >= 60 THEN 'Acceptable Forecast Day'\n            ELSE 'Review Forecast Day'\n        END AS forecast_day_assessment\n    FROM accuracy_statistics ast\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    forecast_day_ahead,\n    total_claims,\n    accurate_forecasts,\n    inaccurate_forecasts,\n    accuracy_rate,\n    avg_forecast_risk_score,\n    avg_loss_amount,\n    extreme_risk_count,\n    very_high_risk_count,\n    
high_risk_count,\n    moderate_risk_count,\n    low_risk_count,\n    improvement_priority,\n    forecast_day_assessment\nFROM forecast_improvement_analysis\nWHERE forecast_day_ahead BETWEEN 7 AND 14\nORDER BY policy_area_id, policy_type, coverage_type, forecast_day_ahead\nLIMIT 2000;",
      "line_number": 4618,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005464,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 19,
      "title": "Rate Volatility and Stability Analysis",
      "description": "Use Case: Insurance Underwriting - Rate Stability Assessment for Pricing Consistency Description:
    Analyzes rate volatility and stability across 7-14 day forecasts for December 3-17, 2025. Identifies areas with high rate volatility and recommends stable pricing strategies. Business Value: Rate volatility analysis report with stability metrics and recommendations for consistent pricing. Purpose: Helps insurance companies identify pricing inconsistencies and implement stable pricing strategies, imp",
      "complexity": "Multiple CTEs (6+ levels), volatility calculations, stability metrics, trend analysis, window functions, statistical analysis",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nrate_data AS (\n    -- Second CTE: Get rate data for analysis\n    SELECT\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type,\n        irt.forecast_day,\n        irt.forecast_date,\n        irt.base_rate,\n        irt.risk_adjusted_rate,\n        irt.risk_multiplier,\n        irt.confidence_level\n    FROM insurance_rate_tables irt\n    WHERE irt.forecast_period_start = DATE '2025-12-03'\n        AND irt.forecast_period_end = DATE '2025-12-17'\n        AND irt.forecast_day BETWEEN 7 AND 14\n        AND irt.forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n),\nrate_volatility_calculation AS (\n    -- Third CTE:
    Calculate rate volatility metrics\n    SELECT\n        rd.policy_area_id,\n        rd.policy_type,\n        rd.coverage_type,\n        rd.forecast_day,\n        COUNT(*) AS rate_observations,\n        MIN(rd.risk_adjusted_rate) AS min_rate,\n        MAX(rd.risk_adjusted_rate) AS max_rate,\n        AVG(rd.risk_adjusted_rate) AS avg_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY rd.risk_adjusted_rate) AS median_rate,\n        STDDEV(rd.risk_adjusted_rate) AS rate_stddev,\n        VARIANCE(rd.risk_adjusted_rate) AS rate_variance,\n        -- Coefficient of variation (volatility measure)\n        CASE\n            WHEN AVG(rd.risk_adjusted_rate) != 0 THEN\n                (STDDEV(rd.risk_adjusted_rate) / ABS(AVG(rd.risk_adjusted_rate))) * 100\n            ELSE NULL\n        END AS coefficient_of_variation,\n        -- Range as percentage of average\n        CASE\n            WHEN AVG(rd.risk_adjusted_rate) != 0 THEN\n                ((MAX(rd.risk_adjusted_rate) - MIN(rd.risk_adjusted_rate)) / ABS(AVG(rd.risk_adjusted_rate))) * 100\n            ELSE NULL\n        END AS range_percentage,\n        AVG(rd.confidence_level) AS avg_confidence_level\n    FROM rate_data rd\n    GROUP BY\n        rd.policy_area_id,\n        rd.policy_type,\n        rd.coverage_type,\n        rd.forecast_day\n),\nrate_stability_metrics AS (\n    -- Fourth CTE: Calculate stability metrics\n    SELECT\n        rvc.policy_area_id,\n        rvc.policy_type,\n        rvc.coverage_type,\n        rvc.forecast_day,\n        rvc.rate_observations,\n        ROUND(CAST(CAST(rvc.min_rate AS NUMERIC) AS NUMERIC), 2) AS min_rate,\n        ROUND(CAST(CAST(rvc.max_rate AS NUMERIC) AS NUMERIC), 2) AS max_rate,\n        ROUND(CAST(CAST(rvc.avg_rate AS NUMERIC) AS NUMERIC), 2) AS avg_rate,\n        ROUND(CAST(CAST(rvc.median_rate AS NUMERIC) AS NUMERIC), 2) AS median_rate,\n        ROUND(CAST(CAST(rvc.rate_stddev AS NUMERIC) AS NUMERIC), 2) AS rate_stddev,\n        
ROUND(CAST(CAST(rvc.coefficient_of_variation AS NUMERIC) AS NUMERIC), 2) AS coefficient_of_variation,\n        ROUND(CAST(CAST(rvc.range_percentage AS NUMERIC) AS NUMERIC), 2) AS range_percentage,\n        ROUND(CAST(CAST(rvc.avg_confidence_level AS NUMERIC) AS NUMERIC), 2) AS avg_confidence_level,\n        -- Stability score (lower volatility = higher stability)\n        CASE\n            WHEN rvc.coefficient_of_variation IS NOT NULL THEN\n                100.0 - LEAST(rvc.coefficient_of_variation, 100.0)\n            ELSE 50.0\n        END AS stability_score,\n        -- Volatility classification\n        CASE\n            WHEN rvc.coefficient_of_variation > 30 THEN 'Very High Volatility'\n            WHEN rvc.coefficient_of_variation > 20 THEN 'High Volatility'\n            WHEN rvc.coefficient_of_variation > 10 THEN 'Moderate Volatility'\n            WHEN rvc.coefficient_of_variation > 5 THEN 'Low Volatility'\n            ELSE 'Very Low Volatility'\n        END AS volatility_classification\n    FROM rate_volatility_calculation rvc\n),\ncross_day_volatility AS (\n    -- Fifth CTE: Analyze volatility across forecast days\n    SELECT\n        rsm.policy_area_id,\n        rsm.policy_type,\n        rsm.coverage_type,\n        rsm.forecast_day,\n        rsm.rate_observations,\n        rsm.min_rate,\n        rsm.max_rate,\n        rsm.avg_rate,\n        rsm.median_rate,\n        rsm.rate_stddev,\n        rsm.coefficient_of_variation,\n        rsm.range_percentage,\n        rsm.avg_confidence_level,\n        rsm.stability_score,\n        rsm.volatility_classification,\n        -- Compare with other forecast days\n        AVG(rsm.coefficient_of_variation) OVER (\n            PARTITION BY rsm.policy_area_id, rsm.policy_type, rsm.coverage_type\n        ) AS avg_volatility_across_days,\n        MIN(rsm.coefficient_of_variation) OVER (\n            PARTITION BY rsm.policy_area_id, rsm.policy_type, rsm.coverage_type\n        ) AS min_volatility_across_days,\n        
MAX(rsm.coefficient_of_variation) OVER (\n            PARTITION BY rsm.policy_area_id, rsm.policy_type, rsm.coverage_type\n        ) AS max_volatility_across_days,\n        -- Rate change from previous forecast day\n        rsm.avg_rate - LAG(rsm.avg_rate, 1) OVER (\n            PARTITION BY rsm.policy_area_id, rsm.policy_type, rsm.coverage_type\n            ORDER BY rsm.forecast_day\n        ) AS rate_change_from_prev_day\n    FROM rate_stability_metrics rsm\n),\nstability_recommendations AS (\n    -- Sixth CTE: Generate stability recommendations\n    SELECT\n        cdv.policy_area_id,\n        cdv.policy_type,\n        cdv.coverage_type,\n        cdv.forecast_day,\n        cdv.rate_observations,\n        cdv.min_rate,\n        cdv.max_rate,\n        cdv.avg_rate,\n        cdv.median_rate,\n        cdv.rate_stddev,\n        cdv.coefficient_of_variation,\n        cdv.range_percentage,\n        cdv.avg_confidence_level,\n        ROUND(CAST(CAST(cdv.stability_score AS NUMERIC) AS NUMERIC), 2) AS stability_score,\n        cdv.volatility_classification,\n        ROUND(CAST(CAST(cdv.avg_volatility_across_days AS NUMERIC) AS NUMERIC), 2) AS avg_volatility_across_days,\n        ROUND(CAST(CAST(cdv.min_volatility_across_days AS NUMERIC) AS NUMERIC), 2) AS min_volatility_across_days,\n        ROUND(CAST(CAST(cdv.max_volatility_across_days AS NUMERIC) AS NUMERIC), 2) AS max_volatility_across_days,\n        ROUND(CAST(CAST(cdv.rate_change_from_prev_day AS NUMERIC) AS NUMERIC), 2) AS rate_change_from_prev_day,\n        -- Recommended rate (use median for stability)\n        cdv.median_rate AS recommended_stable_rate,\n        -- Stability recommendation\n        CASE\n            WHEN cdv.coefficient_of_variation > 30 THEN 'Use Median Rate - High Volatility Detected'\n            WHEN cdv.coefficient_of_variation > 20 THEN 'Consider Rate Smoothing - Moderate-High Volatility'\n            WHEN cdv.coefficient_of_variation > 10 THEN 'Monitor Closely - Moderate Volatility'\n     
       ELSE 'Stable - Current Rate Acceptable'\n        END AS stability_recommendation\n    FROM cross_day_volatility cdv\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    forecast_day,\n    rate_observations,\n    min_rate,\n    max_rate,\n    avg_rate,\n    median_rate,\n    rate_stddev,\n    coefficient_of_variation,\n    range_percentage,\n    avg_confidence_level,\n    stability_score,\n    volatility_classification,\n    avg_volatility_across_days,\n    min_volatility_across_days,\n    max_volatility_across_days,\n    rate_change_from_prev_day,\n    recommended_stable_rate,\n    stability_recommendation\nFROM stability_recommendations\nWHERE forecast_day BETWEEN 7 AND 14\nORDER BY policy_area_id, policy_type, coverage_type, forecast_day\nLIMIT 3000;",
      "line_number": 4888,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004653,
        "row_count": 0,
        "column_count": 21,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 20,
      "title": "Policy Area Risk Ranking and Comparison",
      "description": "Use Case: Insurance Underwriting - Geographic Risk Ranking for Portfolio Management Description:
    Ranks policy areas by forecast-based risk scores for December 3-17, 2025 period. Provides comparative risk analysis across geographic areas to support portfolio management and resource allocation. Purpose: Risk ranking report showing policy areas ordered by risk level with comparative metrics. Business Value: Enables insurance companies to identify high-risk areas, allocate underwriting resources eff",
      "complexity": "Multiple CTEs (7+ levels), risk aggregation, ranking calculations, percentile analysis, window functions, comparative analysis",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\npolicy_area_risk_aggregation AS (\n    -- Second CTE: Aggregate risk factors by policy area\n    SELECT\n        irf.policy_area_id,\n        irf.forecast_day,\n        irf.forecast_date,\n        ipa.policy_type,\n        ipa.coverage_type,\n        ipa.policy_area_name,\n        ipa.state_code,\n        ipa.risk_zone,\n        AVG(irf.overall_risk_score) AS avg_risk_score,\n        MAX(irf.overall_risk_score) AS max_risk_score,\n        MIN(irf.overall_risk_score) AS min_risk_score,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY irf.overall_risk_score) AS median_risk_score,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY irf.overall_risk_score) AS p95_risk_score,\n        STDDEV(irf.overall_risk_score) AS risk_score_stddev,\n        AVG(irf.cumulative_precipitation_risk) AS avg_precipitation_risk,\n        AVG(irf.temperature_extreme_risk) AS avg_temperature_risk,\n        AVG(irf.wind_damage_risk) AS avg_wind_risk,\n        AVG(irf.freeze_risk) AS avg_freeze_risk,\n        AVG(irf.flood_risk) AS avg_flood_risk,\n        AVG(irf.extreme_event_probability) AS avg_extreme_event_probability,\n        COUNT(*) AS risk_observations\n    FROM insurance_risk_factors irf\n    INNER JOIN insurance_policy_areas ipa ON irf.policy_area_id = ipa.policy_area_id\n    WHERE irf.forecast_period_start = DATE '2025-12-03'\n        AND irf.forecast_period_end = DATE '2025-12-17'\n        AND irf.forecast_day BETWEEN 7 AND 14\n        AND ipa.is_active = TRUE\n    GROUP BY\n        irf.policy_area_id,\n        irf.forecast_day,\n        irf.forecast_date,\n        ipa.policy_type,\n        ipa.coverage_type,\n        ipa.policy_area_name,\n        ipa.state_code,\n        ipa.risk_zone\n),\npolicy_area_summary AS (\n    -- Third CTE: Summarize risk by policy area\n    SELECT\n        
para.policy_area_id,\n        para.policy_type,\n        para.coverage_type,\n        para.policy_area_name,\n        para.state_code,\n        para.risk_zone,\n        COUNT(DISTINCT para.forecast_day) AS forecast_days_covered,\n        COUNT(DISTINCT para.forecast_date) AS forecast_dates_covered,\n        SUM(para.risk_observations) AS total_risk_observations,\n        AVG(para.avg_risk_score) AS overall_avg_risk_score,\n        MAX(para.max_risk_score) AS overall_max_risk_score,\n        MIN(para.min_risk_score) AS overall_min_risk_score,\n        AVG(para.median_risk_score) AS overall_median_risk_score,\n        AVG(para.p95_risk_score) AS overall_p95_risk_score,\n        AVG(para.risk_score_stddev) AS overall_risk_stddev,\n        AVG(para.avg_precipitation_risk) AS overall_avg_precipitation_risk,\n        AVG(para.avg_temperature_risk) AS overall_avg_temperature_risk,\n        AVG(para.avg_wind_risk) AS overall_avg_wind_risk,\n        AVG(para.avg_freeze_risk) AS overall_avg_freeze_risk,\n        AVG(para.avg_flood_risk) AS overall_avg_flood_risk,\n        AVG(para.avg_extreme_event_probability) AS overall_avg_extreme_event_probability\n    FROM policy_area_risk_aggregation para\n    GROUP BY\n        para.policy_area_id,\n        para.policy_type,\n        para.coverage_type,\n        para.policy_area_name,\n        para.state_code,\n        para.risk_zone\n),\nrisk_ranking_calculation AS (\n    -- Fourth CTE: Calculate risk rankings\n    SELECT\n        pas.policy_area_id,\n        pas.policy_type,\n        pas.coverage_type,\n        pas.policy_area_name,\n        pas.state_code,\n        pas.risk_zone,\n        pas.forecast_days_covered,\n        pas.forecast_dates_covered,\n        pas.total_risk_observations,\n        ROUND(CAST(CAST(pas.overall_avg_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_avg_risk_score,\n        ROUND(CAST(CAST(pas.overall_max_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_max_risk_score,\n        
ROUND(CAST(CAST(pas.overall_min_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_min_risk_score,\n        ROUND(CAST(CAST(pas.overall_median_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_median_risk_score,\n        ROUND(CAST(CAST(pas.overall_p95_risk_score AS NUMERIC) AS NUMERIC), 2) AS overall_p95_risk_score,\n        ROUND(CAST(CAST(pas.overall_risk_stddev AS NUMERIC) AS NUMERIC), 2) AS overall_risk_stddev,\n        ROUND(CAST(CAST(pas.overall_avg_precipitation_risk AS NUMERIC) AS NUMERIC), 2) AS overall_avg_precipitation_risk,\n        ROUND(CAST(CAST(pas.overall_avg_temperature_risk AS NUMERIC) AS NUMERIC), 2) AS overall_avg_temperature_risk,\n        ROUND(CAST(CAST(pas.overall_avg_wind_risk AS NUMERIC) AS NUMERIC), 2) AS overall_avg_wind_risk,\n        ROUND(CAST(CAST(pas.overall_avg_freeze_risk AS NUMERIC) AS NUMERIC), 2) AS overall_avg_freeze_risk,\n        ROUND(CAST(CAST(pas.overall_avg_flood_risk AS NUMERIC) AS NUMERIC), 2) AS overall_avg_flood_risk,\n        ROUND(CAST(CAST(pas.overall_avg_extreme_event_probability AS NUMERIC) AS NUMERIC), 4) AS overall_avg_extreme_event_probability,\n        -- Risk category\n        CASE\n            WHEN pas.overall_avg_risk_score >= 75 THEN 'Extreme'\n            WHEN pas.overall_avg_risk_score >= 50 THEN 'Very High'\n            WHEN pas.overall_avg_risk_score >= 30 THEN 'High'\n            WHEN pas.overall_avg_risk_score >= 15 THEN 'Moderate'\n            ELSE 'Low'\n        END AS risk_category,\n        -- Overall risk rank (by average risk score)\n        ROW_NUMBER() OVER (\n            PARTITION BY pas.policy_type, pas.coverage_type\n            ORDER BY pas.overall_avg_risk_score DESC\n        ) AS risk_rank_by_type,\n        -- Overall risk rank (all policy areas)\n        ROW_NUMBER() OVER (\n            ORDER BY pas.overall_avg_risk_score DESC\n        ) AS overall_risk_rank,\n        -- Percentile rank\n        PERCENT_RANK() OVER (\n            ORDER BY pas.overall_avg_risk_score DESC\n        ) 
AS risk_percentile,\n        -- Decile rank\n        NTILE(10) OVER (\n            ORDER BY pas.overall_avg_risk_score DESC\n        ) AS risk_decile\n    FROM policy_area_summary pas\n),\ncomparative_analysis AS (\n    -- Fifth CTE: Comparative analysis across policy areas\n    SELECT\n        rrc.policy_area_id,\n        rrc.policy_type,\n        rrc.coverage_type,\n        rrc.policy_area_name,\n        rrc.state_code,\n        rrc.risk_zone,\n        rrc.forecast_days_covered,\n        rrc.forecast_dates_covered,\n        rrc.total_risk_observations,\n        rrc.overall_avg_risk_score,\n        rrc.overall_max_risk_score,\n        rrc.overall_min_risk_score,\n        rrc.overall_median_risk_score,\n        rrc.overall_p95_risk_score,\n        rrc.overall_risk_stddev,\n        rrc.overall_avg_precipitation_risk,\n        rrc.overall_avg_temperature_risk,\n        rrc.overall_avg_wind_risk,\n        rrc.overall_avg_freeze_risk,\n        rrc.overall_avg_flood_risk,\n        rrc.overall_avg_extreme_event_probability,\n        rrc.risk_category,\n        rrc.risk_rank_by_type,\n        rrc.overall_risk_rank,\n        ROUND(CAST(CAST(rrc.risk_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS risk_percentile,\n        rrc.risk_decile,\n        -- Compare with average risk for policy type\n        AVG(rrc.overall_avg_risk_score) OVER (\n            PARTITION BY rrc.policy_type, rrc.coverage_type\n        ) AS avg_risk_for_type,\n        -- Deviation from type average\n        rrc.overall_avg_risk_score - AVG(rrc.overall_avg_risk_score) OVER (\n            PARTITION BY rrc.policy_type, rrc.coverage_type\n        ) AS deviation_from_type_avg,\n        -- Percent deviation from type average\n        CASE\n            WHEN AVG(rrc.overall_avg_risk_score) OVER (\n                PARTITION BY rrc.policy_type, rrc.coverage_type\n            ) != 0 THEN\n                ((rrc.overall_avg_risk_score - AVG(rrc.overall_avg_risk_score) OVER (\n                    PARTITION BY 
rrc.policy_type, rrc.coverage_type\n                )) / ABS(AVG(rrc.overall_avg_risk_score) OVER (\n                    PARTITION BY rrc.policy_type, rrc.coverage_type\n                ))) * 100\n            ELSE NULL\n        END AS percent_deviation_from_type_avg\n    FROM risk_ranking_calculation rrc\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    policy_area_name,\n    state_code,\n    risk_zone,\n    forecast_days_covered,\n    forecast_dates_covered,\n    total_risk_observations,\n    overall_avg_risk_score,\n    overall_max_risk_score,\n    overall_min_risk_score,\n    overall_median_risk_score,\n    overall_p95_risk_score,\n    overall_risk_stddev,\n    overall_avg_precipitation_risk,\n    overall_avg_temperature_risk,\n    overall_avg_wind_risk,\n    overall_avg_freeze_risk,\n    overall_avg_flood_risk,\n    overall_avg_extreme_event_probability,\n    risk_category,\n    risk_rank_by_type,\n    overall_risk_rank,\n    risk_percentile,\n    risk_decile,\n    ROUND(CAST(CAST(avg_risk_for_type AS NUMERIC) AS NUMERIC), 2) AS avg_risk_for_type,\n    ROUND(CAST(CAST(deviation_from_type_avg AS NUMERIC) AS NUMERIC), 2) AS deviation_from_type_avg,\n    ROUND(CAST(CAST(percent_deviation_from_type_avg AS NUMERIC) AS NUMERIC), 2) AS percent_deviation_from_type_avg\nFROM comparative_analysis\nORDER BY overall_risk_rank\nLIMIT 1000;",
      "line_number":
    5093,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00498,
        "row_count": 0,
        "column_count": 29,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 21,
      "title": "Forecast-to-Rate Impact Analysis",
      "description": "Use Case: Insurance Underwriting - Forecast Parameter Impact on Rate Determination Description: Analyzes how individual forecast parameters (temperature, precipitation, wind) impact insurance rates for December 3-17, 2025. Quantifies the contribution of each weather parameter to final rate calculations. Business Value:
    Parameter impact analysis showing which forecast parameters drive rate changes and their relative contributions. Purpose: Enables insurance companies to understand which weather p",
      "complexity": "Multiple CTEs (6+ levels), parameter contribution analysis, impact calculations, correlation analysis, window functions",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nforecast_rate_mapping_data AS (\n    -- Second CTE: Get forecast-to-rate mappings\n    SELECT\n        frm.mapping_id,\n        frm.forecast_id,\n        frm.rate_table_id,\n        frm.risk_factor_id,\n        frm.policy_area_id,\n        frm.forecast_date,\n        frm.forecast_day,\n        gf.forecast_time,\n        frm.parameter_name,\n        gf.parameter_value,\n        NULL::NUMERIC AS risk_contribution,\n        NULL::NUMERIC AS rate_impact,\n        gf.parameter_value AS forecast_parameter_value,\n        irt.risk_adjusted_rate,\n        irt.base_rate,\n        irt.risk_multiplier\n    FROM forecast_rate_mapping frm\n    INNER JOIN grib2_forecasts gf ON frm.forecast_id = gf.forecast_id\n    LEFT JOIN insurance_rate_tables irt ON frm.rate_table_id = irt.rate_table_id\n    WHERE frm.forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n        AND frm.forecast_day BETWEEN 7 AND 14\n),\nparameter_impact_aggregation AS (\n    -- Third CTE: Aggregate impact by parameter\n    SELECT\n        frmd.policy_area_id,\n        frmd.parameter_name,\n        frmd.forecast_day,\n        frmd.forecast_date,\n        COUNT(*) AS parameter_observations,\n        AVG(frmd.parameter_value) AS avg_parameter_value,\n        MIN(frmd.parameter_value) AS min_parameter_value,\n        MAX(frmd.parameter_value) AS max_parameter_value,\n        STDDEV(frmd.parameter_value) AS parameter_stddev,\n        AVG(frmd.risk_contribution) AS avg_risk_contribution,\n        SUM(frmd.risk_contribution) AS total_risk_contribution,\n        AVG(frmd.rate_impact) AS avg_rate_impact,\n        SUM(frmd.rate_impact) AS total_rate_impact,\n        AVG(frmd.risk_adjusted_rate) AS avg_risk_adjusted_rate,\n        AVG(frmd.base_rate) AS avg_base_rate,\n        AVG(frmd.risk_multiplier) AS avg_risk_multiplier\n    
FROM forecast_rate_mapping_data frmd\n    GROUP BY\n        frmd.policy_area_id,\n        frmd.parameter_name,\n        frmd.forecast_day,\n        frmd.forecast_date\n),\nparameter_contribution_analysis AS (\n    -- Fourth CTE: Analyze parameter contributions\n    SELECT\n        pia.policy_area_id,\n        pia.parameter_name,\n        pia.forecast_day,\n        pia.forecast_date,\n        pia.parameter_observations,\n        ROUND(CAST(CAST(pia.avg_parameter_value AS NUMERIC) AS NUMERIC), 2) AS avg_parameter_value,\n        ROUND(CAST(CAST(pia.min_parameter_value AS NUMERIC) AS NUMERIC), 2) AS min_parameter_value,\n        ROUND(CAST(CAST(pia.max_parameter_value AS NUMERIC) AS NUMERIC), 2) AS max_parameter_value,\n        ROUND(CAST(CAST(pia.parameter_stddev AS NUMERIC) AS NUMERIC), 2) AS parameter_stddev,\n        ROUND(CAST(CAST(pia.avg_risk_contribution AS NUMERIC) AS NUMERIC), 4) AS avg_risk_contribution,\n        ROUND(CAST(CAST(pia.total_risk_contribution AS NUMERIC) AS NUMERIC), 4) AS total_risk_contribution,\n        ROUND(CAST(CAST(pia.avg_rate_impact AS NUMERIC) AS NUMERIC), 2) AS avg_rate_impact,\n        ROUND(CAST(CAST(pia.total_rate_impact AS NUMERIC) AS NUMERIC), 2) AS total_rate_impact,\n        ROUND(CAST(CAST(pia.avg_risk_adjusted_rate AS NUMERIC) AS NUMERIC), 2) AS avg_risk_adjusted_rate,\n        ROUND(CAST(CAST(pia.avg_base_rate AS NUMERIC) AS NUMERIC), 2) AS avg_base_rate,\n        ROUND(CAST(CAST(pia.avg_risk_multiplier AS NUMERIC) AS NUMERIC), 3) AS avg_risk_multiplier,\n        -- Contribution percentage\n        CASE\n            WHEN SUM(pia.total_risk_contribution) OVER (\n                PARTITION BY pia.policy_area_id, pia.forecast_day, pia.forecast_date\n            ) != 0 THEN\n                (pia.total_risk_contribution / SUM(pia.total_risk_contribution) OVER (\n                    PARTITION BY pia.policy_area_id, pia.forecast_day, pia.forecast_date\n                )) * 100\n            ELSE NULL\n        END AS 
contribution_percentage,\n        -- Impact percentage of base rate\n        CASE\n            WHEN pia.avg_base_rate != 0 THEN\n                (pia.total_rate_impact / ABS(pia.avg_base_rate)) * 100\n            ELSE NULL\n        END AS impact_percentage_of_base_rate\n    FROM parameter_impact_aggregation pia\n),\nparameter_ranking AS (\n    -- Fifth CTE: Rank parameters by impact\n    SELECT\n        pca.policy_area_id,\n        pca.parameter_name,\n        pca.forecast_day,\n        pca.forecast_date,\n        pca.parameter_observations,\n        pca.avg_parameter_value,\n        pca.min_parameter_value,\n        pca.max_parameter_value,\n        pca.parameter_stddev,\n        pca.avg_risk_contribution,\n        pca.total_risk_contribution,\n        pca.avg_rate_impact,\n        pca.total_rate_impact,\n        pca.avg_risk_adjusted_rate,\n        pca.avg_base_rate,\n        pca.avg_risk_multiplier,\n        ROUND(CAST(CAST(pca.contribution_percentage AS NUMERIC) AS NUMERIC), 2) AS contribution_percentage,\n        ROUND(CAST(CAST(pca.impact_percentage_of_base_rate AS NUMERIC) AS NUMERIC), 2) AS impact_percentage_of_base_rate,\n        -- Parameter impact rank\n        ROW_NUMBER() OVER (\n            PARTITION BY pca.policy_area_id, pca.forecast_day, pca.forecast_date\n            ORDER BY ABS(pca.total_rate_impact) DESC\n        ) AS parameter_impact_rank,\n        -- Parameter contribution rank\n        ROW_NUMBER() OVER (\n            PARTITION BY pca.policy_area_id, pca.forecast_day, pca.forecast_date\n            ORDER BY pca.total_risk_contribution DESC\n        ) AS parameter_contribution_rank\n    FROM parameter_contribution_analysis pca\n)\nSELECT\n    policy_area_id,\n    parameter_name,\n    forecast_day,\n    forecast_date,\n    parameter_observations,\n    avg_parameter_value,\n    min_parameter_value,\n    max_parameter_value,\n    parameter_stddev,\n    avg_risk_contribution,\n    total_risk_contribution,\n    avg_rate_impact,\n    
total_rate_impact,\n    avg_risk_adjusted_rate,\n    avg_base_rate,\n    avg_risk_multiplier,\n    contribution_percentage,\n    impact_percentage_of_base_rate,\n    parameter_impact_rank,\n    parameter_contribution_rank,\n    CASE\n        WHEN parameter_impact_rank = 1 THEN 'Primary Driver'\n        WHEN parameter_impact_rank <= 3 THEN 'Significant Contributor'\n        ELSE 'Minor Contributor'\n    END AS impact_classification\nFROM parameter_ranking\nWHERE forecast_day BETWEEN 7 AND 14\n    AND forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\nORDER BY policy_area_id, forecast_day, forecast_date, parameter_impact_rank\nLIMIT 5000;",
      "line_number": 5328,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005225,
        "row_count": 0,
        "column_count": 21,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 22,
      "title": "Multi-Day Forecast Ensemble Rate Analysis",
      "description": "Use Case: Insurance Underwriting - Ensemble Forecast Rate Analysis for Robust Pricing Description:
    Analyzes rates across multiple forecast days (7-14 days) as an ensemble to determine robust, consensus rates. Uses ensemble statistics to reduce forecast uncertainty and provide more stable pricing. Purpose: Ensemble rate analysis showing consensus rates across forecast days with confidence intervals. Business Value: Provides more robust rate determination by combining multiple forecast days, reduc",
      "complexity": "Multiple CTEs (7+ levels), ensemble statistics, consensus calculations, confidence intervals, window functions, statistical aggregation",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nrate_ensemble_data AS (\n    -- Second CTE: Collect rates across all forecast days\n    SELECT\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type,\n        irt.forecast_day,\n        irt.forecast_date,\n        irt.base_rate,\n        irt.risk_adjusted_rate,\n        irt.risk_multiplier,\n        irt.confidence_level,\n        irt.overall_risk_score\n    FROM insurance_rate_tables irt\n    WHERE irt.forecast_period_start = DATE '2025-12-03'\n        AND irt.forecast_period_end = DATE '2025-12-17'\n        AND irt.forecast_day BETWEEN 7 AND 14\n        AND irt.forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n),\nensemble_statistics AS (\n    -- Third CTE: Calculate ensemble statistics\n    SELECT\n        red.policy_area_id,\n        red.policy_type,\n        red.coverage_type,\n        red.forecast_date,\n        COUNT(DISTINCT red.forecast_day) AS forecast_days_count,\n        COUNT(*) AS total_rate_observations,\n        -- Rate statistics\n        MIN(red.risk_adjusted_rate) AS ensemble_min_rate,\n        MAX(red.risk_adjusted_rate) AS ensemble_max_rate,\n        AVG(red.risk_adjusted_rate) AS ensemble_mean_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY red.risk_adjusted_rate) AS ensemble_median_rate,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY red.risk_adjusted_rate) AS ensemble_q1_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY red.risk_adjusted_rate) AS ensemble_q3_rate,\n        PERCENTILE_CONT(0.10) WITHIN GROUP (ORDER BY red.risk_adjusted_rate) AS ensemble_p10_rate,\n        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY red.risk_adjusted_rate) AS ensemble_p90_rate,\n        STDDEV(red.risk_adjusted_rate) AS ensemble_stddev_rate,\n        VARIANCE(red.risk_adjusted_rate) AS ensemble_variance_rate,\n  
      -- Confidence statistics\n        AVG(red.confidence_level) AS ensemble_avg_confidence,\n        MIN(red.confidence_level) AS ensemble_min_confidence,\n        MAX(red.confidence_level) AS ensemble_max_confidence,\n        -- Risk score statistics\n        AVG(red.overall_risk_score) AS ensemble_avg_risk_score,\n        STDDEV(red.overall_risk_score) AS ensemble_risk_score_stddev\n    FROM rate_ensemble_data red\n    GROUP BY\n        red.policy_area_id,\n        red.policy_type,\n        red.coverage_type,\n        red.forecast_date\n),\nensemble_consensus_calculation AS (\n    -- Fourth CTE: Calculate consensus rates\n    SELECT\n        es.policy_area_id,\n        es.policy_type,\n        es.coverage_type,\n        es.forecast_date,\n        es.forecast_days_count,\n        es.total_rate_observations,\n        ROUND(CAST(CAST(es.ensemble_min_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_min_rate,\n        ROUND(CAST(CAST(es.ensemble_max_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_max_rate,\n        ROUND(CAST(CAST(es.ensemble_mean_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_mean_rate,\n        ROUND(CAST(CAST(es.ensemble_median_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_median_rate,\n        ROUND(CAST(CAST(es.ensemble_q1_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_q1_rate,\n        ROUND(CAST(CAST(es.ensemble_q3_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_q3_rate,\n        ROUND(CAST(CAST(es.ensemble_p10_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_p10_rate,\n        ROUND(CAST(CAST(es.ensemble_p90_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_p90_rate,\n        ROUND(CAST(CAST(es.ensemble_stddev_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_stddev_rate,\n        ROUND(CAST(CAST(es.ensemble_variance_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_variance_rate,\n        ROUND(CAST(CAST(es.ensemble_avg_confidence AS NUMERIC) AS NUMERIC), 2) AS ensemble_avg_confidence,\n        ROUND(CAST(CAST(es.ensemble_min_confidence AS NUMERIC) AS NUMERIC), 2) AS 
ensemble_min_confidence,\n        ROUND(CAST(CAST(es.ensemble_max_confidence AS NUMERIC) AS NUMERIC), 2) AS ensemble_max_confidence,\n        ROUND(CAST(CAST(es.ensemble_avg_risk_score AS NUMERIC) AS NUMERIC), 2) AS ensemble_avg_risk_score,\n        ROUND(CAST(CAST(es.ensemble_risk_score_stddev AS NUMERIC) AS NUMERIC), 2) AS ensemble_risk_score_stddev,\n        -- Consensus rate (use median for robustness)\n        ROUND(CAST(CAST(es.ensemble_median_rate AS NUMERIC) AS NUMERIC), 2) AS consensus_rate,\n        -- Confidence interval (90%)\n        ROUND(CAST(CAST(es.ensemble_p10_rate AS NUMERIC) AS NUMERIC), 2) AS confidence_interval_lower_90,\n        ROUND(CAST(CAST(es.ensemble_p90_rate AS NUMERIC) AS NUMERIC), 2) AS confidence_interval_upper_90,\n        -- Interquartile range\n        ROUND(CAST(CAST(es.ensemble_q3_rate - es.ensemble_q1_rate AS NUMERIC) AS NUMERIC), 2) AS ensemble_iqr,\n        -- Coefficient of variation\n        CASE\n            WHEN es.ensemble_mean_rate != 0 THEN\n                (es.ensemble_stddev_rate / ABS(es.ensemble_mean_rate)) * 100\n            ELSE NULL\n        END AS ensemble_coefficient_of_variation\n    FROM ensemble_statistics es\n),\nensemble_quality_assessment AS (\n    -- Fifth CTE:
    Assess ensemble quality\n    SELECT\n        ecc.policy_area_id,\n        ecc.policy_type,\n        ecc.coverage_type,\n        ecc.forecast_date,\n        ecc.forecast_days_count,\n        ecc.total_rate_observations,\n        ecc.ensemble_min_rate,\n        ecc.ensemble_max_rate,\n        ecc.ensemble_mean_rate,\n        ecc.ensemble_median_rate,\n        ecc.ensemble_q1_rate,\n        ecc.ensemble_q3_rate,\n        ecc.ensemble_p10_rate,\n        ecc.ensemble_p90_rate,\n        ecc.ensemble_stddev_rate,\n        ecc.ensemble_variance_rate,\n        ecc.ensemble_avg_confidence,\n        ecc.ensemble_min_confidence,\n        ecc.ensemble_max_confidence,\n        ecc.ensemble_avg_risk_score,\n        ecc.ensemble_risk_score_stddev,\n        ecc.consensus_rate,\n        ecc.confidence_interval_lower_90,\n        ecc.confidence_interval_upper_90,\n        ecc.ensemble_iqr,\n        ROUND(CAST(CAST(ecc.ensemble_coefficient_of_variation AS NUMERIC) AS NUMERIC), 2) AS ensemble_coefficient_of_variation,\n        -- Ensemble quality score (higher is better)\n        (\n            (ecc.ensemble_avg_confidence / 100.0) * 40.0 +  -- Confidence component (40%)\n            GREATEST(0, 40.0 - (ecc.ensemble_coefficient_of_variation / 2.0)) +  -- Low variability component (40%)\n            (LEAST(ecc.forecast_days_count / 8.0, 1.0) * 20.0)  -- Coverage component (20%)\n        ) AS ensemble_quality_score,\n        -- Ensemble reliability\n        CASE\n            WHEN ecc.ensemble_coefficient_of_variation < 5 THEN 'Very Reliable'\n            WHEN ecc.ensemble_coefficient_of_variation < 10 THEN 'Reliable'\n            WHEN ecc.ensemble_coefficient_of_variation < 20 THEN 'Moderately Reliable'\n            ELSE 'Less Reliable'\n        END AS ensemble_reliability\n    FROM ensemble_consensus_calculation ecc\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    forecast_date,\n    forecast_days_count,\n    total_rate_observations,\n    
ensemble_min_rate,\n    ensemble_max_rate,\n    ensemble_mean_rate,\n    ensemble_median_rate,\n    ensemble_q1_rate,\n    ensemble_q3_rate,\n    ensemble_p10_rate,\n    ensemble_p90_rate,\n    ensemble_stddev_rate,\n    ensemble_variance_rate,\n    ensemble_avg_confidence,\n    ensemble_min_confidence,\n    ensemble_max_confidence,\n    ensemble_avg_risk_score,\n    ensemble_risk_score_stddev,\n    consensus_rate,\n    confidence_interval_lower_90,\n    confidence_interval_upper_90,\n    ensemble_iqr,\n    ensemble_coefficient_of_variation,\n    ROUND(CAST(CAST(ensemble_quality_score AS NUMERIC) AS NUMERIC), 2) AS ensemble_quality_score,\n    ensemble_reliability\nFROM ensemble_quality_assessment\nWHERE forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\nORDER BY policy_area_id, policy_type, coverage_type, forecast_date\nLIMIT 2000;",
      "line_number": 5507,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006394,
        "row_count": 0,
        "column_count": 28,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 23,
      "title": "Forecast Day Selection Optimization",
      "description": "Use Case: Insurance Underwriting - Optimal Forecast Day Selection for Rate Determination Description:
    Determines optimal forecast day (7-14 days) for rate determination based on accuracy, confidence, and business requirements. Balances forecast accuracy (shorter forecast) with planning horizon (longer forecast). Business Value: Forecast day optimization report recommending optimal forecast day for each policy area with justification. Purpose: Enables insurance companies to select the optimal forecast day for rate determination.",
      "complexity": "Multiple CTEs (6 levels), correlated subquery, optimization scoring, multi-criteria analysis, window functions, ranking calculations",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nforecast_day_metrics AS (\n    -- Second CTE: Collect metrics for each forecast day\n    SELECT\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type,\n        irt.forecast_day,\n        COUNT(*) AS rate_count,\n        AVG(irt.confidence_level) AS avg_confidence,\n        MIN(irt.confidence_level) AS min_confidence,\n        MAX(irt.confidence_level) AS max_confidence,\n        AVG(irt.risk_adjusted_rate) AS avg_rate,\n        STDDEV(irt.risk_adjusted_rate) AS rate_stddev,\n        AVG(irt.overall_risk_score) AS avg_risk_score,\n        STDDEV(irt.overall_risk_score) AS risk_score_stddev,\n        -- Historical accuracy (if available from claims validation)\n        COALESCE(\n            (SELECT AVG(CASE WHEN ich.forecast_available = TRUE AND ich.forecast_day = irt.forecast_day THEN 1 ELSE 0 END)\n             FROM insurance_claims_history ich\n             WHERE ich.policy_area_id = irt.policy_area_id\n               AND ich.loss_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'), 0.5\n        ) AS historical_accuracy_rate\n    FROM insurance_rate_tables irt\n    WHERE irt.forecast_period_start = DATE '2025-12-03'\n        AND irt.forecast_period_end = DATE '2025-12-17'\n        AND irt.forecast_day BETWEEN 7 AND 14\n        AND irt.forecast_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n    GROUP BY\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type,\n        irt.forecast_day\n),\nforecast_day_scoring AS (\n    -- Third CTE:
    Score each forecast day\n    SELECT\n        fdm.policy_area_id,\n        fdm.policy_type,\n        fdm.coverage_type,\n        fdm.forecast_day,\n        fdm.rate_count,\n        ROUND(CAST(CAST(fdm.avg_confidence AS NUMERIC) AS NUMERIC), 2) AS avg_confidence,\n        ROUND(CAST(CAST(fdm.min_confidence AS NUMERIC) AS NUMERIC), 2) AS min_confidence,\n        ROUND(CAST(CAST(fdm.max_confidence AS NUMERIC) AS NUMERIC), 2) AS max_confidence,\n        ROUND(CAST(CAST(fdm.avg_rate AS NUMERIC) AS NUMERIC), 2) AS avg_rate,\n        ROUND(CAST(CAST(fdm.rate_stddev AS NUMERIC) AS NUMERIC), 2) AS rate_stddev,\n        ROUND(CAST(CAST(fdm.avg_risk_score AS NUMERIC) AS NUMERIC), 2) AS avg_risk_score,\n        ROUND(CAST(CAST(fdm.risk_score_stddev AS NUMERIC) AS NUMERIC), 2) AS risk_score_stddev,\n        ROUND(CAST(CAST(fdm.historical_accuracy_rate AS NUMERIC) AS NUMERIC), 4) AS historical_accuracy_rate,\n        -- Confidence score (higher confidence = better, but decreases with forecast day)\n        CASE\n            WHEN fdm.forecast_day <= 8 THEN fdm.avg_confidence * 1.0  -- Full weight for 7-8 days\n            WHEN fdm.forecast_day <= 10 THEN fdm.avg_confidence * 0.9  -- Slight reduction for 9-10 days\n            WHEN fdm.forecast_day <= 12 THEN fdm.avg_confidence * 0.8  -- More reduction for 11-12 days\n            ELSE fdm.avg_confidence * 0.7  -- Lower weight for 13-14 days\n        END AS confidence_score,\n        -- Stability score (lower volatility = better)\n        CASE\n            WHEN fdm.avg_rate != 0 THEN\n                100.0 - LEAST((fdm.rate_stddev / ABS(fdm.avg_rate)) * 100, 100.0)\n            ELSE 50.0\n        END AS stability_score,\n        -- Accuracy score (from historical data)\n        fdm.historical_accuracy_rate * 100 AS accuracy_score,\n        -- Planning horizon score (longer forecast = better for planning)\n        (fdm.forecast_day / 14.0) * 100 AS planning_horizon_score\n    FROM forecast_day_metrics 
fdm\n),\noptimization_scoring AS (\n    -- Fourth CTE: Calculate optimization scores\n    SELECT\n        fds.policy_area_id,\n        fds.policy_type,\n        fds.coverage_type,\n        fds.forecast_day,\n        fds.rate_count,\n        fds.avg_confidence,\n        fds.min_confidence,\n        fds.max_confidence,\n        fds.avg_rate,\n        fds.rate_stddev,\n        fds.avg_risk_score,\n        fds.risk_score_stddev,\n        fds.historical_accuracy_rate,\n        fds.confidence_score,\n        fds.stability_score,\n        fds.accuracy_score,\n        fds.planning_horizon_score,\n        -- Overall optimization score (weighted combination)\n        (\n            fds.confidence_score * 0.35 +  -- Confidence weight: 35%\n            fds.stability_score * 0.30 +  -- Stability weight: 30%\n            fds.accuracy_score * 0.25 +  -- Accuracy weight: 25%\n            fds.planning_horizon_score * 0.10  -- Planning horizon weight: 10%\n        ) AS overall_optimization_score\n    FROM forecast_day_scoring fds\n),\nforecast_day_ranking AS (\n    -- Fifth CTE: Rank forecast days\n    SELECT\n        os.policy_area_id,\n        os.policy_type,\n        os.coverage_type,\n        os.forecast_day,\n        os.rate_count,\n        os.avg_confidence,\n        os.min_confidence,\n        os.max_confidence,\n        os.avg_rate,\n        os.rate_stddev,\n        os.avg_risk_score,\n        os.risk_score_stddev,\n        os.historical_accuracy_rate,\n        ROUND(CAST(CAST(os.confidence_score AS NUMERIC) AS NUMERIC), 2) AS confidence_score,\n        ROUND(CAST(CAST(os.stability_score AS NUMERIC) AS NUMERIC), 2) AS stability_score,\n        ROUND(CAST(CAST(os.accuracy_score AS NUMERIC) AS NUMERIC), 2) AS accuracy_score,\n        ROUND(CAST(CAST(os.planning_horizon_score AS NUMERIC) AS NUMERIC), 2) AS planning_horizon_score,\n        ROUND(CAST(CAST(os.overall_optimization_score AS NUMERIC) AS NUMERIC), 2) AS overall_optimization_score,\n        -- Rank by optimization 
score\n        ROW_NUMBER() OVER (\n            PARTITION BY os.policy_area_id, os.policy_type, os.coverage_type\n            ORDER BY os.overall_optimization_score DESC\n        ) AS optimization_rank,\n        -- Percentile rank\n        PERCENT_RANK() OVER (\n            PARTITION BY os.policy_area_id, os.policy_type, os.coverage_type\n            ORDER BY os.overall_optimization_score DESC\n        ) AS optimization_percentile\n    FROM optimization_scoring os\n),\nrecommendation_generation AS (\n    -- Sixth CTE: Generate recommendations\n    SELECT\n        fdr.policy_area_id,\n        fdr.policy_type,\n        fdr.coverage_type,\n        fdr.forecast_day,\n        fdr.rate_count,\n        fdr.avg_confidence,\n        fdr.min_confidence,\n        fdr.max_confidence,\n        fdr.avg_rate,\n        fdr.rate_stddev,\n        fdr.avg_risk_score,\n        fdr.risk_score_stddev,\n        fdr.historical_accuracy_rate,\n        fdr.confidence_score,\n        fdr.stability_score,\n        fdr.accuracy_score,\n        fdr.planning_horizon_score,\n        fdr.overall_optimization_score,\n        fdr.optimization_rank,\n        ROUND(CAST(CAST(fdr.optimization_percentile * 100 AS NUMERIC) AS NUMERIC), 2) AS optimization_percentile,\n        -- Recommendation status\n        CASE\n            WHEN fdr.optimization_rank = 1 THEN 'Recommended'\n            WHEN fdr.optimization_rank <= 3 THEN 'Alternative'\n            ELSE 'Not Recommended'\n        END AS recommendation_status,\n        -- Justification\n        CASE\n            WHEN fdr.optimization_rank = 1 THEN\n                'Optimal balance of confidence (' || ROUND(CAST(fdr.avg_confidence AS NUMERIC), 1) || '%), stability, and accuracy'\n            WHEN fdr.optimization_rank <= 3 THEN\n                'Good alternative with ' || ROUND(CAST(fdr.avg_confidence AS NUMERIC), 1) || '% confidence'\n            ELSE\n                'Lower optimization score compared to alternatives'\n        END AS 
recommendation_justification\n    FROM forecast_day_ranking fdr\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    forecast_day,\n    rate_count,\n    avg_confidence,\n    min_confidence,\n    max_confidence,\n    avg_rate,\n    rate_stddev,\n    avg_risk_score,\n    risk_score_stddev,\n    historical_accuracy_rate,\n    confidence_score,\n    stability_score,\n    accuracy_score,\n    planning_horizon_score,\n    overall_optimization_score,\n    optimization_rank,\n    optimization_percentile,\n    recommendation_status,\n    recommendation_justification\nFROM recommendation_generation\nWHERE forecast_day BETWEEN 7 AND 14\nORDER BY policy_area_id, policy_type, coverage_type, optimization_rank\nLIMIT 3000;",
      "line_number": 5703,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005099,
        "row_count": 0,
        "column_count": 22,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 24,
      "title": "Comprehensive Insurance Rate Modeling Summary",
      "description": "Use Case: Insurance Underwriting - Comprehensive Rate Modeling Summary Dashboard Description: Provides comprehensive summary of insurance rate modeling for December 3-17, 2025 period. Aggregates risk factors, rates, comparisons, validations, and recommendations into a single dashboard view. Purpose:
    Comprehensive rate modeling summary dashboard with all key metrics and recommendations. Business Value: Provides insurance companies with a single-source-of-truth dashboard for rate modeling decisions.",
      "complexity": "Multiple CTEs (7 levels), comprehensive aggregation, summary statistics, dashboard metrics, conditional aggregation, multi-table joins",
      "expected_output": "Query results",
      "sql": "WITH forecast_period AS (\n    -- First CTE: Define forecast period\n    SELECT\n        DATE '2025-12-03' AS period_start,\n        DATE '2025-12-17' AS period_end\n),\nrisk_factors_summary AS (\n    -- Second CTE: Summarize risk factors\n    SELECT\n        irf.policy_area_id,\n        COUNT(DISTINCT irf.forecast_day) AS forecast_days_analyzed,\n        AVG(irf.overall_risk_score) AS avg_overall_risk_score,\n        MAX(irf.overall_risk_score) AS max_overall_risk_score,\n        MIN(irf.overall_risk_score) AS min_overall_risk_score,\n        AVG(irf.cumulative_precipitation_risk) AS avg_precipitation_risk,\n        AVG(irf.temperature_extreme_risk) AS avg_temperature_risk,\n        AVG(irf.wind_damage_risk) AS avg_wind_risk,\n        AVG(irf.freeze_risk) AS avg_freeze_risk,\n        AVG(irf.flood_risk) AS avg_flood_risk,\n        AVG(irf.extreme_event_probability) AS avg_extreme_event_probability\n    FROM insurance_risk_factors irf\n    WHERE irf.forecast_period_start = DATE '2025-12-03'\n        AND irf.forecast_period_end = DATE '2025-12-17'\n        AND irf.forecast_day BETWEEN 7 AND 14\n    GROUP BY irf.policy_area_id\n),\nrate_tables_summary AS (\n    -- Third CTE: Summarize rate tables\n    SELECT\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type,\n        COUNT(*) AS total_rate_records,\n        COUNT(DISTINCT irt.forecast_day) AS forecast_days_covered,\n        AVG(irt.base_rate) AS avg_base_rate,\n        AVG(irt.risk_adjusted_rate) AS avg_risk_adjusted_rate,\n        MIN(irt.risk_adjusted_rate) AS min_risk_adjusted_rate,\n        MAX(irt.risk_adjusted_rate) AS max_risk_adjusted_rate,\n        STDDEV(irt.risk_adjusted_rate) AS rate_stddev,\n        AVG(irt.confidence_level) AS avg_confidence_level,\n        AVG(irt.risk_multiplier) AS avg_risk_multiplier\n    FROM insurance_rate_tables irt\n    WHERE irt.forecast_period_start = DATE '2025-12-03'\n        AND irt.forecast_period_end = DATE '2025-12-17'\n      
  AND irt.forecast_day BETWEEN 7 AND 14\n    GROUP BY\n        irt.policy_area_id,\n        irt.policy_type,\n        irt.coverage_type\n),\nrate_comparison_summary AS (\n    -- Fourth CTE: Summarize rate comparisons\n    SELECT\n        rtc.policy_area_id,\n        rtc.policy_type,\n        rtc.coverage_type,\n        AVG(rtc.rate_volatility_percent) AS avg_rate_volatility,\n        AVG(rtc.avg_confidence_level) AS avg_comparison_confidence,\n        COUNT(CASE WHEN rtc.recommendation_status = 'Recommended' THEN 1 END) AS recommended_forecast_days_count\n    FROM rate_table_comparison rtc\n    WHERE rtc.forecast_period_start = DATE '2025-12-03'\n        AND rtc.forecast_period_end = DATE '2025-12-17'\n    GROUP BY\n        rtc.policy_area_id,\n        rtc.policy_type,\n        rtc.coverage_type\n),\nclaims_validation_summary AS (\n    -- Fifth CTE: Summarize claims validation\n    SELECT\n        ich.policy_area_id,\n        COUNT(*) AS total_claims,\n        COUNT(CASE WHEN ich.forecast_available = TRUE THEN 1 END) AS claims_with_forecast,\n        AVG(CASE WHEN ich.forecast_available = TRUE THEN NULL::NUMERIC ELSE NULL END) AS avg_forecast_error,\n        AVG(ich.claim_amount) AS avg_loss_amount,\n        SUM(ich.claim_amount) AS total_loss_amount\n    FROM insurance_claims_history ich\n    WHERE ich.loss_date BETWEEN DATE '2025-12-03' AND DATE '2025-12-17'\n    GROUP BY ich.policy_area_id\n),\ncomprehensive_summary AS (\n    -- Sixth CTE: Combine all summaries\n    SELECT\n        COALESCE(rfs.policy_area_id, rts.policy_area_id, rcs.policy_area_id, cvs.policy_area_id) AS policy_area_id,\n        rts.policy_type,\n        rts.coverage_type,\n        ipa.policy_area_name,\n        ipa.state_code,\n        ipa.risk_zone,\n        -- Risk factors\n        rfs.forecast_days_analyzed,\n        ROUND(CAST(CAST(rfs.avg_overall_risk_score AS NUMERIC) AS NUMERIC), 2) AS avg_overall_risk_score,\n        ROUND(CAST(CAST(rfs.max_overall_risk_score AS NUMERIC) AS NUMERIC), 
2) AS max_overall_risk_score,\n        ROUND(CAST(CAST(rfs.min_overall_risk_score AS NUMERIC) AS NUMERIC), 2) AS min_overall_risk_score,\n        ROUND(CAST(CAST(rfs.avg_precipitation_risk AS NUMERIC) AS NUMERIC), 2) AS avg_precipitation_risk,\n        ROUND(CAST(CAST(rfs.avg_temperature_risk AS NUMERIC) AS NUMERIC), 2) AS avg_temperature_risk,\n        ROUND(CAST(CAST(rfs.avg_wind_risk AS NUMERIC) AS NUMERIC), 2) AS avg_wind_risk,\n        ROUND(CAST(CAST(rfs.avg_freeze_risk AS NUMERIC) AS NUMERIC), 2) AS avg_freeze_risk,\n        ROUND(CAST(CAST(rfs.avg_flood_risk AS NUMERIC) AS NUMERIC), 2) AS avg_flood_risk,\n        ROUND(CAST(CAST(rfs.avg_extreme_event_probability AS NUMERIC) AS NUMERIC), 4) AS avg_extreme_event_probability,\n        -- Rate tables\n        rts.total_rate_records,\n        rts.forecast_days_covered,\n        ROUND(CAST(CAST(rts.avg_base_rate AS NUMERIC) AS NUMERIC), 2) AS avg_base_rate,\n        ROUND(CAST(CAST(rts.avg_risk_adjusted_rate AS NUMERIC) AS NUMERIC), 2) AS avg_risk_adjusted_rate,\n        ROUND(CAST(CAST(rts.min_risk_adjusted_rate AS NUMERIC) AS NUMERIC), 2) AS min_risk_adjusted_rate,\n        ROUND(CAST(CAST(rts.max_risk_adjusted_rate AS NUMERIC) AS NUMERIC), 2) AS max_risk_adjusted_rate,\n        ROUND(CAST(CAST(rts.rate_stddev AS NUMERIC) AS NUMERIC), 2) AS rate_stddev,\n        ROUND(CAST(CAST(rts.avg_confidence_level AS NUMERIC) AS NUMERIC), 2) AS avg_confidence_level,\n        ROUND(CAST(CAST(rts.avg_risk_multiplier AS NUMERIC) AS NUMERIC), 3) AS avg_risk_multiplier,\n        -- Rate comparison\n        ROUND(CAST(CAST(rcs.avg_rate_volatility AS NUMERIC) AS NUMERIC), 2) AS avg_rate_volatility,\n        ROUND(CAST(CAST(rcs.avg_comparison_confidence AS NUMERIC) AS NUMERIC), 2) AS avg_comparison_confidence,\n        rcs.recommended_forecast_days_count,\n        -- Claims validation\n        COALESCE(cvs.total_claims, 0) AS total_claims,\n        COALESCE(cvs.claims_with_forecast, 0) AS claims_with_forecast,\n        
ROUND(CAST(CAST(cvs.avg_forecast_error AS NUMERIC) AS NUMERIC), 2) AS avg_forecast_error,\n        ROUND(CAST(CAST(cvs.avg_loss_amount AS NUMERIC) AS NUMERIC), 2) AS avg_loss_amount,\n        ROUND(CAST(CAST(cvs.total_loss_amount AS NUMERIC) AS NUMERIC), 2) AS total_loss_amount\n    FROM risk_factors_summary rfs\n    FULL OUTER JOIN rate_tables_summary rts ON rfs.policy_area_id = rts.policy_area_id\n    FULL OUTER JOIN rate_comparison_summary rcs ON COALESCE(rfs.policy_area_id, rts.policy_area_id) = rcs.policy_area_id\n    FULL OUTER JOIN claims_validation_summary cvs ON COALESCE(rfs.policy_area_id, rts.policy_area_id) = cvs.policy_area_id\n    LEFT JOIN insurance_policy_areas ipa ON COALESCE(rfs.policy_area_id, rts.policy_area_id, rcs.policy_area_id, cvs.policy_area_id) = ipa.policy_area_id\n),\ndashboard_metrics AS (\n    -- Seventh CTE: Calculate dashboard metrics\n    SELECT\n        cs.policy_area_id,\n        cs.policy_type,\n        cs.coverage_type,\n        cs.policy_area_name,\n        cs.state_code,\n        cs.risk_zone,\n        cs.forecast_days_analyzed,\n        cs.avg_overall_risk_score,\n        cs.max_overall_risk_score,\n        cs.min_overall_risk_score,\n        cs.avg_precipitation_risk,\n        cs.avg_temperature_risk,\n        cs.avg_wind_risk,\n        cs.avg_freeze_risk,\n        cs.avg_flood_risk,\n        cs.avg_extreme_event_probability,\n        cs.total_rate_records,\n        cs.forecast_days_covered,\n        cs.avg_base_rate,\n        cs.avg_risk_adjusted_rate,\n        cs.min_risk_adjusted_rate,\n        cs.max_risk_adjusted_rate,\n        cs.rate_stddev,\n        cs.avg_confidence_level,\n        cs.avg_risk_multiplier,\n        cs.avg_rate_volatility,\n        cs.avg_comparison_confidence,\n        cs.recommended_forecast_days_count,\n        cs.total_claims,\n        cs.claims_with_forecast,\n        cs.avg_forecast_error,\n        cs.avg_loss_amount,\n        cs.total_loss_amount,\n        -- Risk category\n        CASE\n      
      WHEN cs.avg_overall_risk_score >= 75 THEN 'Extreme'\n            WHEN cs.avg_overall_risk_score >= 50 THEN 'Very High'\n            WHEN cs.avg_overall_risk_score >= 30 THEN 'High'\n            WHEN cs.avg_overall_risk_score >= 15 THEN 'Moderate'\n            ELSE 'Low'\n        END AS risk_category,\n        -- Rate stability\n        CASE\n            WHEN cs.avg_rate_volatility > 30 THEN 'Very High Volatility'\n            WHEN cs.avg_rate_volatility > 20 THEN 'High Volatility'\n            WHEN cs.avg_rate_volatility > 10 THEN 'Moderate Volatility'\n            WHEN cs.avg_rate_volatility > 5 THEN 'Low Volatility'\n            ELSE 'Very Low Volatility'\n        END AS rate_stability,\n        -- Overall status\n        CASE\n            WHEN cs.avg_overall_risk_score >= 75 AND cs.avg_rate_volatility > 20 THEN 'Critical - High Risk & High Volatility'\n            WHEN cs.avg_overall_risk_score >= 50 AND cs.avg_rate_volatility > 15 THEN 'Warning - Elevated Risk & Volatility'\n            WHEN cs.avg_overall_risk_score >= 30 THEN 'Monitor - Moderate Risk'\n            ELSE 'Normal - Low Risk'\n        END AS overall_status\n    FROM comprehensive_summary cs\n)\nSELECT\n    policy_area_id,\n    policy_type,\n    coverage_type,\n    policy_area_name,\n    state_code,\n    risk_zone,\n    forecast_days_analyzed,\n    avg_overall_risk_score,\n    max_overall_risk_score,\n    min_overall_risk_score,\n    avg_precipitation_risk,\n    avg_temperature_risk,\n    avg_wind_risk,\n    avg_freeze_risk,\n    avg_flood_risk,\n    avg_extreme_event_probability,\n    total_rate_records,\n    forecast_days_covered,\n    avg_base_rate,\n    avg_risk_adjusted_rate,\n    min_risk_adjusted_rate,\n    max_risk_adjusted_rate,\n    rate_stddev,\n    avg_confidence_level,\n    avg_risk_multiplier,\n    avg_rate_volatility,\n    avg_comparison_confidence,\n    recommended_forecast_days_count,\n    total_claims,\n    claims_with_forecast,\n    avg_forecast_error,\n    
avg_loss_amount,\n    total_loss_amount,\n    risk_category,\n    rate_stability,\n    overall_status\nFROM dashboard_metrics\nORDER BY avg_overall_risk_score DESC, policy_area_id\nLIMIT 500;",
      "line_number": 5927,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006342,
        "row_count": 0,
        "column_count": 36,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 25,
      "title": "US-Wide NEXRAD Reflectivity Composite Generation",
      "description": "Use Case: Real-Time Weather Monitoring - Nationwide Radar Composite for Severe Weather Detection Description:
    Generates US-wide composite reflectivity from all NEXRAD radar sites. Combines Level II radar data from multiple sites to create seamless nationwide coverage, handling overlapping coverage areas and data quality issues. Business Value: US-wide reflectivity composite showing precipitation intensity across the entire United States with seamless coverage. Purpose: Provides comprehensive real-time nationwide radar coverage for severe weather detection.",
      "complexity": "Multiple CTEs (8+ levels), multi-site data fusion, spatial interpolation, coverage optimization, quality weighting, window functions",
      "expected_output": "Query results",
      "sql": "WITH us_spatial_bounds AS (\n    -- First CTE: Define US spatial bounds\n    SELECT\n        -125.0 AS west_bound,\n        24.0 AS south_bound,\n        -66.0 AS east_bound,\n        50.0 AS north_bound\n),\nactive_nexrad_sites AS (\n    -- Second CTE: Get active NEXRAD sites\n    SELECT\n        nrs.site_id,\n        nrs.site_name,\n        nrs.latitude,\n        nrs.longitude,\n        nrs.site_geom,\n        nrs.elevation_meters,\n        NULL::VARCHAR(10) AS state_code,\n        NULL::VARCHAR(10) AS cwa_code,\n        NULL::NUMERIC AS coverage_radius_km,\n        nrs.operational_status\n    FROM nexrad_radar_sites nrs\n    WHERE nrs.operational_status = 'Operational'\n        AND nrs.site_geom IS NOT NULL\n),\nrecent_nexrad_scans AS (\n    -- Third CTE: Get recent NEXRAD scans (within last hour)\n    SELECT DISTINCT\n        nrg.site_id,\n        nrg.scan_time,\n        DATE_TRUNC('minute', nrg.scan_time) AS scan_time_minute\n    FROM nexrad_reflectivity_grid nrg\n    INNER JOIN active_nexrad_sites ans ON nrg.site_id = ans.site_id\n    WHERE nrg.scan_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour'\n        AND nrg.reflectivity_value IS NOT NULL\n        AND nrg.grid_geom IS NOT NULL\n),\nnexrad_reflectivity_data AS (\n    -- Fourth CTE: Get reflectivity data with spatial information\n    SELECT\n        nrg.grid_id AS radar_data_id,\n        nrg.site_id,\n        nrg.scan_time,\n        nrg.elevation_angle,\n        NULL::NUMERIC AS azimuth_angle,\n        NULL::NUMERIC AS range_km,\n        nrg.reflectivity_value AS reflectivity_dbz,\n        nrg.grid_geom AS reflectivity_geom,\n        ST_X(nrg.grid_geom::GEOMETRY) AS longitude,\n        ST_Y(nrg.grid_geom::GEOMETRY) AS latitude,\n        CASE\n            WHEN nrg.reflectivity_value >= 30 THEN 0  -- High quality\n            WHEN nrg.reflectivity_value >= 20 THEN 2  -- Moderate quality\n            WHEN nrg.reflectivity_value >= 10 THEN 4  -- Lower quality\n            ELSE 6  -- Low quality\n   
     END AS data_quality_flag,\n        ans.coverage_radius_km,\n        -- Distance from radar site\n        ST_DISTANCE(ans.site_geom::geography, nrg.grid_geom::geography) / 1000.0 AS distance_from_site_km,\n        -- Data quality weight (higher quality = higher weight)\n        CASE\n            WHEN nrg.reflectivity_value >= 30 THEN 1.0  -- Perfect quality\n            WHEN nrg.reflectivity_value >= 20 THEN 0.9  -- Good quality\n            WHEN nrg.reflectivity_value >= 10 THEN 0.7  -- Moderate quality\n            ELSE 0.5  -- Lower quality\n        END AS quality_weight,\n        -- Distance weight (closer to radar = higher weight, but consider beam height)\n        CASE\n            WHEN ST_DISTANCE(ans.site_geom::geography, nrg.grid_geom::geography) / 1000.0 <= 50 THEN 1.0\n            WHEN ST_DISTANCE(ans.site_geom::geography, nrg.grid_geom::geography) / 1000.0 <= 100 THEN 0.9\n            WHEN ST_DISTANCE(ans.site_geom::geography, nrg.grid_geom::geography) / 1000.0 <= 150 THEN 0.7\n            WHEN ST_DISTANCE(ans.site_geom::geography, nrg.grid_geom::geography) / 1000.0 <= 200 THEN 0.5\n            ELSE 0.3\n        END AS distance_weight\n    FROM nexrad_reflectivity_grid nrg\n    INNER JOIN active_nexrad_sites ans ON nrg.site_id = ans.site_id\n    INNER JOIN recent_nexrad_scans rns ON (\n        nrg.site_id = rns.site_id\n        AND DATE_TRUNC('minute', nrg.scan_time) = rns.scan_time_minute\n    )\n    WHERE nrg.reflectivity_value IS NOT NULL\n        AND nrg.grid_geom IS NOT NULL\n        \n),\nus_grid_cells AS (\n    -- Fifth CTE: Generate US-wide grid cells (1km resolution)\n    SELECT\n        grid_id,\n        grid_latitude,\n        grid_longitude,\n        ST_SETSRID(ST_MAKEPOINT(grid_longitude, grid_latitude), 4326)::GEOGRAPHY AS grid_geom\n    FROM (\n        SELECT\n            'GRID_' || LPAD(ROW_NUMBER() OVER (ORDER BY lat, lon)::VARCHAR, 10, '0') AS grid_id,\n            lat AS grid_latitude,\n            lon AS grid_longitude\n        
FROM (\n            SELECT\n                generate_series(24, 50, 0.01) AS lat,\n                generate_series(-125, -66, 0.01) AS lon\n        ) grid_points\n        WHERE lat BETWEEN 24 AND 50\n            AND lon BETWEEN -125 AND -66\n    ) grid\n),\ngrid_nexrad_matching AS (\n    -- Sixth CTE: Match grid cells with nearby NEXRAD data\n    SELECT\n        ugc.grid_id,\n        ugc.grid_latitude,\n        ugc.grid_longitude,\n        ugc.grid_geom,\n        nrd.site_id,\n        nrd.reflectivity_dbz,\n        nrd.distance_from_site_km,\n        nrd.quality_weight,\n        nrd.distance_weight,\n        -- Combined weight\n        nrd.quality_weight * nrd.distance_weight AS combined_weight,\n        -- Inverse distance weighting\n        1.0 / (nrd.distance_from_site_km + 1.0) AS inverse_distance_weight,\n        ST_DISTANCE(ugc.grid_geom::geography, nrd.reflectivity_geom::geography) / 1000.0 AS distance_to_grid_km\n    FROM us_grid_cells ugc\n    INNER JOIN nexrad_reflectivity_data nrd ON (\n        ST_DWITHIN(ugc.grid_geom, nrd.reflectivity_geom, 50000)  -- Within 50km\n    )\n),\nweighted_reflectivity_calculation AS (\n    -- Seventh CTE: Calculate weighted reflectivity for each grid cell\n    SELECT\n        gnm.grid_id,\n        gnm.grid_latitude,\n        gnm.grid_longitude,\n        gnm.grid_geom,\n        COUNT(*) AS contributing_sites_count,\n        -- Weighted average reflectivity\n        SUM(nrd.reflectivity_dbz * gnm.combined_weight * gnm.inverse_distance_weight) /\n        NULLIF(SUM(gnm.combined_weight * gnm.inverse_distance_weight), 0) AS weighted_avg_reflectivity_dbz,\n        -- Maximum reflectivity\n        MAX(nrd.reflectivity_dbz) AS max_reflectivity_dbz,\n        -- Minimum reflectivity\n        MIN(nrd.reflectivity_dbz) AS min_reflectivity_dbz,\n        -- Standard deviation\n        STDDEV(nrd.reflectivity_dbz) AS reflectivity_stddev_dbz,\n        -- Closest site\n        (ARRAY_AGG(nrd.site_id ORDER BY gnm.distance_to_grid_km))[1] AS 
closest_site_id,\n        MIN(gnm.distance_to_grid_km) AS distance_to_closest_site_km,\n        -- Data quality score\n        AVG(nrd.quality_weight) AS avg_quality_weight\n    FROM grid_nexrad_matching gnm\n    INNER JOIN nexrad_reflectivity_data nrd ON (\n        gnm.site_id = nrd.site_id\n        AND gnm.reflectivity_dbz = nrd.reflectivity_dbz\n    )\n    GROUP BY\n        gnm.grid_id,\n        gnm.grid_latitude,\n        gnm.grid_longitude,\n        gnm.grid_geom\n),\nfinal_composite_reflectivity AS (\n    -- Eighth CTE:
    Final composite reflectivity with quality assessment\n    SELECT\n        wrc.grid_id,\n        wrc.grid_latitude,\n        wrc.grid_longitude,\n        wrc.grid_geom,\n        wrc.contributing_sites_count,\n        ROUND(CAST(CAST(wrc.weighted_avg_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS composite_reflectivity_dbz,\n        ROUND(CAST(CAST(wrc.max_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS max_reflectivity_dbz,\n        ROUND(CAST(CAST(wrc.min_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS min_reflectivity_dbz,\n        ROUND(CAST(CAST(wrc.reflectivity_stddev_dbz AS NUMERIC) AS NUMERIC), 2) AS reflectivity_stddev_dbz,\n        wrc.closest_site_id,\n        ROUND(CAST(CAST(wrc.distance_to_closest_site_km AS NUMERIC) AS NUMERIC), 2) AS distance_to_closest_site_km,\n        ROUND(CAST(CAST(wrc.avg_quality_weight AS NUMERIC) AS NUMERIC), 3) AS avg_quality_weight,\n        -- Precipitation intensity classification\n        CASE\n            WHEN wrc.weighted_avg_reflectivity_dbz >= 50 THEN 'Extreme'\n            WHEN wrc.weighted_avg_reflectivity_dbz >= 40 THEN 'Heavy'\n            WHEN wrc.weighted_avg_reflectivity_dbz >= 30 THEN 'Moderate'\n            WHEN wrc.weighted_avg_reflectivity_dbz >= 20 THEN 'Light'\n            WHEN wrc.weighted_avg_reflectivity_dbz >= 10 THEN 'Very Light'\n            ELSE 'None'\n        END AS precipitation_intensity,\n        -- Data coverage quality\n        CASE\n            WHEN wrc.contributing_sites_count >= 3 THEN 'Excellent'\n            WHEN wrc.contributing_sites_count = 2 THEN 'Good'\n            WHEN wrc.contributing_sites_count = 1 THEN 'Fair'\n            ELSE 'Poor'\n        END AS coverage_quality\n    FROM weighted_reflectivity_calculation wrc\n    WHERE wrc.weighted_avg_reflectivity_dbz IS NOT NULL\n)\nSELECT\n    grid_id,\n    grid_latitude,\n    grid_longitude,\n    composite_reflectivity_dbz,\n    max_reflectivity_dbz,\n    min_reflectivity_dbz,\n    reflectivity_stddev_dbz,\n    
contributing_sites_count,\n    closest_site_id,\n    distance_to_closest_site_km,\n    avg_quality_weight,\n    precipitation_intensity,\n    coverage_quality\nFROM final_composite_reflectivity\nWHERE grid_latitude BETWEEN 24 AND 50\n    AND grid_longitude BETWEEN -125 AND -66\nORDER BY grid_latitude, grid_longitude\nLIMIT 100000;",
      "line_number": 6187,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013678,
        "row_count": 0,
        "column_count": 13,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 26,
      "title": "NEXRAD Storm Cell Tracking and Movement Analysis",
      "description": "Use Case: Severe Weather Forecasting - Multi-Site Storm Cell Tracking for Tornado and Severe Thunderstorm Prediction Description: Tracks storm cells across multiple NEXRAD radar sites and analyzes their movement, intensity changes, and development patterns. Handles storm cell merging, splitting, and dissipation across the entire US. Purpose: Storm cell tracking report showing storm movement, intensity trends, and predicted paths across multiple radar sites. Business Value: Enables severe weather",
      "complexity": "Multiple CTEs (7 levels), temporal tracking, spatial matching, storm cell association, movement calculation, window functions, consecutive-scan self-join",
      "expected_output": "Query results",
      "sql": "WITH time_window AS (\n    -- First CTE: Define time window for tracking (last 2 hours)\n    SELECT\n        CURRENT_TIMESTAMP - INTERVAL '2 hours' AS window_start,\n        CURRENT_TIMESTAMP AS window_end\n),\nstorm_cells_by_scan AS (\n    -- Second CTE:
    Get storm cells detected at each scan time\n    SELECT\n        nsc.storm_cell_id,\n        nsc.site_id,\n        nsc.first_detection_time,\n        nsc.last_detection_time,\n        nsc.storm_center_latitude,\n        nsc.storm_center_longitude,\n        nsc.storm_center_geom,\n        nsc.max_reflectivity,\n        NULL::NUMERIC AS storm_area_km2,\n        nsc.storm_severity,\n        nsc.storm_type,\n        DATE_TRUNC('minute', nsc.first_detection_time) AS scan_time_minute,\n        -- Calculate scan number (sequential)\n        ROW_NUMBER() OVER (\n            PARTITION BY nsc.site_id\n            ORDER BY nsc.first_detection_time\n        ) AS scan_number\n    FROM nexrad_storm_cells nsc\n    WHERE nsc.first_detection_time BETWEEN (\n        SELECT window_start FROM time_window\n    ) AND (\n        SELECT window_end FROM time_window\n    )\n        AND nsc.storm_center_geom IS NOT NULL\n),\nstorm_cell_movement AS (\n    -- Third CTE: Calculate storm cell movement between scans\n    SELECT\n        scbs1.storm_cell_id AS storm_cell_id_1,\n        scbs1.site_id AS site_id_1,\n        scbs1.scan_time_minute AS scan_time_1,\n        scbs1.storm_center_latitude AS lat_1,\n        scbs1.storm_center_longitude AS lon_1,\n        scbs1.storm_center_geom AS geom_1,\n        scbs1.max_reflectivity AS reflectivity_1,\n        scbs1.storm_area_km2 AS area_1,\n        scbs1.scan_number AS scan_num_1,\n        scbs2.storm_cell_id AS storm_cell_id_2,\n        scbs2.site_id AS site_id_2,\n        scbs2.scan_time_minute AS scan_time_2,\n        scbs2.storm_center_latitude AS lat_2,\n        scbs2.storm_center_longitude AS lon_2,\n        scbs2.storm_center_geom AS geom_2,\n        scbs2.max_reflectivity AS reflectivity_2,\n        scbs2.storm_area_km2 AS area_2,\n        scbs2.scan_number AS scan_num_2,\n        -- Distance between storm centers\n        ST_DISTANCE(scbs1.storm_center_geom::geography, scbs2.storm_center_geom::geography) / 1000.0 AS distance_km,\n        
-- Time difference in minutes\n        EXTRACT(EPOCH FROM (scbs2.scan_time_minute - scbs1.scan_time_minute)) / 60.0 AS time_diff_minutes,\n        -- Movement speed (km/h)\n        CASE\n            WHEN EXTRACT(EPOCH FROM (scbs2.scan_time_minute - scbs1.scan_time_minute)) > 0 THEN\n                (ST_DISTANCE(scbs1.storm_center_geom::geography, scbs2.storm_center_geom::geography) / 1000.0) /\n                (EXTRACT(EPOCH FROM (scbs2.scan_time_minute - scbs1.scan_time_minute)) / 3600.0)\n            ELSE NULL\n        END AS movement_speed_kmh,\n        -- Movement direction (degrees from north)\n        DEGREES(\n            ATAN2(\n                ST_X(scbs2.storm_center_geom::GEOMETRY) - ST_X(scbs1.storm_center_geom::GEOMETRY),\n                ST_Y(scbs2.storm_center_geom::GEOMETRY) - ST_Y(scbs1.storm_center_geom::GEOMETRY)\n            )\n        ) AS movement_direction_deg,\n        -- Reflectivity change\n        scbs2.max_reflectivity - scbs1.max_reflectivity AS reflectivity_change_dbz,\n        -- Area change\n        scbs2.storm_area_km2 - scbs1.storm_area_km2 AS area_change_km2\n    FROM storm_cells_by_scan scbs1\n    INNER JOIN storm_cells_by_scan scbs2 ON (\n        scbs1.site_id = scbs2.site_id\n        AND scbs2.scan_number = scbs1.scan_number + 1\n    )\n),\nstorm_cell_association AS (\n    -- Fourth CTE: Associate storm cells across sites (same storm detected by multiple radars)\n    SELECT\n        scm.storm_cell_id_1,\n        scm.site_id_1,\n        scm.scan_time_1,\n        scm.lat_1,\n        scm.lon_1,\n        scm.reflectivity_1,\n        scm.area_1,\n        scm.storm_cell_id_2,\n        scm.site_id_2,\n        scm.scan_time_2,\n        scm.lat_2,\n        scm.lon_2,\n        scm.reflectivity_2,\n        scm.area_2,\n        scm.distance_km,\n        scm.time_diff_minutes,\n        ROUND(CAST(CAST(scm.movement_speed_kmh AS NUMERIC) AS NUMERIC), 2) AS movement_speed_kmh,\n        ROUND(CAST(CAST(scm.movement_direction_deg AS NUMERIC) AS 
NUMERIC), 2) AS movement_direction_deg,\n        ROUND(CAST(CAST(scm.reflectivity_change_dbz AS NUMERIC) AS NUMERIC), 2) AS reflectivity_change_dbz,\n        ROUND(CAST(CAST(scm.area_change_km2 AS NUMERIC) AS NUMERIC), 2) AS area_change_km2,\n        -- Association confidence (same storm if close in space and time)\n        CASE\n            WHEN scm.distance_km < 20 AND scm.time_diff_minutes < 10 THEN 'High Confidence'\n            WHEN scm.distance_km < 50 AND scm.time_diff_minutes < 20 THEN 'Moderate Confidence'\n            WHEN scm.distance_km < 100 AND scm.time_diff_minutes < 30 THEN 'Low Confidence'\n            ELSE 'Unlikely Same Storm'\n        END AS association_confidence\n    FROM storm_cell_movement scm\n    WHERE scm.distance_km < 100  -- Only consider storms within 100km\n        AND scm.time_diff_minutes BETWEEN 0 AND 30  -- Within 30 minutes\n),\nstorm_track_aggregation AS (\n    -- Fifth CTE:
    Aggregate storm tracks\n    SELECT\n        sca.storm_cell_id_1,\n        sca.site_id_1,\n        COUNT(DISTINCT sca.storm_cell_id_2) AS track_length,\n        MIN(sca.scan_time_1) AS track_start_time,\n        MAX(sca.scan_time_2) AS track_end_time,\n        AVG(sca.movement_speed_kmh) AS avg_movement_speed_kmh,\n        AVG(sca.movement_direction_deg) AS avg_movement_direction_deg,\n        MAX(sca.reflectivity_1) AS max_reflectivity_dbz,\n        MIN(sca.reflectivity_1) AS min_reflectivity_dbz,\n        AVG(sca.reflectivity_1) AS avg_reflectivity_dbz,\n        MAX(sca.reflectivity_change_dbz) AS max_intensification_dbz,\n        MIN(sca.reflectivity_change_dbz) AS max_weakening_dbz,\n        SUM(sca.area_change_km2) AS total_area_change_km2,\n        -- Track distance\n        SUM(sca.distance_km) AS total_track_distance_km,\n        -- Track duration\n        EXTRACT(EPOCH FROM (MAX(sca.scan_time_2) - MIN(sca.scan_time_1))) / 60.0 AS track_duration_minutes\n    FROM storm_cell_association sca\n    WHERE sca.association_confidence IN ('High Confidence', 'Moderate Confidence')\n    GROUP BY\n        sca.storm_cell_id_1,\n        sca.site_id_1\n),\nstorm_development_analysis AS (\n    -- Sixth CTE: Analyze storm development patterns\n    SELECT\n        sta.storm_cell_id_1,\n        sta.site_id_1,\n        sta.track_length,\n        sta.track_start_time,\n        sta.track_end_time,\n        ROUND(CAST(CAST(sta.avg_movement_speed_kmh AS NUMERIC) AS NUMERIC), 2) AS avg_movement_speed_kmh,\n        ROUND(CAST(CAST(sta.avg_movement_direction_deg AS NUMERIC) AS NUMERIC), 2) AS avg_movement_direction_deg,\n        ROUND(CAST(CAST(sta.max_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS max_reflectivity_dbz,\n        ROUND(CAST(CAST(sta.min_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS min_reflectivity_dbz,\n        ROUND(CAST(CAST(sta.avg_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS avg_reflectivity_dbz,\n        ROUND(CAST(CAST(sta.max_intensification_dbz AS 
NUMERIC) AS NUMERIC), 2) AS max_intensification_dbz,\n        ROUND(CAST(CAST(sta.max_weakening_dbz AS NUMERIC) AS NUMERIC), 2) AS max_weakening_dbz,\n        ROUND(CAST(CAST(sta.total_area_change_km2 AS NUMERIC) AS NUMERIC), 2) AS total_area_change_km2,\n        ROUND(CAST(CAST(sta.total_track_distance_km AS NUMERIC) AS NUMERIC), 2) AS total_track_distance_km,\n        ROUND(CAST(CAST(sta.track_duration_minutes AS NUMERIC) AS NUMERIC), 2) AS track_duration_minutes,\n        -- Development trend\n        CASE\n            WHEN sta.max_intensification_dbz > 10 THEN 'Rapidly Intensifying'\n            WHEN sta.max_intensification_dbz > 5 THEN 'Intensifying'\n            WHEN sta.max_weakening_dbz < -10 THEN 'Rapidly Weakening'\n            WHEN sta.max_weakening_dbz < -5 THEN 'Weakening'\n            ELSE 'Stable'\n        END AS development_trend,\n        -- Severity classification\n        CASE\n            WHEN sta.max_reflectivity_dbz >= 60 THEN 'Extreme'\n            WHEN sta.max_reflectivity_dbz >= 50 THEN 'Severe'\n            WHEN sta.max_reflectivity_dbz >= 40 THEN 'Strong'\n            WHEN sta.max_reflectivity_dbz >= 30 THEN 'Moderate'\n            ELSE 'Weak'\n        END AS severity_classification\n    FROM storm_track_aggregation sta\n),\npredicted_storm_path AS (\n    -- Seventh CTE: Predict storm path based on movement\n    SELECT\n        sda.storm_cell_id_1,\n        sda.site_id_1,\n        sda.track_length,\n        sda.track_start_time,\n        sda.track_end_time,\n        sda.avg_movement_speed_kmh,\n        sda.avg_movement_direction_deg,\n        sda.max_reflectivity_dbz,\n        sda.avg_reflectivity_dbz,\n        sda.development_trend,\n        sda.severity_classification,\n        -- Predicted position in 1 hour (using average movement)\n        CASE\n            WHEN sda.avg_movement_speed_kmh IS NOT NULL AND sda.avg_movement_direction_deg IS NOT NULL THEN\n                ST_TRANSLATE(\n                    (SELECT 
storm_center_geom::geometry FROM nexrad_storm_cells WHERE storm_cell_id = sda.storm_cell_id_1),\n                    sda.avg_movement_speed_kmh * 1.0 * SIN(RADIANS(sda.avg_movement_direction_deg)) * 1000.0,\n                    sda.avg_movement_speed_kmh * 1.0 * COS(RADIANS(sda.avg_movement_direction_deg)) * 1000.0\n                )::geography\n            ELSE NULL\n        END AS predicted_position_1h_geom,\n        -- Predicted position in 2 hours\n        CASE\n            WHEN sda.avg_movement_speed_kmh IS NOT NULL AND sda.avg_movement_direction_deg IS NOT NULL THEN\n                ST_TRANSLATE(\n                    (SELECT storm_center_geom::geometry FROM nexrad_storm_cells WHERE storm_cell_id = sda.storm_cell_id_1),\n                    sda.avg_movement_speed_kmh * 2.0 * SIN(RADIANS(sda.avg_movement_direction_deg)) * 1000.0,\n                    sda.avg_movement_speed_kmh * 2.0 * COS(RADIANS(sda.avg_movement_direction_deg)) * 1000.0\n                )::geography\n            ELSE NULL\n        END AS predicted_position_2h_geom\n    FROM storm_development_analysis sda\n)\nSELECT\n    storm_cell_id_1,\n    site_id_1,\n    track_length,\n    track_start_time,\n    track_end_time,\n    avg_movement_speed_kmh,\n    avg_movement_direction_deg,\n    max_reflectivity_dbz,\n    avg_reflectivity_dbz,\n    development_trend,\n    severity_classification,\n    predicted_position_1h_geom,\n    predicted_position_2h_geom\nFROM predicted_storm_path\nWHERE track_length >= 2  -- At least 2 scans for tracking\nORDER BY max_reflectivity_dbz DESC, track_start_time DESC\nLIMIT 1000;",
      "line_number":
    6421,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013176,
        "row_count": 0,
        "column_count": 13,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 27,
      "title": "US-Wide Satellite Imagery Cloud Composite Generation",
      "description": "Use Case: Cloud Monitoring - Nationwide Cloud Coverage Analysis from GOES Satellite Imagery Description: Generates a US-wide cloud composite from decompressed GOES satellite imagery. Combines multiple satellite bands and products to create seamless cloud coverage maps across the entire United States. Purpose: US-wide cloud composite showing cloud coverage, cloud top heights, and cloud properties across the entire United States. Business Value: Provides comprehensive cloud monitoring for solar energy f",
      "complexity":
    "Multiple CTEs (7+ levels), multi-band satellite data fusion, cloud property extraction, spatial interpolation, temporal alignment",
      "expected_output": "Query results",
      "sql": "WITH us_spatial_bounds AS (\n    -- First CTE: Define US spatial bounds\n    SELECT\n        -125.0 AS west_bound,\n        24.0 AS south_bound,\n        -66.0 AS east_bound,\n        50.0 AS north_bound\n),\nactive_satellite_sources AS (\n    -- Second CTE: Get active satellite sources\n    SELECT\n        sis.source_id,\n        sis.source_name,\n        sis.satellite_type,\n        sis.source_name,\n        sis.coverage_area,\n        NULL::NUMERIC AS spatial_resolution_km,\n        NULL::NUMERIC AS scan_frequency_minutes,\n        sis.operational_status\n    FROM satellite_imagery_sources sis\n    WHERE sis.operational_status = 'Operational'\n        AND sis.coverage_area IN ('CONUS', 'Full Disk')\n),\nrecent_satellite_scans AS (\n    -- Third CTE: Get recent satellite scans (within last hour)\n    SELECT DISTINCT\n        sip.source_id,\n        sip.scan_start_time,\n        sip.product_type,\n        DATE_TRUNC('minute', sip.scan_start_time) AS scan_time_minute\n    FROM satellite_imagery_products sip\n    INNER JOIN active_satellite_sources ass ON sip.source_id = ass.source_id\n    WHERE sip.scan_start_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour'\n        AND sip.decompression_status = 'Success'\n        AND sip.product_type IN ('Cloud', 'Temperature', 'Moisture')\n),\nsatellite_cloud_data AS (\n    -- Fourth CTE: Get cloud-related satellite data\n    SELECT\n        sip.product_id,\n        sip.source_id,\n        sip.scan_start_time,\n        ST_Y(sip.grid_geom::geometry) AS grid_latitude,\n        ST_X(sip.grid_geom::geometry) AS grid_longitude,\n        sip.grid_geom,\n        NULL::INTEGER AS band_number,\n        NULL::VARCHAR(50) AS band_name,\n        NULL::NUMERIC AS brightness_temperature_k,\n        NULL::NUMERIC AS reflectance_percent,\n        NULL::NUMERIC AS cloud_top_height_m,\n        NULL::NUMERIC AS cloud_top_temperature_k,\n        NULL::VARCHAR(50) AS cloud_phase,\n        NULL::NUMERIC AS cloud_optical_depth,\n        
NULL::NUMERIC AS pixel_value,\n        NULL::NUMERIC AS calibrated_value,\n        ass.spatial_resolution_km\n    FROM satellite_imagery_products sip\n    INNER JOIN active_satellite_sources ass ON sip.source_id = ass.source_id\n    INNER JOIN recent_satellite_scans rss ON (\n        sip.source_id = rss.source_id\n        AND DATE_TRUNC('minute', sip.scan_start_time) = rss.scan_time_minute\n        AND sip.product_type = rss.product_type\n    )\n    WHERE sip.grid_geom IS NOT NULL\n        AND sip.decompression_status = 'Success'\n),\nus_grid_cells AS (\n    -- Fifth CTE: Generate US-wide grid cells (2km resolution for satellite data)\n    SELECT\n        grid_id,\n        grid_latitude,\n        grid_longitude,\n        ST_SETSRID(ST_MAKEPOINT(grid_longitude, grid_latitude), 4326)::GEOGRAPHY AS grid_geom\n    FROM (\n        SELECT\n            'SAT_GRID_' || LPAD(ROW_NUMBER() OVER (ORDER BY lat, lon)::VARCHAR, 10, '0') AS grid_id,\n            lat AS grid_latitude,\n            lon AS grid_longitude\n        FROM (\n            SELECT\n                generate_series(24, 50, 0.02) AS lat,\n                generate_series(-125, -66, 0.02) AS lon\n        ) grid_points\n        WHERE lat BETWEEN 24 AND 50\n            AND lon BETWEEN -125 AND -66\n    ) grid\n),\ngrid_satellite_matching AS (\n    -- Sixth CTE: Match grid cells with satellite data\n    SELECT\n        ugc.grid_id,\n        ugc.grid_latitude,\n        ugc.grid_longitude,\n        ugc.grid_geom,\n        scd.source_id,\n        scd.band_number,\n        scd.band_name,\n        scd.cloud_top_height_m,\n        scd.cloud_top_temperature_k,\n        scd.cloud_phase,\n        scd.cloud_optical_depth,\n        scd.brightness_temperature_k,\n        scd.reflectance_percent,\n        scd.spatial_resolution_km,\n        ST_DISTANCE(ugc.grid_geom::geography, scd.grid_geom::geography) / 1000.0 AS distance_to_satellite_km\n    FROM us_grid_cells ugc\n    INNER JOIN satellite_cloud_data scd ON (\n        
ST_DWITHIN(ugc.grid_geom, scd.grid_geom, 10000)  -- Within 10km\n    )\n),\ncloud_property_aggregation AS (\n    -- Seventh CTE: Aggregate cloud properties for each grid cell\n    SELECT\n        gsm.grid_id,\n        gsm.grid_latitude,\n        gsm.grid_longitude,\n        gsm.grid_geom,\n        COUNT(DISTINCT gsm.source_id) AS contributing_sources_count,\n        COUNT(*) AS pixel_count,\n        -- Cloud top height\n        MAX(gsm.cloud_top_height_m) AS max_cloud_top_height_m,\n        AVG(gsm.cloud_top_height_m) AS avg_cloud_top_height_m,\n        MIN(gsm.cloud_top_height_m) AS min_cloud_top_height_m,\n        -- Cloud top temperature\n        MIN(gsm.cloud_top_temperature_k) AS min_cloud_top_temperature_k,\n        AVG(gsm.cloud_top_temperature_k) AS avg_cloud_top_temperature_k,\n        MAX(gsm.cloud_top_temperature_k) AS max_cloud_top_temperature_k,\n        -- Cloud optical depth\n        AVG(gsm.cloud_optical_depth) AS avg_cloud_optical_depth,\n        MAX(gsm.cloud_optical_depth) AS max_cloud_optical_depth,\n        -- Cloud phase distribution\n        COUNT(CASE WHEN gsm.cloud_phase = 'Liquid' THEN 1 END) AS liquid_cloud_count,\n        COUNT(CASE WHEN gsm.cloud_phase = 'Ice' THEN 1 END) AS ice_cloud_count,\n        COUNT(CASE WHEN gsm.cloud_phase = 'Mixed' THEN 1 END) AS mixed_cloud_count,\n        -- Brightness temperature (for IR bands)\n        AVG(gsm.brightness_temperature_k) AS avg_brightness_temperature_k,\n        MIN(gsm.brightness_temperature_k) AS min_brightness_temperature_k,\n        -- Reflectance (for visible bands)\n        AVG(gsm.reflectance_percent) AS avg_reflectance_percent,\n        MAX(gsm.reflectance_percent) AS max_reflectance_percent\n    FROM grid_satellite_matching gsm\n    WHERE gsm.cloud_top_height_m IS NOT NULL\n    GROUP BY\n        gsm.grid_id,\n        gsm.grid_latitude,\n        gsm.grid_longitude,\n        gsm.grid_geom\n),\nfinal_cloud_composite AS (\n    -- Eighth CTE:
    Final cloud composite with classifications\n    SELECT\n        cpa.grid_id,\n        cpa.grid_latitude,\n        cpa.grid_longitude,\n        cpa.contributing_sources_count,\n        cpa.pixel_count,\n        ROUND(CAST(CAST(cpa.max_cloud_top_height_m AS NUMERIC) AS NUMERIC), 0) AS max_cloud_top_height_m,\n        ROUND(CAST(CAST(cpa.avg_cloud_top_height_m AS NUMERIC) AS NUMERIC), 0) AS avg_cloud_top_height_m,\n        ROUND(CAST(CAST(cpa.min_cloud_top_height_m AS NUMERIC) AS NUMERIC), 0) AS min_cloud_top_height_m,\n        ROUND(CAST(CAST(cpa.min_cloud_top_temperature_k AS NUMERIC) AS NUMERIC), 2) AS min_cloud_top_temperature_k,\n        ROUND(CAST(CAST(cpa.avg_cloud_top_temperature_k AS NUMERIC) AS NUMERIC), 2) AS avg_cloud_top_temperature_k,\n        ROUND(CAST(CAST(cpa.avg_cloud_optical_depth AS NUMERIC) AS NUMERIC), 4) AS avg_cloud_optical_depth,\n        ROUND(CAST(CAST(cpa.max_cloud_optical_depth AS NUMERIC) AS NUMERIC), 4) AS max_cloud_optical_depth,\n        cpa.liquid_cloud_count,\n        cpa.ice_cloud_count,\n        cpa.mixed_cloud_count,\n        -- Dominant cloud phase\n        CASE\n            WHEN cpa.liquid_cloud_count > cpa.ice_cloud_count AND cpa.liquid_cloud_count > cpa.mixed_cloud_count THEN 'Liquid'\n            WHEN cpa.ice_cloud_count > cpa.mixed_cloud_count THEN 'Ice'\n            WHEN cpa.mixed_cloud_count > 0 THEN 'Mixed'\n            ELSE 'Unknown'\n        END AS dominant_cloud_phase,\n        -- Cloud coverage percentage\n        CASE\n            WHEN cpa.pixel_count > 0 THEN\n                (cpa.pixel_count::NUMERIC / (cpa.pixel_count + 1)::NUMERIC) * 100\n            ELSE 0\n        END AS cloud_coverage_percent,\n        -- Cloud height classification\n        CASE\n            WHEN cpa.max_cloud_top_height_m >= 12000 THEN 'High Clouds (Cirrus)'\n            WHEN cpa.max_cloud_top_height_m >= 6000 THEN 'Mid-Level Clouds'\n            WHEN cpa.max_cloud_top_height_m >= 2000 THEN 'Low Clouds'\n            WHEN 
cpa.max_cloud_top_height_m IS NOT NULL THEN 'Very Low Clouds'\n            ELSE 'No Cloud Data'\n        END AS cloud_height_classification,\n        -- Cloud thickness classification\n        CASE\n            WHEN cpa.max_cloud_top_height_m - cpa.min_cloud_top_height_m >= 5000 THEN 'Very Thick'\n            WHEN cpa.max_cloud_top_height_m - cpa.min_cloud_top_height_m >= 3000 THEN 'Thick'\n            WHEN cpa.max_cloud_top_height_m - cpa.min_cloud_top_height_m >= 1000 THEN 'Moderate'\n            WHEN cpa.max_cloud_top_height_m - cpa.min_cloud_top_height_m >= 500 THEN 'Thin'\n            ELSE 'Very Thin'\n        END AS cloud_thickness_classification\n    FROM cloud_property_aggregation cpa\n    WHERE cpa.max_cloud_top_height_m IS NOT NULL\n)\nSELECT\n    grid_id,\n    grid_latitude,\n    grid_longitude,\n    contributing_sources_count,\n    pixel_count,\n    max_cloud_top_height_m,\n    avg_cloud_top_height_m,\n    min_cloud_top_height_m,\n    min_cloud_top_temperature_k,\n    avg_cloud_top_temperature_k,\n    avg_cloud_optical_depth,\n    max_cloud_optical_depth,\n    liquid_cloud_count,\n    ice_cloud_count,\n    mixed_cloud_count,\n    dominant_cloud_phase,\n    ROUND(CAST(CAST(cloud_coverage_percent AS NUMERIC) AS NUMERIC), 2) AS cloud_coverage_percent,\n    cloud_height_classification,\n    cloud_thickness_classification\nFROM final_cloud_composite\nWHERE grid_latitude BETWEEN 24 AND 50\n    AND grid_longitude BETWEEN -125 AND -66\nORDER BY grid_latitude, grid_longitude\nLIMIT 50000;",
      "line_number": 6675,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.015776,
        "row_count": 0,
        "column_count": 19,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 28,
      "title": "NEXRAD-Satellite Data Fusion for Precipitation Estimation",
      "description":
    "Use Case: Precipitation Monitoring - Multi-Source Precipitation Estimation Combining Radar and Satellite Data Description: Fuses NEXRAD radar reflectivity and satellite precipitation estimates to create improved US-wide precipitation maps. Combines strengths of both data sources for more accurate precipitation estimation. Purpose: Fused precipitation product combining NEXRAD and satellite data with improved accuracy and coverage. Business Value: Provides more accurate and comprehensive precipita",
      "complexity": "Multiple CTEs (8+ levels), multi-source data fusion, weighted combination, quality assessment, spatial matching, temporal alignment",
      "expected_output": "Query results",
      "sql": "WITH us_spatial_bounds AS (\n    -- First CTE: Define US spatial bounds\n    SELECT\n        -125.0 AS west_bound,\n        24.0 AS south_bound,\n        -66.0 AS east_bound,\n        50.0 AS north_bound\n),\nrecent_nexrad_precipitation AS (\n    -- Second CTE: Get recent NEXRAD precipitation estimates\n    SELECT\n        nrg.grid_id,\n        nrg.site_id,\n        nrg.scan_time,\n        nrg.grid_latitude,\n        nrg.grid_longitude,\n        nrg.grid_geom,\n        NULL::NUMERIC AS precipitation_rate_mmh,\n        NULL::NUMERIC AS accumulated_precipitation_mm,\n        nrg.reflectivity_value AS max_reflectivity_dbz,\n        nrg.reflectivity_value AS composite_reflectivity_dbz,\n        -- NEXRAD data quality (higher reflectivity = more reliable)\n        CASE\n            WHEN nrg.reflectivity_value >= 30 THEN 0.9  -- High quality\n            WHEN nrg.reflectivity_value >= 20 THEN 0.7  -- Moderate quality\n            WHEN nrg.reflectivity_value >= 10 THEN 0.5  -- Lower quality\n            ELSE 0.3\n        END AS nexrad_quality_weight,\n        -- Distance from radar (closer = more reliable)\n        ST_DISTANCE(nrs.site_geom::geography, nrg.grid_geom::geography) / 1000.0 AS distance_from_radar_km\n    FROM nexrad_reflectivity_grid nrg\n    INNER JOIN nexrad_radar_sites nrs ON nrg.site_id = nrs.site_id\n    WHERE nrg.scan_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour'\n        AND nrg.grid_geom IS NOT NULL\n),\nrecent_satellite_precipitation AS (\n    -- Third CTE: Get recent satellite precipitation estimates\n    SELECT\n        sip.product_id,\n        sip.source_id,\n        sip.scan_start_time,\n        ST_Y(sip.grid_geom::geometry) AS grid_latitude,\n        ST_X(sip.grid_geom::geometry) AS grid_longitude,\n        sip.grid_geom,\n        sip.precipitation_rate_mmh,\n        NULL::NUMERIC AS brightness_temperature_k,\n        NULL::NUMERIC AS cloud_top_height_m,\n        -- Satellite data quality (use precipitation rate as quality 
indicator)\n        CASE\n            WHEN sip.precipitation_rate_mmh > 0 THEN 0.8  -- High quality (has precipitation data)\n            WHEN sip.precipitation_rate_mmh IS NOT NULL THEN 0.6  -- Moderate quality\n            ELSE 0.4  -- Lower quality\n        END AS satellite_quality_weight\n    FROM satellite_imagery_products sip\n    WHERE sip.scan_start_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour'\n        AND sip.product_type = 'Precipitation'\n        AND sip.precipitation_rate_mmh IS NOT NULL\n        AND sip.grid_geom IS NOT NULL\n        AND sip.decompression_status = 'Success'\n),\nus_precipitation_grid AS (\n    -- Fourth CTE: Generate US-wide precipitation grid\n    SELECT\n        grid_id,\n        grid_latitude,\n        grid_longitude,\n        ST_SETSRID(ST_MAKEPOINT(grid_longitude, grid_latitude), 4326)::GEOGRAPHY AS grid_geom\n    FROM (\n        SELECT\n            'PRECIP_GRID_' || LPAD(ROW_NUMBER() OVER (ORDER BY lat, lon)::VARCHAR, 10, '0') AS grid_id,\n            lat AS grid_latitude,\n            lon AS grid_longitude\n        FROM (\n            SELECT\n                generate_series(24, 50, 0.01) AS lat,\n                generate_series(-125, -66, 0.01) AS lon\n        ) grid_points\n        WHERE lat BETWEEN 24 AND 50\n            AND lon BETWEEN -125 AND -66\n    ) grid\n),\ngrid_nexrad_matching AS (\n    -- Fifth CTE: Match grid cells with NEXRAD data\n    SELECT\n        upg.grid_id,\n        upg.grid_latitude,\n        upg.grid_longitude,\n        upg.grid_geom,\n        rnp.precipitation_rate_mmh AS nexrad_precip_rate,\n        rnp.accumulated_precipitation_mm AS nexrad_accumulated,\n        rnp.max_reflectivity_dbz,\n        rnp.nexrad_quality_weight,\n        rnp.distance_from_radar_km,\n        ST_DISTANCE(upg.grid_geom::geography, rnp.grid_geom::geography) / 1000.0 AS distance_to_nexrad_km\n    FROM us_precipitation_grid upg\n    INNER JOIN recent_nexrad_precipitation rnp ON (\n        ST_DWITHIN(upg.grid_geom, rnp.grid_geom, 
50000)  -- Within 50km\n    )\n),\ngrid_satellite_matching AS (\n    -- Sixth CTE: Match grid cells with satellite data\n    SELECT\n        upg.grid_id,\n        upg.grid_latitude,\n        upg.grid_longitude,\n        upg.grid_geom,\n        rsp.precipitation_rate_mmh AS satellite_precip_rate,\n        rsp.brightness_temperature_k,\n        rsp.cloud_top_height_m,\n        rsp.satellite_quality_weight,\n        ST_DISTANCE(upg.grid_geom::geography, rsp.grid_geom::geography) / 1000.0 AS distance_to_satellite_km\n    FROM us_precipitation_grid upg\n    INNER JOIN recent_satellite_precipitation rsp ON (\n        ST_DWITHIN(upg.grid_geom, rsp.grid_geom, 10000)  -- Within 10km\n    )\n),\nfused_precipitation_calculation AS (\n    -- Seventh CTE: Calculate fused precipitation estimates\n    SELECT\n        COALESCE(gnm.grid_id, gsm.grid_id) AS grid_id,\n        COALESCE(gnm.grid_latitude, gsm.grid_latitude) AS grid_latitude,\n        COALESCE(gnm.grid_longitude, gsm.grid_longitude) AS grid_longitude,\n        COALESCE(gnm.grid_geom, gsm.grid_geom) AS grid_geom,\n        -- NEXRAD data\n        AVG(gnm.nexrad_precip_rate) AS avg_nexrad_precip_rate,\n        AVG(gnm.nexrad_accumulated) AS avg_nexrad_accumulated,\n        MAX(gnm.max_reflectivity_dbz) AS max_reflectivity_dbz,\n        AVG(gnm.nexrad_quality_weight) AS avg_nexrad_quality_weight,\n        MIN(gnm.distance_to_nexrad_km) AS min_distance_to_nexrad_km,\n        COUNT(DISTINCT gnm.grid_id) AS nexrad_data_points,\n        -- Satellite data\n        AVG(gsm.satellite_precip_rate) AS avg_satellite_precip_rate,\n        AVG(gsm.brightness_temperature_k) AS avg_brightness_temperature_k,\n        AVG(gsm.cloud_top_height_m) AS avg_cloud_top_height_m,\n        AVG(gsm.satellite_quality_weight) AS avg_satellite_quality_weight,\n        MIN(gsm.distance_to_satellite_km) AS min_distance_to_satellite_km,\n        COUNT(DISTINCT gsm.grid_id) AS satellite_data_points\n    FROM grid_nexrad_matching gnm\n    FULL OUTER JOIN 
grid_satellite_matching gsm ON gnm.grid_id = gsm.grid_id\n    GROUP BY\n        COALESCE(gnm.grid_id, gsm.grid_id),\n        COALESCE(gnm.grid_latitude, gsm.grid_latitude),\n        COALESCE(gnm.grid_longitude, gsm.grid_longitude),\n        COALESCE(gnm.grid_geom, gsm.grid_geom)\n),\nfinal_fused_precipitation AS (\n    -- Eighth CTE: Final fused precipitation with weighted combination\n    SELECT\n        fpc.grid_id,\n        fpc.grid_latitude,\n        fpc.grid_longitude,\n        fpc.nexrad_data_points,\n        fpc.satellite_data_points,\n        ROUND(CAST(CAST(fpc.avg_nexrad_precip_rate AS NUMERIC) AS NUMERIC), 2) AS avg_nexrad_precip_rate,\n        ROUND(CAST(CAST(fpc.avg_satellite_precip_rate AS NUMERIC) AS NUMERIC), 2) AS avg_satellite_precip_rate,\n        ROUND(CAST(CAST(fpc.max_reflectivity_dbz AS NUMERIC) AS NUMERIC), 2) AS max_reflectivity_dbz,\n        -- Fused precipitation rate (weighted combination)\n        CASE\n            WHEN fpc.avg_nexrad_quality_weight IS NOT NULL AND fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                (\n                    COALESCE(fpc.avg_nexrad_precip_rate, 0) * fpc.avg_nexrad_quality_weight +\n                    COALESCE(fpc.avg_satellite_precip_rate, 0) * fpc.avg_satellite_quality_weight\n                ) / (fpc.avg_nexrad_quality_weight + fpc.avg_satellite_quality_weight)\n            WHEN fpc.avg_nexrad_quality_weight IS NOT NULL THEN\n                fpc.avg_nexrad_precip_rate\n            WHEN fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                fpc.avg_satellite_precip_rate\n            ELSE NULL\n        END AS fused_precipitation_rate_mmh,\n        -- Data source\n        CASE\n            WHEN fpc.nexrad_data_points > 0 AND fpc.satellite_data_points > 0 THEN 'Fused'\n            WHEN fpc.nexrad_data_points > 0 THEN 'NEXRAD Only'\n            WHEN fpc.satellite_data_points > 0 THEN 'Satellite Only'\n            ELSE 'No Data'\n        END AS data_source,\n        -- Data quality 
score\n        CASE\n            WHEN fpc.avg_nexrad_quality_weight IS NOT NULL AND fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                (fpc.avg_nexrad_quality_weight + fpc.avg_satellite_quality_weight) / 2.0\n            WHEN fpc.avg_nexrad_quality_weight IS NOT NULL THEN\n                fpc.avg_nexrad_quality_weight\n            WHEN fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                fpc.avg_satellite_quality_weight\n            ELSE 0.0\n        END AS data_quality_score,\n        -- Precipitation intensity classification\n        CASE\n            WHEN (\n                CASE\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL AND fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        (\n                            COALESCE(fpc.avg_nexrad_precip_rate, 0) * fpc.avg_nexrad_quality_weight +\n                            COALESCE(fpc.avg_satellite_precip_rate, 0) * fpc.avg_satellite_quality_weight\n                        ) / (fpc.avg_nexrad_quality_weight + fpc.avg_satellite_quality_weight)\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL THEN\n                        fpc.avg_nexrad_precip_rate\n                    WHEN fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        fpc.avg_satellite_precip_rate\n                    ELSE NULL\n                END\n            ) >= 10.0 THEN 'Heavy'\n            WHEN (\n                CASE\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL AND fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        (\n                            COALESCE(fpc.avg_nexrad_precip_rate, 0) * fpc.avg_nexrad_quality_weight +\n                            COALESCE(fpc.avg_satellite_precip_rate, 0) * fpc.avg_satellite_quality_weight\n                        ) / (fpc.avg_nexrad_quality_weight + fpc.avg_satellite_quality_weight)\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL THEN\n           
             fpc.avg_nexrad_precip_rate\n                    WHEN fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        fpc.avg_satellite_precip_rate\n                    ELSE NULL\n                END\n            ) >= 2.5 THEN 'Moderate'\n            WHEN (\n                CASE\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL AND fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        (\n                            COALESCE(fpc.avg_nexrad_precip_rate, 0) * fpc.avg_nexrad_quality_weight +\n                            COALESCE(fpc.avg_satellite_precip_rate, 0) * fpc.avg_satellite_quality_weight\n                        ) / (fpc.avg_nexrad_quality_weight + fpc.avg_satellite_quality_weight)\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL THEN\n                        fpc.avg_nexrad_precip_rate\n                    WHEN fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        fpc.avg_satellite_precip_rate\n                    ELSE NULL\n                END\n            ) >= 0.5 THEN 'Light'\n            WHEN (\n                CASE\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL AND fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        (\n                            COALESCE(fpc.avg_nexrad_precip_rate, 0) * fpc.avg_nexrad_quality_weight +\n                            COALESCE(fpc.avg_satellite_precip_rate, 0) * fpc.avg_satellite_quality_weight\n                        ) / (fpc.avg_nexrad_quality_weight + fpc.avg_satellite_quality_weight)\n                    WHEN fpc.avg_nexrad_quality_weight IS NOT NULL THEN\n                        fpc.avg_nexrad_precip_rate\n                    WHEN fpc.avg_satellite_quality_weight IS NOT NULL THEN\n                        fpc.avg_satellite_precip_rate\n                    ELSE NULL\n                END\n            ) > 0 THEN 'Very Light'\n            ELSE 'None'\n        END AS 
precipitation_intensity\n    FROM fused_precipitation_calculation fpc\n)\nSELECT\n    grid_id,\n    grid_latitude,\n    grid_longitude,\n    nexrad_data_points,\n    satellite_data_points,\n    avg_nexrad_precip_rate,\n    avg_satellite_precip_rate,\n    max_reflectivity_dbz,\n    ROUND(CAST(CAST(fused_precipitation_rate_mmh AS NUMERIC) AS NUMERIC), 2) AS fused_precipitation_rate_mmh,\n    data_source,\n    ROUND(CAST(CAST(data_quality_score AS NUMERIC) AS NUMERIC), 3) AS data_quality_score,\n    precipitation_intensity\nFROM final_fused_precipitation\nWHERE fused_precipitation_rate_mmh IS NOT NULL\n    AND grid_latitude BETWEEN 24 AND 50\n    AND grid_longitude BETWEEN -125 AND -66\nORDER BY grid_latitude, grid_longitude\nLIMIT 100000;",
      "line_number": 6919,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.017517,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 29,
      "title": "Satellite Fire Detection and Monitoring Across US",
      "description": "Use Case: Wildfire Monitoring - Nationwide Fire Detection from GOES Satellite Imagery Description: Detects and monitors fires across the entire United States using decompressed GOES satellite imagery. Analyzes fire radiative power, temperature, and development patterns for wildfire management. Business Value:
    US-wide fire detection report showing fire locations, intensity, and development trends from satellite imagery. Purpose: Enables early wildfire detection and monitoring at national scale, s",
      "complexity": "Multiple CTEs (6+ levels), fire detection algorithms, temporal tracking, spatial clustering, intensity analysis",
      "expected_output": "Query results",
      "sql": "WITH us_spatial_bounds AS (\n    -- First CTE: Define US spatial bounds\n    SELECT\n        -125.0 AS west_bound,\n        24.0 AS south_bound,\n        -66.0 AS east_bound,\n        50.0 AS north_bound\n),\nrecent_fire_detections AS (\n    -- Second CTE: Get recent fire detections from satellite imagery\n    SELECT\n        sip.product_id,\n        sip.source_id,\n        sip.scan_start_time,\n        ST_Y(sip.grid_geom::geometry) AS grid_latitude,\n        ST_X(sip.grid_geom::geometry) AS grid_longitude,\n        sip.grid_geom,\n        sip.fire_detection_confidence,\n        NULL::NUMERIC AS fire_temperature_k,\n        NULL::NUMERIC AS fire_power_mw,\n        NULL::NUMERIC AS brightness_temperature_k,\n        sis.source_name,\n        NULL::NUMERIC AS spatial_resolution_km\n    FROM satellite_imagery_products sip\n    INNER JOIN satellite_imagery_sources sis ON sip.source_id = sis.source_id\n    WHERE sip.scan_start_time >= CURRENT_TIMESTAMP - INTERVAL '24 hours'\n        AND sip.decompression_status = 'Success'\n        AND sip.fire_detection_confidence IS NOT NULL\n        AND sip.fire_detection_confidence >= 50  -- Minimum confidence threshold\n        AND sip.grid_geom IS NOT NULL\n),\nfire_clustering AS (\n    -- Third CTE: Cluster nearby fire detections (same fire)\n    SELECT\n        rfd.product_id,\n        rfd.source_id,\n        rfd.scan_start_time,\n        rfd.grid_latitude,\n        rfd.grid_longitude,\n        rfd.grid_geom,\n        rfd.fire_detection_confidence,\n        rfd.fire_temperature_k,\n        rfd.fire_power_mw,\n        rfd.brightness_temperature_k,\n        rfd.source_name AS satellite_name,\n        rfd.spatial_resolution_km,\n        -- Cluster ID based on proximity (fires within 5km are same cluster)\n        ROW_NUMBER() OVER (\n            ORDER BY rfd.scan_start_time, rfd.grid_latitude, rfd.grid_longitude\n        ) AS fire_cluster_id\n    FROM recent_fire_detections rfd\n),\nfire_cluster_aggregation AS (\n    
-- Fourth CTE: Aggregate fire clusters\n    SELECT\n        fc.fire_cluster_id,\n        COUNT(*) AS detection_count,\n        MIN(fc.scan_start_time) AS first_detection_time,\n        MAX(fc.scan_start_time) AS last_detection_time,\n        AVG(fc.grid_latitude) AS cluster_center_latitude,\n        AVG(fc.grid_longitude) AS cluster_center_longitude,\n        ST_SETSRID(\n            ST_MAKEPOINT(AVG(ST_X(fc.grid_geom::GEOMETRY)), AVG(ST_Y(fc.grid_geom::GEOMETRY))),\n            4326\n        )::GEOGRAPHY AS cluster_center_geom,\n        MAX(fc.fire_detection_confidence) AS max_fire_confidence,\n        AVG(fc.fire_detection_confidence) AS avg_fire_confidence,\n        MAX(fc.fire_temperature_k) AS max_fire_temperature_k,\n        AVG(fc.fire_temperature_k) AS avg_fire_temperature_k,\n        SUM(fc.fire_power_mw) AS total_fire_power_mw,\n        AVG(fc.fire_power_mw) AS avg_fire_power_mw,\n        MAX(fc.brightness_temperature_k) AS max_brightness_temperature_k,\n        COUNT(DISTINCT fc.source_id) AS satellite_sources_count,\n        -- Fire duration\n        EXTRACT(EPOCH FROM (MAX(fc.scan_start_time) - MIN(fc.scan_start_time))) / 3600.0 AS fire_duration_hours\n    FROM fire_clustering fc\n    GROUP BY fc.fire_cluster_id\n),\nfire_intensity_classification AS (\n    -- Fifth CTE: Classify fire intensity\n    SELECT\n        fca.fire_cluster_id,\n        fca.detection_count,\n        fca.first_detection_time,\n        fca.last_detection_time,\n        ROUND(CAST(CAST(fca.cluster_center_latitude AS NUMERIC) AS NUMERIC), 6) AS cluster_center_latitude,\n        ROUND(CAST(CAST(fca.cluster_center_longitude AS NUMERIC) AS NUMERIC), 6) AS cluster_center_longitude,\n        fca.cluster_center_geom,\n        ROUND(CAST(CAST(fca.max_fire_confidence AS NUMERIC) AS NUMERIC), 2) AS max_fire_confidence,\n        ROUND(CAST(CAST(fca.avg_fire_confidence AS NUMERIC) AS NUMERIC), 2) AS avg_fire_confidence,\n        ROUND(CAST(CAST(fca.max_fire_temperature_k AS NUMERIC) AS 
NUMERIC), 2) AS max_fire_temperature_k,\n        ROUND(CAST(CAST(fca.avg_fire_temperature_k AS NUMERIC) AS NUMERIC), 2) AS avg_fire_temperature_k,\n        ROUND(CAST(CAST(fca.total_fire_power_mw AS NUMERIC) AS NUMERIC), 2) AS total_fire_power_mw,\n        ROUND(CAST(CAST(fca.avg_fire_power_mw AS NUMERIC) AS NUMERIC), 2) AS avg_fire_power_mw,\n        ROUND(CAST(CAST(fca.max_brightness_temperature_k AS NUMERIC) AS NUMERIC), 2) AS max_brightness_temperature_k,\n        fca.satellite_sources_count,\n        ROUND(CAST(CAST(fca.fire_duration_hours AS NUMERIC) AS NUMERIC), 2) AS fire_duration_hours,\n        -- Fire intensity classification\n        CASE\n            WHEN fca.total_fire_power_mw >= 1000 THEN 'Extreme'\n            WHEN fca.total_fire_power_mw >= 500 THEN 'Very High'\n            WHEN fca.total_fire_power_mw >= 100 THEN 'High'\n            WHEN fca.total_fire_power_mw >= 50 THEN 'Moderate'\n            WHEN fca.total_fire_power_mw >= 10 THEN 'Low'\n            ELSE 'Very Low'\n        END AS fire_intensity_classification,\n        -- Fire status\n        CASE\n            WHEN fca.last_detection_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour' THEN 'Active'\n            WHEN fca.last_detection_time >= CURRENT_TIMESTAMP - INTERVAL '6 hours' THEN 'Recent'\n            ELSE 'Inactive'\n        END AS fire_status\n    FROM fire_cluster_aggregation fca\n    WHERE fca.total_fire_power_mw > 0\n)\nSELECT\n    fire_cluster_id,\n    detection_count,\n    first_detection_time,\n    last_detection_time,\n    cluster_center_latitude,\n    cluster_center_longitude,\n    max_fire_confidence,\n    avg_fire_confidence,\n    max_fire_temperature_k,\n    avg_fire_temperature_k,\n    total_fire_power_mw,\n    avg_fire_power_mw,\n    max_brightness_temperature_k,\n    satellite_sources_count,\n    fire_duration_hours,\n    fire_intensity_classification,\n    fire_status\nFROM fire_intensity_classification\nWHERE cluster_center_latitude BETWEEN 24 AND 50\n    AND 
cluster_center_longitude BETWEEN -125 AND -66\nORDER BY total_fire_power_mw DESC, first_detection_time DESC\nLIMIT 5000;",
      "line_number": 7207,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.012528,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    },
    {
      "number": 30,
      "title": "US-Wide Composite Product Generation (NEXRAD + Satellite)",
      "description": "Use Case: Comprehensive Weather Monitoring - Multi-Source Composite Products for National Weather Analysis Description:
    Generates US-wide composite products combining NEXRAD radar and satellite imagery data. Creates seamless nationwide weather products with improved coverage and accuracy. Purpose: US-wide composite products combining radar and satellite data for comprehensive weather monitoring. Purpose: Provides comprehensive weather monitoring by combining strengths of radar (high resolution) ",
      "complexity": "Multiple CTEs (7+ levels), multi-source data fusion, composite generation, quality weighting, spatial interpolation",
      "expected_output": "Query results",
      "sql": "WITH us_spatial_bounds AS (\n    SELECT -125.0 AS west_bound, 24.0 AS south_bound, -66.0 AS east_bound, 50.0 AS north_bound\n),\nrecent_nexrad_data AS (\n    SELECT\n        nrg.site_id, nrg.scan_time, nrg.grid_latitude, nrg.grid_longitude, nrg.grid_geom,\n        nrg.reflectivity_value AS composite_reflectivity_dbz, NULL::NUMERIC AS precipitation_rate_mmh,\n        ST_DISTANCE(nrs.site_geom::geography, nrg.grid_geom::geography) / 1000.0 AS distance_from_radar_km\n    FROM nexrad_reflectivity_grid nrg\n    INNER JOIN nexrad_radar_sites nrs ON nrg.site_id = nrs.site_id\n    WHERE nrg.scan_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour'\n        AND nrg.grid_geom IS NOT NULL\n),\nrecent_satellite_data AS (\n    SELECT\n        sip.source_id, sip.scan_start_time, ST_Y(sip.grid_geom::geometry) AS grid_latitude, ST_X(sip.grid_geom::geometry) AS grid_longitude, sip.grid_geom,\n        NULL::NUMERIC AS brightness_temperature_k, NULL::NUMERIC AS cloud_top_height_m, sip.precipitation_rate_mmh\n    FROM satellite_imagery_products sip\n    WHERE sip.scan_start_time >= CURRENT_TIMESTAMP - INTERVAL '1 hour'\n        AND sip.decompression_status = 'Success'\n        AND sip.grid_geom IS NOT NULL\n),\nus_composite_grid AS (\n    SELECT\n        'COMP_' || LPAD(ROW_NUMBER() OVER (ORDER BY lat, lon)::VARCHAR, 10, '0') AS grid_id,\n        lat AS grid_latitude, lon AS grid_longitude,\n        ST_SETSRID(ST_MAKEPOINT(lon, lat), 4326)::GEOGRAPHY AS grid_geom\n    FROM (\n        SELECT generate_series(24, 50, 0.01) AS lat, generate_series(-125, -66, 0.01) AS lon\n    ) grid_points\n    WHERE lat BETWEEN 24 AND 50 AND lon BETWEEN -125 AND -66\n),\ngrid_data_matching AS (\n    SELECT\n        ucg.grid_id, ucg.grid_latitude, ucg.grid_longitude, ucg.grid_geom,\n        AVG(rnd.composite_reflectivity_dbz) AS nexrad_reflectivity,\n        AVG(rnd.precipitation_rate_mmh) AS nexrad_precipitation,\n        AVG(rsd.brightness_temperature_k) AS satellite_temperature,\n        
AVG(rsd.cloud_top_height_m) AS satellite_cloud_height,\n        AVG(rsd.precipitation_rate_mmh) AS satellite_precipitation,\n        COUNT(DISTINCT rnd.site_id) AS nexrad_sites_count,\n        COUNT(DISTINCT rsd.source_id) AS satellite_sources_count\n    FROM us_composite_grid ucg\n    LEFT JOIN recent_nexrad_data rnd ON ST_DWITHIN(ucg.grid_geom, rnd.grid_geom, 50000)\n    LEFT JOIN recent_satellite_data rsd ON ST_DWITHIN(ucg.grid_geom, rsd.grid_geom, 10000)\n    GROUP BY ucg.grid_id, ucg.grid_latitude, ucg.grid_longitude, ucg.grid_geom\n),\ncomposite_calculation AS (\n    SELECT\n        gdm.grid_id, gdm.grid_latitude, gdm.grid_longitude,\n        gdm.nexrad_reflectivity, gdm.nexrad_precipitation,\n        gdm.satellite_temperature, gdm.satellite_cloud_height, gdm.satellite_precipitation,\n        gdm.nexrad_sites_count, gdm.satellite_sources_count,\n        CASE\n            WHEN gdm.nexrad_precipitation IS NOT NULL AND gdm.satellite_precipitation IS NOT NULL THEN\n                (gdm.nexrad_precipitation * 0.6 + gdm.satellite_precipitation * 0.4)\n            WHEN gdm.nexrad_precipitation IS NOT NULL THEN gdm.nexrad_precipitation\n            WHEN gdm.satellite_precipitation IS NOT NULL THEN gdm.satellite_precipitation\n            ELSE NULL\n        END AS composite_precipitation_rate_mmh,\n        CASE\n            WHEN gdm.nexrad_sites_count > 0 AND gdm.satellite_sources_count > 0 THEN 'Fused'\n            WHEN gdm.nexrad_sites_count > 0 THEN 'NEXRAD Only'\n            WHEN gdm.satellite_sources_count > 0 THEN 'Satellite Only'\n            ELSE 'No Data'\n        END AS data_source\n    FROM grid_data_matching gdm\n)\nSELECT\n    grid_id, grid_latitude, grid_longitude,\n    ROUND(CAST(CAST(nexrad_reflectivity AS NUMERIC) AS NUMERIC), 2) AS nexrad_reflectivity_dbz,\n    ROUND(CAST(CAST(nexrad_precipitation AS NUMERIC) AS NUMERIC), 2) AS nexrad_precipitation_rate_mmh,\n    ROUND(CAST(CAST(satellite_temperature AS NUMERIC) AS NUMERIC), 2) AS 
satellite_temperature_k,\n    ROUND(CAST(CAST(satellite_cloud_height AS NUMERIC) AS NUMERIC), 0) AS satellite_cloud_height_m,\n    ROUND(CAST(CAST(satellite_precipitation AS NUMERIC) AS NUMERIC), 2) AS satellite_precipitation_rate_mmh,\n    ROUND(CAST(CAST(composite_precipitation_rate_mmh AS NUMERIC) AS NUMERIC), 2) AS composite_precipitation_rate_mmh,\n    nexrad_sites_count, satellite_sources_count, data_source\nFROM composite_calculation\nWHERE composite_precipitation_rate_mmh IS NOT NULL\nORDER BY grid_latitude, grid_longitude\nLIMIT 50000;",
      "line_number": 7366,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.017099,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:11.960853"
      }
    }
  ],
  "execution_test_results": {
    "test_timestamp": "2026-02-08T21:06:11.960853",
    "total_queries": 30,
    "passed": 30,
    "failed": 0,
    "success_rate": 100.0,
    "average_execution_time": 0.011033366666666666,
    "total_execution_time": 0.331001
  }
}
# Pull the list of query records out of the embedded QUERIES_DATA dict and
# print a short overview (first five titles, truncated to 60 characters).
queries = QUERIES_DATA.get('queries', [])
total_queries = len(queries)
print("="*80)
print("EMBEDDED QUERIES LOADED")
print("="*80)
print(f"Total Queries: {total_queries}")
print(f"Source: Embedded in notebook (no file dependency)")
if queries:
    print(f"\nQuery Overview:")
    for q in queries[:5]:
        title = q.get('title', 'N/A')[:60]
        print(f"  Query {q.get('number')}: {title}...")
    # Only mention the remainder when more than five queries exist
    if total_queries > 5:
        print(f"  ... and {total_queries - 5} more queries")
print("="*80)
print("‚úÖ Queries ready to execute!")
print("="*80)


In [None]:
# ============================================================================
# LOAD QUERIES (FROM EMBEDDED DATA)
# ============================================================================
# Queries are normally loaded by the embedded QUERIES_DATA cell above.
# If `queries` is missing, fall back to QUERIES_DATA when that dict exists in
# the kernel; otherwise just tell the user which cell to run. (The notebook's
# own JSON is not available as a variable inside a running kernel, so it cannot
# be searched from here.)
if 'queries' not in globals():
    print("‚ö†Ô∏è  Queries not found. Run the 'Embedded Queries' cell first.")
    print("   Looking for embedded queries...")
    if isinstance(globals().get('QUERIES_DATA'), dict):
        queries = QUERIES_DATA.get('queries', [])
        total_queries = len(queries)
        print(f"   ‚úÖ Found embedded queries in cell")
else:
    print("="*80)
    print("QUERIES LOADED")
    print("="*80)
    print(f"Total Queries: {len(queries)}")
    if queries:
        print(f"\nQuery Overview:")
        for q in queries[:5]:
            title = q.get('title', 'N/A')[:60]
            print(f"  Query {q.get('number')}: {title}...")
        # Only mention the remainder when more than five queries exist
        if len(queries) > 5:
            print(f"  ... and {len(queries) - 5} more queries")
    print("="*80)


## Step 5: Database Connection

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-6"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Returns:
        The connection object returned by ``psycopg2.connect`` for the local
        server's default ``postgres`` database.

    Raises:
        RuntimeError: If ``IS_COLAB`` is falsy.
        Exception: Any error raised by ``psycopg2.connect`` is re-raised after
            troubleshooting hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password='postgres',  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection
conn = create_postgresql_connection()
print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 6: Execute All Queries

## Step 5: Query Execution Function

In [None]:
# ============================================================================
# QUERY EXECUTION FUNCTION WITH METRICS
# ============================================================================

import time
import pandas as pd

def execute_query_with_metrics(db_name: str, query_sql: str, query_num: int, db_config: dict = None):
    """
    Execute SQL query with metrics collection.

    Args:
        db_name: Database name. Accepted for call-site symmetry; this function
            does not read it.
        query_sql: SQL query string
        query_num: Query number, echoed back as ``query_number`` in the result
        db_config: Keyword arguments for ``psycopg2.connect``. When given, a
            dedicated connection is opened for this call and closed afterwards;
            when None, the global ``conn`` is used and left open.

    Returns:
        dict: Query execution results with metrics. Keys: ``query_number``,
        ``success``, ``execution_time`` (seconds), ``row_count``,
        ``column_count``, ``dataframe`` (None on failure) and ``error``
        (the exception text on failure, otherwise None).
    """
    result = {
        'query_number': query_num,
        'success': False,
        'execution_time': 0.0,
        'row_count': 0,
        'column_count': 0,
        'dataframe': None,
        'error': None
    }

    owns_connection = db_config is not None
    exec_conn = None
    cursor = None
    start_time = None

    try:
        if owns_connection:
            # Create new connection from config
            exec_conn = psycopg2.connect(**db_config)
        else:
            # Use the global conn variable
            if 'conn' not in globals():
                raise RuntimeError("Database connection not available. Run connection cell first.")
            exec_conn = globals()['conn']

        # Start timing
        start_time = time.time()

        # Execute query
        cursor = exec_conn.cursor()
        cursor.execute(query_sql)

        # A statement without a result set leaves description as None, and
        # fetchall() would raise in that case, so only fetch when rows exist.
        if cursor.description:
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
        else:
            columns, rows = [], []

        # Calculate execution time
        execution_time = time.time() - start_time

        # Keep the column names even when the query returned zero rows
        df = pd.DataFrame(rows, columns=columns) if columns else pd.DataFrame()

        # Update result
        result['success'] = True
        result['execution_time'] = execution_time
        result['row_count'] = len(df)
        result['column_count'] = len(columns)
        result['dataframe'] = df

    except Exception as e:
        result['success'] = False
        result['error'] = str(e)
        result['execution_time'] = time.time() - start_time if start_time is not None else 0.0
        # A failed statement leaves the transaction aborted; roll back so the
        # next query on a shared connection is not rejected. Best effort only:
        # the original error is what the caller needs to see.
        if exec_conn is not None:
            try:
                exec_conn.rollback()
            except Exception:
                pass

    finally:
        # Release the cursor on every path, and the connection if we opened it
        if cursor is not None:
            try:
                cursor.close()
            except Exception:
                pass
        if owns_connection and exec_conn is not None:
            try:
                exec_conn.close()
            except Exception:
                pass

    return result

# Connection settings in the keyword form taken by psycopg2.connect().
# Passing this as db_config makes execute_query_with_metrics open its own
# connection; leaving db_config as None makes it use the global conn instead.
DB_CONFIG = dict(
    host='localhost',
    port=5432,
    user='postgres',
    password='postgres',
    database='postgres',
)

print(
    "‚úÖ Query execution function loaded",
    "   Function: execute_query_with_metrics(db_name, query_sql, query_num, db_config=None)",
    sep="\n",
)


In [None]:
# ============================================================================
# EXECUTE ALL QUERIES - END-TO-END TESTING
# ============================================================================
# Runs every entry of `queries` through execute_query_with_metrics, collects
# the per-query result dicts in `all_results`, and prints a pass/fail summary.

all_results = []

print("="*80)
print("EXECUTING ALL QUERIES")
print("="*80)

for query_info in queries:
    query_num = query_info.get('number')
    query_sql = query_info.get('sql', '')
    query_title = query_info.get('title', f'Query {query_num}')

    # Passing DB_CONFIG gives each query its own connection
    result = execute_query_with_metrics(DB_NAME, query_sql, query_num, DB_CONFIG)
    result['query_number'] = query_num
    result['query_title'] = query_title
    result['query_info'] = query_info

    all_results.append(result)

    status = "‚úÖ" if result['success'] else "‚ùå"
    print(f"{status} Query {query_num:2d}: {query_title[:50]:<50} ({result['execution_time']:.3f}s, {result['row_count']:4d} rows)")

# Summary
passed = sum(1 for r in all_results if r['success'])
failed = sum(1 for r in all_results if not r['success'])
# Guard the division so an empty query list reports 0% instead of raising
success_rate = passed / total_queries * 100 if total_queries > 0 else 0

print(f"\n{'='*80}")
print(f"EXECUTION SUMMARY")
print(f"{'='*80}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {success_rate:.1f}%")
print(f"{'='*80}")

## Step 7: Performance Visualization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# ============================================================================
# PERFORMANCE VISUALIZATION
# ============================================================================
# Builds one row per executed query from `all_results`, draws a 2x2 dashboard
# (time per query, time histogram, rows per query, pass/fail share) and prints
# summary statistics.

# Create performance metrics DataFrame
perf_data = []
for r in all_results:
    perf_data.append({
        'Query': r['query_number'],
        'Title': r['query_title'][:40] + '...' if len(r['query_title']) > 40 else r['query_title'],
        'Execution Time (s)': r['execution_time'],
        'Row Count': r['row_count'],
        'Column Count': r['column_count'],
        'Status': 'Passed' if r['success'] else 'Failed'
    })

# Naming the columns explicitly keeps the frame well-formed (and the column
# lookups below valid) even when all_results is empty.
perf_df = pd.DataFrame(
    perf_data,
    columns=['Query', 'Title', 'Execution Time (s)', 'Row Count', 'Column Count', 'Status'],
)

if perf_df.empty:
    print("No query results to visualize. Run the query execution cell first.")
else:
    # Visualization: Execution Time Distribution
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Execution time bar chart
    axes[0, 0].bar(perf_df['Query'], perf_df['Execution Time (s)'], color='steelblue', alpha=0.7)
    axes[0, 0].set_xlabel('Query Number')
    axes[0, 0].set_ylabel('Execution Time (seconds)')
    axes[0, 0].set_title('Query Execution Time by Query Number')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3)

    # Execution time histogram
    axes[0, 1].hist(perf_df['Execution Time (s)'], bins=20, color='coral', alpha=0.7, edgecolor='black')
    axes[0, 1].set_xlabel('Execution Time (seconds)')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].set_title('Distribution of Execution Times')
    axes[0, 1].grid(True, alpha=0.3)

    # Row count bar chart
    axes[1, 0].bar(perf_df['Query'], perf_df['Row Count'], color='green', alpha=0.7)
    axes[1, 0].set_xlabel('Query Number')
    axes[1, 0].set_ylabel('Row Count')
    axes[1, 0].set_title('Rows Returned by Query')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(True, alpha=0.3)

    # Status pie chart
    status_counts = perf_df['Status'].value_counts()
    axes[1, 1].pie(status_counts.values, labels=status_counts.index, autopct='%1.1f%%', startangle=90)
    axes[1, 1].set_title('Query Execution Status')

    plt.tight_layout()
    plt.show()

    # Display performance summary
    print("\n" + "="*80)
    print("PERFORMANCE SUMMARY")
    print("="*80)
    print(f"Average execution time: {perf_df['Execution Time (s)'].mean():.3f}s")
    print(f"Median execution time: {perf_df['Execution Time (s)'].median():.3f}s")
    print(f"Max execution time: {perf_df['Execution Time (s)'].max():.3f}s")
    print(f"Min execution time: {perf_df['Execution Time (s)'].min():.3f}s")
    print(f"Total rows returned: {perf_df['Row Count'].sum():,}")
    print(f"Average rows per query: {perf_df['Row Count'].mean():.1f}")
    print("="*80)

## Step 8: Individual Query Documentation and Visualization

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML, Markdown

# ============================================================================
# INDIVIDUAL QUERY DOCUMENTATION AND VISUALIZATION
# ============================================================================

def document_and_visualize_query(query_result: dict, query_num: int):
    """Create comprehensive documentation and visualization for a single query.

    Renders a markdown summary of the query (status, timing, metadata and the
    first 1000 characters of its SQL). For a successful query with rows it then
    shows the first ten rows, histograms for up to three numeric columns and,
    when there are at least two numeric columns, a correlation heatmap. For a
    failed query it prints the first 500 characters of the error.

    Args:
        query_result: Result dict for one query. Reads the keys ``query_info``,
            ``success``, ``execution_time``, ``row_count``, ``column_count``,
            ``dataframe`` and ``error``.
        query_num: Query number used in headings and messages.

    Returns:
        None. Output is displayed or printed.
    """
    query_info = query_result['query_info']

    # Create markdown documentation
    doc = f"""
## Query {query_num}: {query_info.get('title', 'N/A')}

### Execution Status
- **Status:** {'‚úÖ PASSED' if query_result['success'] else '‚ùå FAILED'}
- **Execution Time:** {query_result['execution_time']:.3f} seconds
- **Rows Returned:** {query_result['row_count']:,}
- **Columns Returned:** {query_result['column_count']}

### Query Information
- **Description:** {query_info.get('description', 'N/A')[:300]}...
- **Use Case:** {query_info.get('use_case', 'N/A')}
- **Business Value:** {query_info.get('business_value', 'N/A')}
- **Complexity:** {query_info.get('complexity', 'N/A')}
- **Expected Output:** {query_info.get('expected_output', 'N/A')}

### SQL Query
```sql
{query_info.get('sql', '')[:1000]}...
```

### Results Preview
"""

    # Fall back to plain text when rich display is unavailable
    try:
        display(Markdown(doc))
    except Exception:
        print(doc)

    if query_result['success'] and query_result['dataframe'] is not None:
        df = query_result['dataframe']

        if len(df) > 0:
            print(f"\nFirst 10 rows of Query {query_num}:")
            try:
                display(df.head(10))
            except Exception:
                print(df.head(10).to_string())

            # Create visualizations if numeric data exists
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if len(numeric_cols) > 0:
                num_plots = min(3, len(numeric_cols))
                fig, axes = plt.subplots(1, num_plots, figsize=(15, 4))
                # plt.subplots returns a bare Axes for a single plot; wrap it
                # so the indexing below works for every plot count.
                if num_plots == 1:
                    axes = [axes]

                for idx, col in enumerate(numeric_cols[:num_plots]):
                    if df[col].notna().sum() > 0:
                        axes[idx].hist(df[col].dropna(), bins=min(20, len(df)), alpha=0.7, edgecolor='black')
                        axes[idx].set_title(f'Distribution of {col[:30]}')
                        axes[idx].set_xlabel(col[:30])
                        axes[idx].set_ylabel('Frequency')
                        axes[idx].grid(True, alpha=0.3)

                plt.tight_layout()
                plt.show()

                # Create correlation heatmap if multiple numeric columns
                if len(numeric_cols) > 1:
                    fig, ax = plt.subplots(figsize=(10, 8))
                    corr_matrix = df[numeric_cols].corr()
                    sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax)
                    ax.set_title('Correlation Matrix of Numeric Columns')
                    plt.tight_layout()
                    plt.show()
        else:
            print(f"\nQuery {query_num} returned 0 rows.")
    elif query_result.get('error'):
        print(f"\n‚ùå Error: {query_result['error'][:500]}")

# Render the documentation section for every collected query result, in order,
# framing the whole run and each query with an 80-character rule.
section_rule = "=" * 80
print(section_rule, "INDIVIDUAL QUERY DOCUMENTATION", section_rule, sep="\n")

for query_result in all_results:
    query_num = query_result['query_number']
    document_and_visualize_query(query_result, query_num)
    print(f"\n{section_rule}\n")

## Step 9: Generate Comprehensive Report

In [None]:
# ============================================================================
# GENERATE COMPREHENSIVE REPORT
# ============================================================================

# Percentage of passing queries; defined as 0 when no query was run so the
# division is never attempted with a zero denominator.
if total_queries > 0:
    success_rate = passed / total_queries * 100
else:
    success_rate = 0

# Run-level summary; the per-query entries are appended below.
report_data = {
    'database': DB_NAME,
    'test_timestamp': datetime.now().isoformat(),
    'total_queries': total_queries,
    'passed': passed,
    'failed': failed,
    'success_rate': success_rate,
    'average_execution_time': perf_df['Execution Time (s)'].mean(),
    'total_execution_time': perf_df['Execution Time (s)'].sum(),
    'queries': []
}

# (report field, key in the result record) pairs, in output order.
REPORT_FIELD_SOURCES = (
    ('number', 'query_number'),
    ('title', 'query_title'),
    ('success', 'success'),
    ('execution_time', 'execution_time'),
    ('row_count', 'row_count'),
    ('column_count', 'column_count'),
    ('columns', 'columns'),
)

for r in all_results:
    query_report = {field: r[source] for field, source in REPORT_FIELD_SOURCES}
    # Only failed queries carry their error text into the report.
    if not r['success']:
        query_report['error'] = r['error']
    report_data['queries'].append(query_report)

# Save report as JSON under <DB_DIR>/results/.
report_file = DB_DIR / 'results' / f'{DB_NAME}_comprehensive_report.json'
# parents=True also creates missing ancestor directories, so a not-yet-existing
# DB_DIR no longer raises FileNotFoundError.
report_file.parent.mkdir(parents=True, exist_ok=True)

# Explicit UTF-8 keeps the file independent of the platform's default encoding.
# default=str stringifies any value the json encoder cannot serialize natively.
with open(report_file, 'w', encoding='utf-8') as f:
    json.dump(report_data, f, indent=2, default=str)

# Print the run summary from the values already stored in report_data. Its
# success rate is guarded against total_queries == 0, so an empty run prints
# 0.0% instead of raising ZeroDivisionError, and the timing aggregates are not
# recomputed.
print("="*80)
print("COMPREHENSIVE TEST REPORT")
print("="*80)
print(f"Database: {DB_NAME}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {report_data['success_rate']:.1f}%")
print(f"Average Execution Time: {report_data['average_execution_time']:.3f}s")
print(f"Total Execution Time: {report_data['total_execution_time']:.3f}s")
print(f"\n‚úÖ Report saved to: {report_file}")
print("="*80)

# Closing banner for the whole end-to-end run.
print("\n" + "="*80)
print("END-TO-END TESTING COMPLETE")
print("="*80)
print(f"‚úÖ Database '{DB_NAME}' initialized and tested")
print(f"‚úÖ All {total_queries} queries executed")
print("‚úÖ Performance metrics collected")
print("‚úÖ Comprehensive report generated")
print("="*80)