## Python 3.14.2 Installation
This notebook requires Python 3.14.2. Run the cell below to install and verify Python 3.14.2.

In [None]:
# ============================================================================
# PYTHON 3.14.2 INSTALLATION FOR GOOGLE COLAB
# ============================================================================
import subprocess
import sys
import os

# Target version
TARGET_MAJOR = 3
TARGET_MINOR = 14
TARGET_MICRO = 2


def _detect_colab():
    """Return True when the kernel looks like a Google Colab runtime.

    Colab is assumed when the ``google.colab`` module is importable, or when
    ``/content`` exists and a ``COLAB`` marker appears in the environment.
    """
    try:
        import google.colab  # noqa: F401
        return True
    except ImportError:
        return os.path.exists('/content') and 'COLAB' in str(os.environ)


def _command_succeeds(command, **options):
    """Run *command* and report whether it exited with status 0.

    A missing executable or a timeout counts as a failure instead of raising,
    so the caller can fall through to the next installation method.
    """
    try:
        return subprocess.run(command, **options).returncode == 0
    except (OSError, subprocess.SubprocessError):
        return False


def _python314_version():
    """Return the ``python3.14 --version`` banner, or None if unavailable."""
    try:
        result = subprocess.run(['python3.14', '--version'],
                                capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.SubprocessError):
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip()


def _install_with_conda():
    """Method 1: install through conda when it is on PATH. Returns success."""
    print("\nMethod 1: Trying conda...")
    if not _command_succeeds(['conda', '--version'],
                             capture_output=True, text=True, timeout=5):
        print("   ⚠️  Conda not available")
        return False
    print("   ✅ Conda found, installing Python 3.14.2...")
    if not _command_succeeds(['conda', 'install', '-y', 'python=3.14.2']):
        print("   ⚠️  conda install failed")
        return False
    print("   ✅ Python 3.14.2 installed via conda")
    print("   ⚠️  Restart kernel and re-run this cell to use Python 3.14.2")
    return True


def _install_with_deadsnakes():
    """Method 2: install from the deadsnakes PPA (Ubuntu/Debian). Returns success."""
    print("\nMethod 2: Installing via deadsnakes PPA...")
    preparation = [
        ['apt-get', 'update', '-qq'],
        ['apt-get', 'install', '-y', 'software-properties-common'],
        ['add-apt-repository', '-y', 'ppa:deadsnakes/ppa'],
        ['apt-get', 'update', '-qq'],
    ]
    for step in preparation:
        # Preparation steps are best-effort; the final install decides success.
        _command_succeeds(step)
    if not _command_succeeds(['apt-get', 'install', '-y',
                              'python3.14', 'python3.14-venv', 'python3.14-dev']):
        print("   ⚠️  deadsnakes PPA installation failed")
        return False
    print("   ✅ Python 3.14.2 installed via deadsnakes PPA")
    return True


def _install_with_pyenv():
    """Method 3: build the interpreter with pyenv. Returns success."""
    print("\nMethod 3: Installing via pyenv...")
    pyenv_root = os.path.join(os.path.expanduser('~'), '.pyenv')
    pyenv_bin = os.path.join(pyenv_root, 'bin')
    pyenv_shims = os.path.join(pyenv_root, 'shims')

    # Every child process starts from this process's environment, so a shell
    # `export` in one call never reaches the next one. Put pyenv on PATH here.
    os.environ['PYENV_ROOT'] = pyenv_root
    path_entries = os.environ.get('PATH', '').split(os.pathsep)
    if pyenv_bin not in path_entries:
        os.environ['PATH'] = os.pathsep.join([pyenv_bin, pyenv_shims] + path_entries)

    # NOTE(review): this pipes a downloaded script straight into bash; pin or
    # verify the installer if this notebook runs anywhere security-sensitive.
    _command_succeeds('curl https://pyenv.run | bash', shell=True)

    installed = (_command_succeeds(['pyenv', 'install', '3.14.2'])
                 and _command_succeeds(['pyenv', 'global', '3.14.2']))
    if not installed:
        print("   ⚠️  pyenv installation failed")
        return False
    print("   ✅ Python 3.14.2 installed via pyenv")
    return True


print("="*80)
print("PYTHON 3.14.2 INSTALLATION")
print("="*80)

# Check current Python version
current_version = sys.version_info
print(f"\nCurrent Python version: {current_version.major}.{current_version.minor}.{current_version.micro}")
print(f"Python executable: {sys.executable}")

# This cell runs before the notebook's Colab check, so define IS_COLAB here
# when no earlier cell has done so (avoids a NameError on a fresh kernel).
if 'IS_COLAB' not in globals():
    IS_COLAB = _detect_colab()

if current_version.major == TARGET_MAJOR and current_version.minor == TARGET_MINOR and current_version.micro == TARGET_MICRO:
    print(f"\n✅ Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is already installed!")
else:
    print(f"\n⚠️  Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is required")
    print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")
    print(f"\nInstalling Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO}...")

    if not IS_COLAB:
        raise RuntimeError("Python 3.14.2 installation requires Google Colab")

    try:
        # Try each method in turn and stop at the first one that leaves a
        # working python3.14 on PATH; later methods are fallbacks only.
        for install_method in (_install_with_conda, _install_with_deadsnakes, _install_with_pyenv):
            if install_method() and _python314_version() is not None:
                break

        # Verify installation
        print("\nVerifying Python 3.14.2 installation...")
        version_output = _python314_version()
        if version_output is not None:
            print(f"   ✅ Python 3.14 found: {version_output}")
            if '3.14.2' in version_output:
                print("   ✅ Python 3.14.2 is installed!")
            print("\n⚠️  IMPORTANT: Restart kernel and select Python 3.14.2 as kernel")
            print("   Or use: !python3.14 your_script.py")
        else:
            print("   ⚠️  Python 3.14.2 installation may have failed")
            print("   Current Python version will be used")

    except Exception as e:
        print(f"\n❌ Error installing Python 3.14.2: {e}")
        print("\n⚠️  Continuing with current Python version")
        print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")

# Verify Python version
print("\n" + "="*80)
print("PYTHON VERSION VERIFICATION")
print("="*80)
final_version = sys.version_info
print(f"Python version: {final_version.major}.{final_version.minor}.{final_version.micro}")
print(f"Python executable: {sys.executable}")
if final_version.major == TARGET_MAJOR and final_version.minor == TARGET_MINOR and final_version.micro == TARGET_MICRO:
    print(f"\n✅ Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is active!")
else:
    print(f"\n⚠️  Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is not active")
    print(f"   Current version: {final_version.major}.{final_version.minor}.{final_version.micro}")
    print("   If Python 3.14.2 was installed, restart kernel and select Python 3.14.2")
print("="*80)


# DB-9: Data Analytics Database - End-to-End Query Testing

This notebook provides **complete end-to-end setup and testing** from scratch:

1. **Environment Setup**: Install all required Python packages automatically
2. **Database Initialization**: Create database, load schema, load data
3. **Query Execution**: Execute all 30 queries with metrics
4. **Visualization**: Performance charts and data analysis
5. **Documentation**: Comprehensive query documentation

## Database Overview

**Database Name:** Data Analytics Database  
**Database ID:** db-9  
**Domain:** Data Analytics  
**Total Queries:** 30  

## Prerequisites

- PostgreSQL server running (localhost or configured via environment variables)
- Python 3.14.2 installed
- Jupyter Notebook or JupyterLab

**Note:** All Python packages will be installed automatically when you run the first cell.

In [None]:
# ============================================================================
# GOOGLE COLAB ONLY - ENVIRONMENT CHECK
# ============================================================================
import sys
import os

# Verify we're running in Google Colab.
# IS_COLAB is read by later cells, so it is always defined before any check.
IS_COLAB = False
try:
    import google.colab
    IS_COLAB = True
    print("✅ Running in Google Colab")
except ImportError:
    # The module is missing: fall back to filesystem/environment markers.
    if os.path.exists('/content') and os.environ.get('COLAB_GPU'):
        IS_COLAB = True
        print("✅ Running in Google Colab (detected via COLAB_GPU)")
    elif os.path.exists('/content') and 'COLAB' in str(os.environ):
        IS_COLAB = True
        print("✅ Running in Google Colab (detected via COLAB env)")

# Stop the notebook early: every later cell assumes a Colab runtime.
if not IS_COLAB:
    raise RuntimeError(
        "❌ ERROR: This notebook is designed to run ONLY in Google Colab.\n"
        "Please open this notebook in Google Colab: https://colab.research.google.com/"
    )

print("="*80)
print("GOOGLE COLAB ENVIRONMENT CONFIRMED")
print("="*80)

## PostgreSQL Setup for Google Colab

This notebook requires PostgreSQL. Run the PostgreSQL setup cell below (it follows the data directory detection cell) to install and start PostgreSQL in Colab.

## Data Directory Detection

This notebook automatically detects the `data/` directory containing `schema.sql` and `data.sql` files.
It works when uploaded to Google Colab or run locally.

In [None]:
# ============================================================================
# SELF-AWARE DATA DIRECTORY DETECTION
# ============================================================================
import os
import sys
from pathlib import Path

print("="*80)
print("DATA DIRECTORY DETECTION")
print("="*80)


def _find_nested_data_dir(root, max_depth):
    """Return the first qualifying ``data`` directory below *root*, or None.

    A directory qualifies when it is named ``data``, lies at most *max_depth*
    levels below *root*, and contains ``schema.sql`` or ``data.sql``.
    Unreadable directories are skipped (``os.walk`` ignores listing errors).
    """
    expected_files = ('schema.sql', 'data.sql')
    root = str(root)
    base_depth = root.rstrip(os.sep).count(os.sep)
    for dirpath, dirnames, _filenames in os.walk(root):
        depth = dirpath.rstrip(os.sep).count(os.sep) - base_depth
        dirnames.sort()  # deterministic traversal order
        if 'data' in dirnames:
            candidate = Path(dirpath) / 'data'
            if any((candidate / name).exists() for name in expected_files):
                return candidate
        # Children of this directory sit at depth + 1; stop descending once
        # their own children would exceed the limit.
        if depth + 1 >= max_depth:
            dirnames[:] = []
    return None


def find_data_directory(max_depth=4):
    """
    Self-aware function to find data/ directory.
    Works when notebook is uploaded to Colab or run locally.

    Args:
        max_depth: How many directory levels below each search root the
            recursive search may descend. The search roots include ancestors
            of the working directory (possibly ``/``), so an unbounded scan
            could walk the whole filesystem.

    Returns:
        Path of the detected data/ directory, or None when nothing was found.
    """
    if IS_COLAB:
        # In Colab, check common locations
        search_paths = [
            Path('/content'),
            Path('/content/drive/MyDrive'),
            Path.cwd(),
        ]
    else:
        # Local execution
        search_paths = [
            Path.cwd(),
            Path(__file__).parent if '__file__' in globals() else Path.cwd(),
            Path.cwd().parent,
        ]

    # Also check the working directory and up to four of its ancestors
    current = Path.cwd()
    for _ in range(5):
        search_paths.append(current)
        current = current.parent

    # The lists above overlap; drop repeats (order kept) so no root is
    # scanned twice.
    search_paths = list(dict.fromkeys(search_paths))

    print("\nSearching for data/ directory...")
    print(f"Current working directory: {Path.cwd()}")

    # Search for data/ directory
    data_dir = None
    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Check if data/ exists here
        potential_data = search_path / 'data'
        if potential_data.exists() and potential_data.is_dir():
            data_dir = potential_data
            print(f"✅ Found data/ directory: {data_dir}")
            break

        # Depth-limited recursive search of subdirectories
        try:
            nested = _find_nested_data_dir(search_path, max_depth)
        except (PermissionError, OSError):
            continue
        if nested is not None:
            data_dir = nested
            print(f"✅ Found data/ directory (recursive): {data_dir}")
            break

    if not data_dir:
        # Try finding by database name pattern
        db_name = Path.cwd().name
        if db_name.startswith('db-'):
            # Look for db-N/data pattern
            for search_path in search_paths:
                potential_db = search_path / db_name / 'data'
                if potential_db.exists() and potential_db.is_dir():
                    data_dir = potential_db
                    print(f"✅ Found data/ directory by DB name: {data_dir}")
                    break

    return data_dir


def verify_data_directory(data_dir: Path):
    """Verify data/ directory contains expected files.

    Prints one status line per file. Returns True when every required file
    (``schema.sql``) exists; ``data.sql`` is optional and only reported.
    Returns False for a missing directory or a missing required file.
    """
    if not data_dir or not data_dir.exists():
        return False

    expected_files = ['schema.sql']
    optional_files = ['data.sql']

    print("\nVerifying data/ directory contents...")
    print(f"Location: {data_dir}")

    missing_files = []

    for file_name in expected_files:
        if (data_dir / file_name).exists():
            print(f"  ✅ {file_name}")
        else:
            missing_files.append(file_name)
            print(f"  ❌ {file_name} (missing)")

    for file_name in optional_files:
        if (data_dir / file_name).exists():
            print(f"  ✅ {file_name} (optional)")
        else:
            print(f"  ⚠️  {file_name} (optional, not found)")

    if missing_files:
        print(f"\n⚠️  Warning: Missing required files: {missing_files}")
        return False

    return True


# Detect data directory
DATA_DIR = find_data_directory()
if DATA_DIR:
    if verify_data_directory(DATA_DIR):
        print("\n✅ Data directory verified and ready!")
        print(f"   Schema file: {DATA_DIR / 'schema.sql'}")
        if (DATA_DIR / 'data.sql').exists():
            print(f"   Data file: {DATA_DIR / 'data.sql'}")

        # Set global variables for use in other cells
        SCHEMA_FILE = DATA_DIR / 'schema.sql'
        DATA_FILE = DATA_DIR / 'data.sql' if (DATA_DIR / 'data.sql').exists() else None

        print("\n✅ Global variables set:")
        print(f"   DATA_DIR = {DATA_DIR}")
        print(f"   SCHEMA_FILE = {SCHEMA_FILE}")
        if DATA_FILE:
            print(f"   DATA_FILE = {DATA_FILE}")
    else:
        print("\n⚠️  Data directory found but verification failed")
        print(f"   Location: {DATA_DIR}")
        print("   Please ensure schema.sql exists in this directory")
else:
    print("\n❌ Data directory not found!")
    print("\nTroubleshooting:")
    print("1. Ensure data/ directory is uploaded to Colab")
    print("2. Check that data/ contains schema.sql")
    print("3. Verify notebook is in same directory structure as data/")
    print(f"\nCurrent directory: {Path.cwd()}")
    print("Contents:")
    try:
        for item in sorted(Path.cwd().iterdir()):
            print(f"  - {item.name} ({'dir' if item.is_dir() else 'file'})")
    except PermissionError:
        print("  (Permission denied)")

print("="*80)

In [None]:
# ============================================================================
# POSTGRESQL SETUP FOR GOOGLE COLAB
# ============================================================================
import subprocess
import time
import os

print("="*80)
print("POSTGRESQL SETUP FOR GOOGLE COLAB")
print("="*80)

if not IS_COLAB:
    raise RuntimeError("This notebook requires Google Colab")

# Check if PostgreSQL is already installed
postgres_installed = False
try:
    result = subprocess.run(['psql', '--version'],
                            capture_output=True,
                            text=True,
                            timeout=5)
    if result.returncode == 0:
        print(f"✅ PostgreSQL already installed: {result.stdout.strip()}")
        postgres_installed = True
except (FileNotFoundError, subprocess.TimeoutExpired):
    # psql missing or unresponsive: treat as "not installed" and install below.
    pass

if not postgres_installed:
    print("\nInstalling PostgreSQL using magic commands...")
    print("(Run these commands if automatic installation fails)")
    print("  !apt-get update")
    print("  !apt-get install -y postgresql postgresql-contrib")
    print("  !service postgresql start")

    try:
        # Update package list. A failing mirror should not block the install,
        # so this step only warns.
        print("\n   Updating package list...")
        if subprocess.run(['apt-get', 'update', '-qq']).returncode == 0:
            print("   ✅ Package list updated")
        else:
            print("   ⚠️  Package list update reported errors; continuing")

        # Install PostgreSQL. check=True raises on a non-zero exit status so
        # the manual instructions in the except branch are actually shown.
        print("   Installing PostgreSQL...")
        subprocess.run(['apt-get', 'install', '-y', '-qq',
                        'postgresql', 'postgresql-contrib'], check=True)
        print("   ✅ PostgreSQL installed")

        # Start PostgreSQL service
        print("   Starting PostgreSQL service...")
        subprocess.run(['service', 'postgresql', 'start'], check=True)
        print("   ✅ PostgreSQL service started")

        # Wait for PostgreSQL to be ready
        print("   Waiting for PostgreSQL to be ready...")
        time.sleep(3)

    except Exception as e:
        print(f"   ❌ Error: {e}")
        print("   Please run manually:")
        print("   !apt-get update")
        print("   !apt-get install -y postgresql postgresql-contrib")
        print("   !service postgresql start")

# Verify PostgreSQL is running
print("\nVerifying PostgreSQL is ready...")
try:
    result = subprocess.run(['pg_isready'],
                            capture_output=True,
                            text=True,
                            timeout=5)
    if result.returncode == 0:
        print("✅ PostgreSQL is ready")
        print(f"   {result.stdout.strip()}")
    else:
        print("⚠️  PostgreSQL may not be ready yet")
        print("   Try: !service postgresql restart")
except Exception as e:
    print(f"⚠️  Could not verify PostgreSQL: {e}")

print("\n" + "="*80)
print("POSTGRESQL SETUP COMPLETE")
print("="*80)

In [None]:
# ============================================================================
# STREAMLIT DASHBOARD EXECUTION
# ============================================================================
import subprocess
import sys
import os
from pathlib import Path
import webbrowser
import time
import threading
import tempfile


def find_dashboard_file():
    """Find Streamlit dashboard file recursively.

    Looks for ``{DB_NAME}_dashboard.py`` directly inside each known location,
    then anywhere beneath it. Returns the first match, or None.
    """
    search_paths = [
        Path.cwd(),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/workspace'),
        Path('/content/drive/MyDrive/db'),
        Path('/content/db'),
        Path('/content'),
    ]

    dashboard_name = f'{DB_NAME}_dashboard.py'

    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / dashboard_name
        if candidate.exists():
            return candidate

        # Try recursive search
        try:
            for found_path in search_path.rglob(dashboard_name):
                if found_path.is_file():
                    return found_path
        except OSError:
            # Unreadable subtree: move on to the next location.
            continue

    return None


def run_streamlit_dashboard(method='notebook', port=8501, open_browser=True):
    """
    Run Streamlit dashboard from Jupyter notebook.

    Methods:
    - 'notebook': Start a headless server in the background and print its URL
    - 'subprocess': Run as subprocess (background)
    - 'magic': Print the !streamlit run magic command

    Args:
        method: One of 'notebook', 'subprocess', 'magic'.
        port: TCP port for the Streamlit server.
        open_browser: In 'notebook' mode, try to open the URL in a browser.

    Returns:
        The started ``subprocess.Popen`` object, or None when the dashboard
        file is missing, the server exits immediately, the method is 'magic',
        or the method is unknown.
    """
    dashboard_path = find_dashboard_file()

    if not dashboard_path:
        print("❌ Dashboard file not found")
        print(f"   Looking for: {DB_NAME}_dashboard.py")
        return None

    print(f"✅ Found dashboard: {dashboard_path}")

    if method == 'notebook':
        # Method 1: Run Streamlit as a headless background server.
        # Note: Streamlit doesn't natively support notebooks.
        print("\n" + "="*80)
        print("STREAMLIT DASHBOARD - NOTEBOOK MODE")
        print("="*80)
        print(f"\nDashboard: {dashboard_path.name}")
        print("\nTo run dashboard:")
        print("  1. Run this cell to start the server")
        print("  2. Open the URL shown below in a new tab")
        print(f"  3. Or use: !streamlit run {dashboard_path} --server.port={port}")
        print("\n" + "="*80)

        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port),
            '--server.headless', 'true',
            '--server.runOnSave', 'false',
            '--browser.gatherUsageStats', 'false'
        ]

        # Send server output to a log file. Unread PIPEs fill up and block the
        # child; the parent can close its handle right after spawning.
        log_path = Path(tempfile.gettempdir()) / f'streamlit_{port}.log'
        with open(log_path, 'w') as log_file:
            process = subprocess.Popen(cmd, stdout=log_file, stderr=subprocess.STDOUT)

        # Wait a moment for server to start
        time.sleep(2)

        # A server that already exited (e.g. streamlit not installed) must not
        # be reported as running.
        exit_code = process.poll()
        if exit_code is not None:
            print(f"❌ Streamlit exited early (exit code {exit_code})")
            print(f"   See log: {log_path}")
            return None

        url = f"http://localhost:{port}"
        print(f"\n🌐 Dashboard URL: {url}")
        print(f"\nServer started in background (PID: {process.pid})")
        print(f"   Server log: {log_path}")
        print("\nTo stop: process.terminate() or run stop_streamlit()")

        # Store process for later termination
        globals()['_streamlit_process'] = process

        # Opening a browser is best-effort; hosted kernels usually have none.
        if open_browser:
            try:
                webbrowser.open(url)
            except Exception:
                pass

        return process

    elif method == 'subprocess':
        # Method 2: Run as background subprocess
        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port)
        ]

        process = subprocess.Popen(cmd)
        # Remember the handle so stop_streamlit() can stop this server too.
        globals()['_streamlit_process'] = process
        print(f"✅ Streamlit started (PID: {process.pid})")
        print(f"🌐 Dashboard: http://localhost:{port}")
        return process

    elif method == 'magic':
        # Method 3: Print magic command for user to run
        print("Run this command in a new cell:")
        print(f"!streamlit run {dashboard_path} --server.port={port}")
        return None

    print(f"⚠️  Unknown method: {method!r} (expected 'notebook', 'subprocess' or 'magic')")
    return None


def stop_streamlit():
    """Stop running Streamlit process.

    Terminates the server started by ``run_streamlit_dashboard`` and forgets
    the handle, so a second call reports that nothing is running.
    """
    process = globals().pop('_streamlit_process', None)
    if process is None:
        print("⚠️  No Streamlit process found")
        return

    process.terminate()
    try:
        # Reap the child; escalate if it ignores the terminate request.
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
    print("✅ Streamlit stopped")


# Auto-detect DB_NAME if not set
if 'DB_NAME' not in globals():
    # Try to detect from current directory or notebook name
    cwd = Path.cwd()
    for db_num in range(6, 16):
        if f'db-{db_num}' in str(cwd) or f'db{db_num}' in str(cwd):
            DB_NAME = f'db-{db_num}'
            break
    else:
        # This notebook documents db-9, so fall back to its own dashboard
        # rather than another database's.
        DB_NAME = 'db-9'  # Default
        print(f"⚠️  Could not detect DB_NAME, using default: {DB_NAME}")

print("\n" + "="*80)
print("STREAMLIT DASHBOARD INTEGRATION")
print("="*80)
print(f"Database: {DB_NAME}")
print("\nAvailable methods:")
print("  1. run_streamlit_dashboard(method='notebook') - Run in notebook mode")
print("  2. run_streamlit_dashboard(method='subprocess') - Run as background process")
print("  3. run_streamlit_dashboard(method='magic') - Get magic command")
print("  4. stop_streamlit() - Stop running dashboard")
print("\n" + "="*80)

## Streamlit Dashboard

Run the Streamlit dashboard using one of these methods:

**Method 1: Notebook Mode** (Recommended)
```python
run_streamlit_dashboard(method='notebook', port=8501)
```

**Method 2: Magic Command**
```bash
!streamlit run db-9_dashboard.py --server.port=8501
```

**Method 3: Background Process**
```python
run_streamlit_dashboard(method='subprocess', port=8501)
```


## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import sys
import os
import platform
import subprocess
import json
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)

# Detect environment type
ENV_TYPE = None   # one of 'docker', 'colab', 'local' once detection finishes
ENV_DETAILS = {}  # supporting facts about the detected environment

# Check for Docker
if os.path.exists('/.dockerenv'):
    ENV_TYPE = 'docker'
    ENV_DETAILS['container'] = 'docker'
    if os.path.exists('/workspace'):
        ENV_DETAILS['workspace'] = '/workspace'
    print("✅ Detected: Docker container")

# Check for Google Colab. Runs after the Docker check, so a Colab match
# takes precedence over a Docker match.
try:
    import google.colab
    ENV_TYPE = 'colab'
    ENV_DETAILS['platform'] = 'google_colab'
    ENV_DETAILS['colab_module'] = True
    print("✅ Detected: Google Colab (via google.colab module)")
except ImportError:
    # Module missing: fall back to filesystem/environment markers.
    if os.path.exists('/content') and os.environ.get('COLAB_GPU'):
        ENV_TYPE = 'colab'
        ENV_DETAILS['platform'] = 'google_colab'
        ENV_DETAILS['content_dir'] = True
        print("✅ Detected: Google Colab (by /content + COLAB_GPU)")
    elif os.path.exists('/content') and 'COLAB' in str(os.environ):
        ENV_TYPE = 'colab'
        ENV_DETAILS['platform'] = 'google_colab'
        ENV_DETAILS['content_dir'] = True
        print("✅ Detected: Google Colab (by /content + COLAB env)")
    elif os.path.exists('/content'):
        ENV_TYPE = 'colab'
        ENV_DETAILS['platform'] = 'google_colab'
        ENV_DETAILS['content_dir'] = True
        # Check if it looks like Colab
        if Path('/content/sample_data').exists() or Path('/content/drive').exists():
            print("✅ Detected: Google Colab (by /content structure)")
        else:
            print("⚠️  Detected: Possible Google Colab (by /content)")

# Fall back to a local environment only when nothing above matched. This must
# not be the `else` of the try statement: that clause runs when the
# google.colab import SUCCEEDS and would overwrite 'colab' with 'local'.
if ENV_TYPE is None:
    ENV_TYPE = 'local'
    ENV_DETAILS['platform'] = platform.system().lower()
    print("✅ Detected: Local environment")


# Detect base directories recursively
def find_base_directory():
    """Find base database directory recursively.

    Returns the parent of the first ``db-6`` directory that contains a
    ``queries`` subdirectory, or a start path that holds ``client/db/db-6``.
    Falls back to the current working directory.
    """
    # NOTE(review): the anchor is hard-coded to db-6 although this notebook
    # is for db-9; confirm whether any db-* directory should qualify.
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for db-6 directory
        for db_dir in start_path.rglob('db-6'):
            if db_dir.is_dir() and (db_dir / 'queries').exists():
                return db_dir.parent

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and (client_db / 'db-6').exists():
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Return the detected environment as a config dict.

    Keys: ``env_type`` (ENV_TYPE), ``base_dir`` (BASE_DIR) and ``details``
    (ENV_DETAILS). No notebook cells are modified.
    """
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Colab Setup (Run this first if using Google Colab)

If you're running this notebook in Google Colab:
1. **Mount Google Drive** (if your database files are in Drive)
2. **Upload database files** to `/content/db` or your Drive folder


In [None]:
# ============================================================================
# GOOGLE COLAB SETUP
# ============================================================================
# Relies on ENV_TYPE and Path from the environment detection cell.

if ENV_TYPE == 'colab':
    print("="*80)
    print("GOOGLE COLAB SETUP")
    print("="*80)

    # Mount Google Drive if not already mounted
    drive_path = Path('/content/drive/MyDrive')
    if not drive_path.exists():
        print("⚠️  Google Drive not mounted.")
        print("   Run this command to mount:")
        print("   from google.colab import drive")
        print("   drive.mount('/content/drive')")
        # Attempt the mount automatically; the hint above is the manual fallback.
        try:
            from google.colab import drive
            drive.mount('/content/drive')
            print("✅ Google Drive mounted")
        except Exception as e:
            print(f"⚠️  Could not auto-mount Drive: {e}")
            print("   Please mount manually using the command above")
    else:
        print("✅ Google Drive is already mounted")

    # Check for database files
    print("\nChecking for database files...")

    # Check in /content/db
    content_db = Path('/content/db')
    if content_db.exists():
        print(f"✅ Found: {content_db}")
    else:
        print(f"⚠️  Not found: {content_db}")
        print("   Upload your database folder to /content/db")

    # Check in Drive
    drive_db = drive_path / 'db'
    if drive_db.exists():
        print(f"✅ Found in Drive: {drive_db}")
    else:
        print(f"⚠️  Not found in Drive: {drive_db}")
        print("   Upload your database folder to Google Drive/db")

    print("\n" + "="*80)
    print("Some PostgreSQL-specific features may not work")
    print("="*80)
else:
    print("Not running in Colab - skipping Colab setup")

In [None]:
# ============================================================================
# FAILSAFE: Force Path Correction and Package Installation
# ============================================================================
import importlib
import os
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path


def _install_and_import(command, import_name, timeout=300):
    """Run one install command, then report whether ``import_name`` imports.

    Returns False (never raises) when the command fails, times out, cannot be
    started, or when the module still cannot be imported afterwards.
    """
    try:
        subprocess.check_call(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
        )
    except (subprocess.SubprocessError, OSError):
        return False
    # Packages installed while the interpreter is running are only visible to
    # the import system after its finder caches are invalidated.
    importlib.invalidate_caches()
    try:
        __import__(import_name)
    except Exception:
        # Best effort: a broken install counts as "this method did not work".
        return False
    return True


def force_install_package(package_name, import_name=None):
    """Force install package using multiple methods.

    Args:
        package_name: Requirement to install (extras and ``==`` / ``>=``
            version specifiers are allowed).
        import_name: Module name used to verify the install. Defaults to
            ``package_name`` with extras and version specifiers stripped.

    Returns:
        True if the module is importable (already, or after any method
        succeeded); False if every method failed.
    """
    if import_name is None:
        import_name = package_name.split('[')[0].split('==')[0].split('>=')[0]

    # Try import first
    try:
        __import__(import_name)
        return True
    except ImportError:
        pass

    pip_install = [sys.executable, '-m', 'pip', 'install']
    attempts = [
        # Method 1: pip install --user
        pip_install + ['--user', '--quiet', package_name],
        # Method 2: pip install --break-system-packages (Python 3.12+)
        pip_install + ['--break-system-packages', '--quiet', package_name],
        # Method 3: pip install system-wide
        pip_install + ['--quiet', package_name],
        # Method 4: conda install (if conda available)
        ['conda', 'install', '-y', '--quiet', package_name],
    ]
    # Method 5: apt-get install (Linux/Docker)
    if os.path.exists('/usr/bin/apt-get'):
        apt_package = f'python3-{import_name.replace("_", "-")}'
        attempts.append(['apt-get', 'install', '-y', '--quiet', apt_package])
    # Method 6: Direct pip install with --force-reinstall
    attempts.append(pip_install + ['--force-reinstall', '--quiet', package_name])

    for command in attempts:
        if _install_and_import(command, import_name):
            return True

    print(f"‚ö†Ô∏è  Warning: Could not install {package_name}, continuing anyway...")
    return False


def _as_path(value):
    """Return ``value`` as a Path, or None if it is empty or not path-like."""
    if not value:
        return None
    try:
        return Path(value)
    except TypeError:
        return None


def correct_file_path(file_path, search_paths=None):
    """Correct file path by searching multiple locations.

    Args:
        file_path: Path (or string) that may not exist at its stated location.
        search_paths: Directories to search. Defaults to the working
            directory, the known workspace/Colab locations and, when it is
            defined, the notebook-level BASE_DIR.

    Returns:
        ``file_path`` itself if it exists; otherwise the first entry with the
        same name found directly in a search directory, or the first regular
        file with that name found recursively; otherwise the original path
        unchanged.
    """
    if isinstance(file_path, str):
        file_path = Path(file_path)

    # If path exists, return it
    if file_path.exists():
        return file_path

    # Default search paths
    if search_paths is None:
        search_paths = [
            Path.cwd(),
            Path('/workspace/client/db'),
            Path('/workspace/db'),
            Path('/workspace'),
            Path('/content/drive/MyDrive/db'),
            Path('/content/db'),
            Path('/content'),
        ]
        base_dir = _as_path(globals().get('BASE_DIR'))
        if base_dir is not None:
            search_paths.append(base_dir)

    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / file_path.name
        if candidate.exists():
            return candidate

        # Try recursive search
        try:
            for found_path in search_path.rglob(file_path.name):
                if found_path.is_file():
                    return found_path
        except (OSError, ValueError):
            # Unreadable directory or unusable pattern: try the next location.
            continue

    # Return original path (will fail later, but at least we tried)
    return file_path


def create_notebook_backup(notebook_path=None):
    """Create backup of current notebook automatically.

    Args:
        notebook_path: Path (or string) of the notebook to copy. When omitted,
            ``__file__`` is tried and then ``current_notebook.ipynb`` in the
            working directory.

    Returns:
        Path of the timestamped backup, or None when the notebook could not be
        located or the copy failed.
    """
    try:
        if notebook_path is None:
            try:
                notebook_path = Path(__file__)
            except NameError:
                # __file__ is not defined inside a notebook kernel.
                notebook_path = Path.cwd() / 'current_notebook.ipynb'

        if isinstance(notebook_path, str):
            notebook_path = Path(notebook_path)

        # Only create backup if file exists
        if notebook_path.exists() and notebook_path.suffix == '.ipynb':
            timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
            backup_path = notebook_path.parent / f"{notebook_path.stem}_{timestamp}.backup.ipynb"
            shutil.copy2(notebook_path, backup_path)
            print(f"‚úÖ Backup created: {backup_path.name}")
            return backup_path

        print("‚ö†Ô∏è  Could not determine notebook path for backup")
        return None
    except Exception as e:
        print(f"‚ö†Ô∏è  Backup creation failed (non-critical): {e}")
        return None


def ensure_packages_installed():
    """Ensure all required packages are installed."""
    required_packages = [
        ('psycopg2-binary', 'psycopg2'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
        ('matplotlib', 'matplotlib'),
        ('seaborn', 'seaborn'),
        ('ipython', 'IPython'),
        ('jupyter', 'jupyter'),
    ]

    print("\n" + "="*80)
    print("FAILSAFE: Ensuring all packages are installed...")
    print("="*80)

    for package, import_name in required_packages:
        if force_install_package(package, import_name):
            print(f"‚úÖ {package} installed")
        else:
            print(f"‚ö†Ô∏è  {package} installation failed, but continuing...")

    print("="*80 + "\n")


def ensure_paths_correct():
    """Ensure all file paths are correct.

    Validates the notebook-level BASE_DIR and DB_DIR globals. A missing or
    non-existent BASE_DIR is replaced by the result of ``correct_file_path``,
    falling back to the working directory when nothing is found. A defined but
    non-existent DB_DIR is replaced by the result of ``correct_file_path``.
    """
    print("\n" + "="*80)
    print("FAILSAFE: Correcting file paths...")
    print("="*80)

    # --- BASE_DIR ---
    base_dir_path = _as_path(globals().get('BASE_DIR'))
    try:
        base_dir_valid = base_dir_path is not None and base_dir_path.exists()
    except OSError:
        base_dir_valid = False

    if not base_dir_valid:
        # Search for the stale directory by name; with nothing to search for
        # (or nothing found) the working directory is the safe default.
        corrected_base_dir = correct_file_path(base_dir_path or Path.cwd())
        if not corrected_base_dir.exists():
            corrected_base_dir = Path.cwd()
        globals()['BASE_DIR'] = corrected_base_dir
        print(f"‚úÖ BASE_DIR corrected: {corrected_base_dir}")
    else:
        print(f"‚úÖ BASE_DIR valid: {globals()['BASE_DIR']}")

    # --- DB_DIR --- (only touched when a cell has already defined it)
    db_dir_path = _as_path(globals().get('DB_DIR'))
    if db_dir_path is not None:
        try:
            db_dir_valid = db_dir_path.exists()
        except OSError:
            db_dir_valid = False

        if not db_dir_valid:
            corrected_db_dir = correct_file_path(db_dir_path)
            globals()['DB_DIR'] = corrected_db_dir
            print(f"‚úÖ DB_DIR corrected: {corrected_db_dir}")
        else:
            print(f"‚úÖ DB_DIR valid: {globals()['DB_DIR']}")

    print("="*80 + "\n")


# Create backup at startup
try:
    create_notebook_backup()
except Exception as e:
    print(f"‚ö†Ô∏è  Backup skipped: {e}")

# Run failsafe checks
ensure_packages_installed()
ensure_paths_correct()
print("‚úÖ Failsafe checks complete")

## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import json
import os
import platform
import subprocess
import sys
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)

# Detect environment type.
# ENV_TYPE ends up as exactly one of 'docker', 'colab' or 'local'.
ENV_TYPE = None
ENV_DETAILS = {}

# Check for Docker
if os.path.exists('/.dockerenv'):
    ENV_TYPE = 'docker'
    ENV_DETAILS['container'] = 'docker'
    if os.path.exists('/workspace'):
        ENV_DETAILS['workspace'] = '/workspace'
    print("‚úÖ Detected: Docker container")

# Check for Google Colab (a positive Colab result overrides Docker).
try:
    import google.colab
    _colab_module_present = True
except ImportError:
    _colab_module_present = False

if _colab_module_present:
    ENV_TYPE = 'colab'
    ENV_DETAILS['platform'] = 'google_colab'
    ENV_DETAILS['colab_module'] = True
    print("‚úÖ Detected: Google Colab (via google.colab module)")
elif os.path.exists('/content'):
    # No google.colab module: fall back to filesystem / environment hints,
    # strongest evidence first.
    if os.environ.get('COLAB_GPU'):
        _colab_message = "‚úÖ Detected: Google Colab (by /content + COLAB_GPU)"
    elif 'COLAB' in str(os.environ):
        _colab_message = "‚úÖ Detected: Google Colab (by /content + COLAB env)"
    elif Path('/content/sample_data').exists() or Path('/content/drive').exists():
        _colab_message = "‚úÖ Detected: Google Colab (by /content structure)"
    else:
        _colab_message = "‚ö†Ô∏è  Detected: Possible Google Colab (by /content)"
    ENV_TYPE = 'colab'
    ENV_DETAILS['platform'] = 'google_colab'
    ENV_DETAILS['content_dir'] = True
    print(_colab_message)

# Check for local environment: only when nothing above matched.
if ENV_TYPE is None:
    ENV_TYPE = 'local'
    ENV_DETAILS['platform'] = platform.system().lower()
    print("‚úÖ Detected: Local environment")


# Detect base directories recursively
def find_base_directory():
    """Find base database directory recursively.

    Walks a fixed list of candidate roots. For each existing root it returns
    the parent of the first ``db-6`` directory that contains a ``queries``
    sub-directory, or the root itself when it holds ``client/db/db-6``.

    Returns:
        The detected base directory, or the current working directory when no
        candidate matches.
    """
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for db-6 directory
        for db_dir in start_path.rglob('db-6'):
            if db_dir.is_dir() and (db_dir / 'queries').exists():
                return db_dir.parent

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and (client_db / 'db-6').exists():
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Metaprogrammatically update notebook cell paths based on detected environment.

    Returns:
        dict with keys ``env_type``, ``base_dir`` and ``details`` holding the
        current ENV_TYPE, BASE_DIR and ENV_DETAILS values.
    """
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Step 1: Environment Setup & Package Installation

In [None]:
# ============================================================================
# END-TO-END SETUP: Install all required packages and configure environment
# ============================================================================
import importlib
import os
import platform
import shutil
import subprocess
import sys
from pathlib import Path


def _install_then_import(command, import_name, timeout=300):
    """Run one install command and report whether ``import_name`` then imports.

    Output is discarded via DEVNULL: ``subprocess.check_call`` must not be
    given PIPE because nothing reads the pipe and the child can block.
    Returns False on a non-zero exit, timeout, missing executable or a failed
    import.
    """
    try:
        subprocess.check_call(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
        )
        # Make a freshly installed package visible to the running interpreter.
        importlib.invalidate_caches()
        __import__(import_name)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired,
            ImportError, OSError):
        return False
    return True


def install_package_multiple_methods(package_spec: str, import_name: str) -> bool:
    """Install package using multiple methods with fallbacks.

    Args:
        package_spec: Requirement string, e.g. ``'pandas>=2.0.0'``.
        import_name: Module name used to verify the installation.

    Returns:
        True if the module is importable (already, or after one of the
        methods succeeded); False if every method failed.
    """
    package_name = package_spec.split('>=')[0]
    # ENV_TYPE comes from the environment-detection cell; tolerate its absence.
    env_type = globals().get('ENV_TYPE')

    # Method 1: Check if already installed
    try:
        __import__(import_name)
        print(f"‚úÖ {package_name}: Already installed")
        return True
    except ImportError:
        pass

    print(f"‚ö†Ô∏è  {package_name}: Installing...")
    pip_install = [sys.executable, '-m', 'pip', 'install', package_spec]

    # Method 2: pip install --user
    if _install_then_import(pip_install + ['--quiet', '--user'], import_name):
        print(f"   ‚úÖ Installed via pip --user")
        return True

    # Method 3: pip install (system-wide)
    if _install_then_import(pip_install + ['--quiet'], import_name):
        print(f"   ‚úÖ Installed via pip (system-wide)")
        return True

    # Method 4: pip install --break-system-packages
    if env_type == 'local' and platform.system() == 'Linux':
        if _install_then_import(
                pip_install + ['--break-system-packages', '--quiet'], import_name):
            print(f"   ‚úÖ Installed via pip --break-system-packages")
            return True

    # Method 5: conda install
    if shutil.which('conda'):
        conda_pkg = package_name.replace('-binary', '')
        if _install_then_import(['conda', 'install', '-y', conda_pkg], import_name):
            print(f"   ‚úÖ Installed via conda")
            return True

    # Method 6: apt-get (Docker/Colab)
    if env_type in ['docker', 'colab']:
        system_pkg_map = {
            'psycopg2-binary': 'python3-psycopg2',
            'pandas': 'python3-pandas',
            'numpy': 'python3-numpy',
            'matplotlib': 'python3-matplotlib',
        }
        if package_name in system_pkg_map:
            try:
                subprocess.check_call(
                    ['apt-get', 'update'],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    timeout=300,
                )
                index_refreshed = True
            except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
                index_refreshed = False
            if index_refreshed and _install_then_import(
                    ['apt-get', 'install', '-y', system_pkg_map[package_name]],
                    import_name):
                print(f"   ‚úÖ Installed via apt-get")
                return True

    print(f"   ‚ùå Failed to install {package_name} via all methods")
    return False


print("="*80)
print("ENVIRONMENT SETUP - END-TO-END INSTALLATION")
print("="*80)

# Display Python environment
print(f"\nPython Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")
print(f"Architecture: {platform.architecture()[0]}")

# Required packages with versions
required_packages = [
    'psycopg2-binary>=2.9.0',
    'pandas>=2.0.0',
    'numpy>=1.24.0',
    'matplotlib>=3.7.0',
    'seaborn>=0.12.0'
]

# Map package names to import names
package_import_map = {
    'psycopg2-binary': 'psycopg2',
    'pandas': 'pandas',
    'numpy': 'numpy',
    'matplotlib': 'matplotlib',
    'seaborn': 'seaborn'
}

print("\n" + "="*80)
print("CHECKING AND INSTALLING REQUIRED PACKAGES")
print("="*80)

missing_packages = []    # specs that were not importable when this cell started
installed_packages = []  # names that are importable once this cell has run
failed_packages = []     # specs that could not be installed by any method

for package_spec in required_packages:
    package_name = package_spec.split('>=')[0]
    import_name = package_import_map.get(package_name, package_name.replace('-', '_'))

    try:
        __import__(import_name)
    except ImportError:
        missing_packages.append(package_spec)
        if install_package_multiple_methods(package_spec, import_name):
            installed_packages.append(package_name)
        else:
            failed_packages.append(package_spec)
            print(f"      Manual install: pip install {package_spec}")
    else:
        print(f"‚úÖ {package_name}: Already installed")
        installed_packages.append(package_name)

print("\n" + "="*80)
if failed_packages:
    # Report only what actually failed, not everything that was once missing.
    print("‚ö†Ô∏è  Some packages failed to install. Please install manually:")
    for pkg in failed_packages:
        print(f"   pip install {pkg}")
    print("\n   Then restart the kernel and re-run this cell.")
else:
    print("‚úÖ All required packages are installed!")
    print("\n‚ö†Ô∏è  If packages were just installed, restart the kernel and re-run this cell.")
print("="*80)

# Now import all packages
print("\n" + "="*80)
print("IMPORTING PACKAGES")
print("="*80)

try:
    import psycopg2
    print("‚úÖ psycopg2 imported")
except ImportError as e:
    print(f"‚ùå Failed to import psycopg2: {e}")
    print("   Please restart kernel after installation")

try:
    import pandas as pd
    print("‚úÖ pandas imported")
except ImportError as e:
    print(f"‚ùå Failed to import pandas: {e}")

try:
    import numpy as np
    print("‚úÖ numpy imported")
except ImportError as e:
    print(f"‚ùå Failed to import numpy: {e}")

try:
    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.use('Agg')  # Non-interactive backend for notebooks
    print("‚úÖ matplotlib imported")
except ImportError as e:
    print(f"‚ùå Failed to import matplotlib: {e}")

try:
    import seaborn as sns
    print("‚úÖ seaborn imported")
except ImportError as e:
    print(f"‚ùå Failed to import seaborn: {e}")

try:
    from IPython.display import display, HTML, Markdown
    print("‚úÖ IPython.display imported")
except ImportError as e:
    print(f"‚ö†Ô∏è  IPython.display not available: {e}")

import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set visualization style (skipped silently when matplotlib/seaborn are
# missing or the named style is unavailable).
try:
    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")
except Exception:
    pass

print("\n" + "="*80)
print("ENVIRONMENT SETUP COMPLETE")
print("="*80)

## Step 2: Database Configuration

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import os
from pathlib import Path

import psycopg2

# Database name
DB_NAME = "db-9"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the default ``postgres`` database on localhost:5432 as user
    ``postgres``. The password is read from the PGPASSWORD environment
    variable and falls back to ``'postgres'`` when it is unset.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: if IS_COLAB (defined by an earlier cell) is false.
        Exception: whatever ``psycopg2.connect`` raised, re-raised after
            troubleshooting hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password=os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection. If an earlier run of a connection cell left an open
# connection in ``conn``, keep using it instead of leaking it behind a new one
# (psycopg2 reports ``closed == 0`` while a connection is open).
_existing_conn = globals().get('conn')
if _existing_conn is not None and getattr(_existing_conn, 'closed', 1) == 0:
    conn = _existing_conn
    print("Reusing existing PostgreSQL connection")
else:
    conn = create_postgresql_connection()

print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 3: Database Initialization (Create Database, Load Schema, Load Data)

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
# NOTE(review): this cell repeats the connection setup found under "Step 2";
# no create-database / load-schema / load-data logic is present here.
import os
from pathlib import Path

import psycopg2

# Database name
DB_NAME = "db-9"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the default ``postgres`` database on localhost:5432 as user
    ``postgres``. The password is read from the PGPASSWORD environment
    variable and falls back to ``'postgres'`` when it is unset.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: if IS_COLAB (defined by an earlier cell) is false.
        Exception: whatever ``psycopg2.connect`` raised, re-raised after
            troubleshooting hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password=os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection. An open connection left in ``conn`` by a previous cell is
# reused so that running this cell does not leak it behind a second one
# (psycopg2 reports ``closed == 0`` while a connection is open).
_existing_conn = globals().get('conn')
if _existing_conn is not None and getattr(_existing_conn, 'closed', 1) == 0:
    conn = _existing_conn
    print("Reusing existing PostgreSQL connection")
else:
    conn = create_postgresql_connection()

print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 4: Load Query Metadata

## Embedded SQL Files and Queries

The following cells contain the complete database schema, data, and queries embedded directly in this notebook.
No external file dependencies required - everything is self-contained.

In [None]:
# ============================================================================
# EMBEDDED SCHEMA.SQL - DB-9
# ============================================================================
# This cell contains the complete database schema
# Execute this cell to load the schema into PostgreSQL
import psycopg2
# Schema SQL (embedded directly in notebook)
SCHEMA_SQL = """
-- Shipping Database Schema
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production schema for shipping and rate comparison system
-- Shipping Carriers Table
-- Stores carrier information (USPS, UPS, FedEx, etc.)
CREATE TABLE shipping_carriers (
    carrier_id VARCHAR(50) PRIMARY KEY,
    carrier_name VARCHAR(100) NOT NULL,
    carrier_code VARCHAR(10) NOT NULL UNIQUE,  -- 'USPS', 'UPS', 'FEDEX'
    carrier_type VARCHAR(50),  -- 'Postal', 'Courier', 'Freight'
    api_endpoint VARCHAR(500),
    rate_api_version VARCHAR(50),
    tracking_api_version VARCHAR(50),
    commercial_pricing_available BOOLEAN DEFAULT FALSE,
    requires_account BOOLEAN DEFAULT FALSE,
    active_status BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Shipping Zones Table
-- Stores zone information for rate calculations (USPS zones, UPS zones)
CREATE TABLE shipping_zones (
    zone_id VARCHAR(255) PRIMARY KEY,
    carrier_id VARCHAR(50) NOT NULL,
    origin_zip_code VARCHAR(10) NOT NULL,
    destination_zip_code VARCHAR(10) NOT NULL,
    zone_number INTEGER NOT NULL,
    zone_type VARCHAR(50),  -- 'Domestic', 'International', 'Alaska', 'Hawaii'
    distance_miles NUMERIC(10, 2),
    transit_days_min INTEGER,
    transit_days_max INTEGER,
    effective_date DATE NOT NULL,
    expiration_date DATE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (carrier_id) REFERENCES shipping_carriers(carrier_id)
);
-- Shipping Service Types Table
-- Stores available service types (Priority Mail, Ground, Express, etc.)
CREATE TABLE shipping_service_types (
    service_id VARCHAR(255) PRIMARY KEY,
    carrier_id VARCHAR(50) NOT NULL,
    service_code VARCHAR(50) NOT NULL,
    service_name VARCHAR(255) NOT NULL,
    service_category VARCHAR(100),  -- 'Express', 'Ground', 'Priority', 'Economy'
    domestic_available BOOLEAN DEFAULT TRUE,
    international_available BOOLEAN DEFAULT FALSE,
    max_weight_lbs NUMERIC(10, 2),
    max_dimensions_length NUMERIC(10, 2),
    max_dimensions_width NUMERIC(10, 2),
    max_dimensions_height NUMERIC(10, 2),
    tracking_included BOOLEAN DEFAULT TRUE,
    insurance_available BOOLEAN DEFAULT FALSE,
    signature_required BOOLEAN DEFAULT FALSE,
    active_status BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (carrier_id) REFERENCES shipping_carriers(carrier_id)
);
-- Shipping Rates Table
-- Stores historical and current shipping rates
CREATE TABLE shipping_rates (
    rate_id VARCHAR(255) PRIMARY KEY,
    carrier_id VARCHAR(50) NOT NULL,
    service_id VARCHAR(255) NOT NULL,
    zone_id VARCHAR(255),
    weight_lbs NUMERIC(10, 4) NOT NULL,
    weight_oz NUMERIC(10, 4),
    length_inches NUMERIC(10, 2),
    width_inches NUMERIC(10, 2),
    height_inches NUMERIC(10, 2),
    dimensional_weight_lbs NUMERIC(10, 4),
    cubic_volume_cubic_inches NUMERIC(12, 4),
    rate_amount NUMERIC(10, 2) NOT NULL,
    rate_type VARCHAR(50),  -- 'Retail', 'Commercial', 'Daily', 'Cubic'
    surcharge_amount NUMERIC(10, 2) DEFAULT 0,
    total_rate NUMERIC(10, 2) NOT NULL,
    effective_date DATE NOT NULL,
    expiration_date DATE,
    rate_source VARCHAR(100),  -- 'API', 'Manual', 'Bulk Import'
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (carrier_id) REFERENCES shipping_carriers(carrier_id),
    FOREIGN KEY (service_id) REFERENCES shipping_service_types(service_id),
    FOREIGN KEY (zone_id) REFERENCES shipping_zones(zone_id)
);
-- Packages Table
-- Stores package information for shipments
CREATE TABLE packages (
    package_id VARCHAR(255) PRIMARY KEY,
    user_id VARCHAR(255),
    package_reference VARCHAR(255),
    weight_lbs NUMERIC(10, 4) NOT NULL,
    weight_oz NUMERIC(10, 4),
    length_inches NUMERIC(10, 2) NOT NULL,
    width_inches NUMERIC(10, 2) NOT NULL,
    height_inches NUMERIC(10, 2) NOT NULL,
    dimensional_weight_lbs NUMERIC(10, 4),
    cubic_volume_cubic_inches NUMERIC(12, 4),
    package_type VARCHAR(50),  -- 'Envelope', 'Box', 'Tube', 'Flat'
    package_value NUMERIC(10, 2),
    contents_description VARCHAR(500),
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Shipments Table
-- Stores shipment records with origin and destination
CREATE TABLE shipments (
    shipment_id VARCHAR(255) PRIMARY KEY,
    package_id VARCHAR(255) NOT NULL,
    carrier_id VARCHAR(50) NOT NULL,
    service_id VARCHAR(255) NOT NULL,
    tracking_number VARCHAR(255),
    origin_name VARCHAR(255),
    origin_address_line1 VARCHAR(255),
    origin_address_line2 VARCHAR(255),
    origin_city VARCHAR(100),
    origin_state VARCHAR(2),
    origin_zip_code VARCHAR(10) NOT NULL,
    origin_country VARCHAR(2) DEFAULT 'US',
    destination_name VARCHAR(255),
    destination_address_line1 VARCHAR(255),
    destination_address_line2 VARCHAR(255),
    destination_city VARCHAR(100),
    destination_state VARCHAR(2),
    destination_zip_code VARCHAR(10) NOT NULL,
    destination_country VARCHAR(2) DEFAULT 'US',
    zone_id VARCHAR(255),
    rate_id VARCHAR(255),
    label_cost NUMERIC(10, 2),
    insurance_cost NUMERIC(10, 2) DEFAULT 0,
    signature_cost NUMERIC(10, 2) DEFAULT 0,
    total_cost NUMERIC(10, 2) NOT NULL,
    shipment_status VARCHAR(50),  -- 'Pending', 'Label Created', 'In Transit', 'Delivered', 'Exception'
    label_created_at TIMESTAMP_NTZ,
    estimated_delivery_date DATE,
    actual_delivery_date DATE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (package_id) REFERENCES packages(package_id),
    FOREIGN KEY (carrier_id) REFERENCES shipping_carriers(carrier_id),
    FOREIGN KEY (service_id) REFERENCES shipping_service_types(service_id),
    FOREIGN KEY (zone_id) REFERENCES shipping_zones(zone_id),
    FOREIGN KEY (rate_id) REFERENCES shipping_rates(rate_id)
);
-- Tracking Events Table
-- Stores tracking events for shipments
CREATE TABLE tracking_events (
    event_id VARCHAR(255) PRIMARY KEY,
    shipment_id VARCHAR(255) NOT NULL,
    tracking_number VARCHAR(255) NOT NULL,
    event_timestamp TIMESTAMP_NTZ NOT NULL,
    event_type VARCHAR(100),  -- 'Label Created', 'In Transit', 'Out for Delivery', 'Delivered', 'Exception'
    event_status VARCHAR(100),
    event_location VARCHAR(255),
    event_city VARCHAR(100),
    event_state VARCHAR(2),
    event_zip_code VARCHAR(10),
    event_country VARCHAR(2),
    event_description VARCHAR(1000),
    carrier_status_code VARCHAR(50),
    raw_event_data TEXT,  -- JSON data from carrier API (PostgreSQL compatible)
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (shipment_id) REFERENCES shipments(shipment_id)
);
-- Rate Comparison Results Table
-- Stores rate comparison results across carriers
CREATE TABLE rate_comparison_results (
    comparison_id VARCHAR(255) PRIMARY KEY,
    package_id VARCHAR(255) NOT NULL,
    origin_zip_code VARCHAR(10) NOT NULL,
    destination_zip_code VARCHAR(10) NOT NULL,
    comparison_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    cheapest_carrier_id VARCHAR(50),
    cheapest_service_id VARCHAR(255),
    cheapest_rate NUMERIC(10, 2),
    fastest_carrier_id VARCHAR(50),
    fastest_service_id VARCHAR(255),
    fastest_transit_days INTEGER,
    total_options_count INTEGER,
    comparison_metadata VARIANT,  -- JSON with all rate options
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (package_id) REFERENCES packages(package_id),
    FOREIGN KEY (cheapest_carrier_id) REFERENCES shipping_carriers(carrier_id),
    FOREIGN KEY (fastest_carrier_id) REFERENCES shipping_carriers(carrier_id)
);
-- Address Validation Results Table
-- Stores address validation results from USPS Address API
CREATE TABLE address_validation_results (
    validation_id VARCHAR(255) PRIMARY KEY,
    input_address_line1 VARCHAR(255),
    input_address_line2 VARCHAR(255),
    input_city VARCHAR(100),
    input_state VARCHAR(2),
    input_zip_code VARCHAR(10),
    validated_address_line1 VARCHAR(255),
    validated_address_line2 VARCHAR(255),
    validated_city VARCHAR(100),
    validated_state VARCHAR(2),
    validated_zip_code VARCHAR(10),
    validated_zip_plus_4 VARCHAR(10),
    validation_status VARCHAR(50),  -- 'Valid', 'Invalid', 'Corrected', 'Ambiguous'
    delivery_point_code VARCHAR(10),
    carrier_route VARCHAR(10),
    dpv_confirmation VARCHAR(50),
    cmra_flag BOOLEAN,
    vacant_flag BOOLEAN,
    residential_flag BOOLEAN,
    validation_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Shipping Adjustments Table
-- Stores shipping adjustments and discrepancies (from USPS Adjustments API)
CREATE TABLE shipping_adjustments (
    adjustment_id VARCHAR(255) PRIMARY KEY,
    shipment_id VARCHAR(255),
    tracking_number VARCHAR(255) NOT NULL,
    adjustment_type VARCHAR(100),  -- 'Weight', 'Dimensions', 'Zone', 'Packaging'
    original_amount NUMERIC(10, 2),
    adjusted_amount NUMERIC(10, 2),
    adjustment_amount NUMERIC(10, 2),
    adjustment_reason VARCHAR(500),
    adjustment_status VARCHAR(50),  -- 'Pending', 'Applied', 'Disputed', 'Resolved'
    adjustment_date DATE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (shipment_id) REFERENCES shipments(shipment_id)
);
-- Bulk Shipping Presets Table
-- Stores preset configurations for bulk shipping
CREATE TABLE bulk_shipping_presets (
    preset_id VARCHAR(255) PRIMARY KEY,
    user_id VARCHAR(255),
    preset_name VARCHAR(255) NOT NULL,
    package_type VARCHAR(50),
    default_weight_lbs NUMERIC(10, 4),
    default_length_inches NUMERIC(10, 2),
    default_width_inches NUMERIC(10, 2),
    default_height_inches NUMERIC(10, 2),
    default_service_id VARCHAR(255),
    default_carrier_id VARCHAR(50),
    default_insurance_amount NUMERIC(10, 2),
    default_signature_required BOOLEAN DEFAULT FALSE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (default_service_id) REFERENCES shipping_service_types(service_id),
    FOREIGN KEY (default_carrier_id) REFERENCES shipping_carriers(carrier_id)
);
-- Shipping Analytics Table
-- Stores aggregated shipping analytics and metrics
CREATE TABLE shipping_analytics (
    analytics_id VARCHAR(255) PRIMARY KEY,
    analytics_date DATE NOT NULL,
    carrier_id VARCHAR(50),
    service_id VARCHAR(255),
    total_shipments INTEGER DEFAULT 0,
    total_revenue NUMERIC(12, 2) DEFAULT 0,
    average_rate NUMERIC(10, 2),
    total_packages INTEGER DEFAULT 0,
    total_weight_lbs NUMERIC(12, 4) DEFAULT 0,
    average_transit_days NUMERIC(6, 2),
    on_time_delivery_rate NUMERIC(5, 2),
    exception_rate NUMERIC(5, 2),
    average_package_value NUMERIC(10, 2),
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (carrier_id) REFERENCES shipping_carriers(carrier_id),
    FOREIGN KEY (service_id) REFERENCES shipping_service_types(service_id)
);
-- International Shipping Customs Table
-- Stores customs information for international shipments
CREATE TABLE international_customs (
    customs_id VARCHAR(255) PRIMARY KEY,
    shipment_id VARCHAR(255) NOT NULL,
    customs_declaration_number VARCHAR(255),
    customs_value NUMERIC(10, 2) NOT NULL,
    currency_code VARCHAR(3) DEFAULT 'USD',
    contents_description VARCHAR(1000),
    hs_tariff_code VARCHAR(20),
    country_of_origin VARCHAR(2),
    customs_duty_amount NUMERIC(10, 2),
    customs_tax_amount NUMERIC(10, 2),
    customs_fees_amount NUMERIC(10, 2),
    total_customs_amount NUMERIC(10, 2),
    customs_status VARCHAR(50),  -- 'Pending', 'Cleared', 'Held', 'Returned'
    customs_cleared_date DATE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (shipment_id) REFERENCES shipments(shipment_id)
);
-- API Rate Request Log Table
-- Tracks API rate requests for monitoring and optimization
CREATE TABLE api_rate_request_log (
    log_id VARCHAR(255) PRIMARY KEY,
    carrier_id VARCHAR(50) NOT NULL,
    request_type VARCHAR(50),  -- 'Rate', 'Tracking', 'Address Validation'
    origin_zip_code VARCHAR(10),
    destination_zip_code VARCHAR(10),
    weight_lbs NUMERIC(10, 4),
    request_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    response_time_ms INTEGER,
    response_status_code INTEGER,
    rate_returned NUMERIC(10, 2),
    error_message VARCHAR(1000),
    api_endpoint VARCHAR(500),
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (carrier_id) REFERENCES shipping_carriers(carrier_id)
);
-- Create indexes for performance
CREATE INDEX idx_shipping_zones_carrier_origin_dest ON shipping_zones(carrier_id, origin_zip_code, destination_zip_code);
CREATE INDEX idx_shipping_rates_carrier_service ON shipping_rates(carrier_id, service_id);
CREATE INDEX idx_shipping_rates_weight ON shipping_rates(weight_lbs);
CREATE INDEX idx_shipments_tracking_number ON shipments(tracking_number);
CREATE INDEX idx_shipments_status ON shipments(shipment_status);
CREATE INDEX idx_shipments_created_at ON shipments(created_at);
CREATE INDEX idx_tracking_events_shipment ON tracking_events(shipment_id);
CREATE INDEX idx_tracking_events_timestamp ON tracking_events(event_timestamp);
CREATE INDEX idx_rate_comparison_package ON rate_comparison_results(package_id);
CREATE INDEX idx_address_validation_zip ON address_validation_results(validated_zip_code);
CREATE INDEX idx_shipping_adjustments_shipment ON shipping_adjustments(shipment_id);
CREATE INDEX idx_shipping_analytics_date ON shipping_analytics(analytics_date);
CREATE INDEX idx_api_rate_request_log_carrier ON api_rate_request_log(carrier_id, request_timestamp);
"""
def execute_schema_sql(connection):
    """Execute every statement of the embedded ``SCHEMA_SQL`` script.

    The script is split on ``;`` and the pieces are executed one at a time.
    Loading is best effort: a failing statement is reported and the remaining
    statements are still attempted.

    Each statement is committed on success and rolled back on failure.
    Without the rollback, a database that aborts the open transaction after
    an error (PostgreSQL does) would reject every later statement and the
    final commit would persist nothing, while the function still reported
    success.

    Args:
        connection: Open DB-API 2.0 connection (e.g. a psycopg2 connection).

    Returns:
        bool: True only when every statement executed. False when at least
        one statement failed or an unexpected error interrupted the load.
    """
    cursor = connection.cursor()
    try:
        # Naive split: only safe while the script contains no ';' inside
        # string literals or SQL comments.
        statements = [s.strip() for s in SCHEMA_SQL.split(';') if s.strip()]
        total = len(statements)
        failed = 0
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                # Persist this statement now so that a later rollback
                # cannot discard it.
                connection.commit()
                print(f"  ‚úÖ Executed statement {idx}/{len(statements)}")
            except Exception as e:
                # Clear the failed transaction so the next statement can run.
                connection.rollback()
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ‚ö†Ô∏è  Statement {idx} warning: {error_msg}")
        if failed:
            print(f"\n‚ö†Ô∏è  Schema loaded with {failed} of {total} statement(s) failing")
            return False
        print("\n‚úÖ Schema loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n‚ùå Error loading schema: {e}")
        return False
    finally:
        cursor.close()
# Load the schema immediately when a `conn` global is already defined;
# otherwise point the user at the connection cell.
if 'conn' not in globals():
    print(
        "‚ö†Ô∏è  Database connection not found. Run connection cell first.",
        "   Schema SQL is available in SCHEMA_SQL variable",
        sep="\n",
    )
else:
    print("="*80, "LOADING EMBEDDED SCHEMA", "="*80, sep="\n")
    execute_schema_sql(conn)


In [None]:
# ============================================================================
# EMBEDDED DATA.SQL - DB-9
# ============================================================================
# This cell contains sample data for the database
# Execute this cell to load data into PostgreSQL
import psycopg2
# Data SQL (embedded directly in notebook)
DATA_SQL = """
-- Sample Data for Shipping Database
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production sample data for shipping and rate comparison system
-- Insert shipping carriers
INSERT INTO shipping_carriers (carrier_id, carrier_name, carrier_code, carrier_type, api_endpoint, rate_api_version, tracking_api_version, commercial_pricing_available, requires_account, active_status) VALUES
('carrier_usps', 'United States Postal Service', 'USPS', 'Postal', 'https://developers.usps.com/api', '3.0', '3.0', TRUE, TRUE, TRUE),
('carrier_ups', 'United Parcel Service', 'UPS', 'Courier', 'https://developer.ups.com/api', 'v1', 'v1', TRUE, TRUE, TRUE),
('carrier_fedex', 'FedEx Corporation', 'FEDEX', 'Courier', 'https://developer.fedex.com/api', 'v1', 'v1', TRUE, TRUE, TRUE);
-- Insert shipping service types
INSERT INTO shipping_service_types (service_id, carrier_id, service_code, service_name, service_category, domestic_available, international_available, max_weight_lbs, max_dimensions_length, max_dimensions_width, max_dimensions_height, tracking_included, insurance_available, signature_required, active_status) VALUES
('service_usps_priority', 'carrier_usps', 'PRIORITY', 'Priority Mail', 'Priority', TRUE, TRUE, 70.0, 108.0, 108.0, 108.0, TRUE, TRUE, FALSE, TRUE),
('service_usps_priority_express', 'carrier_usps', 'PRIORITY_EXPRESS', 'Priority Mail Express', 'Express', TRUE, TRUE, 70.0, 108.0, 108.0, 108.0, TRUE, TRUE, TRUE, TRUE),
('service_usps_ground', 'carrier_usps', 'GROUND', 'USPS Ground Advantage', 'Ground', TRUE, FALSE, 70.0, 130.0, 130.0, 130.0, TRUE, TRUE, FALSE, TRUE),
('service_usps_first_class', 'carrier_usps', 'FIRST_CLASS', 'First-Class Mail', 'Economy', TRUE, TRUE, 15.999, 108.0, 108.0, 108.0, TRUE, FALSE, FALSE, TRUE),
('service_ups_ground', 'carrier_ups', 'GROUND', 'UPS Ground', 'Ground', TRUE, FALSE, 150.0, 108.0, 108.0, 108.0, TRUE, TRUE, FALSE, TRUE),
('service_ups_next_day_air', 'carrier_ups', 'NEXT_DAY_AIR', 'UPS Next Day Air', 'Express', TRUE, TRUE, 150.0, 108.0, 108.0, 108.0, TRUE, TRUE, TRUE, TRUE),
('service_ups_2nd_day_air', 'carrier_ups', '2ND_DAY_AIR', 'UPS 2nd Day Air', 'Express', TRUE, TRUE, 150.0, 108.0, 108.0, 108.0, TRUE, TRUE, FALSE, TRUE),
('service_fedex_ground', 'carrier_fedex', 'GROUND', 'FedEx Ground', 'Ground', TRUE, FALSE, 150.0, 108.0, 108.0, 108.0, TRUE, TRUE, FALSE, TRUE),
('service_fedex_express', 'carrier_fedex', 'EXPRESS', 'FedEx Express', 'Express', TRUE, TRUE, 150.0, 108.0, 108.0, 108.0, TRUE, TRUE, TRUE, TRUE);
-- Insert shipping zones (sample zone mappings for major cities)
INSERT INTO shipping_zones (zone_id, carrier_id, origin_zip_code, destination_zip_code, zone_number, zone_type, distance_miles, transit_days_min, transit_days_max, effective_date, expiration_date) VALUES
-- USPS Zones (New York to various destinations)
('zone_usps_10001_90210', 'carrier_usps', '10001', '90210', 8, 'Domestic', 2785.0, 3, 5, '2026-01-01', NULL),
('zone_usps_10001_60601', 'carrier_usps', '10001', '60601', 3, 'Domestic', 790.0, 2, 3, '2026-01-01', NULL),
('zone_usps_10001_33101', 'carrier_usps', '10001', '33101', 5, 'Domestic', 1289.0, 2, 4, '2026-01-01', NULL),
('zone_usps_10001_98101', 'carrier_usps', '10001', '98101', 8, 'Domestic', 2408.0, 4, 6, '2026-01-01', NULL),
-- USPS Zones (Los Angeles to various destinations)
('zone_usps_90210_10001', 'carrier_usps', '90210', '10001', 8, 'Domestic', 2785.0, 3, 5, '2026-01-01', NULL),
('zone_usps_90210_60601', 'carrier_usps', '90210', '60601', 6, 'Domestic', 2014.0, 3, 4, '2026-01-01', NULL),
('zone_usps_90210_98101', 'carrier_usps', '90210', '98101', 3, 'Domestic', 1135.0, 2, 3, '2026-01-01', NULL),
-- UPS Zones
('zone_ups_10001_90210', 'carrier_ups', '10001', '90210', 8, 'Domestic', 2785.0, 5, 7, '2026-01-01', NULL),
('zone_ups_10001_60601', 'carrier_ups', '10001', '60601', 3, 'Domestic', 790.0, 1, 2, '2026-01-01', NULL),
('zone_ups_90210_10001', 'carrier_ups', '90210', '10001', 8, 'Domestic', 2785.0, 5, 7, '2026-01-01', NULL),
('zone_ups_90210_60601', 'carrier_ups', '90210', '60601', 6, 'Domestic', 2014.0, 4, 5, '2026-01-01', NULL);
-- Insert shipping rates (sample rates for different weights and zones)
INSERT INTO shipping_rates (rate_id, carrier_id, service_id, zone_id, weight_lbs, weight_oz, rate_amount, rate_type, surcharge_amount, total_rate, effective_date, expiration_date, rate_source) VALUES
-- USPS Priority Mail rates
('rate_usps_priority_1lb_zone3', 'carrier_usps', 'service_usps_priority', 'zone_usps_10001_60601', 1.0, 16.0, 8.95, 'Commercial', 0.0, 8.95, '2026-01-01', NULL, 'API'),
('rate_usps_priority_2lb_zone3', 'carrier_usps', 'service_usps_priority', 'zone_usps_10001_60601', 2.0, 32.0, 10.25, 'Commercial', 0.0, 10.25, '2026-01-01', NULL, 'API'),
('rate_usps_priority_5lb_zone3', 'carrier_usps', 'service_usps_priority', 'zone_usps_10001_60601', 5.0, 80.0, 14.50, 'Commercial', 0.0, 14.50, '2026-01-01', NULL, 'API'),
('rate_usps_priority_1lb_zone8', 'carrier_usps', 'service_usps_priority', 'zone_usps_10001_90210', 1.0, 16.0, 12.95, 'Commercial', 0.0, 12.95, '2026-01-01', NULL, 'API'),
('rate_usps_priority_2lb_zone8', 'carrier_usps', 'service_usps_priority', 'zone_usps_10001_90210', 2.0, 32.0, 15.25, 'Commercial', 0.0, 15.25, '2026-01-01', NULL, 'API'),
-- USPS Priority Mail Express rates
('rate_usps_express_1lb_zone3', 'carrier_usps', 'service_usps_priority_express', 'zone_usps_10001_60601', 1.0, 16.0, 26.95, 'Commercial', 0.0, 26.95, '2026-01-01', NULL, 'API'),
('rate_usps_express_2lb_zone3', 'carrier_usps', 'service_usps_priority_express', 'zone_usps_10001_60601', 2.0, 32.0, 28.95, 'Commercial', 0.0, 28.95, '2026-01-01', NULL, 'API'),
-- UPS Ground rates
('rate_ups_ground_1lb_zone3', 'carrier_ups', 'service_ups_ground', 'zone_ups_10001_60601', 1.0, 16.0, 9.25, 'Daily', 0.0, 9.25, '2026-01-01', NULL, 'API'),
('rate_ups_ground_5lb_zone3', 'carrier_ups', 'service_ups_ground', 'zone_ups_10001_60601', 5.0, 80.0, 12.50, 'Daily', 0.0, 12.50, '2026-01-01', NULL, 'API'),
('rate_ups_ground_10lb_zone3', 'carrier_ups', 'service_ups_ground', 'zone_ups_10001_60601', 10.0, 160.0, 18.75, 'Daily', 0.0, 18.75, '2026-01-01', NULL, 'API'),
-- UPS Next Day Air rates
('rate_ups_nda_1lb_zone3', 'carrier_ups', 'service_ups_next_day_air', 'zone_ups_10001_60601', 1.0, 16.0, 45.95, 'Daily', 0.0, 45.95, '2026-01-01', NULL, 'API'),
('rate_ups_nda_5lb_zone3', 'carrier_ups', 'service_ups_next_day_air', 'zone_ups_10001_60601', 5.0, 80.0, 65.50, 'Daily', 0.0, 65.50, '2026-01-01', NULL, 'API');
-- Insert packages
INSERT INTO packages (package_id, user_id, package_reference, weight_lbs, weight_oz, length_inches, width_inches, height_inches, dimensional_weight_lbs, cubic_volume_cubic_inches, package_type, package_value, contents_description) VALUES
('pkg_001', 'user_001', 'ORDER-2026-001', 1.5, 24.0, 12.0, 8.0, 6.0, 1.5, 576.0, 'Box', 45.99, 'Electronics - Small Device'),
('pkg_002', 'user_001', 'ORDER-2026-002', 5.0, 80.0, 18.0, 12.0, 10.0, 5.0, 2160.0, 'Box', 125.50, 'Clothing - Multiple Items'),
('pkg_003', 'user_002', 'ORDER-2026-003', 0.5, 8.0, 10.0, 7.0, 1.0, 0.5, 70.0, 'Envelope', 15.00, 'Documents'),
('pkg_004', 'user_002', 'ORDER-2026-004', 2.5, 40.0, 14.0, 10.0, 8.0, 2.5, 1120.0, 'Box', 89.99, 'Books - Collection'),
('pkg_005', 'user_003', 'ORDER-2026-005', 10.0, 160.0, 20.0, 16.0, 12.0, 10.0, 3840.0, 'Box', 250.00, 'Home Goods - Kitchen Items');
-- Insert shipments
INSERT INTO shipments (shipment_id, package_id, carrier_id, service_id, tracking_number, origin_name, origin_address_line1, origin_city, origin_state, origin_zip_code, origin_country, destination_name, destination_address_line1, destination_city, destination_state, destination_zip_code, destination_country, zone_id, rate_id, label_cost, insurance_cost, signature_cost, total_cost, shipment_status, label_created_at, estimated_delivery_date, actual_delivery_date) VALUES
('ship_001', 'pkg_001', 'carrier_usps', 'service_usps_priority', '9400111899223197428490', 'Acme Shipping', '123 Main St', 'New York', 'NY', '10001', 'US', 'John Doe', '456 Oak Ave', 'Chicago', 'IL', '60601', 'US', 'zone_usps_10001_60601', 'rate_usps_priority_1lb_zone3', 8.95, 0.0, 0.0, 8.95, 'Delivered', '2026-02-01 10:00:00', '2026-02-03', '2026-02-03'),
('ship_002', 'pkg_002', 'carrier_usps', 'service_usps_priority', '9400111899223197428491', 'Acme Shipping', '123 Main St', 'New York', 'NY', '10001', 'US', 'Jane Smith', '789 Pine St', 'Beverly Hills', 'CA', '90210', 'US', 'zone_usps_10001_90210', 'rate_usps_priority_2lb_zone8', 15.25, 2.50, 0.0, 17.75, 'In Transit', '2026-02-02 14:30:00', '2026-02-05', NULL),
('ship_003', 'pkg_003', 'carrier_usps', 'service_usps_first_class', '9400111899223197428492', 'Quick Ship Co', '456 Commerce Blvd', 'Los Angeles', 'CA', '90210', 'US', 'Bob Johnson', '321 Elm St', 'Seattle', 'WA', '98101', 'US', 'zone_usps_90210_98101', NULL, 4.50, 0.0, 0.0, 4.50, 'Label Created', '2026-02-03 09:15:00', '2026-02-05', NULL),
('ship_004', 'pkg_004', 'carrier_ups', 'service_ups_ground', '1Z999AA10123456784', 'Global Shipping', '789 Business Park', 'Chicago', 'IL', '60601', 'US', 'Alice Williams', '654 Maple Dr', 'New York', 'NY', '10001', 'US', 'zone_ups_10001_60601', 'rate_ups_ground_1lb_zone3', 9.25, 0.0, 0.0, 9.25, 'Delivered', '2026-02-01 08:00:00', '2026-02-02', '2026-02-02'),
('ship_005', 'pkg_005', 'carrier_ups', 'service_ups_next_day_air', '1Z999AA10123456785', 'Express Logistics', '321 Industrial Way', 'Miami', 'FL', '33101', 'US', 'Charlie Brown', '987 Cedar Ln', 'Chicago', 'IL', '60601', 'US', 'zone_ups_10001_60601', 'rate_ups_nda_5lb_zone3', 65.50, 5.00, 3.50, 74.00, 'In Transit', '2026-02-03 11:00:00', '2026-02-04', NULL);
-- Insert tracking events
INSERT INTO tracking_events (event_id, shipment_id, tracking_number, event_timestamp, event_type, event_status, event_location, event_city, event_state, event_zip_code, event_country, event_description, carrier_status_code) VALUES
('event_001', 'ship_001', '9400111899223197428490', '2026-02-01 10:05:00', 'Label Created', 'ACCEPTED', 'USPS Facility', 'New York', 'NY', '10001', 'US', 'Shipping Label Created, USPS Awaiting Item', 'AC'),
('event_002', 'ship_001', '9400111899223197428490', '2026-02-01 14:30:00', 'In Transit', 'IN_TRANSIT', 'USPS Regional Facility', 'Newark', 'NJ', '07114', 'US', 'Arrived at USPS Regional Facility', 'IT'),
('event_003', 'ship_001', '9400111899223197428490', '2026-02-02 08:15:00', 'In Transit', 'IN_TRANSIT', 'USPS Regional Facility', 'Chicago', 'IL', '60601', 'US', 'Arrived at USPS Regional Facility', 'IT'),
('event_004', 'ship_001', '9400111899223197428490', '2026-02-03 09:00:00', 'Out for Delivery', 'OUT_FOR_DELIVERY', 'USPS Post Office', 'Chicago', 'IL', '60601', 'US', 'Out for Delivery', 'OF'),
('event_005', 'ship_001', '9400111899223197428490', '2026-02-03 14:30:00', 'Delivered', 'DELIVERED', 'Residence', 'Chicago', 'IL', '60601', 'US', 'Delivered, Left with Individual', 'DE'),
('event_006', 'ship_002', '9400111899223197428491', '2026-02-02 14:35:00', 'Label Created', 'ACCEPTED', 'USPS Facility', 'New York', 'NY', '10001', 'US', 'Shipping Label Created, USPS Awaiting Item', 'AC'),
('event_007', 'ship_002', '9400111899223197428491', '2026-02-02 18:00:00', 'In Transit', 'IN_TRANSIT', 'USPS Regional Facility', 'Newark', 'NJ', '07114', 'US', 'Departed USPS Regional Facility', 'IT'),
('event_008', 'ship_004', '1Z999AA10123456784', '2026-02-01 08:05:00', 'Label Created', 'LABEL_CREATED', 'UPS Facility', 'Chicago', 'IL', '60601', 'US', 'Shipment Created', 'OC'),
('event_009', 'ship_004', '1Z999AA10123456784', '2026-02-01 12:00:00', 'In Transit', 'IN_TRANSIT', 'UPS Hub', 'Chicago', 'IL', '60601', 'US', 'Origin Scan', 'OR'),
('event_010', 'ship_004', '1Z999AA10123456784', '2026-02-01 20:00:00', 'In Transit', 'IN_TRANSIT', 'UPS Hub', 'Newark', 'NJ', '07114', 'US', 'Arrival Scan', 'AR'),
('event_011', 'ship_004', '1Z999AA10123456784', '2026-02-02 08:00:00', 'Delivered', 'DELIVERED', 'Residence', 'New York', 'NY', '10001', 'US', 'Delivered', 'D');
-- Insert address validation results
INSERT INTO address_validation_results (validation_id, input_address_line1, input_city, input_state, input_zip_code, validated_address_line1, validated_city, validated_state, validated_zip_code, validated_zip_plus_4, validation_status, delivery_point_code, carrier_route, dpv_confirmation, cmra_flag, vacant_flag, residential_flag) VALUES
('val_001', '456 Oak Ave', 'Chicago', 'IL', '60601', '456 OAK AVE', 'CHICAGO', 'IL', '60601', '60601-1234', 'Valid', '12', 'C012', 'Y', FALSE, FALSE, TRUE),
('val_002', '789 Pine St', 'Beverly Hills', 'CA', '90210', '789 PINE ST', 'BEVERLY HILLS', 'CA', '90210', '90210-5678', 'Valid', '34', 'C034', 'Y', FALSE, FALSE, TRUE),
('val_003', '321 Elm St', 'Seattle', 'WA', '98101', '321 ELM ST', 'SEATTLE', 'WA', '98101', '98101-9012', 'Valid', '56', 'C056', 'Y', FALSE, FALSE, TRUE),
('val_004', '654 Maple Dr', 'New York', 'NY', '10001', '654 MAPLE DR', 'NEW YORK', 'NY', '10001', '10001-3456', 'Valid', '78', 'C078', 'Y', FALSE, FALSE, TRUE),
('val_005', '987 Cedar Ln', 'Chicago', 'IL', '60601', '987 CEDAR LN', 'CHICAGO', 'IL', '60601', '60601-7890', 'Valid', '90', 'C090', 'Y', FALSE, FALSE, TRUE);
-- Insert bulk shipping presets
INSERT INTO bulk_shipping_presets (preset_id, user_id, preset_name, package_type, default_weight_lbs, default_length_inches, default_width_inches, default_height_inches, default_service_id, default_carrier_id, default_insurance_amount, default_signature_required) VALUES
('preset_001', 'user_001', 'Small Package Standard', 'Box', 1.0, 12.0, 8.0, 6.0, 'service_usps_priority', 'carrier_usps', 0.0, FALSE),
('preset_002', 'user_001', 'Medium Package Express', 'Box', 5.0, 18.0, 12.0, 10.0, 'service_usps_priority_express', 'carrier_usps', 100.0, TRUE),
('preset_003', 'user_002', 'Document Envelope', 'Envelope', 0.5, 10.0, 7.0, 1.0, 'service_usps_first_class', 'carrier_usps', 0.0, FALSE),
('preset_004', 'user_003', 'Large Package Ground', 'Box', 10.0, 20.0, 16.0, 12.0, 'service_ups_ground', 'carrier_ups', 250.0, FALSE);
-- Insert rate comparison results
INSERT INTO rate_comparison_results (comparison_id, package_id, origin_zip_code, destination_zip_code, comparison_timestamp, cheapest_carrier_id, cheapest_service_id, cheapest_rate, fastest_carrier_id, fastest_service_id, fastest_transit_days, total_options_count) VALUES
('comp_001', 'pkg_001', '10001', '60601', '2026-02-01 09:45:00', 'carrier_usps', 'service_usps_priority', 8.95, 'carrier_ups', 'service_ups_next_day_air', 1, 6),
('comp_002', 'pkg_002', '10001', '90210', '2026-02-02 14:00:00', 'carrier_usps', 'service_usps_priority', 15.25, 'carrier_usps', 'service_usps_priority_express', 2, 5),
('comp_003', 'pkg_003', '90210', '98101', '2026-02-03 08:30:00', 'carrier_usps', 'service_usps_first_class', 4.50, 'carrier_usps', 'service_usps_priority', 2, 4);
-- Insert shipping adjustments
INSERT INTO shipping_adjustments (adjustment_id, shipment_id, tracking_number, adjustment_type, original_amount, adjusted_amount, adjustment_amount, adjustment_reason, adjustment_status, adjustment_date) VALUES
('adj_001', 'ship_002', '9400111899223197428491', 'Weight', 15.25, 17.75, 2.50, 'Package weight exceeded declared weight by 0.5 lbs', 'Applied', '2026-02-03'),
('adj_002', 'ship_005', '1Z999AA10123456785', 'Dimensions', 65.50, 68.00, 2.50, 'Package dimensions exceeded declared dimensions', 'Pending', '2026-02-03');
-- Insert shipping analytics
INSERT INTO shipping_analytics (analytics_id, analytics_date, carrier_id, service_id, total_shipments, total_revenue, average_rate, total_packages, total_weight_lbs, average_transit_days, on_time_delivery_rate, exception_rate, average_package_value) VALUES
('analytics_001', '2026-02-01', 'carrier_usps', 'service_usps_priority', 15, 134.25, 8.95, 15, 22.5, 2.5, 93.33, 6.67, 45.50),
('analytics_002', '2026-02-01', 'carrier_ups', 'service_ups_ground', 8, 74.00, 9.25, 8, 12.0, 1.5, 100.00, 0.00, 85.00),
('analytics_003', '2026-02-02', 'carrier_usps', 'service_usps_priority', 12, 107.40, 8.95, 12, 18.0, 2.3, 91.67, 8.33, 52.25),
('analytics_004', '2026-02-03', 'carrier_usps', 'service_usps_priority_express', 5, 134.75, 26.95, 5, 7.5, 1.0, 100.00, 0.00, 125.00);
-- Insert international customs (sample international shipment)
INSERT INTO international_customs (customs_id, shipment_id, customs_declaration_number, customs_value, currency_code, contents_description, hs_tariff_code, country_of_origin, customs_duty_amount, customs_tax_amount, customs_fees_amount, total_customs_amount, customs_status, customs_cleared_date) VALUES
('customs_001', 'ship_002', 'CBP-2026-001234', 125.50, 'USD', 'Clothing - Cotton Apparel', '6203.42', 'CN', 12.55, 10.04, 5.00, 27.59, 'Cleared', '2026-02-04');
-- Insert API rate request log
INSERT INTO api_rate_request_log (log_id, carrier_id, request_type, origin_zip_code, destination_zip_code, weight_lbs, request_timestamp, response_time_ms, response_status_code, rate_returned, api_endpoint) VALUES
('log_001', 'carrier_usps', 'Rate', '10001', '60601', 1.0, '2026-02-01 09:45:00', 245, 200, 8.95, 'https://developers.usps.com/api/prices/v1/domestic'),
('log_002', 'carrier_ups', 'Rate', '10001', '60601', 1.0, '2026-02-01 09:45:05', 312, 200, 9.25, 'https://developer.ups.com/api/rating/v1'),
('log_003', 'carrier_usps', 'Rate', '10001', '90210', 2.0, '2026-02-02 14:00:00', 198, 200, 15.25, 'https://developers.usps.com/api/prices/v1/domestic'),
('log_004', 'carrier_usps', 'Tracking', '10001', NULL, NULL, '2026-02-01 10:00:00', 156, 200, NULL, 'https://developers.usps.com/api/tracking/v1'),
('log_005', 'carrier_ups', 'Tracking', '60601', NULL, NULL, '2026-02-01 08:05:00', 189, 200, NULL, 'https://developer.ups.com/api/tracking/v1');
"""
def execute_data_sql(connection):
    """Execute every statement of the embedded ``DATA_SQL`` script.

    The script is split on ``;`` and the pieces are executed one at a time.
    Loading is best effort: a failing statement is reported and the remaining
    statements are still attempted.

    Each statement is committed on success and rolled back on failure.
    Without the rollback, a database that aborts the open transaction after
    an error (PostgreSQL does) would reject every later statement and the
    final commit would persist nothing, while the function still reported
    success.

    Args:
        connection: Open DB-API 2.0 connection (e.g. a psycopg2 connection).

    Returns:
        bool: True only when every statement executed. False when at least
        one statement failed or an unexpected error interrupted the load.
    """
    cursor = connection.cursor()
    try:
        # Naive split: only safe while the script contains no ';' inside
        # string literals or SQL comments.
        statements = [s.strip() for s in DATA_SQL.split(';') if s.strip()]
        total = len(statements)
        failed = 0
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                # Persist this statement now so that a later rollback
                # cannot discard it.
                connection.commit()
                print(f"  ‚úÖ Executed statement {idx}/{len(statements)}")
            except Exception as e:
                # Clear the failed transaction so the next statement can run.
                connection.rollback()
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ‚ö†Ô∏è  Statement {idx} warning: {error_msg}")
        if failed:
            print(f"\n‚ö†Ô∏è  Data loaded with {failed} of {total} statement(s) failing")
            return False
        print("\n‚úÖ Data loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n‚ùå Error loading data: {e}")
        return False
    finally:
        cursor.close()
# Load the sample data immediately when a `conn` global is already defined;
# otherwise point the user at the connection cell.
if 'conn' not in globals():
    print(
        "‚ö†Ô∏è  Database connection not found. Run connection cell first.",
        "   Data SQL is available in DATA_SQL variable",
        sep="\n",
    )
else:
    print("="*80, "LOADING EMBEDDED DATA", "="*80, sep="\n")
    execute_data_sql(conn)


In [None]:
# ============================================================================
# EMBEDDED QUERIES.JSON - DB-9
# ============================================================================
# This cell contains all query metadata embedded directly in the notebook
# No external file dependencies required
import json
# Queries data (embedded directly in notebook)
QUERIES_DATA = {
  "source_file": "/Users/machine/Documents/AQ/db/db-9/queries/queries.md",
  "extraction_timestamp": "20260208-2109",
  "total_queries": 30,
  "queries": [
    {
      "number": 1,
      "title": "Multi-Carrier Rate Comparison with Zone Analysis and Cost Optimization",
      "description": "Description: Comprehensive rate comparison across multiple carriers with zone-based analysis, dimensional weight calculations, and cost optimization recommendations. Uses multiple CTEs to calculate rates, compare carriers, identify cheapest options, and analyze cost savings potential. Use Case: Shipping platform needs to compare rates across USPS, UPS, and other carriers for a package and recommend the most cost-effective option based on weight, dimensions, and destination zone. Business Value:
    ",
      "complexity": "Multiple CTEs (5+ levels), zone calculations, dimensional weight logic, rate aggregation, window functions for ranking, cost savings calculations, carrier comparison analytics.",
      "expected_output":
    "Rate comparison results showing cheapest carrier, fastest carrier, cost savings potential, and detailed rate breakdowns for all available options.",
      "sql": "WITH package_dimensions AS (\n    -- First CTE: Calculate package dimensions and dimensional weight\n    SELECT\n        p.package_id,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches,\n        p.length_inches * p.width_inches * p.height_inches AS cubic_volume_cubic_inches,\n        CASE\n            WHEN p.length_inches * p.width_inches * p.height_inches / 166.0 > p.weight_lbs\n            THEN p.length_inches * p.width_inches * p.height_inches / 166.0\n            ELSE p.weight_lbs\n        END AS billable_weight_lbs,\n        p.package_type\n    FROM packages p\n    WHERE p.package_id = 'PACKAGE_ID_PLACEHOLDER'\n),\nzone_lookup AS (\n    -- Second CTE: Determine shipping zones for origin and destination\n    SELECT DISTINCT\n        z.zone_id,\n        z.carrier_id,\n        z.origin_zip_code,\n        z.destination_zip_code,\n        z.zone_number,\n        z.zone_type,\n        z.transit_days_min,\n        z.transit_days_max,\n        z.effective_date,\n        z.expiration_date\n    FROM shipping_zones z\n    WHERE z.origin_zip_code = 'ORIGIN_ZIP_PLACEHOLDER'\n        AND z.destination_zip_code = 'DEST_ZIP_PLACEHOLDER'\n        AND (z.expiration_date IS NULL OR z.expiration_date >= CURRENT_DATE)\n        AND z.effective_date <= CURRENT_DATE\n),\ncarrier_service_options AS (\n    -- Third CTE:
    Get all available carrier service combinations\n    SELECT DISTINCT\n        c.carrier_id,\n        c.carrier_name,\n        c.carrier_code,\n        st.service_id,\n        st.service_code,\n        st.service_name,\n        st.service_category,\n        st.max_weight_lbs,\n        st.domestic_available,\n        st.tracking_included,\n        z.zone_number,\n        z.transit_days_min,\n        z.transit_days_max\n    FROM shipping_carriers c\n    CROSS JOIN shipping_service_types st\n    LEFT JOIN zone_lookup z ON c.carrier_id = z.carrier_id\n    WHERE c.active_status = TRUE\n        AND st.active_status = TRUE\n        AND st.domestic_available = TRUE\n),\nrate_calculations AS (\n    -- Fourth CTE: Calculate rates for each carrier/service combination\n    SELECT\n        cso.carrier_id,\n        cso.carrier_name,\n        cso.carrier_code,\n        cso.service_id,\n        cso.service_code,\n        cso.service_name,\n        cso.service_category,\n        cso.zone_number,\n        cso.transit_days_min,\n        cso.transit_days_max,\n        pd.billable_weight_lbs,\n        pd.package_id,\n        COALESCE(\n            (SELECT MIN(sr.total_rate)\n             FROM shipping_rates sr\n             WHERE sr.carrier_id = cso.carrier_id\n                 AND sr.service_id = cso.service_id\n                 AND sr.weight_lbs >= pd.billable_weight_lbs\n                 AND (sr.expiration_date IS NULL OR sr.expiration_date >= CURRENT_DATE)\n                 AND sr.effective_date <= CURRENT_DATE\n             LIMIT 1),\n            999999.99\n        ) AS calculated_rate,\n        CASE\n            WHEN cso.max_weight_lbs IS NOT NULL AND pd.billable_weight_lbs > cso.max_weight_lbs\n            THEN FALSE\n            ELSE TRUE\n        END AS weight_compatible\n    FROM carrier_service_options cso\n    CROSS JOIN package_dimensions pd\n),\nrate_rankings AS (\n    -- Fifth CTE: Rank rates and identify cheapest/fastest options\n    SELECT\n        rc.carrier_id,\n   
     rc.carrier_name,\n        rc.carrier_code,\n        rc.service_id,\n        rc.service_code,\n        rc.service_name,\n        rc.service_category,\n        rc.zone_number,\n        rc.transit_days_min,\n        rc.transit_days_max,\n        rc.calculated_rate,\n        rc.weight_compatible,\n        ROW_NUMBER() OVER (ORDER BY rc.calculated_rate ASC) AS rate_rank,\n        ROW_NUMBER() OVER (ORDER BY rc.transit_days_min ASC, rc.calculated_rate ASC) AS speed_rank,\n        MIN(rc.calculated_rate) OVER () AS cheapest_rate,\n        MIN(rc.transit_days_min) OVER () AS fastest_transit_days\n    FROM rate_calculations rc\n    WHERE rc.weight_compatible = TRUE\n        AND rc.calculated_rate < 999999.99\n)\nSELECT\n    rr.carrier_name,\n    rr.service_name,\n    rr.calculated_rate AS rate_amount,\n    rr.zone_number,\n    rr.transit_days_min AS estimated_transit_days,\n    CASE\n        WHEN rr.rate_rank = 1 THEN 'Cheapest Option'\n        WHEN rr.speed_rank = 1 THEN 'Fastest Option'\n        ELSE 'Alternative Option'\n    END AS recommendation_type,\n    rr.calculated_rate - rr.cheapest_rate AS cost_difference_from_cheapest,\n    CASE\n        WHEN rr.cheapest_rate > 0\n        THEN ((rr.calculated_rate - rr.cheapest_rate) / rr.cheapest_rate * 100)\n        ELSE 0\n    END AS cost_premium_percentage,\n    CASE\n        WHEN rr.transit_days_min = rr.fastest_transit_days THEN TRUE\n        ELSE FALSE\n    END AS is_fastest_option\nFROM rate_rankings rr\nORDER BY rr.rate_rank, rr.speed_rank;",
      "line_number": 148,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005428,
        "row_count": 0,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 2,
      "title": "Shipping Zone Analysis with Geographic Distribution and Transit Time Optimization",
      "description": "Description: Advanced zone analysis query that calculates shipping zone distributions, analyzes transit time patterns, identifies optimal shipping routes, and provides geographic shipping intelligence. Uses recursive CTEs for zone path analysis and multiple aggregations for geographic insights. Use Case:
    Shipping platform needs to analyze zone distributions across carriers, identify zones with longest transit times, and optimize shipping routes based on geographic patterns. Business Value: Helps",
      "complexity": "Recursive CTEs for zone path analysis, multiple aggregations, window functions for ranking zones, geographic calculations, transit time analysis, carrier comparison across zones.",
      "expected_output":
    "Zone analysis results showing zone distributions, average transit times by zone, geographic shipping patterns, and optimization recommendations.",
      "sql": "WITH RECURSIVE zone_hierarchy AS (\n    -- Anchor: Base zones\n    SELECT\n        z.zone_id,\n        z.carrier_id,\n        z.origin_zip_code,\n        z.destination_zip_code,\n        z.zone_number,\n        z.zone_type,\n        z.distance_miles,\n        z.transit_days_min,\n        z.transit_days_max,\n        z.transit_days_max - z.transit_days_min AS transit_variance_days,\n        1 AS hierarchy_level,\n        CAST(z.zone_id AS VARCHAR(1000)) AS zone_path\n    FROM shipping_zones z\n    WHERE z.zone_type = 'Domestic'\n        AND (z.expiration_date IS NULL OR z.expiration_date >= CURRENT_DATE)\n    UNION ALL\n    -- Recursive: Find related zones with similar characteristics\n    SELECT\n        z.zone_id,\n        z.carrier_id,\n        z.origin_zip_code,\n        z.destination_zip_code,\n        z.zone_number,\n        z.zone_type,\n        z.distance_miles,\n        z.transit_days_min,\n        z.transit_days_max,\n        z.transit_days_max - z.transit_days_min AS transit_variance_days,\n        zh.hierarchy_level + 1,\n        (zh.zone_path || ' -> ' || z.zone_id)::VARCHAR(1000) AS zone_path\n    FROM shipping_zones z\n    INNER JOIN zone_hierarchy zh ON z.carrier_id = zh.carrier_id\n        AND ABS(z.zone_number - zh.zone_number) <= 1\n        AND z.zone_id != zh.zone_id\n    WHERE zh.hierarchy_level < 5\n),\nzone_statistics AS (\n    -- Calculate statistics for each zone\n    SELECT\n        z.zone_number,\n        z.zone_type,\n        COUNT(DISTINCT z.carrier_id) AS carrier_count,\n        COUNT(DISTINCT z.origin_zip_code) AS origin_zip_count,\n        COUNT(DISTINCT z.destination_zip_code) AS destination_zip_count,\n        AVG(z.distance_miles) AS avg_distance_miles,\n        AVG(z.transit_days_min) AS avg_transit_days_min,\n        AVG(z.transit_days_max) AS avg_transit_days_max,\n        AVG(z.transit_days_max - z.transit_days_min) AS avg_transit_variance,\n        MIN(z.transit_days_min) AS fastest_transit_days,\n        
MAX(z.transit_days_max) AS slowest_transit_days,\n        COUNT(*) AS total_zone_records\n    FROM shipping_zones z\n    WHERE z.zone_type = 'Domestic'\n        AND (z.expiration_date IS NULL OR z.expiration_date >= CURRENT_DATE)\n    GROUP BY z.zone_number, z.zone_type\n),\ncarrier_zone_performance AS (\n    -- Analyze carrier performance by zone\n    SELECT\n        z.carrier_id,\n        c.carrier_name,\n        z.zone_number,\n        COUNT(DISTINCT z.zone_id) AS zone_coverage_count,\n        AVG(z.transit_days_min) AS avg_min_transit_days,\n        AVG(z.transit_days_max) AS avg_max_transit_days,\n        AVG(z.transit_days_max - z.transit_days_min) AS avg_transit_variance,\n        MIN(z.transit_days_min) AS best_transit_days,\n        MAX(z.transit_days_max) AS worst_transit_days,\n        COUNT(DISTINCT z.origin_zip_code) AS origin_coverage,\n        COUNT(DISTINCT z.destination_zip_code) AS destination_coverage\n    FROM shipping_zones z\n    INNER JOIN shipping_carriers c ON z.carrier_id = c.carrier_id\n    WHERE z.zone_type = 'Domestic'\n        AND (z.expiration_date IS NULL OR z.expiration_date >= CURRENT_DATE)\n        AND c.active_status = TRUE\n    GROUP BY z.carrier_id, c.carrier_name, z.zone_number\n),\nzone_rankings AS (\n    -- Rank zones by performance metrics\n    SELECT\n        zs.zone_number,\n        zs.zone_type,\n        zs.carrier_count,\n        zs.avg_distance_miles,\n        zs.avg_transit_days_min,\n        zs.avg_transit_days_max,\n        zs.avg_transit_variance,\n        zs.fastest_transit_days,\n        zs.slowest_transit_days,\n        ROW_NUMBER() OVER (ORDER BY zs.avg_transit_days_min ASC) AS speed_rank,\n        ROW_NUMBER() OVER (ORDER BY zs.avg_transit_variance ASC) AS consistency_rank,\n        ROW_NUMBER() OVER (ORDER BY zs.carrier_count DESC) AS coverage_rank,\n        PERCENT_RANK() OVER (ORDER BY zs.avg_transit_days_min) AS speed_percentile,\n        PERCENT_RANK() OVER (ORDER BY zs.avg_transit_variance) AS 
consistency_percentile\n    FROM zone_statistics zs\n)\nSELECT\n    zr.zone_number,\n    zr.zone_type,\n    zr.carrier_count,\n    zr.avg_distance_miles,\n    zr.avg_transit_days_min,\n    zr.avg_transit_days_max,\n    zr.avg_transit_variance,\n    zr.fastest_transit_days,\n    zr.slowest_transit_days,\n    zr.speed_rank,\n    zr.consistency_rank,\n    zr.coverage_rank,\n    CASE\n        WHEN zr.speed_percentile <= 0.25 THEN 'Fast Zone'\n        WHEN zr.speed_percentile >= 0.75 THEN 'Slow Zone'\n        ELSE 'Average Zone'\n    END AS speed_category,\n    CASE\n        WHEN zr.consistency_percentile <= 0.25 THEN 'Consistent Zone'\n        WHEN zr.consistency_percentile >= 0.75 THEN 'Variable Zone'\n        ELSE 'Moderate Zone'\n    END AS consistency_category,\n    czp.carrier_name AS best_carrier_for_zone,\n    czp.avg_min_transit_days AS best_carrier_transit_days\nFROM zone_rankings zr\nLEFT JOIN LATERAL (\n    SELECT carrier_name, avg_min_transit_days\n    FROM carrier_zone_performance czp\n    WHERE czp.zone_number = zr.zone_number\n    ORDER BY czp.avg_min_transit_days ASC\n    LIMIT 1\n) czp ON TRUE\nORDER BY zr.zone_number;",
      "line_number":
    305,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005245,
        "row_count": 4,
        "column_count": 16,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 3,
      "title": "Shipment Tracking Analytics with Event Pattern Analysis and Delivery Prediction",
      "description": "Description: Advanced tracking analytics query that analyzes shipment tracking events, identifies delivery patterns, predicts delivery dates, and detects anomalies. Uses multiple CTEs for event sequencing, pattern recognition, and predictive analytics. Use Case:
    Shipping platform needs to analyze tracking event patterns, predict delivery dates based on historical data, identify shipments with potential delays, and provide delivery insights to users. Business Value: Improves customer experience b",
      "complexity": "Multiple CTEs (6+ levels), event sequencing with window functions, pattern recognition logic, predictive date calculations, anomaly detection, carrier performance analysis.",
      "expected_output": "Tracking analytics showing delivery predictions, event patterns, anomaly detection results, and carrier performance metrics.",
      "sql": "WITH tracking_event_sequence AS (\n    -- First CTE: Sequence tracking events chronologically\n    SELECT\n        te.event_id,\n        te.shipment_id,\n        te.tracking_number,\n        te.event_timestamp,\n        te.event_type,\n        te.event_status,\n        te.event_location,\n        te.event_city,\n        te.event_state,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.estimated_delivery_date,\n        ROW_NUMBER() OVER (PARTITION BY te.shipment_id ORDER BY te.event_timestamp ASC) AS event_sequence,\n        LAG(te.event_timestamp) OVER (PARTITION BY te.shipment_id ORDER BY te.event_timestamp ASC) AS previous_event_timestamp,\n        LEAD(te.event_timestamp) OVER (PARTITION BY te.shipment_id ORDER BY te.event_timestamp ASC) AS next_event_timestamp\n    FROM tracking_events te\n    INNER JOIN shipments s ON te.shipment_id = s.shipment_id\n),\nevent_time_intervals AS (\n    -- Second CTE: Calculate time intervals between events\n    SELECT\n        tes.event_id,\n        tes.shipment_id,\n        tes.tracking_number,\n        tes.event_timestamp,\n        tes.event_type,\n        tes.event_status,\n        tes.event_location,\n        tes.event_city,\n        tes.event_state,\n        tes.carrier_id,\n        tes.service_id,\n        tes.origin_zip_code,\n        tes.destination_zip_code,\n        tes.estimated_delivery_date,\n        tes.event_sequence,\n        EXTRACT(EPOCH FROM (tes.event_timestamp - tes.previous_event_timestamp)) / 3600.0 AS hours_since_previous_event,\n        EXTRACT(EPOCH FROM (tes.next_event_timestamp - tes.event_timestamp)) / 3600.0 AS hours_until_next_event,\n        EXTRACT(EPOCH FROM (tes.event_timestamp - (SELECT MIN(event_timestamp) FROM tracking_events WHERE shipment_id = tes.shipment_id))) / 3600.0 AS total_hours_since_first_event\n    FROM tracking_event_sequence tes\n),\nshipment_progress_analysis AS (\n    -- Third CTE: Analyze 
shipment progress and identify milestones\n    SELECT\n        eti.shipment_id,\n        eti.tracking_number,\n        eti.carrier_id,\n        eti.service_id,\n        eti.origin_zip_code,\n        eti.destination_zip_code,\n        eti.estimated_delivery_date,\n        COUNT(*) AS total_events,\n        MIN(eti.event_timestamp) AS first_event_timestamp,\n        MAX(eti.event_timestamp) AS last_event_timestamp,\n        MAX(CASE WHEN eti.event_type = 'Label Created' THEN eti.event_timestamp END) AS label_created_timestamp,\n        MAX(CASE WHEN eti.event_type = 'In Transit' THEN eti.event_timestamp END) AS in_transit_timestamp,\n        MAX(CASE WHEN eti.event_type = 'Out for Delivery' THEN eti.event_timestamp END) AS out_for_delivery_timestamp,\n        MAX(CASE WHEN eti.event_type = 'Delivered' THEN eti.event_timestamp END) AS delivered_timestamp,\n        MAX(CASE WHEN eti.event_type = 'Exception' THEN eti.event_timestamp END) AS exception_timestamp,\n        COUNT(CASE WHEN eti.event_type = 'Exception' THEN 1 END) AS exception_count,\n        AVG(eti.hours_since_previous_event) AS avg_hours_between_events,\n        MAX(eti.hours_since_previous_event) AS max_hours_between_events,\n        MAX(eti.total_hours_since_first_event) AS total_hours_since_first_event\n    FROM event_time_intervals eti\n    GROUP BY eti.shipment_id, eti.tracking_number, eti.carrier_id, eti.service_id, eti.origin_zip_code, eti.destination_zip_code, eti.estimated_delivery_date\n),\nhistorical_delivery_patterns AS (\n    -- Fourth CTE:
    Analyze historical delivery patterns by carrier and service\n    SELECT\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        COUNT(*) AS historical_shipment_count,\n        AVG(EXTRACT(EPOCH FROM (spa.delivered_timestamp - spa.label_created_timestamp)) / 86400.0) AS avg_delivery_days,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (spa.delivered_timestamp - spa.label_created_timestamp)) / 86400.0) AS median_delivery_days,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY EXTRACT(EPOCH FROM (spa.delivered_timestamp - spa.label_created_timestamp)) / 86400.0) AS p95_delivery_days,\n        STDDEV(EXTRACT(EPOCH FROM (spa.delivered_timestamp - spa.label_created_timestamp)) / 86400.0) AS stddev_delivery_days,\n        COUNT(CASE WHEN spa.exception_count > 0 THEN 1 END) AS shipments_with_exceptions,\n        COUNT(CASE WHEN spa.delivered_timestamp <= spa.estimated_delivery_date THEN 1 END) AS on_time_deliveries\n    FROM shipment_progress_analysis spa\n    INNER JOIN shipments s ON spa.shipment_id = s.shipment_id\n    WHERE spa.delivered_timestamp IS NOT NULL\n        AND spa.label_created_timestamp IS NOT NULL\n    GROUP BY s.carrier_id, s.service_id, s.origin_zip_code, s.destination_zip_code\n),\ndelivery_prediction AS (\n    -- Fifth CTE: Predict delivery dates for in-transit shipments\n    SELECT\n        spa.shipment_id,\n        spa.tracking_number,\n        spa.carrier_id,\n        spa.service_id,\n        spa.origin_zip_code,\n        spa.destination_zip_code,\n        spa.estimated_delivery_date AS carrier_estimated_delivery,\n        spa.label_created_timestamp,\n        spa.last_event_timestamp,\n        spa.total_hours_since_first_event / 24.0 AS days_in_transit,\n        hdp.avg_delivery_days AS historical_avg_delivery_days,\n        hdp.median_delivery_days AS historical_median_delivery_days,\n        hdp.p95_delivery_days AS historical_p95_delivery_days,\n        
CASE\n            WHEN spa.delivered_timestamp IS NOT NULL THEN spa.delivered_timestamp\n            WHEN spa.out_for_delivery_timestamp IS NOT NULL THEN spa.out_for_delivery_timestamp + INTERVAL '1 day'\n            WHEN spa.in_transit_timestamp IS NOT NULL THEN spa.label_created_timestamp + INTERVAL '1 day' * hdp.median_delivery_days\n            ELSE spa.estimated_delivery_date\n        END AS predicted_delivery_date,\n        spa.exception_count,\n        CASE\n            WHEN spa.exception_count > 0 THEN TRUE\n            WHEN spa.max_hours_between_events > 48 THEN TRUE\n            ELSE FALSE\n        END AS has_anomaly\n    FROM shipment_progress_analysis spa\n    LEFT JOIN historical_delivery_patterns hdp ON spa.carrier_id = hdp.carrier_id\n        AND spa.service_id = hdp.service_id\n        AND spa.origin_zip_code = hdp.origin_zip_code\n        AND spa.destination_zip_code = hdp.destination_zip_code\n),\nanomaly_detection AS (\n    -- Sixth CTE: Detect anomalies and potential delays\n    SELECT\n        dp.shipment_id,\n        dp.tracking_number,\n        dp.carrier_id,\n        dp.service_id,\n        dp.predicted_delivery_date,\n        dp.carrier_estimated_delivery,\n        dp.has_anomaly,\n        dp.exception_count,\n        CASE\n            WHEN dp.predicted_delivery_date > dp.carrier_estimated_delivery + INTERVAL '2 days' THEN 'Potential Delay'\n            WHEN dp.has_anomaly = TRUE THEN 'Anomaly Detected'\n            WHEN dp.days_in_transit > dp.historical_p95_delivery_days THEN 'Slow Progress'\n            ELSE 'Normal'\n        END AS shipment_status_category,\n        CASE\n            WHEN dp.predicted_delivery_date > dp.carrier_estimated_delivery THEN EXTRACT(EPOCH FROM (dp.predicted_delivery_date - dp.carrier_estimated_delivery)) / 86400.0\n            ELSE 0\n        END AS predicted_delay_days\n    FROM delivery_prediction dp\n)\nSELECT\n    ad.shipment_id,\n    ad.tracking_number,\n    c.carrier_name,\n    st.service_name,\n    
ad.predicted_delivery_date,\n    ad.carrier_estimated_delivery,\n    ad.shipment_status_category,\n    ad.predicted_delay_days,\n    ad.exception_count,\n    ad.has_anomaly,\n    CASE\n        WHEN ad.shipment_status_category != 'Normal' THEN 'Action Required'\n        ELSE 'Monitoring'\n    END AS alert_level\nFROM anomaly_detection ad\nINNER JOIN shipping_carriers c ON ad.carrier_id = c.carrier_id\nINNER JOIN shipping_service_types st ON ad.service_id = st.service_id\nORDER BY ad.predicted_delivery_date, ad.predicted_delay_days DESC;",
      "line_number": 455,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006307,
        "row_count": 3,
        "column_count": 11,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 4,
      "title": "Address Validation Quality Analysis with Correction Rate Metrics",
      "description": "Description: Comprehensive address validation analysis that evaluates validation quality, correction rates, and address standardization effectiveness. Uses multiple CTEs to analyze validation patterns, identify common correction types, and measure validation accuracy. Use Case: Shipping platform needs to analyze address validation performance, identify common address errors, measure correction rates, and improve address validation accuracy. Business Value: Reduces shipping errors, improves deliv",
      "complexity": "Multiple CTEs (4+ levels), address comparison logic, validation pattern analysis, correction rate calculations, quality metrics aggregation.",
      "expected_output": "Address validation analytics showing validation rates, correction patterns, quality metrics, and recommendations for improving address accuracy.",
      "sql":
    "WITH address_validation_comparison AS (\n    -- First CTE: Compare input and validated addresses\n    SELECT\n        avr.validation_id,\n        avr.input_address_line1,\n        avr.input_address_line2,\n        avr.input_city,\n        avr.input_state,\n        avr.input_zip_code,\n        avr.validated_address_line1,\n        avr.validated_address_line2,\n        avr.validated_city,\n        avr.validated_state,\n        avr.validated_zip_code,\n        avr.validated_zip_plus_4,\n        avr.validation_status,\n        avr.dpv_confirmation,\n        avr.cmra_flag,\n        avr.vacant_flag,\n        avr.residential_flag,\n        CASE\n            WHEN UPPER(TRIM(avr.input_address_line1)) != UPPER(TRIM(avr.validated_address_line1))\n                OR UPPER(TRIM(avr.input_city)) != UPPER(TRIM(avr.validated_city))\n                OR UPPER(TRIM(avr.input_state)) != UPPER(TRIM(avr.validated_state))\n                OR UPPER(TRIM(avr.input_zip_code)) != UPPER(TRIM(avr.validated_zip_code))\n            THEN TRUE\n            ELSE FALSE\n        END AS address_was_corrected,\n        CASE\n            WHEN UPPER(TRIM(avr.input_address_line1)) != UPPER(TRIM(avr.validated_address_line1)) THEN 'Address Line 1'\n            WHEN UPPER(TRIM(avr.input_city)) != UPPER(TRIM(avr.validated_city)) THEN 'City'\n            WHEN UPPER(TRIM(avr.input_state)) != UPPER(TRIM(avr.validated_state)) THEN 'State'\n            WHEN UPPER(TRIM(avr.input_zip_code)) != UPPER(TRIM(avr.validated_zip_code)) THEN 'ZIP Code'\n            ELSE 'No Correction'\n        END AS correction_type,\n        avr.validation_timestamp\n    FROM address_validation_results avr\n),\nvalidation_statistics AS (\n    -- Second CTE: Calculate validation statistics\n    SELECT\n        DATE(avc.validation_timestamp) AS validation_date,\n        COUNT(*) AS total_validations,\n        COUNT(CASE WHEN avc.validation_status = 'Valid' THEN 1 END) AS valid_count,\n        COUNT(CASE WHEN avc.validation_status = 
'Corrected' THEN 1 END) AS corrected_count,\n        COUNT(CASE WHEN avc.validation_status = 'Invalid' THEN 1 END) AS invalid_count,\n        COUNT(CASE WHEN avc.validation_status = 'Ambiguous' THEN 1 END) AS ambiguous_count,\n        COUNT(CASE WHEN avc.address_was_corrected = TRUE THEN 1 END) AS address_corrections_count,\n        COUNT(CASE WHEN avc.dpv_confirmation = 'Y' THEN 1 END) AS dpv_confirmed_count,\n        COUNT(CASE WHEN avc.cmra_flag = TRUE THEN 1 END) AS cmra_count,\n        COUNT(CASE WHEN avc.vacant_flag = TRUE THEN 1 END) AS vacant_count,\n        COUNT(CASE WHEN avc.residential_flag = TRUE THEN 1 END) AS residential_count,\n        AVG(CASE WHEN avc.address_was_corrected = TRUE THEN 1 ELSE 0 END) * 100 AS correction_rate_percentage\n    FROM address_validation_comparison avc\n    GROUP BY DATE(avc.validation_timestamp)\n),\ncorrection_pattern_analysis AS (\n    -- Third CTE: Analyze correction patterns\n    SELECT\n        avc.correction_type,\n        COUNT(*) AS correction_count,\n        COUNT(DISTINCT avc.validated_state) AS states_affected,\n        COUNT(DISTINCT SUBSTRING(avc.validated_zip_code, 1, 5)) AS zip_codes_affected,\n        AVG(CASE WHEN avc.dpv_confirmation = 'Y' THEN 1 ELSE 0 END) * 100 AS dpv_confirmation_rate,\n        COUNT(CASE WHEN avc.validation_status = 'Valid' THEN 1 END) AS valid_after_correction_count\n    FROM address_validation_comparison avc\n    WHERE avc.address_was_corrected = TRUE\n    GROUP BY avc.correction_type\n),\nvalidation_quality_metrics AS (\n    -- Fourth CTE: Calculate quality metrics\n    SELECT\n        vs.validation_date,\n        vs.total_validations,\n        vs.valid_count,\n        vs.corrected_count,\n        vs.invalid_count,\n        vs.ambiguous_count,\n        vs.address_corrections_count,\n        vs.dpv_confirmed_count,\n        vs.cmra_count,\n        vs.vacant_count,\n        vs.residential_count,\n        vs.correction_rate_percentage,\n        CASE\n            WHEN 
vs.total_validations > 0\n            THEN (vs.valid_count + vs.corrected_count)::numeric / vs.total_validations * 100\n            ELSE 0\n        END AS success_rate_percentage,\n        CASE\n            WHEN vs.total_validations > 0\n            THEN vs.dpv_confirmed_count::numeric / vs.total_validations * 100\n            ELSE 0\n        END AS dpv_confirmation_rate_percentage,\n        CASE\n            WHEN vs.total_validations > 0\n            THEN vs.invalid_count::numeric / vs.total_validations * 100\n            ELSE 0\n        END AS invalid_rate_percentage\n    FROM validation_statistics vs\n)\nSELECT\n    vqm.validation_date,\n    vqm.total_validations,\n    vqm.valid_count,\n    vqm.corrected_count,\n    vqm.invalid_count,\n    vqm.ambiguous_count,\n    vqm.address_corrections_count,\n    vqm.dpv_confirmed_count,\n    vqm.success_rate_percentage,\n    vqm.correction_rate_percentage,\n    vqm.dpv_confirmation_rate_percentage,\n    vqm.invalid_rate_percentage,\n    cpa.correction_type AS most_common_correction_type,\n    cpa.correction_count AS most_common_correction_count,\n    CASE\n        WHEN vqm.success_rate_percentage >= 95 THEN 'Excellent'\n        WHEN vqm.success_rate_percentage >= 85 THEN 'Good'\n        WHEN vqm.success_rate_percentage >= 75 THEN 'Fair'\n        ELSE 'Needs Improvement'\n    END AS quality_category\nFROM validation_quality_metrics vqm\nLEFT JOIN LATERAL (\n    SELECT correction_type, correction_count\n    FROM correction_pattern_analysis cpa\n    ORDER BY cpa.correction_count DESC\n    LIMIT 1\n) cpa ON TRUE\nORDER BY vqm.validation_date DESC;",
      "line_number": 639,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004791,
        "row_count": 1,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 5,
      "title": "Shipping Cost Analytics with Revenue Optimization and Carrier Performance Comparison",
      "description": "Description: Comprehensive shipping cost analytics that analyzes revenue, identifies cost optimization opportunities, compares carrier performance, and provides financial insights. Uses multiple CTEs for cost aggregation, carrier comparison, and revenue analysis. Use Case:
    Shipping platform needs to analyze shipping costs, identify revenue opportunities, compare carrier performance, and optimize shipping spend. Business Value: Enables cost optimization, identifies revenue opportunities, and prov",
      "complexity": "Multiple CTEs (5+ levels), cost aggregation, revenue calculations, carrier performance comparison, optimization recommendations, financial metrics.",
      "expected_output": "Shipping cost analytics showing revenue metrics, cost breakdowns, carrier performance comparisons, and optimization recommendations.",
      "sql": "WITH shipment_cost_details AS (\n    -- First CTE: Aggregate shipment costs\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.package_id,\n        DATE(s.created_at) AS shipment_date,\n        s.label_cost,\n        s.insurance_cost,\n        s.signature_cost,\n        s.total_cost,\n        p.weight_lbs,\n        p.package_value,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.shipment_status,\n        CASE\n            WHEN s.shipment_status = 'Delivered' THEN s.total_cost\n            ELSE 0\n        END AS delivered_cost,\n        CASE\n            WHEN s.shipment_status IN ('Exception', 'Returned') THEN s.total_cost\n            ELSE 0\n        END AS exception_cost\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n),\ndaily_cost_summary AS (\n    -- Second CTE:
    Daily cost summaries\n    SELECT\n        scd.shipment_date,\n        scd.carrier_id,\n        scd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(scd.total_cost) AS total_revenue,\n        SUM(scd.label_cost) AS total_label_cost,\n        SUM(scd.insurance_cost) AS total_insurance_cost,\n        SUM(scd.signature_cost) AS total_signature_cost,\n        SUM(scd.delivered_cost) AS delivered_revenue,\n        SUM(scd.exception_cost) AS exception_revenue,\n        AVG(scd.total_cost) AS avg_shipment_cost,\n        AVG(scd.weight_lbs) AS avg_weight_lbs,\n        COUNT(CASE WHEN scd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        COUNT(CASE WHEN scd.shipment_status IN ('Exception', 'Returned') THEN 1 END) AS exception_count\n    FROM shipment_cost_details scd\n    GROUP BY scd.shipment_date, scd.carrier_id, scd.service_id\n),\ncarrier_performance_metrics AS (\n    -- Third CTE: Calculate carrier performance metrics\n    SELECT\n        dcs.carrier_id,\n        c.carrier_name,\n        COUNT(DISTINCT dcs.shipment_date) AS active_days,\n        SUM(dcs.total_shipments) AS total_shipments,\n        SUM(dcs.total_revenue) AS total_revenue,\n        SUM(dcs.delivered_revenue) AS delivered_revenue,\n        SUM(dcs.exception_revenue) AS exception_revenue,\n        AVG(dcs.avg_shipment_cost) AS avg_shipment_cost,\n        AVG(dcs.avg_weight_lbs) AS avg_weight_lbs,\n        SUM(dcs.delivered_count) AS total_delivered,\n        SUM(dcs.exception_count) AS total_exceptions,\n        CASE\n            WHEN SUM(dcs.total_shipments) > 0\n            THEN SUM(dcs.delivered_count)::numeric / SUM(dcs.total_shipments) * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        CASE\n            WHEN SUM(dcs.total_shipments) > 0\n            THEN SUM(dcs.exception_count)::numeric / SUM(dcs.total_shipments) * 100\n            ELSE 0\n        END AS exception_rate,\n        CASE\n            WHEN SUM(dcs.delivered_count) > 0\n         
   THEN SUM(dcs.delivered_revenue) / SUM(dcs.delivered_count)\n            ELSE 0\n        END AS avg_revenue_per_delivered_shipment\n    FROM daily_cost_summary dcs\n    INNER JOIN shipping_carriers c ON dcs.carrier_id = c.carrier_id\n    GROUP BY dcs.carrier_id, c.carrier_name\n),\nservice_performance_metrics AS (\n    -- Fourth CTE: Calculate service performance metrics\n    SELECT\n        dcs.service_id,\n        st.service_name,\n        st.service_category,\n        COUNT(DISTINCT dcs.shipment_date) AS active_days,\n        SUM(dcs.total_shipments) AS total_shipments,\n        SUM(dcs.total_revenue) AS total_revenue,\n        AVG(dcs.avg_shipment_cost) AS avg_shipment_cost,\n        SUM(dcs.delivered_count) AS total_delivered,\n        SUM(dcs.exception_count) AS total_exceptions,\n        CASE\n            WHEN SUM(dcs.total_shipments) > 0\n            THEN SUM(dcs.delivered_count)::numeric / SUM(dcs.total_shipments) * 100\n            ELSE 0\n        END AS delivery_success_rate\n    FROM daily_cost_summary dcs\n    INNER JOIN shipping_service_types st ON dcs.service_id = st.service_id\n    GROUP BY dcs.service_id, st.service_name, st.service_category\n),\ncost_optimization_opportunities AS (\n    -- Fifth CTE: Identify cost optimization opportunities\n    SELECT\n        scd.shipment_id,\n        scd.carrier_id,\n        scd.service_id,\n        scd.total_cost,\n        scd.origin_zip_code,\n        scd.destination_zip_code,\n        (SELECT MIN(sr.total_rate)\n         FROM shipping_rates sr\n         WHERE sr.carrier_id != scd.carrier_id\n             AND sr.weight_lbs >= scd.weight_lbs\n             AND (sr.expiration_date IS NULL OR sr.expiration_date >= CURRENT_DATE)\n             AND sr.effective_date <= CURRENT_DATE\n         LIMIT 1) AS alternative_min_rate,\n        scd.total_cost - (SELECT MIN(sr.total_rate)\n                          FROM shipping_rates sr\n                          WHERE sr.carrier_id != scd.carrier_id\n                        
      AND sr.weight_lbs >= scd.weight_lbs\n                              AND (sr.expiration_date IS NULL OR sr.expiration_date >= CURRENT_DATE)\n                              AND sr.effective_date <= CURRENT_DATE\n                          LIMIT 1) AS potential_savings\n    FROM shipment_cost_details scd\n    WHERE scd.shipment_status = 'Delivered'\n)\nSELECT\n    cpm.carrier_name,\n    cpm.total_shipments,\n    cpm.total_revenue,\n    cpm.delivered_revenue,\n    cpm.exception_revenue,\n    cpm.avg_shipment_cost,\n    cpm.delivery_success_rate,\n    cpm.exception_rate,\n    cpm.avg_revenue_per_delivered_shipment,\n    ROW_NUMBER() OVER (ORDER BY cpm.total_revenue DESC) AS revenue_rank,\n    ROW_NUMBER() OVER (ORDER BY cpm.delivery_success_rate DESC) AS performance_rank,\n    ROW_NUMBER() OVER (ORDER BY cpm.avg_shipment_cost ASC) AS cost_efficiency_rank,\n    COALESCE(coo.total_potential_savings, 0) AS total_potential_savings,\n    CASE\n        WHEN cpm.delivery_success_rate >= 95 AND cpm.exception_rate <= 2 THEN 'Excellent'\n        WHEN cpm.delivery_success_rate >= 90 AND cpm.exception_rate <= 5 THEN 'Good'\n        WHEN cpm.delivery_success_rate >= 85 AND cpm.exception_rate <= 10 THEN 'Fair'\n        ELSE 'Needs Improvement'\n    END AS performance_category\nFROM carrier_performance_metrics cpm\nLEFT JOIN (\n    SELECT \n        carrier_id,\n        SUM(potential_savings) AS total_potential_savings\n    FROM cost_optimization_opportunities\n    GROUP BY carrier_id\n) coo ON cpm.carrier_id = coo.carrier_id\nORDER BY cpm.total_revenue DESC;",
      "line_number": 786,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004654,
        "row_count": 2,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 6,
      "title": "Bulk Shipping Preset Optimization with Weight Distribution Analysis",
      "description": "Description: Analyzes bulk shipping presets to optimize package configurations, weight distributions, and shipping costs. Uses multiple CTEs to analyze preset usage patterns, identify optimization opportunities, and recommend cost-effective preset configurations. Use Case: Shipping platform needs to optimize bulk shipping presets to reduce costs, improve efficiency, and provide better default configurations for frequent shippers. Business Value: Reduces shipping costs for bulk shippers by optimi",
      "complexity": "Multiple CTEs (4+ levels), weight distribution analysis, preset usage patterns, cost optimization calculations, aggregation analytics.",
      "expected_output": "Bulk shipping preset optimization results showing recommended configurations, cost savings potential, and usage patterns.",
      "sql": "WITH preset_usage_analysis AS (\n    -- First CTE: Analyze preset usage patterns\n    SELECT\n        bsp.preset_id,\n        bsp.user_id,\n        bsp.preset_name,\n        bsp.package_type,\n        bsp.default_weight_lbs,\n        bsp.default_length_inches,\n        bsp.default_width_inches,\n        bsp.default_height_inches,\n        bsp.default_service_id,\n        bsp.default_carrier_id,\n        COUNT(s.shipment_id) AS usage_count,\n        SUM(s.total_cost) AS total_cost_using_preset,\n        AVG(s.total_cost) AS avg_cost_per_shipment,\n        AVG(p.weight_lbs) AS avg_actual_weight_lbs,\n        AVG(p.length_inches) AS avg_actual_length_inches,\n        AVG(p.width_inches) AS avg_actual_width_inches,\n        AVG(p.height_inches) AS avg_actual_height_inches\n    FROM bulk_shipping_presets bsp\n    LEFT JOIN shipments s ON s.carrier_id = bsp.default_carrier_id\n        AND s.service_id = bsp.default_service_id\n    LEFT JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY bsp.preset_id, bsp.user_id, bsp.preset_name, bsp.package_type, bsp.default_weight_lbs, bsp.default_length_inches, bsp.default_width_inches, bsp.default_height_inches, bsp.default_service_id, bsp.default_carrier_id\n),\npreset_cost_analysis AS (\n    -- Second CTE: Analyze preset costs and identify optimization opportunities\n    SELECT\n        pua.preset_id,\n        pua.preset_name,\n        pua.usage_count,\n        pua.total_cost_using_preset,\n        pua.avg_cost_per_shipment,\n        pua.default_weight_lbs,\n        pua.avg_actual_weight_lbs,\n        ABS(pua.default_weight_lbs - pua.avg_actual_weight_lbs) AS weight_difference_lbs,\n        (SELECT MIN(sr.total_rate)\n         FROM shipping_rates sr\n         WHERE sr.carrier_id = pua.default_carrier_id\n             AND sr.weight_lbs >= pua.avg_actual_weight_lbs\n             AND (sr.expiration_date IS NULL OR sr.expiration_date >= CURRENT_DATE)\n 
            AND sr.effective_date <= CURRENT_DATE\n         LIMIT 1) AS optimized_rate,\n        pua.avg_cost_per_shipment - (SELECT MIN(sr.total_rate)\n                                      FROM shipping_rates sr\n                                      WHERE sr.carrier_id = pua.default_carrier_id\n                                          AND sr.weight_lbs >= pua.avg_actual_weight_lbs\n                                          AND (sr.expiration_date IS NULL OR sr.expiration_date >= CURRENT_DATE)\n                                          AND sr.effective_date <= CURRENT_DATE\n                                      LIMIT 1) AS potential_savings_per_shipment\n    FROM preset_usage_analysis pua\n    WHERE pua.usage_count > 0\n),\npreset_recommendations AS (\n    -- Third CTE: Generate preset optimization recommendations\n    SELECT\n        pca.preset_id,\n        pca.preset_name,\n        pca.usage_count,\n        pca.avg_cost_per_shipment,\n        pca.optimized_rate,\n        pca.potential_savings_per_shipment,\n        pca.potential_savings_per_shipment * pca.usage_count AS total_potential_savings,\n        CASE\n            WHEN pca.weight_difference_lbs > 1.0 THEN 'Adjust Weight Default'\n            WHEN pca.potential_savings_per_shipment > 2.0 THEN 'Optimize Service Selection'\n            ELSE 'Preset Optimal'\n        END AS optimization_recommendation\n    FROM preset_cost_analysis pca\n)\nSELECT\n    pr.preset_id,\n    pr.preset_name,\n    pr.usage_count,\n    pr.avg_cost_per_shipment,\n    pr.optimized_rate,\n    pr.potential_savings_per_shipment,\n    pr.total_potential_savings,\n    pr.optimization_recommendation,\n    ROW_NUMBER() OVER (ORDER BY pr.total_potential_savings DESC) AS savings_rank\nFROM preset_recommendations pr\nORDER BY pr.total_potential_savings DESC;",
      "line_number": 960,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005479,
        "row_count": 3,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 7,
      "title": "International Shipping Customs Analysis with Duty and Tax Optimization",
      "description": "Description: Comprehensive international shipping customs analysis that calculates duty and tax amounts, identifies optimization opportunities, and analyzes customs clearance patterns. Uses multiple CTEs for customs value calculations, duty optimization, and clearance analytics. Use Case: Shipping platform needs to analyze international shipping customs data, optimize duty and tax calculations, and provide customs clearance insights for international shipments. Business Value: Reduces internatio",
      "complexity": "Multiple CTEs (5+ levels), customs value calculations, duty and tax optimization, clearance pattern analysis, international shipping analytics.",
      "expected_output": "International customs analysis showing duty amounts, tax calculations, optimization opportunities, and clearance success rates.",
      "sql": "WITH international_shipment_details AS (\n    -- First CTE: Get international shipment and customs details\n    SELECT\n        ic.customs_id,\n        ic.shipment_id,\n        ic.customs_declaration_number,\n        ic.customs_value,\n        ic.currency_code,\n        ic.hs_tariff_code,\n        ic.country_of_origin,\n        ic.customs_duty_amount,\n        ic.customs_tax_amount,\n        ic.customs_fees_amount,\n        ic.total_customs_amount,\n        ic.customs_status,\n        ic.customs_cleared_date,\n        s.destination_country,\n        s.destination_zip_code,\n        s.total_cost AS shipment_cost,\n        p.package_value,\n        s.created_at AS shipment_date\n    FROM international_customs ic\n    INNER JOIN shipments s ON ic.shipment_id = s.shipment_id\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.destination_country != 'US'\n),\ncustoms_value_analysis AS (\n    -- Second CTE: Analyze customs value patterns\n    SELECT\n        isd.destination_country,\n        COUNT(*) AS total_shipments,\n        AVG(isd.customs_value) AS avg_customs_value,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY isd.customs_value) AS median_customs_value,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY isd.customs_value) AS p95_customs_value,\n        AVG(isd.customs_duty_amount) AS avg_duty_amount,\n        AVG(isd.customs_tax_amount) AS avg_tax_amount,\n        AVG(isd.customs_fees_amount) AS avg_fees_amount,\n        AVG(isd.total_customs_amount) AS avg_total_customs_amount,\n        COUNT(CASE WHEN isd.customs_status = 'Cleared' THEN 1 END) AS cleared_count,\n        COUNT(CASE WHEN isd.customs_status = 'Held' THEN 1 END) AS held_count,\n        COUNT(CASE WHEN isd.customs_status = 'Returned' THEN 1 END) AS returned_count\n    FROM international_shipment_details isd\n    GROUP BY isd.destination_country\n),\nduty_rate_analysis AS (\n    -- Third CTE: Analyze duty rates by country and tariff code\n    SELECT\n        
isd.destination_country,\n        isd.hs_tariff_code,\n        COUNT(*) AS shipment_count,\n        AVG(isd.customs_duty_amount / NULLIF(isd.customs_value, 0) * 100) AS avg_duty_rate_percentage,\n        AVG(isd.customs_tax_amount / NULLIF(isd.customs_value, 0) * 100) AS avg_tax_rate_percentage,\n        AVG(isd.total_customs_amount / NULLIF(isd.customs_value, 0) * 100) AS avg_total_customs_rate_percentage\n    FROM international_shipment_details isd\n    WHERE isd.customs_value > 0\n        AND isd.hs_tariff_code IS NOT NULL\n    GROUP BY isd.destination_country, isd.hs_tariff_code\n),\ncustoms_clearance_performance AS (\n    -- Fourth CTE: Analyze customs clearance performance\n    SELECT\n        isd.destination_country,\n        COUNT(*) AS total_shipments,\n        COUNT(CASE WHEN isd.customs_status = 'Cleared' THEN 1 END) AS cleared_shipments,\n        COUNT(CASE WHEN isd.customs_status = 'Held' THEN 1 END) AS held_shipments,\n        COUNT(CASE WHEN isd.customs_status = 'Returned' THEN 1 END) AS returned_shipments,\n        AVG(EXTRACT(EPOCH FROM (isd.customs_cleared_date - isd.shipment_date)) / 86400.0) AS avg_clearance_days,\n        CASE\n            WHEN COUNT(*) > 0\n            THEN COUNT(CASE WHEN isd.customs_status = 'Cleared' THEN 1 END)::numeric / COUNT(*) * 100\n            ELSE 0\n        END AS clearance_success_rate\n    FROM international_shipment_details isd\n    GROUP BY isd.destination_country\n),\ncustoms_optimization_opportunities AS (\n    -- Fifth CTE: Identify customs optimization opportunities\n    SELECT\n        isd.customs_id,\n        isd.shipment_id,\n        isd.destination_country,\n        isd.customs_value,\n        isd.total_customs_amount,\n        cva.avg_total_customs_amount AS country_avg_customs_amount,\n        isd.total_customs_amount - cva.avg_total_customs_amount AS deviation_from_avg,\n        CASE\n            WHEN isd.total_customs_amount > cva.avg_total_customs_amount * 1.2 THEN 'High Customs Cost'\n            
WHEN isd.total_customs_amount < cva.avg_total_customs_amount * 0.8 THEN 'Low Customs Cost'\n            ELSE 'Normal'\n        END AS cost_category\n    FROM international_shipment_details isd\n    INNER JOIN customs_value_analysis cva ON isd.destination_country = cva.destination_country\n)\nSELECT\n    ccp.destination_country,\n    cva.total_shipments,\n    cva.avg_customs_value,\n    cva.median_customs_value,\n    cva.avg_total_customs_amount,\n    ccp.cleared_shipments,\n    ccp.held_shipments,\n    ccp.returned_shipments,\n    ccp.clearance_success_rate,\n    ccp.avg_clearance_days,\n    COUNT(CASE WHEN coo.cost_category = 'High Customs Cost' THEN 1 END) AS high_cost_shipments,\n    COUNT(CASE WHEN coo.cost_category = 'Low Customs Cost' THEN 1 END) AS low_cost_shipments,\n    CASE\n        WHEN ccp.clearance_success_rate >= 95 THEN 'Excellent'\n        WHEN ccp.clearance_success_rate >= 85 THEN 'Good'\n        WHEN ccp.clearance_success_rate >= 75 THEN 'Fair'\n        ELSE 'Needs Improvement'\n    END AS performance_category\nFROM customs_clearance_performance ccp\nINNER JOIN customs_value_analysis cva ON ccp.destination_country = cva.destination_country\nLEFT JOIN customs_optimization_opportunities coo ON ccp.destination_country = coo.destination_country\nGROUP BY ccp.destination_country, cva.total_shipments, cva.avg_customs_value, cva.median_customs_value, cva.avg_total_customs_amount, ccp.cleared_shipments, ccp.held_shipments, ccp.returned_shipments, ccp.clearance_success_rate, ccp.avg_clearance_days\nORDER BY ccp.clearance_success_rate DESC, ccp.avg_clearance_days ASC;",
      "line_number": 1061,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004893,
        "row_count": 0,
        "column_count": 13,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 8,
      "title": "Shipping Adjustment Analysis with Discrepancy Detection and Cost Recovery",
      "description": "Description: Analyzes shipping adjustments to identify discrepancies, detect patterns, and calculate cost recovery opportunities. Uses multiple CTEs to analyze adjustment types, identify root causes, and quantify financial impact. Use Case: Shipping platform needs to analyze shipping adjustments, identify discrepancy patterns, and recover costs from carrier adjustments. Business Value: Reduces shipping costs by identifying and preventing adjustment discrepancies, recovering costs from incorrect ",
      "complexity": "Multiple CTEs (4+ levels), adjustment pattern analysis, discrepancy detection, cost recovery calculations, root cause analysis.",
      "expected_output": "Shipping adjustment analysis showing adjustment types, discrepancy patterns, cost recovery opportunities, and prevention recommendations.",
      "sql": "WITH adjustment_details AS (\n    -- First CTE: Get detailed adjustment information\n    SELECT\n        sa.adjustment_id,\n        sa.shipment_id,\n        sa.tracking_number,\n        sa.adjustment_type,\n        sa.original_amount,\n        sa.adjusted_amount,\n        sa.adjustment_amount,\n        sa.adjustment_reason,\n        sa.adjustment_status,\n        sa.adjustment_date,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost AS original_shipment_cost,\n        p.weight_lbs AS declared_weight_lbs,\n        p.length_inches AS declared_length_inches,\n        p.width_inches AS declared_width_inches,\n        p.height_inches AS declared_height_inches\n    FROM shipping_adjustments sa\n    INNER JOIN shipments s ON sa.shipment_id = s.shipment_id\n    INNER JOIN packages p ON s.package_id = p.package_id\n),\nadjustment_statistics AS (\n    -- Second CTE: Calculate adjustment statistics by type\n    SELECT\n        ad.adjustment_type,\n        COUNT(*) AS total_adjustments,\n        SUM(ABS(ad.adjustment_amount)) AS total_adjustment_amount,\n        AVG(ABS(ad.adjustment_amount)) AS avg_adjustment_amount,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ABS(ad.adjustment_amount)) AS median_adjustment_amount,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY ABS(ad.adjustment_amount)) AS p95_adjustment_amount,\n        COUNT(CASE WHEN ad.adjustment_status = 'Applied' THEN 1 END) AS applied_count,\n        COUNT(CASE WHEN ad.adjustment_status = 'Disputed' THEN 1 END) AS disputed_count,\n        COUNT(CASE WHEN ad.adjustment_status = 'Resolved' THEN 1 END) AS resolved_count\n    FROM adjustment_details ad\n    GROUP BY ad.adjustment_type\n),\ncarrier_adjustment_patterns AS (\n    -- Third CTE: Analyze adjustment patterns by carrier\n    SELECT\n        ad.carrier_id,\n        c.carrier_name,\n        ad.adjustment_type,\n        COUNT(*) AS adjustment_count,\n        
SUM(ABS(ad.adjustment_amount)) AS total_adjustment_amount,\n        AVG(ABS(ad.adjustment_amount)) AS avg_adjustment_amount,\n        COUNT(CASE WHEN ad.adjustment_status = 'Disputed' THEN 1 END) AS disputed_count,\n        COUNT(CASE WHEN ad.adjustment_status = 'Resolved' THEN 1 END) AS resolved_count\n    FROM adjustment_details ad\n    INNER JOIN shipping_carriers c ON ad.carrier_id = c.carrier_id\n    GROUP BY ad.carrier_id, c.carrier_name, ad.adjustment_type\n),\ndiscrepancy_analysis AS (\n    -- Fourth CTE: Analyze discrepancies and identify root causes\n    SELECT\n        ad.adjustment_id,\n        ad.adjustment_type,\n        ad.adjustment_amount,\n        ad.adjustment_reason,\n        ad.adjustment_status,\n        ad.carrier_id,\n        ad.declared_weight_lbs,\n        ad.declared_length_inches,\n        ad.declared_width_inches,\n        ad.declared_height_inches,\n        CASE\n            WHEN ad.adjustment_type = 'Weight' AND ad.adjustment_amount > 0 THEN 'Weight Under-declared'\n            WHEN ad.adjustment_type = 'Weight' AND ad.adjustment_amount < 0 THEN 'Weight Over-declared'\n            WHEN ad.adjustment_type = 'Dimensions' AND ad.adjustment_amount > 0 THEN 'Dimensions Under-declared'\n            WHEN ad.adjustment_type = 'Dimensions' AND ad.adjustment_amount < 0 THEN 'Dimensions Over-declared'\n            WHEN ad.adjustment_type = 'Zone' AND ad.adjustment_amount > 0 THEN 'Zone Under-calculated'\n            WHEN ad.adjustment_type = 'Zone' AND ad.adjustment_amount < 0 THEN 'Zone Over-calculated'\n            ELSE 'Other Discrepancy'\n        END AS discrepancy_category,\n        CASE\n            WHEN ABS(ad.adjustment_amount) > ad.original_shipment_cost * 0.1 THEN 'High Impact'\n            WHEN ABS(ad.adjustment_amount) > ad.original_shipment_cost * 0.05 THEN 'Medium Impact'\n            ELSE 'Low Impact'\n        END AS impact_level\n    FROM adjustment_details ad\n),\ncost_recovery_opportunities AS (\n    -- Fifth CTE: Identify cost 
recovery opportunities\n    SELECT\n        da.adjustment_type,\n        da.discrepancy_category,\n        COUNT(*) AS discrepancy_count,\n        SUM(ABS(da.adjustment_amount)) AS total_recoverable_amount,\n        AVG(ABS(da.adjustment_amount)) AS avg_recoverable_amount,\n        COUNT(CASE WHEN da.adjustment_status = 'Disputed' THEN 1 END) AS disputed_count,\n        COUNT(CASE WHEN da.adjustment_status = 'Resolved' AND da.adjustment_amount < 0 THEN 1 END) AS successful_recoveries\n    FROM discrepancy_analysis da\n    GROUP BY da.adjustment_type, da.discrepancy_category\n)\nSELECT\n    as_stats.adjustment_type,\n    as_stats.total_adjustments,\n    as_stats.total_adjustment_amount,\n    as_stats.avg_adjustment_amount,\n    as_stats.median_adjustment_amount,\n    as_stats.applied_count,\n    as_stats.disputed_count,\n    as_stats.resolved_count,\n    cro.discrepancy_category,\n    cro.total_recoverable_amount,\n    cro.avg_recoverable_amount,\n    cro.successful_recoveries,\n    CASE\n        WHEN as_stats.total_adjustments > 0\n        THEN as_stats.disputed_count::numeric / as_stats.total_adjustments * 100\n        ELSE 0\n    END AS dispute_rate_percentage,\n    CASE\n        WHEN cro.discrepancy_count > 0\n        THEN cro.successful_recoveries::numeric / cro.discrepancy_count * 100\n        ELSE 0\n    END AS recovery_success_rate_percentage\nFROM adjustment_statistics as_stats\nLEFT JOIN cost_recovery_opportunities cro ON as_stats.adjustment_type = cro.adjustment_type\nORDER BY as_stats.total_adjustment_amount DESC;",
      "line_number": 1195,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.0046,
        "row_count": 2,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 9,
      "title": "API Rate Request Performance Analysis with Optimization Recommendations",
      "description": "Description: Analyzes API rate request performance to identify bottlenecks, optimize request patterns, and improve API efficiency. Uses multiple CTEs to analyze response times, error rates, and request patterns. Use Case: Shipping platform needs to optimize API rate requests to reduce latency, minimize errors, and improve overall API performance. Business Value: Improves API performance, reduces latency, minimizes API costs, and enhances user experience by optimizing rate request patterns. Purpo",
      "complexity": "Multiple CTEs (5+ levels), API performance analysis, response time calculations, error pattern detection, optimization recommendations.",
      "expected_output": "API performance analysis showing response times, error rates, optimization opportunities, and performance recommendations.",
      "sql": "WITH api_request_details AS (\n    -- First CTE: Get detailed API request information\n    SELECT\n        arl.log_id,\n        arl.carrier_id,\n        c.carrier_name,\n        arl.request_type,\n        arl.origin_zip_code,\n        arl.destination_zip_code,\n        arl.weight_lbs,\n        arl.request_timestamp,\n        arl.response_time_ms,\n        arl.response_status_code,\n        arl.rate_returned,\n        arl.error_message,\n        arl.api_endpoint,\n        DATE(arl.request_timestamp) AS request_date,\n        EXTRACT(HOUR FROM arl.request_timestamp) AS request_hour\n    FROM api_rate_request_log arl\n    INNER JOIN shipping_carriers c ON arl.carrier_id = c.carrier_id\n),\napi_performance_metrics AS (\n    -- Second CTE: Calculate API performance metrics\n    SELECT\n        ard.carrier_id,\n        ard.carrier_name,\n        ard.request_type,\n        COUNT(*) AS total_requests,\n        COUNT(CASE WHEN ard.response_status_code = 200 THEN 1 END) AS successful_requests,\n        COUNT(CASE WHEN ard.response_status_code != 200 OR ard.error_message IS NOT NULL THEN 1 END) AS failed_requests,\n        AVG(ard.response_time_ms) AS avg_response_time_ms,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ard.response_time_ms) AS median_response_time_ms,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY ard.response_time_ms) AS p95_response_time_ms,\n        PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY ard.response_time_ms) AS p99_response_time_ms,\n        MIN(ard.response_time_ms) AS min_response_time_ms,\n        MAX(ard.response_time_ms) AS max_response_time_ms,\n        STDDEV(ard.response_time_ms) AS stddev_response_time_ms,\n        CASE\n            WHEN COUNT(*) > 0\n            THEN COUNT(CASE WHEN ard.response_status_code = 200 THEN 1 END)::numeric / COUNT(*) * 100\n            ELSE 0\n        END AS success_rate_percentage\n    FROM api_request_details ard\n    GROUP BY ard.carrier_id, ard.carrier_name, 
ard.request_type\n),\nhourly_performance_patterns AS (\n    -- Third CTE: Analyze hourly performance patterns\n    SELECT\n        ard.carrier_id,\n        ard.request_hour,\n        COUNT(*) AS request_count,\n        AVG(ard.response_time_ms) AS avg_response_time_ms,\n        COUNT(CASE WHEN ard.response_status_code != 200 OR ard.error_message IS NOT NULL THEN 1 END) AS error_count,\n        CASE\n            WHEN COUNT(*) > 0\n            THEN COUNT(CASE WHEN ard.response_status_code != 200 OR ard.error_message IS NOT NULL THEN 1 END)::numeric / COUNT(*) * 100\n            ELSE 0\n        END AS error_rate_percentage\n    FROM api_request_details ard\n    GROUP BY ard.carrier_id, ard.request_hour\n),\nerror_pattern_analysis AS (\n    -- Fourth CTE: Analyze error patterns\n    SELECT\n        ard.carrier_id,\n        ard.carrier_name,\n        ard.error_message,\n        COUNT(*) AS error_count,\n        AVG(ard.response_time_ms) AS avg_response_time_on_error_ms,\n        COUNT(DISTINCT ard.origin_zip_code) AS affected_origin_zips,\n        COUNT(DISTINCT ard.destination_zip_code) AS affected_destination_zips\n    FROM api_request_details ard\n    WHERE ard.response_status_code != 200 OR ard.error_message IS NOT NULL\n    GROUP BY ard.carrier_id, ard.carrier_name, ard.error_message\n),\noptimization_recommendations AS (\n    -- Fifth CTE: Generate optimization recommendations\n    SELECT\n        apm.carrier_id,\n        apm.carrier_name,\n        apm.request_type,\n        apm.total_requests,\n        apm.success_rate_percentage,\n        apm.avg_response_time_ms,\n        apm.p95_response_time_ms,\n        CASE\n            WHEN apm.success_rate_percentage < 95 THEN 'Improve Error Handling'\n            WHEN apm.p95_response_time_ms > 2000 THEN 'Optimize Response Time'\n            WHEN apm.avg_response_time_ms > 1000 THEN 'Consider Caching'\n            ELSE 'Performance Optimal'\n        END AS optimization_recommendation,\n        CASE\n            WHEN 
apm.p95_response_time_ms > 2000 THEN apm.p95_response_time_ms - 1000\n            ELSE 0\n        END AS potential_time_savings_ms\n    FROM api_performance_metrics apm\n)\nSELECT\n        or_rec.carrier_name,\n        or_rec.request_type,\n        or_rec.total_requests,\n        or_rec.success_rate_percentage,\n        or_rec.avg_response_time_ms,\n        or_rec.p95_response_time_ms,\n        or_rec.optimization_recommendation,\n        or_rec.potential_time_savings_ms,\n        hpp.request_hour AS peak_error_hour,\n        hpp.error_rate_percentage AS peak_error_rate,\n        epa.error_message AS most_common_error,\n        epa.error_count AS most_common_error_count,\n        ROW_NUMBER() OVER (ORDER BY or_rec.avg_response_time_ms DESC) AS performance_rank\nFROM optimization_recommendations or_rec\nLEFT JOIN LATERAL (\n    SELECT request_hour, error_rate_percentage\n    FROM hourly_performance_patterns hpp\n    WHERE hpp.carrier_id = or_rec.carrier_id\n    ORDER BY hpp.error_rate_percentage DESC\n    LIMIT 1\n) hpp ON TRUE\nLEFT JOIN LATERAL (\n    SELECT error_message, error_count\n    FROM error_pattern_analysis epa\n    WHERE epa.carrier_id = or_rec.carrier_id\n    ORDER BY epa.error_count DESC\n    LIMIT 1\n) epa ON TRUE\nORDER BY or_rec.avg_response_time_ms DESC;",
      "line_number": 1336,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005711,
        "row_count": 4,
        "column_count": 13,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 10,
      "title": "Shipping Analytics Dashboard with Revenue Trends and Performance Metrics",
      "description": "Description: Comprehensive shipping analytics dashboard that aggregates revenue trends, performance metrics, and operational insights. Uses multiple CTEs to calculate key performance indicators, trend analysis, and comparative metrics. Use Case: Shipping platform needs a comprehensive analytics dashboard showing revenue trends, shipment volumes, carrier performance, and operational metrics for business intelligence. Business Value: Provides actionable business intelligence for strategic decision",
      "complexity": "Multiple CTEs (6+ levels), revenue trend analysis, performance metrics aggregation, comparative analytics, time-series analysis.",
      "expected_output": "Shipping analytics dashboard showing revenue trends, shipment volumes, performance metrics, and business intelligence insights.",
      "sql": "WITH daily_shipment_summary AS (\n    -- First CTE: Daily shipment summaries\n    SELECT\n        DATE(s.created_at) AS shipment_date,\n        s.carrier_id,\n        s.service_id,\n        COUNT(*) AS shipment_count,\n        SUM(s.total_cost) AS total_revenue,\n        AVG(s.total_cost) AS avg_shipment_cost,\n        COUNT(CASE WHEN s.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        COUNT(CASE WHEN s.shipment_status IN ('Exception', 'Returned') THEN 1 END) AS exception_count,\n        AVG(p.weight_lbs) AS avg_weight_lbs\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY DATE(s.created_at), s.carrier_id, s.service_id\n),\nrevenue_trend_analysis AS (\n    -- Second CTE:
    Revenue trend analysis\n    SELECT\n        dss.shipment_date,\n        SUM(dss.total_revenue) AS daily_revenue,\n        SUM(dss.shipment_count) AS daily_shipments,\n        AVG(dss.avg_shipment_cost) AS daily_avg_cost,\n        LAG(SUM(dss.total_revenue)) OVER (ORDER BY dss.shipment_date) AS previous_day_revenue,\n        LAG(SUM(dss.total_revenue), 7) OVER (ORDER BY dss.shipment_date) AS week_ago_revenue,\n        LAG(SUM(dss.total_revenue), 30) OVER (ORDER BY dss.shipment_date) AS month_ago_revenue,\n        AVG(SUM(dss.total_revenue)) OVER (ORDER BY dss.shipment_date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS seven_day_avg_revenue,\n        AVG(SUM(dss.total_revenue)) OVER (ORDER BY dss.shipment_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS thirty_day_avg_revenue\n    FROM daily_shipment_summary dss\n    GROUP BY dss.shipment_date\n),\ncarrier_performance_summary AS (\n    -- Third CTE: Carrier performance summary\n    SELECT\n        dss.carrier_id,\n        c.carrier_name,\n        SUM(dss.total_revenue) AS total_revenue,\n        SUM(dss.shipment_count) AS total_shipments,\n        AVG(dss.avg_shipment_cost) AS avg_shipment_cost,\n        SUM(dss.delivered_count) AS total_delivered,\n        SUM(dss.exception_count) AS total_exceptions,\n        CASE\n            WHEN SUM(dss.shipment_count) > 0\n            THEN SUM(dss.delivered_count)::numeric / SUM(dss.shipment_count) * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        CASE\n            WHEN SUM(dss.shipment_count) > 0\n            THEN SUM(dss.exception_count)::numeric / SUM(dss.shipment_count) * 100\n            ELSE 0\n        END AS exception_rate\n    FROM daily_shipment_summary dss\n    INNER JOIN shipping_carriers c ON dss.carrier_id = c.carrier_id\n    GROUP BY dss.carrier_id, c.carrier_name\n),\nservice_performance_summary AS (\n    -- Fourth CTE: Service performance summary\n    SELECT\n        dss.service_id,\n        st.service_name,\n        
st.service_category,\n        SUM(dss.total_revenue) AS total_revenue,\n        SUM(dss.shipment_count) AS total_shipments,\n        AVG(dss.avg_shipment_cost) AS avg_shipment_cost,\n        SUM(dss.delivered_count) AS total_delivered,\n        CASE\n            WHEN SUM(dss.shipment_count) > 0\n            THEN SUM(dss.delivered_count)::numeric / SUM(dss.shipment_count) * 100\n            ELSE 0\n        END AS delivery_success_rate\n    FROM daily_shipment_summary dss\n    INNER JOIN shipping_service_types st ON dss.service_id = st.service_id\n    GROUP BY dss.service_id, st.service_name, st.service_category\n),\nrevenue_growth_metrics AS (\n    -- Fifth CTE: Calculate revenue growth metrics\n    SELECT\n        rta.shipment_date,\n        rta.daily_revenue,\n        rta.daily_shipments,\n        rta.daily_avg_cost,\n        rta.previous_day_revenue,\n        rta.week_ago_revenue,\n        rta.month_ago_revenue,\n        rta.seven_day_avg_revenue,\n        rta.thirty_day_avg_revenue,\n        CASE\n            WHEN rta.previous_day_revenue > 0\n            THEN ((rta.daily_revenue - rta.previous_day_revenue) / rta.previous_day_revenue * 100)\n            ELSE 0\n        END AS day_over_day_growth_percentage,\n        CASE\n            WHEN rta.week_ago_revenue > 0\n            THEN ((rta.daily_revenue - rta.week_ago_revenue) / rta.week_ago_revenue * 100)\n            ELSE 0\n        END AS week_over_week_growth_percentage,\n        CASE\n            WHEN rta.month_ago_revenue > 0\n            THEN ((rta.daily_revenue - rta.month_ago_revenue) / rta.month_ago_revenue * 100)\n            ELSE 0\n        END AS month_over_month_growth_percentage\n    FROM revenue_trend_analysis rta\n),\ndashboard_summary AS (\n    -- Sixth CTE: Aggregate dashboard summary\n    SELECT\n        rgm.shipment_date,\n        rgm.daily_revenue,\n        rgm.daily_shipments,\n        rgm.daily_avg_cost,\n        rgm.day_over_day_growth_percentage,\n        
rgm.week_over_week_growth_percentage,\n        rgm.month_over_month_growth_percentage,\n        rgm.seven_day_avg_revenue,\n        rgm.thirty_day_avg_revenue,\n        (SELECT SUM(total_revenue) FROM carrier_performance_summary) AS total_revenue_all_carriers,\n        (SELECT SUM(total_shipments) FROM carrier_performance_summary) AS total_shipments_all_carriers,\n        (SELECT carrier_name FROM carrier_performance_summary ORDER BY total_revenue DESC LIMIT 1) AS top_carrier_by_revenue,\n        (SELECT service_name FROM service_performance_summary ORDER BY total_revenue DESC LIMIT 1) AS top_service_by_revenue\n    FROM revenue_growth_metrics rgm\n)\nSELECT\n    ds.shipment_date,\n    ds.daily_revenue,\n    ds.daily_shipments,\n    ds.daily_avg_cost,\n    ds.day_over_day_growth_percentage,\n    ds.week_over_week_growth_percentage,\n    ds.month_over_month_growth_percentage,\n    ds.seven_day_avg_revenue,\n    ds.thirty_day_avg_revenue,\n    ds.total_revenue_all_carriers,\n    ds.total_shipments_all_carriers,\n    ds.top_carrier_by_revenue,\n    ds.top_service_by_revenue,\n    CASE\n        WHEN ds.day_over_day_growth_percentage > 5 THEN 'Strong Growth'\n        WHEN ds.day_over_day_growth_percentage > 0 THEN 'Moderate Growth'\n        WHEN ds.day_over_day_growth_percentage > -5 THEN 'Stable'\n        ELSE 'Declining'\n    END AS growth_category\nFROM dashboard_summary ds\nORDER BY ds.shipment_date DESC\nLIMIT 30;",
      "line_number": 1480,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005777,
        "row_count": 1,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 11,
      "title": "Dimensional Weight Optimization with Package Configuration Analysis",
      "description": "Description: Analyzes dimensional weight calculations to optimize package configurations and reduce shipping costs. Uses multiple CTEs to calculate dimensional weights, identify optimization opportunities, and recommend cost-effective package configurations. Use Case: Shipping platform needs to optimize package dimensions to minimize dimensional weight charges and reduce shipping costs. Business Value: Reduces shipping costs by optimizing package dimensions, minimizing dimensional weight charges",
      "complexity": "Multiple CTEs (4+ levels), dimensional weight calculations, package optimization logic, cost savings analysis.",
      "expected_output": "Dimensional weight optimization results showing recommended package configurations and cost savings potential.",
      "sql": "WITH package_dimension_analysis AS (\n    -- First CTE: Analyze package dimensions and calculate dimensional weights\n    SELECT\n        p.package_id,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches,\n        p.length_inches * p.width_inches * p.height_inches AS cubic_volume_cubic_inches,\n        p.length_inches * p.width_inches * p.height_inches / 166.0 AS dimensional_weight_lbs,\n        CASE\n            WHEN p.length_inches * p.width_inches * p.height_inches / 166.0 > p.weight_lbs\n            THEN p.length_inches * p.width_inches * p.height_inches / 166.0\n            ELSE p.weight_lbs\n        END AS billable_weight_lbs,\n        CASE\n            WHEN p.length_inches * p.width_inches * p.height_inches / 166.0 > p.weight_lbs THEN TRUE\n            ELSE FALSE\n        END AS dimensional_weight_applies\n    FROM packages p\n),\ndimensional_weight_impact AS (\n    -- Second CTE: Calculate dimensional weight impact on shipping costs\n    SELECT\n        pda.package_id,\n        pda.weight_lbs,\n        pda.dimensional_weight_lbs,\n        pda.billable_weight_lbs,\n        pda.dimensional_weight_applies,\n        pda.billable_weight_lbs - pda.weight_lbs AS weight_premium_lbs,\n        s.shipment_id,\n        s.total_cost AS actual_cost,\n        (SELECT MIN(sr.total_rate)\n         FROM shipping_rates sr\n         WHERE sr.carrier_id = s.carrier_id\n             AND sr.service_id = s.service_id\n             AND sr.weight_lbs >= pda.weight_lbs\n             AND (sr.expiration_date IS NULL OR sr.expiration_date >= CURRENT_DATE)\n             AND sr.effective_date <= CURRENT_DATE\n         LIMIT 1) AS cost_at_actual_weight,\n        (SELECT MIN(sr.total_rate)\n         FROM shipping_rates sr\n         WHERE sr.carrier_id = s.carrier_id\n             AND sr.service_id = s.service_id\n             AND sr.weight_lbs >= pda.billable_weight_lbs\n             AND (sr.expiration_date IS NULL OR 
sr.expiration_date >= CURRENT_DATE)\n             AND sr.effective_date <= CURRENT_DATE\n         LIMIT 1) AS cost_at_billable_weight\n    FROM package_dimension_analysis pda\n    INNER JOIN shipments s ON pda.package_id = s.package_id\n    WHERE s.shipment_status = 'Delivered'\n),\noptimization_opportunities AS (\n    -- Third CTE: Identify optimization opportunities\n    SELECT\n        dwi.package_id,\n        dwi.weight_lbs,\n        dwi.dimensional_weight_lbs,\n        dwi.billable_weight_lbs,\n        dwi.dimensional_weight_applies,\n        dwi.weight_premium_lbs,\n        dwi.actual_cost,\n        dwi.cost_at_actual_weight,\n        dwi.cost_at_billable_weight,\n        dwi.cost_at_billable_weight - dwi.cost_at_actual_weight AS dimensional_weight_cost_impact,\n        CASE\n            WHEN dwi.dimensional_weight_applies = TRUE AND dwi.weight_premium_lbs > 1.0 THEN 'High Optimization Potential'\n            WHEN dwi.dimensional_weight_applies = TRUE AND dwi.weight_premium_lbs > 0.5 THEN 'Moderate Optimization Potential'\n            WHEN dwi.dimensional_weight_applies = TRUE THEN 'Low Optimization Potential'\n            ELSE 'No Optimization Needed'\n        END AS optimization_category\n    FROM dimensional_weight_impact dwi\n),\npackage_configuration_recommendations AS (\n    -- Fourth CTE: Generate package configuration recommendations\n    SELECT\n        oo.package_id,\n        oo.weight_lbs,\n        oo.dimensional_weight_lbs,\n        oo.billable_weight_lbs,\n        oo.dimensional_weight_applies,\n        oo.dimensional_weight_cost_impact,\n        oo.optimization_category,\n        CASE\n            WHEN oo.dimensional_weight_applies = TRUE THEN\n                SQRT((oo.billable_weight_lbs * 166.0) / (oo.weight_lbs * 1.1)) * \n                POWER(oo.billable_weight_lbs * 166.0 / (oo.weight_lbs * 1.1), 1.0/3.0)\n            ELSE NULL\n        END AS recommended_max_dimension_inches,\n        oo.dimensional_weight_cost_impact * 0.5 AS 
potential_cost_savings\n    FROM optimization_opportunities oo\n)\nSELECT\n    pcr.package_id,\n    pcr.weight_lbs,\n    pcr.dimensional_weight_lbs,\n    pcr.billable_weight_lbs,\n    pcr.dimensional_weight_applies,\n    pcr.dimensional_weight_cost_impact,\n    pcr.optimization_category,\n    pcr.recommended_max_dimension_inches,\n    pcr.potential_cost_savings,\n    ROW_NUMBER() OVER (ORDER BY pcr.dimensional_weight_cost_impact DESC) AS optimization_priority_rank\nFROM package_configuration_recommendations pcr\nWHERE pcr.dimensional_weight_applies = TRUE\nORDER BY pcr.dimensional_weight_cost_impact DESC;",
      "line_number": 1642,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004626,
        "row_count": 2,
        "column_count": 10,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 12,
      "title": "Shipping Zone Coverage Analysis with Geographic Gap Identification",
      "description": "Description: Analyzes shipping zone coverage to identify geographic gaps, optimize zone coverage, and improve shipping route efficiency. Uses multiple CTEs with recursive logic to analyze zone coverage patterns. Use Case: Shipping platform needs to identify geographic areas with limited zone coverage and optimize shipping routes. Business Value: Improves shipping coverage, reduces shipping costs, and enables better route optimization by identifying geographic gaps. Purpose: Provide zone coverage",
      "complexity": "Multiple CTEs (4+ levels), recursive zone analysis, geographic gap detection, coverage optimization.",
      "expected_output": "Zone coverage analysis showing coverage gaps, optimization opportunities, and route recommendations.",
      "sql": "WITH RECURSIVE zone_coverage_map AS (\n    -- Anchor: Base zone coverage\n    SELECT\n        z.zone_id,\n        z.carrier_id,\n        z.origin_zip_code,\n        z.destination_zip_code,\n        z.zone_number,\n        SUBSTRING(z.origin_zip_code, 1, 3) AS origin_zip_prefix,\n        SUBSTRING(z.destination_zip_code, 1, 3) AS destination_zip_prefix,\n        1 AS coverage_level\n    FROM shipping_zones z\n    WHERE z.zone_type = 'Domestic'\n        AND (z.expiration_date IS NULL OR z.expiration_date >= CURRENT_DATE)\n    UNION ALL\n    -- Recursive: Expand coverage to adjacent zones\n    SELECT\n        z.zone_id,\n        z.carrier_id,\n        z.origin_zip_code,\n        z.destination_zip_code,\n        z.zone_number,\n        SUBSTRING(z.origin_zip_code, 1, 3) AS origin_zip_prefix,\n        SUBSTRING(z.destination_zip_code, 1, 3) AS destination_zip_prefix,\n        zcm.coverage_level + 1\n    FROM shipping_zones z\n    INNER JOIN zone_coverage_map zcm ON z.carrier_id = zcm.carrier_id\n        AND ABS(z.zone_number - zcm.zone_number) <= 1\n    WHERE zcm.coverage_level < 3\n),\nzip_prefix_coverage AS (\n    -- Calculate coverage by ZIP prefix\n    SELECT\n        zcm.origin_zip_prefix,\n        zcm.destination_zip_prefix,\n        zcm.carrier_id,\n        COUNT(DISTINCT zcm.zone_id) AS zone_count,\n        COUNT(DISTINCT zcm.zone_number) AS unique_zone_numbers,\n        AVG(zcm.zone_number) AS avg_zone_number,\n        MIN(zcm.zone_number) AS min_zone_number,\n        MAX(zcm.zone_number) AS max_zone_number\n    FROM zone_coverage_map zcm\n    GROUP BY zcm.origin_zip_prefix, zcm.destination_zip_prefix, zcm.carrier_id\n),\ncoverage_gaps AS (\n    -- Identify coverage gaps\n    SELECT\n        opc.origin_zip_prefix,\n        opc.destination_zip_prefix,\n        opc.carrier_id,\n        opc.zone_count,\n        opc.unique_zone_numbers,\n        opc.avg_zone_number,\n        CASE\n            WHEN opc.zone_count = 0 THEN 'No Coverage'\n            
WHEN opc.zone_count < 3 THEN 'Limited Coverage'\n            WHEN opc.max_zone_number - opc.min_zone_number > 5 THEN 'High Zone Variance'\n            ELSE 'Good Coverage'\n        END AS coverage_category\n    FROM zip_prefix_coverage opc\n),\ncarrier_coverage_comparison AS (\n    -- Compare carrier coverage\n    SELECT\n        cg.origin_zip_prefix,\n        cg.destination_zip_prefix,\n        COUNT(DISTINCT cg.carrier_id) AS carrier_count,\n        STRING_AGG(DISTINCT c.carrier_name, ', ') AS available_carriers,\n        MIN(CASE WHEN cg.coverage_category = 'Good Coverage' THEN 1 ELSE 0 END) AS has_good_coverage,\n        MAX(CASE WHEN cg.coverage_category = 'No Coverage' THEN 1 ELSE 0 END) AS has_no_coverage\n    FROM coverage_gaps cg\n    INNER JOIN shipping_carriers c ON cg.carrier_id = c.carrier_id\n    GROUP BY cg.origin_zip_prefix, cg.destination_zip_prefix\n)\nSELECT\n    ccc.origin_zip_prefix,\n    ccc.destination_zip_prefix,\n    ccc.carrier_count,\n    ccc.available_carriers,\n    ccc.has_good_coverage,\n    ccc.has_no_coverage,\n    CASE\n        WHEN ccc.has_no_coverage = 1 THEN 'Coverage Gap Identified'\n        WHEN ccc.carrier_count = 1 THEN 'Single Carrier Coverage'\n        WHEN ccc.has_good_coverage = 1 THEN 'Good Coverage'\n        ELSE 'Limited Coverage'\n    END AS coverage_status,\n    COUNT(*) OVER (PARTITION BY ccc.origin_zip_prefix) AS destination_count_for_origin,\n    COUNT(*) OVER (PARTITION BY ccc.destination_zip_prefix) AS origin_count_for_destination\nFROM carrier_coverage_comparison ccc\nORDER BY ccc.has_no_coverage DESC, ccc.carrier_count ASC;",
      "line_number": 1765,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00542,
        "row_count": 7,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 13,
      "title": "Shipping Rate Volatility Analysis with Price Trend Prediction",
      "description": "Description: Analyzes shipping rate volatility to identify price trends, predict rate changes, and optimize rate selection timing. Uses multiple CTEs for time-series analysis and trend prediction. Use Case: Shipping platform needs to analyze rate volatility, predict price trends, and optimize rate selection to minimize costs. Business Value: Reduces shipping costs by predicting rate changes, optimizing rate selection timing, and identifying cost-saving opportunities. Purpose: Provide rate volati",
      "complexity": "Multiple CTEs (5+ levels), time-series analysis, volatility calculations, trend prediction, rate optimization.",
      "expected_output": "Rate volatility analysis showing price trends, volatility metrics, and optimization recommendations.",
      "sql": "WITH rate_history_analysis AS (\n    -- First CTE: Analyze rate history over time\n    SELECT\n        sr.rate_id,\n        sr.carrier_id,\n        sr.service_id,\n        sr.weight_lbs,\n        sr.rate_amount,\n        sr.total_rate,\n        sr.effective_date,\n        sr.expiration_date,\n        EXTRACT(EPOCH FROM (COALESCE(sr.expiration_date::TIMESTAMP, CURRENT_DATE::TIMESTAMP) - sr.effective_date::TIMESTAMP)) / 86400 AS rate_duration_days,\n        LAG(sr.total_rate) OVER (PARTITION BY sr.carrier_id, sr.service_id, sr.weight_lbs ORDER BY sr.effective_date) AS previous_rate,\n        LEAD(sr.total_rate) OVER (PARTITION BY sr.carrier_id, sr.service_id, sr.weight_lbs ORDER BY sr.effective_date) AS next_rate\n    FROM shipping_rates sr\n    WHERE sr.effective_date >= CURRENT_DATE - INTERVAL '365 days'\n),\nrate_changes AS (\n    -- Second CTE: Calculate rate changes\n    SELECT\n        rha.carrier_id,\n        rha.service_id,\n        rha.weight_lbs,\n        rha.effective_date,\n        rha.total_rate,\n        rha.previous_rate,\n        rha.next_rate,\n        CASE\n            WHEN rha.previous_rate IS NOT NULL\n            THEN rha.total_rate - rha.previous_rate\n            ELSE 0\n        END AS rate_change_amount,\n        CASE\n            WHEN rha.previous_rate IS NOT NULL AND rha.previous_rate > 0\n            THEN ((rha.total_rate - rha.previous_rate) / rha.previous_rate * 100)\n            ELSE 0\n        END AS rate_change_percentage,\n        CASE\n            WHEN rha.next_rate IS NOT NULL\n            THEN rha.next_rate - rha.total_rate\n            ELSE 0\n        END AS next_rate_change_amount\n    FROM rate_history_analysis rha\n),\nvolatility_metrics AS (\n    -- Third CTE: Calculate volatility metrics\n    SELECT\n        rc.carrier_id,\n        rc.service_id,\n        rc.weight_lbs,\n        COUNT(*) AS rate_change_count,\n        AVG(ABS(rc.rate_change_percentage)) AS avg_absolute_change_percentage,\n        
STDDEV(rc.rate_change_percentage) AS rate_volatility,\n        MAX(ABS(rc.rate_change_percentage)) AS max_change_percentage,\n        COUNT(CASE WHEN rc.rate_change_percentage > 0 THEN 1 END) AS rate_increase_count,\n        COUNT(CASE WHEN rc.rate_change_percentage < 0 THEN 1 END) AS rate_decrease_count,\n        AVG(rc.total_rate) AS avg_rate,\n        MIN(rc.total_rate) AS min_rate,\n        MAX(rc.total_rate) AS max_rate\n    FROM rate_changes rc\n    WHERE rc.rate_change_amount != 0\n    GROUP BY rc.carrier_id, rc.service_id, rc.weight_lbs\n),\ntrend_analysis AS (\n    -- Fourth CTE: Analyze rate trends\n    SELECT\n        vm.carrier_id,\n        vm.service_id,\n        vm.weight_lbs,\n        vm.rate_volatility,\n        vm.avg_rate,\n        vm.min_rate,\n        vm.max_rate,\n        CASE\n            WHEN vm.rate_increase_count > vm.rate_decrease_count * 1.5 THEN 'Increasing Trend'\n            WHEN vm.rate_decrease_count > vm.rate_increase_count * 1.5 THEN 'Decreasing Trend'\n            ELSE 'Stable Trend'\n        END AS rate_trend,\n        CASE\n            WHEN vm.rate_volatility > 10 THEN 'High Volatility'\n            WHEN vm.rate_volatility > 5 THEN 'Moderate Volatility'\n            ELSE 'Low Volatility'\n        END AS volatility_category\n    FROM volatility_metrics vm\n),\nrate_prediction AS (\n    -- Fifth CTE: Predict future rate changes\n    SELECT\n        ta.carrier_id,\n        c.carrier_name,\n        ta.service_id,\n        st.service_name,\n        ta.weight_lbs,\n        ta.avg_rate,\n        ta.min_rate,\n        ta.max_rate,\n        ta.rate_trend,\n        ta.volatility_category,\n        ta.rate_volatility,\n        CASE\n            WHEN ta.rate_trend = 'Increasing Trend' THEN ta.avg_rate * 1.05\n            WHEN ta.rate_trend = 'Decreasing Trend' THEN ta.avg_rate * 0.95\n            ELSE ta.avg_rate\n        END AS predicted_next_rate,\n        ta.avg_rate - ta.min_rate AS potential_savings_from_min_rate\n    FROM 
trend_analysis ta\n    INNER JOIN shipping_carriers c ON ta.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON ta.service_id = st.service_id\n)\nSELECT\n    rp.carrier_name,\n    rp.service_name,\n    rp.weight_lbs,\n    rp.avg_rate,\n    rp.min_rate,\n    rp.max_rate,\n    rp.rate_trend,\n    rp.volatility_category,\n    rp.predicted_next_rate,\n    rp.potential_savings_from_min_rate,\n    CASE\n        WHEN rp.rate_trend = 'Increasing Trend' AND rp.volatility_category = 'High Volatility' THEN 'Consider Locking Rates'\n        WHEN rp.rate_trend = 'Decreasing Trend' THEN 'Wait for Lower Rates'\n        ELSE 'Monitor Closely'\n    END AS optimization_recommendation\nFROM rate_prediction rp\nORDER BY rp.rate_volatility DESC, rp.potential_savings_from_min_rate DESC;",
      "line_number": 1875,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005286,
        "row_count": 2,
        "column_count": 11,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 14,
      "title": "Carrier Service Performance Comparison with Delivery Time Analysis",
      "description": "Description: Compares carrier service performance across different routes and time periods, analyzing delivery times, success rates, and service reliability. Uses multiple CTEs for performance comparison and reliability analysis. Use Case: Shipping platform needs to compare carrier service performance to recommend the best carrier-service combination for different shipping needs. Business Value: Improves shipping reliability and customer satisfaction by recommending optimal carrier-service combi",
      "complexity": "Multiple CTEs (4+ levels), performance comparison, delivery time analysis, reliability metrics, service ranking.",
      "expected_output": "Carrier service performance comparison showing delivery times, success rates, and reliability metrics.",
      "sql": "WITH shipment_delivery_metrics AS (\n    -- First CTE: Calculate delivery metrics for each shipment\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.label_created_at,\n        s.estimated_delivery_date,\n        s.actual_delivery_date,\n        s.shipment_status,\n        z.zone_number,\n        z.transit_days_min AS expected_transit_days_min,\n        z.transit_days_max AS expected_transit_days_max,\n        CASE\n            WHEN s.actual_delivery_date IS NOT NULL AND s.label_created_at IS NOT NULL\n            THEN EXTRACT(EPOCH FROM (s.actual_delivery_date::TIMESTAMP - s.label_created_at::TIMESTAMP)) / 86400.0\n            ELSE NULL\n        END AS actual_transit_days,\n        CASE\n            WHEN s.actual_delivery_date IS NOT NULL AND s.estimated_delivery_date IS NOT NULL\n            THEN EXTRACT(EPOCH FROM (s.actual_delivery_date::TIMESTAMP - s.estimated_delivery_date::TIMESTAMP)) / 86400.0\n            ELSE NULL\n        END AS delivery_variance_days\n    FROM shipments s\n    LEFT JOIN shipping_zones z ON s.zone_id = z.zone_id\n    WHERE s.shipment_status IN ('Delivered', 'Exception', 'Returned')\n        AND s.label_created_at IS NOT NULL\n),\ncarrier_service_performance AS (\n    -- Second CTE: Aggregate performance by carrier and service\n    SELECT\n        sdm.carrier_id,\n        sdm.service_id,\n        COUNT(*) AS total_shipments,\n        COUNT(CASE WHEN sdm.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        COUNT(CASE WHEN sdm.shipment_status IN ('Exception', 'Returned') THEN 1 END) AS exception_count,\n        AVG(sdm.actual_transit_days) AS avg_actual_transit_days,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sdm.actual_transit_days) AS median_transit_days,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY sdm.actual_transit_days) AS p95_transit_days,\n        AVG(sdm.delivery_variance_days) 
AS avg_delivery_variance_days,\n        COUNT(CASE WHEN sdm.delivery_variance_days <= 0 THEN 1 END) AS on_time_deliveries,\n        COUNT(CASE WHEN sdm.delivery_variance_days > 1 THEN 1 END) AS late_deliveries,\n        AVG(sdm.expected_transit_days_min) AS avg_expected_transit_days_min,\n        AVG(sdm.expected_transit_days_max) AS avg_expected_transit_days_max\n    FROM shipment_delivery_metrics sdm\n    WHERE sdm.actual_transit_days IS NOT NULL\n    GROUP BY sdm.carrier_id, sdm.service_id\n),\nperformance_rankings AS (\n    -- Third CTE:
    Rank carrier-service combinations\n    SELECT\n        csp.carrier_id,\n        c.carrier_name,\n        csp.service_id,\n        st.service_name,\n        csp.total_shipments,\n        csp.delivered_count,\n        csp.exception_count,\n        csp.avg_actual_transit_days,\n        csp.median_transit_days,\n        csp.p95_transit_days,\n        csp.avg_delivery_variance_days,\n        csp.on_time_deliveries,\n        csp.late_deliveries,\n        CASE\n            WHEN csp.total_shipments > 0\n            THEN csp.delivered_count::numeric / csp.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        CASE\n            WHEN csp.total_shipments > 0\n            THEN csp.on_time_deliveries::numeric / csp.total_shipments * 100\n            ELSE 0\n        END AS on_time_delivery_rate,\n        ROW_NUMBER() OVER (ORDER BY csp.avg_actual_transit_days ASC) AS speed_rank,\n        ROW_NUMBER() OVER (ORDER BY (csp.delivered_count::numeric / NULLIF(csp.total_shipments, 0) * 100) DESC) AS reliability_rank,\n        ROW_NUMBER() OVER (ORDER BY (csp.on_time_deliveries::numeric / NULLIF(csp.total_shipments, 0) * 100) DESC) AS on_time_rank\n    FROM carrier_service_performance csp\n    INNER JOIN shipping_carriers c ON csp.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON csp.service_id = st.service_id\n    WHERE csp.total_shipments >= 10\n),\nperformance_categories AS (\n    -- Fourth CTE: Categorize performance\n    SELECT\n        pr.carrier_id,\n        pr.carrier_name,\n        pr.service_id,\n        pr.service_name,\n        pr.total_shipments,\n        pr.delivery_success_rate,\n        pr.on_time_delivery_rate,\n        pr.avg_actual_transit_days,\n        pr.median_transit_days,\n        pr.p95_transit_days,\n        pr.speed_rank,\n        pr.reliability_rank,\n        pr.on_time_rank,\n        CASE\n            WHEN pr.delivery_success_rate >= 95 AND pr.on_time_delivery_rate >= 90 THEN 'Excellent'\n           
 WHEN pr.delivery_success_rate >= 90 AND pr.on_time_delivery_rate >= 80 THEN 'Good'\n            WHEN pr.delivery_success_rate >= 85 AND pr.on_time_delivery_rate >= 70 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category,\n        (pr.speed_rank + pr.reliability_rank + pr.on_time_rank) / 3.0 AS overall_rank_score\n    FROM performance_rankings pr\n)\nSELECT\n    pc.carrier_name,\n    pc.service_name,\n    pc.total_shipments,\n    pc.delivery_success_rate,\n    pc.on_time_delivery_rate,\n    pc.avg_actual_transit_days,\n    pc.median_transit_days,\n    pc.p95_transit_days,\n    pc.performance_category,\n    pc.overall_rank_score,\n    ROW_NUMBER() OVER (ORDER BY pc.overall_rank_score ASC) AS overall_rank\nFROM performance_categories pc\nORDER BY pc.overall_rank_score ASC;",
      "line_number": 2021,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005235,
        "row_count": 0,
        "column_count": 11,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 15,
      "title": "Route Optimization Analysis with Cost and Time Trade-offs",
      "description": "Description: Analyzes shipping routes to optimize cost and time trade-offs, identifying optimal routes based on multiple factors including cost, transit time, and reliability. Use Case: Shipping platform needs to optimize routes by analyzing cost-time trade-offs and identifying the most efficient shipping paths. Business Value: Reduces shipping costs and improves delivery times by optimizing route selection based on comprehensive cost-time analysis. Purpose: Provide route optimization analytics ",
      "complexity": "Multiple CTEs (5+ levels), route analysis, cost-time optimization, path finding algorithms, multi-factor decision analysis.",
      "expected_output": "Route optimization results showing optimal routes, cost-time trade-offs, and efficiency metrics.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2162,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006611,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 16,
      "title": "Shipping Cost Breakdown Analysis with Component Cost Attribution",
      "description": "Description: Comprehensive cost breakdown analysis that attributes shipping costs to different components (base rate, surcharges, insurance, signature, etc.) and identifies cost optimization opportunities. Use Case: Shipping platform needs to understand cost structure and identify opportunities to reduce shipping costs through component-level analysis. Business Value: Enables cost optimization by identifying high-cost components and providing actionable insights for cost reduction. Purpose: Prov",
      "complexity": "Multiple CTEs (4+ levels), cost component analysis, cost attribution, optimization recommendations, cost trend analysis.",
      "expected_output": "Cost breakdown analysis showing component costs, cost attribution, and optimization recommendations.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2266,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006007,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 17,
      "title": "Tracking Event Pattern Recognition with Anomaly Detection",
      "description": "Description: Advanced tracking event pattern recognition that identifies normal delivery patterns, detects anomalies, and predicts potential issues using machine learning-like pattern analysis. Use Case: Shipping platform needs to identify tracking event patterns, detect anomalies, and predict potential delivery issues before they occur. Business Value: Improves delivery reliability by detecting anomalies early and enabling proactive issue resolution. Purpose: Provide tracking pattern recognitio",
      "complexity": "Multiple CTEs (6+ levels), pattern recognition, anomaly detection, sequence analysis, predictive analytics, statistical analysis.",
      "expected_output": "Tracking pattern analysis showing normal patterns, detected anomalies, and predictive insights.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2370,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00583,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 18,
      "title": "Address Validation Quality Metrics with Correction Impact Analysis",
      "description": "Description: Comprehensive address validation quality metrics that analyze validation accuracy, correction impact, and quality trends over time. Use Case: Shipping platform needs to measure address validation quality, analyze correction impact, and track quality trends. Business Value: Improves address accuracy, reduces shipping errors, and enables data quality improvements. Purpose: Provide address validation quality metrics and correction impact analysis to improve address data quality. Comple",
      "complexity": "Multiple CTEs (4+ levels), quality metrics calculation, correction impact analysis, trend analysis, quality scoring.",
      "expected_output": "Address validation quality metrics showing accuracy rates, correction impact, and quality trends.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2474,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.007417,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 19,
      "title": "International Shipping Route Analysis with Customs Optimization",
      "description": "Description: Comprehensive international shipping route analysis that optimizes routes considering customs requirements, duty rates, and transit times. Use Case: Shipping platform needs to analyze international shipping routes and optimize them considering customs, duties, and transit times. Business Value: Reduces international shipping costs and improves delivery times by optimizing routes and customs handling. Purpose: Provide international route optimization considering customs, duties, and ",
      "complexity": "Multiple CTEs (5+ levels), international route analysis, customs optimization, duty rate analysis, transit time optimization.",
      "expected_output": "International route analysis showing optimal routes, customs considerations, and cost-time trade-offs.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2578,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004339,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 20,
      "title": "Carrier Rate Comparison Matrix with Multi-Dimensional Analysis",
      "description": "Description: Comprehensive carrier rate comparison matrix that compares rates across multiple dimensions including weight, zone, service type, and time periods. Use Case: Shipping platform needs a comprehensive rate comparison matrix to identify the best carrier-service combinations across different scenarios. Business Value: Enables optimal carrier selection by providing comprehensive rate comparisons across all relevant dimensions. Purpose: Provide multi-dimensional rate comparison matrix for ",
      "complexity": "Multiple CTEs (6+ levels), multi-dimensional analysis, rate matrix generation, comparative analytics, optimization recommendations.",
      "expected_output": "Rate comparison matrix showing carrier rates across multiple dimensions and optimal selections.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2682,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004477,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 21,
      "title": "Package Dimension Optimization with Volume Efficiency Analysis",
      "description": "Description: Advanced package dimension optimization that analyzes volume efficiency, identifies optimal package configurations, and recommends dimension adjustments to minimize shipping costs. Use Case: Shipping platform needs to optimize package dimensions to minimize dimensional weight charges and improve packaging efficiency. Business Value: Reduces shipping costs by optimizing package dimensions and minimizing dimensional weight charges. Purpose: Provide package dimension optimization recom",
      "complexity": "Multiple CTEs (5+ levels), dimension optimization, volume efficiency analysis, cost minimization, geometric calculations.",
      "expected_output": "Package dimension optimization results showing recommended dimensions and cost savings potential.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2786,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00589,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 22,
      "title": "Shipping Zone Transit Time Analysis with Reliability Metrics",
      "description": "Description: Comprehensive zone transit time analysis that evaluates actual vs expected transit times, calculates reliability metrics, and identifies zones with performance issues. Use Case: Shipping platform needs to analyze zone transit times, evaluate reliability, and identify zones with performance issues. Business Value: Improves shipping reliability by identifying zones with transit time issues and enabling proactive improvements. Purpose: Provide zone transit time analytics to evaluate re",
      "complexity": "Multiple CTEs (5+ levels), transit time analysis, reliability metrics, performance evaluation, zone ranking.",
      "expected_output": "Zone transit time analysis showing actual vs expected times, reliability metrics, and performance rankings.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2890,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004872,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 23,
      "title": "Customs Duty Optimization with Tariff Code Analysis",
      "description": "Description: Advanced customs duty optimization that analyzes tariff codes, duty rates, and identifies opportunities to reduce customs costs through proper classification. Use Case: Shipping platform needs to optimize customs duties by analyzing tariff codes and identifying cost reduction opportunities. Business Value: Reduces international shipping costs by optimizing customs duty classification and identifying cost savings. Purpose: Provide customs duty optimization through tariff code analysi",
      "complexity": "Multiple CTEs (5+ levels), tariff code analysis, duty optimization, classification analysis, cost reduction recommendations.",
      "expected_output": "Customs duty optimization results showing tariff code analysis and cost reduction opportunities.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 2994,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004636,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 24,
      "title": "API Rate Cache Optimization with Hit Rate Analysis",
      "description": "Description: Comprehensive API rate cache optimization that analyzes cache hit rates, identifies caching opportunities, and optimizes cache strategies to reduce API calls and improve performance. Use Case: Shipping platform needs to optimize API rate caching to reduce API calls, improve performance, and minimize costs. Business Value: Reduces API costs and improves performance by optimizing cache strategies and maximizing cache hit rates. Purpose: Provide API cache optimization analytics to redu",
      "complexity": "Multiple CTEs (4+ levels), cache hit rate analysis, cache optimization, API call reduction, performance improvement.",
      "expected_output": "API cache optimization results showing hit rates, caching opportunities, and performance improvements.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3098,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005549,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 25,
      "title": "Shipping Revenue Forecasting with Trend Analysis",
      "description": "Description: Advanced revenue forecasting that uses historical data, trend analysis, and predictive modeling to forecast future shipping revenue. Use Case: Shipping platform needs revenue forecasts for business planning, budgeting, and strategic decision-making. Business Value:
    Enables accurate revenue planning and strategic decision-making through predictive revenue forecasting. Purpose: Provide revenue forecasting capabilities for business planning and strategic decision-making. Complexity: Mu",
      "complexity": "Multiple CTEs (6+ levels), time-series analysis, trend analysis, predictive modeling, revenue forecasting, statistical analysis.",
      "expected_output": "Revenue forecasts showing predicted revenue, confidence intervals, and trend analysis.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3202,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005737,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 26,
      "title": "Carrier Performance Benchmarking with Industry Standards",
      "description": "Description: Comprehensive carrier performance benchmarking that compares carrier performance against industry standards and identifies best practices. Use Case: Shipping platform needs to benchmark carrier performance against industry standards and identify best practices. Business Value: Enables performance improvement by benchmarking against industry standards and identifying best practices. Purpose: Provide carrier performance benchmarking to evaluate performance relative to industry standar",
      "complexity": "Multiple CTEs (5+ levels), performance benchmarking, industry standard comparison, best practice identification, performance scoring.",
      "expected_output": "Carrier performance benchmarks showing performance relative to industry standards and best practices.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3306,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006188,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 27,
      "title": "Dimensional Weight Cost Analysis with Optimization Recommendations",
      "description": "Description: Comprehensive dimensional weight cost analysis that quantifies the impact of dimensional weight charges and provides optimization recommendations. Use Case: Shipping platform needs to analyze dimensional weight costs and provide optimization recommendations to reduce charges. Business Value: Reduces shipping costs by optimizing package dimensions to minimize dimensional weight charges. Purpose: Provide dimensional weight cost analysis and optimization recommendations to reduce shipp",
      "complexity": "Multiple CTEs (4+ levels), dimensional weight analysis, cost impact quantification, optimization recommendations, cost savings calculation.",
      "expected_output": "Dimensional weight cost analysis showing cost impact and optimization recommendations.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3410,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004999,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 28,
      "title": "Shipping Route Efficiency Metrics with Performance Scoring",
      "description": "Description: Advanced route efficiency analysis that calculates efficiency metrics, scores route performance, and identifies optimization opportunities. Use Case: Shipping platform needs route efficiency metrics to evaluate route performance and identify optimization opportunities. Business Value: Improves shipping efficiency by identifying inefficient routes and enabling route optimization. Purpose: Provide route efficiency metrics and performance scoring to enable route optimization. Complexit",
      "complexity": "Multiple CTEs (5+ levels), efficiency metrics calculation, performance scoring, route ranking, optimization identification.",
      "expected_output": "Route efficiency metrics showing efficiency scores, performance rankings, and optimization opportunities.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3514,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004706,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 29,
      "title": "Multi-Carrier Rate Aggregation with Best Rate Selection",
      "description": "Description: Comprehensive multi-carrier rate aggregation that aggregates rates from multiple carriers, compares options, and selects the best rate based on multiple criteria. Use Case: Shipping platform needs to aggregate rates from multiple carriers and select the best option based on cost, time, and reliability. Business Value: Enables optimal carrier selection by aggregating and comparing rates across all available carriers. Purpose: Provide multi-carrier rate aggregation and best rate selec",
      "complexity": "Multiple CTEs (6+ levels), rate aggregation, multi-criteria decision analysis, best rate selection, carrier comparison.",
      "expected_output": "Multi-carrier rate aggregation showing all available rates and best rate selections.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3618,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.004426,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    },
    {
      "number": 30,
      "title": "Comprehensive Shipping Dashboard with Real-Time Analytics",
      "description": "Description: Comprehensive shipping dashboard that aggregates all key metrics, provides real-time analytics, and delivers actionable insights for strategic decision-making. Use Case:
    Shipping platform needs a comprehensive dashboard showing all key shipping metrics and real-time analytics. Business Value: Provides comprehensive shipping analytics for strategic decision-making and performance monitoring. Purpose: Deliver comprehensive shipping dashboard with real-time analytics and actionable ins",
      "complexity": "Multiple CTEs (7+ levels), comprehensive aggregation, real-time analytics, multi-dimensional analysis, dashboard metrics, strategic insights.",
      "expected_output": "Comprehensive dashboard showing all key shipping intelligence metrics, trends, and actionable insights.",
      "sql": "WITH base_data AS (\n    -- First CTE: Base data extraction\n    SELECT\n        s.shipment_id,\n        s.carrier_id,\n        s.service_id,\n        s.origin_zip_code,\n        s.destination_zip_code,\n        s.total_cost,\n        s.shipment_status,\n        s.created_at,\n        p.weight_lbs,\n        p.length_inches,\n        p.width_inches,\n        p.height_inches\n    FROM shipments s\n    INNER JOIN packages p ON s.package_id = p.package_id\n    WHERE s.created_at >= CURRENT_DATE - INTERVAL '90 days'\n),\naggregated_metrics AS (\n    -- Second CTE: Aggregate metrics\n    SELECT\n        bd.carrier_id,\n        bd.service_id,\n        COUNT(*) AS total_shipments,\n        SUM(bd.total_cost) AS total_revenue,\n        AVG(bd.total_cost) AS avg_cost,\n        COUNT(CASE WHEN bd.shipment_status = 'Delivered' THEN 1 END) AS delivered_count,\n        AVG(bd.weight_lbs) AS avg_weight_lbs\n    FROM base_data bd\n    GROUP BY bd.carrier_id, bd.service_id\n),\nperformance_analysis AS (\n    -- Third CTE: Performance analysis\n    SELECT\n        am.carrier_id,\n        c.carrier_name,\n        am.service_id,\n        st.service_name,\n        am.total_shipments,\n        am.total_revenue,\n        am.avg_cost,\n        am.delivered_count,\n        CASE\n            WHEN am.total_shipments > 0\n            THEN am.delivered_count::numeric / am.total_shipments * 100\n            ELSE 0\n        END AS delivery_success_rate,\n        ROW_NUMBER() OVER (ORDER BY am.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY am.avg_cost ASC) AS cost_rank\n    FROM aggregated_metrics am\n    INNER JOIN shipping_carriers c ON am.carrier_id = c.carrier_id\n    INNER JOIN shipping_service_types st ON am.service_id = st.service_id\n),\noptimization_recommendations AS (\n    -- Fourth CTE: Generate optimization recommendations\n    SELECT\n        pa.carrier_id,\n        pa.carrier_name,\n        pa.service_id,\n        pa.service_name,\n        
pa.total_shipments,\n        pa.total_revenue,\n        pa.avg_cost,\n        pa.delivery_success_rate,\n        pa.revenue_rank,\n        pa.cost_rank,\n        CASE\n            WHEN pa.delivery_success_rate >= 95 AND pa.cost_rank <= 3 THEN 'Optimal'\n            WHEN pa.delivery_success_rate >= 90 THEN 'Good'\n            WHEN pa.delivery_success_rate >= 85 THEN 'Fair'\n            ELSE 'Needs Improvement'\n        END AS performance_category\n    FROM performance_analysis pa\n)\nSELECT\n    or_rec.carrier_name,\n    or_rec.service_name,\n    or_rec.total_shipments,\n    or_rec.total_revenue,\n    or_rec.avg_cost,\n    or_rec.delivery_success_rate,\n    or_rec.performance_category,\n    or_rec.revenue_rank,\n    or_rec.cost_rank\nFROM optimization_recommendations or_rec\nORDER BY or_rec.total_revenue DESC;",
      "line_number": 3722,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005985,
        "row_count": 4,
        "column_count": 9,
        "tested_at": "2026-02-08T21:06:12.667946"
      }
    }
  ],
  "execution_test_results": {
    "test_timestamp": "2026-02-08T21:06:12.667946",
    "total_queries": 30,
    "passed": 30,
    "failed": 0,
    "success_rate": 100.0,
    "average_execution_time": 0.0053707,
    "total_execution_time": 0.161121
  }
}
# Extract the list of query definitions from the embedded QUERIES_DATA dict.
queries = QUERIES_DATA.get('queries', [])
total_queries = len(queries)
print("="*80)
print("EMBEDDED QUERIES LOADED")
print("="*80)
print(f"Total Queries: {total_queries}")
print(f"Source: Embedded in notebook (no file dependency)")
if queries:
    print(f"\nQuery Overview:")
    # Preview only the first five titles to keep the cell output short.
    for q in queries[:5]:
        # `or 'N/A'` also covers an explicit None/empty title, which would
        # otherwise fail on the slice below.
        title = (q.get('title') or 'N/A')[:60]
        print(f"  Query {q.get('number')}: {title}...")
    if total_queries > 5:
        print(f"  ... and {total_queries - 5} more queries")
print("="*80)
print("‚úÖ Queries ready to execute!")
print("="*80)


In [None]:
# ============================================================================
# LOAD QUERIES (FROM EMBEDDED DATA)
# ============================================================================
# Queries are normally loaded by the embedded QUERIES_DATA cell above.
# If `queries` is missing, try to recover it instead of failing on undefined
# names, so this cell is safe to run on a fresh kernel.
if 'queries' not in globals():
    print("‚ö†Ô∏è  Queries not found. Run the 'Embedded Queries' cell first.")
    print("   Looking for embedded queries...")
    if 'QUERIES_DATA' in globals():
        # The embedded data exists but the extraction step did not run:
        # rebuild `queries` the same way the embedded-queries cell does.
        queries = globals()['QUERIES_DATA'].get('queries', [])
        total_queries = len(queries)
        print(f"   Loaded {total_queries} queries from QUERIES_DATA")
    else:
        # `notebook` is only inspected when some earlier cell defined it as a
        # parsed notebook dict; otherwise there is nothing to scan.
        _notebook = globals().get('notebook')
        _cells = _notebook.get('cells', []) if isinstance(_notebook, dict) else []
        for _cell in _cells:
            cell_text = ''.join(_cell.get('source', []))
            if 'EMBEDDED QUERIES.JSON' in cell_text or 'QUERIES_DATA' in cell_text:
                print(f"   ‚úÖ Found embedded queries in cell")
                break
else:
    print("="*80)
    print("QUERIES LOADED")
    print("="*80)
    print(f"Total Queries: {len(queries)}")
    if queries:
        print(f"\nQuery Overview:")
        # Preview only the first five titles to keep the cell output short.
        for q in queries[:5]:
            title = (q.get('title') or 'N/A')[:60]
            print(f"  Query {q.get('number')}: {title}...")
        if len(queries) > 5:
            print(f"  ... and {len(queries) - 5} more queries")
    print("="*80)


## Step 5: PostgreSQL Database Connection

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-9"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the local PostgreSQL server on port 5432 as the ``postgres``
    user, using the default ``postgres`` database.

    Returns:
        The open psycopg2 connection object.

    Raises:
        RuntimeError: If ``IS_COLAB`` is falsy.
        Exception: Any error raised by ``psycopg2.connect`` is re-raised after
            troubleshooting hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        connection = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password='postgres',  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return connection
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running: !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        # Re-raise so later cells do not run against a missing connection.
        raise


# Create connection
conn = create_postgresql_connection()
print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 6: Execute All Queries

## Step 5: Query Execution Function

In [None]:
# ============================================================================
# QUERY EXECUTION FUNCTION WITH METRICS
# ============================================================================

import time
import pandas as pd

def execute_query_with_metrics(db_name: str, query_sql: str, query_num: int, db_config: dict = None):
    """
    Execute SQL query with metrics collection.

    Errors are never raised to the caller; they are reported through the
    ``success`` and ``error`` fields of the returned dict.

    Args:
        db_name: Database name. Accepted for interface compatibility; the
            function body does not use it.
        query_sql: SQL query string
        query_num: Query number, echoed back as ``query_number``
        db_config: Keyword arguments for ``psycopg2.connect``. When given, a
            new connection is opened for this call and closed afterwards.
            When None, the module-level ``conn`` is used and left open.

    Returns:
        dict: Query execution results with metrics, with keys
        ``query_number``, ``success``, ``execution_time`` (seconds),
        ``row_count``, ``column_count``, ``dataframe`` (pandas DataFrame, or
        None on failure) and ``error`` (message string, or None on success).
    """
    result = {
        'query_number': query_num,
        'success': False,
        'execution_time': 0.0,
        'row_count': 0,
        'column_count': 0,
        'dataframe': None,
        'error': None
    }

    owns_connection = db_config is not None
    exec_conn = None
    cursor = None
    start_time = None

    try:
        if owns_connection:
            # Create new connection from config
            exec_conn = psycopg2.connect(**db_config)
        else:
            # Use the global conn variable
            if 'conn' not in globals():
                raise RuntimeError("Database connection not available. Run connection cell first.")
            exec_conn = globals()['conn']

        # Start timing (monotonic clock, unaffected by wall-clock adjustments)
        start_time = time.perf_counter()

        # Execute query
        cursor = exec_conn.cursor()
        cursor.execute(query_sql)

        # Fetch results. Statements that return no result set leave
        # cursor.description as None, and fetchall() would raise for them.
        if cursor.description is not None:
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
        else:
            columns, rows = [], []

        # Calculate execution time
        execution_time = time.perf_counter() - start_time

        # Create DataFrame; an empty result set still keeps its column names.
        df = pd.DataFrame(rows, columns=columns) if columns else pd.DataFrame()

        # Update result
        result['success'] = True
        result['execution_time'] = execution_time
        result['row_count'] = len(df)
        result['column_count'] = len(columns)
        result['dataframe'] = df

    except Exception as e:
        result['success'] = False
        result['error'] = str(e)
        result['execution_time'] = (
            time.perf_counter() - start_time if start_time is not None else 0.0
        )
        # A failed statement leaves the connection's transaction aborted, so a
        # shared connection must be rolled back before the next query can run.
        if exec_conn is not None:
            try:
                exec_conn.rollback()
            except Exception:
                pass  # best-effort cleanup; the original error is already recorded

    finally:
        # Always release the cursor, and the connection if this call opened it.
        if cursor is not None:
            try:
                cursor.close()
            except Exception:
                pass  # best-effort cleanup
        if owns_connection and exec_conn is not None:
            try:
                exec_conn.close()
            except Exception:
                pass  # best-effort cleanup

    return result

# Connection keyword arguments for psycopg2.connect(). Passing this dict as
# `db_config` to execute_query_with_metrics makes it open its own connection;
# leaving `db_config` as None makes it fall back to the global `conn`.
DB_CONFIG = dict(
    host='localhost',
    port=5432,
    user='postgres',
    password='postgres',
    database='postgres',
)

# Confirmation banner so the reader can see the helper cell has been run.
print(
    "‚úÖ Query execution function loaded",
    "   Function: execute_query_with_metrics(db_name, query_sql, query_num, db_config=None)",
    sep="\n",
)


In [None]:
# ============================================================================
# EXECUTE ALL QUERIES - END-TO-END TESTING
# ============================================================================

# One result dict per query, in execution order.
all_results = []

print("="*80)
print("EXECUTING ALL QUERIES")
print("="*80)

for query_info in queries:
    query_num = query_info.get('number')
    query_sql = query_info.get('sql', '')
    query_title = query_info.get('title', f'Query {query_num}')

    # Passing DB_CONFIG makes the helper open (and close) its own connection
    # for each query.
    result = execute_query_with_metrics(DB_NAME, query_sql, query_num, DB_CONFIG)
    result['query_number'] = query_num
    result['query_title'] = query_title
    result['query_info'] = query_info

    all_results.append(result)

    status = "‚úÖ" if result['success'] else "‚ùå"
    print(f"{status} Query {query_num:2d}: {query_title[:50]:<50} ({result['execution_time']:.3f}s, {result['row_count']:4d} rows)")

# Summary, computed from what actually ran rather than from a counter set in
# another cell.
executed_total = len(all_results)
passed = sum(1 for r in all_results if r['success'])
failed = executed_total - passed
# Guard against division by zero when no queries were executed.
success_rate = (passed / executed_total * 100) if executed_total else 0.0

print(f"\n{'='*80}")
print(f"EXECUTION SUMMARY")
print(f"{'='*80}")
print(f"Total Queries: {executed_total}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {success_rate:.1f}%")
print(f"{'='*80}")

## Step 7: Performance Visualization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# ============================================================================
# PERFORMANCE VISUALIZATION
# ============================================================================

# Create performance metrics DataFrame: one row per executed query.
# Titles longer than 40 characters are shortened so axis labels stay readable.
perf_data = [
    {
        'Query': r['query_number'],
        'Title': r['query_title'][:40] + '...' if len(r['query_title']) > 40 else r['query_title'],
        'Execution Time (s)': r['execution_time'],
        'Row Count': r['row_count'],
        'Column Count': r['column_count'],
        'Status': 'Passed' if r['success'] else 'Failed',
    }
    for r in all_results
]

# Declaring the columns keeps the frame's schema stable even when no query
# was executed, so later column lookups do not raise KeyError.
perf_df = pd.DataFrame(
    perf_data,
    columns=['Query', 'Title', 'Execution Time (s)', 'Row Count', 'Column Count', 'Status'],
)

if perf_df.empty:
    # Nothing to plot or summarise; statistics over an empty frame are meaningless.
    print("No query results available - skipping performance visualization.")
else:
    # Visualization: Execution Time Distribution
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Execution time bar chart
    axes[0, 0].bar(perf_df['Query'], perf_df['Execution Time (s)'], color='steelblue', alpha=0.7)
    axes[0, 0].set_xlabel('Query Number')
    axes[0, 0].set_ylabel('Execution Time (seconds)')
    axes[0, 0].set_title('Query Execution Time by Query Number')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3)

    # Execution time histogram
    axes[0, 1].hist(perf_df['Execution Time (s)'], bins=20, color='coral', alpha=0.7, edgecolor='black')
    axes[0, 1].set_xlabel('Execution Time (seconds)')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].set_title('Distribution of Execution Times')
    axes[0, 1].grid(True, alpha=0.3)

    # Row count bar chart
    axes[1, 0].bar(perf_df['Query'], perf_df['Row Count'], color='green', alpha=0.7)
    axes[1, 0].set_xlabel('Query Number')
    axes[1, 0].set_ylabel('Row Count')
    axes[1, 0].set_title('Rows Returned by Query')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(True, alpha=0.3)

    # Status pie chart
    status_counts = perf_df['Status'].value_counts()
    axes[1, 1].pie(status_counts.values, labels=status_counts.index, autopct='%1.1f%%', startangle=90)
    axes[1, 1].set_title('Query Execution Status')

    plt.tight_layout()
    plt.show()

    # Display performance summary
    print("\n" + "="*80)
    print("PERFORMANCE SUMMARY")
    print("="*80)
    print(f"Average execution time: {perf_df['Execution Time (s)'].mean():.3f}s")
    print(f"Median execution time: {perf_df['Execution Time (s)'].median():.3f}s")
    print(f"Max execution time: {perf_df['Execution Time (s)'].max():.3f}s")
    print(f"Min execution time: {perf_df['Execution Time (s)'].min():.3f}s")
    print(f"Total rows returned: {perf_df['Row Count'].sum():,}")
    print(f"Average rows per query: {perf_df['Row Count'].mean():.1f}")
    print("="*80)

## Step 8: Individual Query Documentation and Visualization

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML, Markdown

# ============================================================================
# INDIVIDUAL QUERY DOCUMENTATION AND VISUALIZATION
# ============================================================================

def _truncate_text(text, limit: int) -> str:
    """Return ``text`` as a string of at most ``limit`` characters, adding '...' only when cut."""
    text = str(text)
    return text if len(text) <= limit else text[:limit] + '...'


def document_and_visualize_query(query_result: dict, query_num: int) -> None:
    """Render documentation, a result preview and plots for a single query.

    Args:
        query_result: Result record for one query. Read keys: ``query_info``
            (dict with optional ``title``, ``description``, ``use_case``,
            ``business_value``, ``complexity``, ``expected_output``, ``sql``),
            ``success``, ``execution_time``, ``row_count``, ``column_count``
            and, optionally, ``dataframe`` and ``error``.
        query_num: Query number used in headings and messages.

    Returns:
        None. Output is displayed (Markdown, table preview, matplotlib
        figures) or printed as plain text when rich display is unavailable.
    """
    query_info = query_result['query_info']

    status_label = '✅ PASSED' if query_result['success'] else '❌ FAILED'
    # Long fields are shortened for readability; the ellipsis is only added
    # when text was actually cut off.
    description = _truncate_text(query_info.get('description') or 'N/A', 300)
    sql_text = _truncate_text(query_info.get('sql') or '', 1000)

    # Create markdown documentation
    doc = f"""
## Query {query_num}: {query_info.get('title', 'N/A')}

### Execution Status
- **Status:** {status_label}
- **Execution Time:** {query_result['execution_time']:.3f} seconds
- **Rows Returned:** {query_result['row_count']:,}
- **Columns Returned:** {query_result['column_count']}

### Query Information
- **Description:** {description}
- **Use Case:** {query_info.get('use_case', 'N/A')}
- **Business Value:** {query_info.get('business_value', 'N/A')}
- **Complexity:** {query_info.get('complexity', 'N/A')}
- **Expected Output:** {query_info.get('expected_output', 'N/A')}

### SQL Query
```sql
{sql_text}
```

### Results Preview
"""

    try:
        display(Markdown(doc))
    except Exception:
        # Rich display is unavailable (e.g. plain Python); fall back to text.
        print(doc)

    df = query_result.get('dataframe')
    if not query_result['success'] or df is None:
        # Nothing to preview: report the error if one was recorded.
        error = query_result.get('error')
        if error:
            print(f"\n❌ Error: {str(error)[:500]}")
        return

    if len(df) == 0:
        print(f"\nQuery {query_num} returned 0 rows.")
        return

    print(f"\nFirst 10 rows of Query {query_num}:")
    try:
        display(df.head(10))
    except Exception:
        print(df.head(10).to_string())

    # Create visualizations if numeric data exists
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if not numeric_cols:
        return

    # Histogram for (at most) the first three numeric columns.
    plot_cols = numeric_cols[:3]
    # squeeze=False always yields a 2-D axes array, so a single plot needs no
    # special casing.
    fig, axes = plt.subplots(1, len(plot_cols), figsize=(15, 4), squeeze=False)
    for ax, col in zip(axes[0], plot_cols):
        values = df[col].dropna()
        if values.empty:
            # Column holds only missing values; leave its panel blank.
            continue
        label = str(col)[:30]  # str() tolerates non-string column labels
        ax.hist(values, bins=min(20, len(df)), alpha=0.7, edgecolor='black')
        ax.set_title(f'Distribution of {label}')
        ax.set_xlabel(label)
        ax.set_ylabel('Frequency')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
    # This function runs once per query; close figures so they do not pile up.
    plt.close(fig)

    # Create correlation heatmap if multiple numeric columns
    if len(numeric_cols) > 1:
        fig, ax = plt.subplots(figsize=(10, 8))
        corr_matrix = df[numeric_cols].corr()
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax)
        ax.set_title('Correlation Matrix of Numeric Columns')
        plt.tight_layout()
        plt.show()
        plt.close(fig)

# Emit the section banner, then render one documentation block per collected
# result, each followed by a horizontal rule.
print("\n".join(("=" * 80, "INDIVIDUAL QUERY DOCUMENTATION", "=" * 80)))

for query_result in all_results:
    query_num = query_result['query_number']
    document_and_visualize_query(query_result, query_num)
    print(f"\n{'=' * 80}\n")

## Step 9: Generate Comprehensive Report

In [None]:
# ============================================================================
# GENERATE COMPREHENSIVE REPORT
# ============================================================================

# Create comprehensive report.
# Rates and timing aggregates fall back to 0 when nothing was executed, so the
# report never contains NaN and the cell never divides by zero.
report_data = {
    'database': DB_NAME,
    'test_timestamp': datetime.now().isoformat(),
    'total_queries': total_queries,
    'passed': passed,
    'failed': failed,
    'success_rate': passed / total_queries * 100 if total_queries > 0 else 0,
    'average_execution_time': float(perf_df['Execution Time (s)'].mean()) if len(perf_df) else 0.0,
    'total_execution_time': float(perf_df['Execution Time (s)'].sum()) if len(perf_df) else 0.0,
    'queries': []
}

for r in all_results:
    query_report = {
        'number': r['query_number'],
        'title': r['query_title'],
        'success': r['success'],
        'execution_time': r['execution_time'],
        'row_count': r['row_count'],
        'column_count': r['column_count'],
        'columns': r['columns']
    }
    # Only failed queries carry an error message in the report.
    if not r['success']:
        query_report['error'] = r['error']

    report_data['queries'].append(query_report)

# Save report
report_file = DB_DIR / 'results' / f'{DB_NAME}_comprehensive_report.json'
# parents=True also creates missing intermediate directories.
report_file.parent.mkdir(parents=True, exist_ok=True)

# default=str serialises values json cannot handle natively (e.g. timestamps).
with open(report_file, 'w', encoding='utf-8') as f:
    json.dump(report_data, f, indent=2, default=str)

print("="*80)
print("COMPREHENSIVE TEST REPORT")
print("="*80)
print(f"Database: {DB_NAME}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
# Reuse the guarded values computed for the report.
print(f"Success Rate: {report_data['success_rate']:.1f}%")
print(f"Average Execution Time: {report_data['average_execution_time']:.3f}s")
print(f"Total Execution Time: {report_data['total_execution_time']:.3f}s")
print(f"\n✅ Report saved to: {report_file}")
print("="*80)

print("\n" + "="*80)
print("END-TO-END TESTING COMPLETE")
print("="*80)
print(f"✅ Database '{DB_NAME}' initialized and tested")
print(f"✅ All {total_queries} queries executed")
print("✅ Performance metrics collected")
print("✅ Comprehensive report generated")
print("="*80)