## Python 3.14.2 Installation
This notebook requires Python 3.14.2. Run the cell below to install and verify Python 3.14.2.

In [None]:
# ============================================================================
# PYTHON 3.14.2 INSTALLATION FOR GOOGLE COLAB
# ============================================================================
import subprocess
import sys
import os

# Target version
TARGET_MAJOR = 3
TARGET_MINOR = 14
TARGET_MICRO = 2
TARGET_VERSION = f"{TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO}"


def _in_colab():
    """Return True when the kernel appears to be running in Google Colab.

    Reuses ``IS_COLAB`` when an earlier cell already defined it; otherwise the
    check is performed here so the cell also works on a fresh kernel.
    """
    cached = globals().get('IS_COLAB')
    if cached is not None:
        return bool(cached)
    try:
        import google.colab  # noqa: F401
    except ImportError:
        return os.path.exists('/content') and 'COLAB' in str(os.environ)
    return True


def _run(command):
    """Run ``command`` and return True only when it exits with status 0.

    ``command`` is an argv list, or a string when shell features (a pipe) are
    required.
    """
    try:
        return subprocess.run(command, shell=isinstance(command, str)).returncode == 0
    except OSError as exc:  # executable missing or not runnable
        print(f"   ⚠️  Could not run {command}: {exc}")
        return False


def _python314_version():
    """Return the output of ``python3.14 --version``, or None when unavailable."""
    try:
        probe = subprocess.run(['python3.14', '--version'],
                               capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.TimeoutExpired):
        return None
    if probe.returncode != 0:
        return None
    return (probe.stdout or probe.stderr).strip()


def _install_via_conda():
    """Install the target interpreter with conda; return True on success."""
    try:
        probe = subprocess.run(['conda', '--version'],
                               capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.TimeoutExpired):
        probe = None
    if probe is None or probe.returncode != 0:
        print("   ⚠️  Conda not available")
        return False

    print(f"   ✅ Conda found, installing Python {TARGET_VERSION}...")
    if not _run(['conda', 'install', '-y', f'python={TARGET_VERSION}']):
        print("   ⚠️  conda install failed")
        return False
    print(f"   ✅ Python {TARGET_VERSION} installed via conda")
    print(f"   ⚠️  Restart kernel and re-run this cell to use Python {TARGET_VERSION}")
    return True


def _install_via_deadsnakes():
    """Install python3.14 from the deadsnakes PPA; return True on success."""
    steps = [
        ['apt-get', 'update', '-qq'],
        ['apt-get', 'install', '-y', 'software-properties-common'],
        ['add-apt-repository', '-y', 'ppa:deadsnakes/ppa'],
        ['apt-get', 'update', '-qq'],
        ['apt-get', 'install', '-y', 'python3.14', 'python3.14-venv', 'python3.14-dev'],
    ]
    # all() short-circuits, so later steps are skipped once one of them fails.
    if not all(_run(step) for step in steps):
        print("   ⚠️  deadsnakes installation failed")
        return False
    # The PPA ships whichever 3.14.x is current; the exact micro version is
    # checked by the caller.
    print("   ✅ Python 3.14 installed via deadsnakes PPA")
    return True


def _install_via_pyenv():
    """Install the target interpreter with pyenv; return True on success."""
    pyenv_root = os.path.expanduser('~/.pyenv')

    # `os.system('export ...')` only changes a throw-away shell. Update this
    # process's environment instead so the pyenv commands below can be found.
    os.environ['PYENV_ROOT'] = pyenv_root
    path_entries = os.environ.get('PATH', '').split(os.pathsep)
    for entry in (os.path.join(pyenv_root, 'bin'), os.path.join(pyenv_root, 'shims')):
        if entry not in path_entries:
            path_entries.insert(0, entry)
    os.environ['PATH'] = os.pathsep.join(path_entries)

    if not os.path.isfile(os.path.join(pyenv_root, 'bin', 'pyenv')):
        # NOTE(review): this pipes a remote script straight into bash, exactly
        # as the original cell did. Pin or vendor the installer if that is a concern.
        if not _run('curl -fsSL https://pyenv.run | bash'):
            print("   ⚠️  pyenv installer failed")
            return False

    installed = (_run(['pyenv', 'install', '--skip-existing', TARGET_VERSION])
                 and _run(['pyenv', 'global', TARGET_VERSION]))
    if not installed:
        print("   ⚠️  pyenv installation failed")
        return False
    print(f"   ✅ Python {TARGET_VERSION} installed via pyenv")
    return True


print("="*80)
print("PYTHON 3.14.2 INSTALLATION")
print("="*80)

# Check current Python version
current_version = sys.version_info
print(f"\nCurrent Python version: {current_version.major}.{current_version.minor}.{current_version.micro}")
print(f"Python executable: {sys.executable}")

if tuple(current_version[:3]) == (TARGET_MAJOR, TARGET_MINOR, TARGET_MICRO):
    print(f"\n✅ Python {TARGET_VERSION} is already installed!")
else:
    print(f"\n⚠️  Python {TARGET_VERSION} is required")
    print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")
    print(f"\nInstalling Python {TARGET_VERSION}...")

    if not _in_colab():
        raise RuntimeError("Python 3.14.2 installation requires Google Colab")

    try:
        install_methods = [
            ("Method 1: Trying conda...", _install_via_conda),
            ("Method 2: Installing via deadsnakes PPA...", _install_via_deadsnakes),
            ("Method 3: Installing via pyenv...", _install_via_pyenv),
        ]
        # Fall through to the next method only while the exact target version
        # is still missing (the original ran every method unconditionally).
        for title, install in install_methods:
            print(f"\n{title}")
            if install() and TARGET_VERSION in (_python314_version() or ''):
                break

        # Verify installation
        print(f"\nVerifying Python {TARGET_VERSION} installation...")
        version_output = _python314_version()
        if version_output is not None:
            print(f"   ✅ Python 3.14 found: {version_output}")
            if TARGET_VERSION in version_output:
                print(f"   ✅ Python {TARGET_VERSION} is installed!")
            print(f"\n⚠️  IMPORTANT: Restart kernel and select Python {TARGET_VERSION} as kernel")
            print("   Or use: !python3.14 your_script.py")
        else:
            print(f"   ⚠️  Python {TARGET_VERSION} installation may have failed")
            print("   Current Python version will be used")

    except Exception as e:
        # Best effort: the rest of the notebook can still run on the current interpreter.
        print(f"\n❌ Error installing Python {TARGET_VERSION}: {e}")
        print("\n⚠️  Continuing with current Python version")
        print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")

# Verify Python version (of the running kernel, which only changes after a restart)
print("\n" + "="*80)
print("PYTHON VERSION VERIFICATION")
print("="*80)
final_version = sys.version_info
print(f"Python version: {final_version.major}.{final_version.minor}.{final_version.micro}")
print(f"Python executable: {sys.executable}")
if tuple(final_version[:3]) == (TARGET_MAJOR, TARGET_MINOR, TARGET_MICRO):
    print(f"\n✅ Python {TARGET_VERSION} is active!")
else:
    print(f"\n⚠️  Python {TARGET_VERSION} is not active")
    print(f"   Current version: {final_version.major}.{final_version.minor}.{final_version.micro}")
    print(f"   If Python {TARGET_VERSION} was installed, restart kernel and select Python {TARGET_VERSION}")
print("="*80)


# DB-11: Data Analytics Database - End-to-End Query Testing

This notebook provides **complete end-to-end setup and testing** from scratch:

1. **Environment Setup**: Install all required Python packages automatically
2. **Database Initialization**: Create database, load schema, load data
3. **Query Execution**: Execute all 30 queries with metrics
4. **Visualization**: Performance charts and data analysis
5. **Documentation**: Comprehensive query documentation

## Database Overview

**Database Name:** Data Analytics Database  
**Database ID:** db-11  
**Domain:** Data Analytics  
**Total Queries:** 30  

## Prerequisites

- PostgreSQL server running (localhost or configured via environment variables)
- Python 3.14.2 installed
- Jupyter Notebook or JupyterLab

**Note:** All required Python packages are installed automatically when you run the package-installation cell below; you do not need to install them by hand.

In [None]:
# ============================================================================
# GOOGLE COLAB ONLY - ENVIRONMENT CHECK
# ============================================================================
import sys
import os

# Verify we're running in Google Colab. IS_COLAB is read by later cells.
IS_COLAB = False
try:
    import google.colab  # noqa: F401  (imported only to prove it exists)
    IS_COLAB = True
    print("✅ Running in Google Colab")
except ImportError:
    # Fall back to filesystem / environment heuristics.
    if os.path.exists('/content') and os.environ.get('COLAB_GPU'):
        IS_COLAB = True
        print("✅ Running in Google Colab (detected via COLAB_GPU)")
    elif os.path.exists('/content') and 'COLAB' in str(os.environ):
        IS_COLAB = True
        print("✅ Running in Google Colab (detected via COLAB env)")

# Abort early: every later cell assumes a Colab runtime.
if not IS_COLAB:
    raise RuntimeError(
        "❌ ERROR: This notebook is designed to run ONLY in Google Colab.\n"
        "Please open this notebook in Google Colab: https://colab.research.google.com/"
    )

print("="*80)
print("GOOGLE COLAB ENVIRONMENT CONFIRMED")
print("="*80)

## PostgreSQL Setup for Google Colab

This notebook requires PostgreSQL. Run the PostgreSQL setup cell (the second code cell below, after the data-directory detection cell) to install and start PostgreSQL in Colab.

## Data Directory Detection

This notebook automatically detects the `data/` directory containing `schema.sql` and `data.sql` files.
It works when uploaded to Google Colab or run locally.

In [None]:
# ============================================================================# SELF-AWARE DATA DIRECTORY DETECTION# ============================================================================import os
import sysfrom pathlib import Pathprint("="*80)
print("DATA DIRECTORY DETECTION")print("="*80)def find_data_directory():    """    Self-aware function to find data/ directory.    Works when notebook is uploaded to Colab or run locally.    """    # Get notebook's current directory    if IS_COLAB:
    # In Colab, check common locations        search_paths = [            Path('/content'),            Path('/content/drive/MyDrive'),            Path.cwd(),        ]    else:        # Local execution        search_paths = [            Path.cwd(),            Path(__file__).parent if '__file__' in globals() else Path.cwd(),            Path.cwd().parent,        ]        # Also check parent directories recursively    current = Path.cwd()    for _ in range(5):
    # Check up to 5 levels up        search_paths.append(current)        current = current.parent        print(f"\nSearching for data/ directory...")    print(f"Current working directory: {Path.cwd()}")        # Search for data/ directory    data_dir = None    for search_path in search_paths:        if not search_path.exists():            continue                # Check if data/ exists here        potential_data = search_path / 'data'        if potential_data.exists() and potential_data.is_dir():            data_dir = potential_data            print(f"‚úÖ Found data/ directory: {data_dir}")            break                # Recursively search subdirectories (limit depth to avoid long searches)        try:
    for item in search_path.rglob('data'):
    if item.is_dir() and item.name == 'data':                    # Verify it contains expected files                    expected_files = ['schema.sql', 'data.sql']                    has_expected = any((item / f).exists() for f in expected_files)                    if has_expected:                        data_dir = item                        print(f"‚úÖ Found data/ directory (recursive): {data_dir}")                        break            if data_dir:                break        except (PermissionError, OSError):
    continue        
if not data_dir:
    # Try finding by database name pattern        db_name = Path.cwd().name        if db_name.startswith('db-'):            # Look for db-N/data pattern            for search_path in search_paths:
    potential_db = search_path / db_name / 'data'                if potential_db.exists() and potential_db.is_dir():                    data_dir = potential_db                    print(f"‚úÖ Found data/ directory by DB name: {data_dir}")                    break        return data_dirdef verify_data_directory(data_dir: Path):    """Verify data/ directory contains expected files."""    if not data_dir or not data_dir.exists():        return False        expected_files = ['schema.sql']    optional_files = ['data.sql']        print(f"\nVerifying data/ directory contents...")    print(f"Location: {data_dir}")        found_files = []    missing_files = []        for file_name in expected_files:        file_path = data_dir / file_name        if file_path.exists():            found_files.append(file_name)            print(f"  ‚úÖ {file_name}")        else:            missing_files.append(file_name)            print(f"  ‚ùå {file_name} (missing)")        for file_name in optional_files:        file_path = data_dir / file_name        if file_path.exists():            found_files.append(file_name)            print(f"  ‚úÖ {file_name} (optional)")        else:            print(f"  ‚ö†Ô∏è  {file_name} (optional, not found)")        if missing_files:        print(f"\n‚ö†Ô∏è  Warning: Missing required files: {missing_files}")        return False        return True# Detect data directoryDATA_DIR = find_data_directory()if DATA_DIR:    if verify_data_directory(DATA_DIR):        print(f"\n‚úÖ Data directory verified and ready!")        print(f"   Schema file: {DATA_DIR / 'schema.sql'}")        if (DATA_DIR / 'data.sql').exists():            print(f"   Data file: {DATA_DIR / 'data.sql'}")                # Set global variables for use in other cells        SCHEMA_FILE = DATA_DIR / 'schema.sql'        DATA_FILE = DATA_DIR / 'data.sql' if (DATA_DIR / 'data.sql').exists() else None                print(f"\n‚úÖ Global variables set:")        print(f"   DATA_DIR = {DATA_DIR}")        
print(f"   SCHEMA_FILE = {SCHEMA_FILE}")        if DATA_FILE:            print(f"   DATA_FILE = {DATA_FILE}")    else:        print(f"\n‚ö†Ô∏è  Data directory found but verification failed")        print(f"   Location: {DATA_DIR}")        print(f"   Please ensure schema.sql exists in this directory")else:    print(f"\n‚ùå Data directory not found!")    print(f"\nTroubleshooting:")    print(f"1. Ensure data/ directory is uploaded to Colab")    print(f"2. Check that data/ contains schema.sql")    print(f"3. Verify notebook is in same directory structure as data/")    print(f"\nCurrent directory: {Path.cwd()}")    print(f"Contents:")    try:
    for item in sorted(Path.cwd().iterdir()):
    print(f"  - {item.name} ({'dir' if item.is_dir() else 'file'})")    except PermissionError:
    print("  (Permission denied)")print("="*80)

In [None]:
# ============================================================================
# POSTGRESQL SETUP FOR GOOGLE COLAB
# ============================================================================
import subprocess
import time
import os

print("="*80)
print("POSTGRESQL SETUP FOR GOOGLE COLAB")
print("="*80)

# IS_COLAB is set by the environment-check cell; "not set" is treated as "not Colab".
if not globals().get('IS_COLAB', False):
    raise RuntimeError("This notebook requires Google Colab")


def _postgres_is_ready():
    """Return True when ``pg_isready`` reports that the server accepts connections."""
    try:
        probe = subprocess.run(['pg_isready'], capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.TimeoutExpired):
        return False
    return probe.returncode == 0


# Check if PostgreSQL is already installed
postgres_installed = False
try:
    result = subprocess.run(['psql', '--version'],
                            capture_output=True,
                            text=True,
                            timeout=5)
    if result.returncode == 0:
        print(f"✅ PostgreSQL already installed: {result.stdout.strip()}")
        postgres_installed = True
except (FileNotFoundError, subprocess.TimeoutExpired):
    pass

if not postgres_installed:
    print("\nInstalling PostgreSQL using magic commands...")
    print("(Run these commands if automatic installation fails)")
    print("  !apt-get update")
    print("  !apt-get install -y postgresql postgresql-contrib")
    print("  !service postgresql start")

    try:
        # Keep apt from stopping on interactive prompts.
        apt_env = dict(os.environ, DEBIAN_FRONTEND='noninteractive')

        # check=True makes a failing command raise, so success is only printed
        # when the step really succeeded (os.system never raised).
        print("\n   Updating package list...")
        subprocess.run(['apt-get', 'update', '-qq'], check=True, env=apt_env)
        print("   ✅ Package list updated")

        print("   Installing PostgreSQL...")
        subprocess.run(['apt-get', 'install', '-y', '-qq', 'postgresql', 'postgresql-contrib'],
                       check=True, env=apt_env)
        print("   ✅ PostgreSQL installed")
        postgres_installed = True

    except (OSError, subprocess.CalledProcessError) as e:
        print(f"   ❌ Error: {e}")
        print("   Please run manually:")
        print("   !apt-get update")
        print("   !apt-get install -y postgresql postgresql-contrib")
        print("   !service postgresql start")

# Start the service whenever it is installed but not answering. This also
# covers a pre-installed PostgreSQL whose service is stopped.
if postgres_installed and not _postgres_is_ready():
    print("   Starting PostgreSQL service...")
    try:
        started = subprocess.run(['service', 'postgresql', 'start']).returncode == 0
    except OSError as e:
        print(f"   ❌ Error: {e}")
        started = False
    if started:
        print("   ✅ PostgreSQL service started")
    else:
        print("   ⚠️  Could not start the service; try: !service postgresql start")

    # Poll instead of a fixed sleep: continue as soon as the server answers,
    # give up after 30 seconds.
    print("   Waiting for PostgreSQL to be ready...")
    deadline = time.monotonic() + 30
    while not _postgres_is_ready() and time.monotonic() < deadline:
        time.sleep(1)

# Verify PostgreSQL is running
print("\nVerifying PostgreSQL is ready...")
try:
    result = subprocess.run(['pg_isready'],
                            capture_output=True,
                            text=True,
                            timeout=5)
    if result.returncode == 0:
        print("✅ PostgreSQL is ready")
        print(f"   {result.stdout.strip()}")
    else:
        print("⚠️  PostgreSQL may not be ready yet")
        print("   Try: !service postgresql restart")
except (OSError, subprocess.TimeoutExpired) as e:
    print(f"⚠️  Could not verify PostgreSQL: {e}")

print("\n" + "="*80)
print("POSTGRESQL SETUP COMPLETE")
print("="*80)

In [None]:
# ============================================================================
# STREAMLIT DASHBOARD EXECUTION
# ============================================================================
import subprocess
import sys
import os
import tempfile
from pathlib import Path
import webbrowser
import time
import threading


def find_dashboard_file():
    """Find Streamlit dashboard file recursively.

    Looks for ``{DB_NAME}_dashboard.py`` directly inside, then anywhere below,
    each known base directory. Returns the first match or None.
    """
    search_paths = [
        Path.cwd(),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/workspace'),
        Path('/content/drive/MyDrive/db'),
        Path('/content/db'),
        Path('/content'),
    ]

    dashboard_name = f'{DB_NAME}_dashboard.py'

    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / dashboard_name
        if candidate.exists():
            return candidate

        # Try recursive search
        try:
            for found_path in search_path.rglob(dashboard_name):
                if found_path.is_file():
                    return found_path
        except OSError:
            # Unreadable subtree: move on to the next base directory.
            continue

    return None


def run_streamlit_dashboard(method='notebook', port=8501, open_browser=True):
    """
    Run Streamlit dashboard from Jupyter notebook.

    Methods:
    - 'notebook': Start a headless server in the background and print its URL
    - 'subprocess': Run as subprocess (background)
    - 'magic': Print the !streamlit run magic command

    Args:
        method: one of 'notebook', 'subprocess', 'magic'.
        port: TCP port passed to ``--server.port``.
        open_browser: in 'notebook' mode, also try ``webbrowser.open``.

    Returns:
        The ``subprocess.Popen`` handle for 'notebook' / 'subprocess', or None
        when the dashboard file is missing, for 'magic', or for an unknown method.
    """
    dashboard_path = find_dashboard_file()

    if not dashboard_path:
        print("❌ Dashboard file not found")
        print(f"   Looking for: {DB_NAME}_dashboard.py")
        return None

    print(f"✅ Found dashboard: {dashboard_path}")

    if method == 'notebook':
        # Method 1: headless server; Streamlit cannot render inside a notebook
        # cell, so the user opens the printed URL.
        print("\n" + "="*80)
        print("STREAMLIT DASHBOARD - NOTEBOOK MODE")
        print("="*80)
        print(f"\nDashboard: {dashboard_path.name}")
        print("\nTo run dashboard:")
        print("  1. Run this cell to start the server")
        print("  2. Open the URL shown below in a new tab")
        print(f"  3. Or use: !streamlit run {dashboard_path} --server.port={port}")
        print("\n" + "="*80)

        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port),
            '--server.headless', 'true',
            '--server.runOnSave', 'false',
            '--browser.gatherUsageStats', 'false'
        ]

        # Send output to a log file. Unread PIPEs fill up and eventually block
        # the server process.
        log_path = Path(tempfile.gettempdir()) / f'streamlit_{port}.log'
        with open(log_path, 'w') as log_file:
            process = subprocess.Popen(
                cmd,
                stdout=log_file,
                stderr=subprocess.STDOUT,
                text=True
            )

        # Wait a moment for server to start
        time.sleep(2)

        if process.poll() is not None:
            # The server died during start-up (e.g. streamlit is not installed).
            print(f"\n❌ Streamlit exited during start-up (exit code {process.returncode})")
            print(f"   See log: {log_path}")
            return process

        url = f"http://localhost:{port}"
        print(f"\n🌐 Dashboard URL: {url}")
        print(f"\nServer started in background (PID: {process.pid})")
        print(f"Server log: {log_path}")
        print("\nTo stop: process.terminate() or run stop_streamlit()")

        # Store process for later termination
        globals()['_streamlit_process'] = process

        # Try to open browser (best effort; there is none on hosted runtimes)
        if open_browser:
            try:
                webbrowser.open(url)
            except Exception:
                pass

        return process

    elif method == 'subprocess':
        # Method 2: Run as background subprocess (output goes to the kernel's stdout)
        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port)
        ]

        process = subprocess.Popen(cmd)
        # Remember the handle so stop_streamlit() can terminate it as well.
        globals()['_streamlit_process'] = process
        print(f"✅ Streamlit started (PID: {process.pid})")
        print(f"🌐 Dashboard: http://localhost:{port}")
        return process

    elif method == 'magic':
        # Method 3: Print magic command for user to run
        print("Run this command in a new cell:")
        print(f"!streamlit run {dashboard_path} --server.port={port}")
        return None

    print(f"⚠️  Unknown method: {method!r} (expected 'notebook', 'subprocess' or 'magic')")
    return None


def stop_streamlit():
    """Stop the Streamlit process started by run_streamlit_dashboard(), if any."""
    process = globals().pop('_streamlit_process', None)
    if process is None:
        print("⚠️  No Streamlit process found")
        return

    process.terminate()
    try:
        process.wait(timeout=5)  # reap the child so it does not linger as a zombie
    except subprocess.TimeoutExpired:
        process.kill()
    print("✅ Streamlit stopped")


# Auto-detect DB_NAME if not set
if 'DB_NAME' not in globals():
    # Try to detect from current directory
    cwd = Path.cwd()
    for db_num in range(6, 16):
        if f'db-{db_num}' in str(cwd) or f'db{db_num}' in str(cwd):
            DB_NAME = f'db-{db_num}'
            break
    else:
        # This notebook documents database db-11, so that is the fallback.
        DB_NAME = 'db-11'
        print(f"⚠️  Could not detect DB_NAME, using default: {DB_NAME}")

print("\n" + "="*80)
print("STREAMLIT DASHBOARD INTEGRATION")
print("="*80)
print(f"Database: {DB_NAME}")
print("\nAvailable methods:")
print("  1. run_streamlit_dashboard(method='notebook') - Run in notebook mode")
print("  2. run_streamlit_dashboard(method='subprocess') - Run as background process")
print("  3. run_streamlit_dashboard(method='magic') - Get magic command")
print("  4. stop_streamlit() - Stop running dashboard")
print("\n" + "="*80)

## Streamlit Dashboard

Run the Streamlit dashboard using one of these methods:

**Method 1: Notebook Mode** (Recommended)
```python
run_streamlit_dashboard(method='notebook', port=8501)
```

**Method 2: Magic Command**
```bash
!streamlit run db-11_dashboard.py --server.port=8501
```

**Method 3: Background Process**
```python
run_streamlit_dashboard(method='subprocess', port=8501)
```


## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import sys
import os
import platform
import subprocess
import json
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)


def _detect_environment():
    """Classify the runtime and return ``(env_type, details)``.

    ``env_type`` is 'colab', 'docker' or 'local'. Colab evidence takes
    precedence over the Docker marker; 'local' is used only when neither is
    detected. ``details`` collects the evidence that was found.
    """
    env_type = None
    details = {}

    # Check for Docker
    if os.path.exists('/.dockerenv'):
        env_type = 'docker'
        details['container'] = 'docker'
        if os.path.exists('/workspace'):
            details['workspace'] = '/workspace'
        print("✅ Detected: Docker container")

    # Check for Google Colab
    colab_note = None
    try:
        import google.colab  # noqa: F401
    except ImportError:
        # No module: fall back to /content-based heuristics.
        if os.path.exists('/content'):
            if os.environ.get('COLAB_GPU'):
                colab_note = "✅ Detected: Google Colab (by /content + COLAB_GPU)"
            elif 'COLAB' in str(os.environ):
                colab_note = "✅ Detected: Google Colab (by /content + COLAB env)"
            elif Path('/content/sample_data').exists() or Path('/content/drive').exists():
                colab_note = "✅ Detected: Google Colab (by /content structure)"
            elif env_type is None:
                # Weakest evidence: do not let it override a detected container.
                colab_note = "⚠️  Detected: Possible Google Colab (by /content)"
            if colab_note:
                details['content_dir'] = True
    else:
        details['colab_module'] = True
        colab_note = "✅ Detected: Google Colab (via google.colab module)"

    if colab_note:
        env_type = 'colab'
        details['platform'] = 'google_colab'
        print(colab_note)
    elif env_type is None:
        # Neither Docker nor Colab: local environment
        env_type = 'local'
        details['platform'] = platform.system().lower()
        print("✅ Detected: Local environment")

    return env_type, details


# Detect environment type
ENV_TYPE, ENV_DETAILS = _detect_environment()


# Detect base directories recursively
def find_base_directory():
    """Find base database directory recursively.

    Returns the parent of the first ``db-*`` directory that contains a
    ``queries`` sub-directory, or a start path that holds a ``client/db/db-*``
    layout; falls back to the current working directory.
    """
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for any db-* directory that has a queries/ folder
        try:
            for db_dir in start_path.rglob('db-*'):
                if db_dir.is_dir() and (db_dir / 'queries').exists():
                    return db_dir.parent
        except OSError:
            pass  # unreadable subtree; still try the client/db layout below

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and any(entry.is_dir() for entry in client_db.glob('db-*')):
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Return the detected environment configuration.

    The dict carries 'env_type', 'base_dir' and 'details' taken from the
    module-level ENV_TYPE, BASE_DIR and ENV_DETAILS.
    """
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Colab Setup (Run this first if using Google Colab)

If you're running this notebook in Google Colab:
1. **Mount Google Drive** (if your database files are in Drive)
2. **Upload database files** to `/content/db` or your Drive folder


In [None]:
# ============================================================================
# GOOGLE COLAB SETUP
# ============================================================================
# Imported here so the cell does not depend on an earlier cell for Path.
from pathlib import Path

# ENV_TYPE is set by the environment-detection cell (Step 0).
if ENV_TYPE == 'colab':
    print("="*80)
    print("GOOGLE COLAB SETUP")
    print("="*80)

    # Mount Google Drive if not already mounted
    drive_path = Path('/content/drive/MyDrive')
    if not drive_path.exists():
        print("⚠️  Google Drive not mounted.")
        print("   Run this command to mount:")
        print("   from google.colab import drive")
        print("   drive.mount('/content/drive')")
        try:
            from google.colab import drive
            drive.mount('/content/drive')
            print("✅ Google Drive mounted")
        except Exception as e:
            # Best effort: mounting needs user authorisation and may be declined.
            print(f"⚠️  Could not auto-mount Drive: {e}")
            print("   Please mount manually using the command above")
    else:
        print("✅ Google Drive is already mounted")

    # Check for database files
    print("\nChecking for database files...")

    # Check in /content/db
    content_db = Path('/content/db')
    if content_db.exists():
        print(f"✅ Found: {content_db}")
    else:
        print(f"⚠️  Not found: {content_db}")
        print("   Upload your database folder to /content/db")

    # Check in Drive
    drive_db = drive_path / 'db'
    if drive_db.exists():
        print(f"✅ Found in Drive: {drive_db}")
    else:
        print(f"⚠️  Not found in Drive: {drive_db}")
        print("   Upload your database folder to Google Drive/db")

    print("\n" + "="*80)
    print("Some PostgreSQL-specific features may not work")
    print("="*80)
else:
    print("Not running in Colab - skipping Colab setup")

In [None]:
# ============================================================================
# FAILSAFE: Force Path Correction and Package Installation
# ============================================================================
import importlib
import os
import shutil
import site
import subprocess
import sys
from datetime import datetime
from pathlib import Path


def _run_quietly(command):
    """Run *command* with all output discarded; return True if it exited with status 0."""
    try:
        subprocess.check_call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing executable (e.g. conda is not installed).
        return False
    return True


def _can_import(import_name):
    """Return True if *import_name* can be imported by the running interpreter."""
    # A package installed a moment ago is invisible until the finder caches are
    # refreshed, and a user site directory created by `pip --user` is not on
    # sys.path if it did not exist when the interpreter started.
    importlib.invalidate_caches()
    try:
        user_site = site.getusersitepackages()
    except AttributeError:
        user_site = None
    if user_site and os.path.isdir(user_site) and user_site not in sys.path:
        sys.path.append(user_site)

    try:
        __import__(import_name)
    except Exception:
        # A half-installed package may raise something other than ImportError;
        # treat every failure as "not usable yet" so the next method is tried.
        return False
    return True


def force_install_package(package_name, import_name=None):
    """Force install package using multiple methods.

    Args:
        package_name: Requirement string handed to the installers.
        import_name: Module name used to verify the install. Defaults to
            *package_name* with any extras / ``==`` / ``>=`` suffix removed.

    Returns:
        True as soon as the module imports, False if every method failed.
    """
    if import_name is None:
        import_name = package_name.split('[')[0].split('==')[0].split('>=')[0]

    # Try import first
    if _can_import(import_name):
        return True

    pip_install = [sys.executable, '-m', 'pip', 'install']
    attempts = [
        # Method 1: pip install --user
        pip_install + ['--user', '--quiet', package_name],
        # Method 2: pip install --break-system-packages (Python 3.12+)
        pip_install + ['--break-system-packages', '--quiet', package_name],
        # Method 3: pip install system-wide
        pip_install + ['--quiet', package_name],
        # Method 4: conda install (if conda available)
        ['conda', 'install', '-y', '--quiet', package_name],
    ]
    # Method 5: apt-get install (Linux/Docker)
    if os.path.exists('/usr/bin/apt-get'):
        # Debian package names are lower-case (python3-ipython, not python3-IPython).
        apt_package = f'python3-{import_name.replace("_", "-").lower()}'
        attempts.append(['apt-get', 'install', '-y', '--quiet', apt_package])
    # Method 6: Direct pip install with --force-reinstall
    attempts.append(pip_install + ['--force-reinstall', '--quiet', package_name])

    for command in attempts:
        if _run_quietly(command) and _can_import(import_name):
            return True

    print(f"‚ö†Ô∏è  Warning: Could not install {package_name}, continuing anyway...")
    return False


def correct_file_path(file_path, search_paths=None):
    """Correct file path by searching multiple locations.

    Args:
        file_path: ``str`` or ``Path`` to locate.
        search_paths: Directories to search. Defaults to the current directory,
            the /workspace and /content locations listed below and, when it is
            set, the notebook global ``BASE_DIR``.

    Returns:
        *file_path* itself if it exists; otherwise the first entry named
        ``file_path.name`` found directly in, or recursively below, a search
        directory; otherwise the original path unchanged.
    """
    if isinstance(file_path, str):
        file_path = Path(file_path)

    # If path exists, return it
    if file_path.exists():
        return file_path

    # Nothing to search for (rglob rejects an empty pattern).
    if not file_path.name:
        return file_path

    # Default search paths
    if search_paths is None:
        search_paths = [
            Path.cwd(),
            Path('/workspace/client/db'),
            Path('/workspace/db'),
            Path('/workspace'),
            Path('/content/drive/MyDrive/db'),
            Path('/content/db'),
            Path('/content'),
        ]
        # The list previously contained two empty elements, which is a syntax
        # error; BASE_DIR is now appended only when another cell has set it.
        base_dir = globals().get('BASE_DIR')
        if base_dir:
            search_paths.append(Path(base_dir))

    # Search recursively
    for search_path in search_paths:
        search_path = Path(search_path)
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / file_path.name
        if candidate.exists():
            return candidate

        # Try recursive search
        try:
            for found_path in search_path.rglob(file_path.name):
                if found_path.is_file():
                    return found_path
        except OSError:
            # Unreadable sub-directory: give up on this root, try the next one.
            continue

    # Return original path (will fail later, but at least we tried)
    return file_path


def create_notebook_backup(notebook_path=None):
    """Create backup of current notebook automatically.

    Args:
        notebook_path: Notebook to copy. When omitted, ``__file__`` is tried
            first and ``<cwd>/current_notebook.ipynb`` is used as a fallback.

    Returns:
        Path of the timestamped copy, or None if nothing was backed up.
        Failures are reported with a message and never raised.
    """
    try:
        if notebook_path is None:
            try:
                notebook_path = Path(__file__)
            except NameError:
                # __file__ is not defined inside a notebook kernel.
                notebook_path = Path.cwd() / 'current_notebook.ipynb'

        notebook_path = Path(notebook_path)

        # Only create backup if file exists
        if notebook_path.exists() and notebook_path.suffix == '.ipynb':
            timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
            backup_path = notebook_path.parent / f"{notebook_path.stem}_{timestamp}.backup.ipynb"

            # Create backup
            shutil.copy2(notebook_path, backup_path)
            print(f"‚úÖ Backup created: {backup_path.name}")
            return backup_path

        print("‚ö†Ô∏è  Could not determine notebook path for backup")
        return None
    except Exception as e:
        print(f"‚ö†Ô∏è  Backup creation failed (non-critical): {e}")
        return None


def ensure_packages_installed():
    """Ensure all required packages are installed."""
    # (requirement passed to the installers, module name used to verify it)
    required_packages = [
        ('psycopg2-binary', 'psycopg2'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
        ('matplotlib', 'matplotlib'),
        ('seaborn', 'seaborn'),
        ('ipython', 'IPython'),
        ('jupyter', 'jupyter'),
    ]

    print("\n" + "="*80)
    print("FAILSAFE: Ensuring all packages are installed...")
    print("="*80)

    for package, import_name in required_packages:
        if force_install_package(package, import_name):
            print(f"‚úÖ {package} installed")
        else:
            print(f"‚ö†Ô∏è  {package} installation failed, but continuing...")

    print("="*80 + "\n")


def _global_path(name):
    """Return the notebook global *name* as a Path, or None if unset, empty or not path-like."""
    value = globals().get(name)
    if not value:
        return None
    try:
        return Path(value)
    except TypeError:
        return None


def ensure_paths_correct():
    """Ensure all file paths are correct.

    Repairs the globals ``BASE_DIR`` and ``DB_DIR`` in place: ``BASE_DIR`` is
    (re)assigned whenever it is missing or does not exist, ``DB_DIR`` only when
    it is set but does not exist.
    """
    print("\n" + "="*80)
    print("FAILSAFE: Correcting file paths...")
    print("="*80)

    base_dir_path = _global_path('BASE_DIR')
    if base_dir_path is not None and base_dir_path.exists():
        print(f"‚úÖ BASE_DIR valid: {globals()['BASE_DIR']}")
    else:
        # correct_file_path() used to be called here without its required
        # argument, which raised TypeError. Search for the stale directory
        # by name and fall back to the working directory.
        corrected_base_dir = Path.cwd()
        if base_dir_path is not None:
            relocated = correct_file_path(base_dir_path)
            if relocated.exists():
                corrected_base_dir = relocated
        globals()['BASE_DIR'] = corrected_base_dir
        print(f"‚úÖ BASE_DIR corrected: {corrected_base_dir}")

    db_dir_path = _global_path('DB_DIR')
    if db_dir_path is not None:
        if db_dir_path.exists():
            print(f"‚úÖ DB_DIR valid: {globals()['DB_DIR']}")
        else:
            corrected_db_dir = correct_file_path(db_dir_path)
            globals()['DB_DIR'] = corrected_db_dir
            print(f"‚úÖ DB_DIR corrected: {corrected_db_dir}")

    print("="*80 + "\n")


# Create backup at startup
try:
    create_notebook_backup()
except Exception as e:
    print(f"‚ö†Ô∏è  Backup skipped: {e}")

# Run failsafe checks
ensure_packages_installed()
ensure_paths_correct()
print("‚úÖ Failsafe checks complete")

## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import sys
import os
import platform
import subprocess
import json
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)


def _detect_environment():
    """Classify the runtime as 'colab', 'docker' or 'local'.

    A Colab signal overrides a Docker one, matching the order of the checks
    this replaces (Docker first, then Colab overwriting the result).

    Returns:
        (env_type, details): the label and a dict of the facts that led to it.
    """
    env_type = None
    details = {}

    # Check for Docker
    if os.path.exists('/.dockerenv'):
        env_type = 'docker'
        details['container'] = 'docker'
        if os.path.exists('/workspace'):
            details['workspace'] = '/workspace'
        print("‚úÖ Detected: Docker container")

    # Check for Google Colab
    try:
        import google.colab  # noqa: F401 - imported only to test availability
    except ImportError:
        # No module: fall back to filesystem / environment heuristics.
        if os.path.exists('/content'):
            env_type = 'colab'
            details['platform'] = 'google_colab'
            details['content_dir'] = True
            if os.environ.get('COLAB_GPU'):
                print("‚úÖ Detected: Google Colab (by /content + COLAB_GPU)")
            elif 'COLAB' in str(os.environ):
                print("‚úÖ Detected: Google Colab (by /content + COLAB env)")
            elif Path('/content/sample_data').exists() or Path('/content/drive').exists():
                print("‚úÖ Detected: Google Colab (by /content structure)")
            else:
                print("‚ö†Ô∏è  Detected: Possible Google Colab (by /content)")
    else:
        env_type = 'colab'
        details['platform'] = 'google_colab'
        details['colab_module'] = True
        print("‚úÖ Detected: Google Colab (via google.colab module)")

    # Check for local environment.
    # This used to be the `else` of the try statement, so it ran exactly when
    # `import google.colab` succeeded and overwrote 'colab' with 'local', while
    # a real local machine was left with ENV_TYPE = None.
    if env_type is None:
        env_type = 'local'
        details['platform'] = platform.system().lower()
        print("‚úÖ Detected: Local environment")

    return env_type, details


# Detect environment type
ENV_TYPE, ENV_DETAILS = _detect_environment()


# Detect base directories recursively
def find_base_directory(db_dir_name='db-6'):
    """Find base database directory recursively.

    Args:
        db_dir_name: Name of the database directory to look for.
            NOTE(review): the default is 'db-6' although later cells of this
            notebook set DB_NAME = "db-11" - confirm which one is intended.

    Returns:
        The parent of the first ``<db_dir_name>`` directory that contains a
        ``queries`` entry; or a start path that has ``client/db/<db_dir_name>``;
        or the current working directory when nothing matches.
    """
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for the database directory anywhere below this root
        try:
            for db_dir in start_path.rglob(db_dir_name):
                if db_dir.is_dir() and (db_dir / 'queries').exists():
                    return db_dir.parent
        except OSError:
            # Unreadable sub-directory: stop scanning this root, keep going.
            pass

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and (client_db / db_dir_name).exists():
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Return the detected environment as a dict with keys env_type, base_dir and details."""
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Step 1: Environment Setup & Package Installation

In [None]:
# ============================================================================
# END-TO-END SETUP: Install all required packages and configure environment
# ============================================================================
import importlib
import os
import platform
import shutil
import subprocess
import sys
from pathlib import Path

INSTALL_TIMEOUT_SECONDS = 300  # upper bound for any single installer command


def _run_install_command(command, timeout=INSTALL_TIMEOUT_SECONDS):
    """Run one installer command quietly; return True if it exited with status 0.

    Output is discarded instead of piped: a pipe that nobody reads can fill up
    and block the child process until the timeout fires.
    """
    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=timeout,
            check=True,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # OSError covers a missing executable (conda / apt-get not present).
        return False
    return True


def _is_importable(import_name):
    """Return True if *import_name* imports in the running interpreter."""
    importlib.invalidate_caches()  # let the finders see freshly installed packages
    try:
        __import__(import_name)
    except ImportError:
        return False
    return True


def install_package_multiple_methods(package_spec: str, import_name: str) -> bool:
    """Install package using multiple methods with fallbacks.

    Order: already importable -> pip --user -> pip -> pip
    --break-system-packages (local Linux only) -> conda (if on PATH) ->
    apt-get (Docker / Colab, mapped packages only).

    Args:
        package_spec: Requirement string such as ``'pandas>=2.0.0'``.
        import_name: Module name used to verify the installation.

    Returns:
        True as soon as the module imports, False if every method failed.
    """
    package_name = package_spec.split('>=')[0]

    # Method 1: Check if already installed
    if _is_importable(import_name):
        print(f"‚úÖ {package_name}: Already installed")
        return True

    print(f"‚ö†Ô∏è  {package_name}: Installing...")

    # ENV_TYPE comes from the environment-detection cell; without it the
    # environment-specific methods are skipped instead of raising NameError.
    env_type = globals().get('ENV_TYPE')
    pip_install = [sys.executable, '-m', 'pip', 'install', package_spec]
    system_pkg_map = {
        'psycopg2-binary': 'python3-psycopg2',
        'pandas': 'python3-pandas',
        'numpy': 'python3-numpy',
        'matplotlib': 'python3-matplotlib',
    }

    # (label shown on success, commands that must all succeed)
    attempts = [
        # Method 2: pip install --user
        ("pip --user", [pip_install + ['--quiet', '--user']]),
        # Method 3: pip install (system-wide)
        ("pip (system-wide)", [pip_install + ['--quiet']]),
    ]
    # Method 4: pip install --break-system-packages
    if env_type == 'local' and platform.system() == 'Linux':
        attempts.append(
            ("pip --break-system-packages",
             [pip_install + ['--break-system-packages', '--quiet']])
        )
    # Method 5: conda install
    if shutil.which('conda'):
        conda_pkg = package_name.replace('-binary', '')
        attempts.append(("conda", [['conda', 'install', '-y', conda_pkg]]))
    # Method 6: apt-get (Docker/Colab)
    if env_type in ('docker', 'colab') and package_name in system_pkg_map:
        attempts.append(
            ("apt-get",
             [['apt-get', 'update'],
              ['apt-get', 'install', '-y', system_pkg_map[package_name]]])
        )

    for label, commands in attempts:
        if all(_run_install_command(command) for command in commands) and _is_importable(import_name):
            print(f"   ‚úÖ Installed via {label}")
            return True

    print(f"   ‚ùå Failed to install {package_name} via all methods")
    return False


print("="*80)
print("ENVIRONMENT SETUP - END-TO-END INSTALLATION")
print("="*80)

# Display Python environment
print(f"\nPython Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")
print(f"Architecture: {platform.architecture()[0]}")

# Required packages with versions
required_packages = [
    'psycopg2-binary>=2.9.0',
    'pandas>=2.0.0',
    'numpy>=1.24.0',
    'matplotlib>=3.7.0',
    'seaborn>=0.12.0'
]

# Map package names to import names
package_import_map = {
    'psycopg2-binary': 'psycopg2',
    'pandas': 'pandas',
    'numpy': 'numpy',
    'matplotlib': 'matplotlib',
    'seaborn': 'seaborn'
}

print("\n" + "="*80)
print("CHECKING AND INSTALLING REQUIRED PACKAGES")
print("="*80)

# missing_packages holds only the specs that are still unavailable after every
# method was tried, so the summary no longer lists packages that installed fine.
missing_packages = []
installed_packages = []

for package_spec in required_packages:
    package_name = package_spec.split('>=')[0]
    import_name = package_import_map.get(package_name, package_name.replace('-', '_'))

    if install_package_multiple_methods(package_spec, import_name):
        installed_packages.append(package_name)
    else:
        missing_packages.append(package_spec)
        print(f"      Manual install: pip install {package_spec}")

print("\n" + "="*80)
if missing_packages:
    print("‚ö†Ô∏è  Some packages failed to install. Please install manually:")
    for pkg in missing_packages:
        print(f"   pip install {pkg}")
    print("\n   Then restart the kernel and re-run this cell.")
else:
    print("‚úÖ All required packages are installed!")
    print("\n‚ö†Ô∏è  If packages were just installed, restart the kernel and re-run this cell.")
print("="*80)

# Now import all packages
print("\n" + "="*80)
print("IMPORTING PACKAGES")
print("="*80)

try:
    import psycopg2
    print("‚úÖ psycopg2 imported")
except ImportError as e:
    print(f"‚ùå Failed to import psycopg2: {e}")
    print("   Please restart kernel after installation")

try:
    import pandas as pd
    print("‚úÖ pandas imported")
except ImportError as e:
    print(f"‚ùå Failed to import pandas: {e}")

try:
    import numpy as np
    print("‚úÖ numpy imported")
except ImportError as e:
    print(f"‚ùå Failed to import numpy: {e}")

try:
    import matplotlib
    # NOTE(review): 'Agg' renders off-screen only, which may stop figures from
    # showing inline in the notebook - confirm this is intended.
    matplotlib.use('Agg')  # Non-interactive backend for notebooks
    import matplotlib.pyplot as plt
    print("‚úÖ matplotlib imported")
except ImportError as e:
    print(f"‚ùå Failed to import matplotlib: {e}")

try:
    import seaborn as sns
    print("‚úÖ seaborn imported")
except ImportError as e:
    print(f"‚ùå Failed to import seaborn: {e}")

try:
    from IPython.display import display, HTML, Markdown
    print("‚úÖ IPython.display imported")
except ImportError as e:
    print(f"‚ö†Ô∏è  IPython.display not available: {e}")

import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set visualization style
try:
    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")
except Exception:
    # Styling is cosmetic: plt / sns may be missing (import failed above) or
    # the style name may be unknown to the installed matplotlib.
    pass

print("\n" + "="*80)
print("ENVIRONMENT SETUP COMPLETE")
print("="*80)

## Step 2: Database Configuration

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import os
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-11"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the default ``postgres`` database on localhost:5432 as user
    ``postgres``. The password is read from the ``PGPASSWORD`` environment
    variable and falls back to ``'postgres'``.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: If the notebook is not running in Google Colab.
        Exception: Whatever ``psycopg2.connect`` raised, re-raised after
            troubleshooting hints have been printed.
    """
    # IS_COLAB is not assigned anywhere in the visible cells; if it is absent,
    # derive it from ENV_TYPE, which the environment-detection cell sets.
    running_in_colab = globals().get('IS_COLAB')
    if running_in_colab is None:
        running_in_colab = globals().get('ENV_TYPE') == 'colab'
    if not running_in_colab:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        connection = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password=os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return connection
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection
conn = create_postgresql_connection()

print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 3: Database Initialization (Create Database, Load Schema, Load Data)

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
# The "Step 2: Database Configuration" cell already defines
# create_postgresql_connection() and opens `conn`. This cell reuses both
# instead of redefining the function and opening a second connection, which
# would leave the first one open and unreferenced.
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-11"

if 'create_postgresql_connection' not in globals():
    raise RuntimeError("Run the 'Step 2: Database Configuration' cell before this one")

# psycopg2 connections expose `closed`, which is 0 while the connection is open.
if 'conn' not in globals() or conn.closed:
    conn = create_postgresql_connection()

print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 4: Load Query Metadata

## Embedded SQL Files and Queries

The following cells contain the complete database schema, data, and queries embedded directly in this notebook.
No external file dependencies required - everything is self-contained.

In [None]:
# ============================================================================
# EMBEDDED SCHEMA.SQL - DB-11
# ============================================================================
# This cell contains the complete database schema
# Execute this cell to load the schema into PostgreSQL
import psycopg2
# Schema SQL (embedded directly in notebook)
SCHEMA_SQL = """
-- Parking Database Schema
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production schema for parking data pipeline system
-- Metropolitan Areas Table
-- Stores metropolitan statistical areas (MSAs) and combined statistical areas (CSAs)
CREATE TABLE metropolitan_areas (
    msa_id VARCHAR(50) PRIMARY KEY,
    msa_name VARCHAR(255) NOT NULL,
    msa_type VARCHAR(50) NOT NULL,  -- 'MSA', 'CSA', 'Micropolitan'
    state_codes VARCHAR(100),  -- Comma-separated state codes
    principal_city VARCHAR(255),
    population_estimate INTEGER,
    land_area_sq_miles NUMERIC(12, 2),
    population_density NUMERIC(10, 2),
    median_household_income NUMERIC(12, 2),
    gdp_billions NUMERIC(12, 2),
    msa_geom GEOGRAPHY,  -- Polygon geometry for MSA boundary
    spatial_extent_west NUMERIC(10, 6),
    spatial_extent_south NUMERIC(10, 6),
    spatial_extent_east NUMERIC(10, 6),
    spatial_extent_north NUMERIC(10, 6),
    data_year INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Cities Table
-- Stores city-level demographic and economic data
CREATE TABLE cities (
    city_id VARCHAR(50) PRIMARY KEY,
    city_name VARCHAR(255) NOT NULL,
    state_code VARCHAR(2) NOT NULL,
    county_name VARCHAR(255),
    msa_id VARCHAR(50),
    population INTEGER,
    land_area_sq_miles NUMERIC(10, 2),
    population_density NUMERIC(10, 2),
    median_household_income NUMERIC(12, 2),
    median_age NUMERIC(5, 2),
    employment_total INTEGER,
    unemployment_rate NUMERIC(5, 2),
    city_geom GEOGRAPHY,  -- Point geometry for city center
    city_latitude NUMERIC(10, 7),
    city_longitude NUMERIC(10, 7),
    timezone VARCHAR(50),
    data_year INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (msa_id) REFERENCES metropolitan_areas(msa_id)
);
-- Airports Table
-- Stores airport information including passenger volumes and parking capacity
CREATE TABLE airports (
    airport_id VARCHAR(10) PRIMARY KEY,  -- IATA code
    airport_name VARCHAR(255) NOT NULL,
    city_id VARCHAR(50),
    state_code VARCHAR(2),
    airport_type VARCHAR(50),  -- 'Commercial', 'Cargo', 'General Aviation'
    latitude NUMERIC(10, 7) NOT NULL,
    longitude NUMERIC(10, 7) NOT NULL,
    airport_geom GEOGRAPHY,  -- Point geometry
    annual_passengers INTEGER,
    annual_cargo_tons INTEGER,
    parking_spaces_total INTEGER,
    parking_facilities_count INTEGER,
    valet_available BOOLEAN,
    long_term_parking BOOLEAN,
    short_term_parking BOOLEAN,
    data_year INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (city_id) REFERENCES cities(city_id)
);
-- Stadiums and Venues Table
-- Stores sports stadiums, concert venues, and event facilities
CREATE TABLE stadiums_venues (
    venue_id VARCHAR(50) PRIMARY KEY,
    venue_name VARCHAR(255) NOT NULL,
    venue_type VARCHAR(50),  -- 'Stadium', 'Arena', 'Convention Center', 'Amphitheater'
    city_id VARCHAR(50),
    latitude NUMERIC(10, 7) NOT NULL,
    longitude NUMERIC(10, 7) NOT NULL,
    venue_geom GEOGRAPHY,  -- Point geometry
    capacity INTEGER,
    parking_spaces_total INTEGER,
    parking_facilities_count INTEGER,
    primary_sport VARCHAR(100),  -- 'NFL', 'MLB', 'NBA', 'NHL', 'Soccer', 'Concert'
    team_name VARCHAR(255),
    annual_events_count INTEGER,
    peak_attendance INTEGER,
    data_year INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (city_id) REFERENCES cities(city_id)
);
-- Parking Facilities Table
-- Stores individual parking facilities (lots, garages, structures)
CREATE TABLE parking_facilities (
    facility_id VARCHAR(100) PRIMARY KEY,
    facility_name VARCHAR(255),
    facility_type VARCHAR(50),  -- 'Surface Lot', 'Garage', 'Structure', 'Valet', 'Street'
    city_id VARCHAR(50),
    latitude NUMERIC(10, 7) NOT NULL,
    longitude NUMERIC(10, 7) NOT NULL,
    facility_geom GEOGRAPHY,  -- Point geometry
    total_spaces INTEGER,
    accessible_spaces INTEGER,
    ev_charging_stations INTEGER,
    covered_spaces INTEGER,
    uncovered_spaces INTEGER,
    height_restriction_feet NUMERIC(5, 2),
    operator_name VARCHAR(255),
    operator_type VARCHAR(50),  -- 'Public', 'Private', 'Municipal', 'Airport', 'Venue'
    airport_id VARCHAR(10),
    venue_id VARCHAR(50),
    is_event_parking BOOLEAN DEFAULT FALSE,
    is_monthly_parking BOOLEAN DEFAULT FALSE,
    is_hourly_parking BOOLEAN DEFAULT TRUE,
    accepts_reservations BOOLEAN DEFAULT FALSE,
    payment_methods VARCHAR(255),  -- Comma-separated: 'Cash', 'Credit', 'Mobile', 'App'
    amenities VARCHAR(500),  -- Comma-separated amenities
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (city_id) REFERENCES cities(city_id),
    FOREIGN KEY (airport_id) REFERENCES airports(airport_id),
    FOREIGN KEY (venue_id) REFERENCES stadiums_venues(venue_id)
);
-- Parking Pricing Table
-- Stores pricing information for parking facilities
CREATE TABLE parking_pricing (
    pricing_id VARCHAR(100) PRIMARY KEY,
    facility_id VARCHAR(100) NOT NULL,
    pricing_type VARCHAR(50),  -- 'Hourly', 'Daily', 'Monthly', 'Event', 'Early Bird'
    base_rate_hourly NUMERIC(8, 2),
    base_rate_daily NUMERIC(8, 2),
    base_rate_monthly NUMERIC(8, 2),
    event_rate NUMERIC(8, 2),
    max_daily_rate NUMERIC(8, 2),
    currency VARCHAR(3) DEFAULT 'USD',
    effective_date DATE,
    expiration_date DATE,
    day_of_week VARCHAR(20),  -- 'Monday', 'Tuesday', etc., or 'All'
    time_range_start TIME,
    time_range_end TIME,
    is_active BOOLEAN DEFAULT TRUE,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (facility_id) REFERENCES parking_facilities(facility_id)
);
-- Traffic Volume Data Table
-- Stores traffic volume statistics from FHWA
CREATE TABLE traffic_volume_data (
    traffic_id VARCHAR(100) PRIMARY KEY,
    location_id VARCHAR(100),
    city_id VARCHAR(50),
    latitude NUMERIC(10, 7),
    longitude NUMERIC(10, 7),
    location_geom GEOGRAPHY,  -- Point geometry
    road_name VARCHAR(255),
    road_type VARCHAR(50),  -- 'Highway', 'Arterial', 'Collector', 'Local'
    annual_average_daily_traffic INTEGER,
    peak_hour_volume INTEGER,
    direction VARCHAR(20),  -- 'Northbound', 'Southbound', 'Eastbound', 'Westbound', 'Both'
    data_year INTEGER,
    data_month INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (city_id) REFERENCES cities(city_id)
);
-- Events Table
-- Stores event information (sports games, concerts, conventions)
CREATE TABLE events (
    event_id VARCHAR(100) PRIMARY KEY,
    event_name VARCHAR(255) NOT NULL,
    event_type VARCHAR(50),  -- 'Sports', 'Concert', 'Convention', 'Festival', 'Conference'
    venue_id VARCHAR(50),
    city_id VARCHAR(50),
    event_date DATE NOT NULL,
    event_time TIME,
    attendance INTEGER,
    parking_demand_multiplier NUMERIC(5, 2),  -- Multiplier for parking demand
    is_recurring BOOLEAN DEFAULT FALSE,
    recurrence_pattern VARCHAR(100),  -- 'Weekly', 'Monthly', 'Seasonal'
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (venue_id) REFERENCES stadiums_venues(venue_id),
    FOREIGN KEY (city_id) REFERENCES cities(city_id)
);
-- Market Metrics Table
-- Stores calculated market metrics and analytics
CREATE TABLE market_intelligence_metrics (
    metric_id VARCHAR(100) PRIMARY KEY,
    city_id VARCHAR(50),
    msa_id VARCHAR(50),
    metric_type VARCHAR(50),  -- 'Demand', 'Supply', 'Utilization', 'Revenue', 'Competition'
    metric_name VARCHAR(100),
    metric_value NUMERIC(15, 2),
    metric_unit VARCHAR(50),
    calculation_date DATE,
    time_period VARCHAR(50),  -- 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Annual'
    data_year INTEGER,
    data_month INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (city_id) REFERENCES cities(city_id),
    FOREIGN KEY (msa_id) REFERENCES metropolitan_areas(msa_id)
);
-- Parking Utilization Table
-- Stores parking utilization and occupancy data
CREATE TABLE parking_utilization (
    utilization_id VARCHAR(100) PRIMARY KEY,
    facility_id VARCHAR(100) NOT NULL,
    utilization_date DATE NOT NULL,
    utilization_hour INTEGER,  -- 0-23
    occupancy_rate NUMERIC(5, 2),  -- Percentage 0-100
    spaces_occupied INTEGER,
    spaces_available INTEGER,
    revenue_generated NUMERIC(10, 2),
    reservation_count INTEGER,
    walk_in_count INTEGER,
    data_source VARCHAR(50),  -- 'Sensor', 'Manual', 'App', 'Estimated'
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (facility_id) REFERENCES parking_facilities(facility_id)
);
-- Competitive Analysis Table
-- Stores competitive parking facility analysis
CREATE TABLE competitive_analysis (
    analysis_id VARCHAR(100) PRIMARY KEY,
    facility_id VARCHAR(100) NOT NULL,
    competitor_facility_id VARCHAR(100),
    analysis_date DATE,
    price_difference_pct NUMERIC(5, 2),
    distance_miles NUMERIC(8, 2),
    utilization_difference_pct NUMERIC(5, 2),
    amenity_comparison VARCHAR(500),
    competitive_score NUMERIC(5, 2),  -- 0-100
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (facility_id) REFERENCES parking_facilities(facility_id),
    FOREIGN KEY (competitor_facility_id) REFERENCES parking_facilities(facility_id)
);
-- Business Districts Table
-- Stores business district and commercial area information
CREATE TABLE business_districts (
    district_id VARCHAR(50) PRIMARY KEY,
    district_name VARCHAR(255) NOT NULL,
    city_id VARCHAR(50),
    district_type VARCHAR(50),  -- 'Downtown', 'Financial', 'Retail', 'Entertainment', 'Airport', 'Medical'
    latitude NUMERIC(10, 7),
    longitude NUMERIC(10, 7),
    district_geom GEOGRAPHY,  -- Polygon geometry
    employment_total INTEGER,
    businesses_count INTEGER,
    parking_demand_score NUMERIC(5, 2),  -- 0-100
    spatial_extent_west NUMERIC(10, 6),
    spatial_extent_south NUMERIC(10, 6),
    spatial_extent_east NUMERIC(10, 6),
    spatial_extent_north NUMERIC(10, 6),
    data_year INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (city_id) REFERENCES cities(city_id)
);
-- Parking Facility to Business District Mapping
CREATE TABLE facility_district_mapping (
    mapping_id VARCHAR(100) PRIMARY KEY,
    facility_id VARCHAR(100) NOT NULL,
    district_id VARCHAR(50) NOT NULL,
    distance_miles NUMERIC(8, 2),
    is_primary_district BOOLEAN DEFAULT FALSE,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (facility_id) REFERENCES parking_facilities(facility_id),
    FOREIGN KEY (district_id) REFERENCES business_districts(district_id)
);
-- Data Source Metadata Table
-- Tracks data sources and extraction metadata
CREATE TABLE data_source_metadata (
    source_id VARCHAR(100) PRIMARY KEY,
    source_name VARCHAR(255) NOT NULL,
    source_type VARCHAR(50),  -- 'API', 'CSV', 'Shapefile', 'Database', 'Web Scrape'
    source_url VARCHAR(1000),
    api_endpoint VARCHAR(500),
    extraction_date DATE,
    extraction_timestamp TIMESTAMP_NTZ,
    records_extracted INTEGER,
    data_quality_score NUMERIC(5, 2),  -- 0-100
    completeness_pct NUMERIC(5, 2),
    error_count INTEGER,
    load_timestamp TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Indexes for performance optimization
CREATE INDEX idx_cities_state ON cities(state_code);
CREATE INDEX idx_cities_msa ON cities(msa_id);
CREATE INDEX idx_parking_facilities_city ON parking_facilities(city_id);
CREATE INDEX idx_parking_facilities_airport ON parking_facilities(airport_id);
CREATE INDEX idx_parking_facilities_venue ON parking_facilities(venue_id);
CREATE INDEX idx_parking_pricing_facility ON parking_pricing(facility_id);
CREATE INDEX idx_parking_utilization_facility_date ON parking_utilization(facility_id, utilization_date);
CREATE INDEX idx_events_venue_date ON events(venue_id, event_date);
CREATE INDEX idx_events_city_date ON events(city_id, event_date);
CREATE INDEX idx_traffic_volume_city ON traffic_volume_data(city_id);
CREATE INDEX idx_market_metrics_city ON market_intelligence_metrics(city_id);
CREATE INDEX idx_market_metrics_msa ON market_intelligence_metrics(msa_id);
-- Spatial indexes (PostgreSQL PostGIS)
-- CREATE INDEX idx_metropolitan_areas_geom ON metropolitan_areas USING GIST(msa_geom);
-- CREATE INDEX idx_cities_geom ON cities USING GIST(city_geom);
-- CREATE INDEX idx_airports_geom ON airports USING GIST(airport_geom);
-- CREATE INDEX idx_stadiums_venues_geom ON stadiums_venues USING GIST(venue_geom);
-- CREATE INDEX idx_parking_facilities_geom ON parking_facilities USING GIST(facility_geom);
-- CREATE INDEX idx_traffic_volume_geom ON traffic_volume_data USING GIST(location_geom);
-- CREATE INDEX idx_business_districts_geom ON business_districts USING GIST(district_geom);
"""
def execute_schema_sql(connection):
    """Run the embedded ``SCHEMA_SQL`` script against ``connection``, one statement at a time.

    The script is split on ``;``. Fragments that hold only blank lines and
    ``--`` comments (such as the commented-out spatial index statements at the
    end of the script) are dropped before execution, so they are neither sent
    to the database nor counted in the progress output.

    Each statement is committed on success and rolled back on failure. Rolling
    back right away matters on PostgreSQL: once a statement fails, every later
    statement in the same transaction is rejected until a rollback, so a single
    failure would otherwise take the rest of the script down with it.

    Limitation: the split is purely textual, so a ``;`` inside a string literal
    or an inline comment would cut a statement in two.

    Args:
        connection: DB-API style connection providing ``cursor()``, ``commit()``
            and ``rollback()`` (presumably the notebook's psycopg2 connection).

    Returns:
        bool: ``True`` when every statement executed, ``False`` when at least
        one statement failed or an unexpected error interrupted the run.
    """
    def _has_sql(fragment):
        # True when the fragment contains something other than blank lines and `--` comments.
        return any(
            line.strip() and not line.strip().startswith('--')
            for line in fragment.splitlines()
        )

    cursor = connection.cursor()
    try:
        statements = [s.strip() for s in SCHEMA_SQL.split(';') if _has_sql(s)]
        failed = 0
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                connection.commit()
            except Exception as e:
                # Clear the failed transaction so the remaining statements can still run.
                connection.rollback()
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ‚ö†Ô∏è  Statement {idx} warning: {error_msg}")
            else:
                print(f"  ‚úÖ Executed statement {idx}/{len(statements)}")
        if failed:
            # Report partial loads honestly instead of claiming success.
            print(f"\n‚ùå Error loading schema: {failed} of {len(statements)} statement(s) failed")
            return False
        print("\n‚úÖ Schema loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n‚ùå Error loading schema: {e}")
        return False
    finally:
        cursor.close()
# Auto-execute if a usable connection exists.
# A `conn` that is defined but None (e.g. left behind by a connection attempt
# that did not succeed -- the connection cell is not visible here) is treated
# like a missing one, instead of failing inside execute_schema_sql().
if globals().get('conn') is not None:
    print("="*80)
    print("LOADING EMBEDDED SCHEMA")
    print("="*80)
    execute_schema_sql(conn)
else:
    print("‚ö†Ô∏è  Database connection not found. Run connection cell first.")
    print("   Schema SQL is available in SCHEMA_SQL variable")


In [None]:
# ============================================================================
# EMBEDDED DATA.SQL - DB-11
# ============================================================================
# This cell contains sample data for the database
# Execute this cell to load data into PostgreSQL
import psycopg2
# Data SQL (embedded directly in notebook).
# One multi-row INSERT per table, each terminated by `;`. execute_data_sql()
# splits this text on every `;`, so no string value in here may contain a
# semicolon.
# Statement order follows the foreign keys visible in the schema: referenced
# tables (metropolitan_areas, cities, airports, stadiums_venues,
# parking_facilities, business_districts) are filled before the tables that
# point at them.
# NOTE(review): the parking_facilities INSERT does not list airport_id or
# venue_id, so 'Airport Long-Term Lot' and 'Stadium Event Parking' are stored
# without a link to an airport / venue row -- confirm this is intended.
# NOTE(review): no rows are inserted into competitive_analysis.
DATA_SQL = """
-- Sample Data for Parking Database
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production sample data for parking data pipeline system
-- Insert sample metropolitan areas
INSERT INTO metropolitan_areas (msa_id, msa_name, msa_type, state_codes, principal_city, population_estimate, land_area_sq_miles, population_density, median_household_income, gdp_billions, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, data_year) VALUES
('msa_001', 'New York-Newark-Jersey City', 'MSA', 'NY,NJ,PA', 'New York', 20153634, 6718.0, 3000.0, 72000.0, 1900.0, -74.5, 40.4, -73.5, 41.0, 2024),
('msa_002', 'Los Angeles-Long Beach-Anaheim', 'MSA', 'CA', 'Los Angeles', 13214799, 4851.0, 2724.0, 71000.0, 1200.0, -118.8, 33.7, -117.5, 34.5, 2024),
('msa_003', 'Chicago-Naperville-Elgin', 'MSA', 'IL,IN,WI', 'Chicago', 9618502, 10874.0, 884.0, 68000.0, 800.0, -88.2, 41.5, -87.3, 42.1, 2024),
('msa_004', 'Dallas-Fort Worth-Arlington', 'MSA', 'TX', 'Dallas', 7614773, 9178.0, 830.0, 65000.0, 600.0, -97.5, 32.5, -96.5, 33.2, 2024),
('msa_005', 'San Francisco-Oakland-Berkeley', 'MSA', 'CA', 'San Francisco', 4700000, 2851.0, 1648.0, 105000.0, 700.0, -122.6, 37.3, -121.8, 38.0, 2024);
-- Insert sample cities
INSERT INTO cities (city_id, city_name, state_code, county_name, msa_id, population, land_area_sq_miles, population_density, median_household_income, median_age, employment_total, unemployment_rate, city_latitude, city_longitude, timezone, data_year) VALUES
('city_001', 'New York', 'NY', 'New York', 'msa_001', 8336817, 302.6, 27550.0, 72000.0, 36.5, 4200000, 5.2, 40.7128, -74.0060, 'America/New_York', 2024),
('city_002', 'Los Angeles', 'CA', 'Los Angeles', 'msa_002', 3898747, 468.7, 8320.0, 71000.0, 35.8, 1900000, 5.8, 34.0522, -118.2437, 'America/Los_Angeles', 2024),
('city_003', 'Chicago', 'IL', 'Cook', 'msa_003', 2693976, 227.6, 11830.0, 68000.0, 34.9, 1300000, 4.8, 41.8781, -87.6298, 'America/Chicago', 2024),
('city_004', 'Dallas', 'TX', 'Dallas', 'msa_004', 1343573, 385.8, 3482.0, 65000.0, 33.2, 700000, 4.2, 32.7767, -96.7970, 'America/Chicago', 2024),
('city_005', 'San Francisco', 'CA', 'San Francisco', 'msa_005', 873965, 46.9, 18635.0, 105000.0, 38.2, 500000, 3.5, 37.7749, -122.4194, 'America/Los_Angeles', 2024);
-- Insert sample airports
INSERT INTO airports (airport_id, airport_name, city_id, state_code, airport_type, latitude, longitude, annual_passengers, annual_cargo_tons, parking_spaces_total, parking_facilities_count, valet_available, long_term_parking, short_term_parking, data_year) VALUES
('JFK', 'John F. Kennedy International', 'city_001', 'NY', 'Commercial', 40.6413, -73.7781, 62000000, 1500000, 15000, 8, TRUE, TRUE, TRUE, 2024),
('LAX', 'Los Angeles International', 'city_002', 'CA', 'Commercial', 33.9416, -118.4085, 88000000, 2100000, 18000, 10, TRUE, TRUE, TRUE, 2024),
('ORD', 'Chicago O''Hare International', 'city_003', 'IL', 'Commercial', 41.9742, -87.9073, 83000000, 1800000, 20000, 12, TRUE, TRUE, TRUE, 2024),
('DFW', 'Dallas/Fort Worth International', 'city_004', 'TX', 'Commercial', 32.8998, -97.0403, 73000000, 800000, 25000, 15, TRUE, TRUE, TRUE, 2024),
('SFO', 'San Francisco International', 'city_005', 'CA', 'Commercial', 37.6213, -122.3790, 55000000, 500000, 12000, 7, TRUE, TRUE, TRUE, 2024);
-- Insert sample stadiums/venues
INSERT INTO stadiums_venues (venue_id, venue_name, venue_type, city_id, latitude, longitude, capacity, parking_spaces_total, parking_facilities_count, primary_sport, team_name, annual_events_count, peak_attendance, data_year) VALUES
('venue_001', 'Yankee Stadium', 'Stadium', 'city_001', 40.8296, -73.9262, 54251, 8000, 5, 'MLB', 'New York Yankees', 81, 54251, 2024),
('venue_002', 'Madison Square Garden', 'Arena', 'city_001', 40.7505, -73.9934, 19812, 5000, 3, 'NBA', 'New York Knicks', 200, 19812, 2024),
('venue_003', 'Staples Center', 'Arena', 'city_002', 34.0430, -118.2673, 19060, 6000, 4, 'NBA', 'Los Angeles Lakers', 180, 19060, 2024),
('venue_004', 'Soldier Field', 'Stadium', 'city_003', 41.8625, -87.6167, 61500, 10000, 6, 'NFL', 'Chicago Bears', 10, 61500, 2024),
('venue_005', 'AT&T Stadium', 'Stadium', 'city_004', 32.7473, -97.0945, 80000, 12000, 8, 'NFL', 'Dallas Cowboys', 10, 80000, 2024);
-- Insert sample parking facilities
INSERT INTO parking_facilities (facility_id, facility_name, facility_type, city_id, latitude, longitude, total_spaces, accessible_spaces, ev_charging_stations, covered_spaces, uncovered_spaces, operator_name, operator_type, is_event_parking, is_monthly_parking, is_hourly_parking, accepts_reservations, payment_methods, amenities) VALUES
('facility_001', 'Downtown Garage A', 'Garage', 'city_001', 40.7128, -74.0060, 500, 25, 10, 500, 0, 'City Parking Authority', 'Municipal', FALSE, TRUE, TRUE, TRUE, 'Credit,Mobile,App', 'Security,Lighting,Elevator'),
('facility_002', 'Airport Long-Term Lot', 'Surface Lot', 'city_001', 40.6413, -73.7781, 3000, 150, 50, 0, 3000, 'JFK Parking', 'Airport', FALSE, FALSE, TRUE, TRUE, 'Credit,Mobile,App', 'Shuttle Service,24/7 Access'),
('facility_003', 'Stadium Event Parking', 'Surface Lot', 'city_001', 40.8296, -73.9262, 2000, 100, 20, 0, 2000, 'Yankee Stadium Parking', 'Venue', TRUE, FALSE, TRUE, TRUE, 'Credit,Cash,Mobile', 'Event Shuttle'),
('facility_004', 'Business District Garage', 'Garage', 'city_002', 34.0522, -118.2437, 800, 40, 15, 800, 0, 'LA Parking Co', 'Private', FALSE, TRUE, TRUE, TRUE, 'Credit,Mobile,App', 'Valet,Car Wash'),
('facility_005', 'Convention Center Parking', 'Structure', 'city_003', 41.8781, -87.6298, 1200, 60, 25, 1200, 0, 'Chicago Parking', 'Municipal', TRUE, FALSE, TRUE, TRUE, 'Credit,Mobile', 'Convention Shuttle');
-- Insert sample parking pricing
INSERT INTO parking_pricing (pricing_id, facility_id, pricing_type, base_rate_hourly, base_rate_daily, base_rate_monthly, max_daily_rate, currency, effective_date, expiration_date, day_of_week, is_active) VALUES
('price_001', 'facility_001', 'Hourly', 3.50, 25.00, 350.00, 25.00, 'USD', '2024-01-01', '2024-12-31', 'All', TRUE),
('price_002', 'facility_001', 'Monthly', NULL, NULL, 350.00, NULL, 'USD', '2024-01-01', '2024-12-31', 'All', TRUE),
('price_003', 'facility_002', 'Daily', NULL, 18.00, NULL, 18.00, 'USD', '2024-01-01', '2024-12-31', 'All', TRUE),
('price_004', 'facility_003', 'Event', NULL, NULL, NULL, 40.00, 'USD', '2024-01-01', '2024-12-31', 'All', TRUE),
('price_005', 'facility_004', 'Hourly', 4.00, 30.00, 450.00, 30.00, 'USD', '2024-01-01', '2024-12-31', 'All', TRUE);
-- Insert sample parking utilization
INSERT INTO parking_utilization (utilization_id, facility_id, utilization_date, utilization_hour, occupancy_rate, spaces_occupied, spaces_available, revenue_generated, reservation_count, walk_in_count, data_source) VALUES
('util_001', 'facility_001', '2024-02-01', 9, 85.0, 425, 75, 1487.50, 200, 225, 'Sensor'),
('util_002', 'facility_001', '2024-02-01', 12, 95.0, 475, 25, 1662.50, 300, 175, 'Sensor'),
('util_003', 'facility_001', '2024-02-01', 18, 70.0, 350, 150, 1225.00, 150, 200, 'Sensor'),
('util_004', 'facility_002', '2024-02-01', 10, 60.0, 1800, 1200, 3240.00, 800, 1000, 'App'),
('util_005', 'facility_003', '2024-02-01', 19, 100.0, 2000, 0, 8000.00, 1500, 500, 'Manual');
-- Insert sample traffic volume data
INSERT INTO traffic_volume_data (traffic_id, location_id, city_id, latitude, longitude, road_name, road_type, annual_average_daily_traffic, peak_hour_volume, direction, data_year, data_month) VALUES
('traffic_001', 'loc_001', 'city_001', 40.7128, -74.0060, 'Broadway', 'Arterial', 45000, 3500, 'Both', 2024, 2),
('traffic_002', 'loc_002', 'city_002', 34.0522, -118.2437, 'Wilshire Blvd', 'Arterial', 38000, 2800, 'Both', 2024, 2),
('traffic_003', 'loc_003', 'city_003', 41.8781, -87.6298, 'Michigan Ave', 'Arterial', 42000, 3200, 'Both', 2024, 2),
('traffic_004', 'loc_004', 'city_004', 32.7767, -96.7970, 'I-35E', 'Highway', 120000, 8500, 'Both', 2024, 2),
('traffic_005', 'loc_005', 'city_005', 37.7749, -122.4194, 'Market St', 'Arterial', 35000, 2600, 'Both', 2024, 2);
-- Insert sample events
INSERT INTO events (event_id, event_name, event_type, venue_id, city_id, event_date, event_time, attendance, parking_demand_multiplier, is_recurring, recurrence_pattern) VALUES
('event_001', 'Yankees vs Red Sox', 'Sports', 'venue_001', 'city_001', '2024-04-15', '19:00:00', 50000, 0.85, TRUE, 'Seasonal'),
('event_002', 'Knicks vs Lakers', 'Sports', 'venue_002', 'city_001', '2024-03-20', '20:00:00', 19000, 0.90, FALSE, NULL),
('event_003', 'Concert: Taylor Swift', 'Concert', 'venue_003', 'city_002', '2024-05-10', '20:00:00', 19000, 1.20, FALSE, NULL),
('event_004', 'Bears vs Packers', 'Sports', 'venue_004', 'city_003', '2024-09-15', '13:00:00', 60000, 0.95, TRUE, 'Seasonal'),
('event_005', 'Cowboys vs Eagles', 'Sports', 'venue_005', 'city_004', '2024-10-20', '16:00:00', 80000, 1.00, TRUE, 'Seasonal');
-- Insert sample business districts
INSERT INTO business_districts (district_id, district_name, city_id, district_type, latitude, longitude, employment_total, businesses_count, parking_demand_score, spatial_extent_west, spatial_extent_south, spatial_extent_east, spatial_extent_north, data_year) VALUES
('district_001', 'Manhattan Financial District', 'city_001', 'Financial', 40.7074, -74.0113, 500000, 5000, 95.0, -74.02, 40.70, -74.00, 40.72, 2024),
('district_002', 'Downtown LA', 'city_002', 'Downtown', 34.0522, -118.2437, 250000, 3000, 85.0, -118.25, 34.05, -118.24, 34.06, 2024),
('district_003', 'Chicago Loop', 'city_003', 'Financial', 41.8781, -87.6298, 300000, 4000, 90.0, -87.64, 41.87, -87.62, 41.89, 2024),
('district_004', 'Dallas Uptown', 'city_004', 'Retail', 32.8009, -96.8027, 150000, 2000, 75.0, -96.81, 32.80, -96.79, 32.81, 2024),
('district_005', 'SF Financial District', 'city_005', 'Financial', 37.7946, -122.3998, 200000, 2500, 88.0, -122.41, 37.79, -122.39, 37.80, 2024);
-- Insert sample facility-district mappings
INSERT INTO facility_district_mapping (mapping_id, facility_id, district_id, distance_miles, is_primary_district) VALUES
('mapping_001', 'facility_001', 'district_001', 0.2, TRUE),
('mapping_002', 'facility_004', 'district_002', 0.3, TRUE),
('mapping_003', 'facility_005', 'district_003', 0.1, TRUE);
-- Insert sample market metrics
INSERT INTO market_intelligence_metrics (metric_id, city_id, msa_id, metric_type, metric_name, metric_value, metric_unit, calculation_date, time_period, data_year, data_month) VALUES
('metric_001', 'city_001', 'msa_001', 'Demand', 'Average Occupancy Rate', 82.5, 'Percentage', '2024-02-01', 'Monthly', 2024, 2),
('metric_002', 'city_001', 'msa_001', 'Revenue', 'Total Monthly Revenue', 1250000.00, 'USD', '2024-02-01', 'Monthly', 2024, 2),
('metric_003', 'city_002', 'msa_002', 'Supply', 'Total Parking Spaces', 50000, 'Spaces', '2024-02-01', 'Monthly', 2024, 2),
('metric_004', 'city_003', 'msa_003', 'Utilization', 'Peak Hour Utilization', 95.0, 'Percentage', '2024-02-01', 'Monthly', 2024, 2),
('metric_005', 'city_004', 'msa_004', 'Competition', 'Average Competitor Rate', 3.75, 'USD', '2024-02-01', 'Monthly', 2024, 2);
-- Insert sample data source metadata
INSERT INTO data_source_metadata (source_id, source_name, source_type, source_url, api_endpoint, extraction_date, extraction_timestamp, records_extracted, data_quality_score, completeness_pct, error_count) VALUES
('source_001', 'Data.gov CKAN API', 'API', 'https://catalog.data.gov/api/3/action', '/package_search', '2024-02-01', '2024-02-01 10:00:00', 5000, 95.0, 98.5, 75),
('source_002', 'Census Bureau API', 'API', 'https://api.census.gov/data', '/2023/acs/acs5', '2024-02-01', '2024-02-01 11:00:00', 400, 98.0, 100.0, 8),
('source_003', 'BTS TranStats', 'CSV', 'https://www.transtats.bts.gov', NULL, '2024-02-01', '2024-02-01 12:00:00', 500, 92.0, 95.0, 40),
('source_004', 'City Open Data Portal', 'API', 'https://data.seattle.gov', '/api/views', '2024-02-01', '2024-02-01 13:00:00', 2000, 90.0, 97.0, 200),
('source_005', 'FHWA Traffic Data', 'CSV', 'https://www.fhwa.dot.gov', NULL, '2024-02-01', '2024-02-01 14:00:00', 1000, 88.0, 94.0, 120);
"""
def execute_data_sql(connection):
    """Run the embedded ``DATA_SQL`` script against ``connection``, one statement at a time.

    The script is split on ``;``. Fragments that hold only blank lines and
    ``--`` comments are dropped before execution, so they are neither sent to
    the database nor counted in the progress output.

    Each statement is committed on success and rolled back on failure. Rolling
    back right away matters on PostgreSQL: once a statement fails, every later
    statement in the same transaction is rejected until a rollback, so a single
    failing INSERT would otherwise discard the whole load.

    Limitation: the split is purely textual, so a ``;`` inside a string literal
    or an inline comment would cut a statement in two.

    Args:
        connection: DB-API style connection providing ``cursor()``, ``commit()``
            and ``rollback()`` (presumably the notebook's psycopg2 connection).

    Returns:
        bool: ``True`` when every statement executed, ``False`` when at least
        one statement failed or an unexpected error interrupted the run.
    """
    def _has_sql(fragment):
        # True when the fragment contains something other than blank lines and `--` comments.
        return any(
            line.strip() and not line.strip().startswith('--')
            for line in fragment.splitlines()
        )

    cursor = connection.cursor()
    try:
        statements = [s.strip() for s in DATA_SQL.split(';') if _has_sql(s)]
        failed = 0
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                connection.commit()
            except Exception as e:
                # Clear the failed transaction so the remaining statements can still run.
                connection.rollback()
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ‚ö†Ô∏è  Statement {idx} warning: {error_msg}")
            else:
                print(f"  ‚úÖ Executed statement {idx}/{len(statements)}")
        if failed:
            # Report partial loads honestly instead of claiming success.
            print(f"\n‚ùå Error loading data: {failed} of {len(statements)} statement(s) failed")
            return False
        print("\n‚úÖ Data loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n‚ùå Error loading data: {e}")
        return False
    finally:
        cursor.close()
# Auto-execute if a usable connection exists.
# A `conn` that is defined but None (e.g. left behind by a connection attempt
# that did not succeed -- the connection cell is not visible here) is treated
# like a missing one, instead of failing inside execute_data_sql().
if globals().get('conn') is not None:
    print("="*80)
    print("LOADING EMBEDDED DATA")
    print("="*80)
    execute_data_sql(conn)
else:
    print("‚ö†Ô∏è  Database connection not found. Run connection cell first.")
    print("   Data SQL is available in DATA_SQL variable")


In [None]:
# ============================================================================
# EMBEDDED QUERIES.JSON - DB-11
# ============================================================================
# This cell contains all query metadata embedded directly in the notebook
# No external file dependencies required
import json
# Queries data (embedded directly in notebook)
QUERIES_DATA = {
  "source_file": "/Users/machine/Documents/AQ/db/db-11/queries/queries.md",
  "extraction_timestamp": "20260208-2109",
  "total_queries": 30,
  "queries": [
    {
      "number": 1,
      "title": "Multi-Dimensional Market Demand Analysis with Geographic Segmentation and Temporal Patterns",
      "description": "Description: Analyzes parking demand across metropolitan areas using multi-level CTEs, spatial aggregations, temporal window functions, and demographic correlations. Calculates demand scores by combining utilization rates, population density, traffic volumes, and economic indicators with weighted scoring algorithms. Use Case: Identify high-demand markets for parking marketplace expansion by analyzing utilization patterns, demographic indicators, and traffic correlations across 400+ cities. Busin",
      "complexity":
    "Deep nested CTEs (8+ levels), spatial aggregations, complex window functions with multiple frame clauses, percentile calculations, weighted scoring algorithms, temporal pattern analysis, correlated subqueries, multi-table joins",
      "expected_output": "Market demand scores by metropolitan area with rankings, growth indicators, and expansion recommendations.",
      "sql": "WITH city_demographic_cohorts AS (\n    SELECT\n        c.city_id,\n        c.city_name,\n        c.state_code,\n        c.msa_id,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.median_age,\n        c.employment_total,\n        c.unemployment_rate,\n        c.city_latitude,\n        c.city_longitude,\n        ma.msa_name,\n        ma.msa_type,\n        ma.gdp_billions,\n        CASE\n            WHEN c.population_density > 5000 THEN 'High Density'\n            WHEN c.population_density > 2000 THEN 'Medium Density'\n            ELSE 'Low Density'\n        END AS density_category,\n        CASE\n            WHEN c.median_household_income > 75000 THEN 'High Income'\n            WHEN c.median_household_income > 50000 THEN 'Medium Income'\n            ELSE 'Low Income'\n        END AS income_category\n    FROM cities c\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE c.population > 50000\n),\nparking_facility_aggregations AS (\n    SELECT\n        pf.city_id,\n        COUNT(DISTINCT pf.facility_id) AS total_facilities,\n        SUM(pf.total_spaces) AS total_spaces,\n        AVG(pf.total_spaces) AS avg_spaces_per_facility,\n        COUNT(CASE WHEN pf.is_event_parking THEN 1 END) AS event_facilities_count,\n        COUNT(CASE WHEN pf.is_monthly_parking THEN 1 END) AS monthly_facilities_count,\n        COUNT(CASE WHEN pf.is_hourly_parking THEN 1 END) AS hourly_facilities_count,\n        COUNT(CASE WHEN pf.accepts_reservations THEN 1 END) AS reservation_facilities_count,\n        COUNT(CASE WHEN pf.ev_charging_stations > 0 THEN 1 END) AS ev_charging_facilities_count\n    FROM parking_facilities pf\n    GROUP BY pf.city_id\n),\nutilization_metrics AS (\n    SELECT\n        pu.facility_id,\n        pf.city_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        pu.occupancy_rate,\n        pu.spaces_occupied,\n        pu.spaces_available,\n        
pu.revenue_generated,\n        DATE_TRUNC('week', pu.utilization_date) AS utilization_week,\n        DATE_TRUNC('month', pu.utilization_date) AS utilization_month\n    FROM parking_utilization pu\n    INNER JOIN parking_facilities pf ON pu.facility_id = pf.facility_id\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n),\ncity_utilization_aggregations AS (\n    SELECT\n        um.city_id,\n        um.utilization_month,\n        COUNT(DISTINCT um.facility_id) AS facilities_with_data,\n        AVG(um.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY um.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY um.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY um.occupancy_rate) AS p95_occupancy_rate,\n        SUM(um.revenue_generated) AS total_revenue,\n        AVG(um.revenue_generated) AS avg_revenue_per_facility,\n        COUNT(DISTINCT um.utilization_date) AS days_with_data,\n        COUNT(*) AS total_utilization_records\n    FROM utilization_metrics um\n    GROUP BY um.city_id, um.utilization_month\n),\ntraffic_correlation_analysis AS (\n    SELECT\n        tv.city_id,\n        AVG(tv.annual_average_daily_traffic) AS avg_daily_traffic,\n        MAX(tv.annual_average_daily_traffic) AS max_daily_traffic,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY tv.annual_average_daily_traffic) AS p75_daily_traffic,\n        COUNT(DISTINCT tv.location_id) AS traffic_monitoring_locations,\n        AVG(tv.peak_hour_volume) AS avg_peak_hour_volume\n    FROM traffic_volume_data tv\n    WHERE tv.data_year = EXTRACT(YEAR FROM CURRENT_DATE)\n    GROUP BY tv.city_id\n),\nmarket_demand_scoring AS (\n    SELECT\n        cdc.city_id,\n        cdc.city_name,\n        cdc.state_code,\n        cdc.msa_id,\n        cdc.msa_name,\n        cdc.population,\n        cdc.population_density,\n        cdc.median_household_income,\n        
cdc.density_category,\n        cdc.income_category,\n        COALESCE(pfa.total_facilities, 0) AS total_facilities,\n        COALESCE(pfa.total_spaces, 0) AS total_spaces,\n        COALESCE(pfa.avg_spaces_per_facility, 0) AS avg_spaces_per_facility,\n        COALESCE(cua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(cua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(cua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(cua.total_revenue, 0) AS total_revenue,\n        COALESCE(tca.avg_daily_traffic, 0) AS avg_daily_traffic,\n        COALESCE(tca.max_daily_traffic, 0) AS max_daily_traffic,\n        -- Demand score calculation (weighted factors)\n        (\n            (COALESCE(cua.avg_occupancy_rate, 0) * 0.30) +\n            (LEAST(cdc.population_density / 10000.0, 1.0) * 100 * 0.25) +\n            (LEAST(COALESCE(tca.avg_daily_traffic, 0) / 100000.0, 1.0) * 100 * 0.20) +\n            (LEAST(cdc.median_household_income / 100000.0, 1.0) * 100 * 0.15) +\n            (LEAST(COALESCE(pfa.total_facilities, 0) / 100.0, 1.0) * 100 * 0.10)\n        ) AS demand_score,\n        -- Growth potential score\n        (\n            CASE WHEN cdc.population > 100000 THEN 20 ELSE 10 END +\n            CASE WHEN cdc.median_household_income > 60000 THEN 20 ELSE 10 END +\n            CASE WHEN COALESCE(cua.avg_occupancy_rate, 0) > 70 THEN 20 ELSE 10 END +\n            CASE WHEN COALESCE(pfa.total_facilities, 0) < 50 THEN 20 ELSE 10 END +\n            CASE WHEN COALESCE(tca.avg_daily_traffic, 0) > 50000 THEN 20 ELSE 10 END\n        ) AS growth_potential_score\n    FROM city_demographic_cohorts cdc\n    LEFT JOIN parking_facility_aggregations pfa ON cdc.city_id = pfa.city_id\n    LEFT JOIN city_utilization_aggregations cua ON cdc.city_id = cua.city_id\n        AND cua.utilization_month = DATE_TRUNC('month', CURRENT_DATE)\n    LEFT JOIN traffic_correlation_analysis tca ON cdc.city_id = tca.city_id\n),\nranked_markets AS (\n    
SELECT\n        mds.*,\n        ROW_NUMBER() OVER (ORDER BY mds.demand_score DESC) AS demand_rank,\n        ROW_NUMBER() OVER (ORDER BY mds.growth_potential_score DESC) AS growth_rank,\n        PERCENT_RANK() OVER (ORDER BY mds.demand_score) AS demand_percentile,\n        PERCENT_RANK() OVER (ORDER BY mds.growth_potential_score) AS growth_percentile,\n        LAG(mds.demand_score) OVER (ORDER BY mds.demand_score DESC) AS prev_demand_score,\n        LEAD(mds.demand_score) OVER (ORDER BY mds.demand_score DESC) AS next_demand_score,\n        AVG(mds.demand_score) OVER () AS overall_avg_demand_score,\n        STDDEV(mds.demand_score) OVER () AS overall_stddev_demand_score\n    FROM market_demand_scoring mds\n)\nSELECT\n    rm.city_id,\n    rm.city_name,\n    rm.state_code,\n    rm.msa_name,\n    rm.population,\n    rm.population_density,\n    rm.median_household_income,\n    rm.total_facilities,\n    rm.total_spaces,\n    rm.avg_occupancy_rate,\n    rm.median_occupancy_rate,\n    rm.p95_occupancy_rate,\n    rm.avg_daily_traffic,\n    rm.demand_score,\n    rm.growth_potential_score,\n    rm.demand_rank,\n    rm.growth_rank,\n    rm.demand_percentile,\n    rm.growth_percentile,\n    CASE\n        WHEN rm.demand_score >= rm.overall_avg_demand_score + rm.overall_stddev_demand_score THEN 'High Priority'\n        WHEN rm.demand_score >= rm.overall_avg_demand_score THEN 'Medium Priority'\n        ELSE 'Low Priority'\n    END AS expansion_priority,\n    CASE\n        WHEN rm.demand_rank <= 50 THEN 'Tier 1 Market'\n        WHEN rm.demand_rank <= 150 THEN 'Tier 2 Market'\n        ELSE 'Tier 3 Market'\n    END AS market_tier\nFROM ranked_markets rm\nORDER BY rm.demand_score DESC\nLIMIT 100;",
      "line_number": 203,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006648,
        "row_count": 5,
        "column_count": 21,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 2,
      "title": "Competitive Analysis with Pricing Strategy Optimization and Market Share Calculation",
      "description": "Description: Analyzes competitive parking landscape using multi-level CTEs for market penetration analysis, pricing elasticity calculations, distance-based competitive clustering, and revenue optimization modeling. Identifies pricing gaps and competitive advantages through multi-dimensional analysis. Use Case:
    Optimize pricing strategies by analyzing competitor pricing, market share, and pricing elasticity across geographic clusters of parking facilities. Business Value: Competitive pricing anal",
      "complexity": "Multi-level CTEs for market clustering, complex aggregations with window functions, distance calculations, pricing elasticity modeling, percentile rankings, multi-level joins",
      "expected_output":
    "Competitive analysis by facility cluster with pricing recommendations, market share calculations, and revenue optimization opportunities.",
      "sql": "WITH facility_location_clusters AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.latitude,\n        pf.longitude,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        c.city_name,\n        c.state_code,\n        -- Calculate facility density in area\n        (\n            SELECT COUNT(*)\n            FROM parking_facilities pf2\n            WHERE pf2.city_id = pf.city_id\n            AND ST_DISTANCE(\n                ST_POINT(pf.longitude, pf.latitude),\n                ST_POINT(pf2.longitude, pf2.latitude)\n            ) < 1000\n        ) AS nearby_facilities_count\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    WHERE pf.is_hourly_parking = TRUE\n),\npricing_analysis AS (\n    SELECT\n        pp.pricing_id,\n        pp.facility_id,\n        flc.facility_name,\n        pp.pricing_type,\n        pp.base_rate_hourly,\n        pp.base_rate_daily,\n        pp.base_rate_monthly,\n        pp.max_daily_rate,\n        pp.effective_date,\n        pp.expiration_date,\n        pp.is_active,\n        flc.city_id,\n        flc.city_name,\n        flc.state_code,\n        flc.latitude,\n        flc.longitude,\n        flc.total_spaces,\n        flc.facility_type,\n        flc.nearby_facilities_count,\n        ROW_NUMBER() OVER (\n            PARTITION BY pp.facility_id\n            ORDER BY pp.effective_date DESC\n        ) AS pricing_recency_rank\n    FROM parking_pricing pp\n    INNER JOIN facility_location_clusters flc ON pp.facility_id = flc.facility_id\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type = 'Hourly'\n),\ncompetitive_clusters AS (\n    SELECT\n        pa1.facility_id,\n        pa1.facility_name,\n        pa1.city_id,\n        pa1.city_name,\n        pa1.state_code,\n        pa1.base_rate_hourly AS facility_rate,\n        pa1.total_spaces AS facility_spaces,\n        pa1.facility_type,\n        -- Find competitors 
within 500 meters\n        ARRAY_AGG(\n            pa2.facility_id\n            ORDER BY ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n            )\n        ) FILTER (\n            WHERE pa2.facility_id != pa1.facility_id\n            AND ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n            ) < 500\n        ) AS competitor_facility_ids,\n        COUNT(DISTINCT pa2.facility_id) FILTER (\n            WHERE pa2.facility_id != pa1.facility_id\n            AND ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n            ) < 500\n        ) AS competitor_count,\n        AVG(pa2.base_rate_hourly) FILTER (\n            WHERE pa2.facility_id != pa1.facility_id\n            AND ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n            ) < 500\n        ) AS avg_competitor_rate,\n        MIN(pa2.base_rate_hourly) FILTER (\n            WHERE pa2.facility_id != pa1.facility_id\n            AND ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n            ) < 500\n        ) AS min_competitor_rate,\n        MAX(pa2.base_rate_hourly) FILTER (\n            WHERE pa2.facility_id != pa1.facility_id\n            AND ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n            ) < 500\n        ) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (\n            ORDER BY pa2.base_rate_hourly\n        ) FILTER (\n            WHERE pa2.facility_id != pa1.facility_id\n            AND ST_DISTANCE(\n                ST_POINT(pa1.longitude, pa1.latitude),\n                ST_POINT(pa2.longitude, pa2.latitude)\n          
  ) < 500\n        ) AS median_competitor_rate\n    FROM pricing_analysis pa1\n    LEFT JOIN pricing_analysis pa2 ON pa1.city_id = pa2.city_id\n        AND pa1.pricing_recency_rank = 1\n        AND pa2.pricing_recency_rank = 1\n    WHERE pa1.pricing_recency_rank = 1\n    GROUP BY\n        pa1.facility_id,\n        pa1.facility_name,\n        pa1.city_id,\n        pa1.city_name,\n        pa1.state_code,\n        pa1.base_rate_hourly,\n        pa1.total_spaces,\n        pa1.facility_type,\n        pa1.latitude,\n        pa1.longitude\n),\nutilization_by_facility AS (\n    SELECT\n        pu.facility_id,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        COUNT(*) AS utilization_records_count,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '30 days'\n    GROUP BY pu.facility_id\n),\nmarket_share_calculation AS (\n    SELECT\n        cc.facility_id,\n        cc.facility_name,\n        cc.city_id,\n        cc.city_name,\n        cc.state_code,\n        cc.facility_rate,\n        cc.facility_spaces,\n        cc.competitor_count,\n        cc.avg_competitor_rate,\n        cc.min_competitor_rate,\n        cc.max_competitor_rate,\n        cc.median_competitor_rate,\n        COALESCE(ubf.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ubf.total_revenue, 0) AS total_revenue,\n        -- Pricing position analysis\n        CASE\n            WHEN cc.facility_rate < cc.min_competitor_rate THEN 'Lowest Price'\n            WHEN cc.facility_rate > cc.max_competitor_rate THEN 'Highest Price'\n            WHEN cc.facility_rate <= cc.median_competitor_rate THEN 'Below Median'\n            ELSE 'Above Median'\n        END AS pricing_position,\n        -- Price difference from average\n        
cc.facility_rate - COALESCE(cc.avg_competitor_rate, cc.facility_rate) AS price_difference_from_avg,\n        -- Price difference percentage\n        CASE\n            WHEN cc.avg_competitor_rate > 0 THEN\n                ((cc.facility_rate - cc.avg_competitor_rate) / cc.avg_competitor_rate) * 100\n            ELSE 0\n        END AS price_difference_pct,\n        -- Market share estimate (based on spaces and occupancy)\n        CASE\n            WHEN cc.competitor_count > 0 THEN\n                (cc.facility_spaces * COALESCE(ubf.avg_occupancy_rate, 0) / 100.0) /\n                NULLIF(\n                    (\n                        SELECT SUM(pf2.total_spaces * COALESCE(ubf2.avg_occupancy_rate, 0) / 100.0)\n                        FROM parking_facilities pf2\n                        LEFT JOIN utilization_by_facility ubf2 ON pf2.facility_id = ubf2.facility_id\n                        WHERE pf2.facility_id = ANY(cc.competitor_facility_ids)\n                        OR (\n                            pf2.city_id = cc.city_id\n                            AND ST_DISTANCE(\n                                ST_POINT(\n                                    (SELECT latitude FROM parking_facilities WHERE facility_id = cc.facility_id),\n                                    (SELECT longitude FROM parking_facilities WHERE facility_id = cc.facility_id)\n                                ),\n                                ST_POINT(pf2.longitude, pf2.latitude)\n                            ) < 500\n                        )\n                    ),\n                    1\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM competitive_clusters cc\n    LEFT JOIN utilization_by_facility ubf ON cc.facility_id = ubf.facility_id\n),\npricing_optimization_recommendations AS (\n    SELECT\n        msc.*,\n        -- Optimal pricing recommendation\n        CASE\n            WHEN msc.avg_occupancy_rate > 85 AND msc.price_difference_pct < -10 THEN\n 
               msc.facility_rate * 1.10  -- Increase price if high demand and underpriced\n            WHEN msc.avg_occupancy_rate < 50 AND msc.price_difference_pct > 10 THEN\n                msc.facility_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN msc.price_difference_pct < -20 THEN\n                msc.avg_competitor_rate * 0.95  -- Price slightly below average if significantly underpriced\n            WHEN msc.price_difference_pct > 20 THEN\n                msc.median_competitor_rate  -- Price at median if significantly overpriced\n            ELSE msc.facility_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN msc.avg_occupancy_rate > 85 AND msc.price_difference_pct < -10 THEN\n                    msc.facility_rate * 1.10 * msc.facility_spaces * msc.avg_occupancy_rate / 100.0\n                WHEN msc.avg_occupancy_rate < 50 AND msc.price_difference_pct > 10 THEN\n                    msc.facility_rate * 0.90 * msc.facility_spaces * msc.avg_occupancy_rate / 100.0\n                ELSE msc.total_revenue\n            END - msc.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Competitive advantage score\n        (\n            CASE WHEN msc.pricing_position = 'Lowest Price' THEN 30 ELSE 0 END +\n            CASE WHEN msc.estimated_market_share_pct > 30 THEN 25 ELSE msc.estimated_market_share_pct * 0.833 END +\n            CASE WHEN msc.avg_occupancy_rate > 80 THEN 25 ELSE msc.avg_occupancy_rate * 0.3125 END +\n            CASE WHEN msc.competitor_count < 3 THEN 20 ELSE GREATEST(20 - msc.competitor_count * 2, 0) END\n        ) AS competitive_advantage_score\n    FROM market_share_calculation msc\n)\nSELECT\n    por.facility_id,\n    por.facility_name,\n    por.city_name,\n    por.state_code,\n    por.facility_rate AS current_rate,\n    por.recommended_rate,\n    por.price_difference_from_avg,\n    
por.price_difference_pct,\n    por.pricing_position,\n    por.competitor_count,\n    por.avg_competitor_rate,\n    por.median_competitor_rate,\n    por.avg_occupancy_rate,\n    por.estimated_market_share_pct,\n    por.total_revenue,\n    por.estimated_revenue_impact,\n    por.competitive_advantage_score,\n    CASE\n        WHEN por.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN por.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        WHEN por.estimated_revenue_impact > -500 THEN 'Low Impact'\n        ELSE 'Negative Impact'\n    END AS optimization_priority\nFROM pricing_optimization_recommendations por\nWHERE por.competitor_count > 0\nORDER BY por.estimated_revenue_impact DESC\nLIMIT 200;",
      "line_number":
    398,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014236,
        "row_count": 0,
        "column_count": 18,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 3,
      "title": "Event-Based Parking Demand Forecasting with Multi-Event Correlation and Revenue Optimization",
      "description": "Description: Forecasts parking demand for events using multi-level CTEs for event pattern analysis, temporal correlation with historical utilization, venue capacity modeling, and dynamic pricing recommendations. Analyzes event types, attendance patterns, and parking multiplier effects. Use Case:
    Predict parking demand and optimize pricing for upcoming events (sports games, concerts, conventions) to maximize revenue and ensure availability. Business Value: Event parking demand forecast report wit",
      "complexity": "Multi-level CTEs for event pattern analysis, temporal window functions, correlation analysis, demand multiplier calculations, revenue optimization modeling, multi-table joins with aggregations",
      "expected_output":
    "Event parking demand forecasts with pricing recommendations and revenue optimization opportunities.",
      "sql": "WITH upcoming_events AS (\n    SELECT\n        e.event_id,\n        e.event_name,\n        e.event_type,\n        e.venue_id,\n        e.city_id,\n        e.event_date,\n        e.event_time,\n        e.attendance,\n        e.parking_demand_multiplier,\n        e.is_recurring,\n        sv.venue_name,\n        sv.capacity AS venue_capacity,\n        sv.parking_spaces_total AS venue_parking_spaces,\n        c.city_name,\n        c.state_code,\n        DATE_PART('dow', e.event_date) AS day_of_week,\n        DATE_PART('month', e.event_date) AS event_month\n    FROM events e\n    INNER JOIN stadiums_venues sv ON e.venue_id = sv.venue_id\n    INNER JOIN cities c ON e.city_id = c.city_id\n    WHERE e.event_date >= CURRENT_DATE\n    AND e.event_date <= CURRENT_DATE + INTERVAL '90 days'\n),\nhistorical_event_patterns AS (\n    SELECT\n        e.event_type,\n        DATE_PART('dow', e.event_date) AS day_of_week,\n        DATE_PART('hour', e.event_time) AS event_hour,\n        AVG(e.attendance) AS avg_attendance,\n        AVG(e.parking_demand_multiplier) AS avg_demand_multiplier,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY e.attendance) AS p75_attendance,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY e.attendance) AS p95_attendance,\n        COUNT(*) AS event_count\n    FROM events e\n    WHERE e.event_date < CURRENT_DATE\n    AND e.event_date >= CURRENT_DATE - INTERVAL '365 days'\n    GROUP BY e.event_type, DATE_PART('dow', e.event_date), DATE_PART('hour', e.event_time)\n),\nvenue_parking_analysis AS (\n    SELECT\n        sv.venue_id,\n        sv.venue_name,\n        COUNT(DISTINCT pf.facility_id) AS nearby_facilities_count,\n        SUM(pf.total_spaces) AS total_nearby_spaces,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(sv.longitude, sv.latitude),\n                    ST_POINT(pf.longitude, pf.latitude)\n                ) < 1000 THEN pf.total_spaces\n                ELSE NULL\n            
END\n        ) AS avg_spaces_per_facility,\n        AVG(pp.base_rate_hourly) FILTER (\n            WHERE ST_DISTANCE(\n                ST_POINT(sv.longitude, sv.latitude),\n                ST_POINT(pf.longitude, pf.latitude)\n            ) < 1000\n        ) AS avg_nearby_rate\n    FROM stadiums_venues sv\n    LEFT JOIN parking_facilities pf ON sv.city_id = pf.city_id\n    LEFT JOIN parking_pricing pp ON pf.facility_id = pp.facility_id\n        AND pp.is_active = TRUE\n        AND pp.pricing_type = 'Hourly'\n    GROUP BY sv.venue_id, sv.venue_name, sv.latitude, sv.longitude\n),\nevent_demand_forecast AS (\n    SELECT\n        ue.event_id,\n        ue.event_name,\n        ue.event_type,\n        ue.venue_id,\n        ue.venue_name,\n        ue.city_name,\n        ue.state_code,\n        ue.event_date,\n        ue.event_time,\n        ue.attendance,\n        ue.parking_demand_multiplier,\n        ue.venue_capacity,\n        ue.venue_parking_spaces,\n        COALESCE(vpa.nearby_facilities_count, 0) AS nearby_facilities_count,\n        COALESCE(vpa.total_nearby_spaces, 0) AS total_nearby_spaces,\n        COALESCE(hep.avg_attendance, ue.attendance) AS forecasted_attendance,\n        COALESCE(hep.avg_demand_multiplier, ue.parking_demand_multiplier) AS forecasted_multiplier,\n        COALESCE(hep.p95_attendance, ue.attendance * 1.2) AS p95_forecasted_attendance,\n        -- Forecasted parking demand\n        COALESCE(ue.attendance, hep.avg_attendance) * \n        COALESCE(ue.parking_demand_multiplier, hep.avg_demand_multiplier) AS forecasted_parking_demand,\n        -- Peak demand (95th percentile)\n        COALESCE(hep.p95_attendance, ue.attendance * 1.2) * \n        COALESCE(ue.parking_demand_multiplier, hep.avg_demand_multiplier, 0.8) AS peak_parking_demand,\n        COALESCE(vpa.avg_nearby_rate, 0) AS avg_nearby_rate\n    FROM upcoming_events ue\n    LEFT JOIN historical_event_patterns hep ON ue.event_type = hep.event_type\n        AND ue.day_of_week = 
hep.day_of_week\n        AND DATE_PART('hour', ue.event_time) = hep.event_hour\n    LEFT JOIN venue_parking_analysis vpa ON ue.venue_id = vpa.venue_id\n),\ndemand_supply_analysis AS (\n    SELECT\n        edf.*,\n        -- Supply vs demand analysis\n        CASE\n            WHEN edf.total_nearby_spaces >= edf.peak_parking_demand THEN 'Sufficient Supply'\n            WHEN edf.total_nearby_spaces >= edf.forecasted_parking_demand * 0.8 THEN 'Adequate Supply'\n            ELSE 'Insufficient Supply'\n        END AS supply_status,\n        -- Capacity utilization forecast\n        CASE\n            WHEN edf.total_nearby_spaces > 0 THEN\n                (edf.forecasted_parking_demand / NULLIF(edf.total_nearby_spaces, 0)) * 100\n            ELSE 100\n        END AS forecasted_utilization_pct,\n        -- Peak utilization forecast\n        CASE\n            WHEN edf.total_nearby_spaces > 0 THEN\n                (edf.peak_parking_demand / NULLIF(edf.total_nearby_spaces, 0)) * 100\n            ELSE 100\n        END AS peak_utilization_pct,\n        -- Supply gap\n        GREATEST(edf.peak_parking_demand - edf.total_nearby_spaces, 0) AS supply_gap\n    FROM event_demand_forecast edf\n),\npricing_recommendations AS (\n    SELECT\n        dsa.*,\n        -- Dynamic pricing recommendation based on demand\n        CASE\n            WHEN dsa.supply_status = 'Insufficient Supply' THEN\n                dsa.avg_nearby_rate * 1.30  -- Increase price 30% for high demand\n            WHEN dsa.peak_utilization_pct > 90 THEN\n                dsa.avg_nearby_rate * 1.20  -- Increase price 20% for very high utilization\n            WHEN dsa.forecasted_utilization_pct > 75 THEN\n                dsa.avg_nearby_rate * 1.10  -- Increase price 10% for high utilization\n            WHEN dsa.forecasted_utilization_pct < 50 THEN\n                dsa.avg_nearby_rate * 0.90  -- Decrease price 10% for low utilization\n            ELSE dsa.avg_nearby_rate  -- Keep current price\n        END AS 
recommended_event_rate,\n        -- Revenue forecast\n        dsa.forecasted_parking_demand * \n        CASE\n            WHEN dsa.supply_status = 'Insufficient Supply' THEN dsa.avg_nearby_rate * 1.30\n            WHEN dsa.peak_utilization_pct > 90 THEN dsa.avg_nearby_rate * 1.20\n            WHEN dsa.forecasted_utilization_pct > 75 THEN dsa.avg_nearby_rate * 1.10\n            WHEN dsa.forecasted_utilization_pct < 50 THEN dsa.avg_nearby_rate * 0.90\n            ELSE dsa.avg_nearby_rate\n        END AS forecasted_revenue\n    FROM demand_supply_analysis dsa\n)\nSELECT\n    pr.event_id,\n    pr.event_name,\n    pr.event_type,\n    pr.venue_name,\n    pr.city_name,\n    pr.state_code,\n    pr.event_date,\n    pr.event_time,\n    pr.forecasted_attendance,\n    pr.forecasted_parking_demand,\n    pr.peak_parking_demand,\n    pr.total_nearby_spaces,\n    pr.supply_status,\n    pr.forecasted_utilization_pct,\n    pr.peak_utilization_pct,\n    pr.supply_gap,\n    pr.avg_nearby_rate AS current_avg_rate,\n    pr.recommended_event_rate,\n    pr.forecasted_revenue,\n    CASE\n        WHEN pr.supply_gap > 100 THEN 'High Priority - Add Facilities'\n        WHEN pr.supply_gap > 50 THEN 'Medium Priority - Monitor Closely'\n        ELSE 'Low Priority'\n    END AS action_priority\nFROM pricing_recommendations pr\nORDER BY pr.forecasted_revenue DESC, pr.event_date\nLIMIT 100;",
      "line_number":
    682,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.01336,
        "row_count": 0,
        "column_count": 20,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 4,
      "title": "Airport Parking Revenue Optimization with Passenger Volume Correlation and Seasonal Pattern Analysis",
      "description": "Description: Analyzes airport parking performance using multi-level CTEs, passenger volume correlations, seasonal pattern detection with window functions, pricing elasticity modeling, and revenue optimization across airport facilities. Use Case: Optimize airport parking revenue by analyzing passenger volume correlations, seasonal patterns, and pricing strategies across different airport facilities. Business Value: Airport parking revenue optimization report with seasonal pricing recommendations,",
      "complexity": "Multi-level CTEs (6+ levels), temporal window functions with seasonal patterns, correlation analysis, revenue optimization modeling, percentile calculations, multi-table joins",
      "expected_output": "Airport parking revenue optimization recommendations with seasonal pricing strategies and capacity utilization insights.",
      "sql": "WITH airport_passenger_volumes AS (\n    SELECT\n        a.airport_id,\n        a.airport_name,\n        a.city_id,\n        a.annual_passengers,\n        a.parking_spaces_total,\n        a.parking_facilities_count,\n        c.city_name,\n        c.state_code,\n        DATE_PART('month', CURRENT_DATE) AS current_month,\n        -- Estimate monthly passengers (annual / 12 with seasonal adjustment)\n        a.annual_passengers / 12.0 AS base_monthly_passengers\n    FROM airports a\n    INNER JOIN cities c ON a.city_id = c.city_id\n    WHERE a.annual_passengers > 1000000\n),\nairport_parking_facilities AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.airport_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pp.base_rate_hourly,\n        pp.base_rate_daily,\n        pp.max_daily_rate,\n        a.airport_name,\n        a.annual_passengers\n    FROM parking_facilities pf\n    INNER JOIN airports a ON pf.airport_id = a.airport_id\n    LEFT JOIN parking_pricing pp ON pf.facility_id = pp.facility_id\n        AND pp.is_active = TRUE\n        AND pp.pricing_type IN ('Hourly', 'Daily')\n    WHERE pf.airport_id IS NOT NULL\n),\nmonthly_utilization_patterns AS (\n    SELECT\n        pu.facility_id,\n        pf.airport_id,\n        DATE_PART('month', pu.utilization_date) AS utilization_month,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records\n    FROM parking_utilization pu\n    INNER JOIN parking_facilities pf ON pu.facility_id = pf.facility_id\n    WHERE pf.airport_id IS NOT 
NULL\n    AND pu.utilization_date >= CURRENT_DATE - INTERVAL '365 days'\n    GROUP BY pu.facility_id, pf.airport_id, DATE_PART('month', pu.utilization_date), DATE_PART('dow', pu.utilization_date)\n),\nseasonal_pattern_analysis AS (\n    SELECT\n        apf.airport_id,\n        apf.airport_name,\n        apf.annual_passengers,\n        mup.utilization_month,\n        mup.day_of_week,\n        COUNT(DISTINCT mup.facility_id) AS facilities_count,\n        AVG(mup.avg_occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY mup.avg_occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY mup.avg_occupancy_rate) AS p75_occupancy_rate,\n        SUM(mup.total_revenue) AS total_revenue,\n        AVG(mup.avg_revenue_per_record) AS avg_revenue_per_record,\n        -- Seasonal multiplier (compared to annual average)\n        AVG(mup.avg_occupancy_rate) / NULLIF(\n            (SELECT AVG(avg_occupancy_rate) FROM monthly_utilization_patterns mup2\n             INNER JOIN parking_facilities pf2 ON mup2.facility_id = pf2.facility_id\n             WHERE pf2.airport_id = apf.airport_id),\n            1\n        ) AS seasonal_multiplier\n    FROM airport_parking_facilities apf\n    INNER JOIN monthly_utilization_patterns mup ON apf.facility_id = mup.facility_id\n    GROUP BY apf.airport_id, apf.airport_name, apf.annual_passengers, mup.utilization_month, mup.day_of_week\n),\npassenger_correlation_analysis AS (\n    SELECT\n        spa.airport_id,\n        spa.airport_name,\n        spa.annual_passengers,\n        apv.base_monthly_passengers,\n        spa.utilization_month,\n        spa.avg_occupancy_rate,\n        spa.total_revenue,\n        spa.seasonal_multiplier,\n        -- Correlation between passenger volume and parking utilization\n        CASE\n            WHEN spa.utilization_month IN (6, 7, 8, 12) THEN 1.15  -- Summer and holiday months\n            WHEN spa.utilization_month IN (3, 4, 5, 9, 10) THEN 
1.05  -- Spring and fall\n            ELSE 0.95  -- Winter months (excluding December)\n        END AS passenger_seasonal_factor,\n        -- Revenue per passenger estimate\n        CASE\n            WHEN apv.base_monthly_passengers > 0 THEN\n                spa.total_revenue / (apv.base_monthly_passengers * spa.seasonal_multiplier)\n            ELSE 0\n        END AS revenue_per_passenger\n    FROM seasonal_pattern_analysis spa\n    INNER JOIN airport_passenger_volumes apv ON spa.airport_id = apv.airport_id\n),\nrevenue_optimization AS (\n    SELECT\n        pca.*,\n        apf.facility_id,\n        apf.facility_name,\n        apf.facility_type,\n        apf.total_spaces,\n        apf.base_rate_hourly,\n        apf.base_rate_daily,\n        apf.max_daily_rate,\n        apv.city_name,\n        apv.state_code,\n        -- Optimal pricing recommendation\n        CASE\n            WHEN pca.seasonal_multiplier > 1.2 THEN\n                apf.base_rate_daily * 1.25  -- Increase price 25% for peak season\n            WHEN pca.seasonal_multiplier > 1.1 THEN\n                apf.base_rate_daily * 1.15  -- Increase price 15% for high season\n            WHEN pca.seasonal_multiplier < 0.9 THEN\n                apf.base_rate_daily * 0.90  -- Decrease price 10% for low season\n            ELSE apf.base_rate_daily  -- Keep current price\n        END AS recommended_seasonal_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN pca.seasonal_multiplier > 1.2 THEN apf.base_rate_daily * 1.25\n                WHEN pca.seasonal_multiplier > 1.1 THEN apf.base_rate_daily * 1.15\n                WHEN pca.seasonal_multiplier < 0.9 THEN apf.base_rate_daily * 0.90\n                ELSE apf.base_rate_daily\n            END - apf.base_rate_daily\n        ) * apf.total_spaces * pca.avg_occupancy_rate / 100.0 AS estimated_revenue_impact\n    FROM passenger_correlation_analysis pca\n    INNER JOIN airport_parking_facilities apf ON pca.airport_id = 
apf.airport_id\n    INNER JOIN airport_passenger_volumes apv ON pca.airport_id = apv.airport_id\n)\nSELECT\n    ro.airport_id,\n    ro.airport_name,\n    ro.city_name,\n    ro.state_code,\n    ro.annual_passengers,\n    ro.utilization_month,\n    ro.facility_id,\n    ro.facility_name,\n    ro.facility_type,\n    ro.total_spaces,\n    ro.avg_occupancy_rate,\n    ro.seasonal_multiplier,\n    ro.passenger_seasonal_factor,\n    ro.revenue_per_passenger,\n    ro.base_rate_daily AS current_rate,\n    ro.recommended_seasonal_rate,\n    ro.estimated_revenue_impact,\n    CASE\n        WHEN ro.seasonal_multiplier > 1.2 THEN 'Peak Season'\n        WHEN ro.seasonal_multiplier > 1.1 THEN 'High Season'\n        WHEN ro.seasonal_multiplier < 0.9 THEN 'Low Season'\n        ELSE 'Normal Season'\n    END AS season_category\nFROM revenue_optimization ro\nORDER BY ro.annual_passengers DESC, ro.estimated_revenue_impact DESC\nLIMIT 200;",
      "line_number":
    879,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006471,
        "row_count": 0,
        "column_count": 18,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 5,
      "title": "Traffic Volume Correlation Analysis with Parking Demand Forecasting and Revenue Impact Modeling",
      "description": "Description: Correlates traffic volume patterns with parking utilization using multi-level CTEs, temporal alignment analysis, lag correlation calculations, and demand forecasting models. Identifies traffic patterns that predict parking demand and optimizes facility placement. Use Case: Predict parking demand based on traffic patterns and optimize facility locations near high-traffic corridors to maximize utilization and revenue. Business Value: Traffic-parking correlation report with demand fore",
      "complexity": "Multi-level CTEs (7+ levels), temporal correlation analysis, lag functions, demand forecasting models, spatial proximity calculations, revenue impact modeling",
      "expected_output": "Traffic-parking correlation analysis with demand forecasts and facility placement recommendations.",
      "sql": "WITH traffic_monitoring_locations AS (\n    SELECT\n        tv.location_id,\n        tv.city_id,\n        tv.latitude,\n        tv.longitude,\n        tv.road_name,\n        tv.road_type,\n        tv.annual_average_daily_traffic,\n        tv.peak_hour_volume,\n        tv.direction,\n        tv.data_year,\n        c.city_name,\n        c.state_code,\n        ST_POINT(tv.longitude, tv.latitude) AS traffic_point\n    FROM traffic_volume_data tv\n    INNER JOIN cities c ON tv.city_id = c.city_id\n    WHERE tv.data_year = EXTRACT(YEAR FROM CURRENT_DATE)\n),\nnearby_parking_facilities AS (\n    SELECT\n        tml.location_id,\n        tml.city_id,\n        tml.annual_average_daily_traffic,\n        tml.peak_hour_volume,\n        pf.facility_id,\n        pf.facility_name,\n        pf.total_spaces,\n        pf.latitude,\n        pf.longitude,\n        ST_DISTANCE(\n            tml.traffic_point,\n            ST_POINT(pf.longitude, pf.latitude)\n        ) AS distance_meters,\n        ST_POINT(pf.longitude, pf.latitude) AS facility_point\n    FROM traffic_monitoring_locations tml\n    INNER JOIN parking_facilities pf ON tml.city_id = pf.city_id\n    WHERE ST_DISTANCE(\n        tml.traffic_point,\n        ST_POINT(pf.longitude, pf.latitude)\n    ) < 500  -- Within 500 meters\n),\nhourly_utilization_by_facility AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        COUNT(*) AS record_count\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour\n),\ntraffic_parking_correlation AS (\n    SELECT\n        npf.location_id,\n        npf.city_id,\n        npf.annual_average_daily_traffic,\n        npf.peak_hour_volume,\n        npf.facility_id,\n        npf.facility_name,\n        
npf.total_spaces,\n        npf.distance_meters,\n        hubf.utilization_hour,\n        AVG(hubf.avg_occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY hubf.avg_occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY hubf.avg_occupancy_rate) AS p95_occupancy_rate,\n        SUM(hubf.total_revenue) AS total_revenue,\n        COUNT(DISTINCT hubf.utilization_date) AS days_with_data,\n        -- Correlation coefficient approximation\n        (\n            AVG(hubf.avg_occupancy_rate * npf.peak_hour_volume) -\n            AVG(hubf.avg_occupancy_rate) * AVG(npf.peak_hour_volume)\n        ) / NULLIF(\n            SQRT(\n                (AVG(hubf.avg_occupancy_rate * hubf.avg_occupancy_rate) - AVG(hubf.avg_occupancy_rate) * AVG(hubf.avg_occupancy_rate)) *\n                (AVG(npf.peak_hour_volume * npf.peak_hour_volume) - AVG(npf.peak_hour_volume) * AVG(npf.peak_hour_volume))\n            ),\n            0\n        ) AS correlation_coefficient\n    FROM nearby_parking_facilities npf\n    INNER JOIN hourly_utilization_by_facility hubf ON npf.facility_id = hubf.facility_id\n    GROUP BY npf.location_id, npf.city_id, npf.annual_average_daily_traffic, npf.peak_hour_volume,\n             npf.facility_id, npf.facility_name, npf.total_spaces, npf.distance_meters, hubf.utilization_hour\n),\ndemand_forecasting_model AS (\n    SELECT\n        tpc.*,\n        -- Forecasted occupancy based on traffic\n        CASE\n            WHEN tpc.correlation_coefficient > 0.7 THEN\n                LEAST(tpc.avg_occupancy_rate * (1 + (tpc.peak_hour_volume / NULLIF(10000.0, 0)) * 0.1), 100)\n            WHEN tpc.correlation_coefficient > 0.5 THEN\n                LEAST(tpc.avg_occupancy_rate * (1 + (tpc.peak_hour_volume / NULLIF(10000.0, 0)) * 0.05), 100)\n            ELSE tpc.avg_occupancy_rate\n        END AS forecasted_occupancy_rate,\n        -- Revenue forecast\n        tpc.total_revenue * \n        CASE\n      
      WHEN tpc.correlation_coefficient > 0.7 THEN 1.15\n            WHEN tpc.correlation_coefficient > 0.5 THEN 1.08\n            ELSE 1.0\n        END AS forecasted_revenue\n    FROM traffic_parking_correlation tpc\n)\nSELECT\n    dfm.location_id,\n    dfm.city_id,\n    dfm.facility_id,\n    dfm.facility_name,\n    dfm.annual_average_daily_traffic,\n    dfm.peak_hour_volume,\n    dfm.utilization_hour,\n    dfm.distance_meters,\n    dfm.avg_occupancy_rate,\n    dfm.median_occupancy_rate,\n    dfm.p95_occupancy_rate,\n    dfm.correlation_coefficient,\n    dfm.forecasted_occupancy_rate,\n    dfm.total_revenue,\n    dfm.forecasted_revenue,\n    CASE\n        WHEN dfm.correlation_coefficient > 0.7 THEN 'Strong Correlation'\n        WHEN dfm.correlation_coefficient > 0.5 THEN 'Moderate Correlation'\n        WHEN dfm.correlation_coefficient > 0.3 THEN 'Weak Correlation'\n        ELSE 'No Significant Correlation'\n    END AS correlation_strength\nFROM demand_forecasting_model dfm\nWHERE dfm.correlation_coefficient > 0.3\nORDER BY dfm.correlation_coefficient DESC, dfm.forecasted_revenue DESC\nLIMIT 300;",
      "line_number": 1062,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014343,
        "row_count": 0,
        "column_count": 16,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 6,
      "title": "Demographic Targeting Analysis with Income-Based Pricing Optimization and Market Penetration Modeling",
      "description": "Description: Analyzes parking demand by demographic segments using multi-level CTEs, income correlation analysis, age-based pattern detection, employment correlation, and demographic targeting optimization. Identifies optimal pricing strategies for different demographic segments. Use Case:
    Optimize pricing and marketing strategies by targeting specific demographic segments based on income, age, and employment patterns. Business Value: Demographic targeting report with segment-specific pricing re",
      "complexity": "Multi-level CTEs (6+ levels), demographic segmentation, correlation analysis, market penetration calculations, revenue optimization modeling, percentile rankings",
      "expected_output": "Demographic targeting analysis with segment-specific pricing and marketing recommendations.",
      "sql": "WITH demographic_segments AS (\n    SELECT\n        c.city_id,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.median_household_income,\n        c.median_age,\n        c.employment_total,\n        c.unemployment_rate,\n        CASE\n            WHEN c.median_household_income > 75000 THEN 'High Income'\n            WHEN c.median_household_income > 50000 THEN 'Medium Income'\n            ELSE 'Low Income'\n        END AS income_segment,\n        CASE\n            WHEN c.median_age > 45 THEN 'Older Demographics'\n            WHEN c.median_age > 35 THEN 'Middle Age'\n            ELSE 'Younger Demographics'\n        END AS age_segment,\n        CASE\n            WHEN c.unemployment_rate < 4 THEN 'Low Unemployment'\n            WHEN c.unemployment_rate < 7 THEN 'Medium Unemployment'\n            ELSE 'High Unemployment'\n        END AS employment_segment\n    FROM cities c\n    WHERE c.population > 50000\n),\nfacility_pricing_by_city AS (\n    SELECT\n        pf.city_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(DISTINCT pf.facility_id) AS facility_count\n    FROM parking_facilities pf\n    INNER JOIN parking_pricing pp ON pf.facility_id = pp.facility_id\n    WHERE pp.is_active = TRUE\n    GROUP BY pf.city_id\n),\nutilization_by_demographic AS (\n    SELECT\n        pf.city_id,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records\n    FROM parking_utilization pu\n    INNER JOIN parking_facilities pf ON 
pu.facility_id = pf.facility_id\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pf.city_id\n),\ndemographic_correlation AS (\n    SELECT\n        ds.*,\n        COALESCE(fpc.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(fpc.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(fpc.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(fpc.facility_count, 0) AS facility_count,\n        COALESCE(ubd.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ubd.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ubd.total_revenue, 0) AS total_revenue,\n        COALESCE(ubd.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        -- Revenue per capita\n        CASE\n            WHEN ds.population > 0 THEN\n                COALESCE(ubd.total_revenue, 0) / ds.population\n            ELSE 0\n        END AS revenue_per_capita,\n        -- Price sensitivity index\n        CASE\n            WHEN ds.median_household_income > 0 THEN\n                COALESCE(fpc.avg_hourly_rate, 0) / (ds.median_household_income / NULLIF(2000.0, 0))\n            ELSE 0\n        END AS price_sensitivity_index\n    FROM demographic_segments ds\n    LEFT JOIN facility_pricing_by_city fpc ON ds.city_id = fpc.city_id\n    LEFT JOIN utilization_by_demographic ubd ON ds.city_id = ubd.city_id\n),\nsegment_optimization AS (\n    SELECT\n        dc.*,\n        -- Optimal pricing by segment\n        CASE\n            WHEN dc.income_segment = 'High Income' AND dc.avg_occupancy_rate > 70 THEN\n                dc.avg_hourly_rate * 1.15  -- Can charge more for high income, high demand\n            WHEN dc.income_segment = 'High Income' AND dc.avg_occupancy_rate < 50 THEN\n                dc.avg_hourly_rate * 0.95  -- Slight discount for high income, low demand\n            WHEN dc.income_segment = 'Low Income' AND dc.avg_occupancy_rate < 50 THEN\n                dc.avg_hourly_rate * 0.85  -- Discount for low income, low 
demand\n            ELSE dc.avg_hourly_rate\n        END AS recommended_hourly_rate,\n        -- Market penetration score\n        (\n            CASE WHEN dc.facility_count > 50 THEN 25 ELSE dc.facility_count * 0.5 END +\n            CASE WHEN dc.avg_occupancy_rate > 75 THEN 25 ELSE dc.avg_occupancy_rate * 0.333 END +\n            CASE WHEN dc.revenue_per_capita > 10 THEN 25 ELSE dc.revenue_per_capita * 2.5 END +\n            CASE WHEN dc.price_sensitivity_index < 0.05 THEN 25 ELSE GREATEST(25 - dc.price_sensitivity_index * 500, 0) END\n        ) AS market_penetration_score\n    FROM demographic_correlation dc\n)\nSELECT\n    so.city_id,\n    so.city_name,\n    so.state_code,\n    so.income_segment,\n    so.age_segment,\n    so.employment_segment,\n    so.population,\n    so.median_household_income,\n    so.median_age,\n    so.facility_count,\n    so.avg_occupancy_rate,\n    so.avg_hourly_rate AS current_rate,\n    so.recommended_hourly_rate,\n    so.total_revenue,\n    so.revenue_per_capita,\n    so.price_sensitivity_index,\n    so.market_penetration_score,\n    CASE\n        WHEN so.market_penetration_score > 75 THEN 'High Penetration'\n        WHEN so.market_penetration_score > 50 THEN 'Medium Penetration'\n        ELSE 'Low Penetration'\n    END AS penetration_category\nFROM segment_optimization so\nORDER BY so.market_penetration_score DESC, so.total_revenue DESC\nLIMIT 200;",
      "line_number":
    1213,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006346,
        "row_count": 5,
        "column_count": 18,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 7,
      "title": "Market Segmentation Analysis with Multi-Dimensional Clustering and Revenue Optimization",
      "description": "Description: Segments parking markets using multi-dimensional clustering analysis with multi-level CTEs, K-means-like grouping algorithms, revenue potential scoring, segment-specific optimization strategies, and demographic-economic correlation analysis. Use Case: Identify distinct market segments for targeted marketing campaigns and segment-specific pricing strategies based on demographic, economic, and utilization characteristics. Business Value:
    Market segmentation report with segment profile",
      "complexity": "Multi-level CTEs (7+ levels), clustering algorithms, multi-dimensional distance calculations, revenue optimization modeling, segment profiling, percentile rankings",
      "expected_output": "Market segments with characteristics, revenue potential, and optimization recommendations.",
      "sql": "WITH city_characteristics AS (\n    SELECT\n        c.city_id,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.median_age,\n        c.employment_total,\n        c.unemployment_rate,\n        ma.msa_name,\n        ma.gdp_billions,\n        COUNT(DISTINCT pf.facility_id) AS facility_count,\n        SUM(pf.total_spaces) AS total_spaces,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate\n    FROM cities c\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    LEFT JOIN parking_facilities pf ON c.city_id = pf.city_id\n    LEFT JOIN parking_pricing pp ON pf.facility_id = pp.facility_id AND pp.is_active = TRUE\n    GROUP BY c.city_id, c.city_name, c.state_code, c.population, c.population_density,\n             c.median_household_income, c.median_age, c.employment_total, c.unemployment_rate,\n             ma.msa_name, ma.gdp_billions\n),\nutilization_metrics AS (\n    SELECT\n        pf.city_id,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        COUNT(*) AS utilization_records\n    FROM parking_utilization pu\n    INNER JOIN parking_facilities pf ON pu.facility_id = pf.facility_id\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pf.city_id\n),\nnormalized_characteristics AS (\n    SELECT\n        cc.*,\n        COALESCE(um.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(um.total_revenue, 0) AS total_revenue,\n        -- Normalized features for clustering (0-1 scale)\n        (cc.population_density - MIN(cc.population_density) OVER ()) / \n        NULLIF(MAX(cc.population_density) OVER () - MIN(cc.population_density) OVER (), 0) AS 
norm_density,\n        (cc.median_household_income - MIN(cc.median_household_income) OVER ()) / \n        NULLIF(MAX(cc.median_household_income) OVER () - MIN(cc.median_household_income) OVER (), 0) AS norm_income,\n        (COALESCE(um.avg_occupancy_rate, 0) - MIN(COALESCE(um.avg_occupancy_rate, 0)) OVER ()) / \n        NULLIF(MAX(COALESCE(um.avg_occupancy_rate, 0)) OVER () - MIN(COALESCE(um.avg_occupancy_rate, 0)) OVER (), 0) AS norm_occupancy,\n        (cc.facility_count - MIN(cc.facility_count) OVER ()) / \n        NULLIF(MAX(cc.facility_count) OVER () - MIN(cc.facility_count) OVER (), 0) AS norm_facilities\n    FROM city_characteristics cc\n    LEFT JOIN utilization_metrics um ON cc.city_id = um.city_id\n),\nsegment_assignment AS (\n    SELECT\n        nc.*,\n        -- Simple segmentation based on normalized characteristics\n        CASE\n            WHEN nc.norm_density > 0.7 AND nc.norm_income > 0.7 AND nc.norm_occupancy > 0.7 THEN 'Premium Urban'\n            WHEN nc.norm_density > 0.5 AND nc.norm_income > 0.5 THEN 'Urban Professional'\n            WHEN nc.norm_density > 0.3 AND nc.norm_income < 0.5 THEN 'Urban Value'\n            WHEN nc.norm_density < 0.3 AND nc.norm_income > 0.7 THEN 'Suburban Premium'\n            WHEN nc.norm_density < 0.3 AND nc.norm_income > 0.5 THEN 'Suburban Standard'\n            ELSE 'Emerging Market'\n        END AS market_segment,\n        -- Revenue potential score\n        (\n            nc.norm_density * 0.25 +\n            nc.norm_income * 0.25 +\n            nc.norm_occupancy * 0.30 +\n            nc.norm_facilities * 0.20\n        ) * 100 AS revenue_potential_score\n    FROM normalized_characteristics nc\n),\nsegment_profiles AS (\n    SELECT\n        sa.market_segment,\n        COUNT(*) AS city_count,\n        AVG(sa.population) AS avg_population,\n        AVG(sa.population_density) AS avg_density,\n        AVG(sa.median_household_income) AS avg_income,\n        AVG(sa.avg_occupancy_rate) AS avg_occupancy_rate,\n        
AVG(sa.total_revenue) AS avg_revenue,\n        AVG(sa.facility_count) AS avg_facility_count,\n        AVG(sa.avg_hourly_rate) AS avg_hourly_rate,\n        AVG(sa.revenue_potential_score) AS avg_revenue_potential,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sa.revenue_potential_score) AS median_revenue_potential\n    FROM segment_assignment sa\n    GROUP BY sa.market_segment\n),\nsegment_optimization AS (\n    SELECT\n        sa.*,\n        sp.avg_revenue_potential,\n        sp.median_revenue_potential,\n        -- Segment-specific pricing recommendation\n        CASE\n            WHEN sa.market_segment = 'Premium Urban' THEN sa.avg_hourly_rate * 1.20\n            WHEN sa.market_segment = 'Urban Professional' THEN sa.avg_hourly_rate * 1.10\n            WHEN sa.market_segment = 'Urban Value' THEN sa.avg_hourly_rate * 0.95\n            WHEN sa.market_segment = 'Suburban Premium' THEN sa.avg_hourly_rate * 1.05\n            WHEN sa.market_segment = 'Suburban Standard' THEN sa.avg_hourly_rate * 1.00\n            ELSE sa.avg_hourly_rate * 0.90\n        END AS recommended_segment_rate,\n        -- Growth opportunity score\n        CASE\n            WHEN sa.revenue_potential_score > sp.avg_revenue_potential * 1.2 THEN 'High Growth'\n            WHEN sa.revenue_potential_score > sp.avg_revenue_potential THEN 'Medium Growth'\n            ELSE 'Low Growth'\n        END AS growth_opportunity\n    FROM segment_assignment sa\n    INNER JOIN segment_profiles sp ON sa.market_segment = sp.market_segment\n)\nSELECT\n    so.city_id,\n    so.city_name,\n    so.state_code,\n    so.market_segment,\n    so.population,\n    so.population_density,\n    so.median_household_income,\n    so.facility_count,\n    so.avg_occupancy_rate,\n    so.total_revenue,\n    so.revenue_potential_score,\n    so.avg_hourly_rate AS current_rate,\n    so.recommended_segment_rate,\n    so.growth_opportunity,\n    ROW_NUMBER() OVER (PARTITION BY so.market_segment ORDER BY so.revenue_potential_score DESC) 
AS segment_rank\nFROM segment_optimization so\nORDER BY so.revenue_potential_score DESC\nLIMIT 300;",
      "line_number":
    1359,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005912,
        "row_count": 5,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 8,
      "title": "Utilization Pattern Analysis with Temporal Decomposition and Anomaly Detection",
      "description": "Description: Analyzes parking utilization patterns using time-series decomposition CTEs, seasonal trend analysis with window functions, anomaly detection algorithms using statistical methods, and pattern classification for capacity planning. Use Case:
    Identify utilization patterns, seasonal trends, and anomalies for capacity planning, pricing optimization, and operational efficiency improvements. Business Value: Utilization pattern report with trend analysis, anomaly alerts, capacity planning re",
      "complexity": "Time-series CTEs (6+ levels), decomposition analysis, statistical anomaly detection, pattern classification, moving averages, seasonal decomposition",
      "expected_output": "Utilization patterns with trend analysis, anomaly flags, and capacity planning recommendations.",
      "sql": "WITH hourly_utilization_base AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        pu.occupancy_rate,\n        pu.revenue_generated,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week,\n        DATE_PART('month', pu.utilization_date) AS month_num,\n        pf.city_id,\n        pf.facility_type\n    FROM parking_utilization pu\n    INNER JOIN parking_facilities pf ON pu.facility_id = pf.facility_id\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '365 days'\n),\ntemporal_aggregations AS (\n    SELECT\n        hub.facility_id,\n        hub.utilization_hour,\n        hub.day_of_week,\n        hub.month_num,\n        AVG(hub.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY hub.occupancy_rate) AS median_occupancy_rate,\n        STDDEV(hub.occupancy_rate) AS stddev_occupancy_rate,\n        COUNT(*) AS record_count,\n        AVG(hub.revenue_generated) AS avg_revenue\n    FROM hourly_utilization_base hub\n    GROUP BY hub.facility_id, hub.utilization_hour, hub.day_of_week, hub.month_num\n),\nmoving_averages AS (\n    SELECT\n        ta.*,\n        AVG(ta.avg_occupancy_rate) OVER (\n            PARTITION BY ta.facility_id, ta.utilization_hour\n            ORDER BY ta.month_num\n            ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING\n        ) AS ma_5_month,\n        AVG(ta.avg_occupancy_rate) OVER (\n            PARTITION BY ta.facility_id\n            ORDER BY ta.month_num\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS ma_6_month\n    FROM temporal_aggregations ta\n),\nseasonal_decomposition AS (\n    SELECT\n        ma.*,\n        ma.avg_occupancy_rate - ma.ma_6_month AS seasonal_component,\n        ma.ma_6_month AS trend_component,\n        ma.avg_occupancy_rate - ma.ma_5_month AS residual_component,\n        CASE\n            WHEN ma.month_num IN (6, 7, 8) THEN 'Summer'\n            WHEN ma.month_num IN (12, 1, 2) THEN 
'Winter'\n            WHEN ma.month_num IN (3, 4, 5) THEN 'Spring'\n            ELSE 'Fall'\n        END AS season\n    FROM moving_averages ma\n),\nanomaly_detection AS (\n    SELECT\n        sd.*,\n        CASE\n            WHEN ABS(sd.residual_component) > sd.stddev_occupancy_rate * 2 THEN TRUE\n            ELSE FALSE\n        END AS is_anomaly,\n        CASE\n            WHEN sd.avg_occupancy_rate > sd.ma_5_month + sd.stddev_occupancy_rate * 1.5 THEN 'High Anomaly'\n            WHEN sd.avg_occupancy_rate < sd.ma_5_month - sd.stddev_occupancy_rate * 1.5 THEN 'Low Anomaly'\n            ELSE 'Normal'\n        END AS anomaly_type\n    FROM seasonal_decomposition sd\n)\nSELECT\n    ad.facility_id,\n    pf.facility_name,\n    pf.city_id,\n    c.city_name,\n    ad.utilization_hour,\n    ad.day_of_week,\n    ad.month_num,\n    ad.season,\n    ad.avg_occupancy_rate,\n    ad.median_occupancy_rate,\n    ad.trend_component,\n    ad.seasonal_component,\n    ad.residual_component,\n    ad.is_anomaly,\n    ad.anomaly_type,\n    ad.avg_revenue,\n    CASE\n        WHEN ad.trend_component > 10 THEN 'Increasing Trend'\n        WHEN ad.trend_component < -10 THEN 'Decreasing Trend'\n        ELSE 'Stable Trend'\n    END AS trend_direction\nFROM anomaly_detection ad\nINNER JOIN parking_facilities pf ON ad.facility_id = pf.facility_id\nINNER JOIN cities c ON pf.city_id = c.city_id\nWHERE ad.is_anomaly = TRUE OR ad.trend_component != 0\nORDER BY ad.facility_id, ad.month_num, ad.utilization_hour\nLIMIT 500;",
      "line_number": 1509,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006553,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 9,
      "title": "Geographic Expansion Analysis with Market Opportunity Scoring and Risk Assessment",
      "description": "Description: Identifies geographic expansion opportunities using multi-level CTEs, market opportunity scoring algorithms, competitive landscape analysis, risk assessment modeling, and expansion prioritization ranking. Use Case: Prioritize geographic expansion markets by analyzing market opportunities, competitive landscapes, and risk factors for strategic market entry decisions. Business Value:
    Geographic expansion report with market opportunity scores, risk assessments, competitive analysis, an",
      "complexity": "Multi-level CTEs (7+ levels), market scoring algorithms, risk modeling, competitive analysis, spatial proximity calculations, expansion prioritization",
      "expected_output": "Market expansion opportunities with opportunity scores, risk assessments, and expansion priorities.",
      "sql": "WITH candidate_markets AS (\n    SELECT\n        c.city_id,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        COUNT(DISTINCT pf.facility_id) AS existing_facilities,\n        COUNT(DISTINCT CASE WHEN pf.accepts_reservations THEN pf.facility_id END) AS reservation_facilities\n    FROM cities c\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    LEFT JOIN parking_facilities pf ON c.city_id = pf.city_id\n    WHERE c.population > 100000\n    AND NOT EXISTS (\n        SELECT 1 FROM parking_facilities pf2\n        WHERE pf2.city_id = c.city_id\n        AND pf2.operator_type = 'Private'\n        AND pf2.accepts_reservations = TRUE\n    )\n    GROUP BY c.city_id, c.city_name, c.state_code, c.population, c.population_density,\n             c.median_household_income, c.employment_total, ma.msa_name, ma.gdp_billions\n),\nmarket_metrics AS (\n    SELECT\n        cm.*,\n        COALESCE(AVG(pu.occupancy_rate), 0) AS avg_occupancy_rate,\n        COALESCE(SUM(pu.revenue_generated), 0) AS total_revenue,\n        COUNT(DISTINCT pu.facility_id) AS facilities_with_data\n    FROM candidate_markets cm\n    LEFT JOIN parking_facilities pf ON cm.city_id = pf.city_id\n    LEFT JOIN parking_utilization pu ON pf.facility_id = pu.facility_id\n        AND pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY cm.city_id, cm.city_name, cm.state_code, cm.population, cm.population_density,\n             cm.median_household_income, cm.employment_total, cm.msa_name, cm.gdp_billions,\n             cm.existing_facilities, cm.reservation_facilities\n),\ncompetitive_landscape AS (\n    SELECT\n        mm.city_id,\n        COUNT(DISTINCT pf2.facility_id) AS competitor_facilities,\n        AVG(pp2.base_rate_hourly) AS avg_competitor_rate,\n        COUNT(DISTINCT pf2.operator_name) AS 
operator_count\n    FROM market_metrics mm\n    LEFT JOIN parking_facilities pf2 ON mm.city_id = pf2.city_id\n    LEFT JOIN parking_pricing pp2 ON pf2.facility_id = pp2.facility_id AND pp2.is_active = TRUE\n    GROUP BY mm.city_id\n),\nrisk_assessment AS (\n    SELECT\n        mm.*,\n        cl.competitor_facilities,\n        cl.avg_competitor_rate,\n        cl.operator_count,\n        CASE\n            WHEN mm.existing_facilities = 0 THEN 0.3  -- Low competition risk\n            WHEN mm.existing_facilities < 10 THEN 0.5  -- Medium competition risk\n            ELSE 0.7  -- High competition risk\n        END AS competition_risk_score,\n        CASE\n            WHEN mm.population_density < 1000 THEN 0.6  -- Low density risk\n            WHEN mm.population_density < 3000 THEN 0.3  -- Medium density risk\n            ELSE 0.1  -- High density (low risk)\n        END AS density_risk_score,\n        CASE\n            WHEN mm.median_household_income < 40000 THEN 0.5  -- Income risk\n            WHEN mm.median_household_income < 60000 THEN 0.3\n            ELSE 0.1\n        END AS income_risk_score\n    FROM market_metrics mm\n    LEFT JOIN competitive_landscape cl ON mm.city_id = cl.city_id\n),\nopportunity_scoring AS (\n    SELECT\n        ra.*,\n        -- Market opportunity score (0-100)\n        (\n            LEAST(ra.population / 1000000.0, 1.0) * 25 +\n            LEAST(ra.population_density / 10000.0, 1.0) * 20 +\n            LEAST(ra.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(ra.avg_occupancy_rate / 100.0, 1.0) * 15 +\n            CASE WHEN ra.existing_facilities = 0 THEN 20 ELSE GREATEST(20 - ra.existing_facilities * 0.5, 0) END\n        ) AS opportunity_score,\n        -- Overall risk score (0-1, higher is riskier)\n        (ra.competition_risk_score * 0.4 + ra.density_risk_score * 0.3 + ra.income_risk_score * 0.3) AS overall_risk_score\n    FROM risk_assessment ra\n),\nexpansion_prioritization AS (\n    SELECT\n        os.*,\n        
os.opportunity_score * (1 - os.overall_risk_score) AS expansion_priority_score,\n        ROW_NUMBER() OVER (ORDER BY os.opportunity_score * (1 - os.overall_risk_score) DESC) AS expansion_rank,\n        CASE\n            WHEN os.opportunity_score * (1 - os.overall_risk_score) > 60 THEN 'High Priority'\n            WHEN os.opportunity_score * (1 - os.overall_risk_score) > 40 THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS expansion_priority\n    FROM opportunity_scoring os\n)\nSELECT\n    ep.city_id,\n    ep.city_name,\n    ep.state_code,\n    ep.msa_name,\n    ep.population,\n    ep.population_density,\n    ep.median_household_income,\n    ep.existing_facilities,\n    ep.competitor_facilities,\n    ep.avg_occupancy_rate,\n    ep.opportunity_score,\n    ep.overall_risk_score,\n    ep.expansion_priority_score,\n    ep.expansion_rank,\n    ep.expansion_priority\nFROM expansion_prioritization ep\nORDER BY ep.expansion_priority_score DESC\nLIMIT 100;",
      "line_number": 1626,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006036,
        "row_count": 4,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 10,
      "title": "Revenue Forecasting Models with Time-Series Analysis and Confidence Intervals",
      "description": "Description: Forecasts parking revenue using time-series analysis CTEs, ARIMA-like modeling, confidence interval calculations, and revenue projection scenarios. Use Case: Predict future revenue trends for financial planning and budget forecasting. Business Value:
    Revenue forecast report with projections, confidence intervals, and scenario analysis. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Time-series CTEs (7+ levels), forecasting models, confidence int",
      "complexity": "Time-series CTEs (7+ levels), forecasting models, confidence intervals, scenario analysis",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    1764,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.01517,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 11,
      "title": "Competitive Positioning Analysis with Market Share Dynamics and Strategic Recommendations",
      "description": "Description: Analyzes competitive positioning using multi-level CTEs, market share calculations, competitive advantage scoring, and strategic positioning recommendations. Use Case: Understand competitive position and develop strategic recommendations for market leadership. Business Value:
    Competitive positioning report with market share analysis and strategic recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Multi-level CTEs (6+ levels), market",
      "complexity": "Multi-level CTEs (6+ levels), market share calculations, competitive analysis, strategic scoring",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    2014,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013831,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 12,
      "title": "Business District Analysis with Parking Demand Correlation and Revenue Optimization",
      "description": "Description: Analyzes business districts using spatial CTEs, employment correlation analysis, parking demand modeling, and district-specific revenue optimization. Use Case: Optimize parking strategies for business districts based on employment patterns and demand. Business Value:
    Business district analysis report with demand correlations and revenue optimization strategies. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Spatial CTEs (6+ levels), correlation ",
      "complexity": "Spatial CTEs (6+ levels), correlation analysis, demand modeling, revenue optimization",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    2264,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.015822,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 13,
      "title": "Monthly Parking Optimization with Customer Lifetime Value and Retention Analysis",
      "description": "Description: Optimizes monthly parking pricing using CTEs for customer segmentation, lifetime value calculations, retention analysis, and pricing optimization. Use Case:
    Optimize monthly parking pricing to maximize customer lifetime value and retention. Business Value: Monthly parking optimization report with CLV analysis and retention strategies. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Customer segmentation CTEs (7+ levels), CLV calculations, retenti",
      "complexity": "Customer segmentation CTEs (7+ levels), CLV calculations, retention modeling, pricing optimization",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    2514,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014562,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 14,
      "title": "EV Charging Facility Analysis with Demand Forecasting and Revenue Impact Modeling",
      "description": "Description: Analyzes EV charging facilities using multi-level CTEs, demand forecasting, revenue impact calculations, and expansion recommendations. Use Case: Plan EV charging facility expansion and optimize revenue from EV charging services. Business Value: EV charging analysis report with demand forecasts and expansion recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Multi-level CTEs (6+ levels), demand forecasting, revenue impact modeling, ",
      "complexity": "Multi-level CTEs (6+ levels), demand forecasting, revenue impact modeling, expansion analysis",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    2764,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013622,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 15,
      "title": "Reservation vs Walk-in Analysis with Revenue Optimization and Capacity Planning",
      "description": "Description: Compares reservation and walk-in patterns using CTEs for pattern analysis, revenue comparison, capacity optimization, and booking strategy recommendations. Use Case: Optimize reservation vs walk-in mix to maximize revenue and capacity utilization. Business Value: Reservation analysis report with revenue optimization and capacity planning recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Pattern analysis CTEs (6+ levels), revenue comparison, capacity optimization, booking strategies.",
      "complexity": "Pattern analysis CTEs (6+ levels), revenue comparison, capacity optimization, booking strategies",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    3014,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013609,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 16,
      "title": "Peak Hour Analysis with Dynamic Pricing Optimization and Capacity Management",
      "description": "Description: Analyzes peak hour patterns using temporal CTEs, dynamic pricing models, capacity management algorithms, and revenue maximization strategies. Use Case: Implement dynamic pricing and capacity management for peak hours to maximize revenue. Business Value: Peak hour analysis report with dynamic pricing recommendations and capacity management strategies. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Temporal CTEs (7+ levels), dynamic pricing models, capacity management, revenue optimization.",
      "complexity": "Temporal CTEs (7+ levels), dynamic pricing models, capacity management, revenue optimization",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    3264,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014784,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 17,
      "title": "Weekend vs Weekday Pattern Analysis with Pricing Strategy Optimization",
      "description": "Description: Compares weekend and weekday patterns using CTEs for pattern analysis, pricing strategy optimization, and revenue maximization. Use Case: Optimize pricing strategies for weekends vs weekdays to maximize revenue. Business Value: Weekend/weekday analysis report with pricing strategy recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Pattern comparison CTEs (6+ levels), pricing optimization, revenue analysis. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Pattern comparison CTEs (6+ levels), pricing optimization, revenue analysis",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    3514,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.016257,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 18,
      "title": "Facility Type Optimization with Performance Benchmarking and Revenue Maximization",
      "description": "Description: Analyzes facility types using CTEs for performance benchmarking, revenue comparison, optimization recommendations, and type-specific strategies. Use Case:
    Optimize facility type mix and strategies for different facility types. Business Value: Facility type analysis report with performance benchmarks and optimization recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Benchmarking CTEs (6+ levels), performance comparison, optimization modeling. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Benchmarking CTEs (6+ levels), performance comparison, optimization modeling",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    3764,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.015317,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 19,
      "title": "Operator Type Analysis with Market Share and Competitive Advantage Assessment",
      "description": "Description: Analyzes operator types using CTEs for market share calculations, competitive advantage assessment, and operator-specific strategies. Use Case:
    Understand operator type dynamics and develop competitive strategies. Business Value: Operator type analysis report with market share and competitive advantage insights. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Market analysis CTEs (6+ levels), competitive assessment, strategic analysis. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Market analysis CTEs (6+ levels), competitive assessment, strategic analysis",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    4014,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.015763,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 20,
      "title": "Multi-City Comparison Analysis with Performance Benchmarking and Best Practice Identification",
      "description": "Description: Compares multiple cities using CTEs for performance benchmarking, best practice identification, and cross-city learning opportunities. Use Case:
    Identify best practices and performance benchmarks across cities. Business Value: Multi-city comparison report with benchmarks and best practices. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Comparison CTEs (7+ levels), benchmarking, best practice analysis. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Comparison CTEs (7+ levels), benchmarking, best practice analysis",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    4264,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014574,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 21,
      "title": "MSA-Level Aggregation Analysis with Regional Market Analysis and Expansion Opportunities",
      "description": "Description: Aggregates data at MSA level using CTEs for regional analysis, market analysis, and expansion opportunity identification. Use Case:
    Analyze markets at metropolitan area level for regional expansion planning. Business Value: MSA-level analysis report with regional insights and expansion opportunities. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Aggregation CTEs (6+ levels), regional analysis, expansion planning. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Aggregation CTEs (6+ levels), regional analysis, expansion planning",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    4514,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014973,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 22,
      "title": "Time-Series Forecasting with Seasonal Decomposition and Trend Analysis",
      "description": "Description: Forecasts time-series patterns using CTEs for seasonal decomposition, trend analysis, and predictive modeling. Use Case:
    Forecast future trends and patterns for strategic planning. Business Value: Time-series forecast report with seasonal patterns and trend projections. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Time-series CTEs (7+ levels), decomposition, trend analysis, forecasting. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Time-series CTEs (7+ levels), decomposition, trend analysis, forecasting",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    4764,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014254,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 23,
      "title": "Anomaly Detection Analysis with Statistical Methods and Alert Generation",
      "description": "Description: Detects anomalies using CTEs with statistical methods, outlier identification, and alert generation for operational monitoring. Use Case: Identify anomalies and outliers for operational monitoring and alerting. Business Value: Anomaly detection report with statistical analysis and alert recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Statistical CTEs (6+ levels), outlier detection, alert generation. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Statistical CTEs (6+ levels), outlier detection, alert generation",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    5014,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.016355,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 24,
      "title": "Customer Segmentation Analysis with Behavioral Patterns and Targeting Strategies",
      "description": "Description: Segments customers using CTEs for behavioral analysis, pattern identification, and targeted marketing strategies. Use Case: Segment customers for targeted marketing and personalized strategies. Business Value: Customer segmentation report with behavioral insights and targeting strategies. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Segmentation CTEs (7+ levels), behavioral analysis, targeting strategies. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Segmentation CTEs (7+ levels), behavioral analysis, targeting strategies",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    5264,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014679,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 25,
      "title": "Price Elasticity Analysis with Demand Response Modeling and Revenue Optimization",
      "description": "Description: Analyzes price elasticity using CTEs for demand response modeling, elasticity calculations, and revenue optimization. Use Case: Understand price sensitivity and optimize pricing for revenue maximization. Business Value: Price elasticity report with demand response analysis and pricing recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Elasticity CTEs (6+ levels), demand modeling, revenue optimization. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Elasticity CTEs (6+ levels), demand modeling, revenue optimization",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    5514,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.015452,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 26,
      "title": "Supply-Demand Gap Analysis with Capacity Planning and Facility Expansion Recommendations",
      "description": "Description: Analyzes supply-demand gaps using CTEs for gap identification, capacity planning, and expansion recommendations. Use Case: Identify supply-demand gaps and plan facility expansion. Business Value: Supply-demand gap report with capacity planning and expansion recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Gap analysis CTEs (6+ levels), capacity planning, expansion modeling. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Gap analysis CTEs (6+ levels), capacity planning, expansion modeling",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    5764,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014485,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 27,
      "title": "Market Penetration Analysis with Growth Metrics and Expansion Strategies",
      "description": "Description: Analyzes market penetration using CTEs for penetration calculations, growth metrics, and expansion strategy development. Use Case: Measure market penetration and develop expansion strategies. Business Value: Market penetration report with growth metrics and expansion strategies. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Penetration CTEs (6+ levels), growth metrics, expansion strategies. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Penetration CTEs (6+ levels), growth metrics, expansion strategies",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    6014,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.013537,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 28,
      "title": "Revenue Per Square Foot Analysis with Facility Efficiency Optimization",
      "description": "Description: Analyzes revenue per square foot using CTEs for efficiency calculations, facility optimization, and space utilization analysis. Use Case: Optimize facility efficiency and space utilization for revenue maximization. Business Value: Revenue efficiency report with facility optimization recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Efficiency CTEs (6+ levels), space utilization, optimization modeling. Expected Output: Query results with analysis and recommendations.",
      "complexity": "Efficiency CTEs (6+ levels), space utilization, optimization modeling",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    6264,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014094,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 29,
      "title": "Comprehensive Market Dashboard with Multi-Dimensional Analytics",
      "description": "Description: Creates comprehensive dashboard using CTEs for multi-dimensional analytics, KPI calculations, and executive reporting. Use Case:
    Provide executive dashboard with comprehensive market metrics. Business Value: Executive dashboard with comprehensive market metrics and KPIs. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Dashboard CTEs (8+ levels), multi-dimensional analytics, KPI calculations Expected Output: Query results with analysis and recomme",
      "complexity": "Dashboard CTEs (8+ levels), multi-dimensional analytics, KPI calculations",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    6514,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014057,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    },
    {
      "number": 30,
      "title": "Cross-Database Performance Optimization Analysis with Query Efficiency Metrics",
      "description": "Description: Analyzes cross-database performance using CTEs for query efficiency metrics, optimization recommendations, and performance benchmarking. Use Case:
    Optimize query performance across PostgreSQL, Databricks, and Snowflake. Business Value: Performance optimization report with efficiency metrics and optimization recommendations. Purpose: Enables data-driven decision making through advanced analytics. Complexity: Performance CTEs (7+ levels), efficiency metrics, optimization analysis Expe",
      "complexity": "Performance CTEs (7+ levels), efficiency metrics, optimization analysis",
      "expected_output": "Query results with analysis and recommendations.",
      "sql": "WITH facility_base_data AS (\n    SELECT\n        pf.facility_id,\n        pf.facility_name,\n        pf.city_id,\n        pf.total_spaces,\n        pf.facility_type,\n        pf.operator_type,\n        pf.latitude,\n        pf.longitude,\n        pf.is_event_parking,\n        pf.is_monthly_parking,\n        pf.accepts_reservations,\n        c.city_name,\n        c.state_code,\n        c.population,\n        c.population_density,\n        c.median_household_income,\n        c.employment_total,\n        ma.msa_name,\n        ma.gdp_billions,\n        ma.population_estimate AS msa_population\n    FROM parking_facilities pf\n    INNER JOIN cities c ON pf.city_id = c.city_id\n    INNER JOIN metropolitan_areas ma ON c.msa_id = ma.msa_id\n    WHERE pf.is_hourly_parking = TRUE\n),\nutilization_aggregations AS (\n    SELECT\n        pu.facility_id,\n        pu.utilization_date,\n        pu.utilization_hour,\n        AVG(pu.occupancy_rate) AS avg_occupancy_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS median_occupancy_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p75_occupancy_rate,\n        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY pu.occupancy_rate) AS p95_occupancy_rate,\n        SUM(pu.revenue_generated) AS total_revenue,\n        AVG(pu.revenue_generated) AS avg_revenue_per_record,\n        COUNT(*) AS utilization_records,\n        COUNT(DISTINCT pu.utilization_date) AS days_with_data,\n        DATE_PART('dow', pu.utilization_date) AS day_of_week\n    FROM parking_utilization pu\n    WHERE pu.utilization_date >= CURRENT_DATE - INTERVAL '90 days'\n    GROUP BY pu.facility_id, pu.utilization_date, pu.utilization_hour, DATE_PART('dow', pu.utilization_date)\n),\npricing_analysis AS (\n    SELECT\n        pp.facility_id,\n        AVG(pp.base_rate_hourly) AS avg_hourly_rate,\n        AVG(pp.base_rate_daily) AS avg_daily_rate,\n        AVG(pp.base_rate_monthly) AS avg_monthly_rate,\n       
 MIN(pp.base_rate_hourly) AS min_hourly_rate,\n        MAX(pp.base_rate_hourly) AS max_hourly_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS median_hourly_rate,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY pp.base_rate_hourly) AS p75_hourly_rate,\n        COUNT(*) AS pricing_records\n    FROM parking_pricing pp\n    WHERE pp.is_active = TRUE\n    AND pp.pricing_type IN ('Hourly', 'Daily', 'Monthly')\n    GROUP BY pp.facility_id\n),\ncompetitive_landscape AS (\n    SELECT\n        fbd1.facility_id,\n        fbd1.city_id,\n        COUNT(DISTINCT fbd2.facility_id) AS competitor_count,\n        AVG(fbd2.total_spaces) AS avg_competitor_spaces,\n        SUM(fbd2.total_spaces) AS total_competitor_spaces,\n        AVG(pa2.avg_hourly_rate) AS avg_competitor_rate,\n        MIN(pa2.avg_hourly_rate) AS min_competitor_rate,\n        MAX(pa2.avg_hourly_rate) AS max_competitor_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pa2.avg_hourly_rate) AS median_competitor_rate,\n        AVG(\n            CASE\n                WHEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                ) < 500 THEN ST_DISTANCE(\n                    ST_POINT(fbd1.longitude, fbd1.latitude),\n                    ST_POINT(fbd2.longitude, fbd2.latitude)\n                )\n                ELSE NULL\n            END\n        ) AS avg_distance_to_competitors\n    FROM facility_base_data fbd1\n    LEFT JOIN facility_base_data fbd2 ON fbd1.city_id = fbd2.city_id\n        AND fbd2.facility_id != fbd1.facility_id\n        AND ST_DISTANCE(\n            ST_POINT(fbd1.longitude, fbd1.latitude),\n            ST_POINT(fbd2.longitude, fbd2.latitude)\n        ) < 1000\n    LEFT JOIN pricing_analysis pa2 ON fbd2.facility_id = pa2.facility_id\n    GROUP BY fbd1.facility_id, fbd1.city_id, fbd1.latitude, fbd1.longitude\n),\nmarket_intelligence AS (\n    SELECT\n        
fbd.facility_id,\n        fbd.facility_name,\n        fbd.city_id,\n        fbd.city_name,\n        fbd.state_code,\n        fbd.msa_name,\n        fbd.total_spaces,\n        fbd.facility_type,\n        fbd.operator_type,\n        fbd.population,\n        fbd.population_density,\n        fbd.median_household_income,\n        fbd.msa_population,\n        COALESCE(ua.avg_occupancy_rate, 0) AS avg_occupancy_rate,\n        COALESCE(ua.median_occupancy_rate, 0) AS median_occupancy_rate,\n        COALESCE(ua.p95_occupancy_rate, 0) AS p95_occupancy_rate,\n        COALESCE(ua.total_revenue, 0) AS total_revenue,\n        COALESCE(ua.avg_revenue_per_record, 0) AS avg_revenue_per_record,\n        COALESCE(ua.days_with_data, 0) AS days_with_data,\n        COALESCE(pa.avg_hourly_rate, 0) AS avg_hourly_rate,\n        COALESCE(pa.median_hourly_rate, 0) AS median_hourly_rate,\n        COALESCE(pa.p75_hourly_rate, 0) AS p75_hourly_rate,\n        COALESCE(cl.competitor_count, 0) AS competitor_count,\n        COALESCE(cl.avg_competitor_rate, 0) AS avg_competitor_rate,\n        COALESCE(cl.median_competitor_rate, 0) AS median_competitor_rate,\n        COALESCE(cl.avg_distance_to_competitors, 0) AS avg_distance_to_competitors,\n        -- Market opportunity score\n        (\n            LEAST(fbd.population_density / 10000.0, 1.0) * 25 +\n            LEAST(COALESCE(ua.avg_occupancy_rate, 0) / 100.0, 1.0) * 25 +\n            LEAST(fbd.median_household_income / 100000.0, 1.0) * 20 +\n            LEAST(COALESCE(cl.competitor_count, 0) / 10.0, 1.0) * 15 +\n            LEAST(fbd.total_spaces / 500.0, 1.0) * 15\n        ) AS market_opportunity_score,\n        -- Competitive advantage score\n        (\n            CASE WHEN COALESCE(pa.avg_hourly_rate, 0) < COALESCE(cl.avg_competitor_rate, 999) THEN 30 ELSE 0 END +\n            CASE WHEN COALESCE(ua.avg_occupancy_rate, 0) > 80 THEN 25 ELSE COALESCE(ua.avg_occupancy_rate, 0) * 0.3125 END +\n            CASE WHEN COALESCE(cl.competitor_count, 
0) < 3 THEN 25 ELSE GREATEST(25 - COALESCE(cl.competitor_count, 0) * 2, 0) END +\n            CASE WHEN fbd.accepts_reservations THEN 20 ELSE 0 END\n        ) AS competitive_advantage_score\n    FROM facility_base_data fbd\n    LEFT JOIN utilization_aggregations ua ON fbd.facility_id = ua.facility_id\n    LEFT JOIN pricing_analysis pa ON fbd.facility_id = pa.facility_id\n    LEFT JOIN competitive_landscape cl ON fbd.facility_id = cl.facility_id\n),\nranked_analysis AS (\n    SELECT\n        mi.*,\n        ROW_NUMBER() OVER (PARTITION BY mi.city_id ORDER BY mi.market_opportunity_score DESC) AS city_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.total_revenue DESC) AS revenue_rank,\n        ROW_NUMBER() OVER (ORDER BY mi.competitive_advantage_score DESC) AS competitive_rank,\n        PERCENT_RANK() OVER (ORDER BY mi.market_opportunity_score) AS opportunity_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.total_revenue) AS revenue_percentile,\n        PERCENT_RANK() OVER (ORDER BY mi.competitive_advantage_score) AS competitive_percentile,\n        LAG(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS prev_opportunity_score,\n        LEAD(mi.market_opportunity_score) OVER (ORDER BY mi.market_opportunity_score DESC) AS next_opportunity_score,\n        AVG(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_avg_opportunity_score,\n        STDDEV(mi.market_opportunity_score) OVER (PARTITION BY mi.city_id) AS city_stddev_opportunity_score,\n        AVG(mi.total_revenue) OVER (PARTITION BY mi.city_id) AS city_avg_revenue,\n        AVG(mi.avg_occupancy_rate) OVER (PARTITION BY mi.city_id) AS city_avg_occupancy_rate\n    FROM market_intelligence mi\n),\noptimization_recommendations AS (\n    SELECT\n        ra.*,\n        CASE\n            WHEN ra.market_opportunity_score >= ra.city_avg_opportunity_score + ra.city_stddev_opportunity_score THEN 'High Priority'\n            WHEN ra.market_opportunity_score >= 
ra.city_avg_opportunity_score THEN 'Medium Priority'\n            ELSE 'Low Priority'\n        END AS optimization_priority,\n        CASE\n            WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 1.15  -- Increase price if high demand and underpriced\n            WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                ra.avg_hourly_rate * 0.90  -- Decrease price if low demand and overpriced\n            WHEN ra.avg_hourly_rate < ra.median_competitor_rate * 0.9 THEN\n                ra.median_competitor_rate * 0.95  -- Price slightly below median if significantly underpriced\n            ELSE ra.avg_hourly_rate  -- Keep current price\n        END AS recommended_rate,\n        -- Revenue impact estimate\n        (\n            CASE\n                WHEN ra.avg_occupancy_rate > 85 AND ra.avg_hourly_rate < ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 1.15 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                WHEN ra.avg_occupancy_rate < 50 AND ra.avg_hourly_rate > ra.avg_competitor_rate THEN\n                    ra.avg_hourly_rate * 0.90 * ra.total_spaces * ra.avg_occupancy_rate / 100.0 * ra.days_with_data\n                ELSE ra.total_revenue\n            END - ra.total_revenue\n        ) AS estimated_revenue_impact,\n        -- Market share estimate\n        CASE\n            WHEN ra.competitor_count > 0 AND ra.total_spaces > 0 THEN\n                (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0)) /\n                NULLIF(\n                    (ra.total_spaces * ra.avg_occupancy_rate / NULLIF(100.0, 0) +\n                    (ra.competitor_count * COALESCE(ra.avg_competitor_rate, 0) * 50)),  -- Estimate competitor spaces\n                    0\n                ) * 100\n            ELSE 100.0\n        END AS estimated_market_share_pct\n    FROM ranked_analysis 
ra\n)\nSELECT\n    or_rec.facility_id,\n    or_rec.facility_name,\n    or_rec.city_name,\n    or_rec.state_code,\n    or_rec.msa_name,\n    or_rec.total_spaces,\n    or_rec.facility_type,\n    or_rec.operator_type,\n    or_rec.avg_occupancy_rate,\n    or_rec.median_occupancy_rate,\n    or_rec.p95_occupancy_rate,\n    or_rec.total_revenue,\n    or_rec.avg_hourly_rate AS current_rate,\n    or_rec.recommended_rate,\n    or_rec.competitor_count,\n    or_rec.avg_competitor_rate,\n    or_rec.median_competitor_rate,\n    or_rec.market_opportunity_score,\n    or_rec.competitive_advantage_score,\n    or_rec.opportunity_percentile,\n    or_rec.revenue_percentile,\n    or_rec.competitive_percentile,\n    or_rec.optimization_priority,\n    or_rec.estimated_revenue_impact,\n    or_rec.estimated_market_share_pct,\n    CASE\n        WHEN or_rec.estimated_revenue_impact > 1000 THEN 'High Impact'\n        WHEN or_rec.estimated_revenue_impact > 0 THEN 'Medium Impact'\n        ELSE 'Low Impact'\n    END AS impact_category\nFROM optimization_recommendations or_rec\nWHERE or_rec.competitor_count > 0 OR or_rec.total_revenue > 0\nORDER BY or_rec.market_opportunity_score DESC, or_rec.total_revenue DESC\nLIMIT 200;",
      "line_number":
    6764,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.014255,
        "row_count": 3,
        "column_count": 26,
        "tested_at": "2026-02-08T21:06:13.390192"
      }
    }
  ],
  "execution_test_results": {
    "test_timestamp": "2026-02-08T21:06:13.390192",
    "total_queries": 30,
    "passed": 30,
    "failed": 0,
    "success_rate": 100.0,
    "average_execution_time": 0.012978566666666668,
    "total_execution_time": 0.38935700000000006
  }
}
# Pull the query list out of the embedded QUERIES_DATA payload defined above.
# `queries` and `total_queries` are module-level names reused by later cells.
queries = QUERIES_DATA.get('queries', [])
total_queries = len(queries)
print("="*80)
print("EMBEDDED QUERIES LOADED")
print("="*80)
print(f"Total Queries: {total_queries}")
print(f"Source: Embedded in notebook (no file dependency)")
if queries:
    print(f"\nQuery Overview:")
    # Preview only the first five titles (cut to 60 characters) to keep output short.
    for q in queries[:5]:
        title = q.get('title', 'N/A')[:60]
        print(f"  Query {q.get('number')}: {title}...")
    if total_queries > 5:
        print(f"  ... and {total_queries - 5} more queries")
print("="*80)
print("‚úÖ Queries ready to execute!")
print("="*80)


In [None]:
# ============================================================================
# LOAD QUERIES (FROM EMBEDDED DATA)
# ============================================================================
# The embedded QUERIES_DATA cell normally defines `queries`; this cell only
# verifies that it did and prints a short overview.
if 'queries' not in globals():
    print("‚ö†Ô∏è  Queries not found. Run the 'Embedded Queries' cell first.")
    print("   Looking for embedded queries...")
    # NOTE(review): `notebook` (a parsed notebook dict) is not defined in any
    # cell visible here, so look it up defensively instead of raising NameError.
    notebook_doc = globals().get('notebook')
    notebook_cells = notebook_doc.get('cells', []) if isinstance(notebook_doc, dict) else []
    for cell in notebook_cells:
        cell_text = ''.join(cell.get('source', []))
        if 'EMBEDDED QUERIES.JSON' in cell_text or 'QUERIES_DATA' in cell_text:
            print(f"   ‚úÖ Found embedded queries in cell")
            break
else:
    print("="*80)
    print("QUERIES LOADED")
    print("="*80)
    print(f"Total Queries: {len(queries)}")
    if queries:
        print(f"\nQuery Overview:")
        # Preview only the first five titles, cut to 60 characters each.
        for q in queries[:5]:
            title = q.get('title', 'N/A')[:60]
            print(f"  Query {q.get('number')}: {title}...")
        if len(queries) > 5:
            print(f"  ... and {len(queries) - 5} more queries")
    print("="*80)


## Step 5: PostgreSQL Database Connection

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-11"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the local server's default `postgres` database using the
    hard-coded Colab defaults below.

    Returns:
        The open psycopg2 connection.

    Raises:
        RuntimeError: If IS_COLAB is falsy.
        Exception: Any error from psycopg2.connect is re-raised after
            troubleshooting hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password='postgres',  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Create connection (module-level `conn` is what execute_query_with_metrics
# falls back to when it is called without a db_config)
conn = create_postgresql_connection()
print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 6: Execute All Queries

## Step 5: Query Execution Function

In [None]:
# ============================================================================
# QUERY EXECUTION FUNCTION WITH METRICS
# ============================================================================

import time
import pandas as pd

def execute_query_with_metrics(db_name: str, query_sql: str, query_num: int, db_config: dict = None):
    """
    Execute SQL query with metrics collection.

    Args:
        db_name: Database name. Kept for interface compatibility; the function
            does not use it to choose a database (the target comes from
            db_config or from the global connection).
        query_sql: SQL query string
        query_num: Query number, echoed back as result['query_number']
        db_config: Keyword arguments for psycopg2.connect. When given, a
            dedicated connection is opened and closed by this call. When None,
            the module-level `conn` is reused and left open.

    Returns:
        dict: Query execution results with metrics. Keys: 'query_number',
        'success', 'execution_time' (seconds, measured from just before the
        cursor is created), 'row_count', 'column_count', 'columns' (list of
        column names), 'dataframe' (pandas DataFrame, or None on failure) and
        'error' (message string, or None on success). Errors are reported
        through the dict; nothing is raised to the caller.
    """
    result = {
        'query_number': query_num,
        'success': False,
        'execution_time': 0.0,
        'row_count': 0,
        'column_count': 0,
        'columns': [],
        'dataframe': None,
        'error': None
    }

    owns_connection = db_config is not None
    exec_conn = None
    cursor = None
    start_time = None

    try:
        if owns_connection:
            # Create new connection from config
            exec_conn = psycopg2.connect(**db_config)
        else:
            # Use the global conn variable
            if 'conn' not in globals():
                raise RuntimeError("Database connection not available. Run connection cell first.")
            exec_conn = globals()['conn']

        # Start timing (monotonic clock, unaffected by wall-clock adjustments)
        start_time = time.perf_counter()

        # Execute query
        cursor = exec_conn.cursor()
        cursor.execute(query_sql)

        # cursor.description is None when the statement produced no result
        # set; calling fetchall() in that case would raise, so skip it.
        if cursor.description:
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
        else:
            columns = []
            rows = []

        # Calculate execution time
        execution_time = time.perf_counter() - start_time

        # Keep the column names even when the query returned zero rows
        df = pd.DataFrame(rows, columns=columns) if columns else pd.DataFrame()

        # Update result
        result['success'] = True
        result['execution_time'] = execution_time
        result['row_count'] = len(df)
        result['column_count'] = len(columns)
        result['columns'] = columns
        result['dataframe'] = df

    except Exception as e:
        result['success'] = False
        result['error'] = str(e)
        result['execution_time'] = time.perf_counter() - start_time if start_time is not None else 0.0
        # A failed statement leaves a shared connection's transaction aborted,
        # which would make every later query on it fail too; roll it back.
        if exec_conn is not None and not owns_connection:
            try:
                exec_conn.rollback()
            except Exception:
                # Best effort: the original error is already recorded above.
                pass

    finally:
        # Release the cursor and any connection we opened, on success or failure
        if cursor is not None:
            try:
                cursor.close()
            except Exception:
                pass
        if owns_connection and exec_conn is not None:
            try:
                exec_conn.close()
            except Exception:
                pass

    return result

# Connection settings for the local PostgreSQL server. When this mapping is
# passed as db_config, execute_query_with_metrics opens its own connection
# with it; when db_config is omitted, that function reuses the global conn.
DB_CONFIG = dict(
    host='localhost',
    port=5432,
    user='postgres',
    password='postgres',
    database='postgres',
)

# Confirm the helper is available and show its call signature.
print(
    "‚úÖ Query execution function loaded",
    "   Function: execute_query_with_metrics(db_name, query_sql, query_num, db_config=None)",
    sep="\n",
)


In [None]:
# ============================================================================
# EXECUTE ALL QUERIES - END-TO-END TESTING
# ============================================================================

# One result dict per query, in execution order; later cells read this list.
all_results = []

print("="*80)
print("EXECUTING ALL QUERIES")
print("="*80)

for query_info in queries:
    query_num = query_info.get('number')
    query_sql = query_info.get('sql', '')
    query_title = query_info.get('title', f'Query {query_num}')

    # Passing DB_CONFIG makes the helper open and close a dedicated
    # connection for this query instead of reusing the global one.
    result = execute_query_with_metrics(DB_NAME, query_sql, query_num, DB_CONFIG)
    result['query_number'] = query_num
    result['query_title'] = query_title
    result['query_info'] = query_info

    all_results.append(result)

    status = "‚úÖ" if result['success'] else "‚ùå"
    # str() keeps the line printable even if an entry has no 'number'
    print(f"{status} Query {str(query_num):>2}: {query_title[:50]:<50} ({result['execution_time']:.3f}s, {result['row_count']:4d} rows)")

# Summary
passed = sum(1 for r in all_results if r['success'])
failed = sum(1 for r in all_results if not r['success'])
# Guard the percentage so an empty query list does not divide by zero
success_rate = passed / total_queries * 100 if total_queries else 0.0

print(f"\n{'='*80}")
print(f"EXECUTION SUMMARY")
print(f"{'='*80}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {success_rate:.1f}%")
print(f"{'='*80}")

## Step 7: Performance Visualization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# ============================================================================
# PERFORMANCE VISUALIZATION
# ============================================================================

# Create performance metrics DataFrame (one row per executed query)
PERF_COLUMNS = ['Query', 'Title', 'Execution Time (s)', 'Row Count', 'Column Count', 'Status']
perf_data = []
for r in all_results:
    full_title = r['query_title']
    perf_data.append({
        'Query': r['query_number'],
        # Shorten long titles to 40 characters for display
        'Title': full_title[:40] + '...' if len(full_title) > 40 else full_title,
        'Execution Time (s)': r['execution_time'],
        'Row Count': r['row_count'],
        'Column Count': r['column_count'],
        'Status': 'Passed' if r['success'] else 'Failed'
    })

# Naming the columns keeps them present even when all_results is empty
perf_df = pd.DataFrame(perf_data, columns=PERF_COLUMNS)

if perf_df.empty:
    # Nothing to plot or summarise; avoid empty charts and NaN statistics
    print("No query results to visualize. Run the query execution cell first.")
else:
    # Visualization: Execution Time Distribution
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # Execution time bar chart
    axes[0, 0].bar(perf_df['Query'], perf_df['Execution Time (s)'], color='steelblue', alpha=0.7)
    axes[0, 0].set_xlabel('Query Number')
    axes[0, 0].set_ylabel('Execution Time (seconds)')
    axes[0, 0].set_title('Query Execution Time by Query Number')
    axes[0, 0].tick_params(axis='x', rotation=45)
    axes[0, 0].grid(True, alpha=0.3)

    # Execution time histogram
    axes[0, 1].hist(perf_df['Execution Time (s)'], bins=20, color='coral', alpha=0.7, edgecolor='black')
    axes[0, 1].set_xlabel('Execution Time (seconds)')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].set_title('Distribution of Execution Times')
    axes[0, 1].grid(True, alpha=0.3)

    # Row count bar chart
    axes[1, 0].bar(perf_df['Query'], perf_df['Row Count'], color='green', alpha=0.7)
    axes[1, 0].set_xlabel('Query Number')
    axes[1, 0].set_ylabel('Row Count')
    axes[1, 0].set_title('Rows Returned by Query')
    axes[1, 0].tick_params(axis='x', rotation=45)
    axes[1, 0].grid(True, alpha=0.3)

    # Status pie chart
    status_counts = perf_df['Status'].value_counts()
    axes[1, 1].pie(status_counts.values, labels=status_counts.index, autopct='%1.1f%%', startangle=90)
    axes[1, 1].set_title('Query Execution Status')

    plt.tight_layout()
    plt.show()

    # Display performance summary
    print("\n" + "="*80)
    print("PERFORMANCE SUMMARY")
    print("="*80)
    print(f"Average execution time: {perf_df['Execution Time (s)'].mean():.3f}s")
    print(f"Median execution time: {perf_df['Execution Time (s)'].median():.3f}s")
    print(f"Max execution time: {perf_df['Execution Time (s)'].max():.3f}s")
    print(f"Min execution time: {perf_df['Execution Time (s)'].min():.3f}s")
    print(f"Total rows returned: {perf_df['Row Count'].sum():,}")
    print(f"Average rows per query: {perf_df['Row Count'].mean():.1f}")
    print("="*80)

## Step 8: Individual Query Documentation and Visualization

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML, Markdown

# ============================================================================
# INDIVIDUAL QUERY DOCUMENTATION AND VISUALIZATION
# ============================================================================

def _clip_text(text, limit: int) -> str:
    """Return text cut to `limit` characters, adding '...' only if it was cut."""
    text = '' if text is None else str(text)
    return text if len(text) <= limit else text[:limit] + '...'


def document_and_visualize_query(query_result: dict, query_num: int):
    """Create comprehensive documentation and visualization for a single query.

    Renders a markdown summary (status, metrics, metadata, SQL preview), then
    for a successful query with rows shows the first 10 rows, histograms of up
    to three numeric columns and, when there are several numeric columns, a
    correlation heatmap. For a failed query the stored error is printed.

    Args:
        query_result: Result dict for one query. Reads 'query_info', 'success',
            'execution_time', 'row_count', 'column_count', 'dataframe' and,
            optionally, 'error'.
        query_num: Query number used in headings and messages.

    Returns:
        None. All output goes to the notebook display / stdout.
    """
    query_info = query_result['query_info']

    status_label = '‚úÖ PASSED' if query_result['success'] else '‚ùå FAILED'
    # `or` also covers keys that are present but None, which would break slicing
    description = _clip_text(query_info.get('description') or 'N/A', 300)
    sql_preview = _clip_text(query_info.get('sql') or '', 1000)

    # Create markdown documentation
    doc = f"""
## Query {query_num}: {query_info.get('title', 'N/A')}

### Execution Status
- **Status:** {status_label}
- **Execution Time:** {query_result['execution_time']:.3f} seconds
- **Rows Returned:** {query_result['row_count']:,}
- **Columns Returned:** {query_result['column_count']}

### Query Information
- **Description:** {description}
- **Use Case:** {query_info.get('use_case', 'N/A')}
- **Business Value:** {query_info.get('business_value', 'N/A')}
- **Complexity:** {query_info.get('complexity', 'N/A')}
- **Expected Output:** {query_info.get('expected_output', 'N/A')}

### SQL Query
```sql
{sql_preview}
```

### Results Preview
"""

    try:
        display(Markdown(doc))
    except Exception:
        # Rich display unavailable: fall back to plain text
        print(doc)

    if not query_result['success'] or query_result['dataframe'] is None:
        if query_result.get('error'):
            print(f"\n‚ùå Error: {query_result['error'][:500]}")
        return

    df = query_result['dataframe']

    if len(df) == 0:
        print(f"\nQuery {query_num} returned 0 rows.")
        return

    print(f"\nFirst 10 rows of Query {query_num}:")
    try:
        display(df.head(10))
    except Exception:
        print(df.head(10).to_string())

    # Create visualizations if numeric data exists
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    if not numeric_cols:
        return

    num_plots = min(3, len(numeric_cols))
    fig, axes = plt.subplots(1, num_plots, figsize=(15, 4))
    if num_plots == 1:
        # plt.subplots returns a bare Axes for a single plot; make it indexable
        axes = [axes]

    for idx, col in enumerate(numeric_cols[:num_plots]):
        if df[col].notna().sum() > 0:
            axes[idx].hist(df[col].dropna(), bins=min(20, len(df)), alpha=0.7, edgecolor='black')
            axes[idx].set_title(f'Distribution of {col[:30]}')
            axes[idx].set_xlabel(col[:30])
            axes[idx].set_ylabel('Frequency')
            axes[idx].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
    # This function runs once per query; close figures so they do not pile up
    plt.close(fig)

    # Create correlation heatmap if multiple numeric columns
    if len(numeric_cols) > 1:
        fig, ax = plt.subplots(figsize=(10, 8))
        corr_matrix = df[numeric_cols].corr()
        sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax)
        ax.set_title('Correlation Matrix of Numeric Columns')
        plt.tight_layout()
        plt.show()
        plt.close(fig)

# Walk the collected results in execution order and render each query's
# documentation section, followed by a separator rule.
print("=" * 80, "INDIVIDUAL QUERY DOCUMENTATION", "=" * 80, sep="\n")

for query_result in all_results:
    query_num = query_result['query_number']
    document_and_visualize_query(query_result, query_num)
    print(f"\n{'=' * 80}\n")

## Step 9: Generate Comprehensive Report

In [None]:
# ============================================================================
# GENERATE COMPREHENSIVE REPORT
# ============================================================================
# Imported here because no visible cell brings these names into scope.
import json
from datetime import datetime

# Single guarded value, reused for both the JSON report and the printed summary
success_rate = passed / total_queries * 100 if total_queries > 0 else 0

# Create comprehensive report
report_data = {
    'database': DB_NAME,
    'test_timestamp': datetime.now().isoformat(),
    'total_queries': total_queries,
    'passed': passed,
    'failed': failed,
    'success_rate': success_rate,
    'average_execution_time': perf_df['Execution Time (s)'].mean(),
    'total_execution_time': perf_df['Execution Time (s)'].sum(),
    'queries': []
}

for r in all_results:
    # Result dicts may lack a 'columns' key; fall back to the DataFrame's
    # column labels, or to an empty list for failed queries.
    result_columns = r.get('columns')
    if result_columns is None:
        result_df = r.get('dataframe')
        result_columns = list(result_df.columns) if result_df is not None else []

    query_report = {
        'number': r['query_number'],
        'title': r['query_title'],
        'success': r['success'],
        'execution_time': r['execution_time'],
        'row_count': r['row_count'],
        'column_count': r['column_count'],
        'columns': result_columns
    }
    if not r['success']:
        query_report['error'] = r['error']

    report_data['queries'].append(query_report)

# Save report
# NOTE(review): DB_DIR is not defined in any cell visible here; it is used as a
# pathlib.Path, so confirm an earlier cell sets it.
report_file = DB_DIR / 'results' / f'{DB_NAME}_comprehensive_report.json'
# parents=True so the save also works when DB_DIR itself does not exist yet
report_file.parent.mkdir(parents=True, exist_ok=True)

with open(report_file, 'w', encoding='utf-8') as f:
    # default=str serialises values json cannot encode natively
    json.dump(report_data, f, indent=2, default=str)

print("="*80)
print("COMPREHENSIVE TEST REPORT")
print("="*80)
print(f"Database: {DB_NAME}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {success_rate:.1f}%")
print(f"Average Execution Time: {perf_df['Execution Time (s)'].mean():.3f}s")
print(f"Total Execution Time: {perf_df['Execution Time (s)'].sum():.3f}s")
print(f"\n‚úÖ Report saved to: {report_file}")
print("="*80)

print("\n" + "="*80)
print("END-TO-END TESTING COMPLETE")
print("="*80)
print(f"‚úÖ Database '{DB_NAME}' initialized and tested")
print(f"‚úÖ All {total_queries} queries executed")
print(f"‚úÖ Performance metrics collected")
print(f"‚úÖ Comprehensive report generated")
print("="*80)