## Python 3.14.2 Installation
This notebook requires Python 3.14.2. Run the cell below to install and verify Python 3.14.2.

In [None]:
# ============================================================================
# PYTHON 3.14.2 INSTALLATION FOR GOOGLE COLAB
# ============================================================================
# Reports the running interpreter version and, when it is not 3.14.2, tries
# conda, the deadsnakes PPA and pyenv in turn until a python3.14 executable is
# available. The kernel itself is never switched; a restart is still required.
import subprocess
import sys
import os

# Target version
TARGET_MAJOR = 3
TARGET_MINOR = 14
TARGET_MICRO = 2
_TARGET_VERSION = f"{TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO}"


def _running_in_colab():
    """Return True when the google.colab module or Colab environment markers are present."""
    try:
        import google.colab  # noqa: F401
        return True
    except ImportError:
        # Same fallback markers the environment-check cell uses.
        return os.path.exists('/content') and 'COLAB' in str(os.environ)


def _shell_ok(command):
    """Run ``command`` through the shell and return True when it exits with status 0."""
    return os.system(command) == 0


def _python314_version():
    """Return the ``--version`` output of an available python3.14, or None if there is none.

    Checks ``python3.14`` on PATH first, then the location pyenv installs into,
    because pyenv does not modify the PATH of the running kernel.
    """
    candidates = [
        'python3.14',
        os.path.expanduser(f'~/.pyenv/versions/{_TARGET_VERSION}/bin/python3.14'),
    ]
    for executable in candidates:
        try:
            probe = subprocess.run([executable, '--version'],
                                   capture_output=True, text=True, timeout=5)
        except (OSError, subprocess.TimeoutExpired):
            continue
        if probe.returncode == 0:
            return probe.stdout.strip()
    return None


def _install_via_conda():
    """Install the target Python with conda. Returns True only if conda reported success."""
    print("\nMethod 1: Trying conda...")
    try:
        probe = subprocess.run(['conda', '--version'], capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.TimeoutExpired):
        print("   ‚ö†Ô∏è  Conda not available")
        return False
    if probe.returncode != 0:
        print("   ‚ö†Ô∏è  Conda not available")
        return False
    print("   ‚úÖ Conda found, installing Python 3.14.2...")
    if not _shell_ok(f'conda install -y python={_TARGET_VERSION}'):
        print("   conda install exited with an error")
        return False
    print("   ‚úÖ Python 3.14.2 installed via conda")
    print("   ‚ö†Ô∏è  Restart kernel and re-run this cell to use Python 3.14.2")
    return True


def _install_via_deadsnakes():
    """Install python3.14 from the deadsnakes PPA. Returns True if the final apt install succeeded."""
    print("\nMethod 2: Installing via deadsnakes PPA...")
    preparation = [
        'apt-get update -qq',
        'apt-get install -y software-properties-common',
        'add-apt-repository -y ppa:deadsnakes/ppa',
        'apt-get update -qq',
    ]
    for command in preparation:
        # A failed preparation step is reported but not fatal: the package
        # install below is what decides whether this method worked.
        if not _shell_ok(command):
            print(f"   Command failed (continuing): {command}")
    if not _shell_ok('apt-get install -y python3.14 python3.14-venv python3.14-dev'):
        print("   apt-get could not install python3.14")
        return False
    print("   ‚úÖ Python 3.14.2 installed via deadsnakes PPA")
    return True


def _install_via_pyenv():
    """Install the target Python with pyenv. Returns True if the whole script succeeded."""
    print("\nMethod 3: Installing via pyenv...")
    # All steps run in ONE shell: exported variables and `pyenv init` only
    # affect the shell they run in, so separate os.system() calls lose them.
    # NOTE(review): this pipes a remote installer script into bash; acceptable
    # on a throwaway Colab VM, review before using anywhere else.
    script = '\n'.join([
        'set -e',
        'export PYENV_ROOT="$HOME/.pyenv"',
        'export PATH="$PYENV_ROOT/bin:$PATH"',
        '[ -x "$PYENV_ROOT/bin/pyenv" ] || curl -fsSL https://pyenv.run | bash',
        'eval "$(pyenv init -)"',
        f'pyenv install --skip-existing {_TARGET_VERSION}',
        f'pyenv global {_TARGET_VERSION}',
    ])
    try:
        completed = subprocess.run(['bash', '-c', script])
    except OSError as exc:
        print(f"   Could not run bash: {exc}")
        return False
    if completed.returncode != 0:
        print("   pyenv installation exited with an error")
        return False
    print("   ‚úÖ Python 3.14.2 installed via pyenv")
    return True


print("="*80)
print("PYTHON 3.14.2 INSTALLATION")
print("="*80)

# Check current Python version
current_version = sys.version_info
print(f"\nCurrent Python version: {current_version.major}.{current_version.minor}.{current_version.micro}")
print(f"Python executable: {sys.executable}")

if tuple(current_version[:3]) == (TARGET_MAJOR, TARGET_MINOR, TARGET_MICRO):
    print(f"\n‚úÖ Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is already installed!")
else:
    print(f"\n‚ö†Ô∏è  Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is required")
    print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")
    print(f"\nInstalling Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO}...")

    # IS_COLAB is normally set by the environment-check cell, which comes
    # later in the notebook; detect it here so a fresh kernel does not fail
    # with a NameError.
    if 'IS_COLAB' not in globals():
        IS_COLAB = _running_in_colab()
    if not IS_COLAB:
        raise RuntimeError("Python 3.14.2 installation requires Google Colab")

    try:
        version_output = _python314_version()
        if version_output is None or _TARGET_VERSION not in version_output:
            # Fallback chain: stop at the first method that both reports
            # success and leaves a usable python3.14 behind.
            for install in (_install_via_conda, _install_via_deadsnakes, _install_via_pyenv):
                if install():
                    version_output = _python314_version()
                    if version_output is not None:
                        break

        # Verify installation
        print("\nVerifying Python 3.14.2 installation...")
        if version_output is not None:
            print(f"   ‚úÖ Python 3.14 found: {version_output}")
            if _TARGET_VERSION in version_output:
                print("   ‚úÖ Python 3.14.2 is installed!")
            print("\n‚ö†Ô∏è  IMPORTANT: Restart kernel and select Python 3.14.2 as kernel")
            print("   Or use: !python3.14 your_script.py")
        else:
            print("   ‚ö†Ô∏è  Python 3.14.2 installation may have failed")
            print("   Current Python version will be used")

    except Exception as e:
        print(f"\n‚ùå Error installing Python 3.14.2: {e}")
        print("\n‚ö†Ô∏è  Continuing with current Python version")
        print(f"   Current version: {current_version.major}.{current_version.minor}.{current_version.micro}")

# Verify Python version
print("\n" + "="*80)
print("PYTHON VERSION VERIFICATION")
print("="*80)
final_version = sys.version_info
print(f"Python version: {final_version.major}.{final_version.minor}.{final_version.micro}")
print(f"Python executable: {sys.executable}")
if tuple(final_version[:3]) == (TARGET_MAJOR, TARGET_MINOR, TARGET_MICRO):
    print(f"\n‚úÖ Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is active!")
else:
    print(f"\n‚ö†Ô∏è  Python {TARGET_MAJOR}.{TARGET_MINOR}.{TARGET_MICRO} is not active")
    print(f"   Current version: {final_version.major}.{final_version.minor}.{final_version.micro}")
    print("   If Python 3.14.2 was installed, restart kernel and select Python 3.14.2")
print("="*80)


# DB-8: Job Market Intelligence Database - End-to-End Query Testing

This notebook provides **complete end-to-end setup and testing** from scratch:

1. **Environment Setup**: Install all required Python packages automatically
2. **Database Initialization**: Create database, load schema, load data
3. **Query Execution**: Execute all 30 queries with metrics
4. **Visualization**: Performance charts and data analysis
5. **Documentation**: Comprehensive query documentation

## Database Overview

**Database Name:** Job Market Intelligence Database  
**Database ID:** db-8  
**Domain:** Job Market Intelligence  
**Total Queries:** 30  

## Prerequisites

- PostgreSQL server running (localhost or configured via environment variables)
- Python 3.14.2 installed
- Jupyter Notebook or JupyterLab

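**Note:** Required Python packages are installed automatically by the package-installation ("FAILSAFE") cell later in this notebook; the first cell only checks and installs the Python interpreter.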

In [None]:
# ============================================================================
# GOOGLE COLAB ONLY - ENVIRONMENT CHECK
# ============================================================================
# Sets the module-level flag IS_COLAB and aborts the notebook with a
# RuntimeError when no sign of Google Colab is found.
import sys
import os

IS_COLAB = False
try:
    import google.colab
except ImportError:
    # The module is missing: fall back to filesystem / environment markers.
    content_dir_present = os.path.exists('/content')
    if content_dir_present and os.environ.get('COLAB_GPU'):
        IS_COLAB = True
        print("‚úÖ Running in Google Colab (detected via COLAB_GPU)")
    elif content_dir_present and 'COLAB' in str(os.environ):
        IS_COLAB = True
        print("‚úÖ Running in Google Colab (detected via COLAB env)")
else:
    IS_COLAB = True
    print("‚úÖ Running in Google Colab")

if not IS_COLAB:
    raise RuntimeError(
        "‚ùå ERROR: This notebook is designed to run ONLY in Google Colab.\n"
        "Please open this notebook in Google Colab: https://colab.research.google.com/"
    )

print("="*80)
print("GOOGLE COLAB ENVIRONMENT CONFIRMED")
print("="*80)

## PostgreSQL Setup for Google Colab

This notebook requires PostgreSQL. The PostgreSQL setup cell appears further below, after the data directory detection cell; run it to install and start PostgreSQL in Colab.

## Data Directory Detection

This notebook automatically detects the `data/` directory containing `schema.sql` and `data.sql` files.
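The detection function has separate search paths for Google Colab and for local execution; note, however, that the environment check cell above stops the notebook outside Google Colab.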

In [None]:
# ============================================================================
# SELF-AWARE DATA DIRECTORY DETECTION
# ============================================================================
# Locates the data/ directory, verifies its contents and publishes DATA_DIR,
# SCHEMA_FILE and DATA_FILE for later cells. Requires IS_COLAB from the
# environment-check cell.
import os
import sys
from pathlib import Path

print("="*80)
print("DATA DIRECTORY DETECTION")
print("="*80)


def _iter_data_dirs(root, max_depth):
    """Yield directories named ``data`` located at most ``max_depth`` levels below ``root``.

    Uses os.walk so that an unreadable sub-directory is skipped instead of
    aborting the whole search, and prunes the walk once the depth limit is hit.
    """
    if max_depth < 1:
        return
    base_depth = len(root.parts)
    for dirpath, dirnames, _filenames in os.walk(root):
        dirnames.sort()  # deterministic traversal order
        here = Path(dirpath)
        if 'data' in dirnames:
            yield here / 'data'
        # Children of `here` sit one level deeper; stop descending at the limit.
        if len(here.parts) - base_depth + 1 >= max_depth:
            dirnames[:] = []


def find_data_directory(max_depth=4):
    """Find the data/ directory, whether the notebook runs in Colab or locally.

    Each search root is checked for a direct ``data`` child first; otherwise a
    depth-limited recursive search looks for a ``data`` directory containing
    ``schema.sql`` or ``data.sql``. As a last resort, ``<root>/<cwd name>/data``
    is tried when the working directory is named ``db-*``.

    Args:
        max_depth: how many levels below each search root the recursive
            search may descend.

    Returns:
        Path of the directory found, or None.
    """
    if IS_COLAB:
        # In Colab, check common locations
        search_paths = [
            Path('/content'),
            Path('/content/drive/MyDrive'),
            Path.cwd(),
        ]
    else:
        # Local execution
        search_paths = [
            Path.cwd(),
            Path(__file__).parent if '__file__' in globals() else Path.cwd(),
            Path.cwd().parent,
        ]

    # Also check up to 5 levels of parent directories
    current = Path.cwd()
    for _ in range(5):
        search_paths.append(current)
        current = current.parent

    # Drop duplicates (cwd appears several times; the parent of "/" is "/")
    # while keeping the priority order.
    search_paths = list(dict.fromkeys(search_paths))

    print("\nSearching for data/ directory...")
    print(f"Current working directory: {Path.cwd()}")

    expected_files = ['schema.sql', 'data.sql']
    data_dir = None
    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Check if data/ exists here
        potential_data = search_path / 'data'
        if potential_data.is_dir():
            data_dir = potential_data
            print(f"‚úÖ Found data/ directory: {data_dir}")
            break

        # Depth-limited recursive search; a candidate must contain an expected file
        try:
            for item in _iter_data_dirs(search_path, max_depth):
                if any((item / f).exists() for f in expected_files):
                    data_dir = item
                    print(f"‚úÖ Found data/ directory (recursive): {data_dir}")
                    break
        except OSError:
            continue
        if data_dir:
            break

    if not data_dir:
        # Try finding by database name pattern (db-N/data)
        db_name = Path.cwd().name
        if db_name.startswith('db-'):
            for search_path in search_paths:
                potential_db = search_path / db_name / 'data'
                if potential_db.is_dir():
                    data_dir = potential_db
                    print(f"‚úÖ Found data/ directory by DB name: {data_dir}")
                    break

    return data_dir


def verify_data_directory(data_dir: Path):
    """Verify data/ directory contains expected files.

    ``schema.sql`` is required, ``data.sql`` is optional. Prints one status
    line per file and returns True only when every required file exists.
    """
    if not data_dir or not data_dir.exists():
        return False

    expected_files = ['schema.sql']
    optional_files = ['data.sql']

    print("\nVerifying data/ directory contents...")
    print(f"Location: {data_dir}")

    missing_files = []

    for file_name in expected_files:
        if (data_dir / file_name).exists():
            print(f"  ‚úÖ {file_name}")
        else:
            missing_files.append(file_name)
            print(f"  ‚ùå {file_name} (missing)")

    for file_name in optional_files:
        if (data_dir / file_name).exists():
            print(f"  ‚úÖ {file_name} (optional)")
        else:
            print(f"  ‚ö†Ô∏è  {file_name} (optional, not found)")

    if missing_files:
        print(f"\n‚ö†Ô∏è  Warning: Missing required files: {missing_files}")
        return False

    return True


# Detect data directory
DATA_DIR = find_data_directory()

if DATA_DIR:
    if verify_data_directory(DATA_DIR):
        print("\n‚úÖ Data directory verified and ready!")
        print(f"   Schema file: {DATA_DIR / 'schema.sql'}")
        if (DATA_DIR / 'data.sql').exists():
            print(f"   Data file: {DATA_DIR / 'data.sql'}")

        # Set global variables for use in other cells
        SCHEMA_FILE = DATA_DIR / 'schema.sql'
        DATA_FILE = DATA_DIR / 'data.sql' if (DATA_DIR / 'data.sql').exists() else None

        print("\n‚úÖ Global variables set:")
        print(f"   DATA_DIR = {DATA_DIR}")
        print(f"   SCHEMA_FILE = {SCHEMA_FILE}")
        if DATA_FILE:
            print(f"   DATA_FILE = {DATA_FILE}")
    else:
        print("\n‚ö†Ô∏è  Data directory found but verification failed")
        print(f"   Location: {DATA_DIR}")
        print("   Please ensure schema.sql exists in this directory")
else:
    print("\n‚ùå Data directory not found!")
    print("\nTroubleshooting:")
    print("1. Ensure data/ directory is uploaded to Colab")
    print("2. Check that data/ contains schema.sql")
    print("3. Verify notebook is in same directory structure as data/")
    print(f"\nCurrent directory: {Path.cwd()}")
    print("Contents:")
    try:
        for item in sorted(Path.cwd().iterdir()):
            print(f"  - {item.name} ({'dir' if item.is_dir() else 'file'})")
    except PermissionError:
        print("  (Permission denied)")

print("="*80)

In [None]:
# ============================================================================
# POSTGRESQL SETUP FOR GOOGLE COLAB
# ============================================================================
# Installs PostgreSQL with apt when psql is missing, makes sure the service
# is started, and waits until pg_isready reports the server as accepting
# connections. Requires IS_COLAB from the environment-check cell.
import subprocess
import time
import os


def _run_step(start_message, command, done_message, required=True):
    """Run one shell step and print ``done_message`` only when it really succeeded.

    Raises:
        RuntimeError: if ``required`` is true and the command exits non-zero.
    """
    print(start_message)
    status = os.system(command)
    if status == 0:
        print(done_message)
    elif required:
        raise RuntimeError(f"'{command}' exited with status {status}")
    else:
        print(f"   Command failed (continuing): {command}")


def _wait_for_postgres(timeout_s=15.0, interval_s=1.0):
    """Poll ``pg_isready`` until it succeeds or ``timeout_s`` elapses.

    Always probes at least once. Returns the CompletedProcess of the last probe.
    """
    deadline = time.monotonic() + timeout_s
    while True:
        probe = subprocess.run(['pg_isready'], capture_output=True, text=True, timeout=5)
        if probe.returncode == 0 or time.monotonic() >= deadline:
            return probe
        time.sleep(interval_s)


print("="*80)
print("POSTGRESQL SETUP FOR GOOGLE COLAB")
print("="*80)

if not IS_COLAB:
    raise RuntimeError("This notebook requires Google Colab")

# Check if PostgreSQL is already installed
postgres_installed = False
try:
    result = subprocess.run(['psql', '--version'],
                            capture_output=True,
                            text=True,
                            timeout=5)
    if result.returncode == 0:
        print(f"‚úÖ PostgreSQL already installed: {result.stdout.strip()}")
        postgres_installed = True
except (FileNotFoundError, subprocess.TimeoutExpired):
    pass

if not postgres_installed:
    print("\nInstalling PostgreSQL using magic commands...")
    print("(Run these commands if automatic installation fails)")
    print("  !apt-get update")
    print("  !apt-get install -y postgresql postgresql-contrib")
    print("  !service postgresql start")

    try:
        # A stale package index is tolerable; a failed install or start is not.
        _run_step("\n   Updating package list...",
                  'apt-get update -qq',
                  "   ‚úÖ Package list updated",
                  required=False)
        _run_step("   Installing PostgreSQL...",
                  'apt-get install -y -qq postgresql postgresql-contrib',
                  "   ‚úÖ PostgreSQL installed")
        _run_step("   Starting PostgreSQL service...",
                  'service postgresql start',
                  "   ‚úÖ PostgreSQL service started")
        print("   Waiting for PostgreSQL to be ready...")
    except Exception as e:
        print(f"   ‚ùå Error: {e}")
        print("   Please run manually:")
        print("   !apt-get update")
        print("   !apt-get install -y postgresql postgresql-contrib")
        print("   !service postgresql start")

# Verify PostgreSQL is running
print("\nVerifying PostgreSQL is ready...")
try:
    result = _wait_for_postgres(timeout_s=0)  # single probe
    if result.returncode != 0:
        if postgres_installed:
            # PostgreSQL was already installed, so the start step above was
            # skipped; start the service now.
            print("   Starting PostgreSQL service...")
            os.system('service postgresql start')
        result = _wait_for_postgres()
    if result.returncode == 0:
        print("‚úÖ PostgreSQL is ready")
        print(f"   {result.stdout.strip()}")
    else:
        print("‚ö†Ô∏è  PostgreSQL may not be ready yet")
        print("   Try: !service postgresql restart")
except Exception as e:
    print(f"‚ö†Ô∏è  Could not verify PostgreSQL: {e}")

print("\n" + "="*80)
print("POSTGRESQL SETUP COMPLETE")
print("="*80)

In [None]:
# ============================================================================
# STREAMLIT DASHBOARD EXECUTION
# ============================================================================
# Helpers to locate <DB_NAME>_dashboard.py and to start / stop a Streamlit
# server for it from inside the notebook.
import subprocess
import sys
import os
from pathlib import Path
import webbrowser
import time
import threading


def find_dashboard_file():
    """Find Streamlit dashboard file recursively.

    Looks for ``{DB_NAME}_dashboard.py`` directly inside, then anywhere below,
    each known location. Returns the first match as a Path, or None.
    """
    search_paths = [
        Path.cwd(),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/workspace'),
        Path('/content/drive/MyDrive/db'),
        Path('/content/db'),
        Path('/content'),
    ]

    dashboard_name = f'{DB_NAME}_dashboard.py'

    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / dashboard_name
        if candidate.exists():
            return candidate

        # Try recursive search
        try:
            for found_path in search_path.rglob(dashboard_name):
                if found_path.is_file():
                    return found_path
        except OSError:
            # Unreadable directory somewhere below this root: try the next root.
            continue

    return None


def run_streamlit_dashboard(method='notebook', port=8501, open_browser=True):
    """
    Run Streamlit dashboard from Jupyter notebook.

    Methods:
    - 'notebook': Start the server headless in the background and print its URL
    - 'subprocess': Run as subprocess (background)
    - 'magic': Print the !streamlit run magic command

    Args:
        method: one of the methods above.
        port: TCP port for the Streamlit server.
        open_browser: in 'notebook' mode, try to open the URL in a browser.

    Returns:
        The subprocess.Popen handle of the started server, or None when
        nothing was started (dashboard missing, 'magic' mode, unknown method,
        or the server exited during start-up).
    """
    dashboard_path = find_dashboard_file()

    if not dashboard_path:
        print("‚ùå Dashboard file not found")
        print(f"   Looking for: {DB_NAME}_dashboard.py")
        return None

    print(f"‚úÖ Found dashboard: {dashboard_path}")

    if method == 'notebook':
        # Method 1: start a headless server and point the user at its URL
        print("\n" + "="*80)
        print("STREAMLIT DASHBOARD - NOTEBOOK MODE")
        print("="*80)
        print(f"\nDashboard: {dashboard_path.name}")
        print("\nTo run dashboard:")
        print("  1. Run this cell to start the server")
        print("  2. Open the URL shown below in a new tab")
        print(f"  3. Or use: !streamlit run {dashboard_path} --server.port={port}")
        print("\n" + "="*80)

        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port),
            '--server.headless', 'true',
            '--server.runOnSave', 'false',
            '--browser.gatherUsageStats', 'false'
        ]

        # NOTE(review): the pipes are only read if the server exits early; a
        # very chatty long-running server could eventually fill them.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

        # Wait a moment for server to start
        time.sleep(2)

        # If the child already exited (e.g. streamlit is not installed), show
        # why instead of advertising a URL nobody is serving.
        if process.poll() is not None:
            _stdout, stderr = process.communicate()
            print(f"Streamlit exited immediately (exit code {process.returncode})")
            if stderr:
                print(stderr.strip())
            return None

        url = f"http://localhost:{port}"
        print(f"\nüåê Dashboard URL: {url}")
        print(f"\nServer started in background (PID: {process.pid})")
        print("\nTo stop: process.terminate() or run stop_streamlit()")

        # Store process for later termination
        globals()['_streamlit_process'] = process

        # Try to open browser (best effort; usually unavailable on a remote kernel)
        if open_browser:
            try:
                webbrowser.open(url)
            except Exception:
                pass

        return process

    elif method == 'subprocess':
        # Method 2: Run as background subprocess
        cmd = [
            sys.executable, '-m', 'streamlit', 'run',
            str(dashboard_path),
            '--server.port', str(port)
        ]

        process = subprocess.Popen(cmd)
        # Remember the handle so stop_streamlit() works for this mode as well.
        globals()['_streamlit_process'] = process
        print(f"‚úÖ Streamlit started (PID: {process.pid})")
        print(f"üåê Dashboard: http://localhost:{port}")
        return process

    elif method == 'magic':
        # Method 3: Print magic command for user to run
        print("Run this command in a new cell:")
        print(f"!streamlit run {dashboard_path} --server.port={port}")
        return None

    print(f"Unknown method {method!r}; expected 'notebook', 'subprocess' or 'magic'")
    return None


def stop_streamlit():
    """Stop the Streamlit process started by run_streamlit_dashboard(), if any."""
    process = globals().pop('_streamlit_process', None)
    if process is None:
        print("‚ö†Ô∏è  No Streamlit process found")
        return

    if process.poll() is None:
        process.terminate()
    try:
        # communicate() reaps the child and closes any pipes opened for it.
        process.communicate(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        process.communicate()
    print("‚úÖ Streamlit stopped")


# Auto-detect DB_NAME if not set
if 'DB_NAME' not in globals():
    # Try to detect from current directory
    cwd = Path.cwd()
    for db_num in range(6, 16):
        if f'db-{db_num}' in str(cwd) or f'db{db_num}' in str(cwd):
            DB_NAME = f'db-{db_num}'
            break
    else:
        # This notebook documents db-8, so fall back to that database.
        DB_NAME = 'db-8'  # Default
        print(f"‚ö†Ô∏è  Could not detect DB_NAME, using default: {DB_NAME}")

print("\n" + "="*80)
print("STREAMLIT DASHBOARD INTEGRATION")
print("="*80)
print(f"Database: {DB_NAME}")
print("\nAvailable methods:")
print("  1. run_streamlit_dashboard(method='notebook') - Run in notebook mode")
print("  2. run_streamlit_dashboard(method='subprocess') - Run as background process")
print("  3. run_streamlit_dashboard(method='magic') - Get magic command")
print("  4. stop_streamlit() - Stop running dashboard")
print("\n" + "="*80)

## Streamlit Dashboard

Run the Streamlit dashboard using one of these methods:

**Method 1: Notebook Mode** (Recommended)
```python
run_streamlit_dashboard(method='notebook', port=8501)
```

**Method 2: Magic Command**
```bash
!streamlit run db-8_dashboard.py --server.port=8501
```

**Method 3: Background Process**
```python
run_streamlit_dashboard(method='subprocess', port=8501)
```


## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
# Classifies the runtime as 'docker', 'colab' or 'local', locates the base
# database directory, and publishes ENV_TYPE, ENV_DETAILS, BASE_DIR and
# ENV_CONFIG for later cells.
import sys
import os
import platform
import subprocess
import json
import fnmatch
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)

# Detect environment type
ENV_TYPE = None
ENV_DETAILS = {}

# Check for Docker
if os.path.exists('/.dockerenv'):
    ENV_TYPE = 'docker'
    ENV_DETAILS['container'] = 'docker'
    if os.path.exists('/workspace'):
        ENV_DETAILS['workspace'] = '/workspace'
    print("‚úÖ Detected: Docker container")

# Check for Google Colab (takes precedence over the Docker result)
try:
    import google.colab
except ImportError:
    # Module missing: fall back to the /content directory and environment markers.
    if os.path.exists('/content'):
        ENV_TYPE = 'colab'
        ENV_DETAILS['platform'] = 'google_colab'
        ENV_DETAILS['content_dir'] = True
        if os.environ.get('COLAB_GPU'):
            print("‚úÖ Detected: Google Colab (by /content + COLAB_GPU)")
        elif 'COLAB' in str(os.environ):
            print("‚úÖ Detected: Google Colab (by /content + COLAB env)")
        elif Path('/content/sample_data').exists() or Path('/content/drive').exists():
            print("‚úÖ Detected: Google Colab (by /content structure)")
        else:
            print("‚ö†Ô∏è  Detected: Possible Google Colab (by /content)")
else:
    ENV_TYPE = 'colab'
    ENV_DETAILS['platform'] = 'google_colab'
    ENV_DETAILS['colab_module'] = True
    print("‚úÖ Detected: Google Colab (via google.colab module)")

# Local environment: only when nothing above matched. (This used to be the
# `else` of the try statement, which ran exactly when the Colab import had
# succeeded and overwrote the 'colab' result.)
if ENV_TYPE is None:
    ENV_TYPE = 'local'
    ENV_DETAILS['platform'] = platform.system().lower()
    print("‚úÖ Detected: Local environment")


def _iter_named_dirs(root, pattern, max_depth):
    """Yield directories below ``root`` whose name matches ``pattern``, at most ``max_depth`` levels down."""
    if max_depth < 1:
        return
    base_depth = len(root.parts)
    for dirpath, dirnames, _filenames in os.walk(root):
        dirnames.sort()  # deterministic traversal order
        here = Path(dirpath)
        for name in dirnames:
            if fnmatch.fnmatchcase(name, pattern):
                yield here / name
        # Children of `here` sit one level deeper; stop descending at the limit.
        if len(here.parts) - base_depth + 1 >= max_depth:
            dirnames[:] = []


# Detect base directories recursively
def find_base_directory(db_pattern='db-*', max_depth=4):
    """Find base database directory recursively.

    Args:
        db_pattern: glob pattern for database directory names. Matches any
            ``db-N`` directory rather than only ``db-6``.
        max_depth: how many levels below each start path to search, so that
            large trees (e.g. a mounted Drive) are not scanned in full.

    Returns:
        The parent of the first matching directory that contains ``queries/``;
        else the first start path that has ``client/db/<match>``; else the
        current working directory.
    """
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for a database directory that has a queries/ folder
        for db_dir in _iter_named_dirs(start_path, db_pattern, max_depth):
            if (db_dir / 'queries').exists():
                return db_dir.parent

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and any(p.is_dir() for p in client_db.glob(db_pattern)):
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Return the detected environment as a dict with keys env_type, base_dir and details."""
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Colab Setup (Run after the environment detection cell if using Google Colab)

If you're running this notebook in Google Colab:
1. **Mount Google Drive** (if your database files are in Drive)
2. **Upload database files** to `/content/db` or your Drive folder


In [None]:
# ============================================================================
# GOOGLE COLAB SETUP
# ============================================================================
# Only acts when the environment detection cell classified the runtime as
# Colab: mounts Google Drive if necessary and reports whether the database
# folder is present under /content/db and under Drive.

if ENV_TYPE != 'colab':
    print("Not running in Colab - skipping Colab setup")
else:
    print("="*80)
    print("GOOGLE COLAB SETUP")
    print("="*80)

    # Mount Google Drive unless it is already available
    drive_path = Path('/content/drive/MyDrive')
    if drive_path.exists():
        print("‚úÖ Google Drive is already mounted")
    else:
        print("‚ö†Ô∏è  Google Drive not mounted.")
        print("   Run this command to mount:")
        print("   from google.colab import drive")
        print("   drive.mount('/content/drive')")
        try:
            from google.colab import drive
            drive.mount('/content/drive')
            print("‚úÖ Google Drive mounted")
        except Exception as exc:
            print(f"‚ö†Ô∏è  Could not auto-mount Drive: {exc}")
            print("   Please mount manually using the command above")

    # Look for the database folder
    print("\nChecking for database files...")

    # ... in the Colab working area
    content_db = Path('/content/db')
    if not content_db.exists():
        print(f"‚ö†Ô∏è  Not found: {content_db}")
        print("   Upload your database folder to /content/db")
    else:
        print(f"‚úÖ Found: {content_db}")

    # ... and in Google Drive
    drive_db = drive_path / 'db'
    if not drive_db.exists():
        print(f"‚ö†Ô∏è  Not found in Drive: {drive_db}")
        print("   Upload your database folder to Google Drive/db")
    else:
        print(f"‚úÖ Found in Drive: {drive_db}")

    print("\n" + "="*80)
    print("Some PostgreSQL-specific features may not work")
    print("="*80)

In [None]:
# ============================================================================
# FAILSAFE: Force Path Correction and Package Installation
# ============================================================================
import importlib
import os
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path


def _can_import(import_name):
    """Return True if ``import_name`` can be imported in the running kernel."""
    # Reset finder caches so a package installed a moment ago is discoverable.
    importlib.invalidate_caches()
    try:
        __import__(import_name)
    except Exception:
        # A broken install may raise something other than ImportError;
        # either way the package is not usable.
        return False
    return True


def _as_path(value):
    """Convert ``value`` to a Path, or return None if it is empty or not path-like."""
    if not value:
        return None
    try:
        return Path(value)
    except TypeError:
        return None


def force_install_package(package_name, import_name=None):
    """Force install package using multiple methods.

    Args:
        package_name: Requirement string handed to the installers.
        import_name: Module name used to verify the install. When omitted it is
            derived from ``package_name`` (extras and version pins stripped,
            dashes replaced by underscores).

    Returns:
        True if the module is importable (already or after an install attempt),
        False if every method failed. Never raises for installer failures.
    """
    if import_name is None:
        import_name = package_name.split('[')[0].split('==')[0].split('>=')[0].replace('-', '_')

    # Try import first
    if _can_import(import_name):
        return True

    pip_install = [sys.executable, '-m', 'pip', 'install', '--quiet']
    attempts = [
        pip_install + ['--user', package_name],                   # Method 1
        pip_install + ['--break-system-packages', package_name],  # Method 2 (Python 3.12+)
        pip_install + [package_name],                             # Method 3
        ['conda', 'install', '-y', '--quiet', package_name],      # Method 4
    ]
    if os.path.exists('/usr/bin/apt-get'):                        # Method 5 (Linux/Docker)
        apt_package = f'python3-{import_name.replace("_", "-")}'
        attempts.append(['apt-get', 'install', '-y', '--quiet', apt_package])
    attempts.append(pip_install + ['--force-reinstall', package_name])  # Method 6

    for command in attempts:
        try:
            subprocess.check_call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except (subprocess.CalledProcessError, OSError):
            # Installer failed or is not present on this machine; try the next one.
            continue
        if _can_import(import_name):
            return True

    print(f"⚠️  Warning: Could not install {package_name}, continuing anyway...")
    return False


def correct_file_path(file_path, search_paths=None):
    """Correct file path by searching multiple locations.

    Args:
        file_path: ``str`` or ``Path`` to locate.
        search_paths: Optional list of directories to search. Defaults to the
            working directory, the known workspace/Colab locations and, when
            it is defined, the notebook's ``BASE_DIR``.

    Returns:
        The original path if it exists; otherwise the first entry with the
        same name found directly inside a search directory, or the first file
        with that name found recursively. Falls back to the original path
        when nothing matches.
    """
    if isinstance(file_path, str):
        file_path = Path(file_path)

    # If path exists, return it
    if file_path.exists():
        return file_path

    # Default search paths
    if search_paths is None:
        search_paths = [
            Path.cwd(),
            Path('/workspace/client/db'),
            Path('/workspace/db'),
            Path('/workspace'),
            Path('/content/drive/MyDrive/db'),
            Path('/content/db'),
            Path('/content'),
        ]
        known_base = _as_path(globals().get('BASE_DIR'))
        if known_base is not None:
            search_paths.append(known_base)

    for search_path in search_paths:
        if not search_path.exists():
            continue

        # Try direct path
        candidate = search_path / file_path.name
        if candidate.exists():
            return candidate

        # Try recursive search (matches files only)
        try:
            for found_path in search_path.rglob(file_path.name):
                if found_path.is_file():
                    return found_path
        except (OSError, ValueError):
            # Unreadable directory or unusable pattern; move on to the next root.
            continue

    # Return original path (will fail later, but at least we tried)
    return file_path


def create_notebook_backup(notebook_path=None):
    """Create backup of current notebook automatically.

    Args:
        notebook_path: ``str`` or ``Path`` of the notebook. When omitted,
            ``__file__`` is tried, then ``current_notebook.ipynb`` in the
            working directory.

    Returns:
        Path of the timestamped copy, or None when no ``.ipynb`` file was
        found or the copy failed (failures are reported, never raised).
    """
    try:
        if notebook_path is None:
            try:
                notebook_path = Path(__file__)
            except NameError:
                # __file__ is not defined when running inside a notebook kernel.
                notebook_path = Path.cwd() / 'current_notebook.ipynb'

        if isinstance(notebook_path, str):
            notebook_path = Path(notebook_path)

        # Only create backup if file exists
        if notebook_path.exists() and notebook_path.suffix == '.ipynb':
            timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
            backup_path = notebook_path.parent / f"{notebook_path.stem}_{timestamp}.backup.ipynb"
            shutil.copy2(notebook_path, backup_path)
            print(f"✅ Backup created: {backup_path.name}")
            return backup_path

        print("⚠️  Could not determine notebook path for backup")
        return None
    except Exception as e:
        print(f"⚠️  Backup creation failed (non-critical): {e}")
        return None


def ensure_packages_installed():
    """Ensure all required packages are installed, reporting each result."""
    required_packages = [
        ('psycopg2-binary', 'psycopg2'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
        ('matplotlib', 'matplotlib'),
        ('seaborn', 'seaborn'),
        ('ipython', 'IPython'),
        ('jupyter', 'jupyter'),
    ]

    print("\n" + "="*80)
    print("FAILSAFE: Ensuring all packages are installed...")
    print("="*80)

    for package, import_name in required_packages:
        if force_install_package(package, import_name):
            print(f"✅ {package} installed")
        else:
            print(f"⚠️  {package} installation failed, but continuing...")

    print("="*80 + "\n")


def ensure_paths_correct():
    """Ensure the notebook globals BASE_DIR and DB_DIR point at existing paths.

    BASE_DIR is always left defined: a missing or stale value is replaced by
    the result of ``correct_file_path`` or, failing that, the working
    directory. DB_DIR is only touched when it is already defined and stale.
    """
    print("\n" + "="*80)
    print("FAILSAFE: Correcting file paths...")
    print("="*80)

    base_dir_path = _as_path(globals().get('BASE_DIR'))
    if base_dir_path is not None and base_dir_path.exists():
        print(f"✅ BASE_DIR valid: {globals()['BASE_DIR']}")
    else:
        corrected_base_dir = correct_file_path(base_dir_path) if base_dir_path is not None else Path.cwd()
        if not corrected_base_dir.exists():
            # Nothing matched the stale value; the working directory always exists.
            corrected_base_dir = Path.cwd()
        globals()['BASE_DIR'] = corrected_base_dir
        print(f"✅ BASE_DIR corrected: {corrected_base_dir}")

    db_dir_path = _as_path(globals().get('DB_DIR'))
    if db_dir_path is not None:
        if db_dir_path.exists():
            print(f"✅ DB_DIR valid: {globals()['DB_DIR']}")
        else:
            corrected_db_dir = correct_file_path(db_dir_path)
            globals()['DB_DIR'] = corrected_db_dir
            print(f"✅ DB_DIR corrected: {corrected_db_dir}")

    print("="*80 + "\n")


# Create backup at startup
try:
    create_notebook_backup()
except Exception as e:
    print(f"⚠️  Backup skipped: {e}")

# Run failsafe checks
ensure_packages_installed()
ensure_paths_correct()
print("✅ Failsafe checks complete")

## Step 0: Environment Detection and Self-Update

In [None]:
# ============================================================================
# ENVIRONMENT DETECTION AND METAPROGRAMMATIC SELF-UPDATE
# ============================================================================
import json
import os
import platform
import subprocess
import sys
from pathlib import Path

print("="*80)
print("ENVIRONMENT DETECTION")
print("="*80)


def _detect_environment():
    """Classify the runtime as 'colab', 'docker' or 'local'.

    Returns:
        ``(env_type, details)`` where ``details`` is a dict describing what
        was found. Colab signals take precedence over the Docker marker,
        except that a bare ``/content`` directory with no other Colab signal
        does not override a positive Docker detection.
    """
    details = {}
    env_type = None

    # Check for Docker
    if os.path.exists('/.dockerenv'):
        env_type = 'docker'
        details['container'] = 'docker'
        if os.path.exists('/workspace'):
            details['workspace'] = '/workspace'
        print("✅ Detected: Docker container")

    # Check for Google Colab: the importable module is the strongest signal,
    # the /content heuristics are fallbacks.
    colab_message = None
    try:
        import google.colab  # noqa: F401
        details['colab_module'] = True
        colab_message = "✅ Detected: Google Colab (via google.colab module)"
    except ImportError:
        if os.path.exists('/content'):
            if os.environ.get('COLAB_GPU'):
                colab_message = "✅ Detected: Google Colab (by /content + COLAB_GPU)"
            elif 'COLAB' in str(os.environ):
                colab_message = "✅ Detected: Google Colab (by /content + COLAB env)"
            elif Path('/content/sample_data').exists() or Path('/content/drive').exists():
                colab_message = "✅ Detected: Google Colab (by /content structure)"
            elif env_type is None:
                colab_message = "⚠️  Detected: Possible Google Colab (by /content)"
            if colab_message is not None:
                details['content_dir'] = True

    if colab_message is not None:
        env_type = 'colab'
        details['platform'] = 'google_colab'
        print(colab_message)
    elif env_type is None:
        # Check for local environment
        env_type = 'local'
        print("✅ Detected: Local environment")

    # Every environment gets a 'platform' entry.
    details.setdefault('platform', platform.system().lower())
    return env_type, details


# Detect environment type
ENV_TYPE, ENV_DETAILS = _detect_environment()


def find_base_directory():
    """Find base database directory recursively.

    Walks the candidate roots in order and returns the parent of the first
    ``db-6`` directory that contains ``queries``, or the first root that has a
    ``client/db/db-6`` layout. Falls back to the working directory.
    """
    # NOTE(review): the lookup is hard-coded to 'db-6' although the original
    # comment mentions "any db-*" - confirm which directory name is intended.
    start_paths = [
        Path.cwd(),
        Path('/workspace'),
        Path('/workspace/client/db'),
        Path('/workspace/db'),
        Path('/content'),
        Path('/content/drive/MyDrive'),
    ]

    for start_path in start_paths:
        if not start_path.exists():
            continue

        # Look for db-6 directory
        try:
            for db_dir in start_path.rglob('db-6'):
                if db_dir.is_dir() and (db_dir / 'queries').exists():
                    return db_dir.parent
        except OSError:
            # Unreadable subtree; still try the structural check below.
            pass

        # Look for client/db structure
        client_db = start_path / 'client' / 'db'
        if client_db.exists() and (client_db / 'db-6').exists():
            return start_path

    return Path.cwd()


BASE_DIR = find_base_directory()
ENV_DETAILS['base_dir'] = str(BASE_DIR)

print(f"\nEnvironment Type: {ENV_TYPE}")
print(f"Base Directory: {BASE_DIR}")
print(f"Python Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")


# Metaprogrammatic self-update function
def update_notebook_paths():
    """Return the detected environment type, base directory and details as one dict."""
    return {
        'env_type': ENV_TYPE,
        'base_dir': BASE_DIR,
        'details': ENV_DETAILS
    }


ENV_CONFIG = update_notebook_paths()

print("\n" + "="*80)
print("ENVIRONMENT DETECTION COMPLETE")
print("="*80)

## Step 1: Environment Setup & Package Installation

In [None]:
# ============================================================================
# END-TO-END SETUP: Install all required packages and configure environment
# ============================================================================
import importlib
import json
import os
import platform
import re
import shutil
import subprocess
import sys
import warnings
from datetime import datetime
from pathlib import Path

# apt package names for the requirements that have a system package.
_SYSTEM_PKG_MAP = {
    'psycopg2-binary': 'python3-psycopg2',
    'pandas': 'python3-pandas',
    'numpy': 'python3-numpy',
    'matplotlib': 'python3-matplotlib',
}


def _package_name(package_spec: str) -> str:
    """Strip version pins, extras and markers from a requirement string."""
    return re.split(r'[<>=!~\[;\s]', package_spec, maxsplit=1)[0]


def _importable(import_name: str) -> bool:
    """Return True if ``import_name`` can be imported in the running kernel."""
    # Reset finder caches so a package installed a moment ago is discoverable.
    importlib.invalidate_caches()
    try:
        __import__(import_name)
    except ImportError:
        return False
    return True


def _run_quietly(command, timeout: int = 300) -> bool:
    """Run ``command`` with its output discarded; return True on exit code 0."""
    try:
        # Output goes to DEVNULL rather than PIPE: nothing reads the pipe here,
        # so a chatty installer could otherwise block once the buffer fills.
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL, timeout=timeout)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        return False
    return True


def install_package_multiple_methods(package_spec: str, import_name: str) -> bool:
    """Install package using multiple methods with fallbacks.

    Args:
        package_spec: Requirement string, e.g. ``'pandas>=2.0.0'``.
        import_name: Module name used to verify that the install worked.

    Returns:
        True if the module is importable (already, or after one of the
        attempts); False if every method failed. Installer errors are never
        raised.
    """
    package_name = _package_name(package_spec)

    # Method 1: Check if already installed
    if _importable(import_name):
        print(f"✅ {package_name}: Already installed")
        return True

    print(f"⚠️  {package_name}: Installing...")

    # ENV_TYPE is set by the environment-detection cell; treat it as unknown if absent.
    env_type = globals().get('ENV_TYPE')
    pip_install = [sys.executable, '-m', 'pip', 'install', package_spec, '--quiet']

    # (label, commands): every command must succeed for the attempt to count.
    attempts = [
        ("pip --user", [pip_install + ['--user']]),      # Method 2
        ("pip (system-wide)", [pip_install]),            # Method 3
    ]
    if env_type == 'local' and platform.system() == 'Linux':      # Method 4
        attempts.append(("pip --break-system-packages",
                         [pip_install + ['--break-system-packages']]))
    if shutil.which('conda'):                                     # Method 5
        conda_pkg = package_name.replace('-binary', '')
        attempts.append(("conda", [['conda', 'install', '-y', conda_pkg]]))
    if env_type in ('docker', 'colab') and package_name in _SYSTEM_PKG_MAP:  # Method 6
        attempts.append(("apt-get", [
            ['apt-get', 'update'],
            ['apt-get', 'install', '-y', _SYSTEM_PKG_MAP[package_name]],
        ]))

    for label, commands in attempts:
        if all(_run_quietly(command) for command in commands) and _importable(import_name):
            print(f"   ✅ Installed via {label}")
            return True

    print(f"   ❌ Failed to install {package_name} via all methods")
    return False


print("="*80)
print("ENVIRONMENT SETUP - END-TO-END INSTALLATION")
print("="*80)

# Display Python environment
print(f"\nPython Version: {sys.version}")
print(f"Python Executable: {sys.executable}")
print(f"Platform: {platform.platform()}")
print(f"Architecture: {platform.architecture()[0]}")

# Required packages with versions
required_packages = [
    'psycopg2-binary>=2.9.0',
    'pandas>=2.0.0',
    'numpy>=1.24.0',
    'matplotlib>=3.7.0',
    'seaborn>=0.12.0'
]

# Map package names to import names
package_import_map = {
    'psycopg2-binary': 'psycopg2',
    'pandas': 'pandas',
    'numpy': 'numpy',
    'matplotlib': 'matplotlib',
    'seaborn': 'seaborn'
}

print("\n" + "="*80)
print("CHECKING AND INSTALLING REQUIRED PACKAGES")
print("="*80)

missing_packages = []    # specs that were not importable when the cell started
installed_packages = []  # package names that are importable now
failed_packages = []     # specs that could not be installed by any method

for package_spec in required_packages:
    package_name = _package_name(package_spec)
    import_name = package_import_map.get(package_name, package_name.replace('-', '_'))

    if not _importable(import_name):
        missing_packages.append(package_spec)

    if install_package_multiple_methods(package_spec, import_name):
        installed_packages.append(package_name)
    else:
        failed_packages.append(package_spec)
        print(f"      Manual install: pip install {package_spec}")

print("\n" + "="*80)
if failed_packages:
    # Only list what actually failed, not everything that was missing at the start.
    print("⚠️  Some packages failed to install. Please install manually:")
    for pkg in failed_packages:
        print(f"   pip install {pkg}")
    print("\n   Then restart the kernel and re-run this cell.")
else:
    print("✅ All required packages are installed!")
    if missing_packages:
        print("\n⚠️  If packages were just installed, restart the kernel and re-run this cell.")
print("="*80)

# Now import all packages
print("\n" + "="*80)
print("IMPORTING PACKAGES")
print("="*80)

try:
    import psycopg2
    print("✅ psycopg2 imported")
except ImportError as e:
    print(f"❌ Failed to import psycopg2: {e}")
    print("   Please restart kernel after installation")

try:
    import pandas as pd
    print("✅ pandas imported")
except ImportError as e:
    print(f"❌ Failed to import pandas: {e}")

try:
    import numpy as np
    print("✅ numpy imported")
except ImportError as e:
    print(f"❌ Failed to import numpy: {e}")

try:
    import matplotlib
    matplotlib.use('Agg')  # Non-interactive backend for notebooks
    import matplotlib.pyplot as plt
    print("✅ matplotlib imported")
except ImportError as e:
    print(f"❌ Failed to import matplotlib: {e}")

try:
    import seaborn as sns
    print("✅ seaborn imported")
except ImportError as e:
    print(f"❌ Failed to import seaborn: {e}")

try:
    from IPython.display import display, HTML, Markdown
    print("✅ IPython.display imported")
except ImportError as e:
    print(f"⚠️  IPython.display not available: {e}")

warnings.filterwarnings('ignore')

# Set visualization style
try:
    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")
except Exception as e:
    # Styling is cosmetic; plt/sns may be missing or the style name unknown
    # to the installed matplotlib. Report it instead of hiding it.
    print(f"⚠️  Could not apply visualization style: {e}")

print("\n" + "="*80)
print("ENVIRONMENT SETUP COMPLETE")
print("="*80)

## Step 2: Database Configuration

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import os
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-8"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the default ``postgres`` database on localhost:5432 as user
    ``postgres``. The password is read from the ``PGPASSWORD`` environment
    variable and falls back to ``'postgres'``.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: If ``IS_COLAB`` (set by an earlier cell) is falsy.
        Exception: Any connection error is re-raised after troubleshooting
            hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password=os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("✅ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"❌ PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Close a connection left over from a previous run of this cell so that
# re-running it does not leak server sessions.
_previous_conn = globals().get('conn')
if _previous_conn is not None and getattr(_previous_conn, 'closed', 1) == 0:
    _previous_conn.close()

# Create connection
conn = create_postgresql_connection()
print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 3: Database Initialization (Create Database, Load Schema, Load Data)

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
# NOTE(review): this cell repeats the Step 2 connection code and does not yet
# create the database or load schema/data as the section title suggests.
import os
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-8"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Connects to the default ``postgres`` database on localhost:5432 as user
    ``postgres``. The password is read from the ``PGPASSWORD`` environment
    variable and falls back to ``'postgres'``.

    Returns:
        An open psycopg2 connection.

    Raises:
        RuntimeError: If ``IS_COLAB`` (set by an earlier cell) is falsy.
        Exception: Any connection error is re-raised after troubleshooting
            hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password=os.environ.get('PGPASSWORD', 'postgres'),  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("✅ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"❌ PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running:     !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        raise


# Reuse a connection that is already open (psycopg2 sets ``closed`` to 0 while
# a connection is usable) instead of opening a second one and orphaning the first.
_existing_conn = globals().get('conn')
if _existing_conn is not None and getattr(_existing_conn, 'closed', 1) == 0:
    conn = _existing_conn
    print("✅ Reusing existing PostgreSQL connection")
else:
    # Create connection
    conn = create_postgresql_connection()
print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 4: Load Query Metadata

## Embedded SQL Files and Queries

The following cells contain the complete database schema, data, and queries embedded directly in this notebook.
No external file dependencies required - everything is self-contained.

In [None]:
# ============================================================================
# EMBEDDED SCHEMA.SQL - DB-8
# ============================================================================
# This cell contains the complete database schema
# Execute this cell to load the schema into PostgreSQL
import psycopg2
# Schema SQL (embedded directly in notebook)
SCHEMA_SQL = """
-- Job Market Database Schema
-- Compatible with PostgreSQL, Databricks, and Snowflake
-- Production schema for job market and targeted application system
-- Integrates data from USAJobs.gov, BLS, Department of Labor, and state employment boards
-- User Profiles Table
-- Stores user profiles for job matching and application tracking
CREATE TABLE user_profiles (
    user_id VARCHAR(255) PRIMARY KEY,
    email VARCHAR(255) NOT NULL UNIQUE,
    full_name VARCHAR(255),
    location_city VARCHAR(100),
    location_state VARCHAR(2),
    location_country VARCHAR(2) DEFAULT 'US',
    location_latitude NUMERIC(10, 7),
    location_longitude NUMERIC(10, 7),
    current_job_title VARCHAR(255),
    current_company VARCHAR(255),
    years_experience INTEGER,
    education_level VARCHAR(50),
    resume_text TEXT,
    linkedin_url VARCHAR(500),
    github_url VARCHAR(500),
    portfolio_url VARCHAR(500),
    preferred_work_model VARCHAR(50), -- 'remote', 'hybrid', 'onsite'
    salary_expectation_min INTEGER,
    salary_expectation_max INTEGER,
    preferred_locations TEXT, -- JSON array of preferred locations
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    last_active_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    profile_completeness_score NUMERIC(5, 2),
    is_active BOOLEAN DEFAULT TRUE
);
-- Companies Table
-- Stores employer/company information from job postings
CREATE TABLE companies (
    company_id VARCHAR(255) PRIMARY KEY,
    company_name VARCHAR(255) NOT NULL,
    company_name_normalized VARCHAR(255), -- Normalized name for matching
    industry VARCHAR(100),
    company_size VARCHAR(50), -- 'startup', 'small', 'medium', 'large', 'enterprise'
    headquarters_city VARCHAR(100),
    headquarters_state VARCHAR(2),
    headquarters_country VARCHAR(2) DEFAULT 'US',
    website_url VARCHAR(500),
    linkedin_url VARCHAR(500),
    description TEXT,
    founded_year INTEGER,
    employee_count INTEGER,
    revenue_range VARCHAR(50),
    is_federal_agency BOOLEAN DEFAULT FALSE,
    agency_code VARCHAR(50), -- For federal agencies
    data_source VARCHAR(50), -- 'usajobs', 'bls', 'state_board', 'aggregated'
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    company_rating NUMERIC(3, 2), -- Average rating from reviews
    total_reviews INTEGER DEFAULT 0
);
-- Job Postings Table
-- Stores job listings from various .gov sources and aggregated sources
CREATE TABLE job_postings (
    job_id VARCHAR(255) PRIMARY KEY,
    company_id VARCHAR(255) NOT NULL,
    job_title VARCHAR(255) NOT NULL,
    job_title_normalized VARCHAR(255), -- Normalized title for matching
    job_description TEXT,
    job_type VARCHAR(50), -- 'full_time', 'part_time', 'contract', 'temporary', 'internship'
    work_model VARCHAR(50), -- 'remote', 'hybrid', 'onsite'
    location_city VARCHAR(100),
    location_state VARCHAR(2),
    location_country VARCHAR(2) DEFAULT 'US',
    location_latitude NUMERIC(10, 7),
    location_longitude NUMERIC(10, 7),
    salary_min INTEGER,
    salary_max INTEGER,
    salary_currency VARCHAR(3) DEFAULT 'USD',
    salary_type VARCHAR(50), -- 'annual', 'hourly', 'monthly'
    posted_date TIMESTAMP_NTZ NOT NULL,
    expiration_date TIMESTAMP_NTZ,
    application_url VARCHAR(1000),
    application_method VARCHAR(50), -- 'url', 'email', 'ats', 'usajobs'
    is_active BOOLEAN DEFAULT TRUE,
    is_federal_job BOOLEAN DEFAULT FALSE,
    usajobs_id VARCHAR(255), -- USAJobs.gov job ID
    agency_name VARCHAR(255), -- For federal jobs
    pay_plan VARCHAR(50), -- For federal jobs
    grade_level VARCHAR(50), -- For federal jobs
    data_source VARCHAR(50) NOT NULL, -- 'usajobs', 'bls', 'state_board', 'aggregated'
    source_url VARCHAR(1000),
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    view_count INTEGER DEFAULT 0,
    application_count INTEGER DEFAULT 0,
    match_score_avg NUMERIC(5, 2), -- Average match score from recommendations
    FOREIGN KEY (company_id) REFERENCES companies(company_id)
);
-- Skills Table
-- Master list of skills/technologies/competencies
CREATE TABLE skills (
    skill_id VARCHAR(255) PRIMARY KEY,
    skill_name VARCHAR(255) NOT NULL UNIQUE,
    skill_category VARCHAR(100), -- 'programming', 'framework', 'tool', 'soft_skill', 'certification'
    skill_type VARCHAR(50), -- 'technical', 'soft', 'certification', 'language'
    parent_skill_id VARCHAR(255), -- For skill hierarchies
    description TEXT,
    popularity_score NUMERIC(10, 2), -- Based on job posting frequency
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (parent_skill_id) REFERENCES skills(skill_id)
);
-- Job Skills Requirements Table
-- Links job postings to required/desired skills
CREATE TABLE job_skills_requirements (
    requirement_id VARCHAR(255) PRIMARY KEY,
    job_id VARCHAR(255) NOT NULL,
    skill_id VARCHAR(255) NOT NULL,
    requirement_type VARCHAR(50), -- 'required', 'preferred', 'nice_to_have'
    importance_score NUMERIC(5, 2), -- 1-10 importance score
    years_experience_required NUMERIC(4, 1),
    extracted_from_description BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (job_id) REFERENCES job_postings(job_id),
    FOREIGN KEY (skill_id) REFERENCES skills(skill_id),
    UNIQUE(job_id, skill_id, requirement_type)
);
-- User Skills Table
-- Links user profiles to their skills and proficiency levels
CREATE TABLE user_skills (
    user_skill_id VARCHAR(255) PRIMARY KEY,
    user_id VARCHAR(255) NOT NULL,
    skill_id VARCHAR(255) NOT NULL,
    proficiency_level VARCHAR(50), -- 'beginner', 'intermediate', 'advanced', 'expert'
    proficiency_score NUMERIC(5, 2), -- 1-10 proficiency score
    years_experience NUMERIC(4, 1),
    last_used_date DATE,
    verified BOOLEAN DEFAULT FALSE, -- Skills verified through assessments/certifications
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (user_id) REFERENCES user_profiles(user_id),
    FOREIGN KEY (skill_id) REFERENCES skills(skill_id),
    UNIQUE(user_id, skill_id)
);
-- Job Applications Table
-- Tracks user applications to job postings
CREATE TABLE job_applications (
    application_id VARCHAR(255) PRIMARY KEY,
    user_id VARCHAR(255) NOT NULL,
    job_id VARCHAR(255) NOT NULL,
    application_status VARCHAR(50), -- 'draft', 'submitted', 'under_review', 'interview', 'offer', 'rejected', 'withdrawn'
    application_date TIMESTAMP_NTZ,
    submitted_at TIMESTAMP_NTZ,
    status_updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    cover_letter_text TEXT,
    resume_version VARCHAR(255),
    match_score NUMERIC(5, 2), -- Calculated match score at time of application
    application_method VARCHAR(50), -- 'direct', 'ats', 'email', 'usajobs'
    application_reference_id VARCHAR(255), -- External application ID
    notes TEXT,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    updated_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (user_id) REFERENCES user_profiles(user_id),
    FOREIGN KEY (job_id) REFERENCES job_postings(job_id)
);
-- Job Recommendations Table
-- Stores AI-generated job recommendations for users (mirroring jobright.ai)
CREATE TABLE job_recommendations (
    recommendation_id VARCHAR(255) PRIMARY KEY,
    user_id VARCHAR(255) NOT NULL,
    job_id VARCHAR(255) NOT NULL,
    match_score NUMERIC(5, 2) NOT NULL, -- Overall match score (0-100)
    skill_match_score NUMERIC(5, 2), -- Skill alignment score
    location_match_score NUMERIC(5, 2), -- Location preference match
    salary_match_score NUMERIC(5, 2), -- Salary expectation match
    experience_match_score NUMERIC(5, 2), -- Experience level match
    work_model_match_score NUMERIC(5, 2), -- Work model preference match
    recommendation_reason TEXT, -- Explanation for recommendation
    recommendation_rank INTEGER, -- Rank within user's recommendations
    is_liked BOOLEAN DEFAULT FALSE,
    is_applied BOOLEAN DEFAULT FALSE,
    is_dismissed BOOLEAN DEFAULT FALSE,
    recommendation_date TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    expires_at TIMESTAMP_NTZ, -- Recommendation expiration
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (user_id) REFERENCES user_profiles(user_id),
    FOREIGN KEY (job_id) REFERENCES job_postings(job_id),
    UNIQUE(user_id, job_id, recommendation_date)
);
-- Market Trends Table
-- Aggregated job market trends and statistics
CREATE TABLE market_trends (
    trend_id VARCHAR(255) PRIMARY KEY,
    trend_date DATE NOT NULL,
    geographic_scope VARCHAR(50), -- 'national', 'state', 'city', 'metro'
    location_state VARCHAR(2),
    location_city VARCHAR(100),
    location_metro VARCHAR(100),
    industry VARCHAR(100),
    job_category VARCHAR(100),
    total_job_postings INTEGER,
    new_job_postings INTEGER, -- New postings in period
    active_job_seekers INTEGER, -- Estimated from application data
    average_salary_min INTEGER,
    average_salary_max INTEGER,
    median_salary INTEGER,
    top_skills TEXT, -- JSON array of top skills
    skill_demand_trend TEXT, -- JSON object of skill demand changes
    competition_index NUMERIC(5, 2), -- Applications per job ratio
    growth_rate NUMERIC(10, 4), -- Percentage growth in postings
    data_source VARCHAR(50), -- 'bls', 'aggregated', 'usajobs'
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    UNIQUE(trend_date, geographic_scope, location_state, location_city, industry, job_category)
);
-- Job Market Analytics Table
-- Detailed analytics for job market data
CREATE TABLE job_market_analytics (
    analytics_id VARCHAR(255) PRIMARY KEY,
    analysis_date DATE NOT NULL,
    analysis_type VARCHAR(50), -- 'daily', 'weekly', 'monthly', 'quarterly'
    geographic_scope VARCHAR(50),
    location_state VARCHAR(2),
    location_city VARCHAR(100),
    industry VARCHAR(100),
    total_companies INTEGER,
    total_active_jobs INTEGER,
    remote_job_percentage NUMERIC(5, 2),
    hybrid_job_percentage NUMERIC(5, 2),
    average_time_to_fill_days INTEGER,
    average_applications_per_job NUMERIC(10, 2),
    top_employers TEXT, -- JSON array
    emerging_skills TEXT, -- JSON array of trending skills
    declining_skills TEXT, -- JSON array of declining skills
    salary_trends TEXT, -- JSON object with salary trend data
    job_type_distribution TEXT, -- JSON object
    work_model_distribution TEXT, -- JSON object
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- Data Source Metadata Table
-- Tracks data sources and extraction metadata
CREATE TABLE data_source_metadata (
    metadata_id VARCHAR(255) PRIMARY KEY,
    source_name VARCHAR(100) NOT NULL, -- 'usajobs', 'bls', 'state_board'
    source_type VARCHAR(50), -- 'api', 'scraper', 'manual', 'aggregated'
    extraction_date TIMESTAMP_NTZ NOT NULL,
    extraction_method VARCHAR(100),
    records_extracted INTEGER,
    records_new INTEGER,
    records_updated INTEGER,
    records_failed INTEGER,
    extraction_status VARCHAR(50), -- 'success', 'partial', 'failed'
    error_message TEXT,
    api_endpoint VARCHAR(1000),
    api_response_code INTEGER,
    extraction_duration_seconds INTEGER,
    created_at TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()
);
-- User Job Search History Table
-- Tracks user search behavior for recommendation improvement
CREATE TABLE user_job_search_history (
    search_id VARCHAR(255) PRIMARY KEY,
    user_id VARCHAR(255) NOT NULL,
    search_query VARCHAR(500),
    search_filters TEXT, -- JSON object of filters applied
    location_filter VARCHAR(255),
    salary_filter_min INTEGER,
    salary_filter_max INTEGER,
    work_model_filter VARCHAR(50),
    job_type_filter VARCHAR(50),
    industry_filter VARCHAR(100),
    results_count INTEGER,
    search_date TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP(),
    FOREIGN KEY (user_id) REFERENCES user_profiles(user_id)
);
-- Indexes for performance optimization
CREATE INDEX idx_job_postings_company_id ON job_postings(company_id);
CREATE INDEX idx_job_postings_posted_date ON job_postings(posted_date);
CREATE INDEX idx_job_postings_location_state ON job_postings(location_state);
CREATE INDEX idx_job_postings_location_city ON job_postings(location_city);
CREATE INDEX idx_job_postings_work_model ON job_postings(work_model);
CREATE INDEX idx_job_postings_job_type ON job_postings(job_type);
CREATE INDEX idx_job_postings_is_active ON job_postings(is_active);
CREATE INDEX idx_job_postings_data_source ON job_postings(data_source);
CREATE INDEX idx_job_postings_posted_date_active ON job_postings(posted_date, is_active);
CREATE INDEX idx_job_skills_requirements_job_id ON job_skills_requirements(job_id);
CREATE INDEX idx_job_skills_requirements_skill_id ON job_skills_requirements(skill_id);
CREATE INDEX idx_job_skills_requirements_type ON job_skills_requirements(requirement_type);
CREATE INDEX idx_user_skills_user_id ON user_skills(user_id);
CREATE INDEX idx_user_skills_skill_id ON user_skills(skill_id);
CREATE INDEX idx_job_applications_user_id ON job_applications(user_id);
CREATE INDEX idx_job_applications_job_id ON job_applications(job_id);
CREATE INDEX idx_job_applications_status ON job_applications(application_status);
CREATE INDEX idx_job_applications_submitted_at ON job_applications(submitted_at);
CREATE INDEX idx_job_recommendations_user_id ON job_recommendations(user_id);
CREATE INDEX idx_job_recommendations_job_id ON job_recommendations(job_id);
CREATE INDEX idx_job_recommendations_match_score ON job_recommendations(match_score DESC);
CREATE INDEX idx_job_recommendations_date ON job_recommendations(recommendation_date);
CREATE INDEX idx_market_trends_date ON market_trends(trend_date);
CREATE INDEX idx_market_trends_location ON market_trends(location_state, location_city);
CREATE INDEX idx_market_trends_industry ON market_trends(industry);
CREATE INDEX idx_user_profiles_location ON user_profiles(location_state, location_city);
CREATE INDEX idx_user_profiles_email ON user_profiles(email);
CREATE INDEX idx_user_profiles_is_active ON user_profiles(is_active);
CREATE INDEX idx_companies_industry ON companies(industry);
CREATE INDEX idx_companies_name_normalized ON companies(company_name_normalized);
"""
def execute_schema_sql(connection):
    """Execute every statement of the embedded ``SCHEMA_SQL`` script.

    The script is split on ``;`` and each non-empty piece is executed on a
    cursor obtained from ``connection``. A statement that raises is reported
    and skipped so the remaining statements are still attempted; the work is
    then committed.

    Args:
        connection: An open DB-API style connection exposing ``cursor()``,
            ``commit()`` and ``rollback()``.

    Returns:
        bool: ``True`` only when every statement executed without error.
        ``False`` when at least one statement failed, or when an unexpected
        error (for example a failing commit) triggered a rollback.
    """
    cursor = connection.cursor()
    try:
        # Naive split: relies on SCHEMA_SQL containing no ';' inside string
        # literals or comments.
        statements = [s.strip() for s in SCHEMA_SQL.split(';') if s.strip()]
        total = len(statements)
        failed = 0
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                print(f"  ‚úÖ Executed statement {idx}/{total}")
            except Exception as e:
                # Best effort: report and keep going, but remember the failure
                # so the final status is not reported as a success.
                # NOTE(review): backends that abort the whole transaction on
                # error (e.g. PostgreSQL) will reject every later statement
                # too -- confirm whether per-statement savepoints are wanted.
                failed += 1
                error_msg = str(e)[:100]
                print(f"  ‚ö†Ô∏è  Statement {idx} warning: {error_msg}")
        connection.commit()
        if failed:
            print(f"\nSchema load incomplete: {failed} of {total} statements failed")
            return False
        print("\n‚úÖ Schema loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n‚ùå Error loading schema: {e}")
        return False
    finally:
        cursor.close()
# Load the schema right away when an earlier cell has already defined `conn`;
# otherwise tell the reader how to proceed.
if 'conn' not in globals():
    print("‚ö†Ô∏è  Database connection not found. Run connection cell first.")
    print("   Schema SQL is available in SCHEMA_SQL variable")
else:
    # Single call emits the same three banner lines, one per line.
    print("="*80, "LOADING EMBEDDED SCHEMA", "="*80, sep="\n")
    execute_schema_sql(conn)


In [None]:
# ============================================================================
# EMBEDDED DATA.SQL - DB-8
# ============================================================================
# This cell contains sample data for the database
# Execute this cell to load data into PostgreSQL
import psycopg2
# Data SQL (embedded directly in notebook)
DATA_SQL = """
-- Job Market Database Sample Data
-- Production sample data for job market and targeted application system
-- Includes federal jobs (USAJobs.gov format), private sector jobs, users, companies, and market analytics
-- User Profiles Sample Data
INSERT INTO user_profiles (user_id, email, full_name, location_city, location_state, location_country, location_latitude, location_longitude, current_job_title, current_company, years_experience, education_level, resume_text, linkedin_url, github_url, portfolio_url, preferred_work_model, salary_expectation_min, salary_expectation_max, preferred_locations, profile_completeness_score, is_active) VALUES
('user_001', 'alice.johnson@email.com', 'Alice Johnson', 'Washington', 'DC', 'US', 38.9072, -77.0369, 'Data Engineer', 'Tech Corp', 5, 'Bachelor''s Degree', 'Experienced data engineer with expertise in Python, SQL, and cloud platforms. Led data pipeline projects serving 1M+ users.', 'https://linkedin.com/in/alicejohnson', 'https://github.com/alicejohnson', 'https://alicejohnson.dev', 'remote', 120000, 160000, '["Washington DC", "Remote", "New York NY"]', 85.50, TRUE),
('user_002', 'bob.smith@email.com', 'Bob Smith', 'San Francisco', 'CA', 'US', 37.7749, -122.4194, 'Software Engineer', 'StartupXYZ', 3, 'Master''s Degree', 'Full-stack developer specializing in React and Node.js. Built scalable web applications.', 'https://linkedin.com/in/bobsmith', 'https://github.com/bobsmith', NULL, 'hybrid', 100000, 140000, '["San Francisco CA", "Remote"]', 78.25, TRUE),
('user_003', 'carol.williams@email.com', 'Carol Williams', 'Austin', 'TX', 'US', 30.2672, -97.7431, 'Data Scientist', 'DataCo', 7, 'PhD', 'Machine learning researcher with publications in top-tier conferences. Expert in NLP and computer vision.', 'https://linkedin.com/in/carolwilliams', 'https://github.com/carolwilliams', 'https://carolwilliams.research', 'remote', 140000, 180000, '["Remote", "Austin TX"]', 92.00, TRUE),
('user_004', 'david.brown@email.com', 'David Brown', 'New York', 'NY', 'US', 40.7128, -74.0060, 'Product Manager', 'ProductInc', 4, 'MBA', 'Product manager with track record of launching successful products. Strong analytical and leadership skills.', 'https://linkedin.com/in/davidbrown', NULL, NULL, 'onsite', 130000, 170000, '["New York NY"]', 70.50, TRUE),
('user_005', 'emma.davis@email.com', 'Emma Davis', 'Seattle', 'WA', 'US', 47.6062, -122.3321, 'DevOps Engineer', 'CloudTech', 6, 'Bachelor''s Degree', 'DevOps engineer specializing in Kubernetes, AWS, and CI/CD pipelines. Reduced deployment time by 80%.', 'https://linkedin.com/in/emmadavis', 'https://github.com/emmadavis', NULL, 'hybrid', 115000, 155000, '["Seattle WA", "Remote"]', 81.75, TRUE),
('user_006', 'frank.miller@email.com', 'Frank Miller', 'Chicago', 'IL', 'US', 41.8781, -87.6298, 'Backend Engineer', 'BackendCo', 2, 'Bachelor''s Degree', 'Backend engineer with experience in Java, Spring Boot, and microservices architecture.', 'https://linkedin.com/in/frankmiller', 'https://github.com/frankmiller', NULL, 'onsite', 90000, 120000, '["Chicago IL"]', 65.00, TRUE),
('user_007', 'grace.wilson@email.com', 'Grace Wilson', 'Boston', 'MA', 'US', 42.3601, -71.0589, 'Frontend Engineer', 'FrontendPro', 4, 'Bachelor''s Degree', 'Frontend engineer specializing in React, TypeScript, and modern UI frameworks. Built responsive web applications.', 'https://linkedin.com/in/gracewilson', 'https://github.com/gracewilson', 'https://gracewilson.dev', 'remote', 100000, 135000, '["Remote", "Boston MA"]', 75.25, TRUE),
('user_008', 'henry.moore@email.com', 'Henry Moore', 'Denver', 'CO', 'US', 39.7392, -104.9903, 'Data Analyst', 'AnalyticsCo', 3, 'Master''s Degree', 'Data analyst with expertise in SQL, Python, and Tableau. Created dashboards driving business decisions.', 'https://linkedin.com/in/henrymoore', NULL, NULL, 'hybrid', 85000, 110000, '["Denver CO", "Remote"]', 68.50, TRUE),
('user_009', 'ivy.taylor@email.com', 'Ivy Taylor', 'Atlanta', 'GA', 'US', 33.7490, -84.3880, 'ML Engineer', 'MLTech', 5, 'Master''s Degree', 'Machine learning engineer with experience deploying ML models to production. Expert in TensorFlow and PyTorch.', 'https://linkedin.com/in/ivytaylor', 'https://github.com/ivytaylor', NULL, 'remote', 125000, 165000, '["Remote", "Atlanta GA"]', 88.00, TRUE),
('user_010', 'jack.anderson@email.com', 'Jack Anderson', 'Portland', 'OR', 'US', 45.5152, -122.6784, 'Security Engineer', 'SecureTech', 8, 'Bachelor''s Degree', 'Cybersecurity engineer with CISSP certification. Expert in network security and threat detection.', 'https://linkedin.com/in/jackanderson', NULL, NULL, 'hybrid', 140000, 180000, '["Portland OR", "Remote"]', 90.25, TRUE);
-- Companies Sample Data (mix of federal agencies and private companies)
INSERT INTO companies (company_id, company_name, company_name_normalized, industry, company_size, headquarters_city, headquarters_state, headquarters_country, website_url, linkedin_url, description, founded_year, employee_count, revenue_range, is_federal_agency, agency_code, data_source, company_rating, total_reviews) VALUES
('comp_001', 'U.S. Department of Defense', 'us department of defense', 'Government', 'enterprise', 'Arlington', 'VA', 'US', 'https://www.defense.gov', 'https://linkedin.com/company/us-department-of-defense', 'The Department of Defense is America''s largest government agency.', 1947, 2800000, 'N/A', TRUE, 'DOD', 'usajobs', 4.2, 15000),
('comp_002', 'National Security Agency', 'national security agency', 'Government', 'large', 'Fort Meade', 'MD', 'US', 'https://www.nsa.gov', NULL, 'The National Security Agency leads the U.S. Government in cryptology.', 1952, 30000, 'N/A', TRUE, 'NSA', 'usajobs', 4.5, 5000),
('comp_003', 'Federal Bureau of Investigation', 'federal bureau of investigation', 'Government', 'large', 'Washington', 'DC', 'US', 'https://www.fbi.gov', 'https://linkedin.com/company/fbi', 'The FBI protects the American people and upholds the Constitution.', 1908, 35000, 'N/A', TRUE, 'FBI', 'usajobs', 4.3, 8000),
('comp_004', 'Tech Corp', 'tech corp', 'Technology', 'large', 'San Francisco', 'CA', 'US', 'https://www.techcorp.com', 'https://linkedin.com/company/techcorp', 'Leading technology company specializing in cloud computing and AI.', 2010, 50000, '$10B+', FALSE, NULL, 'aggregated', 4.1, 25000),
('comp_005', 'DataCo', 'dataco', 'Technology', 'medium', 'Austin', 'TX', 'US', 'https://www.dataco.com', 'https://linkedin.com/company/dataco', 'Data analytics and machine learning solutions provider.', 2015, 500, '$50M-$100M', FALSE, NULL, 'aggregated', 4.0, 500),
('comp_006', 'CloudTech', 'cloudtech', 'Technology', 'large', 'Seattle', 'WA', 'US', 'https://www.cloudtech.com', 'https://linkedin.com/company/cloudtech', 'Cloud infrastructure and DevOps solutions.', 2012, 10000, '$1B-$10B', FALSE, NULL, 'aggregated', 4.4, 12000),
('comp_007', 'U.S. Department of Energy', 'us department of energy', 'Government', 'large', 'Washington', 'DC', 'US', 'https://www.energy.gov', 'https://linkedin.com/company/us-department-of-energy', 'The Department of Energy ensures America''s security and prosperity.', 1977, 13000, 'N/A', TRUE, 'DOE', 'usajobs', 4.0, 3000),
('comp_008', 'National Aeronautics and Space Administration', 'national aeronautics and space administration', 'Government', 'large', 'Washington', 'DC', 'US', 'https://www.nasa.gov', 'https://linkedin.com/company/nasa', 'NASA explores space and advances aeronautics research.', 1958, 18000, 'N/A', TRUE, 'NASA', 'usajobs', 4.7, 20000),
('comp_009', 'StartupXYZ', 'startupxyz', 'Technology', 'startup', 'San Francisco', 'CA', 'US', 'https://www.startupxyz.com', 'https://linkedin.com/company/startupxyz', 'Innovative startup building next-generation software products.', 2020, 50, '$1M-$10M', FALSE, NULL, 'aggregated', 3.8, 100),
('comp_010', 'ProductInc', 'productinc', 'Technology', 'medium', 'New York', 'NY', 'US', 'https://www.productinc.com', 'https://linkedin.com/company/productinc', 'Product development and innovation company.', 2018, 200, '$10M-$50M', FALSE, NULL, 'aggregated', 4.2, 300);
-- Skills Sample Data
INSERT INTO skills (skill_id, skill_name, skill_category, skill_type, parent_skill_id, description, popularity_score) VALUES
('skill_001', 'Python', 'programming', 'technical', NULL, 'High-level programming language for data science and web development', 95.5),
('skill_002', 'SQL', 'programming', 'technical', NULL, 'Structured Query Language for database management', 92.0),
('skill_003', 'JavaScript', 'programming', 'technical', NULL, 'Programming language for web development', 90.0),
('skill_004', 'Java', 'programming', 'technical', NULL, 'Object-oriented programming language for enterprise applications', 88.5),
('skill_005', 'React', 'framework', 'technical', 'skill_003', 'JavaScript library for building user interfaces', 87.0),
('skill_006', 'Node.js', 'framework', 'technical', 'skill_003', 'JavaScript runtime for server-side development', 85.0),
('skill_007', 'AWS', 'tool', 'technical', NULL, 'Amazon Web Services cloud computing platform', 89.0),
('skill_008', 'Kubernetes', 'tool', 'technical', NULL, 'Container orchestration platform', 82.0),
('skill_009', 'Docker', 'tool', 'technical', NULL, 'Containerization platform', 80.0),
('skill_010', 'TensorFlow', 'framework', 'technical', 'skill_001', 'Machine learning framework', 75.0),
('skill_011', 'PyTorch', 'framework', 'technical', 'skill_001', 'Deep learning framework', 73.0),
('skill_012', 'PostgreSQL', 'tool', 'technical', NULL, 'Open-source relational database', 78.0),
('skill_013', 'MongoDB', 'tool', 'technical', NULL, 'NoSQL database', 70.0),
('skill_014', 'Git', 'tool', 'technical', NULL, 'Version control system', 85.0),
('skill_015', 'Linux', 'tool', 'technical', NULL, 'Operating system', 75.0),
('skill_016', 'Communication', 'soft_skill', 'soft', NULL, 'Effective verbal and written communication', 90.0),
('skill_017', 'Leadership', 'soft_skill', 'soft', NULL, 'Ability to lead and inspire teams', 85.0),
('skill_018', 'Problem Solving', 'soft_skill', 'soft', NULL, 'Analytical thinking and problem-solving abilities', 88.0),
('skill_019', 'CISSP', 'certification', 'certification', NULL, 'Certified Information Systems Security Professional', 70.0),
('skill_020', 'AWS Certified Solutions Architect', 'certification', 'certification', NULL, 'AWS cloud architecture certification', 75.0);
-- Job Postings Sample Data (mix of federal and private jobs)
INSERT INTO job_postings (job_id, company_id, job_title, job_title_normalized, job_description, job_type, work_model, location_city, location_state, location_country, location_latitude, location_longitude, salary_min, salary_max, salary_currency, salary_type, posted_date, expiration_date, application_url, application_method, is_active, is_federal_job, usajobs_id, agency_name, pay_plan, grade_level, data_source, source_url, view_count, application_count, match_score_avg) VALUES
('job_001', 'comp_001', 'Data Engineer', 'data engineer', 'Seeking experienced Data Engineer to design and implement data pipelines for defense systems. Must have TS/SCI clearance.', 'full_time', 'onsite', 'Arlington', 'VA', 'US', 38.8816, -77.0910, 120000, 160000, 'USD', 'annual', '2026-01-20 10:00:00', '2026-02-20 23:59:59', 'https://www.usajobs.gov/job/12345678', 'usajobs', TRUE, TRUE, '12345678', 'Department of Defense', 'GS', 'GS-13', 'usajobs', 'https://www.usajobs.gov/job/12345678', 450, 25, 78.5),
('job_002', 'comp_002', 'Cybersecurity Analyst', 'cybersecurity analyst', 'NSA is seeking Cybersecurity Analysts to protect national security systems. Requires TS/SCI clearance.', 'full_time', 'onsite', 'Fort Meade', 'MD', 'US', 39.1084, -76.7435, 110000, 150000, 'USD', 'annual', '2026-01-22 09:00:00', '2026-02-22 23:59:59', 'https://www.usajobs.gov/job/12345679', 'usajobs', TRUE, TRUE, '12345679', 'National Security Agency', 'GG', 'GG-12', 'usajobs', 'https://www.usajobs.gov/job/12345679', 320, 18, 82.0),
('job_003', 'comp_003', 'Data Scientist', 'data scientist', 'FBI is hiring Data Scientists to analyze intelligence data. Must have TS clearance.', 'full_time', 'onsite', 'Washington', 'DC', 'US', 38.9072, -77.0369, 130000, 170000, 'USD', 'annual', '2026-01-21 11:00:00', '2026-02-21 23:59:59', 'https://www.usajobs.gov/job/12345680', 'usajobs', TRUE, TRUE, '12345680', 'Federal Bureau of Investigation', 'GS', 'GS-14', 'usajobs', 'https://www.usajobs.gov/job/12345680', 280, 15, 85.5),
('job_004', 'comp_004', 'Senior Software Engineer', 'senior software engineer', 'Tech Corp is looking for Senior Software Engineers to build scalable cloud applications. Remote work available.', 'full_time', 'remote', 'San Francisco', 'CA', 'US', 37.7749, -122.4194, 150000, 200000, 'USD', 'annual', '2026-01-19 08:00:00', '2026-02-19 23:59:59', 'https://www.techcorp.com/careers/job-001', 'ats', TRUE, FALSE, NULL, NULL, NULL, NULL, 'aggregated', 'https://www.techcorp.com/careers/job-001', 1200, 85, 88.0),
('job_005', 'comp_005', 'Machine Learning Engineer', 'machine learning engineer', 'DataCo seeks ML Engineers to develop and deploy machine learning models. Strong Python and TensorFlow experience required.', 'full_time', 'hybrid', 'Austin', 'TX', 'US', 30.2672, -97.7431, 140000, 180000, 'USD', 'annual', '2026-01-18 14:00:00', '2026-02-18 23:59:59', 'https://www.dataco.com/careers/ml-engineer', 'ats', TRUE, FALSE, NULL, NULL, NULL, NULL, 'aggregated', 'https://www.dataco.com/careers/ml-engineer', 890, 42, 90.5),
('job_006', 'comp_006', 'DevOps Engineer', 'devops engineer', 'CloudTech is hiring DevOps Engineers to manage Kubernetes clusters and CI/CD pipelines. AWS certification preferred.', 'full_time', 'hybrid', 'Seattle', 'WA', 'US', 47.6062, -122.3321, 130000, 170000, 'USD', 'annual', '2026-01-17 10:00:00', '2026-02-17 23:59:59', 'https://www.cloudtech.com/careers/devops', 'ats', TRUE, FALSE, NULL, NULL, NULL, NULL, 'aggregated', 'https://www.cloudtech.com/careers/devops', 650, 38, 87.5),
('job_007', 'comp_007', 'Energy Data Analyst', 'energy data analyst', 'DOE is seeking Data Analysts to analyze energy consumption data and support policy decisions.', 'full_time', 'onsite', 'Washington', 'DC', 'US', 38.9072, -77.0369, 90000, 120000, 'USD', 'annual', '2026-01-23 09:00:00', '2026-02-23 23:59:59', 'https://www.usajobs.gov/job/12345681', 'usajobs', TRUE, TRUE, '12345681', 'Department of Energy', 'GS', 'GS-11', 'usajobs', 'https://www.usajobs.gov/job/12345681', 180, 12, 75.0),
('job_008', 'comp_008', 'Aerospace Data Engineer', 'aerospace data engineer', 'NASA is hiring Data Engineers to process satellite and mission data. Must have strong Python and SQL skills.', 'full_time', 'onsite', 'Houston', 'TX', 'US', 29.7604, -95.3698, 125000, 165000, 'USD', 'annual', '2026-01-24 08:00:00', '2026-02-24 23:59:59', 'https://www.usajobs.gov/job/12345682', 'usajobs', TRUE, TRUE, '12345682', 'National Aeronautics and Space Administration', 'GS', 'GS-13', 'usajobs', 'https://www.usajobs.gov/job/12345682', 520, 30, 88.5),
('job_009', 'comp_009', 'Full-Stack Developer', 'full stack developer', 'StartupXYZ is looking for Full-Stack Developers to build web applications using React and Node.js.', 'full_time', 'remote', 'San Francisco', 'CA', 'US', 37.7749, -122.4194, 100000, 140000, 'USD', 'annual', '2026-01-16 12:00:00', '2026-02-16 23:59:59', 'https://www.startupxyz.com/careers/fullstack', 'email', TRUE, FALSE, NULL, NULL, NULL, NULL, 'aggregated', 'https://www.startupxyz.com/careers/fullstack', 420, 28, 80.0),
('job_010', 'comp_010', 'Product Manager', 'product manager', 'ProductInc seeks Product Managers to lead product development initiatives. MBA preferred.', 'full_time', 'onsite', 'New York', 'NY', 'US', 40.7128, -74.0060, 140000, 180000, 'USD', 'annual', '2026-01-15 11:00:00', '2026-02-15 23:59:59', 'https://www.productinc.com/careers/pm', 'ats', TRUE, FALSE, NULL, NULL, NULL, NULL, 'aggregated', 'https://www.productinc.com/careers/pm', 380, 22, 82.5);
-- Job Skills Requirements Sample Data
INSERT INTO job_skills_requirements (requirement_id, job_id, skill_id, requirement_type, importance_score, years_experience_required, extracted_from_description) VALUES
('req_001', 'job_001', 'skill_001', 'required', 9.5, 5.0, TRUE),
('req_002', 'job_001', 'skill_002', 'required', 9.0, 5.0, TRUE),
('req_003', 'job_001', 'skill_007', 'preferred', 8.0, 3.0, TRUE),
('req_004', 'job_002', 'skill_019', 'required', 10.0, 5.0, TRUE),
('req_005', 'job_002', 'skill_015', 'required', 8.5, 4.0, TRUE),
('req_006', 'job_003', 'skill_001', 'required', 9.5, 5.0, TRUE),
('req_007', 'job_003', 'skill_010', 'preferred', 8.5, 3.0, TRUE),
('req_008', 'job_004', 'skill_003', 'required', 9.0, 5.0, TRUE),
('req_009', 'job_004', 'skill_005', 'required', 9.5, 4.0, TRUE),
('req_010', 'job_004', 'skill_006', 'preferred', 8.0, 3.0, TRUE),
('req_011', 'job_005', 'skill_001', 'required', 10.0, 5.0, TRUE),
('req_012', 'job_005', 'skill_010', 'required', 9.5, 4.0, TRUE),
('req_013', 'job_006', 'skill_008', 'required', 9.0, 4.0, TRUE),
('req_014', 'job_006', 'skill_007', 'required', 9.5, 5.0, TRUE),
('req_015', 'job_006', 'skill_020', 'preferred', 8.0, NULL, TRUE),
('req_016', 'job_007', 'skill_002', 'required', 8.5, 3.0, TRUE),
('req_017', 'job_007', 'skill_001', 'preferred', 7.5, 2.0, TRUE),
('req_018', 'job_008', 'skill_001', 'required', 9.5, 5.0, TRUE),
('req_019', 'job_008', 'skill_002', 'required', 9.0, 5.0, TRUE),
('req_020', 'job_009', 'skill_003', 'required', 9.0, 3.0, TRUE),
('req_021', 'job_009', 'skill_005', 'required', 9.0, 3.0, TRUE),
('req_022', 'job_009', 'skill_006', 'preferred', 8.0, 2.0, TRUE),
('req_023', 'job_010', 'skill_016', 'required', 9.5, 5.0, TRUE),
('req_024', 'job_010', 'skill_017', 'preferred', 8.5, 4.0, TRUE);
-- User Skills Sample Data
INSERT INTO user_skills (user_skill_id, user_id, skill_id, proficiency_level, proficiency_score, years_experience, last_used_date, verified) VALUES
('us_001', 'user_001', 'skill_001', 'advanced', 8.5, 5.0, '2026-01-15', TRUE),
('us_002', 'user_001', 'skill_002', 'expert', 9.5, 5.0, '2026-01-20', TRUE),
('us_003', 'user_001', 'skill_007', 'advanced', 8.0, 4.0, '2026-01-18', TRUE),
('us_004', 'user_002', 'skill_003', 'advanced', 8.5, 3.0, '2026-01-19', TRUE),
('us_005', 'user_002', 'skill_005', 'advanced', 8.0, 3.0, '2026-01-17', TRUE),
('us_006', 'user_002', 'skill_006', 'intermediate', 7.0, 2.0, '2026-01-16', FALSE),
('us_007', 'user_003', 'skill_001', 'expert', 9.5, 7.0, '2026-01-21', TRUE),
('us_008', 'user_003', 'skill_010', 'expert', 9.0, 6.0, '2026-01-20', TRUE),
('us_009', 'user_003', 'skill_011', 'advanced', 8.5, 5.0, '2026-01-19', TRUE),
('us_010', 'user_004', 'skill_016', 'expert', 9.5, 4.0, '2026-01-18', TRUE),
('us_011', 'user_004', 'skill_017', 'advanced', 8.5, 4.0, '2026-01-17', TRUE),
('us_012', 'user_005', 'skill_008', 'advanced', 8.5, 5.0, '2026-01-20', TRUE),
('us_013', 'user_005', 'skill_007', 'expert', 9.5, 6.0, '2026-01-19', TRUE),
('us_014', 'user_005', 'skill_009', 'advanced', 8.0, 4.0, '2026-01-18', TRUE),
('us_015', 'user_006', 'skill_004', 'intermediate', 7.5, 2.0, '2026-01-16', FALSE),
('us_016', 'user_007', 'skill_003', 'advanced', 8.0, 4.0, '2026-01-17', TRUE),
('us_017', 'user_007', 'skill_005', 'advanced', 8.5, 4.0, '2026-01-18', TRUE),
('us_018', 'user_008', 'skill_002', 'advanced', 8.0, 3.0, '2026-01-19', TRUE),
('us_019', 'user_008', 'skill_001', 'intermediate', 7.0, 2.0, '2026-01-18', FALSE),
('us_020', 'user_009', 'skill_001', 'advanced', 8.5, 5.0, '2026-01-20', TRUE),
('us_021', 'user_009', 'skill_010', 'advanced', 8.0, 4.0, '2026-01-19', TRUE),
('us_022', 'user_010', 'skill_019', 'expert', 9.5, 8.0, '2026-01-21', TRUE),
('us_023', 'user_010', 'skill_015', 'expert', 9.0, 8.0, '2026-01-20', TRUE);
-- Job Applications Sample Data
INSERT INTO job_applications (application_id, user_id, job_id, application_status, application_date, submitted_at, status_updated_at, cover_letter_text, resume_version, match_score, application_method, application_reference_id, notes) VALUES
('app_001', 'user_001', 'job_001', 'under_review', '2026-01-25 10:00:00', '2026-01-25 10:15:00', '2026-01-26 14:30:00', 'I am excited to apply for the Data Engineer position at DOD. My 5 years of experience align perfectly with your requirements.', 'resume_v2.pdf', 85.5, 'usajobs', 'USAJOBS-12345678', 'Strong match for federal position'),
('app_002', 'user_003', 'job_003', 'interview', '2026-01-26 09:00:00', '2026-01-26 09:20:00', '2026-01-28 16:00:00', 'As a Data Scientist with 7 years of experience, I am eager to contribute to FBI''s mission.', 'resume_v3.pdf', 90.0, 'usajobs', 'USAJOBS-12345680', 'Interview scheduled for next week'),
('app_003', 'user_002', 'job_009', 'submitted', '2026-01-24 14:00:00', '2026-01-24 14:30:00', '2026-01-24 14:30:00', 'I am interested in the Full-Stack Developer role at StartupXYZ.', 'resume_v1.pdf', 78.5, 'email', NULL, NULL),
('app_004', 'user_005', 'job_006', 'under_review', '2026-01-23 11:00:00', '2026-01-23 11:45:00', '2026-01-25 10:00:00', 'My DevOps experience makes me a strong candidate for this position.', 'resume_v2.pdf', 88.0, 'ats', 'CLOUDTECH-001', NULL),
('app_005', 'user_004', 'job_010', 'rejected', '2026-01-22 08:00:00', '2026-01-22 08:15:00', '2026-01-27 12:00:00', 'I am applying for the Product Manager position.', 'resume_v1.pdf', 75.0, 'ats', 'PRODUCTINC-001', 'Not selected - insufficient product management experience');
-- Job Recommendations Sample Data
INSERT INTO job_recommendations (recommendation_id, user_id, job_id, match_score, skill_match_score, location_match_score, salary_match_score, experience_match_score, work_model_match_score, recommendation_reason, recommendation_rank, is_liked, is_applied, is_dismissed, recommendation_date, expires_at) VALUES
('rec_001', 'user_001', 'job_001', 85.5, 90.0, 80.0, 85.0, 90.0, 60.0, 'Strong skill match (Python, SQL, AWS). Location preference met. Salary within range.', 1, FALSE, TRUE, FALSE, '2026-01-25 08:00:00', '2026-02-25 23:59:59'),
('rec_002', 'user_001', 'job_008', 82.0, 88.0, 70.0, 80.0, 85.0, 60.0, 'Excellent skill alignment. Federal position with competitive salary.', 2, FALSE, FALSE, FALSE, '2026-01-25 08:00:00', '2026-02-25 23:59:59'),
('rec_003', 'user_002', 'job_009', 78.5, 85.0, 90.0, 75.0, 70.0, 100.0, 'Perfect work model match (remote). Skills align with React and Node.js requirements.', 1, TRUE, TRUE, FALSE, '2026-01-24 09:00:00', '2026-02-24 23:59:59'),
('rec_004', 'user_003', 'job_003', 90.0, 95.0, 80.0, 90.0, 95.0, 60.0, 'Exceptional match. PhD in ML with TensorFlow experience aligns perfectly.', 1, TRUE, TRUE, FALSE, '2026-01-26 10:00:00', '2026-02-26 23:59:59'),
('rec_005', 'user_003', 'job_005', 88.5, 92.0, 90.0, 85.0, 90.0, 80.0, 'Strong ML engineering match. Hybrid work model preferred.', 2, FALSE, FALSE, FALSE, '2026-01-26 10:00:00', '2026-02-26 23:59:59'),
('rec_006', 'user_005', 'job_006', 88.0, 90.0, 85.0, 85.0, 90.0, 90.0, 'Perfect DevOps match. Kubernetes and AWS expertise required.', 1, TRUE, TRUE, FALSE, '2026-01-23 11:00:00', '2026-02-23 23:59:59'),
('rec_007', 'user_007', 'job_004', 75.0, 80.0, 70.0, 70.0, 75.0, 100.0, 'Good frontend match. Remote work available.', 1, FALSE, FALSE, FALSE, '2026-01-22 12:00:00', '2026-02-22 23:59:59'),
('rec_008', 'user_009', 'job_005', 90.5, 95.0, 90.0, 90.0, 90.0, 80.0, 'Excellent ML engineering match. Strong Python and TensorFlow alignment.', 1, FALSE, FALSE, FALSE, '2026-01-21 13:00:00', '2026-02-21 23:59:59');
-- Market Trends Sample Data
INSERT INTO market_trends (trend_id, trend_date, geographic_scope, location_state, location_city, location_metro, industry, job_category, total_job_postings, new_job_postings, active_job_seekers, average_salary_min, average_salary_max, median_salary, top_skills, skill_demand_trend, competition_index, growth_rate, data_source) VALUES
('trend_001', '2026-01-20', 'national', NULL, NULL, NULL, 'Technology', 'Data Engineering', 5000, 500, 15000, 120000, 160000, 140000, '["Python", "SQL", "AWS", "Kubernetes"]', '{"Python": 15, "SQL": 12, "AWS": 18, "Kubernetes": 20}', 3.0, 5.2, 'bls'),
('trend_002', '2026-01-20', 'state', 'CA', NULL, NULL, 'Technology', 'Software Engineering', 8000, 800, 25000, 130000, 180000, 155000, '["JavaScript", "React", "Node.js", "Python"]', '{"JavaScript": 10, "React": 15, "Node.js": 12, "Python": 8}', 3.1, 6.5, 'aggregated'),
('trend_003', '2026-01-20', 'city', 'DC', 'Washington', 'Washington-Arlington-Alexandria', 'Government', 'Data Science', 300, 30, 1200, 110000, 150000, 130000, '["Python", "SQL", "TensorFlow", "Security Clearance"]', '{"Python": 20, "SQL": 18, "TensorFlow": 15, "Security Clearance": 25}', 4.0, 8.0, 'usajobs'),
('trend_004', '2026-01-21', 'national', NULL, NULL, NULL, 'Technology', 'DevOps', 3500, 350, 10000, 125000, 170000, 147500, '["Kubernetes", "AWS", "Docker", "Linux"]', '{"Kubernetes": 22, "AWS": 20, "Docker": 18, "Linux": 15}', 2.9, 7.8, 'aggregated'),
('trend_005', '2026-01-21', 'state', 'TX', NULL, NULL, 'Technology', 'Machine Learning', 2000, 200, 6000, 135000, 180000, 157500, '["Python", "TensorFlow", "PyTorch", "MLOps"]', '{"Python": 18, "TensorFlow": 20, "PyTorch": 15, "MLOps": 25}', 3.0, 9.5, 'aggregated');
-- Job Market Analytics Sample Data
INSERT INTO job_market_analytics (analytics_id, analysis_date, analysis_type, geographic_scope, location_state, location_city, industry, total_companies, total_active_jobs, remote_job_percentage, hybrid_job_percentage, average_time_to_fill_days, average_applications_per_job, top_employers, emerging_skills, declining_skills, salary_trends, job_type_distribution, work_model_distribution) VALUES
('analytics_001', '2026-01-20', 'daily', 'national', NULL, NULL, 'Technology', 500, 15000, 35.5, 25.0, 28, 45.5, '["Tech Corp", "CloudTech", "DataCo"]', '["Kubernetes", "MLOps", "React"]', '["jQuery", "PHP"]', '{"trend": "increasing", "annual_growth": 5.2}', '{"full_time": 85, "contract": 10, "part_time": 5}', '{"remote": 35.5, "hybrid": 25.0, "onsite": 39.5}'),
('analytics_002', '2026-01-20', 'daily', 'state', 'CA', NULL, 'Technology', 200, 8000, 40.0, 30.0, 25, 50.0, '["Tech Corp", "StartupXYZ"]', '["React", "TypeScript", "Next.js"]', '["AngularJS", "Backbone.js"]', '{"trend": "increasing", "annual_growth": 6.5}', '{"full_time": 80, "contract": 15, "part_time": 5}', '{"remote": 40.0, "hybrid": 30.0, "onsite": 30.0}'),
('analytics_003', '2026-01-21', 'daily', 'city', 'DC', 'Washington', 'Government', 50, 500, 5.0, 10.0, 45, 30.0, '["Department of Defense", "FBI", "NSA"]', '["Security Clearance", "Python", "Data Engineering"]', '["Legacy Systems"]', '{"trend": "stable", "annual_growth": 2.0}', '{"full_time": 95, "contract": 5, "part_time": 0}', '{"remote": 5.0, "hybrid": 10.0, "onsite": 85.0}');
-- Data Source Metadata Sample Data
INSERT INTO data_source_metadata (metadata_id, source_name, source_type, extraction_date, extraction_method, records_extracted, records_new, records_updated, records_failed, extraction_status, error_message, api_endpoint, api_response_code, extraction_duration_seconds) VALUES
('meta_001', 'usajobs', 'api', '2026-01-20 08:00:00', 'REST API', 150, 120, 30, 0, 'success', NULL, 'https://data.usajobs.gov/api/Search?DatePosted=14', 200, 45),
('meta_002', 'bls', 'api', '2026-01-20 09:00:00', 'REST API POST', 5000, 5000, 0, 0, 'success', NULL, 'https://api.bls.gov/publicAPI/v2/timeseries/data', 200, 120),
('meta_003', 'aggregated', 'scraper', '2026-01-20 10:00:00', 'Web Scraping', 2000, 1800, 200, 0, 'success', NULL, NULL, NULL, 300),
('meta_004', 'usajobs', 'api', '2026-01-21 08:00:00', 'REST API', 180, 150, 30, 0, 'success', NULL, 'https://data.usajobs.gov/api/Search?DatePosted=14', 200, 50),
('meta_005', 'aggregated', 'scraper', '2026-01-21 10:00:00', 'Web Scraping', 2200, 2000, 200, 0, 'success', NULL, NULL, NULL, 320);
-- User Job Search History Sample Data
INSERT INTO user_job_search_history (search_id, user_id, search_query, search_filters, location_filter, salary_filter_min, salary_filter_max, work_model_filter, job_type_filter, industry_filter, results_count, search_date) VALUES
('search_001', 'user_001', 'data engineer', '{"skills": ["Python", "SQL"], "experience": "5+"}', 'Washington DC', 120000, 160000, 'remote', 'full_time', 'Technology', 25, '2026-01-25 08:00:00'),
('search_002', 'user_002', 'full stack developer', '{"skills": ["React", "Node.js"]}', 'San Francisco CA', 100000, 140000, 'remote', 'full_time', 'Technology', 18, '2026-01-24 09:00:00'),
('search_003', 'user_003', 'machine learning engineer', '{"skills": ["Python", "TensorFlow"], "education": "PhD"}', 'Remote', 140000, 180000, 'remote', 'full_time', 'Technology', 12, '2026-01-26 10:00:00'),
('search_004', 'user_005', 'devops engineer', '{"skills": ["Kubernetes", "AWS"]}', 'Seattle WA', 130000, 170000, 'hybrid', 'full_time', 'Technology', 15, '2026-01-23 11:00:00'),
('search_005', 'user_007', 'frontend engineer', '{"skills": ["React", "TypeScript"]}', 'Remote', 100000, 135000, 'remote', 'full_time', 'Technology', 20, '2026-01-22 12:00:00');
"""
def _split_sql_statements(sql):
    """Split a SQL script into individual statements.

    A semicolon ends a statement only when it sits outside quoted text
    ('...' literals, "..." identifiers) and outside ``--`` line comments.
    A doubled quote inside a literal ('' or "") closes and immediately
    reopens the quoted region, so it needs no special handling.

    Backslash-escaped quotes, dollar-quoted bodies and ``/* */`` block
    comments are not recognised by this splitter.

    Args:
        sql: Full script text.

    Returns:
        List of stripped, non-empty statements without the terminating
        semicolon, in script order.
    """
    statements = []
    buffer = []
    quote_char = None        # quote character we are currently inside, if any
    in_line_comment = False  # True between "--" and the next newline
    previous = ''            # previous character of the current statement
    for char in sql:
        if in_line_comment:
            if char == '\n':
                in_line_comment = False
        elif quote_char is not None:
            if char == quote_char:
                quote_char = None
        elif char in ("'", '"'):
            quote_char = char
        elif char == '-' and previous == '-':
            in_line_comment = True
        elif char == ';':
            # Statement boundary: flush the buffer and drop the semicolon.
            statements.append(''.join(buffer).strip())
            buffer = []
            previous = ''
            continue
        buffer.append(char)
        previous = char
    # Whatever follows the last semicolon is still a candidate statement.
    statements.append(''.join(buffer).strip())
    return [stmt for stmt in statements if stmt]


def execute_data_sql(connection, sql=None):
    """Execute the embedded data SQL, one statement at a time.

    Each statement is committed on success and rolled back on failure, so a
    single bad statement neither blocks the statements after it nor discards
    the ones before it (engines such as PostgreSQL reject every further
    statement in a transaction once one has failed). Loading is best-effort:
    failures are reported and counted, and the remaining statements still run.

    Args:
        connection: DB-API style connection providing ``cursor()``,
            ``commit()`` and ``rollback()``.
        sql: Script to run. Defaults to the module-level ``DATA_SQL``.

    Returns:
        True when every statement executed successfully, False when at least
        one statement failed or an unexpected error interrupted loading.
    """
    if sql is None:
        sql = DATA_SQL
    cursor = connection.cursor()
    try:
        statements = _split_sql_statements(sql)
        total = len(statements)
        failed = 0
        for idx, statement in enumerate(statements, 1):
            try:
                cursor.execute(statement)
                connection.commit()
                print(f"  ‚úÖ Executed statement {idx}/{total}")
            except Exception as e:
                failed += 1
                # Clear the failed transaction so later statements can run.
                connection.rollback()
                error_msg = str(e)[:100]
                print(f"  ‚ö†Ô∏è  Statement {idx} warning: {error_msg}")
        if failed:
            print(f"\n‚ö†Ô∏è  Data loaded with errors: {failed} of {total} statement(s) failed")
            return False
        print("\n‚úÖ Data loaded successfully!")
        return True
    except Exception as e:
        connection.rollback()
        print(f"\n‚ùå Error loading data: {e}")
        return False
    finally:
        cursor.close()
# Kick off the data load automatically once a `conn` object exists in this kernel;
# otherwise tell the reader which cell to run first.
if 'conn' not in globals():
    print(
        "‚ö†Ô∏è  Database connection not found. Run connection cell first.",
        "   Data SQL is available in DATA_SQL variable",
        sep="\n",
    )
else:
    print("="*80, "LOADING EMBEDDED DATA", "="*80, sep="\n")
    execute_data_sql(conn)


In [None]:
# ============================================================================
# EMBEDDED QUERIES.JSON - DB-8
# ============================================================================
# This cell contains all query metadata embedded directly in the notebook
# No external file dependencies required
import json
# Queries data (embedded directly in notebook)
QUERIES_DATA = {
  "source_file": "/Users/machine/Documents/AQ/db/db-8/queries/queries.md",
  "extraction_timestamp": "20260208-2109",
  "total_queries": 30,
  "queries": [
    {
      "number": 1,
      "title": "Production-Grade AI Job Matching Engine with Multi-Dimensional Scoring and Skill Alignment Analysis",
      "description": "Description: Enterprise-level job matching algorithm with multi-dimensional scoring (skills, location, salary, experience, work model), skill gap analysis, weighted matching scores, and recommendation ranking. Demonstrates production patterns used by jobright.ai, LinkedIn, and Indeed for personalized job recommendations. Use Case:
    Personalized Job Recommendations - AI-Powered Job Matching for Targeted Applications Business Value: Generates personalized job recommendations with detailed match sco",
      "complexity": "Deep nested CTEs (8+ levels), multi-dimensional scoring algorithms, skill gap analysis, weighted aggregations, window functions with ranking, percentile calculations, correlated subqueries, complex joins across 6+ tables",
      "expected_output": "Ranked list of job recommendations with match scores, skill alignment details, location compatibility, salary match, and recommendation reasons.",
      "sql": "WITH user_profile_analysis AS (\n    -- First CTE: Extract and normalize user profile data\n    SELECT\n        up.user_id,\n        up.email,\n        up.full_name,\n        up.location_city,\n        up.location_state,\n        up.location_country,\n        up.current_job_title,\n        up.years_experience,\n        up.education_level,\n        up.preferred_work_model,\n        up.salary_expectation_min,\n        up.salary_expectation_max,\n        up.preferred_locations,\n        up.profile_completeness_score,\n        CASE\n            WHEN up.years_experience < 2 THEN 'entry_level'\n            WHEN up.years_experience BETWEEN 2 AND 5 THEN 'mid_level'\n            WHEN up.years_experience BETWEEN 6 AND 10 THEN 'senior_level'\n            ELSE 'executive_level'\n        END AS experience_level\n    FROM user_profiles up\n    WHERE up.is_active = TRUE\n),\nuser_skills_aggregated AS (\n    -- Second CTE: Aggregate user skills with proficiency scores\n    SELECT\n        usa.user_id,\n        COUNT(DISTINCT usa.skill_id) AS total_skills_count,\n        COUNT(CASE WHEN usa.proficiency_level IN ('advanced', 'expert') THEN 1 END) AS advanced_skills_count,\n        AVG(usa.proficiency_score) AS avg_proficiency_score,\n        SUM(CASE WHEN usa.proficiency_level = 'expert' THEN 10 WHEN usa.proficiency_level = 'advanced' THEN 7 WHEN usa.proficiency_level = 'intermediate' THEN 4 ELSE 1 END) AS weighted_skill_score,\n        ARRAY_AGG(DISTINCT usa.skill_id) AS user_skill_ids,\n        ARRAY_AGG(DISTINCT s.skill_category) AS skill_categories\n    FROM user_skills usa\n    INNER JOIN skills s ON usa.skill_id = s.skill_id\n    GROUP BY usa.user_id\n),\nactive_job_postings AS (\n    -- Third CTE: Filter active job postings with recent postings priority\n    SELECT\n        jp.job_id,\n        jp.company_id,\n        jp.job_title,\n        jp.job_type,\n        jp.work_model,\n        jp.location_city,\n        jp.location_state,\n        jp.location_country,\n  
      jp.salary_min,\n        jp.salary_max,\n        jp.salary_type,\n        jp.posted_date,\n        jp.expiration_date,\n        jp.is_federal_job,\n        jp.agency_name,\n        jp.data_source,\n        c.company_name,\n        c.industry,\n        c.company_size,\n        c.company_rating,\n        DATE_PART('day', CURRENT_TIMESTAMP - jp.posted_date) AS days_since_posted,\n        CASE\n            WHEN jp.posted_date >= CURRENT_TIMESTAMP - INTERVAL '7 days' THEN 1.2\n            WHEN jp.posted_date >= CURRENT_TIMESTAMP - INTERVAL '14 days' THEN 1.1\n            WHEN jp.posted_date >= CURRENT_TIMESTAMP - INTERVAL '30 days' THEN 1.0\n            ELSE 0.9\n        END AS recency_multiplier\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND (jp.expiration_date IS NULL OR jp.expiration_date > CURRENT_TIMESTAMP)\n),\njob_skills_aggregated AS (\n    -- Fourth CTE: Aggregate required and preferred skills for each job\n    SELECT\n        jsr.job_id,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'required' THEN jsr.skill_id END) AS required_skills_count,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'preferred' THEN jsr.skill_id END) AS preferred_skills_count,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'nice_to_have' THEN jsr.skill_id END) AS nice_to_have_skills_count,\n        AVG(CASE WHEN jsr.requirement_type = 'required' THEN jsr.importance_score ELSE NULL END) AS avg_required_importance,\n        ARRAY_AGG(DISTINCT CASE WHEN jsr.requirement_type = 'required' THEN jsr.skill_id END) AS required_skill_ids,\n        ARRAY_AGG(DISTINCT CASE WHEN jsr.requirement_type = 'preferred' THEN jsr.skill_id END) AS preferred_skill_ids,\n        ARRAY_AGG(DISTINCT jsr.skill_id) AS all_required_skill_ids,\n        SUM(CASE WHEN jsr.requirement_type = 'required' THEN jsr.importance_score ELSE jsr.importance_score * 0.5 END) AS weighted_skill_requirement_score\n    
FROM job_skills_requirements jsr\n    GROUP BY jsr.job_id\n),\nuser_job_skill_matching AS (\n    -- Fifth CTE:
    Calculate skill matching between user and job requirements\n    SELECT\n        upa.user_id,\n        ajp.job_id,\n        ajp.job_title,\n        ajp.company_name,\n        ajp.industry,\n        ajp.work_model,\n        ajp.location_state,\n        ajp.location_city,\n        ajp.salary_min,\n        ajp.salary_max,\n        ajp.posted_date,\n        ajp.recency_multiplier,\n        usa.total_skills_count AS user_total_skills,\n        jsa.required_skills_count,\n        jsa.preferred_skills_count,\n        jsa.all_required_skill_ids,\n        -- Calculate skill overlap\n        (\n            SELECT COUNT(*)\n            FROM UNNEST(usa.user_skill_ids) AS user_skill\n            WHERE user_skill = ANY(jsa.all_required_skill_ids)\n        ) AS matching_required_skills_count,\n        (\n            SELECT COUNT(*)\n            FROM UNNEST(usa.user_skill_ids) AS user_skill\n            WHERE user_skill = ANY(jsa.preferred_skill_ids)\n        ) AS matching_preferred_skills_count,\n        -- Calculate skill gap (missing required skills)\n        (\n            SELECT COUNT(*)\n            FROM UNNEST(jsa.required_skill_ids) AS req_skill\n            WHERE req_skill != ALL(COALESCE(usa.user_skill_ids, ARRAY[]::VARCHAR[]))\n        ) AS missing_required_skills_count,\n        -- Skill match score (0-100)\n        CASE\n            WHEN jsa.required_skills_count > 0 THEN\n                ROUND(\n                    (\n                        (\n                            SELECT COUNT(*)\n                            FROM UNNEST(usa.user_skill_ids) AS user_skill\n                            WHERE user_skill = ANY(jsa.all_required_skill_ids)\n                        )::NUMERIC / NULLIF(jsa.required_skills_count, 0)\n                    ) * 100,\n                    2\n                )\n            ELSE 0\n        END AS skill_match_score,\n        jsa.weighted_skill_requirement_score,\n        usa.weighted_skill_score AS user_weighted_skill_score\n    FROM 
user_profile_analysis upa\n    CROSS JOIN active_job_postings ajp\n    LEFT JOIN user_skills_aggregated usa ON upa.user_id = usa.user_id\n    LEFT JOIN job_skills_aggregated jsa ON ajp.job_id = jsa.job_id\n    WHERE usa.user_id IS NOT NULL\n        AND jsa.job_id IS NOT NULL\n),\nlocation_matching AS (\n    -- Sixth CTE: Calculate location compatibility scores\n    SELECT\n        ujsm.user_id,\n        ujsm.job_id,\n        ujsm.job_title,\n        ujsm.company_name,\n        ujsm.skill_match_score,\n        ujsm.matching_required_skills_count,\n        ujsm.missing_required_skills_count,\n        ujsm.recency_multiplier,\n        -- Location match score\n        CASE\n            WHEN upa.location_state = ujsm.location_state AND upa.location_city = ujsm.location_city THEN 100\n            WHEN upa.location_state = ujsm.location_state THEN 80\n            WHEN ujsm.work_model = 'remote' THEN 90\n            WHEN ujsm.work_model = 'hybrid' THEN 70\n            ELSE 50\n        END AS location_match_score,\n        -- Work model match score\n        CASE\n            WHEN upa.preferred_work_model = ujsm.work_model THEN 100\n            WHEN upa.preferred_work_model = 'remote' AND ujsm.work_model IN ('remote', 'hybrid') THEN 90\n            WHEN upa.preferred_work_model = 'hybrid' AND ujsm.work_model = 'hybrid' THEN 100\n            WHEN upa.preferred_work_model = 'hybrid' AND ujsm.work_model = 'remote' THEN 80\n            ELSE 60\n        END AS work_model_match_score,\n        ujsm.salary_min,\n        ujsm.salary_max,\n        upa.salary_expectation_min,\n        upa.salary_expectation_max\n    FROM user_job_skill_matching ujsm\n    INNER JOIN user_profile_analysis upa ON ujsm.user_id = upa.user_id\n),\nsalary_matching AS (\n    -- Seventh CTE: Calculate salary compatibility scores\n    SELECT\n        lm.user_id,\n        lm.job_id,\n        lm.job_title,\n        lm.company_name,\n        lm.skill_match_score,\n        lm.location_match_score,\n        
lm.work_model_match_score,\n        lm.matching_required_skills_count,\n        lm.missing_required_skills_count,\n        lm.recency_multiplier,\n        -- Salary match score\n        CASE\n            WHEN lm.salary_min IS NULL AND lm.salary_max IS NULL THEN 70\n            WHEN lm.salary_expectation_min IS NULL AND lm.salary_expectation_max IS NULL THEN 70\n            WHEN lm.salary_min >= lm.salary_expectation_min AND lm.salary_max <= lm.salary_expectation_max THEN 100\n            WHEN lm.salary_min >= lm.salary_expectation_min THEN 90\n            WHEN lm.salary_max <= lm.salary_expectation_max THEN 85\n            WHEN (lm.salary_min + lm.salary_max) / 2 >= (lm.salary_expectation_min + lm.salary_expectation_max) / 2 THEN 75\n            ELSE 60\n        END AS salary_match_score,\n        lm.salary_min,\n        lm.salary_max\n    FROM location_matching lm\n),\nexperience_matching AS (\n    -- Eighth CTE: Calculate experience level compatibility\n    SELECT\n        sm.user_id,\n        sm.job_id,\n        sm.job_title,\n        sm.company_name,\n        sm.skill_match_score,\n        sm.location_match_score,\n        sm.work_model_match_score,\n        sm.salary_match_score,\n        sm.matching_required_skills_count,\n        sm.missing_required_skills_count,\n        sm.recency_multiplier,\n        -- Experience match score based on job title analysis and user experience\n        CASE\n            WHEN upa.experience_level = 'entry_level' AND LOWER(sm.job_title) LIKE '%junior%' OR LOWER(sm.job_title) LIKE '%entry%' OR LOWER(sm.job_title) LIKE '%intern%' THEN 100\n            WHEN upa.experience_level = 'mid_level' AND (LOWER(sm.job_title) LIKE '%mid%' OR LOWER(sm.job_title) LIKE '%engineer%' OR LOWER(sm.job_title) LIKE '%developer%') THEN 95\n            WHEN upa.experience_level = 'senior_level' AND (LOWER(sm.job_title) LIKE '%senior%' OR LOWER(sm.job_title) LIKE '%lead%' OR LOWER(sm.job_title) LIKE '%principal%') THEN 100\n            WHEN 
upa.experience_level = 'executive_level' AND (LOWER(sm.job_title) LIKE '%director%' OR LOWER(sm.job_title) LIKE '%vp%' OR LOWER(sm.job_title) LIKE '%chief%') THEN 100\n            ELSE 70\n        END AS experience_match_score,\n        upa.years_experience,\n        upa.experience_level\n    FROM salary_matching sm\n    INNER JOIN user_profile_analysis upa ON sm.user_id = upa.user_id\n),\nfinal_match_scoring AS (\n    -- Final CTE: Calculate weighted overall match score\n    SELECT\n        em.user_id,\n        em.job_id,\n        em.job_title,\n        em.company_name,\n        em.skill_match_score,\n        em.location_match_score,\n        em.work_model_match_score,\n        em.salary_match_score,\n        em.experience_match_score,\n        em.matching_required_skills_count,\n        em.missing_required_skills_count,\n        em.recency_multiplier,\n        em.years_experience,\n        em.experience_level,\n        -- Weighted overall match score (mirroring jobright.ai algorithm)\n        ROUND(\n            (\n                em.skill_match_score * 0.35 +\n                em.location_match_score * 0.20 +\n                em.work_model_match_score * 0.15 +\n                em.salary_match_score * 0.15 +\n                em.experience_match_score * 0.15\n            ) * em.recency_multiplier,\n            2\n        ) AS overall_match_score,\n        -- Generate recommendation reason\n        CONCAT(\n            CASE\n                WHEN em.skill_match_score >= 80 THEN 'Strong skill alignment (' || em.matching_required_skills_count || ' matching skills). '\n                WHEN em.skill_match_score >= 60 THEN 'Good skill match (' || em.matching_required_skills_count || ' matching skills). '\n                ELSE 'Partial skill match (' || em.matching_required_skills_count || ' matching skills). '\n            END,\n            CASE\n                WHEN em.location_match_score >= 90 THEN 'Perfect location fit. 
'\n                WHEN em.location_match_score >= 70 THEN 'Good location match. '\n                ELSE 'Location may require relocation. '\n            END,\n            CASE\n                WHEN em.salary_match_score >= 85 THEN 'Salary expectations aligned. '\n                ELSE 'Salary range available. '\n            END\n        ) AS recommendation_reason\n    FROM experience_matching em\n)\nSELECT\n    fms.user_id,\n    fms.job_id,\n    fms.job_title,\n    fms.company_name,\n    fms.overall_match_score,\n    fms.skill_match_score,\n    fms.location_match_score,\n    fms.work_model_match_score,\n    fms.salary_match_score,\n    fms.experience_match_score,\n    fms.matching_required_skills_count,\n    fms.missing_required_skills_count,\n    fms.recommendation_reason,\n    -- Ranking within user's recommendations\n    ROW_NUMBER() OVER (\n        PARTITION BY fms.user_id\n        ORDER BY fms.overall_match_score DESC, fms.recency_multiplier DESC\n    ) AS recommendation_rank,\n    ajp.posted_date,\n    ajp.location_city,\n    ajp.location_state,\n    ajp.work_model,\n    ajp.salary_min,\n    ajp.salary_max,\n    ajp.industry,\n    ajp.company_rating\nFROM final_match_scoring fms\nINNER JOIN active_job_postings ajp ON fms.job_id = ajp.job_id\nWHERE fms.overall_match_score >= 60  -- Minimum match threshold\nORDER BY fms.user_id, fms.overall_match_score DESC, fms.recency_multiplier DESC\nLIMIT 100;",
      "line_number": 197,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00963,
        "row_count": 33,
        "column_count": 22,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 2,
      "title": "Recursive Skill Gap Analysis with Hierarchical Skill Dependencies and Learning Path Recommendations",
      "description": "Description: Recursive CTE-based skill gap analysis that identifies missing skills for target jobs, analyzes skill hierarchies and dependencies, calculates learning path recommendations, and determines prerequisite skills. Uses recursive CTEs to traverse skill dependency trees and identify optimal skill acquisition paths. Use Case:
    Skill Development Planning - Learning Path Recommendations for Career Advancement Business Value: Identifies skill gaps for target job roles, analyzes skill hierarchi",
      "complexity": "Recursive CTEs (WITH RECURSIVE), hierarchical skill traversal, dependency analysis, path finding algorithms, complex aggregations, window functions",
      "expected_output": "Skill gap analysis with missing skills, prerequisite skill chains, learning path recommendations, and estimated time to acquire skills.",
      "sql": "WITH RECURSIVE skill_hierarchy AS (\n    -- Anchor: Base skills with no parent dependencies\n    SELECT\n        s.skill_id,\n        s.skill_name,\n        s.skill_category,\n        s.skill_type,\n        s.parent_skill_id,\n        0 AS hierarchy_level,\n        ARRAY[s.skill_id::VARCHAR(255)] AS skill_path,\n        s.skill_name AS full_path_name\n    FROM skills s\n    WHERE s.parent_skill_id IS NULL\n\n    UNION ALL\n\n    -- Recursive: Build skill hierarchy with dependencies\n    SELECT\n        s.skill_id,\n        s.skill_name,\n        s.skill_category,\n        s.skill_type,\n        s.parent_skill_id,\n        sh.hierarchy_level + 1,\n        (sh.skill_path || s.skill_id::VARCHAR(255))::VARCHAR(255)[] AS skill_path,\n        (sh.full_path_name || ' -> ' || s.skill_name)::VARCHAR(255) AS full_path_name\n    FROM skills s\n    INNER JOIN skill_hierarchy sh ON s.parent_skill_id = sh.skill_id\n    WHERE sh.hierarchy_level < 10  -- Prevent infinite recursion\n        AND NOT (s.skill_id = ANY(sh.skill_path))  -- Prevent cycles\n),\ntarget_job_skills AS (\n    -- CTE: Aggregate required skills for target job\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        jp.company_id,\n        ARRAY_AGG(DISTINCT jsr.skill_id) AS required_skill_ids,\n        COUNT(DISTINCT jsr.skill_id) AS total_required_skills\n    FROM job_postings jp\n    INNER JOIN job_skills_requirements jsr ON jp.job_id = jsr.job_id\n    WHERE jsr.requirement_type = 'required'\n        AND jp.is_active = TRUE\n    GROUP BY jp.job_id, jp.job_title, jp.company_id\n),\nuser_skill_gaps AS (\n    -- CTE:
    Identify missing skills for target job\n    SELECT\n        up.user_id,\n        tjs.job_id,\n        tjs.job_title,\n        tjs.total_required_skills,\n        ARRAY_AGG(DISTINCT us.skill_id) AS user_skill_ids,\n        -- Missing required skills\n        ARRAY(\n            SELECT req_skill\n            FROM UNNEST(tjs.required_skill_ids) AS req_skill\n            WHERE req_skill != ALL(COALESCE(ARRAY_AGG(DISTINCT us.skill_id), ARRAY[]::VARCHAR[]))\n        ) AS missing_skill_ids\n    FROM user_profiles up\n    CROSS JOIN target_job_skills tjs\n    LEFT JOIN user_skills us ON up.user_id = us.user_id AND us.skill_id = ANY(tjs.required_skill_ids)\n    WHERE up.is_active = TRUE\n    GROUP BY up.user_id, tjs.job_id, tjs.job_title, tjs.total_required_skills, tjs.required_skill_ids\n),\nmissing_skill_dependencies AS (\n    -- CTE: Find all prerequisite skills for missing skills using recursive hierarchy\n    SELECT DISTINCT\n        usg.user_id,\n        usg.job_id,\n        usg.job_title,\n        sh.skill_id AS missing_skill_id,\n        sh.skill_name AS missing_skill_name,\n        sh.skill_category,\n        sh.hierarchy_level,\n        sh.skill_path,\n        sh.full_path_name,\n        -- Check if user has any prerequisite skills in the path\n        CASE\n            WHEN EXISTS (\n                SELECT 1\n                FROM UNNEST(sh.skill_path[1:ARRAY_LENGTH(sh.skill_path, 1) - 1]) AS prereq_skill\n                WHERE prereq_skill = ANY(usg.user_skill_ids)\n            ) THEN TRUE\n            ELSE FALSE\n        END AS has_prerequisites\n    FROM user_skill_gaps usg\n    CROSS JOIN UNNEST(usg.missing_skill_ids) AS missing_skill(skill_id)\n    INNER JOIN skill_hierarchy sh ON missing_skill.skill_id = sh.skill_id\n),\nlearning_path_recommendations AS (\n    -- CTE: Generate learning path recommendations with optimal ordering\n    SELECT\n        msd.user_id,\n        msd.job_id,\n        msd.job_title,\n        msd.missing_skill_id,\n        
msd.missing_skill_name,\n        msd.skill_category,\n        msd.hierarchy_level,\n        msd.has_prerequisites,\n        -- Calculate learning priority (lower level = higher priority)\n        CASE\n            WHEN msd.hierarchy_level = 0 THEN 1\n            WHEN msd.has_prerequisites = TRUE THEN msd.hierarchy_level + 1\n            ELSE msd.hierarchy_level + 10  -- Skills without prerequisites come later\n        END AS learning_priority,\n        -- Estimate time to learn (based on hierarchy level and category)\n        CASE\n            WHEN msd.skill_category = 'certification' THEN 60 + (msd.hierarchy_level * 10)\n            WHEN msd.skill_category = 'programming' THEN 30 + (msd.hierarchy_level * 5)\n            WHEN msd.skill_category = 'framework' THEN 20 + (msd.hierarchy_level * 3)\n            ELSE 15 + (msd.hierarchy_level * 2)\n        END AS estimated_days_to_learn,\n        -- Prerequisite skills needed\n        (\n            SELECT ARRAY_AGG(prereq_skill)\n            FROM UNNEST(msd.skill_path[1:ARRAY_LENGTH(msd.skill_path, 1) - 1]) AS prereq_skill\n            WHERE prereq_skill != ALL(COALESCE(usg.user_skill_ids, ARRAY[]::VARCHAR[]))\n        ) AS prerequisite_skill_ids\n    FROM missing_skill_dependencies msd\n    INNER JOIN user_skill_gaps usg ON msd.user_id = usg.user_id AND msd.job_id = usg.job_id\n)\nSELECT\n    lpr.user_id,\n    up.full_name,\n    lpr.job_id,\n    lpr.job_title,\n    lpr.missing_skill_id,\n    lpr.missing_skill_name,\n    lpr.skill_category,\n    lpr.hierarchy_level,\n    lpr.learning_priority,\n    lpr.estimated_days_to_learn,\n    lpr.prerequisite_skill_ids,\n    -- Total skills gap summary\n    COUNT(*) OVER (PARTITION BY lpr.user_id, lpr.job_id) AS total_missing_skills,\n    -- Cumulative learning time estimate\n    SUM(lpr.estimated_days_to_learn) OVER (\n        PARTITION BY lpr.user_id, lpr.job_id\n        ORDER BY lpr.learning_priority\n        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n    ) AS 
cumulative_days_to_learn,\n    -- Learning path step number\n    ROW_NUMBER() OVER (\n        PARTITION BY lpr.user_id, lpr.job_id\n        ORDER BY lpr.learning_priority, lpr.hierarchy_level\n    ) AS learning_step_number\nFROM learning_path_recommendations lpr\nINNER JOIN user_profiles up ON lpr.user_id = up.user_id\nORDER BY lpr.user_id, lpr.job_id, lpr.learning_priority, lpr.hierarchy_level\nLIMIT 100;",
      "line_number": 531,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.008133,
        "row_count": 7,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 3,
      "title": "Market Trend Analysis with Time-Series Forecasting and Skill Demand Projections",
      "description": "Description: Advanced time-series analysis of job market trends with forecasting, skill demand projections, growth rate calculations, and competitive intelligence. Uses window functions for trend analysis, moving averages, and predictive modeling patterns. Use Case:
    Market Intelligence - Job Market Trend Analysis and Skill Demand Forecasting Business Value: Provides comprehensive market trend analysis showing job posting growth rates, skill demand trends, salary trends, geographic distribution s",
      "complexity": "Deep nested CTEs (6+ levels), time-series analysis, window functions with multiple frame clauses, moving averages, growth rate calculations, percentile rankings, forecasting patterns",
      "expected_output": "Market trend analysis with growth rates, skill demand projections, salary trends, geographic shifts, and competitive metrics.",
      "sql": "WITH market_trend_base AS (\n    -- First CTE: Base market trend data with time windows\n    SELECT\n        mt.trend_id,\n        mt.trend_date,\n        mt.geographic_scope,\n        mt.location_state,\n        mt.location_city,\n        mt.industry,\n        mt.job_category,\n        mt.total_job_postings,\n        mt.new_job_postings,\n        mt.average_salary_min,\n        mt.average_salary_max,\n        mt.median_salary,\n        mt.competition_index,\n        mt.growth_rate,\n        DATE_TRUNC('month', mt.trend_date) AS trend_month,\n        DATE_TRUNC('quarter', mt.trend_date) AS trend_quarter,\n        DATE_TRUNC('year', mt.trend_date) AS trend_year,\n        EXTRACT(MONTH FROM mt.trend_date) AS month_num,\n        EXTRACT(QUARTER FROM mt.trend_date) AS quarter_num\n    FROM market_trends mt\n    WHERE mt.trend_date >= CURRENT_DATE - INTERVAL '2 years'\n),\ntrend_aggregations AS (\n    -- Second CTE: Aggregate trends by time periods\n    SELECT\n        mtb.trend_month,\n        mtb.trend_quarter,\n        mtb.trend_year,\n        mtb.geographic_scope,\n        mtb.location_state,\n        mtb.location_city,\n        mtb.industry,\n        mtb.job_category,\n        COUNT(DISTINCT mtb.trend_id) AS trend_data_points,\n        SUM(mtb.total_job_postings) AS total_postings,\n        SUM(mtb.new_job_postings) AS new_postings,\n        AVG(mtb.average_salary_min) AS avg_salary_min,\n        AVG(mtb.average_salary_max) AS avg_salary_max,\n        AVG(mtb.median_salary) AS avg_median_salary,\n        AVG(mtb.competition_index) AS avg_competition_index,\n        AVG(mtb.growth_rate) AS avg_growth_rate,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY mtb.total_job_postings) AS median_postings,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY mtb.total_job_postings) AS p75_postings,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY mtb.total_job_postings) AS p25_postings\n    FROM market_trend_base mtb\n    GROUP BY\n        
mtb.trend_month,\n        mtb.trend_quarter,\n        mtb.trend_year,\n        mtb.geographic_scope,\n        mtb.location_state,\n        mtb.location_city,\n        mtb.industry,\n        mtb.job_category\n),\ntime_series_analysis AS (\n    -- Third CTE: Time-series analysis with window functions\n    SELECT\n        ta.trend_month,\n        ta.trend_quarter,\n        ta.trend_year,\n        ta.geographic_scope,\n        ta.location_state,\n        ta.location_city,\n        ta.industry,\n        ta.job_category,\n        ta.total_postings,\n        ta.new_postings,\n        ta.avg_salary_min,\n        ta.avg_salary_max,\n        ta.avg_median_salary,\n        ta.avg_competition_index,\n        ta.avg_growth_rate,\n        -- Moving averages for trend smoothing\n        AVG(ta.total_postings) OVER (\n            PARTITION BY ta.geographic_scope, ta.location_state, ta.industry, ta.job_category\n            ORDER BY ta.trend_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_3month_postings,\n        AVG(ta.total_postings) OVER (\n            PARTITION BY ta.geographic_scope, ta.location_state, ta.industry, ta.job_category\n            ORDER BY ta.trend_month\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_6month_postings,\n        -- Period-over-period growth\n        LAG(ta.total_postings, 1) OVER (\n            PARTITION BY ta.geographic_scope, ta.location_state, ta.industry, ta.job_category\n            ORDER BY ta.trend_month\n        ) AS prev_month_postings,\n        LAG(ta.total_postings, 12) OVER (\n            PARTITION BY ta.geographic_scope, ta.location_state, ta.industry, ta.job_category\n            ORDER BY ta.trend_month\n        ) AS prev_year_postings,\n        -- Salary trend analysis\n        AVG(ta.avg_median_salary) OVER (\n            PARTITION BY ta.geographic_scope, ta.location_state, ta.industry, ta.job_category\n            ORDER BY ta.trend_month\n            ROWS BETWEEN 2 
PRECEDING AND CURRENT ROW\n        ) AS moving_avg_3month_salary,\n        LAG(ta.avg_median_salary, 1) OVER (\n            PARTITION BY ta.geographic_scope, ta.location_state, ta.industry, ta.job_category\n            ORDER BY ta.trend_month\n        ) AS prev_month_salary\n    FROM trend_aggregations ta\n),\ngrowth_metrics AS (\n    -- Fourth CTE:
    Calculate growth metrics and trends\n    SELECT\n        tsa.trend_month,\n        tsa.trend_quarter,\n        tsa.trend_year,\n        tsa.geographic_scope,\n        tsa.location_state,\n        tsa.location_city,\n        tsa.industry,\n        tsa.job_category,\n        tsa.total_postings,\n        tsa.new_postings,\n        tsa.avg_salary_min,\n        tsa.avg_salary_max,\n        tsa.avg_median_salary,\n        tsa.avg_competition_index,\n        tsa.moving_avg_3month_postings,\n        tsa.moving_avg_6month_postings,\n        -- Month-over-month growth rate\n        CASE\n            WHEN tsa.prev_month_postings > 0 THEN\n                ROUND(((tsa.total_postings - tsa.prev_month_postings)::NUMERIC / tsa.prev_month_postings) * 100, 2)\n            ELSE NULL\n        END AS mom_growth_rate,\n        -- Year-over-year growth rate\n        CASE\n            WHEN tsa.prev_year_postings > 0 THEN\n                ROUND(((tsa.total_postings - tsa.prev_year_postings)::NUMERIC / tsa.prev_year_postings) * 100, 2)\n            ELSE NULL\n        END AS yoy_growth_rate,\n        -- Salary growth rate\n        CASE\n            WHEN tsa.prev_month_salary > 0 THEN\n                ROUND(((tsa.avg_median_salary - tsa.prev_month_salary)::NUMERIC / tsa.prev_month_salary) * 100, 2)\n            ELSE NULL\n        END AS salary_growth_rate,\n        -- Trend direction\n        CASE\n            WHEN tsa.total_postings > tsa.moving_avg_6month_postings * 1.1 THEN 'accelerating'\n            WHEN tsa.total_postings > tsa.moving_avg_6month_postings THEN 'growing'\n            WHEN tsa.total_postings < tsa.moving_avg_6month_postings * 0.9 THEN 'declining'\n            ELSE 'stable'\n        END AS trend_direction\n    FROM time_series_analysis tsa\n),\nskill_demand_projection AS (\n    -- Fifth CTE: Project skill demand based on job posting trends\n    SELECT\n        gm.trend_month,\n        gm.location_state,\n        gm.industry,\n        gm.job_category,\n        
gm.total_postings,\n        gm.mom_growth_rate,\n        gm.yoy_growth_rate,\n        gm.trend_direction,\n        -- Aggregate skill requirements from active job postings\n        (\n            SELECT ARRAY_AGG(s.skill_name ORDER BY s.skill_count DESC)\n            FROM (\n                SELECT DISTINCT s.skill_name, COUNT(*) AS skill_count\n                FROM job_postings jp\n                INNER JOIN job_skills_requirements jsr ON jp.job_id = jsr.job_id\n                INNER JOIN skills s ON jsr.skill_id = s.skill_id\n                INNER JOIN companies c ON jp.company_id = c.company_id\n                WHERE jp.location_state = gm.location_state\n                    AND c.industry = gm.industry\n                    AND DATE_TRUNC('month', jp.posted_date) = gm.trend_month\n                    AND jsr.requirement_type = 'required'\n                GROUP BY s.skill_name\n                ORDER BY COUNT(*) DESC\n                LIMIT 10\n            ) s\n        ) AS top_skills,\n        -- Skill demand growth\n        (\n            SELECT COUNT(DISTINCT jsr.skill_id)\n            FROM job_postings jp\n            INNER JOIN job_skills_requirements jsr ON jp.job_id = jsr.job_id\n            INNER JOIN companies c ON jp.company_id = c.company_id\n            WHERE jp.location_state = gm.location_state\n                AND c.industry = gm.industry\n                AND DATE_TRUNC('month', jp.posted_date) = gm.trend_month\n        ) AS unique_skills_demand,\n        (\n            SELECT COUNT(DISTINCT jsr.skill_id)\n            FROM job_postings jp\n            INNER JOIN job_skills_requirements jsr ON jp.job_id = jsr.job_id\n            INNER JOIN companies c ON jp.company_id = c.company_id\n            WHERE jp.location_state = gm.location_state\n                AND c.industry = gm.industry\n                AND DATE_TRUNC('month', jp.posted_date) = gm.trend_month - INTERVAL '1 month'\n        ) AS prev_unique_skills_demand\n    FROM growth_metrics 
gm\n),\nforecast_projections AS (\n    -- Sixth CTE: Generate simple forecasting projections\n    SELECT\n        sdp.trend_month,\n        sdp.location_state,\n        sdp.industry,\n        sdp.job_category,\n        sdp.total_postings,\n        sdp.mom_growth_rate,\n        sdp.yoy_growth_rate,\n        sdp.trend_direction,\n        sdp.top_skills,\n        sdp.unique_skills_demand,\n        -- Project next month postings (simple linear projection)\n        CASE\n            WHEN sdp.mom_growth_rate IS NOT NULL THEN\n                ROUND(sdp.total_postings * (1 + (sdp.mom_growth_rate / 100)), 0)\n            ELSE sdp.total_postings\n        END AS projected_next_month_postings,\n        -- Project 3 months ahead\n        CASE\n            WHEN sdp.mom_growth_rate IS NOT NULL THEN\n                ROUND(sdp.total_postings * POWER(1 + (sdp.mom_growth_rate / 100), 3), 0)\n            ELSE sdp.total_postings\n        END AS projected_3month_postings,\n        -- Skill demand change\n        CASE\n            WHEN sdp.prev_unique_skills_demand > 0 THEN\n                ROUND(((sdp.unique_skills_demand - sdp.prev_unique_skills_demand)::NUMERIC / sdp.prev_unique_skills_demand) * 100, 2)\n            ELSE NULL\n        END AS skill_demand_change_rate\n    FROM skill_demand_projection sdp\n)\nSELECT\n    fp.trend_month,\n    fp.location_state,\n    fp.industry,\n    fp.job_category,\n    fp.total_postings,\n    fp.mom_growth_rate,\n    fp.yoy_growth_rate,\n    fp.trend_direction,\n    fp.top_skills,\n    fp.unique_skills_demand,\n    fp.projected_next_month_postings,\n    fp.projected_3month_postings,\n    fp.skill_demand_change_rate,\n    -- Market ranking\n    RANK() OVER (\n        PARTITION BY fp.trend_month, fp.location_state\n        ORDER BY fp.total_postings DESC\n    ) AS market_rank_by_postings,\n    -- Growth ranking\n    RANK() OVER (\n        PARTITION BY fp.trend_month, fp.location_state\n        ORDER BY fp.yoy_growth_rate DESC NULLS LAST\n    ) AS 
market_rank_by_growth\nFROM forecast_projections fp\nWHERE fp.trend_month >= CURRENT_DATE - INTERVAL '6 months'\nORDER BY fp.trend_month DESC, fp.location_state, fp.total_postings DESC\nLIMIT 100;",
      "line_number": 700,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.007039,
        "row_count": 5,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 4,
      "title": "Application Success Rate Analysis with Cohort Segmentation and Conversion Funnel Analytics",
      "description": "Description: Comprehensive application success rate analysis with cohort segmentation by application date, conversion funnel metrics, time-to-response analysis, and success factor identification. Uses window functions for cohort analysis and conversion rate calculations. Use Case:
    Application Performance Tracking - Success Rate Analysis and Conversion Optimization Business Value: Analyzes application success rates by cohort, identifies conversion bottlenecks in the application funnel, calculates",
      "complexity": "Deep nested CTEs (7+ levels), cohort analysis, conversion funnel calculations, window functions with multiple partitions, percentile rankings, time-to-event analysis",
      "expected_output": "Application success rates by cohort, conversion funnel metrics, time-to-response statistics, and success factor correlations.",
      "sql": "WITH application_cohorts AS (\n    -- First CTE: Segment applications by submission date cohorts\n    SELECT\n        ja.application_id,\n        ja.user_id,\n        ja.job_id,\n        ja.application_status,\n        ja.application_date,\n        ja.submitted_at,\n        ja.status_updated_at,\n        ja.match_score,\n        DATE_TRUNC('week', ja.submitted_at) AS application_week,\n        DATE_TRUNC('month', ja.submitted_at) AS application_month,\n        EXTRACT(WEEK FROM ja.submitted_at) AS week_num,\n        EXTRACT(MONTH FROM ja.submitted_at) AS month_num,\n        EXTRACT(YEAR FROM ja.submitted_at) AS year_num,\n        CASE\n            WHEN ja.application_status IN ('offer', 'interview') THEN 'success'\n            WHEN ja.application_status = 'rejected' THEN 'rejected'\n            WHEN ja.application_status IN ('under_review', 'submitted') THEN 'pending'\n            ELSE 'other'\n        END AS status_category\n    FROM job_applications ja\n    WHERE ja.submitted_at IS NOT NULL\n        AND ja.submitted_at >= CURRENT_TIMESTAMP - INTERVAL '1 year'\n),\napplication_timeline AS (\n    -- Second CTE: Calculate time-to-event metrics\n    SELECT\n        ac.application_id,\n        ac.user_id,\n        ac.job_id,\n        ac.application_status,\n        ac.status_category,\n        ac.application_week,\n        ac.application_month,\n        ac.submitted_at,\n        ac.status_updated_at,\n        ac.match_score,\n        -- Time to first status update\n        EXTRACT(EPOCH FROM (ac.status_updated_at - ac.submitted_at)) / 86400 AS days_to_status_update,\n        -- Time to current status\n        CASE\n            WHEN ac.status_category = 'success' THEN\n                EXTRACT(EPOCH FROM (ac.status_updated_at - ac.submitted_at)) / 86400\n            ELSE NULL\n        END AS days_to_success,\n        CASE\n            WHEN ac.status_category = 'rejected' THEN\n                EXTRACT(EPOCH FROM (ac.status_updated_at - ac.submitted_at)) / 
86400\n            ELSE NULL\n        END AS days_to_rejection\n    FROM application_cohorts ac\n),\ncohort_aggregations AS (\n    -- Third CTE: Aggregate metrics by cohort\n    SELECT\n        at.application_week,\n        at.application_month,\n        COUNT(DISTINCT at.application_id) AS total_applications,\n        COUNT(DISTINCT at.user_id) AS unique_applicants,\n        COUNT(DISTINCT CASE WHEN at.status_category = 'success' THEN at.application_id END) AS successful_applications,\n        COUNT(DISTINCT CASE WHEN at.status_category = 'rejected' THEN at.application_id END) AS rejected_applications,\n        COUNT(DISTINCT CASE WHEN at.status_category = 'pending' THEN at.application_id END) AS pending_applications,\n        AVG(at.match_score) AS avg_match_score,\n        AVG(at.days_to_status_update) AS avg_days_to_update,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY at.days_to_status_update) AS median_days_to_update,\n        AVG(at.days_to_success) AS avg_days_to_success,\n        AVG(at.days_to_rejection) AS avg_days_to_rejection,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY at.days_to_success) AS median_days_to_success\n    FROM application_timeline at\n    GROUP BY at.application_week, at.application_month\n),\nconversion_funnel AS (\n    -- Fourth CTE: Calculate conversion funnel metrics\n    SELECT\n        ca.application_week,\n        ca.application_month,\n        ca.total_applications,\n        ca.unique_applicants,\n        ca.successful_applications,\n        ca.rejected_applications,\n        ca.pending_applications,\n        ca.avg_match_score,\n        -- Conversion rates\n        ROUND((ca.successful_applications::NUMERIC / NULLIF(ca.total_applications, 0)) * 100, 2) AS success_rate_pct,\n        ROUND((ca.rejected_applications::NUMERIC / NULLIF(ca.total_applications, 0)) * 100, 2) AS rejection_rate_pct,\n        ROUND((ca.pending_applications::NUMERIC / NULLIF(ca.total_applications, 0)) * 100, 2) AS pending_rate_pct,\n        
-- Time metrics\n        ROUND(CAST(ca.avg_days_to_update AS NUMERIC), 2) AS avg_days_to_update,\n        ROUND(CAST(ca.median_days_to_update AS NUMERIC), 2) AS median_days_to_update,\n        ROUND(CAST(ca.avg_days_to_success AS NUMERIC), 2) AS avg_days_to_success,\n        ROUND(CAST(ca.median_days_to_success AS NUMERIC), 2) AS median_days_to_success,\n        ROUND(CAST(ca.avg_days_to_rejection AS NUMERIC), 2) AS avg_days_to_rejection\n    FROM cohort_aggregations ca\n),\nsuccess_factor_analysis AS (\n    -- Fifth CTE: Analyze factors correlated with success\n    SELECT\n        at.application_week,\n        at.status_category,\n        COUNT(*) AS application_count,\n        AVG(at.match_score) AS avg_match_score,\n        AVG(at.days_to_status_update) AS avg_days_to_update,\n        -- Job posting factors\n        AVG(jp.salary_min) AS avg_job_salary_min,\n        AVG(jp.salary_max) AS avg_job_salary_max,\n        COUNT(DISTINCT jp.company_id) AS unique_companies,\n        COUNT(DISTINCT c.industry) AS unique_industries,\n        -- User profile factors\n        AVG(up.years_experience) AS avg_years_experience,\n        COUNT(DISTINCT CASE WHEN up.preferred_work_model = jp.work_model THEN at.application_id END) AS work_model_matches,\n        COUNT(DISTINCT CASE WHEN up.location_state = jp.location_state THEN at.application_id END) AS location_matches\n    FROM application_timeline at\n    INNER JOIN job_postings jp ON at.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    INNER JOIN user_profiles up ON at.user_id = up.user_id\n    GROUP BY at.application_week, at.status_category\n),\ncohort_comparison AS (\n    -- Sixth CTE: Compare cohorts with window functions\n    SELECT\n        cf.application_week,\n        cf.application_month,\n        cf.total_applications,\n        cf.success_rate_pct,\n        cf.rejection_rate_pct,\n        cf.avg_days_to_update,\n        cf.avg_days_to_success,\n        -- Compare to previous week\n 
       LAG(cf.success_rate_pct, 1) OVER (ORDER BY cf.application_week) AS prev_week_success_rate,\n        LAG(cf.total_applications, 1) OVER (ORDER BY cf.application_week) AS prev_week_applications,\n        -- Moving averages\n        AVG(cf.success_rate_pct) OVER (\n            ORDER BY cf.application_week\n            ROWS BETWEEN 3 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_success_rate_4weeks,\n        AVG(cf.total_applications) OVER (\n            ORDER BY cf.application_week\n            ROWS BETWEEN 3 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_applications_4weeks,\n        -- Trend indicators\n        CASE\n            WHEN cf.success_rate_pct > LAG(cf.success_rate_pct, 1) OVER (ORDER BY cf.application_week) THEN 'improving'\n            WHEN cf.success_rate_pct < LAG(cf.success_rate_pct, 1) OVER (ORDER BY cf.application_week) THEN 'declining'\n            ELSE 'stable'\n        END AS success_rate_trend\n    FROM conversion_funnel cf\n)\nSELECT\n    cc.application_week,\n    cc.application_month,\n    cc.total_applications,\n    cc.success_rate_pct,\n    cc.rejection_rate_pct,\n    cc.avg_days_to_update,\n    cc.avg_days_to_success,\n    cc.prev_week_success_rate,\n    cc.moving_avg_success_rate_4weeks,\n    cc.success_rate_trend,\n    -- Success rate change\n    CASE\n        WHEN cc.prev_week_success_rate IS NOT NULL THEN\n            ROUND(cc.success_rate_pct - cc.prev_week_success_rate, 2)\n        ELSE NULL\n    END AS success_rate_change,\n    -- Success factor insights\n    sfa.avg_match_score AS successful_avg_match_score,\n    sfa.avg_job_salary_min AS successful_avg_salary_min,\n    sfa.work_model_matches AS successful_work_model_matches\nFROM cohort_comparison cc\nLEFT JOIN success_factor_analysis sfa ON cc.application_week = sfa.application_week AND sfa.status_category = 'success'\nORDER BY cc.application_week DESC\nLIMIT 100;",
      "line_number": 982,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.007343,
        "row_count": 2,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 5,
      "title": "Company Competitive Intelligence with Market Share Analysis and Employer Branding Metrics",
      "description": "Description: Comprehensive competitive intelligence analysis comparing companies by market share, job posting volume, application rates, employer ratings, and brand strength metrics. Uses aggregations, rankings, and comparative analytics. Use Case: Competitive Intelligence - Company Market Position and Employer Brand Analysis Business Value: Provides competitive intelligence showing company market share by industry, job posting trends, application attraction rates, employer brand strength, and c",
      "complexity": "Deep nested CTEs (6+ levels), market share calculations, competitive rankings, window functions with rankings, percentile analysis, comparative metrics",
      "expected_output": "Company competitive analysis with market share, posting trends, application rates, brand metrics, and competitive rankings.",
      "sql": "WITH company_job_metrics AS (\n    -- First CTE: Aggregate job posting metrics by company\n    SELECT\n        c.company_id,\n        c.company_name,\n        c.industry,\n        c.company_size,\n        c.company_rating,\n        c.total_reviews,\n        COUNT(DISTINCT jp.job_id) AS total_job_postings,\n        COUNT(DISTINCT CASE WHEN jp.posted_date >= CURRENT_DATE - INTERVAL '30 days' THEN jp.job_id END) AS recent_job_postings_30d,\n        COUNT(DISTINCT CASE WHEN jp.posted_date >= CURRENT_DATE - INTERVAL '90 days' THEN jp.job_id END) AS recent_job_postings_90d,\n        AVG(jp.salary_min) AS avg_salary_min,\n        AVG(jp.salary_max) AS avg_salary_max,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary_midpoint,\n        COUNT(DISTINCT jp.location_state) AS states_active,\n        COUNT(DISTINCT jp.work_model) AS work_models_offered,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'remote' THEN jp.job_id END) AS remote_jobs_count,\n        COUNT(DISTINCT CASE WHEN jp.is_federal_job = TRUE THEN jp.job_id END) AS federal_jobs_count\n    FROM companies c\n    LEFT JOIN job_postings jp ON c.company_id = jp.company_id\n    WHERE jp.is_active = TRUE OR jp.is_active IS NULL\n    GROUP BY c.company_id, c.company_name, c.industry, c.company_size, c.company_rating, c.total_reviews\n),\ncompany_application_metrics AS (\n    -- Second CTE: Aggregate application metrics by company\n    SELECT\n        jp.company_id,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT ja.user_id) AS unique_applicants,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications,\n        AVG(ja.match_score) AS avg_application_match_score,\n        COUNT(DISTINCT CASE WHEN ja.submitted_at >= CURRENT_DATE - INTERVAL '30 days' THEN ja.application_id END) AS recent_applications_30d,\n        COUNT(DISTINCT CASE WHEN ja.submitted_at >= CURRENT_DATE - 
INTERVAL '90 days' THEN ja.application_id END) AS recent_applications_90d\n    FROM job_postings jp\n    LEFT JOIN job_applications ja ON jp.job_id = ja.job_id\n    WHERE jp.is_active = TRUE\n    GROUP BY jp.company_id\n),\ncompany_skill_demand AS (\n    -- Third CTE: Analyze skill demand by company\n    SELECT\n        jp.company_id,\n        COUNT(DISTINCT jsr.skill_id) AS unique_skills_demanded,\n        ARRAY_AGG(DISTINCT s.skill_category) AS skill_categories,\n        ARRAY_AGG(DISTINCT s.skill_name) FILTER (\n            WHERE jsr.requirement_type = 'required'\n        ) AS top_required_skills\n    FROM job_postings jp\n    INNER JOIN job_skills_requirements jsr ON jp.job_id = jsr.job_id\n    INNER JOIN skills s ON jsr.skill_id = s.skill_id\n    WHERE jp.is_active = TRUE\n    GROUP BY jp.company_id\n),\nindustry_market_share AS (\n    -- Fourth CTE: Calculate market share by industry\n    SELECT\n        cjm.industry,\n        cjm.company_id,\n        cjm.company_name,\n        cjm.total_job_postings,\n        cjm.recent_job_postings_30d,\n        SUM(cjm.total_job_postings) OVER (PARTITION BY cjm.industry) AS industry_total_postings,\n        SUM(cjm.recent_job_postings_30d) OVER (PARTITION BY cjm.industry) AS industry_recent_postings_30d,\n        -- Market share calculation\n        ROUND(\n            (cjm.total_job_postings::NUMERIC / NULLIF(SUM(cjm.total_job_postings) OVER (PARTITION BY cjm.industry), 0)) * 100,\n            2\n        ) AS market_share_pct,\n        -- Recent market share\n        ROUND(\n            (cjm.recent_job_postings_30d::NUMERIC / NULLIF(SUM(cjm.recent_job_postings_30d) OVER (PARTITION BY cjm.industry), 0)) * 100,\n            2\n        ) AS recent_market_share_pct\n    FROM company_job_metrics cjm\n    WHERE cjm.industry IS NOT NULL\n),\ncompany_competitive_position AS (\n    -- Fifth CTE: Calculate competitive positioning metrics\n    SELECT\n        ims.industry,\n        ims.company_id,\n        ims.company_name,\n        
ims.total_job_postings,\n        ims.recent_job_postings_30d,\n        ims.market_share_pct,\n        ims.recent_market_share_pct,\n        cjm.company_size,\n        cjm.company_rating,\n        cjm.total_reviews,\n        cjm.avg_salary_midpoint,\n        cjm.states_active,\n        cjm.work_models_offered,\n        cjm.remote_jobs_count,\n        -- Application metrics\n        cam.total_applications,\n        cam.unique_applicants,\n        cam.successful_applications,\n        cam.avg_application_match_score,\n        cam.recent_applications_30d,\n        -- Application attraction rate\n        CASE\n            WHEN cjm.recent_job_postings_30d > 0 THEN\n                ROUND((cam.recent_applications_30d::NUMERIC / cjm.recent_job_postings_30d), 2)\n            ELSE 0\n        END AS applications_per_job_30d,\n        -- Success rate\n        CASE\n            WHEN cam.total_applications > 0 THEN\n                ROUND((cam.successful_applications::NUMERIC / cam.total_applications) * 100, 2)\n            ELSE NULL\n        END AS application_success_rate_pct,\n        -- Skill demand metrics\n        csd.unique_skills_demanded,\n        csd.skill_categories,\n        csd.top_required_skills,\n        -- Industry ranking\n        RANK() OVER (\n            PARTITION BY ims.industry\n            ORDER BY ims.total_job_postings DESC\n        ) AS industry_rank_by_postings,\n        RANK() OVER (\n            PARTITION BY ims.industry\n            ORDER BY ims.recent_market_share_pct DESC\n        ) AS industry_rank_by_recent_share,\n        RANK() OVER (\n            PARTITION BY ims.industry\n            ORDER BY cam.recent_applications_30d DESC NULLS LAST\n        ) AS industry_rank_by_applications\n    FROM industry_market_share ims\n    INNER JOIN company_job_metrics cjm ON ims.company_id = cjm.company_id\n    LEFT JOIN company_application_metrics cam ON ims.company_id = cam.company_id\n    LEFT JOIN company_skill_demand csd ON ims.company_id = 
csd.company_id\n),\nemployer_brand_score AS (\n    -- Sixth CTE: Calculate employer brand strength score\n    SELECT\n        ccp.industry,\n        ccp.company_id,\n        ccp.company_name,\n        ccp.total_job_postings,\n        ccp.market_share_pct,\n        ccp.company_rating,\n        ccp.total_reviews,\n        ccp.applications_per_job_30d,\n        ccp.application_success_rate_pct,\n        ccp.industry_rank_by_postings,\n        ccp.industry_rank_by_applications,\n        -- Employer brand score (0-100)\n        ROUND(\n            (\n                COALESCE(ccp.company_rating, 3.0) * 10 +  -- Rating component (0-50)\n                LEAST(ccp.market_share_pct, 20) * 1.5 +  -- Market share component (0-30)\n                LEAST(ccp.applications_per_job_30d, 50) * 0.4 +  -- Application attraction (0-20)\n                CASE\n                    WHEN ccp.industry_rank_by_postings <= 5 THEN 10\n                    WHEN ccp.industry_rank_by_postings <= 10 THEN 7\n                    WHEN ccp.industry_rank_by_postings <= 20 THEN 4\n                    ELSE 0\n                END  -- Industry position component (0-10)\n            ),\n            2\n        ) AS employer_brand_score,\n        -- Brand strength category\n        CASE\n            WHEN ccp.company_rating >= 4.5 AND ccp.market_share_pct >= 10 THEN 'top_employer'\n            WHEN ccp.company_rating >= 4.0 AND ccp.market_share_pct >= 5 THEN 'strong_employer'\n            WHEN ccp.company_rating >= 3.5 THEN 'good_employer'\n            ELSE 'developing_employer'\n        END AS brand_strength_category\n    FROM company_competitive_position ccp\n)\nSELECT\n    ebs.industry,\n    ebs.company_id,\n    ebs.company_name,\n    ebs.total_job_postings,\n    ebs.market_share_pct,\n    ebs.company_rating,\n    ebs.total_reviews,\n    ebs.applications_per_job_30d,\n    ebs.application_success_rate_pct,\n    ebs.industry_rank_by_postings,\n    ebs.industry_rank_by_applications,\n    
ebs.employer_brand_score,\n    ebs.brand_strength_category,\n    ccp.avg_salary_midpoint,\n    ccp.states_active,\n    ccp.remote_jobs_count,\n    ccp.unique_skills_demanded,\n    ccp.top_required_skills,\n    -- Overall ranking\n    RANK() OVER (ORDER BY ebs.employer_brand_score DESC) AS overall_brand_rank\nFROM employer_brand_score ebs\nINNER JOIN company_competitive_position ccp ON ebs.company_id = ccp.company_id\nWHERE ebs.total_job_postings > 0\nORDER BY ebs.industry, ebs.employer_brand_score DESC\nLIMIT 100;",
      "line_number": 1173,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006749,
        "row_count": 10,
        "column_count": 19,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 6,
      "title": "Geographic Job Market Analysis with Location Intelligence and Remote Work Trends",
      "description": "Description: Comprehensive geographic analysis of job markets by location, analyzing job density, salary variations, remote work adoption, competition levels, and location attractiveness scores. Uses spatial aggregations and geographic comparisons. Use Case: Location Intelligence - Geographic Job Market Analysis and Relocation Planning Business Value: Provides geographic job market intelligence showing job density by location, salary variations across regions, remote work adoption rates, competi",
      "complexity": "Deep nested CTEs (6+ levels), geographic aggregations, spatial comparisons, window functions with geographic partitions, percentile analysis by location",
      "expected_output": "Geographic market analysis with job density, salary ranges, remote work rates, competition metrics, and location attractiveness scores.",
      "sql": "WITH location_job_aggregations AS (\n    -- First CTE: Aggregate job postings by location\n    SELECT\n        jp.location_state,\n        jp.location_city,\n        jp.location_country,\n        COUNT(DISTINCT jp.job_id) AS total_job_postings,\n        COUNT(DISTINCT CASE WHEN jp.posted_date >= CURRENT_DATE - INTERVAL '30 days' THEN jp.job_id END) AS recent_postings_30d,\n        COUNT(DISTINCT jp.company_id) AS unique_companies,\n        COUNT(DISTINCT c.industry) AS unique_industries,\n        AVG(jp.salary_min) AS avg_salary_min,\n        AVG(jp.salary_max) AS avg_salary_max,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary_midpoint,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS median_salary,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p25_salary,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p75_salary,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'remote' THEN jp.job_id END) AS remote_jobs,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'hybrid' THEN jp.job_id END) AS hybrid_jobs,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'onsite' THEN jp.job_id END) AS onsite_jobs\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.location_state IS NOT NULL\n    GROUP BY jp.location_state, jp.location_city, jp.location_country\n),\nlocation_application_metrics AS (\n    -- Second CTE: Aggregate application metrics by location\n    SELECT\n        jp.location_state,\n        jp.location_city,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT ja.user_id) AS unique_applicants,\n        AVG(ja.match_score) AS avg_match_score,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications\n    FROM 
job_postings jp\n    INNER JOIN job_applications ja ON jp.job_id = ja.job_id\n    WHERE jp.is_active = TRUE\n        AND jp.location_state IS NOT NULL\n    GROUP BY jp.location_state, jp.location_city\n),\nlocation_competition_analysis AS (\n    -- Third CTE: Calculate competition metrics by location\n    SELECT\n        lja.location_state,\n        lja.location_city,\n        lja.total_job_postings,\n        lja.recent_postings_30d,\n        lja.unique_companies,\n        lja.unique_industries,\n        lja.avg_salary_midpoint,\n        lja.median_salary,\n        lja.p25_salary,\n        lja.p75_salary,\n        lja.remote_jobs,\n        lja.hybrid_jobs,\n        lja.onsite_jobs,\n        -- Remote work percentage\n        ROUND(\n            (lja.remote_jobs::NUMERIC / NULLIF(lja.total_job_postings, 0)) * 100,\n            2\n        ) AS remote_work_pct,\n        -- Competition index (applications per job)\n        CASE\n            WHEN lja.total_job_postings > 0 THEN\n                ROUND((lam.total_applications::NUMERIC / lja.total_job_postings), 2)\n            ELSE NULL\n        END AS competition_index,\n        -- Success rate\n        CASE\n            WHEN lam.total_applications > 0 THEN\n                ROUND((lam.successful_applications::NUMERIC / lam.total_applications) * 100, 2)\n            ELSE NULL\n        END AS application_success_rate_pct,\n        lam.avg_match_score\n    FROM location_job_aggregations lja\n    LEFT JOIN location_application_metrics lam ON lja.location_state = lam.location_state\n        AND lja.location_city = lam.location_city\n),\nstate_level_aggregations AS (\n    -- Fourth CTE: Aggregate to state level for comparison\n    SELECT\n        lca.location_state,\n        SUM(lca.total_job_postings) AS state_total_postings,\n        SUM(lca.recent_postings_30d) AS state_recent_postings,\n        COUNT(DISTINCT lca.location_city) AS cities_with_jobs,\n        AVG(lca.avg_salary_midpoint) AS state_avg_salary,\n        
AVG(lca.median_salary) AS state_median_salary,\n        AVG(lca.remote_work_pct) AS state_avg_remote_pct,\n        AVG(lca.competition_index) AS state_avg_competition,\n        SUM(lca.remote_jobs) AS state_total_remote_jobs,\n        SUM(lca.hybrid_jobs) AS state_total_hybrid_jobs,\n        SUM(lca.onsite_jobs) AS state_total_onsite_jobs\n    FROM location_competition_analysis lca\n    GROUP BY lca.location_state\n),\nlocation_attractiveness_scoring AS (\n    -- Fifth CTE:
    Calculate location attractiveness scores\n    SELECT\n        lca.location_state,\n        lca.location_city,\n        lca.total_job_postings,\n        lca.recent_postings_30d,\n        lca.unique_companies,\n        lca.unique_industries,\n        lca.avg_salary_midpoint,\n        lca.median_salary,\n        lca.remote_work_pct,\n        lca.competition_index,\n        lca.application_success_rate_pct,\n        -- Attractiveness score components\n        CASE\n            WHEN lca.total_job_postings >= sla.state_total_postings * 0.1 THEN 25\n            WHEN lca.total_job_postings >= sla.state_total_postings * 0.05 THEN 20\n            WHEN lca.total_job_postings >= sla.state_total_postings * 0.01 THEN 15\n            ELSE 10\n        END AS job_availability_score,\n        CASE\n            WHEN lca.avg_salary_midpoint >= sla.state_avg_salary * 1.1 THEN 25\n            WHEN lca.avg_salary_midpoint >= sla.state_avg_salary THEN 20\n            WHEN lca.avg_salary_midpoint >= sla.state_avg_salary * 0.9 THEN 15\n            ELSE 10\n        END AS salary_score,\n        CASE\n            WHEN lca.remote_work_pct >= 50 THEN 25\n            WHEN lca.remote_work_pct >= 30 THEN 20\n            WHEN lca.remote_work_pct >= 15 THEN 15\n            ELSE 10\n        END AS remote_work_score,\n        CASE\n            WHEN lca.competition_index <= 5 THEN 25\n            WHEN lca.competition_index <= 10 THEN 20\n            WHEN lca.competition_index <= 20 THEN 15\n            ELSE 10\n        END AS competition_score,\n        sla.state_total_postings,\n        sla.state_avg_salary\n    FROM location_competition_analysis lca\n    INNER JOIN state_level_aggregations sla ON lca.location_state = sla.location_state\n),\nfinal_location_ranking AS (\n    -- Sixth CTE: Calculate final attractiveness scores and rankings\n    SELECT\n        las.location_state,\n        las.location_city,\n        las.total_job_postings,\n        las.recent_postings_30d,\n        
las.unique_companies,\n        las.unique_industries,\n        ROUND(CAST(las.avg_salary_midpoint AS NUMERIC), 0) AS avg_salary_midpoint,\n        ROUND(CAST(las.median_salary AS NUMERIC), 0) AS median_salary,\n        las.remote_work_pct,\n        las.competition_index,\n        las.application_success_rate_pct,\n        -- Overall attractiveness score\n        (\n            las.job_availability_score +\n            las.salary_score +\n            las.remote_work_score +\n            las.competition_score\n        ) AS location_attractiveness_score,\n        -- Attractiveness category\n        CASE\n            WHEN (\n                las.job_availability_score +\n                las.salary_score +\n                las.remote_work_score +\n                las.competition_score\n            ) >= 90 THEN 'highly_attractive'\n            WHEN (\n                las.job_availability_score +\n                las.salary_score +\n                las.remote_work_score +\n                las.competition_score\n            ) >= 75 THEN 'attractive'\n            WHEN (\n                las.job_availability_score +\n                las.salary_score +\n                las.remote_work_score +\n                las.competition_score\n            ) >= 60 THEN 'moderate'\n            ELSE 'developing'\n        END AS attractiveness_category,\n        -- Rankings\n        RANK() OVER (\n            PARTITION BY las.location_state\n            ORDER BY (\n                las.job_availability_score +\n                las.salary_score +\n                las.remote_work_score +\n                las.competition_score\n            ) DESC\n        ) AS city_rank_in_state,\n        RANK() OVER (\n            ORDER BY (\n                las.job_availability_score +\n                las.salary_score +\n                las.remote_work_score +\n                las.competition_score\n            ) DESC\n        ) AS national_rank\n    FROM location_attractiveness_scoring las\n)\nSELECT\n    
flr.location_state,\n    flr.location_city,\n    flr.total_job_postings,\n    flr.recent_postings_30d,\n    flr.unique_companies,\n    flr.unique_industries,\n    flr.avg_salary_midpoint,\n    flr.median_salary,\n    flr.remote_work_pct,\n    flr.competition_index,\n    flr.application_success_rate_pct,\n    flr.location_attractiveness_score,\n    flr.attractiveness_category,\n    flr.city_rank_in_state,\n    flr.national_rank\nFROM final_location_ranking flr\nWHERE flr.total_job_postings >= 10\nORDER BY flr.location_attractiveness_score DESC, flr.total_job_postings DESC\nLIMIT 100;",
      "line_number": 1388,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006513,
        "row_count": 0,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 7,
      "title": "Salary Benchmarking Analysis with Percentile Rankings and Market Positioning",
      "description": "Description: Comprehensive salary benchmarking analysis comparing salaries across roles, industries, locations, and experience levels. Uses percentile rankings, market positioning, and comparative analytics. Use Case: Salary Intelligence - Compensation Benchmarking and Market Positioning Business Value: Provides salary benchmarking data showing percentile rankings, market positioning, salary ranges by role/industry/location, and compensation trends. Helps users understand market rates and negoti",
      "complexity": "Deep nested CTEs (7+ levels), percentile calculations, market positioning, window functions with multiple partitions, comparative analytics",
      "expected_output": "Salary benchmarking analysis with percentile rankings, market positioning, salary ranges, and compensation trends.",
      "sql": "WITH salary_data_normalization AS (\n    -- First CTE: Normalize salary data to annual equivalents\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        jp.company_id,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        jp.work_model,\n        jp.job_type,\n        jp.posted_date,\n        -- Normalize to annual salary\n        CASE\n            WHEN jp.salary_type = 'annual' THEN jp.salary_min\n            WHEN jp.salary_type = 'hourly' THEN jp.salary_min * 2080  -- 40 hours * 52 weeks\n            WHEN jp.salary_type = 'monthly' THEN jp.salary_min * 12\n            ELSE jp.salary_min\n        END AS annual_salary_min,\n        CASE\n            WHEN jp.salary_type = 'annual' THEN jp.salary_max\n            WHEN jp.salary_type = 'hourly' THEN jp.salary_max * 2080\n            WHEN jp.salary_type = 'monthly' THEN jp.salary_max * 12\n            ELSE jp.salary_max\n        END AS annual_salary_max,\n        CASE\n            WHEN jp.salary_type = 'annual' THEN (jp.salary_min + jp.salary_max) / 2\n            WHEN jp.salary_type = 'hourly' THEN ((jp.salary_min + jp.salary_max) / 2) * 2080\n            WHEN jp.salary_type = 'monthly' THEN ((jp.salary_min + jp.salary_max) / 2) * 12\n            ELSE (jp.salary_min + jp.salary_max) / 2\n        END AS annual_salary_midpoint\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.salary_min IS NOT NULL\n        AND jp.salary_max IS NOT NULL\n        AND jp.salary_min > 0\n        AND jp.salary_max >= jp.salary_min\n),\nrole_salary_aggregations AS (\n    -- Second CTE: Aggregate salaries by job title/role\n    SELECT\n        sdn.job_title,\n        COUNT(DISTINCT sdn.job_id) AS job_count,\n        AVG(sdn.annual_salary_min) AS avg_salary_min,\n        AVG(sdn.annual_salary_max) AS avg_salary_max,\n        AVG(sdn.annual_salary_midpoint) AS avg_salary_midpoint,\n        PERCENTILE_CONT(0.1) 
WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS p10_salary,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS p25_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS p50_salary,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS p75_salary,\n        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS p90_salary,\n        MIN(sdn.annual_salary_min) AS min_salary,\n        MAX(sdn.annual_salary_max) AS max_salary,\n        STDDEV(sdn.annual_salary_midpoint) AS salary_stddev\n    FROM salary_data_normalization sdn\n    GROUP BY sdn.job_title\n),\nindustry_salary_benchmarks AS (\n    -- Third CTE: Calculate industry-level salary benchmarks\n    SELECT\n        sdn.industry,\n        COUNT(DISTINCT sdn.job_id) AS industry_job_count,\n        COUNT(DISTINCT sdn.job_title) AS unique_roles,\n        AVG(sdn.annual_salary_midpoint) AS industry_avg_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS industry_median_salary,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS industry_p25_salary,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS industry_p75_salary,\n        MIN(sdn.annual_salary_min) AS industry_min_salary,\n        MAX(sdn.annual_salary_max) AS industry_max_salary\n    FROM salary_data_normalization sdn\n    WHERE sdn.industry IS NOT NULL\n    GROUP BY sdn.industry\n),\nlocation_salary_benchmarks AS (\n    -- Fourth CTE: Calculate location-level salary benchmarks\n    SELECT\n        sdn.location_state,\n        sdn.location_city,\n        COUNT(DISTINCT sdn.job_id) AS location_job_count,\n        AVG(sdn.annual_salary_midpoint) AS location_avg_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS location_median_salary,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY 
sdn.annual_salary_midpoint) AS location_p25_salary,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS location_p75_salary\n    FROM salary_data_normalization sdn\n    WHERE sdn.location_state IS NOT NULL\n    GROUP BY sdn.location_state, sdn.location_city\n),\nrole_market_positioning AS (\n    -- Fifth CTE: Calculate market positioning for each role\n    SELECT\n        rsa.job_title,\n        rsa.job_count,\n        ROUND(CAST(rsa.avg_salary_midpoint AS NUMERIC), 0) AS avg_salary_midpoint,\n        ROUND(CAST(rsa.p10_salary AS NUMERIC), 0) AS p10_salary,\n        ROUND(CAST(rsa.p25_salary AS NUMERIC), 0) AS p25_salary,\n        ROUND(CAST(rsa.p50_salary AS NUMERIC), 0) AS p50_salary,\n        ROUND(CAST(rsa.p75_salary AS NUMERIC), 0) AS p75_salary,\n        ROUND(CAST(rsa.p90_salary AS NUMERIC), 0) AS p90_salary,\n        ROUND(CAST(rsa.min_salary AS NUMERIC), 0) AS min_salary,\n        ROUND(CAST(rsa.max_salary AS NUMERIC), 0) AS max_salary,\n        ROUND(CAST(rsa.salary_stddev AS NUMERIC), 0) AS salary_stddev,\n        -- Market positioning\n        CASE\n            WHEN rsa.p50_salary >= (SELECT AVG(p50_salary) FROM role_salary_aggregations) * 1.2 THEN 'premium'\n            WHEN rsa.p50_salary >= (SELECT AVG(p50_salary) FROM role_salary_aggregations) THEN 'above_market'\n            WHEN rsa.p50_salary >= (SELECT AVG(p50_salary) FROM role_salary_aggregations) * 0.8 THEN 'market_rate'\n            ELSE 'below_market'\n        END AS market_positioning,\n        -- Salary range spread\n        ROUND(CAST(((rsa.p75_salary - rsa.p25_salary) / NULLIF(rsa.p50_salary, 0)) * 100 AS NUMERIC), 2) AS salary_range_spread_pct\n    FROM role_salary_aggregations rsa\n    WHERE rsa.job_count >= 5  -- Minimum jobs for reliable benchmark\n),\nrole_industry_comparison AS (\n    -- Sixth CTE:
    Compare role salaries across industries\n    SELECT\n        sdn.job_title,\n        sdn.industry,\n        COUNT(DISTINCT sdn.job_id) AS role_industry_job_count,\n        AVG(sdn.annual_salary_midpoint) AS role_industry_avg_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS role_industry_median_salary,\n        -- Compare to industry average\n        ROUND(\n            ((AVG(sdn.annual_salary_midpoint) - isb.industry_avg_salary) / NULLIF(isb.industry_avg_salary, 0)) * 100,\n            2\n        ) AS vs_industry_avg_pct,\n        -- Compare to role average\n        ROUND(\n            ((AVG(sdn.annual_salary_midpoint) - rmp.avg_salary_midpoint) / NULLIF(rmp.avg_salary_midpoint, 0)) * 100,\n            2\n        ) AS vs_role_avg_pct\n    FROM salary_data_normalization sdn\n    INNER JOIN industry_salary_benchmarks isb ON sdn.industry = isb.industry\n    INNER JOIN role_market_positioning rmp ON sdn.job_title = rmp.job_title\n    GROUP BY sdn.job_title, sdn.industry, isb.industry_avg_salary, rmp.avg_salary_midpoint\n),\nrole_location_comparison AS (\n    -- Seventh CTE: Compare role salaries across locations\n    SELECT\n        sdn.job_title,\n        sdn.location_state,\n        sdn.location_city,\n        COUNT(DISTINCT sdn.job_id) AS role_location_job_count,\n        AVG(sdn.annual_salary_midpoint) AS role_location_avg_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sdn.annual_salary_midpoint) AS role_location_median_salary,\n        -- Compare to location average\n        ROUND(\n            ((AVG(sdn.annual_salary_midpoint) - lsb.location_avg_salary) / NULLIF(lsb.location_avg_salary, 0)) * 100,\n            2\n        ) AS vs_location_avg_pct,\n        -- Compare to role average\n        ROUND(\n            ((AVG(sdn.annual_salary_midpoint) - rmp.avg_salary_midpoint) / NULLIF(rmp.avg_salary_midpoint, 0)) * 100,\n            2\n        ) AS vs_role_avg_pct\n    FROM salary_data_normalization sdn\n    
INNER JOIN location_salary_benchmarks lsb ON sdn.location_state = lsb.location_state\n        AND sdn.location_city = lsb.location_city\n    INNER JOIN role_market_positioning rmp ON sdn.job_title = rmp.job_title\n    GROUP BY sdn.job_title, sdn.location_state, sdn.location_city, lsb.location_avg_salary, rmp.avg_salary_midpoint\n)\nSELECT\n    rmp.job_title,\n    rmp.job_count,\n    rmp.avg_salary_midpoint,\n    rmp.p10_salary,\n    rmp.p25_salary,\n    rmp.p50_salary,\n    rmp.p75_salary,\n    rmp.p90_salary,\n    rmp.min_salary,\n    rmp.max_salary,\n    rmp.salary_range_spread_pct,\n    rmp.market_positioning,\n    -- Industry comparison summary\n    (\n        SELECT JSON_OBJECT_AGG(\n            ric.industry,\n            JSON_BUILD_OBJECT(\n                'avg_salary', ROUND(CAST(ric.role_industry_avg_salary AS NUMERIC), 0),\n                'vs_industry_pct', ric.vs_industry_avg_pct,\n                'vs_role_pct', ric.vs_role_avg_pct\n            )\n        )\n        FROM (\n            SELECT ric.industry, ric.role_industry_avg_salary, ric.vs_industry_avg_pct, ric.vs_role_avg_pct\n            FROM role_industry_comparison ric\n            WHERE ric.job_title = rmp.job_title\n            ORDER BY ric.role_industry_avg_salary DESC\n            LIMIT 5\n        ) ric\n    ) AS top_industries,\n    -- Location comparison summary\n    (\n        SELECT JSON_OBJECT_AGG(\n            CONCAT(rlc.location_city, ', ', rlc.location_state),\n            JSON_BUILD_OBJECT(\n                'avg_salary', ROUND(CAST(rlc.role_location_avg_salary AS NUMERIC), 0),\n                'vs_location_pct', rlc.vs_location_avg_pct,\n                'vs_role_pct', rlc.vs_role_avg_pct\n            )\n        )\n        FROM (\n            SELECT rlc.location_city, rlc.location_state, rlc.role_location_avg_salary, rlc.vs_location_avg_pct, rlc.vs_role_avg_pct\n            FROM role_location_comparison rlc\n            WHERE rlc.job_title = rmp.job_title\n            ORDER BY 
rlc.role_location_avg_salary DESC\n            LIMIT 5\n        ) rlc\n    ) AS top_locations,\n    -- Overall ranking\n    RANK() OVER (ORDER BY rmp.avg_salary_midpoint DESC) AS salary_rank\nFROM role_market_positioning rmp\nWHERE rmp.job_count >= 10\nORDER BY rmp.avg_salary_midpoint DESC\nLIMIT 100;",
      "line_number": 1627,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006304,
        "row_count": 0,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 8,
      "title": "User Cohort Analysis with Retention Metrics and Career Progression Tracking",
      "description": "Description: Comprehensive user cohort analysis tracking user registration cohorts, application activity patterns, retention metrics, career progression indicators, and engagement trends. Uses cohort segmentation and retention analysis patterns. Use Case: User Analytics - Cohort Analysis and Career Progression Tracking Business Value: Provides user cohort analysis showing registration cohorts, application activity patterns, retention rates, career progression indicators, and engagement trends. H",
      "complexity": "Deep nested CTEs (7+ levels), cohort analysis, retention calculations, window functions with cohort partitions, time-series analysis",
      "expected_output": "User cohort analysis with registration cohorts, retention metrics, application patterns, and career progression indicators.",
      "sql": "WITH user_registration_cohorts AS (\n    -- First CTE: Identify user registration cohorts\n    SELECT\n        up.user_id,\n        up.email,\n        up.full_name,\n        up.created_at AS registration_date,\n        DATE_TRUNC('month', up.created_at) AS registration_month,\n        DATE_TRUNC('quarter', up.created_at) AS registration_quarter,\n        EXTRACT(YEAR FROM up.created_at) AS registration_year,\n        EXTRACT(MONTH FROM up.created_at) AS registration_month_num,\n        EXTRACT(QUARTER FROM up.created_at) AS registration_quarter_num,\n        up.years_experience,\n        up.current_job_title,\n        up.location_state,\n        up.profile_completeness_score\n    FROM user_profiles up\n    WHERE up.is_active = TRUE\n),\nuser_application_activity AS (\n    -- Second CTE: Aggregate application activity by user\n    SELECT\n        urc.user_id,\n        urc.registration_month,\n        urc.registration_quarter,\n        urc.registration_year,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT CASE WHEN ja.submitted_at >= urc.registration_date THEN ja.application_id END) AS applications_since_registration,\n        MIN(ja.submitted_at) AS first_application_date,\n        MAX(ja.submitted_at) AS last_application_date,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications,\n        AVG(ja.match_score) AS avg_match_score,\n        COUNT(DISTINCT DATE_TRUNC('month', ja.submitted_at)) AS active_months,\n        COUNT(DISTINCT ja.job_id) AS unique_jobs_applied\n    FROM user_registration_cohorts urc\n    LEFT JOIN job_applications ja ON urc.user_id = ja.user_id\n    GROUP BY urc.user_id, urc.registration_month, urc.registration_quarter, urc.registration_year, urc.registration_date\n),\ncohort_aggregations AS (\n    -- Third CTE: Aggregate metrics by registration cohort\n    SELECT\n        urc.registration_month,\n        
urc.registration_quarter,\n        urc.registration_year,\n        COUNT(DISTINCT urc.user_id) AS cohort_size,\n        COUNT(DISTINCT uaa.user_id) AS users_with_applications,\n        AVG(uaa.total_applications) AS avg_applications_per_user,\n        AVG(uaa.successful_applications) AS avg_successful_applications,\n        AVG(uaa.avg_match_score) AS avg_match_score,\n        AVG(uaa.active_months) AS avg_active_months,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY uaa.total_applications) AS median_applications,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY uaa.total_applications) AS p75_applications,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY uaa.total_applications) AS p25_applications\n    FROM user_registration_cohorts urc\n    LEFT JOIN user_application_activity uaa ON urc.user_id = uaa.user_id\n    GROUP BY urc.registration_month, urc.registration_quarter, urc.registration_year\n),\ncohort_retention_analysis AS (\n    -- Fourth CTE: Calculate retention metrics by cohort\n    SELECT\n        ca.registration_month,\n        ca.registration_quarter,\n        ca.registration_year,\n        ca.cohort_size,\n        ca.users_with_applications,\n        ca.avg_applications_per_user,\n        ca.avg_successful_applications,\n        ca.avg_match_score,\n        -- Retention rate (users who applied at least once)\n        ROUND(\n            (ca.users_with_applications::NUMERIC / NULLIF(ca.cohort_size, 0)) * 100,\n            2\n        ) AS application_retention_rate_pct,\n        -- Active user rate (users active in last 30 days)\n        (\n            SELECT COUNT(DISTINCT up.user_id)\n            FROM user_profiles up\n            WHERE DATE_TRUNC('month', up.created_at) = ca.registration_month\n                AND up.last_active_at >= CURRENT_TIMESTAMP - INTERVAL '30 days'\n        ) AS active_users_30d,\n        -- 30-day retention rate\n        ROUND(\n            ((\n                SELECT COUNT(DISTINCT up.user_id)\n                
FROM user_profiles up\n                WHERE DATE_TRUNC('month', up.created_at) = ca.registration_month\n                    AND up.last_active_at >= CURRENT_TIMESTAMP - INTERVAL '30 days'\n            )::NUMERIC / NULLIF(ca.cohort_size, 0)) * 100,\n            2\n        ) AS retention_rate_30d_pct\n    FROM cohort_aggregations ca\n),\nmonthly_cohort_activity AS (\n    -- Fifth CTE: Track monthly activity by cohort\n    SELECT\n        urc.registration_month,\n        DATE_TRUNC('month', ja.submitted_at) AS activity_month,\n        DATE_PART('month', AGE(DATE_TRUNC('month', ja.submitted_at), urc.registration_month)) AS months_since_registration,\n        COUNT(DISTINCT urc.user_id) AS active_users,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications\n    FROM user_registration_cohorts urc\n    INNER JOIN job_applications ja ON urc.user_id = ja.user_id\n    WHERE ja.submitted_at IS NOT NULL\n    GROUP BY urc.registration_month, DATE_TRUNC('month', ja.submitted_at), DATE_PART('month', AGE(DATE_TRUNC('month', ja.submitted_at), urc.registration_month))\n),\ncohort_progression_metrics AS (\n    -- Sixth CTE: Calculate career progression metrics\n    SELECT\n        cra.registration_month,\n        cra.registration_quarter,\n        cra.cohort_size,\n        cra.application_retention_rate_pct,\n        cra.retention_rate_30d_pct,\n        -- Career progression indicators\n        (\n            SELECT COUNT(DISTINCT up.user_id)\n            FROM user_profiles up\n            WHERE DATE_TRUNC('month', up.created_at) = cra.registration_month\n                AND up.years_experience < (\n                    SELECT AVG(years_experience)\n                    FROM user_profiles up2\n                    WHERE DATE_TRUNC('month', up2.created_at) = cra.registration_month\n                )\n        ) AS 
users_with_below_avg_experience,\n        -- Average time to first application\n        (\n            SELECT AVG(days_to_first)\n            FROM (\n                SELECT EXTRACT(EPOCH FROM (MIN(ja.submitted_at) - up.created_at)) / 86400 AS days_to_first\n                FROM user_profiles up\n                INNER JOIN job_applications ja ON up.user_id = ja.user_id\n                WHERE DATE_TRUNC('month', up.created_at) = cra.registration_month\n                GROUP BY up.user_id\n            ) user_first_app\n        ) AS avg_days_to_first_application,\n        -- Success rate by cohort\n        (\n            SELECT AVG(has_success::NUMERIC)\n            FROM (\n                SELECT \n                    CASE\n                        WHEN COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) > 0 THEN 1\n                        ELSE 0\n                    END AS has_success\n                FROM user_profiles up\n                LEFT JOIN job_applications ja ON up.user_id = ja.user_id\n                WHERE DATE_TRUNC('month', up.created_at) = cra.registration_month\n                GROUP BY up.user_id\n            ) user_success\n        ) AS success_rate_per_user\n    FROM cohort_retention_analysis cra\n)\nSELECT\n    cpm.registration_month,\n    cpm.registration_quarter,\n    cpm.cohort_size,\n    cpm.application_retention_rate_pct,\n    cpm.retention_rate_30d_pct,\n    ROUND(CAST(cpm.avg_days_to_first_application AS NUMERIC), 1) AS avg_days_to_first_application,\n    ROUND(cpm.success_rate_per_user * 100, 2) AS success_rate_per_user_pct,\n    -- Cohort comparison metrics\n    LAG(cpm.application_retention_rate_pct, 1) OVER (ORDER BY cpm.registration_month) AS prev_cohort_retention_rate,\n    AVG(cpm.application_retention_rate_pct) OVER (\n        ORDER BY cpm.registration_month\n        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n    ) AS moving_avg_retention_rate,\n    -- Cohort health score\n    ROUND(\n   
     (\n            cpm.application_retention_rate_pct * 0.4 +\n            cpm.retention_rate_30d_pct * 0.3 +\n            COALESCE(cpm.success_rate_per_user * 100, 0) * 0.3\n        ),\n        2\n    ) AS cohort_health_score\nFROM cohort_progression_metrics cpm\nORDER BY cpm.registration_month DESC\nLIMIT 100;",
      "line_number": 1861,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005861,
        "row_count": 1,
        "column_count": 10,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 9,
      "title": "Skill Demand vs Supply Analysis with Market Imbalance Detection",
      "description": "Description: Comprehensive analysis comparing skill demand (from job postings) vs skill supply (from user profiles), identifying market imbalances, high-demand/low-supply skills, and skill gap opportunities. Uses aggregations and comparative analytics. Use Case: Skill Market Intelligence - Demand vs Supply Analysis and Skill Gap Identification Business Value: Identifies skill market imbalances showing high-demand/low-supply skills, skill gap opportunities, and market trends. Helps users identify",
      "complexity": "Deep nested CTEs (6+ levels), demand vs supply comparisons, market imbalance calculations, window functions with rankings, percentile analysis",
      "expected_output": "Skill demand vs supply analysis with market imbalances, skill gap opportunities, and market trend indicators.",
      "sql": "WITH skill_demand_aggregation AS (\n    -- First CTE: Aggregate skill demand from job postings\n    SELECT\n        jsr.skill_id,\n        s.skill_name,\n        s.skill_category,\n        s.skill_type,\n        COUNT(DISTINCT jsr.job_id) AS total_job_demand,\n        COUNT(DISTINCT CASE WHEN jp.posted_date >= CURRENT_DATE - INTERVAL '30 days' THEN jsr.job_id END) AS recent_job_demand_30d,\n        COUNT(DISTINCT CASE WHEN jp.posted_date >= CURRENT_DATE - INTERVAL '90 days' THEN jsr.job_id END) AS recent_job_demand_90d,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'required' THEN jsr.job_id END) AS required_job_demand,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'preferred' THEN jsr.job_id END) AS preferred_job_demand,\n        AVG(jsr.importance_score) AS avg_importance_score,\n        AVG(jsr.years_experience_required) AS avg_years_experience_required,\n        COUNT(DISTINCT c.industry) AS industries_demanding,\n        COUNT(DISTINCT jp.location_state) AS states_demanding,\n        COUNT(DISTINCT jp.company_id) AS companies_demanding\n    FROM job_skills_requirements jsr\n    INNER JOIN skills s ON jsr.skill_id = s.skill_id\n    INNER JOIN job_postings jp ON jsr.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n    GROUP BY jsr.skill_id, s.skill_name, s.skill_category, s.skill_type\n),\nskill_supply_aggregation AS (\n    -- Second CTE: Aggregate skill supply from user profiles\n    SELECT\n        us.skill_id,\n        s.skill_name,\n        s.skill_category,\n        s.skill_type,\n        COUNT(DISTINCT us.user_id) AS total_user_supply,\n        COUNT(DISTINCT CASE WHEN up.last_active_at >= CURRENT_TIMESTAMP - INTERVAL '30 days' THEN us.user_id END) AS active_user_supply_30d,\n        COUNT(DISTINCT CASE WHEN up.last_active_at >= CURRENT_TIMESTAMP - INTERVAL '90 days' THEN us.user_id END) AS active_user_supply_90d,\n        AVG(us.proficiency_score) AS 
avg_proficiency_score,\n        AVG(us.years_experience) AS avg_years_experience,\n        COUNT(DISTINCT CASE WHEN us.proficiency_level IN ('advanced', 'expert') THEN us.user_id END) AS advanced_users_count,\n        COUNT(DISTINCT up.location_state) AS states_supplying,\n        COUNT(DISTINCT up.current_job_title) AS job_titles_with_skill\n    FROM user_skills us\n    INNER JOIN skills s ON us.skill_id = s.skill_id\n    INNER JOIN user_profiles up ON us.user_id = up.user_id\n    WHERE up.is_active = TRUE\n    GROUP BY us.skill_id, s.skill_name, s.skill_category, s.skill_type\n),\ndemand_supply_comparison AS (\n    -- Third CTE: Compare demand vs supply\n    SELECT\n        COALESCE(sda.skill_id, ssa.skill_id) AS skill_id,\n        COALESCE(sda.skill_name, ssa.skill_name) AS skill_name,\n        COALESCE(sda.skill_category, ssa.skill_category) AS skill_category,\n        COALESCE(sda.skill_type, ssa.skill_type) AS skill_type,\n        COALESCE(sda.total_job_demand, 0) AS total_job_demand,\n        COALESCE(sda.recent_job_demand_30d, 0) AS recent_job_demand_30d,\n        COALESCE(sda.recent_job_demand_90d, 0) AS recent_job_demand_90d,\n        COALESCE(sda.required_job_demand, 0) AS required_job_demand,\n        COALESCE(ssa.total_user_supply, 0) AS total_user_supply,\n        COALESCE(ssa.active_user_supply_30d, 0) AS active_user_supply_30d,\n        COALESCE(ssa.active_user_supply_90d, 0) AS active_user_supply_90d,\n        COALESCE(ssa.advanced_users_count, 0) AS advanced_users_count,\n        sda.avg_importance_score,\n        sda.avg_years_experience_required,\n        sda.industries_demanding,\n        sda.states_demanding,\n        sda.companies_demanding,\n        ssa.avg_proficiency_score,\n        ssa.states_supplying,\n        ssa.job_titles_with_skill\n    FROM skill_demand_aggregation sda\n    FULL OUTER JOIN skill_supply_aggregation ssa ON sda.skill_id = ssa.skill_id\n),\nmarket_imbalance_calculation AS (\n    -- Fourth CTE: Calculate market 
imbalance metrics\n    SELECT\n        dsc.skill_id,\n        dsc.skill_name,\n        dsc.skill_category,\n        dsc.skill_type,\n        dsc.total_job_demand,\n        dsc.recent_job_demand_30d,\n        dsc.total_user_supply,\n        dsc.active_user_supply_30d,\n        dsc.required_job_demand,\n        dsc.advanced_users_count,\n        -- Demand-supply ratio\n        CASE\n            WHEN dsc.total_user_supply > 0 THEN\n                ROUND(dsc.total_job_demand::NUMERIC / dsc.total_user_supply, 2)\n            ELSE NULL\n        END AS demand_supply_ratio,\n        -- Recent demand-supply ratio\n        CASE\n            WHEN dsc.active_user_supply_30d > 0 THEN\n                ROUND(dsc.recent_job_demand_30d::NUMERIC / dsc.active_user_supply_30d, 2)\n            ELSE NULL\n        END AS recent_demand_supply_ratio,\n        -- Market imbalance score (higher = more imbalance)\n        CASE\n            WHEN dsc.total_user_supply = 0 AND dsc.total_job_demand > 0 THEN 100  -- High demand, no supply\n            WHEN dsc.total_job_demand > 0 AND dsc.total_user_supply > 0 THEN\n                ROUND(\n                    LEAST(\n                        (dsc.total_job_demand::NUMERIC / NULLIF(dsc.total_user_supply, 0)) * 10,\n                        100\n                    ),\n                    2\n                )\n            ELSE 0\n        END AS market_imbalance_score,\n        -- Imbalance category\n        CASE\n            WHEN dsc.total_user_supply = 0 AND dsc.total_job_demand > 10 THEN 'critical_shortage'\n            WHEN dsc.total_job_demand > dsc.total_user_supply * 3 THEN 'high_demand_low_supply'\n            WHEN dsc.total_job_demand > dsc.total_user_supply * 1.5 THEN 'moderate_demand_supply_gap'\n            WHEN dsc.total_user_supply > dsc.total_job_demand * 3 THEN 'oversupplied'\n            WHEN dsc.total_user_supply > dsc.total_job_demand * 1.5 THEN 'moderate_oversupply'\n            ELSE 'balanced'\n        END AS 
market_balance_category,\n        dsc.avg_importance_score,\n        dsc.industries_demanding,\n        dsc.states_demanding,\n        dsc.companies_demanding,\n        dsc.avg_proficiency_score\n    FROM demand_supply_comparison dsc\n    WHERE dsc.total_job_demand > 0 OR dsc.total_user_supply > 0\n),\nskill_opportunity_scoring AS (\n    -- Fifth CTE: Score skills by opportunity (high demand, low supply)\n    SELECT\n        mic.skill_id,\n        mic.skill_name,\n        mic.skill_category,\n        mic.skill_type,\n        mic.total_job_demand,\n        mic.recent_job_demand_30d,\n        mic.total_user_supply,\n        mic.active_user_supply_30d,\n        mic.demand_supply_ratio,\n        mic.recent_demand_supply_ratio,\n        mic.market_imbalance_score,\n        mic.market_balance_category,\n        mic.required_job_demand,\n        mic.advanced_users_count,\n        mic.avg_importance_score,\n        mic.industries_demanding,\n        mic.states_demanding,\n        mic.companies_demanding,\n        -- Opportunity score (combines demand, supply gap, and importance)\n        ROUND(\n            (\n                mic.market_imbalance_score * 0.5 +\n                COALESCE(mic.avg_importance_score * 5, 0) * 0.3 +\n                LEAST(mic.required_job_demand / 10.0, 1) * 20 * 0.2\n            ),\n            2\n        ) AS opportunity_score,\n        -- Learning priority\n        CASE\n            WHEN mic.market_balance_category IN ('critical_shortage', 'high_demand_low_supply') THEN 'high_priority'\n            WHEN mic.market_balance_category = 'moderate_demand_supply_gap' THEN 'medium_priority'\n            ELSE 'low_priority'\n        END AS learning_priority\n    FROM market_imbalance_calculation mic\n)\nSELECT\n    sos.skill_id,\n    sos.skill_name,\n    sos.skill_category,\n    sos.skill_type,\n    sos.total_job_demand,\n    sos.recent_job_demand_30d,\n    sos.total_user_supply,\n    sos.active_user_supply_30d,\n    sos.demand_supply_ratio,\n    
sos.recent_demand_supply_ratio,\n    sos.market_imbalance_score,\n    sos.market_balance_category,\n    sos.opportunity_score,\n    sos.learning_priority,\n    sos.required_job_demand,\n    sos.advanced_users_count,\n    sos.avg_importance_score,\n    sos.industries_demanding,\n    sos.states_demanding,\n    sos.companies_demanding,\n    -- Rankings\n    RANK() OVER (ORDER BY sos.opportunity_score DESC) AS opportunity_rank,\n    RANK() OVER (ORDER BY sos.market_imbalance_score DESC) AS imbalance_rank,\n    RANK() OVER (ORDER BY sos.total_job_demand DESC) AS demand_rank\nFROM skill_opportunity_scoring sos\nWHERE sos.total_job_demand >= 5  -- Minimum demand threshold\nORDER BY sos.opportunity_score DESC, sos.market_imbalance_score DESC\nLIMIT 100;",
      "line_number": 2057,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006838,
        "row_count": 1,
        "column_count": 23,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 10,
      "title": "Federal Job Opportunities Analysis with USAJobs.gov Integration and Agency Intelligence",
      "description": "Description: Comprehensive analysis of federal job opportunities from USAJobs.gov, analyzing agency hiring patterns, pay plan distributions, grade levels, geographic distribution, and federal employment trends. Uses aggregations and federal-specific analytics. Use Case: Federal Employment Intelligence - USAJobs.gov Analysis and Agency Hiring Patterns. Business Value: Provides federal job market intelligence showing agency hiring patterns, pay plan distributions, grade level trends, and geographic distribution.",
      "complexity": "Deep nested CTEs (6+ levels), federal-specific aggregations, agency analysis, pay plan distributions, geographic analysis",
      "expected_output": "Federal job opportunities analysis with agency patterns, pay plan distributions, grade level trends, and geographic distribution.",
      "sql": "WITH federal_job_aggregations AS (\n    -- First CTE: Aggregate federal job postings\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        jp.company_id,\n        jp.agency_name,\n        jp.pay_plan,\n        jp.grade_level,\n        jp.location_state,\n        jp.location_city,\n        jp.posted_date,\n        jp.expiration_date,\n        jp.salary_min,\n        jp.salary_max,\n        jp.usajobs_id,\n        jp.data_source,\n        c.industry,\n        DATE_TRUNC('month', jp.posted_date) AS posting_month,\n        DATE_TRUNC('quarter', jp.posted_date) AS posting_quarter\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_federal_job = TRUE\n        AND jp.is_active = TRUE\n),\nagency_hiring_patterns AS (\n    -- Second CTE: Analyze agency hiring patterns\n    SELECT\n        fja.agency_name,\n        COUNT(DISTINCT fja.job_id) AS total_job_postings,\n        COUNT(DISTINCT CASE WHEN fja.posted_date >= CURRENT_DATE - INTERVAL '30 days' THEN fja.job_id END) AS recent_postings_30d,\n        COUNT(DISTINCT CASE WHEN fja.posted_date >= CURRENT_DATE - INTERVAL '90 days' THEN fja.job_id END) AS recent_postings_90d,\n        COUNT(DISTINCT fja.job_title) AS unique_job_titles,\n        COUNT(DISTINCT fja.location_state) AS states_active,\n        COUNT(DISTINCT fja.location_city) AS cities_active,\n        COUNT(DISTINCT fja.pay_plan) AS pay_plans_used,\n        COUNT(DISTINCT fja.grade_level) AS grade_levels_used,\n        AVG(fja.salary_min) AS avg_salary_min,\n        AVG(fja.salary_max) AS avg_salary_max,\n        AVG((fja.salary_min + fja.salary_max) / 2) AS avg_salary_midpoint,\n        MIN(fja.posted_date) AS first_posting_date,\n        MAX(fja.posted_date) AS last_posting_date\n    FROM federal_job_aggregations fja\n    WHERE fja.agency_name IS NOT NULL\n    GROUP BY fja.agency_name\n),\npay_plan_analysis AS (\n    -- Third CTE: Analyze pay plan distributions\n    SELECT\n        fja.pay_plan,\n        COUNT(DISTINCT fja.job_id) AS total_jobs,\n        COUNT(DISTINCT fja.agency_name) AS agencies_using,\n        AVG(fja.salary_min) AS avg_salary_min,\n        AVG(fja.salary_max) AS avg_salary_max,\n        AVG((fja.salary_min + fja.salary_max) / 2) AS avg_salary_midpoint,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (fja.salary_min + fja.salary_max) / 2) AS median_salary,\n        COUNT(DISTINCT fja.grade_level) AS grade_levels_in_plan,\n        COUNT(DISTINCT fja.location_state) AS states_active\n    FROM federal_job_aggregations fja\n    WHERE fja.pay_plan IS NOT NULL\n    GROUP BY fja.pay_plan\n),\ngrade_level_analysis AS (\n    -- Fourth CTE: Analyze grade level distributions\n    SELECT\n        fja.grade_level,\n        COUNT(DISTINCT fja.job_id) AS total_jobs,\n        COUNT(DISTINCT fja.agency_name) AS agencies_using,\n        COUNT(DISTINCT fja.pay_plan) AS pay_plans_used,\n        AVG(fja.salary_min) AS avg_salary_min,\n        AVG(fja.salary_max) AS avg_salary_max,\n        AVG((fja.salary_min + fja.salary_max) / 2) AS avg_salary_midpoint,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (fja.salary_min + fja.salary_max) / 2) AS median_salary,\n        COUNT(DISTINCT fja.job_title) AS unique_job_titles,\n        COUNT(DISTINCT fja.location_state) AS states_active\n    FROM federal_job_aggregations fja\n    WHERE fja.grade_level IS NOT NULL\n    GROUP BY fja.grade_level\n),\ngeographic_federal_distribution AS (\n    -- Fifth CTE: Analyze geographic distribution\n    SELECT\n        fja.location_state,\n        fja.location_city,\n        COUNT(DISTINCT fja.job_id) AS total_federal_jobs,\n        COUNT(DISTINCT fja.agency_name) AS agencies_present,\n        COUNT(DISTINCT fja.job_title) AS unique_job_titles,\n        AVG((fja.salary_min + fja.salary_max) / 2) AS avg_salary_midpoint,\n        COUNT(DISTINCT CASE WHEN fja.posted_date >= CURRENT_DATE - INTERVAL '30 days' THEN fja.job_id END) AS recent_jobs_30d\n    FROM federal_job_aggregations fja\n    WHERE fja.location_state IS NOT NULL\n    GROUP BY fja.location_state, fja.location_city\n),\nfederal_trend_analysis AS (\n    -- Sixth CTE: Analyze federal hiring trends\n    SELECT\n        fja.posting_month,\n        fja.posting_quarter,\n        COUNT(DISTINCT fja.job_id) AS monthly_job_postings,\n        COUNT(DISTINCT fja.agency_name) AS active_agencies,\n        COUNT(DISTINCT fja.job_title) AS unique_job_titles,\n        AVG((fja.salary_min + fja.salary_max) / 2) AS avg_salary_midpoint,\n        COUNT(DISTINCT fja.location_state) AS states_with_postings,\n        -- Compare to previous month\n        LAG(COUNT(DISTINCT fja.job_id), 1) OVER (ORDER BY fja.posting_month) AS prev_month_postings,\n        -- Compare to previous quarter\n        LAG(COUNT(DISTINCT fja.job_id), 3) OVER (ORDER BY fja.posting_month) AS prev_quarter_postings\n    FROM federal_job_aggregations fja\n    GROUP BY fja.posting_month, fja.posting_quarter\n)\nSELECT\n    ahp.agency_name,\n    ahp.total_job_postings,\n    ahp.recent_postings_30d,\n    ahp.recent_postings_90d,\n    ahp.unique_job_titles,\n    ahp.states_active,\n    ahp.cities_active,\n    ahp.pay_plans_used,\n    ahp.grade_levels_used,\n    ROUND(CAST(ahp.avg_salary_midpoint AS NUMERIC), 0) AS avg_salary_midpoint,\n    -- Agency ranking\n    RANK() OVER (ORDER BY ahp.total_job_postings DESC) AS agency_rank_by_postings,\n    RANK() OVER (ORDER BY ahp.recent_postings_30d DESC) AS agency_rank_by_recent,\n    -- Pay plan distribution (top 5 pay plans by job count, across all agencies).\n    -- The ORDER BY / LIMIT runs in an inner query: a LIMIT placed after\n    -- JSON_OBJECT_AGG only trims the single aggregated row and limits nothing.\n    (\n        SELECT JSON_OBJECT_AGG(\n            ppa.pay_plan,\n            JSON_BUILD_OBJECT(\n                'total_jobs', ppa.total_jobs,\n                'avg_salary', ROUND(CAST(ppa.avg_salary_midpoint AS NUMERIC), 0),\n                'median_salary', ROUND(CAST(ppa.median_salary AS NUMERIC), 0)\n            )\n        )\n        FROM (\n            SELECT ppa.pay_plan, ppa.total_jobs, ppa.avg_salary_midpoint, ppa.median_salary\n            FROM pay_plan_analysis ppa\n            WHERE ppa.total_jobs > 0\n            ORDER BY ppa.total_jobs DESC\n            LIMIT 5\n        ) ppa\n    ) AS top_pay_plans,\n    -- Grade level distribution (top 5 grade levels by job count, across all agencies)\n    (\n        SELECT JSON_OBJECT_AGG(\n            gla.grade_level,\n            JSON_BUILD_OBJECT(\n                'total_jobs', gla.total_jobs,\n                'avg_salary', ROUND(CAST(gla.avg_salary_midpoint AS NUMERIC), 0),\n                'unique_titles', gla.unique_job_titles\n            )\n        )\n        FROM (\n            SELECT gla.grade_level, gla.total_jobs, gla.avg_salary_midpoint, gla.unique_job_titles\n            FROM grade_level_analysis gla\n            WHERE gla.total_jobs > 0\n            ORDER BY gla.total_jobs DESC\n            LIMIT 5\n        ) gla\n    ) AS top_grade_levels,\n    -- Geographic distribution: top 5 locations in which this agency has postings\n    (\n        SELECT JSON_OBJECT_AGG(\n            CONCAT(gfd.location_city, ', ', gfd.location_state),\n            JSON_BUILD_OBJECT(\n                'total_jobs', gfd.total_federal_jobs,\n                'recent_jobs', gfd.recent_jobs_30d,\n                'agencies', gfd.agencies_present\n            )\n        )\n        FROM (\n            SELECT gfd.location_state, gfd.location_city, gfd.total_federal_jobs, gfd.recent_jobs_30d, gfd.agencies_present\n            FROM geographic_federal_distribution gfd\n            -- EXISTS rather than a join: a join yields one row per matching job, so a\n            -- single location with many postings would be repeated and could occupy\n            -- every LIMIT slot (and produce duplicate JSON keys).\n            WHERE EXISTS (\n                SELECT 1\n                FROM federal_job_aggregations fja\n                WHERE fja.agency_name = ahp.agency_name\n                    AND fja.location_state = gfd.location_state\n                    AND fja.location_city = gfd.location_city\n            )\n            ORDER BY gfd.total_federal_jobs DESC\n            LIMIT 5\n        ) gfd\n    ) AS top_locations\nFROM agency_hiring_patterns ahp\nWHERE ahp.total_job_postings >= 5\nORDER BY ahp.total_job_postings DESC, ahp.recent_postings_30d DESC\nLIMIT 100;",
      "line_number": 2267,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005835,
        "row_count": 0,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 11,
      "title": "Job Search Behavior Analysis with Search Pattern Recognition and Recommendation Optimization",
      "description": "Description: Comprehensive analysis of user job search behavior patterns, search query analysis, filter usage patterns, and recommendation engagement metrics. Uses pattern recognition and behavioral analytics. Use Case: Search Intelligence - User Behavior Analysis and Recommendation Optimization. Business Value: Analyzes user search behavior patterns, identifies common search queries, filter usage patterns, and recommendation engagement metrics. Helps optimize search functionality and improve recommendations.",
      "complexity": "Deep nested CTEs (5 levels), pattern recognition, behavioral analytics, window functions (global rankings, no partitions), time-series analysis",
      "expected_output": "Job search behavior analysis with search patterns, filter usage, recommendation engagement, and optimization insights.",
      "sql": "WITH user_search_activity AS (\n    -- First CTE: Aggregate user search activity\n    SELECT\n        ujsh.search_id,\n        ujsh.user_id,\n        ujsh.search_query,\n        ujsh.search_filters,\n        ujsh.location_filter,\n        ujsh.salary_filter_min,\n        ujsh.salary_filter_max,\n        ujsh.work_model_filter,\n        ujsh.job_type_filter,\n        ujsh.industry_filter,\n        ujsh.results_count,\n        ujsh.search_date,\n        DATE_TRUNC('day', ujsh.search_date) AS search_day,\n        DATE_TRUNC('week', ujsh.search_date) AS search_week,\n        DATE_TRUNC('month', ujsh.search_date) AS search_month,\n        EXTRACT(HOUR FROM ujsh.search_date) AS search_hour,\n        EXTRACT(DOW FROM ujsh.search_date) AS search_day_of_week\n    FROM user_job_search_history ujsh\n    WHERE ujsh.search_date >= CURRENT_TIMESTAMP - INTERVAL '90 days'\n),\nsearch_pattern_analysis AS (\n    -- Second CTE: Analyze search patterns\n    SELECT\n        usa.user_id,\n        COUNT(DISTINCT usa.search_id) AS total_searches,\n        COUNT(DISTINCT usa.search_day) AS active_search_days,\n        COUNT(DISTINCT usa.search_query) AS unique_queries,\n        AVG(usa.results_count) AS avg_results_per_search,\n        MIN(usa.search_date) AS first_search_date,\n        MAX(usa.search_date) AS last_search_date,\n        EXTRACT(EPOCH FROM (MAX(usa.search_date) - MIN(usa.search_date))) / 86400 AS search_span_days,\n        -- Most common search hour\n        MODE() WITHIN GROUP (ORDER BY usa.search_hour) AS most_common_search_hour,\n        -- Most common search day\n        MODE() WITHIN GROUP (ORDER BY usa.search_day_of_week) AS most_common_search_day,\n        -- Filter usage patterns\n        COUNT(DISTINCT CASE WHEN usa.location_filter IS NOT NULL THEN usa.search_id END) AS searches_with_location_filter,\n        COUNT(DISTINCT CASE WHEN usa.salary_filter_min IS NOT NULL OR usa.salary_filter_max IS NOT NULL THEN usa.search_id END) AS searches_with_salary_filter,\n        COUNT(DISTINCT CASE WHEN usa.work_model_filter IS NOT NULL THEN usa.search_id END) AS searches_with_work_model_filter,\n        COUNT(DISTINCT CASE WHEN usa.industry_filter IS NOT NULL THEN usa.search_id END) AS searches_with_industry_filter\n    FROM user_search_activity usa\n    GROUP BY usa.user_id\n),\nrecommendation_engagement AS (\n    -- Third CTE: Analyze recommendation engagement\n    SELECT\n        jr.user_id,\n        COUNT(DISTINCT jr.recommendation_id) AS total_recommendations_received,\n        COUNT(DISTINCT CASE WHEN jr.is_liked = TRUE THEN jr.recommendation_id END) AS liked_recommendations,\n        COUNT(DISTINCT CASE WHEN jr.is_applied = TRUE THEN jr.recommendation_id END) AS applied_recommendations,\n        COUNT(DISTINCT CASE WHEN jr.is_dismissed = TRUE THEN jr.recommendation_id END) AS dismissed_recommendations,\n        AVG(jr.match_score) AS avg_recommendation_match_score,\n        AVG(CASE WHEN jr.is_applied = TRUE THEN jr.match_score ELSE NULL END) AS avg_applied_match_score,\n        AVG(CASE WHEN jr.is_liked = TRUE THEN jr.match_score ELSE NULL END) AS avg_liked_match_score,\n        -- Engagement rate\n        CASE\n            WHEN COUNT(DISTINCT jr.recommendation_id) > 0 THEN\n                ROUND((COUNT(DISTINCT CASE WHEN jr.is_applied = TRUE THEN jr.recommendation_id END)::NUMERIC / COUNT(DISTINCT jr.recommendation_id)) * 100, 2)\n            ELSE 0\n        END AS recommendation_engagement_rate_pct\n    FROM job_recommendations jr\n    WHERE jr.recommendation_date >= CURRENT_TIMESTAMP - INTERVAL '90 days'\n    GROUP BY jr.user_id\n),\nsearch_recommendation_correlation AS (\n    -- Fourth CTE: Correlate search behavior with recommendation engagement\n    SELECT\n        spa.user_id,\n        spa.total_searches,\n        spa.active_search_days,\n        spa.unique_queries,\n        spa.avg_results_per_search,\n        spa.searches_with_location_filter,\n        spa.searches_with_salary_filter,\n        spa.searches_with_work_model_filter,\n        spa.searches_with_industry_filter,\n        -- Filter usage rates\n        ROUND((spa.searches_with_location_filter::NUMERIC / NULLIF(spa.total_searches, 0)) * 100, 2) AS location_filter_usage_pct,\n        ROUND((spa.searches_with_salary_filter::NUMERIC / NULLIF(spa.total_searches, 0)) * 100, 2) AS salary_filter_usage_pct,\n        ROUND((spa.searches_with_work_model_filter::NUMERIC / NULLIF(spa.total_searches, 0)) * 100, 2) AS work_model_filter_usage_pct,\n        ROUND((spa.searches_with_industry_filter::NUMERIC / NULLIF(spa.total_searches, 0)) * 100, 2) AS industry_filter_usage_pct,\n        -- Recommendation metrics\n        re.total_recommendations_received,\n        re.liked_recommendations,\n        re.applied_recommendations,\n        re.dismissed_recommendations,\n        re.avg_recommendation_match_score,\n        re.avg_applied_match_score,\n        re.recommendation_engagement_rate_pct,\n        -- Search frequency\n        -- search_span_days is cast explicitly: where EXTRACT yields double precision,\n        -- numeric / double precision is double precision and ROUND(double precision, int) does not exist\n        CASE\n            WHEN spa.search_span_days > 0 THEN\n                ROUND(spa.total_searches::NUMERIC / spa.search_span_days::NUMERIC, 2)\n            ELSE spa.total_searches\n        END AS searches_per_day\n    FROM search_pattern_analysis spa\n    LEFT JOIN recommendation_engagement re ON spa.user_id = re.user_id\n),\nuser_behavior_segmentation AS (\n    -- Fifth CTE: Segment users by behavior patterns\n    SELECT\n        src.user_id,\n        src.total_searches,\n        src.active_search_days,\n        src.unique_queries,\n        src.avg_results_per_search,\n        src.location_filter_usage_pct,\n        src.salary_filter_usage_pct,\n        src.work_model_filter_usage_pct,\n        src.industry_filter_usage_pct,\n        src.total_recommendations_received,\n        src.applied_recommendations,\n        src.recommendation_engagement_rate_pct,\n        src.searches_per_day,\n        -- User behavior segment\n        CASE\n            WHEN src.total_searches >= 50 AND src.applied_recommendations >= 10 THEN 'highly_active_applicant'\n            WHEN src.total_searches >= 30 AND src.applied_recommendations >= 5 THEN 'active_applicant'\n            WHEN src.total_searches >= 20 THEN 'active_searcher'\n            WHEN src.total_searches >= 10 THEN 'moderate_searcher'\n            ELSE 'casual_searcher'\n        END AS behavior_segment,\n        -- Search sophistication score\n        ROUND(\n            (\n                src.location_filter_usage_pct * 0.25 +\n                src.salary_filter_usage_pct * 0.25 +\n                src.work_model_filter_usage_pct * 0.25 +\n                src.industry_filter_usage_pct * 0.25\n            ),\n            2\n        ) AS search_sophistication_score\n    FROM search_recommendation_correlation src\n)\nSELECT\n    ubs.user_id,\n    up.full_name,\n    up.email,\n    ubs.total_searches,\n    ubs.active_search_days,\n    ubs.unique_queries,\n    ROUND(CAST(ubs.avg_results_per_search AS NUMERIC), 0) AS avg_results_per_search,\n    ubs.location_filter_usage_pct,\n    ubs.salary_filter_usage_pct,\n    ubs.work_model_filter_usage_pct,\n    ubs.industry_filter_usage_pct,\n    ubs.total_recommendations_received,\n    ubs.applied_recommendations,\n    ubs.recommendation_engagement_rate_pct,\n    ROUND(CAST(ubs.searches_per_day AS NUMERIC), 2) AS searches_per_day,\n    ubs.behavior_segment,\n    ubs.search_sophistication_score,\n    -- Comparison metrics: population-wide benchmarks, repeated on every row\n    sb.median_total_searches,\n    sb.p75_total_searches,\n    -- Ranking\n    RANK() OVER (ORDER BY ubs.total_searches DESC) AS search_activity_rank,\n    RANK() OVER (ORDER BY ubs.recommendation_engagement_rate_pct DESC NULLS LAST) AS engagement_rank\nFROM user_behavior_segmentation ubs\nINNER JOIN user_profiles up ON ubs.user_id = up.user_id\n-- PERCENTILE_CONT is an ordered-set aggregate and cannot be used with OVER, so the\n-- median and 75th percentile are computed once, over the same population as the\n-- result set (profiled users with at least 5 searches), and cross-joined in.\nCROSS JOIN (\n    SELECT\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY b.total_searches) AS median_total_searches,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY b.total_searches) AS p75_total_searches\n    FROM user_behavior_segmentation b\n    INNER JOIN user_profiles p ON b.user_id = p.user_id\n    WHERE b.total_searches >= 5\n) sb\nWHERE ubs.total_searches >= 5\nORDER BY ubs.total_searches DESC, ubs.recommendation_engagement_rate_pct DESC NULLS LAST\nLIMIT 100;",
      "line_number": 2464,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006712,
        "row_count": 0,
        "column_count": 21,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 12,
      "title": "Application Funnel Optimization with Conversion Rate Analysis and Drop-off Identification",
      "description": "Description: Comprehensive application funnel analysis tracking conversion rates at each stage, identifying drop-off points, analyzing time-to-conversion, and optimizing application flow. Uses funnel analysis patterns. Use Case: Funnel Optimization - Application Conversion Analysis and Drop-off Identification. Business Value: Analyzes application funnel conversion rates, identifies drop-off points, calculates time-to-conversion metrics, and provides optimization recommendations. Helps improve application conversion.",
      "complexity": "Deep nested CTEs (5 levels), funnel analysis, conversion rate calculations, drop-off identification, time-to-event analysis",
      "expected_output": "Application funnel analysis with conversion rates, drop-off points, time-to-conversion metrics, and optimization recommendations.",
      "sql": "WITH application_stages AS (\n    -- First CTE: Define application stages and transitions\n    SELECT\n        ja.application_id,\n        ja.user_id,\n        ja.job_id,\n        ja.application_status,\n        ja.submitted_at,\n        ja.status_updated_at,\n        jp.job_title,\n        jp.company_id,\n        c.industry,\n        jp.location_state,\n        -- Stage mapping\n        CASE\n            WHEN ja.application_status = 'draft' THEN 1\n            WHEN ja.application_status = 'submitted' THEN 2\n            WHEN ja.application_status = 'under_review' THEN 3\n            WHEN ja.application_status = 'interview' THEN 4\n            WHEN ja.application_status = 'offer' THEN 5\n            WHEN ja.application_status = 'rejected' THEN 0\n            WHEN ja.application_status = 'withdrawn' THEN 0\n            ELSE 0\n        END AS stage_number,\n        -- Time at each stage\n        EXTRACT(EPOCH FROM (ja.status_updated_at - ja.submitted_at)) / 86400 AS days_to_status_update\n    FROM job_applications ja\n    INNER JOIN job_postings jp ON ja.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE ja.submitted_at IS NOT NULL\n        AND ja.submitted_at >= CURRENT_TIMESTAMP - INTERVAL '1 year'\n),\nfunnel_stage_counts AS (\n    -- Second CTE: Count applications at each stage\n    SELECT\n        as_stages.stage_number,\n        CASE\n            WHEN as_stages.stage_number = 1 THEN 'draft'\n            WHEN as_stages.stage_number = 2 THEN 'submitted'\n            WHEN as_stages.stage_number = 3 THEN 'under_review'\n            WHEN as_stages.stage_number = 4 THEN 'interview'\n            WHEN as_stages.stage_number = 5 THEN 'offer'\n            ELSE 'other'\n        END AS stage_name,\n        COUNT(DISTINCT as_stages.application_id) AS applications_at_stage,\n        COUNT(DISTINCT as_stages.user_id) AS unique_users_at_stage,\n        AVG(as_stages.days_to_status_update) AS avg_days_at_stage,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY as_stages.days_to_status_update) AS median_days_at_stage,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY as_stages.days_to_status_update) AS p25_days_at_stage,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY as_stages.days_to_status_update) AS p75_days_at_stage\n    FROM application_stages as_stages\n    WHERE as_stages.stage_number > 0\n    GROUP BY as_stages.stage_number\n),\nfunnel_conversion_rates AS (\n    -- Third CTE: Calculate conversion rates between stages\n    -- NOTE: stage counts are taken from each application's current application_status,\n    -- so a later stage can hold more applications than the one before it; in that case\n    -- conversion exceeds 100 and drop-off is negative.\n    SELECT\n        fsc.stage_number,\n        fsc.stage_name,\n        fsc.applications_at_stage,\n        fsc.unique_users_at_stage,\n        ROUND(CAST(fsc.avg_days_at_stage AS NUMERIC), 2) AS avg_days_at_stage,\n        ROUND(CAST(fsc.median_days_at_stage AS NUMERIC), 2) AS median_days_at_stage,\n        -- Previous stage count\n        LAG(fsc.applications_at_stage, 1) OVER (ORDER BY fsc.stage_number) AS prev_stage_applications,\n        -- Conversion rate from previous stage\n        CASE\n            WHEN LAG(fsc.applications_at_stage, 1) OVER (ORDER BY fsc.stage_number) > 0 THEN\n                ROUND((fsc.applications_at_stage::NUMERIC / LAG(fsc.applications_at_stage, 1) OVER (ORDER BY fsc.stage_number)) * 100, 2)\n            ELSE NULL\n        END AS conversion_rate_from_prev_pct,\n        -- Drop-off rate\n        CASE\n            WHEN LAG(fsc.applications_at_stage, 1) OVER (ORDER BY fsc.stage_number) > 0 THEN\n                ROUND(((LAG(fsc.applications_at_stage, 1) OVER (ORDER BY fsc.stage_number) - fsc.applications_at_stage)::NUMERIC / LAG(fsc.applications_at_stage, 1) OVER (ORDER BY fsc.stage_number)) * 100, 2)\n            ELSE NULL\n        END AS drop_off_rate_pct,\n        -- Overall conversion rate from start\n        CASE\n            WHEN FIRST_VALUE(fsc.applications_at_stage) OVER (ORDER BY fsc.stage_number ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) > 0 THEN\n                ROUND((fsc.applications_at_stage::NUMERIC / FIRST_VALUE(fsc.applications_at_stage) OVER (ORDER BY fsc.stage_number ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) * 100, 2)\n            ELSE NULL\n        END AS overall_conversion_rate_pct\n    FROM funnel_stage_counts fsc\n),\nstage_time_analysis AS (\n    -- Fourth CTE: Analyze time spent at each stage\n    SELECT\n        as_stages.stage_number,\n        CASE\n            WHEN as_stages.stage_number = 1 THEN 'draft'\n            WHEN as_stages.stage_number = 2 THEN 'submitted'\n            WHEN as_stages.stage_number = 3 THEN 'under_review'\n            WHEN as_stages.stage_number = 4 THEN 'interview'\n            WHEN as_stages.stage_number = 5 THEN 'offer'\n            ELSE 'other'\n        END AS stage_name,\n        COUNT(*) AS stage_transitions,\n        AVG(as_stages.days_to_status_update) AS avg_time_at_stage,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY as_stages.days_to_status_update) AS median_time_at_stage,\n        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY as_stages.days_to_status_update) AS p90_time_at_stage,\n        MIN(as_stages.days_to_status_update) AS min_time_at_stage,\n        MAX(as_stages.days_to_status_update) AS max_time_at_stage\n    FROM application_stages as_stages\n    GROUP BY as_stages.stage_number\n),\nfunnel_optimization_insights AS (\n    -- Fifth CTE: Identify optimization opportunities\n    SELECT\n        fcr.stage_number,\n        fcr.stage_name,\n        fcr.applications_at_stage,\n        fcr.prev_stage_applications,\n        fcr.conversion_rate_from_prev_pct,\n        fcr.drop_off_rate_pct,\n        fcr.overall_conversion_rate_pct,\n        sta.avg_time_at_stage,\n        sta.median_time_at_stage,\n        sta.p90_time_at_stage,\n        -- Optimization flags\n        CASE\n            WHEN fcr.drop_off_rate_pct > 50 THEN 'high_drop_off'\n            WHEN fcr.drop_off_rate_pct > 30 THEN 'moderate_drop_off'\n            ELSE 'low_drop_off'\n        END AS drop_off_severity,\n        CASE\n            WHEN sta.median_time_at_stage > 14 THEN 'slow_stage'\n            WHEN sta.median_time_at_stage > 7 THEN 'moderate_stage'\n            ELSE 'fast_stage'\n        END AS stage_speed_category,\n        -- Optimization priority score\n        -- The first stage has no predecessor, so its drop-off rate is NULL; count it as 0\n        -- rather than letting the NULL blank out the whole score. A NULL median time is\n        -- also mapped to 0: LEAST() skips NULL arguments and would otherwise return 1,\n        -- scoring a stage with no timing data as maximally slow.\n        ROUND(\n            CAST((\n                COALESCE(fcr.drop_off_rate_pct, 0) * 0.5 +\n                LEAST(COALESCE(sta.median_time_at_stage, 0) / 30.0, 1) * 50 * 0.3 +\n                (100 - fcr.overall_conversion_rate_pct) * 0.2\n            ) AS NUMERIC),\n            2\n        ) AS optimization_priority_score\n    FROM funnel_conversion_rates fcr\n    INNER JOIN stage_time_analysis sta ON fcr.stage_number = sta.stage_number\n)\nSELECT\n    foi.stage_number,\n    foi.stage_name,\n    foi.applications_at_stage,\n    foi.prev_stage_applications,\n    foi.conversion_rate_from_prev_pct,\n    foi.drop_off_rate_pct,\n    foi.overall_conversion_rate_pct,\n    ROUND(CAST(foi.avg_time_at_stage AS NUMERIC), 1) AS avg_time_at_stage_days,\n    ROUND(CAST(foi.median_time_at_stage AS NUMERIC), 1) AS median_time_at_stage_days,\n    ROUND(CAST(foi.p90_time_at_stage AS NUMERIC), 1) AS p90_time_at_stage_days,\n    foi.drop_off_severity,\n    foi.stage_speed_category,\n    foi.optimization_priority_score,\n    -- Recommendations\n    CASE\n        WHEN foi.drop_off_severity = 'high_drop_off' AND foi.stage_speed_category = 'slow_stage' THEN 'Critical: High drop-off and slow processing - investigate immediately'\n        WHEN foi.drop_off_severity = 'high_drop_off' THEN 'High Priority: Significant drop-off at this stage - review application requirements'\n        WHEN foi.stage_speed_category = 'slow_stage' THEN 'Medium Priority: Slow processing time - optimize review process'\n        ELSE 'Low Priority: Stage performing well'\n    END AS optimization_recommendation\nFROM funnel_optimization_insights foi\nORDER BY foi.stage_number;",
      "line_number": 2647,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005886,
        "row_count": 3,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 13,
      "title": "Skill Combination Analysis with Co-occurrence Patterns and Market Value Optimization",
      "description": "Description: Comprehensive analysis of skill combinations, identifying co-occurrence patterns, high-value skill pairs, and optimal skill portfolios. Uses graph-like analysis and combination scoring. Use Case: Skill Portfolio Optimization - Skill Combination Analysis and Market Value Maximization Business Value: Identifies valuable skill combinations, analyzes co-occurrence patterns in job postings, and recommends optimal skill portfolios. Helps users understand which skills to combine for maximu",
      "complexity":
    "Deep nested CTEs (7+ levels), skill combination analysis, co-occurrence calculations, graph-like patterns, combination scoring",
      "expected_output": "Skill combination analysis with co-occurrence patterns, high-value skill pairs, and optimal skill portfolio recommendations.",
      "sql": "WITH job_skill_combinations AS (\n    -- First CTE: Extract skill combinations from job postings\n    SELECT\n        jsr1.job_id,\n        jsr1.skill_id AS skill_id_1,\n        s1.skill_name AS skill_name_1,\n        s1.skill_category AS skill_category_1,\n        jsr1.requirement_type AS requirement_type_1,\n        jsr1.importance_score AS importance_score_1,\n        jsr2.skill_id AS skill_id_2,\n        s2.skill_name AS skill_name_2,\n        s2.skill_category AS skill_category_2,\n        jsr2.requirement_type AS requirement_type_2,\n        jsr2.importance_score AS importance_score_2,\n        c.industry,\n        jp.location_state,\n        jp.salary_min,\n        jp.salary_max,\n        (jp.salary_min + jp.salary_max) / 2 AS salary_midpoint\n    FROM job_skills_requirements jsr1\n    INNER JOIN job_skills_requirements jsr2 ON jsr1.job_id = jsr2.job_id\n        AND jsr1.skill_id < jsr2.skill_id  -- Avoid duplicates and self-pairs\n    INNER JOIN skills s1 ON jsr1.skill_id = s1.skill_id\n    INNER JOIN skills s2 ON jsr2.skill_id = s2.skill_id\n    INNER JOIN job_postings jp ON jsr1.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n),\nskill_pair_co_occurrence AS (\n    -- Second CTE: Calculate co-occurrence metrics for skill pairs\n    SELECT\n        jsc.skill_id_1,\n        jsc.skill_name_1,\n        jsc.skill_category_1,\n        jsc.skill_id_2,\n        jsc.skill_name_2,\n        jsc.skill_category_2,\n        COUNT(DISTINCT jsc.job_id) AS co_occurrence_count,\n        COUNT(DISTINCT jsc.industry) AS industries_appearing_together,\n        COUNT(DISTINCT jsc.location_state) AS states_appearing_together,\n        AVG(jsc.salary_midpoint) AS avg_salary_when_paired,\n        AVG(jsc.importance_score_1 + jsc.importance_score_2) AS avg_combined_importance,\n        COUNT(DISTINCT CASE WHEN jsc.requirement_type_1 = 'required' AND jsc.requirement_type_2 = 'required' THEN jsc.job_id END) 
AS both_required_count,\n        COUNT(DISTINCT CASE WHEN jsc.requirement_type_1 = 'required' OR jsc.requirement_type_2 = 'required' THEN jsc.job_id END) AS at_least_one_required_count\n    FROM job_skill_combinations jsc\n    GROUP BY jsc.skill_id_1, jsc.skill_name_1, jsc.skill_category_1, jsc.skill_id_2, jsc.skill_name_2, jsc.skill_category_2\n),\nindividual_skill_metrics AS (\n    -- Third CTE:
    Calculate individual skill metrics for comparison\n    SELECT\n        jsr.skill_id,\n        COUNT(DISTINCT jsr.job_id) AS total_job_demand,\n        AVG(jsr.importance_score) AS avg_importance_score,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary\n    FROM job_skills_requirements jsr\n    INNER JOIN job_postings jp ON jsr.job_id = jp.job_id\n    WHERE jp.is_active = TRUE\n    GROUP BY jsr.skill_id\n),\nskill_pair_value_analysis AS (\n    -- Fourth CTE: Analyze value of skill combinations\n    SELECT\n        spco.skill_id_1,\n        spco.skill_name_1,\n        spco.skill_category_1,\n        spco.skill_id_2,\n        spco.skill_name_2,\n        spco.skill_category_2,\n        spco.co_occurrence_count,\n        spco.industries_appearing_together,\n        spco.states_appearing_together,\n        ROUND(CAST(spco.avg_salary_when_paired AS NUMERIC), 0) AS avg_salary_when_paired,\n        ROUND(CAST(spco.avg_combined_importance AS NUMERIC), 2) AS avg_combined_importance,\n        spco.both_required_count,\n        spco.at_least_one_required_count,\n        -- Individual skill metrics\n        ism1.total_job_demand AS skill_1_demand,\n        ism1.avg_importance_score AS skill_1_avg_importance,\n        ism1.avg_salary AS skill_1_avg_salary,\n        ism2.total_job_demand AS skill_2_demand,\n        ism2.avg_importance_score AS skill_2_avg_importance,\n        ism2.avg_salary AS skill_2_avg_salary,\n        -- Combination value metrics\n        CASE\n            WHEN ism1.total_job_demand > 0 AND ism2.total_job_demand > 0 THEN\n                ROUND((spco.co_occurrence_count::NUMERIC / NULLIF(LEAST(ism1.total_job_demand, ism2.total_job_demand), 0)) * 100, 2)\n            ELSE NULL\n        END AS co_occurrence_rate_pct,\n        -- Salary premium when paired\n        CASE\n            WHEN ism1.avg_salary > 0 AND ism2.avg_salary > 0 THEN\n                ROUND(((spco.avg_salary_when_paired - (ism1.avg_salary + ism2.avg_salary) / 2) / 
NULLIF((ism1.avg_salary + ism2.avg_salary) / 2, 0)) * 100, 2)\n            ELSE NULL\n        END AS salary_premium_pct\n    FROM skill_pair_co_occurrence spco\n    INNER JOIN individual_skill_metrics ism1 ON spco.skill_id_1 = ism1.skill_id\n    INNER JOIN individual_skill_metrics ism2 ON spco.skill_id_2 = ism2.skill_id\n    WHERE spco.co_occurrence_count >= 5  -- Minimum co-occurrence threshold\n),\nskill_pair_scoring AS (\n    -- Fifth CTE: Score skill pairs by value\n    SELECT\n        spva.skill_id_1,\n        spva.skill_name_1,\n        spva.skill_category_1,\n        spva.skill_id_2,\n        spva.skill_name_2,\n        spva.skill_category_2,\n        spva.co_occurrence_count,\n        spva.co_occurrence_rate_pct,\n        spva.avg_salary_when_paired,\n        spva.salary_premium_pct,\n        spva.avg_combined_importance,\n        spva.both_required_count,\n        spva.industries_appearing_together,\n        spva.states_appearing_together,\n        -- Combination value score\n        ROUND(\n            (\n                LEAST(spva.co_occurrence_count / 100.0, 1) * 30 +  -- Co-occurrence component\n                LEAST(spva.co_occurrence_rate_pct / 100.0, 1) * 25 +  -- Co-occurrence rate component\n                COALESCE(LEAST(spva.salary_premium_pct / 50.0, 1), 0) * 25 +  -- Salary premium component\n                LEAST(spva.avg_combined_importance / 20.0, 1) * 20  -- Importance component\n            ) * 100,\n            2\n        ) AS combination_value_score,\n        -- Combination category\n        CASE\n            WHEN spva.co_occurrence_count >= 50 AND spva.salary_premium_pct >= 10 THEN 'premium_combination'\n            WHEN spva.co_occurrence_count >= 30 AND spva.salary_premium_pct >= 5 THEN 'high_value_combination'\n            WHEN spva.co_occurrence_count >= 20 THEN 'valuable_combination'\n            WHEN spva.co_occurrence_count >= 10 THEN 'common_combination'\n            ELSE 'emerging_combination'\n        END AS 
combination_category\n    FROM skill_pair_value_analysis spva\n)\nSELECT\n    sps.skill_id_1,\n    sps.skill_name_1,\n    sps.skill_category_1,\n    sps.skill_id_2,\n    sps.skill_name_2,\n    sps.skill_category_2,\n    sps.co_occurrence_count,\n    sps.co_occurrence_rate_pct,\n    sps.avg_salary_when_paired,\n    sps.salary_premium_pct,\n    sps.avg_combined_importance,\n    sps.both_required_count,\n    sps.industries_appearing_together,\n    sps.states_appearing_together,\n    sps.combination_value_score,\n    sps.combination_category,\n    -- Rankings\n    RANK() OVER (ORDER BY sps.combination_value_score DESC) AS combination_value_rank,\n    RANK() OVER (ORDER BY sps.co_occurrence_count DESC) AS co_occurrence_rank,\n    RANK() OVER (ORDER BY sps.avg_salary_when_paired DESC) AS salary_rank\nFROM skill_pair_scoring sps\nWHERE sps.co_occurrence_count >= 10\nORDER BY sps.combination_value_score DESC, sps.co_occurrence_count DESC\nLIMIT 100;",
      "line_number": 2829,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006599,
        "row_count": 0,
        "column_count": 19,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 14,
      "title": "Time-to-Fill Analysis with Hiring Velocity Metrics and Market Efficiency Indicators",
      "description": "Description: Comprehensive analysis of time-to-fill metrics, hiring velocity, market efficiency indicators, and factors affecting time-to-hire. Uses time-to-event analysis and efficiency calculations. Use Case: Hiring Intelligence - Time-to-Fill Analysis and Market Efficiency Metrics Business Value: Analyzes time-to-fill metrics, identifies factors affecting hiring speed, calculates market efficiency indicators, and provides insights for optimizing hiring processes. Helps understand market dynam",
      "complexity":
    "Deep nested CTEs (6+ levels), time-to-event analysis, efficiency calculations, factor analysis, window functions",
      "expected_output": "Time-to-fill analysis with hiring velocity metrics, market efficiency indicators, and optimization insights.",
      "sql": "WITH job_posting_lifecycle AS (\n    -- First CTE: Track job posting lifecycle\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        jp.company_id,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        jp.work_model,\n        jp.job_type,\n        jp.posted_date,\n        jp.expiration_date,\n        jp.salary_min,\n        jp.salary_max,\n        (jp.salary_min + jp.salary_max) / 2 AS salary_midpoint,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications,\n        MIN(ja.submitted_at) AS first_application_date,\n        MAX(ja.submitted_at) AS last_application_date,\n        -- Time metrics\n        CASE\n            WHEN jp.expiration_date IS NOT NULL THEN\n                EXTRACT(EPOCH FROM (jp.expiration_date - jp.posted_date)) / 86400\n            ELSE\n                EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - jp.posted_date)) / 86400\n        END AS days_posted,\n        (\n            SELECT EXTRACT(EPOCH FROM (MIN(ja2.submitted_at) - jp.posted_date)) / 86400\n            FROM job_applications ja2\n            WHERE ja2.job_id = jp.job_id\n                AND ja2.submitted_at IS NOT NULL\n        ) AS days_to_first_application\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    LEFT JOIN job_applications ja ON jp.job_id = ja.job_id\n    WHERE jp.is_active = TRUE\n        AND jp.posted_date >= CURRENT_DATE - INTERVAL '1 year'\n    GROUP BY jp.job_id, jp.job_title, jp.company_id, c.industry, jp.location_state, jp.location_city, jp.work_model, jp.job_type, jp.posted_date, jp.expiration_date, jp.salary_min, jp.salary_max\n),\ntime_to_fill_metrics AS (\n    -- Second CTE: Calculate time-to-fill metrics\n    SELECT\n        jpl.job_id,\n        jpl.job_title,\n        jpl.company_id,\n        jpl.industry,\n        
jpl.location_state,\n        jpl.location_city,\n        jpl.work_model,\n        jpl.job_type,\n        jpl.salary_midpoint,\n        jpl.total_applications,\n        jpl.successful_applications,\n        jpl.days_posted,\n        jpl.days_to_first_application,\n        -- Time to fill (assuming successful application indicates fill)\n        CASE\n            WHEN jpl.successful_applications > 0 AND jpl.first_application_date IS NOT NULL THEN\n                EXTRACT(EPOCH FROM (jpl.first_application_date - jpl.posted_date)) / 86400\n            ELSE NULL\n        END AS time_to_fill_days,\n        -- Application velocity (applications per day)\n        CASE\n            WHEN jpl.days_posted > 0 THEN\n                ROUND(CAST(jpl.total_applications::NUMERIC / jpl.days_posted AS NUMERIC), 2)\n            ELSE NULL\n        END AS application_velocity,\n        -- Success rate\n        CASE\n            WHEN jpl.total_applications > 0 THEN\n                ROUND((jpl.successful_applications::NUMERIC / jpl.total_applications) * 100, 2)\n            ELSE NULL\n        END AS success_rate_pct\n    FROM job_posting_lifecycle jpl\n),\nindustry_time_analysis AS (\n    -- Third CTE: Analyze time-to-fill by industry\n    SELECT\n        ttf.industry,\n        COUNT(DISTINCT ttf.job_id) AS total_jobs,\n        COUNT(DISTINCT CASE WHEN ttf.time_to_fill_days IS NOT NULL THEN ttf.job_id END) AS filled_jobs,\n        AVG(ttf.time_to_fill_days) AS avg_time_to_fill,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ttf.time_to_fill_days) AS median_time_to_fill,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY ttf.time_to_fill_days) AS p25_time_to_fill,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY ttf.time_to_fill_days) AS p75_time_to_fill,\n        AVG(ttf.application_velocity) AS avg_application_velocity,\n        AVG(ttf.success_rate_pct) AS avg_success_rate\n    FROM time_to_fill_metrics ttf\n    WHERE ttf.industry IS NOT NULL\n    GROUP BY 
ttf.industry\n),\nlocation_time_analysis AS (\n    -- Fourth CTE: Analyze time-to-fill by location\n    SELECT\n        ttf.location_state,\n        ttf.location_city,\n        COUNT(DISTINCT ttf.job_id) AS total_jobs,\n        COUNT(DISTINCT CASE WHEN ttf.time_to_fill_days IS NOT NULL THEN ttf.job_id END) AS filled_jobs,\n        AVG(ttf.time_to_fill_days) AS avg_time_to_fill,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY ttf.time_to_fill_days) AS median_time_to_fill,\n        AVG(ttf.application_velocity) AS avg_application_velocity,\n        AVG(ttf.success_rate_pct) AS avg_success_rate\n    FROM time_to_fill_metrics ttf\n    WHERE ttf.location_state IS NOT NULL\n    GROUP BY ttf.location_state, ttf.location_city\n),\nmarket_efficiency_indicators AS (\n    -- Fifth CTE: Calculate market efficiency indicators\n    SELECT\n        ita.industry,\n        ita.total_jobs,\n        ita.filled_jobs,\n        ROUND(CAST(ita.avg_time_to_fill AS NUMERIC), 1) AS avg_time_to_fill,\n        ROUND(CAST(ita.median_time_to_fill AS NUMERIC), 1) AS median_time_to_fill,\n        ROUND(CAST(ita.p25_time_to_fill AS NUMERIC), 1) AS p25_time_to_fill,\n        ROUND(CAST(ita.p75_time_to_fill AS NUMERIC), 1) AS p75_time_to_fill,\n        ROUND(CAST(ita.avg_application_velocity AS NUMERIC), 2) AS avg_application_velocity,\n        ROUND(CAST(ita.avg_success_rate AS NUMERIC), 2) AS avg_success_rate,\n        -- Fill rate\n        CASE\n            WHEN ita.total_jobs > 0 THEN\n                ROUND((ita.filled_jobs::NUMERIC / ita.total_jobs) * 100, 2)\n            ELSE NULL\n        END AS fill_rate_pct,\n        -- Market efficiency score (lower time to fill = higher efficiency)\n        CASE\n            WHEN ita.median_time_to_fill IS NOT NULL THEN\n                ROUND(\n                    CAST(\n                        CASE\n                            WHEN ita.median_time_to_fill <= 7 THEN 100\n                            WHEN ita.median_time_to_fill <= 14 THEN 90 - 
((ita.median_time_to_fill - 7) * 5)\n                            WHEN ita.median_time_to_fill <= 30 THEN 75 - ((ita.median_time_to_fill - 14) * 2)\n                            WHEN ita.median_time_to_fill <= 60 THEN 50 - ((ita.median_time_to_fill - 30) * 1)\n                            ELSE GREATEST(0, 20 - ((ita.median_time_to_fill - 60) * 0.5))\n                        END AS NUMERIC\n                    ),\n                    2\n                )\n            ELSE NULL\n        END AS market_efficiency_score,\n        -- Efficiency category\n        CASE\n            WHEN ita.median_time_to_fill <= 7 THEN 'highly_efficient'\n            WHEN ita.median_time_to_fill <= 14 THEN 'efficient'\n            WHEN ita.median_time_to_fill <= 30 THEN 'moderate'\n            WHEN ita.median_time_to_fill <= 60 THEN 'slow'\n            ELSE 'very_slow'\n        END AS efficiency_category\n    FROM industry_time_analysis ita\n)\nSELECT\n    mei.industry,\n    mei.total_jobs,\n    mei.filled_jobs,\n    mei.fill_rate_pct,\n    mei.avg_time_to_fill,\n    mei.median_time_to_fill,\n    mei.p25_time_to_fill,\n    mei.p75_time_to_fill,\n    mei.avg_application_velocity,\n    mei.avg_success_rate,\n    mei.market_efficiency_score,\n    mei.efficiency_category,\n    -- Comparison metrics\n    AVG(mei.median_time_to_fill) OVER () AS overall_median_time_to_fill,\n    -- Ranking\n    RANK() OVER (ORDER BY mei.market_efficiency_score DESC NULLS LAST) AS efficiency_rank,\n    RANK() OVER (ORDER BY mei.median_time_to_fill ASC NULLS LAST) AS speed_rank\nFROM market_efficiency_indicators mei\nWHERE mei.total_jobs >= 10\nORDER BY mei.market_efficiency_score DESC NULLS LAST, mei.median_time_to_fill ASC NULLS LAST\nLIMIT 100;",
      "line_number": 3007,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006914,
        "row_count": 0,
        "column_count": 15,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 15,
      "title": "Remote Work Trends Analysis with Geographic Distribution and Work Model Evolution",
      "description": "Description: Comprehensive analysis of remote work trends, geographic distribution patterns, work model evolution over time, and remote work adoption rates. Uses time-series analysis and trend calculations. Use Case: Work Model Intelligence - Remote Work Trends and Geographic Distribution Analysis Business Value: Analyzes remote work trends, tracks work model evolution, identifies geographic distribution patterns, and provides insights into remote work adoption. Helps understand work model trend",
      "complexity": "Deep nested CTEs (6+ levels), time-series analysis, trend calculations, geographic aggregations, window functions",
      "expected_output": "Remote work trends analysis with adoption rates, geographic distribution, work model evolution, and trend projections.",
      "sql": "WITH work_model_distribution AS (\n    -- First CTE: Aggregate work model distribution over time\n    SELECT\n        jp.job_id,\n        jp.work_model,\n        jp.job_type,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        jp.posted_date,\n        DATE_TRUNC('month', jp.posted_date) AS posting_month,\n        DATE_TRUNC('quarter', jp.posted_date) AS posting_quarter,\n        DATE_TRUNC('year', jp.posted_date) AS posting_year,\n        (jp.salary_min + jp.salary_max) / 2 AS salary_midpoint\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.work_model IS NOT NULL\n        AND jp.posted_date >= CURRENT_DATE - INTERVAL '2 years'\n),\nmonthly_work_model_trends AS (\n    -- Second CTE: Calculate monthly work model trends\n    SELECT\n        wmd.posting_month,\n        wmd.posting_quarter,\n        wmd.posting_year,\n        COUNT(DISTINCT wmd.job_id) AS total_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'remote' THEN wmd.job_id END) AS remote_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'hybrid' THEN wmd.job_id END) AS hybrid_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'onsite' THEN wmd.job_id END) AS onsite_jobs,\n        -- Work model percentages\n        ROUND(\n            (COUNT(DISTINCT CASE WHEN wmd.work_model = 'remote' THEN wmd.job_id END)::NUMERIC / NULLIF(COUNT(DISTINCT wmd.job_id), 0)) * 100,\n            2\n        ) AS remote_pct,\n        ROUND(\n            (COUNT(DISTINCT CASE WHEN wmd.work_model = 'hybrid' THEN wmd.job_id END)::NUMERIC / NULLIF(COUNT(DISTINCT wmd.job_id), 0)) * 100,\n            2\n        ) AS hybrid_pct,\n        ROUND(\n            (COUNT(DISTINCT CASE WHEN wmd.work_model = 'onsite' THEN wmd.job_id END)::NUMERIC / NULLIF(COUNT(DISTINCT wmd.job_id), 0)) * 100,\n            2\n        ) AS onsite_pct,\n        -- Average salaries by work model\n        AVG(CASE WHEN 
wmd.work_model = 'remote' THEN wmd.salary_midpoint END) AS avg_remote_salary,\n        AVG(CASE WHEN wmd.work_model = 'hybrid' THEN wmd.salary_midpoint END) AS avg_hybrid_salary,\n        AVG(CASE WHEN wmd.work_model = 'onsite' THEN wmd.salary_midpoint END) AS avg_onsite_salary\n    FROM work_model_distribution wmd\n    GROUP BY wmd.posting_month, wmd.posting_quarter, wmd.posting_year\n),\nwork_model_trend_analysis AS (\n    -- Third CTE: Analyze trends with window functions\n    SELECT\n        mwmt.posting_month,\n        mwmt.posting_quarter,\n        mwmt.posting_year,\n        mwmt.total_jobs,\n        mwmt.remote_jobs,\n        mwmt.hybrid_jobs,\n        mwmt.onsite_jobs,\n        mwmt.remote_pct,\n        mwmt.hybrid_pct,\n        mwmt.onsite_pct,\n        ROUND(CAST(mwmt.avg_remote_salary AS NUMERIC), 0) AS avg_remote_salary,\n        ROUND(CAST(mwmt.avg_hybrid_salary AS NUMERIC), 0) AS avg_hybrid_salary,\n        ROUND(CAST(mwmt.avg_onsite_salary AS NUMERIC), 0) AS avg_onsite_salary,\n        -- Compare to previous month\n        LAG(mwmt.remote_pct, 1) OVER (ORDER BY mwmt.posting_month) AS prev_month_remote_pct,\n        LAG(mwmt.hybrid_pct, 1) OVER (ORDER BY mwmt.posting_month) AS prev_month_hybrid_pct,\n        LAG(mwmt.onsite_pct, 1) OVER (ORDER BY mwmt.posting_month) AS prev_month_onsite_pct,\n        -- Moving averages\n        AVG(mwmt.remote_pct) OVER (\n            ORDER BY mwmt.posting_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_remote_pct_3month,\n        AVG(mwmt.hybrid_pct) OVER (\n            ORDER BY mwmt.posting_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_hybrid_pct_3month,\n        AVG(mwmt.onsite_pct) OVER (\n            ORDER BY mwmt.posting_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_onsite_pct_3month,\n        -- Trend direction\n        CASE\n            WHEN mwmt.remote_pct > LAG(mwmt.remote_pct, 1) OVER (ORDER 
BY mwmt.posting_month) THEN 'increasing'\n            WHEN mwmt.remote_pct < LAG(mwmt.remote_pct, 1) OVER (ORDER BY mwmt.posting_month) THEN 'decreasing'\n            ELSE 'stable'\n        END AS remote_trend_direction\n    FROM monthly_work_model_trends mwmt\n),\ngeographic_remote_distribution AS (\n    -- Fourth CTE: Analyze geographic distribution of remote work\n    SELECT\n        wmd.location_state,\n        wmd.location_city,\n        COUNT(DISTINCT wmd.job_id) AS total_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'remote' THEN wmd.job_id END) AS remote_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'hybrid' THEN wmd.job_id END) AS hybrid_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'onsite' THEN wmd.job_id END) AS onsite_jobs,\n        ROUND(\n            (COUNT(DISTINCT CASE WHEN wmd.work_model = 'remote' THEN wmd.job_id END)::NUMERIC / NULLIF(COUNT(DISTINCT wmd.job_id), 0)) * 100,\n            2\n        ) AS remote_pct,\n        AVG(CASE WHEN wmd.work_model = 'remote' THEN wmd.salary_midpoint END) AS avg_remote_salary\n    FROM work_model_distribution wmd\n    WHERE wmd.location_state IS NOT NULL\n    GROUP BY wmd.location_state, wmd.location_city\n),\nindustry_remote_adoption AS (\n    -- Fifth CTE: Analyze remote work adoption by industry\n    SELECT\n        wmd.industry,\n        COUNT(DISTINCT wmd.job_id) AS total_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'remote' THEN wmd.job_id END) AS remote_jobs,\n        COUNT(DISTINCT CASE WHEN wmd.work_model = 'hybrid' THEN wmd.job_id END) AS hybrid_jobs,\n        ROUND(\n            (COUNT(DISTINCT CASE WHEN wmd.work_model = 'remote' THEN wmd.job_id END)::NUMERIC / NULLIF(COUNT(DISTINCT wmd.job_id), 0)) * 100,\n            2\n        ) AS remote_adoption_pct,\n        AVG(CASE WHEN wmd.work_model = 'remote' THEN wmd.salary_midpoint END) AS avg_remote_salary\n    FROM work_model_distribution wmd\n    WHERE wmd.industry IS NOT NULL\n    GROUP BY 
wmd.industry\n)\nSELECT\n    wmta.posting_month,\n    wmta.posting_quarter,\n    wmta.posting_year,\n    wmta.total_jobs,\n    wmta.remote_jobs,\n    wmta.hybrid_jobs,\n    wmta.onsite_jobs,\n    wmta.remote_pct,\n    wmta.hybrid_pct,\n    wmta.onsite_pct,\n    wmta.avg_remote_salary,\n    wmta.avg_hybrid_salary,\n    wmta.avg_onsite_salary,\n    wmta.prev_month_remote_pct,\n    wmta.moving_avg_remote_pct_3month,\n    wmta.remote_trend_direction,\n    -- Change from previous month\n    CASE\n        WHEN wmta.prev_month_remote_pct IS NOT NULL THEN\n            ROUND(wmta.remote_pct - wmta.prev_month_remote_pct, 2)\n        ELSE NULL\n    END AS remote_pct_change,\n    -- Top remote-friendly locations\n    (\n        SELECT JSON_OBJECT_AGG(\n            CONCAT(grd.location_city, ', ', grd.location_state),\n            JSON_BUILD_OBJECT(\n                'remote_pct', grd.remote_pct,\n                'total_jobs', grd.total_jobs,\n                'avg_salary', ROUND(CAST(grd.avg_remote_salary AS NUMERIC), 0)\n            )\n        )\n        FROM (\n            SELECT grd.location_state, grd.location_city, grd.remote_pct, grd.total_jobs, grd.avg_remote_salary\n            FROM geographic_remote_distribution grd\n            WHERE grd.total_jobs >= 20\n            ORDER BY grd.remote_pct DESC\n            LIMIT 5\n        ) grd\n    ) AS top_remote_locations,\n    -- Top remote-friendly industries\n    (\n        SELECT JSON_OBJECT_AGG(\n            ira.industry,\n            JSON_BUILD_OBJECT(\n                'remote_adoption_pct', ira.remote_adoption_pct,\n                'total_jobs', ira.total_jobs,\n                'avg_salary', ROUND(CAST(ira.avg_remote_salary AS NUMERIC), 0)\n            )\n        )\n        FROM (\n            SELECT ira.industry, ira.remote_adoption_pct, ira.total_jobs, ira.avg_remote_salary\n            FROM industry_remote_adoption ira\n            WHERE ira.total_jobs >= 50\n            ORDER BY ira.remote_adoption_pct DESC\n            
LIMIT 5\n        ) ira\n    ) AS top_remote_industries\nFROM work_model_trend_analysis wmta\nORDER BY wmta.posting_month DESC\nLIMIT 100;",
      "line_number": 3198,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006004,
        "row_count": 1,
        "column_count": 19,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 16,
      "title": "Data Source Quality Analysis with Extraction Metrics and Data Freshness Tracking",
      "description": "Description: Comprehensive analysis of data source quality, extraction success rates, data freshness metrics, and source reliability scoring. Uses quality metrics and reliability calculations. Use Case: Data Quality Intelligence - Source Quality Analysis and Extraction Monitoring Business Value: Analyzes data source quality, tracks extraction success rates, monitors data freshness, and provides reliability scores. Helps ensure data quality and identify source issues. Purpose: Delivers actionable",
      "complexity": "Deep nested CTEs (6+ levels), quality metrics, reliability scoring, time-series analysis, window functions",
      "expected_output": "Data source quality analysis with extraction metrics, freshness tracking, and reliability scores.",
      "sql": "WITH data_source_extraction_summary AS (\n    -- First CTE: Aggregate extraction metrics by source\n    SELECT\n        dsm.source_name,\n        dsm.source_type,\n        DATE_TRUNC('day', dsm.extraction_date) AS extraction_day,\n        DATE_TRUNC('week', dsm.extraction_date) AS extraction_week,\n        DATE_TRUNC('month', dsm.extraction_date) AS extraction_month,\n        COUNT(DISTINCT dsm.metadata_id) AS total_extractions,\n        SUM(dsm.records_extracted) AS total_records_extracted,\n        SUM(dsm.records_new) AS total_records_new,\n        SUM(dsm.records_updated) AS total_records_updated,\n        SUM(dsm.records_failed) AS total_records_failed,\n        COUNT(DISTINCT CASE WHEN dsm.extraction_status = 'success' THEN dsm.metadata_id END) AS successful_extractions,\n        COUNT(DISTINCT CASE WHEN dsm.extraction_status = 'partial' THEN dsm.metadata_id END) AS partial_extractions,\n        COUNT(DISTINCT CASE WHEN dsm.extraction_status = 'failed' THEN dsm.metadata_id END) AS failed_extractions,\n        AVG(dsm.extraction_duration_seconds) AS avg_extraction_duration,\n        AVG(dsm.api_response_code) AS avg_api_response_code\n    FROM data_source_metadata dsm\n    WHERE dsm.extraction_date >= CURRENT_TIMESTAMP - INTERVAL '90 days'\n    GROUP BY dsm.source_name, dsm.source_type, DATE_TRUNC('day', dsm.extraction_date), DATE_TRUNC('week', dsm.extraction_date), DATE_TRUNC('month', dsm.extraction_date)\n),\nsource_quality_metrics AS (\n    -- Second CTE: Calculate quality metrics\n    SELECT\n        dses.source_name,\n        dses.source_type,\n        dses.extraction_month,\n        dses.total_extractions,\n        dses.total_records_extracted,\n        dses.total_records_new,\n        dses.total_records_updated,\n        dses.total_records_failed,\n        dses.successful_extractions,\n        dses.partial_extractions,\n        dses.failed_extractions,\n        ROUND(CAST(dses.avg_extraction_duration AS NUMERIC), 2) AS 
avg_extraction_duration,\n        ROUND(CAST(dses.avg_api_response_code AS NUMERIC), 0) AS avg_api_response_code,\n        -- Success rate\n        CASE\n            WHEN dses.total_extractions > 0 THEN\n                ROUND((dses.successful_extractions::NUMERIC / dses.total_extractions) * 100, 2)\n            ELSE NULL\n        END AS success_rate_pct,\n        -- Failure rate\n        CASE\n            WHEN dses.total_extractions > 0 THEN\n                ROUND((dses.failed_extractions::NUMERIC / dses.total_extractions) * 100, 2)\n            ELSE NULL\n        END AS failure_rate_pct,\n        -- Record quality rate\n        CASE\n            WHEN dses.total_records_extracted > 0 THEN\n                ROUND(((dses.total_records_new + dses.total_records_updated)::NUMERIC / dses.total_records_extracted) * 100, 2)\n            ELSE NULL\n        END AS record_quality_rate_pct,\n        -- Average records per extraction\n        CASE\n            WHEN dses.total_extractions > 0 THEN\n                ROUND(dses.total_records_extracted::NUMERIC / dses.total_extractions, 0)\n            ELSE NULL\n        END AS avg_records_per_extraction\n    FROM data_source_extraction_summary dses\n),\nsource_freshness_analysis AS (\n    -- Third CTE: Analyze data freshness\n    SELECT\n        dsm.source_name,\n        MAX(dsm.extraction_date) AS last_extraction_date,\n        MIN(dsm.extraction_date) AS first_extraction_date,\n        COUNT(DISTINCT DATE_TRUNC('day', dsm.extraction_date)) AS extraction_days,\n        EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - MAX(dsm.extraction_date))) / 86400 AS days_since_last_extraction,\n        (\n            SELECT AVG(days_between)\n            FROM (\n                SELECT EXTRACT(EPOCH FROM (LEAD(dsm2.extraction_date, 1) OVER (PARTITION BY dsm2.source_name ORDER BY dsm2.extraction_date) - dsm2.extraction_date)) / 86400 AS days_between\n                FROM data_source_metadata dsm2\n                WHERE dsm2.source_name = 
dsm.source_name\n            ) intervals\n            WHERE days_between IS NOT NULL\n        ) AS avg_days_between_extractions\n    FROM data_source_metadata dsm\n    GROUP BY dsm.source_name\n),\nsource_reliability_scoring AS (\n    -- Fourth CTE: Calculate reliability scores\n    SELECT\n        sqm.source_name,\n        sqm.source_type,\n        sqm.extraction_month,\n        sqm.total_extractions,\n        sqm.total_records_extracted,\n        sqm.success_rate_pct,\n        sqm.failure_rate_pct,\n        sqm.record_quality_rate_pct,\n        sqm.avg_records_per_extraction,\n        sfa.days_since_last_extraction,\n        sfa.avg_days_between_extractions,\n        -- Reliability score components\n        CASE\n            WHEN sqm.success_rate_pct >= 95 THEN 30\n            WHEN sqm.success_rate_pct >= 90 THEN 25\n            WHEN sqm.success_rate_pct >= 80 THEN 20\n            WHEN sqm.success_rate_pct >= 70 THEN 15\n            ELSE 10\n        END AS success_rate_score,\n        CASE\n            WHEN sqm.record_quality_rate_pct >= 95 THEN 25\n            WHEN sqm.record_quality_rate_pct >= 90 THEN 20\n            WHEN sqm.record_quality_rate_pct >= 80 THEN 15\n            ELSE 10\n        END AS quality_score,\n        CASE\n            WHEN sfa.days_since_last_extraction <= 1 THEN 25\n            WHEN sfa.days_since_last_extraction <= 3 THEN 20\n            WHEN sfa.days_since_last_extraction <= 7 THEN 15\n            WHEN sfa.days_since_last_extraction <= 14 THEN 10\n            ELSE 5\n        END AS freshness_score,\n        CASE\n            WHEN sqm.avg_records_per_extraction >= 100 THEN 20\n            WHEN sqm.avg_records_per_extraction >= 50 THEN 15\n            WHEN sqm.avg_records_per_extraction >= 20 THEN 10\n            ELSE 5\n        END AS volume_score,\n        -- Overall reliability score\n        (\n            CASE\n                WHEN sqm.success_rate_pct >= 95 THEN 30\n                WHEN sqm.success_rate_pct >= 90 THEN 25\n         
       WHEN sqm.success_rate_pct >= 80 THEN 20\n                WHEN sqm.success_rate_pct >= 70 THEN 15\n                ELSE 10\n            END +\n            CASE\n                WHEN sqm.record_quality_rate_pct >= 95 THEN 25\n                WHEN sqm.record_quality_rate_pct >= 90 THEN 20\n                WHEN sqm.record_quality_rate_pct >= 80 THEN 15\n                ELSE 10\n            END +\n            CASE\n                WHEN sfa.days_since_last_extraction <= 1 THEN 25\n                WHEN sfa.days_since_last_extraction <= 3 THEN 20\n                WHEN sfa.days_since_last_extraction <= 7 THEN 15\n                WHEN sfa.days_since_last_extraction <= 14 THEN 10\n                ELSE 5\n            END +\n            CASE\n                WHEN sqm.avg_records_per_extraction >= 100 THEN 20\n                WHEN sqm.avg_records_per_extraction >= 50 THEN 15\n                WHEN sqm.avg_records_per_extraction >= 20 THEN 10\n                ELSE 5\n            END\n        ) AS reliability_score,\n        -- Reliability category\n        CASE\n            WHEN (\n                CASE\n                    WHEN sqm.success_rate_pct >= 95 THEN 30 ELSE 10\n                END +\n                CASE\n                    WHEN sqm.record_quality_rate_pct >= 95 THEN 25 ELSE 10\n                END +\n                CASE\n                    WHEN sfa.days_since_last_extraction <= 1 THEN 25 ELSE 5\n                END +\n                CASE\n                    WHEN sqm.avg_records_per_extraction >= 100 THEN 20 ELSE 5\n                END\n            ) >= 90 THEN 'highly_reliable'\n            WHEN (\n                CASE\n                    WHEN sqm.success_rate_pct >= 95 THEN 30 ELSE 10\n                END +\n                CASE\n                    WHEN sqm.record_quality_rate_pct >= 95 THEN 25 ELSE 10\n                END +\n                CASE\n                    WHEN sfa.days_since_last_extraction <= 1 THEN 25 ELSE 5\n                END +\n          
      CASE\n                    WHEN sqm.avg_records_per_extraction >= 100 THEN 20 ELSE 5\n                END\n            ) >= 75 THEN 'reliable'\n            WHEN (\n                CASE\n                    WHEN sqm.success_rate_pct >= 95 THEN 30 ELSE 10\n                END +\n                CASE\n                    WHEN sqm.record_quality_rate_pct >= 95 THEN 25 ELSE 10\n                END +\n                CASE\n                    WHEN sfa.days_since_last_extraction <= 1 THEN 25 ELSE 5\n                END +\n                CASE\n                    WHEN sqm.avg_records_per_extraction >= 100 THEN 20 ELSE 5\n                END\n            ) >= 60 THEN 'moderate'\n            ELSE 'unreliable'\n        END AS reliability_category\n    FROM source_quality_metrics sqm\n    INNER JOIN source_freshness_analysis sfa ON sqm.source_name = sfa.source_name\n)\nSELECT\n    srs.source_name,\n    srs.source_type,\n    srs.extraction_month,\n    srs.total_extractions,\n    srs.total_records_extracted,\n    srs.success_rate_pct,\n    srs.failure_rate_pct,\n    srs.record_quality_rate_pct,\n    srs.avg_records_per_extraction,\n    ROUND(CAST(srs.days_since_last_extraction AS NUMERIC), 1) AS days_since_last_extraction,\n    ROUND(CAST(srs.avg_days_between_extractions AS NUMERIC), 1) AS avg_days_between_extractions,\n    srs.reliability_score,\n    srs.reliability_category,\n    -- Trend analysis\n    LAG(srs.success_rate_pct, 1) OVER (PARTITION BY srs.source_name ORDER BY srs.extraction_month) AS prev_month_success_rate,\n    AVG(srs.success_rate_pct) OVER (\n        PARTITION BY srs.source_name\n        ORDER BY srs.extraction_month\n        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n    ) AS moving_avg_success_rate,\n    -- Ranking\n    RANK() OVER (ORDER BY srs.reliability_score DESC) AS reliability_rank\nFROM source_reliability_scoring srs\nORDER BY srs.extraction_month DESC, srs.reliability_score DESC\nLIMIT 100;",
      "line_number": 3402,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005348,
        "row_count": 5,
        "column_count": 16,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 17,
      "title": "Job Title Normalization and Clustering with Similarity Analysis",
      "description": "Description: Comprehensive job title normalization, clustering similar titles, and similarity analysis. Uses text similarity patterns and clustering algorithms. Use Case: Title Intelligence - Job Title Normalization and Clustering Analysis Business Value: Normalizes job titles, clusters similar titles, and identifies title variations. Helps standardize job titles and improve matching accuracy. Purpose: Delivers actionable insights for job title standardization and matching improvement. Complexit",
      "complexity":
    "Deep nested CTEs (6+ levels), text similarity analysis, clustering patterns, normalization algorithms",
      "expected_output": "Job title normalization analysis with clusters, similarity scores, and standardized titles.",
      "sql": "WITH job_title_variations AS (\n    -- First CTE: Extract and normalize job titles\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        jp.job_title_normalized,\n        c.industry,\n        jp.company_id,\n        LOWER(TRIM(jp.job_title)) AS title_lower,\n        -- Extract key terms\n        ARRAY_TO_STRING(\n            ARRAY_REMOVE(\n                ARRAY[\n                    CASE WHEN LOWER(jp.job_title) LIKE '%senior%' THEN 'senior' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%junior%' THEN 'junior' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%lead%' THEN 'lead' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%principal%' THEN 'principal' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%engineer%' THEN 'engineer' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%developer%' THEN 'developer' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%manager%' THEN 'manager' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%analyst%' THEN 'analyst' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%specialist%' THEN 'specialist' END,\n                    CASE WHEN LOWER(jp.job_title) LIKE '%architect%' THEN 'architect' END\n                ],\n                NULL\n            ),\n            ' '\n        ) AS title_key_terms\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.job_title IS NOT NULL\n),\ntitle_frequency_analysis AS (\n    -- Second CTE: Analyze title frequency and variations\n    SELECT\n        jtv.title_lower,\n        jtv.title_key_terms,\n        COUNT(DISTINCT jtv.job_id) AS title_frequency,\n        COUNT(DISTINCT jtv.industry) AS industries_using_title,\n        COUNT(DISTINCT jtv.company_id) AS companies_using_title,\n        ARRAY_AGG(DISTINCT jtv.job_title ORDER BY jtv.job_title) FILTER (WHERE jtv.job_title IS NOT NULL) AS 
title_variations,\n        -- Most common industry\n        MODE() WITHIN GROUP (ORDER BY jtv.industry) AS most_common_industry\n    FROM job_title_variations jtv\n    GROUP BY jtv.title_lower, jtv.title_key_terms\n),\ntitle_similarity_clustering AS (\n    -- Third CTE: Cluster similar titles using key terms\n    SELECT\n        tfa.title_lower,\n        tfa.title_key_terms,\n        tfa.title_frequency,\n        tfa.industries_using_title,\n        tfa.companies_using_title,\n        tfa.title_variations,\n        tfa.most_common_industry,\n        -- Find similar titles based on key terms\n        (\n            SELECT COUNT(*)\n            FROM title_frequency_analysis tfa2\n            WHERE tfa2.title_key_terms = tfa.title_key_terms\n                AND tfa2.title_lower != tfa.title_lower\n        ) AS similar_titles_count,\n        -- Cluster identifier based on key terms\n        MD5(tfa.title_key_terms) AS title_cluster_id\n    FROM title_frequency_analysis tfa\n),\ntitle_normalization_mapping AS (\n    -- Fourth CTE: Create normalization mapping\n    SELECT\n        tsc.title_lower,\n        tsc.title_key_terms,\n        tsc.title_frequency,\n        tsc.title_variations,\n        tsc.title_cluster_id,\n        tsc.most_common_industry,\n        -- Standardized title (most frequent variation)\n        (\n            SELECT title_variation\n            FROM UNNEST(tsc.title_variations) AS title_variation\n            ORDER BY (\n                SELECT COUNT(*)\n                FROM job_title_variations jtv2\n                WHERE LOWER(TRIM(jtv2.job_title)) = title_variation\n            ) DESC\n            LIMIT 1\n        ) AS standardized_title,\n        -- Title category\n        CASE\n            WHEN tsc.title_key_terms LIKE '%engineer%' OR tsc.title_key_terms LIKE '%developer%' THEN 'engineering'\n            WHEN tsc.title_key_terms LIKE '%manager%' THEN 'management'\n            WHEN tsc.title_key_terms LIKE '%analyst%' THEN 'analysis'\n            
WHEN tsc.title_key_terms LIKE '%specialist%' THEN 'specialist'\n            WHEN tsc.title_key_terms LIKE '%architect%' THEN 'architecture'\n            ELSE 'other'\n        END AS title_category,\n        -- Experience level\n        CASE\n            WHEN tsc.title_key_terms LIKE '%senior%' OR tsc.title_key_terms LIKE '%lead%' OR tsc.title_key_terms LIKE '%principal%' THEN 'senior'\n            WHEN tsc.title_key_terms LIKE '%junior%' OR tsc.title_key_terms LIKE '%entry%' THEN 'junior'\n            ELSE 'mid'\n        END AS experience_level\n    FROM title_similarity_clustering tsc\n)\nSELECT\n    tnm.title_lower,\n    tnm.title_key_terms,\n    tnm.title_frequency,\n    tnm.title_variations,\n    tnm.standardized_title,\n    tnm.title_category,\n    tnm.experience_level,\n    tnm.most_common_industry,\n    tnm.title_cluster_id,\n    -- Cluster size\n    COUNT(*) OVER (PARTITION BY tnm.title_cluster_id) AS cluster_size,\n    -- Ranking\n    RANK() OVER (ORDER BY tnm.title_frequency DESC) AS title_frequency_rank,\n    RANK() OVER (PARTITION BY tnm.title_category ORDER BY tnm.title_frequency DESC) AS category_rank\nFROM title_normalization_mapping tnm\nWHERE tnm.title_frequency >= 3\nORDER BY tnm.title_frequency DESC, tnm.title_category\nLIMIT 100;",
      "line_number": 3648,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.007709,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 18,
      "title": "Application Success Prediction with Machine Learning Features and Probability Scoring",
      "description": "Description: Comprehensive analysis for predicting application success using feature engineering, probability scoring, and success factor identification. Uses predictive analytics patterns. Use Case:
    Predictive Analytics - Application Success Prediction and Probability Scoring Business Value: Predicts application success probability, identifies success factors, and provides actionable insights for improving application outcomes. Helps users optimize applications and increase success rates. Purpo",
      "complexity": "Deep nested CTEs (7+ levels), feature engineering, probability calculations, predictive patterns, window functions",
      "expected_output": "Application success prediction with probability scores, success factors, and optimization recommendations.",
      "sql": "WITH application_features AS (\n    -- First CTE: Extract features for prediction\n    SELECT\n        ja.application_id,\n        ja.user_id,\n        ja.job_id,\n        ja.application_status,\n        ja.match_score,\n        ja.submitted_at,\n        up.years_experience,\n        up.education_level,\n        up.profile_completeness_score,\n        jp.job_title,\n        c.industry,\n        jp.location_state,\n        jp.work_model,\n        jp.salary_min,\n        jp.salary_max,\n        (jp.salary_min + jp.salary_max) / 2 AS job_salary_midpoint,\n        -- User skill features\n        (\n            SELECT COUNT(DISTINCT us.skill_id)\n            FROM user_skills us\n            WHERE us.user_id = ja.user_id\n        ) AS user_total_skills,\n        (\n            SELECT AVG(us.proficiency_score)\n            FROM user_skills us\n            WHERE us.user_id = ja.user_id\n        ) AS user_avg_proficiency,\n        -- Job requirement features\n        (\n            SELECT COUNT(DISTINCT jsr.skill_id)\n            FROM job_skills_requirements jsr\n            WHERE jsr.job_id = ja.job_id\n                AND jsr.requirement_type = 'required'\n        ) AS job_required_skills_count,\n        -- Skill match features\n        (\n            SELECT COUNT(*)\n            FROM job_skills_requirements jsr\n            INNER JOIN user_skills us ON jsr.skill_id = us.skill_id\n            WHERE jsr.job_id = ja.job_id\n                AND us.user_id = ja.user_id\n                AND jsr.requirement_type = 'required'\n        ) AS matching_required_skills_count,\n        -- User application history\n        (\n            SELECT COUNT(*)\n            FROM job_applications ja2\n            WHERE ja2.user_id = ja.user_id\n                AND ja2.submitted_at < ja.submitted_at\n        ) AS user_previous_applications,\n        (\n            SELECT COUNT(*)\n            FROM job_applications ja2\n            WHERE ja2.user_id = ja.user_id\n                AND 
ja2.submitted_at < ja.submitted_at\n                AND ja2.application_status IN ('interview', 'offer')\n        ) AS user_previous_successes,\n        -- Job competition features\n        (\n            SELECT COUNT(*)\n            FROM job_applications ja2\n            WHERE ja2.job_id = ja.job_id\n                AND ja2.submitted_at < ja.submitted_at\n        ) AS job_previous_applications\n    FROM job_applications ja\n    INNER JOIN user_profiles up ON ja.user_id = up.user_id\n    INNER JOIN job_postings jp ON ja.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE ja.submitted_at IS NOT NULL\n        AND ja.submitted_at >= CURRENT_TIMESTAMP - INTERVAL '1 year'\n),\nfeature_engineering AS (\n    -- Second CTE:
    Engineer predictive features\n    SELECT\n        af.application_id,\n        af.user_id,\n        af.job_id,\n        af.application_status,\n        af.match_score,\n        af.years_experience,\n        af.profile_completeness_score,\n        af.user_total_skills,\n        af.user_avg_proficiency,\n        af.job_required_skills_count,\n        af.matching_required_skills_count,\n        af.user_previous_applications,\n        af.user_previous_successes,\n        af.job_previous_applications,\n        -- Derived features\n        CASE\n            WHEN af.job_required_skills_count > 0 THEN\n                ROUND((af.matching_required_skills_count::NUMERIC / af.job_required_skills_count) * 100, 2)\n            ELSE NULL\n        END AS skill_match_rate_pct,\n        CASE\n            WHEN af.user_previous_applications > 0 THEN\n                ROUND((af.user_previous_successes::NUMERIC / af.user_previous_applications) * 100, 2)\n            ELSE NULL\n        END AS user_historical_success_rate_pct,\n        CASE\n            WHEN af.job_previous_applications > 0 THEN\n                ROUND(af.job_previous_applications::NUMERIC / NULLIF(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - jp.posted_date)) / 86400, 0), 2)\n            ELSE NULL\n        END AS job_application_velocity,\n        -- Target variable\n        CASE\n            WHEN af.application_status IN ('interview', 'offer') THEN 1\n            ELSE 0\n        END AS success_indicator\n    FROM application_features af\n    INNER JOIN job_postings jp ON af.job_id = jp.job_id\n),\nsuccess_probability_scoring AS (\n    -- Third CTE: Calculate success probability scores\n    SELECT\n        fe.application_id,\n        fe.user_id,\n        fe.job_id,\n        fe.application_status,\n        fe.match_score,\n        fe.skill_match_rate_pct,\n        fe.user_historical_success_rate_pct,\n        fe.job_application_velocity,\n        fe.success_indicator,\n        -- Probability score components\n        CASE\n    
        WHEN fe.match_score >= 80 THEN 30\n            WHEN fe.match_score >= 70 THEN 25\n            WHEN fe.match_score >= 60 THEN 20\n            ELSE 15\n        END AS match_score_component,\n        CASE\n            WHEN fe.skill_match_rate_pct >= 80 THEN 25\n            WHEN fe.skill_match_rate_pct >= 60 THEN 20\n            WHEN fe.skill_match_rate_pct >= 40 THEN 15\n            ELSE 10\n        END AS skill_match_component,\n        CASE\n            WHEN fe.user_historical_success_rate_pct >= 30 THEN 25\n            WHEN fe.user_historical_success_rate_pct >= 20 THEN 20\n            WHEN fe.user_historical_success_rate_pct >= 10 THEN 15\n            ELSE 10\n        END AS historical_success_component,\n        CASE\n            WHEN fe.profile_completeness_score >= 90 THEN 20\n            WHEN fe.profile_completeness_score >= 75 THEN 15\n            WHEN fe.profile_completeness_score >= 60 THEN 10\n            ELSE 5\n        END AS profile_completeness_component,\n        -- Overall success probability score\n        (\n            CASE\n                WHEN fe.match_score >= 80 THEN 30\n                WHEN fe.match_score >= 70 THEN 25\n                WHEN fe.match_score >= 60 THEN 20\n                ELSE 15\n            END +\n            CASE\n                WHEN fe.skill_match_rate_pct >= 80 THEN 25\n                WHEN fe.skill_match_rate_pct >= 60 THEN 20\n                WHEN fe.skill_match_rate_pct >= 40 THEN 15\n                ELSE 10\n            END +\n            CASE\n                WHEN fe.user_historical_success_rate_pct >= 30 THEN 25\n                WHEN fe.user_historical_success_rate_pct >= 20 THEN 20\n                WHEN fe.user_historical_success_rate_pct >= 10 THEN 15\n                ELSE 10\n            END +\n            CASE\n                WHEN fe.profile_completeness_score >= 90 THEN 20\n                WHEN fe.profile_completeness_score >= 75 THEN 15\n                WHEN fe.profile_completeness_score >= 60 THEN 
10\n                ELSE 5\n            END\n        ) AS success_probability_score,\n        -- Success probability category\n        CASE\n            WHEN (\n                CASE WHEN fe.match_score >= 80 THEN 30 ELSE 15 END +\n                CASE WHEN fe.skill_match_rate_pct >= 80 THEN 25 ELSE 10 END +\n                CASE WHEN fe.user_historical_success_rate_pct >= 30 THEN 25 ELSE 10 END +\n                CASE WHEN fe.profile_completeness_score >= 90 THEN 20 ELSE 5 END\n            ) >= 85 THEN 'high_probability'\n            WHEN (\n                CASE WHEN fe.match_score >= 80 THEN 30 ELSE 15 END +\n                CASE WHEN fe.skill_match_rate_pct >= 80 THEN 25 ELSE 10 END +\n                CASE WHEN fe.user_historical_success_rate_pct >= 30 THEN 25 ELSE 10 END +\n                CASE WHEN fe.profile_completeness_score >= 90 THEN 20 ELSE 5 END\n            ) >= 70 THEN 'moderate_probability'\n            WHEN (\n                CASE WHEN fe.match_score >= 80 THEN 30 ELSE 15 END +\n                CASE WHEN fe.skill_match_rate_pct >= 80 THEN 25 ELSE 10 END +\n                CASE WHEN fe.user_historical_success_rate_pct >= 30 THEN 25 ELSE 10 END +\n                CASE WHEN fe.profile_completeness_score >= 90 THEN 20 ELSE 5 END\n            ) >= 55 THEN 'low_probability'\n            ELSE 'very_low_probability'\n        END AS success_probability_category\n    FROM feature_engineering fe\n)\nSELECT\n    sps.application_id,\n    sps.user_id,\n    sps.job_id,\n    sps.application_status,\n    sps.match_score,\n    sps.skill_match_rate_pct,\n    sps.user_historical_success_rate_pct,\n    sps.success_probability_score,\n    sps.success_probability_category,\n    sps.success_indicator,\n    -- Actual vs predicted\n    CASE\n        WHEN sps.success_indicator = 1 AND sps.success_probability_score >= 70 THEN 'true_positive'\n        WHEN sps.success_indicator = 0 AND sps.success_probability_score < 70 THEN 'true_negative'\n        WHEN sps.success_indicator = 
1 AND sps.success_probability_score < 70 THEN 'false_negative'\n        ELSE 'false_positive'\n    END AS prediction_accuracy,\n    -- Ranking\n    RANK() OVER (ORDER BY sps.success_probability_score DESC) AS probability_rank\nFROM success_probability_scoring sps\nORDER BY sps.success_probability_score DESC, sps.match_score DESC\nLIMIT 100;",
      "line_number": 3789,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.009992,
        "row_count": 5,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 19,
      "title": "Company Growth Analysis with Hiring Trends and Expansion Patterns",
      "description": "Description: Comprehensive analysis of company growth patterns, hiring trends, geographic expansion, and growth indicators. Uses trend analysis and growth calculations. Use Case: Company Intelligence - Growth Analysis and Expansion Pattern Tracking Business Value: Analyzes company growth patterns, tracks hiring trends, identifies expansion patterns, and provides growth indicators. Helps understand company dynamics and opportunities. Purpose: Delivers actionable insights into company growth and e",
      "complexity": "Deep nested CTEs (6+ levels), growth trend analysis, expansion pattern detection, window functions, time-series analysis",
      "expected_output": "Company growth analysis with hiring trends, expansion patterns, and growth indicators.",
      "sql": "WITH company_hiring_timeline AS (\n    -- First CTE: Track company hiring over time\n    SELECT\n        jp.company_id,\n        c.company_name,\n        c.industry,\n        c.company_size,\n        DATE_TRUNC('month', jp.posted_date) AS hiring_month,\n        DATE_TRUNC('quarter', jp.posted_date) AS hiring_quarter,\n        DATE_TRUNC('year', jp.posted_date) AS hiring_year,\n        COUNT(DISTINCT jp.job_id) AS jobs_posted,\n        COUNT(DISTINCT jp.job_title) AS unique_job_titles,\n        COUNT(DISTINCT jp.location_state) AS states_hiring,\n        COUNT(DISTINCT jp.location_city) AS cities_hiring,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'remote' THEN jp.job_id END) AS remote_jobs,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary_midpoint\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.posted_date >= CURRENT_DATE - INTERVAL '2 years'\n    GROUP BY jp.company_id, c.company_name, c.industry, c.company_size, DATE_TRUNC('month', jp.posted_date), DATE_TRUNC('quarter', jp.posted_date), DATE_TRUNC('year', jp.posted_date)\n),\ncompany_growth_metrics AS (\n    -- Second CTE: Calculate growth metrics\n    SELECT\n        cht.company_id,\n        cht.company_name,\n        cht.industry,\n        cht.company_size,\n        cht.hiring_month,\n        cht.hiring_quarter,\n        cht.hiring_year,\n        cht.jobs_posted,\n        cht.unique_job_titles,\n        cht.states_hiring,\n        cht.cities_hiring,\n        cht.remote_jobs,\n        ROUND(CAST(cht.avg_salary_midpoint AS NUMERIC), 0) AS avg_salary_midpoint,\n        -- Compare to previous month\n        LAG(cht.jobs_posted, 1) OVER (PARTITION BY cht.company_id ORDER BY cht.hiring_month) AS prev_month_jobs,\n        LAG(cht.states_hiring, 1) OVER (PARTITION BY cht.company_id ORDER BY cht.hiring_month) AS prev_month_states,\n        LAG(cht.cities_hiring, 1) OVER (PARTITION BY cht.company_id 
ORDER BY cht.hiring_month) AS prev_month_cities,\n        -- Moving averages\n        AVG(cht.jobs_posted) OVER (\n            PARTITION BY cht.company_id\n            ORDER BY cht.hiring_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_jobs_3month,\n        AVG(cht.jobs_posted) OVER (\n            PARTITION BY cht.company_id\n            ORDER BY cht.hiring_month\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_jobs_6month\n    FROM company_hiring_timeline cht\n),\ncompany_growth_analysis AS (\n    -- Third CTE: Analyze growth patterns\n    SELECT\n        cgm.company_id,\n        cgm.company_name,\n        cgm.industry,\n        cgm.company_size,\n        cgm.hiring_month,\n        cgm.jobs_posted,\n        cgm.states_hiring,\n        cgm.cities_hiring,\n        cgm.remote_jobs,\n        cgm.avg_salary_midpoint,\n        -- Growth rates\n        CASE\n            WHEN cgm.prev_month_jobs > 0 THEN\n                ROUND(((cgm.jobs_posted - cgm.prev_month_jobs)::NUMERIC / cgm.prev_month_jobs) * 100, 2)\n            ELSE NULL\n        END AS jobs_growth_rate_pct,\n        -- Geographic expansion\n        CASE\n            WHEN cgm.states_hiring > cgm.prev_month_states THEN 'expanding'\n            WHEN cgm.states_hiring < cgm.prev_month_states THEN 'contracting'\n            ELSE 'stable'\n        END AS geographic_expansion_status,\n        -- Growth trend\n        CASE\n            WHEN cgm.jobs_posted > cgm.moving_avg_jobs_6month * 1.2 THEN 'accelerating'\n            WHEN cgm.jobs_posted > cgm.moving_avg_jobs_6month THEN 'growing'\n            WHEN cgm.jobs_posted < cgm.moving_avg_jobs_6month * 0.8 THEN 'declining'\n            ELSE 'stable'\n        END AS growth_trend\n    FROM company_growth_metrics cgm\n),\ncompany_summary_metrics AS (\n    -- Fourth CTE: Calculate summary metrics\n    SELECT\n        cga.company_id,\n        cga.company_name,\n        cga.industry,\n        
cga.company_size,\n        SUM(cga.jobs_posted) AS total_jobs_last_year,\n        AVG(cga.jobs_posted) AS avg_monthly_jobs,\n        MAX(cga.states_hiring) AS max_states_hiring,\n        MAX(cga.cities_hiring) AS max_cities_hiring,\n        AVG(cga.jobs_growth_rate_pct) AS avg_growth_rate,\n        COUNT(DISTINCT CASE WHEN cga.growth_trend = 'accelerating' THEN cga.hiring_month END) AS accelerating_months,\n        COUNT(DISTINCT CASE WHEN cga.geographic_expansion_status = 'expanding' THEN cga.hiring_month END) AS expanding_months,\n        AVG(cga.avg_salary_midpoint) AS avg_salary_midpoint\n    FROM company_growth_analysis cga\n    GROUP BY cga.company_id, cga.company_name, cga.industry, cga.company_size\n),\ncompany_growth_scoring AS (\n    -- Fifth CTE: Score company growth\n    SELECT\n        csm.company_id,\n        csm.company_name,\n        csm.industry,\n        csm.company_size,\n        csm.total_jobs_last_year,\n        csm.avg_monthly_jobs,\n        csm.max_states_hiring,\n        csm.max_cities_hiring,\n        ROUND(CAST(csm.avg_growth_rate AS NUMERIC), 2) AS avg_growth_rate,\n        csm.accelerating_months,\n        csm.expanding_months,\n        ROUND(CAST(csm.avg_salary_midpoint AS NUMERIC), 0) AS avg_salary_midpoint,\n        -- Growth score components\n        CASE\n            WHEN csm.total_jobs_last_year >= 100 THEN 30\n            WHEN csm.total_jobs_last_year >= 50 THEN 25\n            WHEN csm.total_jobs_last_year >= 20 THEN 20\n            ELSE 15\n        END AS hiring_volume_score,\n        CASE\n            WHEN csm.avg_growth_rate >= 20 THEN 25\n            WHEN csm.avg_growth_rate >= 10 THEN 20\n            WHEN csm.avg_growth_rate >= 5 THEN 15\n            ELSE 10\n        END AS growth_rate_score,\n        CASE\n            WHEN csm.max_states_hiring >= 10 THEN 25\n            WHEN csm.max_states_hiring >= 5 THEN 20\n            WHEN csm.max_states_hiring >= 2 THEN 15\n            ELSE 10\n        END AS 
geographic_expansion_score,\n        CASE\n            WHEN csm.accelerating_months >= 6 THEN 20\n            WHEN csm.accelerating_months >= 3 THEN 15\n            WHEN csm.accelerating_months >= 1 THEN 10\n            ELSE 5\n        END AS acceleration_score,\n        -- Overall growth score\n        (\n            CASE\n                WHEN csm.total_jobs_last_year >= 100 THEN 30\n                WHEN csm.total_jobs_last_year >= 50 THEN 25\n                WHEN csm.total_jobs_last_year >= 20 THEN 20\n                ELSE 15\n            END +\n            CASE\n                WHEN csm.avg_growth_rate >= 20 THEN 25\n                WHEN csm.avg_growth_rate >= 10 THEN 20\n                WHEN csm.avg_growth_rate >= 5 THEN 15\n                ELSE 10\n            END +\n            CASE\n                WHEN csm.max_states_hiring >= 10 THEN 25\n                WHEN csm.max_states_hiring >= 5 THEN 20\n                WHEN csm.max_states_hiring >= 2 THEN 15\n                ELSE 10\n            END +\n            CASE\n                WHEN csm.accelerating_months >= 6 THEN 20\n                WHEN csm.accelerating_months >= 3 THEN 15\n                WHEN csm.accelerating_months >= 1 THEN 10\n                ELSE 5\n            END\n        ) AS growth_score,\n        -- Growth category\n        CASE\n            WHEN (\n                CASE WHEN csm.total_jobs_last_year >= 100 THEN 30 ELSE 15 END +\n                CASE WHEN csm.avg_growth_rate >= 20 THEN 25 ELSE 10 END +\n                CASE WHEN csm.max_states_hiring >= 10 THEN 25 ELSE 10 END +\n                CASE WHEN csm.accelerating_months >= 6 THEN 20 ELSE 5 END\n            ) >= 85 THEN 'high_growth'\n            WHEN (\n                CASE WHEN csm.total_jobs_last_year >= 100 THEN 30 ELSE 15 END +\n                CASE WHEN csm.avg_growth_rate >= 20 THEN 25 ELSE 10 END +\n                CASE WHEN csm.max_states_hiring >= 10 THEN 25 ELSE 10 END +\n                CASE WHEN csm.accelerating_months >= 6 
THEN 20 ELSE 5 END\n            ) >= 70 THEN 'moderate_growth'\n            WHEN (\n                CASE WHEN csm.total_jobs_last_year >= 100 THEN 30 ELSE 15 END +\n                CASE WHEN csm.avg_growth_rate >= 20 THEN 25 ELSE 10 END +\n                CASE WHEN csm.max_states_hiring >= 10 THEN 25 ELSE 10 END +\n                CASE WHEN csm.accelerating_months >= 6 THEN 20 ELSE 5 END\n            ) >= 55 THEN 'stable'\n            ELSE 'declining'\n        END AS growth_category\n    FROM company_summary_metrics csm\n)\nSELECT\n    cgs.company_id,\n    cgs.company_name,\n    cgs.industry,\n    cgs.company_size,\n    cgs.total_jobs_last_year,\n    ROUND(CAST(cgs.avg_monthly_jobs AS NUMERIC), 1) AS avg_monthly_jobs,\n    cgs.max_states_hiring,\n    cgs.max_cities_hiring,\n    cgs.avg_growth_rate,\n    cgs.accelerating_months,\n    cgs.expanding_months,\n    cgs.avg_salary_midpoint,\n    cgs.growth_score,\n    cgs.growth_category,\n    -- Ranking\n    RANK() OVER (ORDER BY cgs.growth_score DESC) AS growth_rank,\n    RANK() OVER (PARTITION BY cgs.industry ORDER BY cgs.growth_score DESC) AS industry_growth_rank\nFROM company_growth_scoring cgs\nWHERE cgs.total_jobs_last_year >= 10\nORDER BY cgs.growth_score DESC, cgs.total_jobs_last_year DESC\nLIMIT 100;",
      "line_number": 4032,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.007567,
        "row_count": 0,
        "column_count": 16,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 20,
      "title": "Skill Evolution Tracking with Emerging Skills Detection and Trend Analysis",
      "description": "Description: Comprehensive tracking of skill evolution, detecting emerging skills, analyzing skill trends, and identifying skill lifecycle stages. Uses time-series analysis and trend detection. Use Case: Skill Intelligence - Evolution Tracking and Emerging Skills Detection Business Value: Tracks skill evolution, detects emerging skills, analyzes trends, and identifies skill lifecycle stages. Helps users stay ahead of skill trends and identify valuable skills early. Purpose: Delivers actionable i",
      "complexity": "Deep nested CTEs (6+ levels), time-series analysis, trend detection, lifecycle analysis, window functions",
      "expected_output": "Skill evolution analysis with emerging skills, trend indicators, and lifecycle stages.",
      "sql": "WITH skill_demand_timeline AS (\n    -- First CTE: Track skill demand over time\n    SELECT\n        jsr.skill_id,\n        s.skill_name,\n        s.skill_category,\n        s.skill_type,\n        DATE_TRUNC('month', jp.posted_date) AS demand_month,\n        DATE_TRUNC('quarter', jp.posted_date) AS demand_quarter,\n        COUNT(DISTINCT jsr.job_id) AS job_demand_count,\n        COUNT(DISTINCT c.industry) AS industries_demanding,\n        COUNT(DISTINCT jp.company_id) AS companies_demanding,\n        AVG(jsr.importance_score) AS avg_importance_score,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'required' THEN jsr.job_id END) AS required_demand_count\n    FROM job_skills_requirements jsr\n    INNER JOIN skills s ON jsr.skill_id = s.skill_id\n    INNER JOIN job_postings jp ON jsr.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.posted_date >= CURRENT_DATE - INTERVAL '2 years'\n    GROUP BY jsr.skill_id, s.skill_name, s.skill_category, s.skill_type, DATE_TRUNC('month', jp.posted_date), DATE_TRUNC('quarter', jp.posted_date)\n),\nskill_trend_analysis AS (\n    -- Second CTE: Analyze skill trends\n    SELECT\n        sdt.skill_id,\n        sdt.skill_name,\n        sdt.skill_category,\n        sdt.skill_type,\n        sdt.demand_month,\n        sdt.demand_quarter,\n        sdt.job_demand_count,\n        sdt.industries_demanding,\n        sdt.companies_demanding,\n        ROUND(CAST(sdt.avg_importance_score AS NUMERIC), 2) AS avg_importance_score,\n        sdt.required_demand_count,\n        -- Compare to previous month\n        LAG(sdt.job_demand_count, 1) OVER (PARTITION BY sdt.skill_id ORDER BY sdt.demand_month) AS prev_month_demand,\n        LAG(sdt.job_demand_count, 3) OVER (PARTITION BY sdt.skill_id ORDER BY sdt.demand_month) AS prev_quarter_demand,\n        LAG(sdt.job_demand_count, 12) OVER (PARTITION BY sdt.skill_id ORDER BY sdt.demand_month) AS 
prev_year_demand,\n        -- Moving averages\n        AVG(sdt.job_demand_count) OVER (\n            PARTITION BY sdt.skill_id\n            ORDER BY sdt.demand_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_3month,\n        AVG(sdt.job_demand_count) OVER (\n            PARTITION BY sdt.skill_id\n            ORDER BY sdt.demand_month\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_6month\n    FROM skill_demand_timeline sdt\n),\nskill_growth_metrics AS (\n    -- Third CTE: Calculate growth metrics\n    SELECT\n        sta.skill_id,\n        sta.skill_name,\n        sta.skill_category,\n        sta.skill_type,\n        sta.demand_month,\n        sta.job_demand_count,\n        sta.industries_demanding,\n        sta.companies_demanding,\n        sta.avg_importance_score,\n        sta.required_demand_count,\n        -- Growth rates\n        CASE\n            WHEN sta.prev_month_demand > 0 THEN\n                ROUND(((sta.job_demand_count - sta.prev_month_demand)::NUMERIC / sta.prev_month_demand) * 100, 2)\n            ELSE NULL\n        END AS mom_growth_rate_pct,\n        CASE\n            WHEN sta.prev_quarter_demand > 0 THEN\n                ROUND(((sta.job_demand_count - sta.prev_quarter_demand)::NUMERIC / sta.prev_quarter_demand) * 100, 2)\n            ELSE NULL\n        END AS qoq_growth_rate_pct,\n        CASE\n            WHEN sta.prev_year_demand > 0 THEN\n                ROUND(((sta.job_demand_count - sta.prev_year_demand)::NUMERIC / sta.prev_year_demand) * 100, 2)\n            ELSE NULL\n        END AS yoy_growth_rate_pct,\n        -- Trend direction\n        CASE\n            WHEN sta.job_demand_count > sta.moving_avg_6month * 1.3 THEN 'accelerating'\n            WHEN sta.job_demand_count > sta.moving_avg_6month * 1.1 THEN 'growing'\n            WHEN sta.job_demand_count < sta.moving_avg_6month * 0.9 THEN 'declining'\n            ELSE 'stable'\n        END AS trend_direction\n    FROM 
skill_trend_analysis sta\n),\nskill_lifecycle_detection AS (\n    -- Fourth CTE: Detect skill lifecycle stages\n    SELECT\n        sgm.skill_id,\n        sgm.skill_name,\n        sgm.skill_category,\n        sgm.skill_type,\n        SUM(sgm.job_demand_count) AS total_demand_last_year,\n        AVG(sgm.job_demand_count) AS avg_monthly_demand,\n        MAX(sgm.job_demand_count) AS peak_demand,\n        AVG(sgm.mom_growth_rate_pct) AS avg_growth_rate,\n        AVG(sgm.yoy_growth_rate_pct) AS avg_yoy_growth_rate,\n        COUNT(DISTINCT CASE WHEN sgm.trend_direction = 'accelerating' THEN sgm.demand_month END) AS accelerating_months,\n        COUNT(DISTINCT CASE WHEN sgm.trend_direction = 'declining' THEN sgm.demand_month END) AS declining_months,\n        -- Lifecycle stage detection\n        CASE\n            WHEN AVG(sgm.job_demand_count) < 5 AND AVG(sgm.mom_growth_rate_pct) > 50 THEN 'emerging'\n            WHEN AVG(sgm.job_demand_count) >= 5 AND AVG(sgm.job_demand_count) < 20 AND AVG(sgm.mom_growth_rate_pct) > 20 THEN 'growing'\n            WHEN AVG(sgm.job_demand_count) >= 20 AND AVG(sgm.mom_growth_rate_pct) BETWEEN -10 AND 10 THEN 'mature'\n            WHEN AVG(sgm.job_demand_count) >= 20 AND AVG(sgm.mom_growth_rate_pct) < -10 THEN 'declining'\n            ELSE 'stable'\n        END AS lifecycle_stage,\n        MAX(sgm.industries_demanding) AS max_industries,\n        MAX(sgm.companies_demanding) AS max_companies,\n        AVG(sgm.avg_importance_score) AS avg_importance_score\n    FROM skill_growth_metrics sgm\n    GROUP BY sgm.skill_id, sgm.skill_name, sgm.skill_category, sgm.skill_type\n),\nemerging_skills_identification AS (\n    -- Fifth CTE: Identify emerging skills\n    SELECT\n        sld.skill_id,\n        sld.skill_name,\n        sld.skill_category,\n        sld.skill_type,\n        sld.total_demand_last_year,\n        ROUND(CAST(sld.avg_monthly_demand AS NUMERIC), 1) AS avg_monthly_demand,\n        sld.peak_demand,\n        
ROUND(CAST(sld.avg_growth_rate AS NUMERIC), 2) AS avg_growth_rate,\n        ROUND(CAST(sld.avg_yoy_growth_rate AS NUMERIC), 2) AS avg_yoy_growth_rate,\n        sld.accelerating_months,\n        sld.declining_months,\n        sld.lifecycle_stage,\n        sld.max_industries,\n        sld.max_companies,\n        ROUND(CAST(sld.avg_importance_score AS NUMERIC), 2) AS avg_importance_score,\n        -- Emerging score\n        CASE\n            WHEN sld.lifecycle_stage = 'emerging' THEN\n                ROUND(\n                    (\n                        LEAST(sld.avg_growth_rate / 100.0, 1) * 40 +\n                        LEAST(sld.avg_monthly_demand / 20.0, 1) * 30 +\n                        LEAST(sld.max_industries / 10.0, 1) * 20 +\n                        LEAST(sld.avg_importance_score / 10.0, 1) * 10\n                    ) * 100,\n                    2\n                )\n            ELSE 0\n        END AS emerging_score\n    FROM skill_lifecycle_detection sld\n)\nSELECT\n    esi.skill_id,\n    esi.skill_name,\n    esi.skill_category,\n    esi.skill_type,\n    esi.total_demand_last_year,\n    esi.avg_monthly_demand,\n    esi.peak_demand,\n    esi.avg_growth_rate,\n    esi.avg_yoy_growth_rate,\n    esi.accelerating_months,\n    esi.declining_months,\n    esi.lifecycle_stage,\n    esi.max_industries,\n    esi.max_companies,\n    esi.avg_importance_score,\n    esi.emerging_score,\n    -- Ranking\n    RANK() OVER (ORDER BY esi.emerging_score DESC) AS emerging_rank,\n    RANK() OVER (PARTITION BY esi.lifecycle_stage ORDER BY esi.avg_growth_rate DESC) AS lifecycle_rank,\n    RANK() OVER (ORDER BY esi.avg_yoy_growth_rate DESC NULLS LAST) AS growth_rank\nFROM emerging_skills_identification esi\nWHERE esi.total_demand_last_year >= 3\nORDER BY esi.emerging_score DESC, esi.avg_growth_rate DESC\nLIMIT 100;",
      "line_number": 4269,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005906,
        "row_count": 2,
        "column_count": 19,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 21,
      "title": "Application Timing Optimization with Best Time-to-Apply Analysis",
      "description": "Description: Comprehensive analysis of optimal application timing, analyzing when to apply for best success rates, time-to-response patterns, and application timing strategies. Uses temporal analysis and optimization patterns. Use Case: Timing Intelligence - Optimal Application Timing and Success Rate Optimization Business Value: Identifies optimal application timing, analyzes time-to-response patterns, and provides timing strategies for maximizing success rates. Helps users apply at the right t",
      "complexity": "Deep nested CTEs (6+ levels), temporal analysis, optimization patterns, window functions, time-series analysis",
      "expected_output": "Application timing analysis with optimal timing recommendations, time-to-response patterns, and success rate optimizations.",
      "sql": "WITH application_timing_features AS (\n    -- First CTE: Extract timing features\n    SELECT\n        ja.application_id,\n        ja.user_id,\n        ja.job_id,\n        ja.application_status,\n        ja.submitted_at,\n        jp.posted_date,\n        jp.expiration_date,\n        EXTRACT(DOW FROM ja.submitted_at) AS day_of_week,\n        EXTRACT(HOUR FROM ja.submitted_at) AS hour_of_day,\n        EXTRACT(EPOCH FROM (ja.submitted_at - jp.posted_date)) / 86400 AS days_after_posting,\n        EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - jp.posted_date)) / 86400 AS days_since_posting,\n        CASE\n            WHEN EXTRACT(EPOCH FROM (ja.submitted_at - jp.posted_date)) / 86400 <= 1 THEN 'same_day'\n            WHEN EXTRACT(EPOCH FROM (ja.submitted_at - jp.posted_date)) / 86400 <= 3 THEN 'early'\n            WHEN EXTRACT(EPOCH FROM (ja.submitted_at - jp.posted_date)) / 86400 <= 7 THEN 'week_1'\n            WHEN EXTRACT(EPOCH FROM (ja.submitted_at - jp.posted_date)) / 86400 <= 14 THEN 'week_2'\n            ELSE 'late'\n        END AS application_timing_category,\n        CASE\n            WHEN ja.application_status IN ('interview', 'offer') THEN 1\n            ELSE 0\n        END AS success_indicator\n    FROM job_applications ja\n    INNER JOIN job_postings jp ON ja.job_id = jp.job_id\n    WHERE ja.submitted_at IS NOT NULL\n        AND jp.posted_date IS NOT NULL\n        AND ja.submitted_at >= CURRENT_TIMESTAMP - INTERVAL '1 year'\n),\ntiming_success_analysis AS (\n    -- Second CTE: Analyze success rates by timing\n    SELECT\n        atf.application_timing_category,\n        atf.day_of_week,\n        atf.hour_of_day,\n        COUNT(*) AS total_applications,\n        SUM(atf.success_indicator) AS successful_applications,\n        ROUND((SUM(atf.success_indicator)::NUMERIC / COUNT(*)) * 100, 2) AS success_rate_pct,\n        AVG(atf.days_after_posting) AS avg_days_after_posting,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY atf.days_after_posting) AS 
median_days_after_posting\n    FROM application_timing_features atf\n    GROUP BY atf.application_timing_category, atf.day_of_week, atf.hour_of_day\n),\noptimal_timing_recommendations AS (\n    -- Third CTE: Generate timing recommendations\n    SELECT\n        tsa.application_timing_category,\n        tsa.day_of_week,\n        tsa.hour_of_day,\n        tsa.total_applications,\n        tsa.successful_applications,\n        tsa.success_rate_pct,\n        ROUND(CAST(tsa.avg_days_after_posting AS NUMERIC), 1) AS avg_days_after_posting,\n        ROUND(CAST(tsa.median_days_after_posting AS NUMERIC), 1) AS median_days_after_posting,\n        -- Timing score\n        ROUND(\n            (\n                tsa.success_rate_pct * 0.6 +\n                LEAST((100 - tsa.avg_days_after_posting) / 30.0, 1) * 40\n            ),\n            2\n        ) AS timing_score,\n        -- Recommendation\n        CASE\n            WHEN tsa.success_rate_pct >= 30 AND tsa.avg_days_after_posting <= 3 THEN 'highly_recommended'\n            WHEN tsa.success_rate_pct >= 25 AND tsa.avg_days_after_posting <= 7 THEN 'recommended'\n            WHEN tsa.success_rate_pct >= 20 THEN 'moderate'\n            ELSE 'not_recommended'\n        END AS timing_recommendation\n    FROM timing_success_analysis tsa\n    WHERE tsa.total_applications >= 10\n)\nSELECT\n    otr.application_timing_category,\n    otr.day_of_week,\n    otr.hour_of_day,\n    otr.total_applications,\n    otr.successful_applications,\n    otr.success_rate_pct,\n    otr.avg_days_after_posting,\n    otr.median_days_after_posting,\n    otr.timing_score,\n    otr.timing_recommendation,\n    RANK() OVER (ORDER BY otr.timing_score DESC) AS timing_rank\nFROM optimal_timing_recommendations otr\nORDER BY otr.timing_score DESC\nLIMIT 100;",
      "line_number": 4464,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005613,
        "row_count": 0,
        "column_count": 11,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 22,
      "title": "Industry Transition Analysis with Career Path Mapping and Transition Success Rates",
      "description": "Description: Comprehensive analysis of industry transitions, mapping career paths, analyzing transition success rates, and identifying optimal transition strategies. Uses path analysis and transition patterns. Use Case: Career Intelligence - Industry Transition Analysis and Career Path Mapping Business Value: Maps career paths across industries, analyzes transition success rates, and identifies optimal transition strategies. Helps users plan industry transitions and career changes. Purpose: Deli",
      "complexity": "Deep nested CTEs (7+ levels), path analysis, transition patterns, success rate calculations, graph-like analysis",
      "expected_output": "Industry transition analysis with career paths, transition success rates, and optimal transition strategies.",
      "sql": "WITH user_industry_history AS (\n    -- First CTE: Track user industry history\n    SELECT\n        up.user_id,\n        up.current_job_title,\n        c.industry AS current_industry,\n        ja.application_id,\n        ja.job_id,\n        ja.application_status,\n        c2.industry AS target_industry,\n        ja.submitted_at,\n        CASE\n            WHEN ja.application_status IN ('interview', 'offer') THEN 1\n            ELSE 0\n        END AS transition_success\n    FROM user_profiles up\n    INNER JOIN job_applications ja ON up.user_id = ja.user_id\n    INNER JOIN job_postings jp ON ja.job_id = jp.job_id\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    LEFT JOIN job_postings jp2 ON ja.job_id = jp2.job_id\n    LEFT JOIN companies c2 ON jp2.company_id = c2.company_id\n    WHERE up.current_job_title IS NOT NULL\n        AND c.industry IS NOT NULL\n        AND c2.industry IS NOT NULL\n        AND c.industry != c2.industry\n),\nindustry_transition_matrix AS (\n    -- Second CTE: Build industry transition matrix\n    SELECT\n        uih.current_industry,\n        uih.target_industry,\n        COUNT(DISTINCT uih.user_id) AS users_attempting_transition,\n        COUNT(DISTINCT uih.application_id) AS total_transition_attempts,\n        SUM(uih.transition_success) AS successful_transitions,\n        ROUND((SUM(uih.transition_success)::NUMERIC / COUNT(DISTINCT uih.application_id)) * 100, 2) AS transition_success_rate_pct,\n        AVG(uih.transition_success) AS avg_success_rate\n    FROM user_industry_history uih\n    GROUP BY uih.current_industry, uih.target_industry\n),\ntransition_path_analysis AS (\n    -- Third CTE: Analyze transition paths\n    SELECT\n        itm.current_industry,\n        itm.target_industry,\n        itm.users_attempting_transition,\n        itm.total_transition_attempts,\n        itm.successful_transitions,\n        itm.transition_success_rate_pct,\n        -- Transition difficulty\n        CASE\n            
WHEN itm.transition_success_rate_pct >= 30 THEN 'easy'\n            WHEN itm.transition_success_rate_pct >= 20 THEN 'moderate'\n            WHEN itm.transition_success_rate_pct >= 10 THEN 'difficult'\n            ELSE 'very_difficult'\n        END AS transition_difficulty,\n        -- Transition popularity\n        CASE\n            WHEN itm.total_transition_attempts >= 100 THEN 'very_popular'\n            WHEN itm.total_transition_attempts >= 50 THEN 'popular'\n            WHEN itm.total_transition_attempts >= 20 THEN 'moderate'\n            ELSE 'rare'\n        END AS transition_popularity\n    FROM industry_transition_matrix itm\n    WHERE itm.total_transition_attempts >= 5\n)\nSELECT\n    tpa.current_industry,\n    tpa.target_industry,\n    tpa.users_attempting_transition,\n    tpa.total_transition_attempts,\n    tpa.successful_transitions,\n    tpa.transition_success_rate_pct,\n    tpa.transition_difficulty,\n    tpa.transition_popularity,\n    RANK() OVER (PARTITION BY tpa.current_industry ORDER BY tpa.transition_success_rate_pct DESC) AS best_transition_rank,\n    RANK() OVER (ORDER BY tpa.total_transition_attempts DESC) AS popularity_rank\nFROM transition_path_analysis tpa\nORDER BY tpa.current_industry, tpa.transition_success_rate_pct DESC\nLIMIT 100;",
      "line_number": 4570,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006106,
        "row_count": 0,
        "column_count": 10,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 23,
      "title": "Salary Negotiation Intelligence with Market Positioning and Negotiation Leverage Analysis",
      "description": "Description: Comprehensive analysis for salary negotiation, providing market positioning data, negotiation leverage factors, and salary range recommendations. Uses market analysis and leverage calculations. Use Case: Negotiation Intelligence - Salary Negotiation Support and Market Positioning Business Value: Provides market positioning data, identifies negotiation leverage factors, and recommends salary ranges for negotiation. Helps users negotiate competitive salaries effectively. Purpose: Deli",
      "complexity": "Deep nested CTEs (6+ levels), market positioning, leverage analysis, percentile calculations, comparative analytics",
      "expected_output": "Salary negotiation intelligence with market positioning, leverage factors, and negotiation recommendations.",
      "sql": "WITH market_salary_benchmarks AS (\n    -- First CTE: Calculate market salary benchmarks\n    SELECT\n        jp.job_title,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        jp.work_model,\n        COUNT(*) AS job_count,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary_midpoint,\n        PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p10_salary,\n        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p25_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p50_salary,\n        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p75_salary,\n        PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY (jp.salary_min + jp.salary_max) / 2) AS p90_salary,\n        MIN(jp.salary_min) AS market_min,\n        MAX(jp.salary_max) AS market_max\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.salary_min IS NOT NULL\n        AND jp.salary_max IS NOT NULL\n    GROUP BY jp.job_title, c.industry, jp.location_state, jp.location_city, jp.work_model\n),\nuser_market_positioning AS (\n    -- Second CTE: Position user relative to market\n    SELECT\n        up.user_id,\n        up.current_job_title,\n        up.years_experience,\n        msb.job_title,\n        msb.industry,\n        msb.location_state,\n        msb.work_model,\n        msb.p50_salary AS market_median,\n        msb.p75_salary AS market_p75,\n        msb.p90_salary AS market_p90,\n        msb.market_min,\n        msb.market_max,\n        -- User's market position\n        CASE\n            WHEN up.years_experience >= 10 THEN msb.p75_salary\n            WHEN up.years_experience >= 5 THEN msb.p50_salary\n            ELSE msb.p25_salary\n        END AS recommended_salary_target\n    FROM user_profiles up\n    INNER JOIN 
market_salary_benchmarks msb ON up.current_job_title = msb.job_title\n        AND up.location_state = msb.location_state\n),\nnegotiation_leverage_factors AS (\n    -- Third CTE: Identify negotiation leverage factors\n    SELECT\n        ump.user_id,\n        ump.job_title,\n        ump.industry,\n        ump.market_median,\n        ump.market_p75,\n        ump.market_p90,\n        ump.recommended_salary_target,\n        -- Leverage factors\n        (\n            SELECT COUNT(DISTINCT ja2.job_id)\n            FROM job_applications ja2\n            INNER JOIN job_postings jp2 ON ja2.job_id = jp2.job_id\n            WHERE ja2.user_id = ump.user_id\n                AND ja2.application_status IN ('interview', 'offer')\n                AND jp2.job_title = ump.job_title\n        ) AS competing_offers_count,\n        (\n            SELECT AVG(us.proficiency_score)\n            FROM user_skills us\n            WHERE us.user_id = ump.user_id\n        ) AS skill_proficiency_avg,\n        (\n            SELECT COUNT(DISTINCT jsr.skill_id)\n            FROM job_postings jp3\n            INNER JOIN job_skills_requirements jsr ON jp3.job_id = jsr.job_id\n            INNER JOIN user_skills us2 ON jsr.skill_id = us2.skill_id\n            WHERE jp3.job_title = ump.job_title\n                AND us2.user_id = ump.user_id\n                AND jsr.requirement_type = 'required'\n        ) AS matching_skills_count,\n        -- Leverage score\n        CASE\n            WHEN (\n                SELECT COUNT(DISTINCT ja2.job_id)\n                FROM job_applications ja2\n                WHERE ja2.user_id = ump.user_id\n                    AND ja2.application_status IN ('interview', 'offer')\n            ) >= 3 THEN 'high_leverage'\n            WHEN (\n                SELECT COUNT(DISTINCT ja2.job_id)\n                FROM job_applications ja2\n                WHERE ja2.user_id = ump.user_id\n                    AND ja2.application_status IN ('interview', 'offer')\n            ) >= 1 THEN 
'moderate_leverage'\n            ELSE 'low_leverage'\n        END AS leverage_level\n    FROM user_market_positioning ump\n),\nnegotiation_recommendations AS (\n    -- Fourth CTE: Generate negotiation recommendations\n    SELECT\n        nlf.user_id,\n        nlf.job_title,\n        nlf.industry,\n        ROUND(CAST(nlf.market_median AS NUMERIC), 0) AS market_median,\n        ROUND(CAST(nlf.market_p75 AS NUMERIC), 0) AS market_p75,\n        ROUND(CAST(nlf.market_p90 AS NUMERIC), 0) AS market_p90,\n        ROUND(CAST(nlf.recommended_salary_target AS NUMERIC), 0) AS recommended_salary_target,\n        nlf.competing_offers_count,\n        nlf.skill_proficiency_avg,\n        nlf.matching_skills_count,\n        nlf.leverage_level,\n        -- Negotiation range\n        ROUND(CAST(nlf.recommended_salary_target * 0.95 AS NUMERIC), 0) AS negotiation_min,\n        ROUND(CAST(nlf.recommended_salary_target * 1.15 AS NUMERIC), 0) AS negotiation_max,\n        -- Negotiation strategy\n        CASE\n            WHEN nlf.leverage_level = 'high_leverage' AND nlf.skill_proficiency_avg >= 8 THEN 'aggressive'\n            WHEN nlf.leverage_level = 'moderate_leverage' OR nlf.skill_proficiency_avg >= 7 THEN 'moderate'\n            ELSE 'conservative'\n        END AS negotiation_strategy\n    FROM negotiation_leverage_factors nlf\n)\nSELECT\n    nr.user_id,\n    nr.job_title,\n    nr.industry,\n    nr.market_median,\n    nr.market_p75,\n    nr.market_p90,\n    nr.recommended_salary_target,\n    nr.negotiation_min,\n    nr.negotiation_max,\n    nr.competing_offers_count,\n    ROUND(CAST(nr.skill_proficiency_avg AS NUMERIC), 2) AS skill_proficiency_avg,\n    nr.matching_skills_count,\n    nr.leverage_level,\n    nr.negotiation_strategy\nFROM negotiation_recommendations nr\nORDER BY nr.recommended_salary_target DESC\nLIMIT 100;",
      "line_number": 4666,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00621,
        "row_count": 2,
        "column_count": 14,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 24,
      "title": "Job Market Saturation Analysis with Competition Intensity and Opportunity Density Metrics",
      "description": "Description: Comprehensive analysis of job market saturation, calculating competition intensity, opportunity density, and market saturation indicators. Uses saturation metrics and density calculations. Use Case: Market Saturation Intelligence - Competition Analysis and Opportunity Density Metrics Business Value: Analyzes market saturation, calculates competition intensity, measures opportunity density, and identifies saturated vs. unsaturated markets. Helps users identify less competitive opport",
      "complexity": "Deep nested CTEs (6+ levels), saturation calculations, density metrics, competition analysis, percentile rankings",
      "expected_output": "Market saturation analysis with competition intensity, opportunity density, and saturation indicators.",
      "sql": "WITH market_opportunity_density AS (\n    -- First CTE: Calculate opportunity density\n    SELECT\n        jp.job_title,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        COUNT(DISTINCT jp.job_id) AS total_job_postings,\n        COUNT(DISTINCT jp.company_id) AS unique_companies,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT ja.user_id) AS unique_applicants,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary_midpoint,\n        -- Opportunity density (jobs per company)\n        CASE\n            WHEN COUNT(DISTINCT jp.company_id) > 0 THEN\n                ROUND(COUNT(DISTINCT jp.job_id)::NUMERIC / COUNT(DISTINCT jp.company_id), 2)\n            ELSE NULL\n        END AS opportunity_density\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    LEFT JOIN job_applications ja ON jp.job_id = ja.job_id\n    WHERE jp.is_active = TRUE\n    GROUP BY jp.job_title, c.industry, jp.location_state, jp.location_city\n),\ncompetition_intensity_analysis AS (\n    -- Second CTE: Analyze competition intensity\n    SELECT\n        mod.job_title,\n        mod.industry,\n        mod.location_state,\n        mod.location_city,\n        mod.total_job_postings,\n        mod.unique_companies,\n        mod.total_applications,\n        mod.unique_applicants,\n        mod.avg_salary_midpoint,\n        mod.opportunity_density,\n        -- Competition ratio (applications per job)\n        CASE\n            WHEN mod.total_job_postings > 0 THEN\n                ROUND(mod.total_applications::NUMERIC / mod.total_job_postings, 2)\n            ELSE NULL\n        END AS competition_ratio,\n        -- Applicant-to-job ratio\n        CASE\n            WHEN mod.total_job_postings > 0 THEN\n                ROUND(mod.unique_applicants::NUMERIC / mod.total_job_postings, 2)\n            ELSE NULL\n        END AS applicant_job_ratio\n    FROM market_opportunity_density 
mod\n),\nmarket_saturation_scoring AS (\n    -- Third CTE: Calculate saturation scores\n    SELECT\n        cia.job_title,\n        cia.industry,\n        cia.location_state,\n        cia.location_city,\n        cia.total_job_postings,\n        cia.unique_companies,\n        cia.total_applications,\n        cia.unique_applicants,\n        ROUND(CAST(cia.avg_salary_midpoint AS NUMERIC), 0) AS avg_salary_midpoint,\n        cia.opportunity_density,\n        cia.competition_ratio,\n        cia.applicant_job_ratio,\n        -- Saturation score (higher = more saturated)\n        CASE\n            WHEN cia.competition_ratio >= 50 THEN 100\n            WHEN cia.competition_ratio >= 30 THEN 80\n            WHEN cia.competition_ratio >= 20 THEN 60\n            WHEN cia.competition_ratio >= 10 THEN 40\n            WHEN cia.competition_ratio >= 5 THEN 20\n            ELSE 10\n        END AS saturation_score,\n        -- Saturation category\n        CASE\n            WHEN cia.competition_ratio >= 50 THEN 'highly_saturated'\n            WHEN cia.competition_ratio >= 30 THEN 'saturated'\n            WHEN cia.competition_ratio >= 20 THEN 'moderate'\n            WHEN cia.competition_ratio >= 10 THEN 'low'\n            ELSE 'unsaturated'\n        END AS saturation_category\n    FROM competition_intensity_analysis cia\n    WHERE cia.total_job_postings >= 5\n)\nSELECT\n    mss.job_title,\n    mss.industry,\n    mss.location_state,\n    mss.location_city,\n    mss.total_job_postings,\n    mss.unique_companies,\n    mss.total_applications,\n    mss.unique_applicants,\n    mss.avg_salary_midpoint,\n    mss.opportunity_density,\n    mss.competition_ratio,\n    mss.applicant_job_ratio,\n    mss.saturation_score,\n    mss.saturation_category,\n    RANK() OVER (ORDER BY mss.saturation_score ASC) AS opportunity_rank,\n    RANK() OVER (PARTITION BY mss.industry ORDER BY mss.saturation_score ASC) AS industry_opportunity_rank\nFROM market_saturation_scoring mss\nORDER BY mss.saturation_score 
ASC, mss.total_job_postings DESC\nLIMIT 100;",
      "line_number": 4826,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005905,
        "row_count": 0,
        "column_count": 16,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 25,
      "title": "User Journey Analysis with Engagement Funnels and Conversion Optimization",
      "description": "Description: Comprehensive analysis of user journeys, tracking engagement funnels, analyzing conversion paths, and identifying optimization opportunities. Uses funnel analysis and journey mapping. Use Case: User Experience Intelligence - Journey Analysis and Conversion Optimization Business Value: Maps user journeys, analyzes engagement funnels, tracks conversion paths, and identifies optimization opportunities. Helps improve user experience and conversion rates. Purpose: Delivers actionable ins",
      "complexity": "Deep nested CTEs (7+ levels), funnel analysis, journey mapping, conversion tracking, path analysis",
      "expected_output": "User journey analysis with engagement funnels, conversion paths, and optimization recommendations.",
      "sql": "WITH user_journey_events AS (\n    -- First CTE: Track user journey events\n    SELECT\n        up.user_id,\n        up.created_at AS registration_date,\n        MIN(ujsh.search_date) AS first_search_date,\n        MIN(ja.submitted_at) AS first_application_date,\n        MIN(jr.recommendation_date) AS first_recommendation_date,\n        COUNT(DISTINCT ujsh.search_id) AS total_searches,\n        COUNT(DISTINCT jr.recommendation_id) AS total_recommendations_viewed,\n        COUNT(DISTINCT CASE WHEN jr.is_liked = TRUE THEN jr.recommendation_id END) AS recommendations_liked,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications,\n        EXTRACT(EPOCH FROM (MIN(ujsh.search_date) - up.created_at)) / 86400 AS days_to_first_search,\n        EXTRACT(EPOCH FROM (MIN(ja.submitted_at) - up.created_at)) / 86400 AS days_to_first_application\n    FROM user_profiles up\n    LEFT JOIN user_job_search_history ujsh ON up.user_id = ujsh.user_id\n    LEFT JOIN job_recommendations jr ON up.user_id = jr.user_id\n    LEFT JOIN job_applications ja ON up.user_id = ja.user_id\n    WHERE up.is_active = TRUE\n    GROUP BY up.user_id, up.created_at\n),\njourney_stage_analysis AS (\n    -- Second CTE: Analyze journey stages\n    SELECT\n        uje.user_id,\n        uje.registration_date,\n        uje.total_searches,\n        uje.total_recommendations_viewed,\n        uje.recommendations_liked,\n        uje.total_applications,\n        uje.successful_applications,\n        uje.days_to_first_search,\n        uje.days_to_first_application,\n        -- Journey stage\n        CASE\n            WHEN uje.total_applications >= 5 AND uje.successful_applications >= 1 THEN 'active_applicant'\n            WHEN uje.total_applications >= 1 THEN 'applicant'\n            WHEN uje.total_recommendations_viewed >= 10 THEN 'engaged_searcher'\n           
 WHEN uje.total_searches >= 5 THEN 'active_searcher'\n            WHEN uje.total_searches >= 1 THEN 'searcher'\n            ELSE 'registered'\n        END AS journey_stage,\n        -- Engagement score\n        (\n            LEAST(uje.total_searches / 20.0, 1) * 25 +\n            LEAST(uje.total_recommendations_viewed / 50.0, 1) * 25 +\n            LEAST(uje.total_applications / 10.0, 1) * 25 +\n            LEAST(uje.successful_applications / 3.0, 1) * 25\n        ) AS engagement_score\n    FROM user_journey_events uje\n),\njourney_funnel_metrics AS (\n    -- Third CTE: Calculate funnel metrics\n    SELECT\n        jsa.journey_stage,\n        COUNT(DISTINCT jsa.user_id) AS users_at_stage,\n        AVG(jsa.total_searches) AS avg_searches,\n        AVG(jsa.total_recommendations_viewed) AS avg_recommendations,\n        AVG(jsa.total_applications) AS avg_applications,\n        AVG(jsa.successful_applications) AS avg_successes,\n        AVG(jsa.days_to_first_search) AS avg_days_to_search,\n        AVG(jsa.days_to_first_application) AS avg_days_to_application,\n        AVG(jsa.engagement_score) AS avg_engagement_score,\n        -- Conversion rates\n        LAG(COUNT(DISTINCT jsa.user_id), 1) OVER (ORDER BY\n            CASE jsa.journey_stage\n                WHEN 'registered' THEN 1\n                WHEN 'searcher' THEN 2\n                WHEN 'active_searcher' THEN 3\n                WHEN 'engaged_searcher' THEN 4\n                WHEN 'applicant' THEN 5\n                WHEN 'active_applicant' THEN 6\n            END\n        ) AS prev_stage_users,\n        -- Stage conversion rate\n        CASE\n            WHEN LAG(COUNT(DISTINCT jsa.user_id), 1) OVER (ORDER BY\n                CASE jsa.journey_stage\n                    WHEN 'registered' THEN 1\n                    WHEN 'searcher' THEN 2\n                    WHEN 'active_searcher' THEN 3\n                    WHEN 'engaged_searcher' THEN 4\n                    WHEN 'applicant' THEN 5\n                    WHEN 
'active_applicant' THEN 6\n                END\n            ) > 0 THEN\n                ROUND((COUNT(DISTINCT jsa.user_id)::NUMERIC / LAG(COUNT(DISTINCT jsa.user_id), 1) OVER (ORDER BY\n                    CASE jsa.journey_stage\n                        WHEN 'registered' THEN 1\n                        WHEN 'searcher' THEN 2\n                        WHEN 'active_searcher' THEN 3\n                        WHEN 'engaged_searcher' THEN 4\n                        WHEN 'applicant' THEN 5\n                        WHEN 'active_applicant' THEN 6\n                    END\n                )) * 100, 2)\n            ELSE NULL\n        END AS stage_conversion_rate_pct\n    FROM journey_stage_analysis jsa\n    GROUP BY jsa.journey_stage\n)\nSELECT\n    jfm.journey_stage,\n    jfm.users_at_stage,\n    jfm.prev_stage_users,\n    ROUND(CAST(jfm.avg_searches AS NUMERIC), 1) AS avg_searches,\n    ROUND(CAST(jfm.avg_recommendations AS NUMERIC), 1) AS avg_recommendations,\n    ROUND(CAST(jfm.avg_applications AS NUMERIC), 1) AS avg_applications,\n    ROUND(CAST(jfm.avg_successes AS NUMERIC), 1) AS avg_successes,\n    ROUND(CAST(jfm.avg_days_to_search AS NUMERIC), 1) AS avg_days_to_search,\n    ROUND(CAST(jfm.avg_days_to_application AS NUMERIC), 1) AS avg_days_to_application,\n    ROUND(CAST(jfm.avg_engagement_score AS NUMERIC), 2) AS avg_engagement_score,\n    jfm.stage_conversion_rate_pct,\n    -- Drop-off rate\n    CASE\n        WHEN jfm.prev_stage_users > 0 THEN\n            ROUND(((jfm.prev_stage_users - jfm.users_at_stage)::NUMERIC / jfm.prev_stage_users) * 100, 2)\n        ELSE NULL\n    END AS drop_off_rate_pct\nFROM journey_funnel_metrics jfm\nORDER BY\n    CASE jfm.journey_stage\n        WHEN 'registered' THEN 1\n        WHEN 'searcher' THEN 2\n        WHEN 'active_searcher' THEN 3\n        WHEN 'engaged_searcher' THEN 4\n        WHEN 'applicant' THEN 5\n        WHEN 'active_applicant' THEN 6\n    END;",
      "line_number": 4949,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005289,
        "row_count": 3,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 26,
      "title": "Recursive Career Path Analysis with Hierarchical Job Progression and Path Optimization",
      "description": "Description: Recursive CTE-based career path analysis that identifies optimal career progression paths, analyzes job hierarchies, and determines advancement sequences. Uses recursive CTEs to traverse career progression trees and identify optimal paths. Use Case: Career Path Intelligence - Hierarchical Progression Analysis and Path Optimization. Business Value: Identifies optimal career progression paths, analyzes job hierarchies, and determines advancement sequences. Helps users plan career advancement...",
      "complexity": "Recursive CTEs (WITH RECURSIVE), hierarchical path traversal, progression analysis, path optimization algorithms, complex aggregations",
      "expected_output": "Career path analysis with progression sequences, advancement paths, and optimization recommendations.",
      "sql": "WITH RECURSIVE job_hierarchy_base AS (\n    -- Anchor: Entry-level positions\n    SELECT\n        jp.job_title,\n        c.industry,\n        jp.location_state,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS avg_salary,\n        COUNT(*) AS job_count,\n        0 AS hierarchy_level,\n        ARRAY[jp.job_title::VARCHAR(255)] AS career_path,\n        jp.job_title::VARCHAR AS path_description\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND (\n            LOWER(jp.job_title) LIKE '%junior%'\n            OR LOWER(jp.job_title) LIKE '%entry%'\n            OR LOWER(jp.job_title) LIKE '%associate%'\n            OR LOWER(jp.job_title) LIKE '%intern%'\n        )\n    GROUP BY jp.job_title, c.industry, jp.location_state\n\n    UNION ALL\n\n    -- Recursive: Build career progression paths\n    SELECT\n        jp2.job_title,\n        c2.industry,\n        jp2.location_state,\n        (jp2.salary_min + jp2.salary_max) / 2 AS avg_salary,\n        1 AS job_count,\n        jhb.hierarchy_level + 1,\n        (jhb.career_path || ARRAY[jp2.job_title::VARCHAR(255)])::VARCHAR(255)[],\n        (jhb.path_description || ' -> ' || jp2.job_title)::VARCHAR AS path_description\n    FROM job_postings jp2\n    INNER JOIN companies c2 ON jp2.company_id = c2.company_id\n    INNER JOIN job_hierarchy_base jhb ON c2.industry = jhb.industry\n        AND jp2.location_state = jhb.location_state\n        AND (\n            -- Progression logic: senior roles follow junior roles\n            (jhb.job_title LIKE '%junior%' AND jp2.job_title LIKE '%senior%')\n            OR (jhb.job_title LIKE '%associate%' AND jp2.job_title LIKE '%engineer%')\n            OR (jhb.job_title LIKE '%engineer%' AND jp2.job_title LIKE '%senior%engineer%')\n            OR (jhb.job_title LIKE '%developer%' AND jp2.job_title LIKE '%senior%developer%')\n            OR (jhb.job_title LIKE '%analyst%' AND jp2.job_title LIKE 
'%senior%analyst%')\n            OR (jhb.job_title LIKE '%manager%' AND jp2.job_title LIKE '%senior%manager%')\n            OR (jhb.job_title LIKE '%senior%' AND jp2.job_title LIKE '%lead%')\n            OR (jhb.job_title LIKE '%lead%' AND jp2.job_title LIKE '%principal%')\n            OR (jhb.job_title LIKE '%principal%' AND jp2.job_title LIKE '%director%')\n        )\n        AND (jp2.salary_min + jp2.salary_max) / 2 > jhb.avg_salary * 1.1  -- Salary progression\n    WHERE jhb.hierarchy_level < 5  -- Prevent infinite recursion\n        AND NOT (jp2.job_title = ANY(jhb.career_path))  -- Prevent cycles\n),\ncareer_path_analysis AS (\n    -- CTE: Analyze career paths\n    SELECT\n        jhb.job_title,\n        jhb.industry,\n        jhb.location_state,\n        jhb.hierarchy_level,\n        jhb.career_path,\n        jhb.path_description,\n        ROUND(CAST(jhb.avg_salary AS NUMERIC), 0) AS avg_salary,\n        SUM(jhb.job_count) AS job_count,\n        -- Path metrics\n        ARRAY_LENGTH(jhb.career_path, 1) AS path_length,\n        -- Salary progression\n        (\n            SELECT AVG((jp3.salary_min + jp3.salary_max) / 2)\n            FROM job_postings jp3\n            INNER JOIN companies c3 ON jp3.company_id = c3.company_id\n            WHERE jp3.job_title = jhb.career_path[ARRAY_LENGTH(jhb.career_path, 1)]\n                AND c3.industry = jhb.industry\n                AND jp3.location_state = jhb.location_state\n        ) - (\n            SELECT AVG((jp4.salary_min + jp4.salary_max) / 2)\n            FROM job_postings jp4\n            INNER JOIN companies c4 ON jp4.company_id = c4.company_id\n            WHERE jp4.job_title = jhb.career_path[1]\n                AND c4.industry = jhb.industry\n                AND jp4.location_state = jhb.location_state\n        ) AS total_salary_increase,\n        -- Path popularity\n        (\n            SELECT COUNT(*)\n            FROM job_hierarchy_base jhb2\n            WHERE jhb2.career_path = jhb.career_path\n     
   ) AS path_frequency\n    FROM job_hierarchy_base jhb\n    WHERE jhb.hierarchy_level > 0\n    GROUP BY jhb.job_title, jhb.industry, jhb.location_state, jhb.hierarchy_level, jhb.career_path, jhb.path_description, jhb.avg_salary\n)\nSELECT\n    cpa.job_title,\n    cpa.industry,\n    cpa.location_state,\n    cpa.hierarchy_level,\n    cpa.path_description,\n    cpa.avg_salary,\n    cpa.job_count,\n    cpa.path_length,\n    ROUND(CAST(cpa.total_salary_increase AS NUMERIC), 0) AS total_salary_increase,\n    cpa.path_frequency,\n    -- Path score\n    ROUND(\n        (\n            LEAST(cpa.path_length / 5.0, 1) * 30 +\n            LEAST(cpa.total_salary_increase / 100000.0, 1) * 40 +\n            LEAST(cpa.path_frequency / 10.0, 1) * 30\n        ) * 100,\n        2\n    ) AS path_score,\n    RANK() OVER (PARTITION BY cpa.industry ORDER BY (\n        (\n            LEAST(cpa.path_length / 5.0, 1) * 30 +\n            LEAST(cpa.total_salary_increase / 100000.0, 1) * 40 +\n            LEAST(cpa.path_frequency / 10.0, 1) * 30\n        ) * 100\n    ) DESC) AS industry_path_rank\nFROM career_path_analysis cpa\nWHERE cpa.path_length >= 2\nORDER BY (\n    (\n        LEAST(cpa.path_length / 5.0, 1) * 30 +\n        LEAST(cpa.total_salary_increase / 100000.0, 1) * 40 +\n        LEAST(cpa.path_frequency / 10.0, 1) * 30\n    ) * 100\n) DESC, cpa.total_salary_increase DESC\nLIMIT 100;",
      "line_number": 5096,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005167,
        "row_count": 0,
        "column_count": 12,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 27,
      "title": "Multi-Dimensional Market Segmentation with Cluster Analysis and Segment Profiling",
      "description": "Description: Comprehensive multi-dimensional market segmentation analyzing jobs across multiple dimensions (industry, location, skills, salary), identifying market segments, and profiling segment characteristics. Uses clustering patterns and multi-dimensional analysis. Use Case: Market Segmentation Intelligence - Multi-Dimensional Analysis and Segment Profiling. Business Value: Identifies market segments across multiple dimensions, profiles segment characteristics, and provides segment-specific insights...",
      "complexity": "Chained CTEs (4 levels), multi-dimensional analysis, clustering patterns, segment profiling, percentile calculations",
      "expected_output": "Market segmentation analysis with segment profiles, characteristics, and segment-specific insights.",
      "sql": "WITH multi_dimensional_job_features AS (\n    -- First CTE: Extract multi-dimensional features\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        jp.work_model,\n        jp.job_type,\n        (jp.salary_min + jp.salary_max) / 2 AS salary_midpoint,\n        COUNT(DISTINCT jsr.skill_id) AS required_skills_count,\n        ARRAY_AGG(DISTINCT s.skill_category) AS skill_categories,\n        COUNT(DISTINCT jsr.skill_id) FILTER (WHERE jsr.requirement_type = 'required') AS required_skills,\n        COUNT(DISTINCT jsr.skill_id) FILTER (WHERE jsr.requirement_type = 'preferred') AS preferred_skills\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    LEFT JOIN job_skills_requirements jsr ON jp.job_id = jsr.job_id\n    LEFT JOIN skills s ON jsr.skill_id = s.skill_id\n    WHERE jp.is_active = TRUE\n    GROUP BY jp.job_id, jp.job_title, c.industry, jp.location_state, jp.location_city, jp.work_model, jp.job_type, jp.salary_min, jp.salary_max\n),\ndimension_aggregations AS (\n    -- Second CTE: Aggregate by dimensions\n    SELECT\n        mdjf.industry,\n        mdjf.location_state,\n        mdjf.work_model,\n        mdjf.job_type,\n        COUNT(DISTINCT mdjf.job_id) AS segment_job_count,\n        AVG(mdjf.salary_midpoint) AS segment_avg_salary,\n        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY mdjf.salary_midpoint) AS segment_median_salary,\n        AVG(mdjf.required_skills_count) AS segment_avg_skills,\n        COUNT(DISTINCT mdjf.job_title) AS segment_unique_titles,\n        COUNT(DISTINCT mdjf.location_city) AS segment_cities,\n        -- Skill category distribution\n        ARRAY_AGG(DISTINCT skill_cat) AS segment_skill_categories\n    FROM multi_dimensional_job_features mdjf\n    CROSS JOIN UNNEST(mdjf.skill_categories) AS skill_cat\n    GROUP BY mdjf.industry, mdjf.location_state, mdjf.work_model, 
mdjf.job_type\n),\nsegment_profiling AS (\n    -- Third CTE: Profile market segments\n    SELECT\n        da.industry,\n        da.location_state,\n        da.work_model,\n        da.job_type,\n        da.segment_job_count,\n        ROUND(CAST(da.segment_avg_salary AS NUMERIC), 0) AS segment_avg_salary,\n        ROUND(CAST(da.segment_median_salary AS NUMERIC), 0) AS segment_median_salary,\n        ROUND(CAST(da.segment_avg_skills AS NUMERIC), 1) AS segment_avg_skills,\n        da.segment_unique_titles,\n        da.segment_cities,\n        da.segment_skill_categories,\n        -- Segment size category\n        CASE\n            WHEN da.segment_job_count >= 100 THEN 'large'\n            WHEN da.segment_job_count >= 50 THEN 'medium'\n            WHEN da.segment_job_count >= 20 THEN 'small'\n            ELSE 'niche'\n        END AS segment_size_category,\n        -- Segment salary category\n        CASE\n            WHEN da.segment_avg_salary >= 150000 THEN 'high_salary'\n            WHEN da.segment_avg_salary >= 100000 THEN 'mid_high_salary'\n            WHEN da.segment_avg_salary >= 75000 THEN 'mid_salary'\n            ELSE 'low_salary'\n        END AS segment_salary_category\n    FROM dimension_aggregations da\n    WHERE da.segment_job_count >= 5\n),\nsegment_comparison AS (\n    -- Fourth CTE: Compare segments\n    SELECT\n        sp.industry,\n        sp.location_state,\n        sp.work_model,\n        sp.job_type,\n        sp.segment_job_count,\n        sp.segment_avg_salary,\n        sp.segment_median_salary,\n        sp.segment_avg_skills,\n        sp.segment_unique_titles,\n        sp.segment_cities,\n        sp.segment_skill_categories,\n        sp.segment_size_category,\n        sp.segment_salary_category,\n        -- Compare to industry average\n        (\n            SELECT AVG(sp2.segment_avg_salary)\n            FROM segment_profiling sp2\n            WHERE sp2.industry = sp.industry\n        ) AS industry_avg_salary,\n        -- Segment attractiveness 
score\n        ROUND(\n            (\n                LEAST(sp.segment_job_count / 100.0, 1) * 30 +\n                LEAST(sp.segment_avg_salary / 200000.0, 1) * 40 +\n                LEAST(sp.segment_unique_titles / 20.0, 1) * 30\n            ) * 100,\n            2\n        ) AS segment_attractiveness_score\n    FROM segment_profiling sp\n)\nSELECT\n    sc.industry,\n    sc.location_state,\n    sc.work_model,\n    sc.job_type,\n    sc.segment_job_count,\n    sc.segment_avg_salary,\n    sc.segment_median_salary,\n    sc.segment_avg_skills,\n    sc.segment_unique_titles,\n    sc.segment_cities,\n    sc.segment_skill_categories,\n    sc.segment_size_category,\n    sc.segment_salary_category,\n    ROUND(CAST(sc.industry_avg_salary AS NUMERIC), 0) AS industry_avg_salary,\n    sc.segment_attractiveness_score,\n    RANK() OVER (ORDER BY sc.segment_attractiveness_score DESC) AS segment_rank,\n    RANK() OVER (PARTITION BY sc.industry ORDER BY sc.segment_attractiveness_score DESC) AS industry_segment_rank\nFROM segment_comparison sc\nORDER BY sc.segment_attractiveness_score DESC, sc.segment_job_count DESC\nLIMIT 100;",
      "line_number": 5243,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.006369,
        "row_count": 0,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 28,
      "title": "Predictive Market Forecasting with Time-Series Projections and Trend Extrapolation",
      "description": "Description: Comprehensive predictive market forecasting using time-series analysis, trend extrapolation, and projection models. Uses forecasting patterns and predictive analytics. Use Case: Predictive Intelligence - Market Forecasting and Trend Projections. Business Value: Provides market forecasts, projects future trends, and extrapolates market patterns. Helps users anticipate market changes and plan strategically. Purpose: Delivers predictive insights for strategic planning and market anticipation...",
      "complexity": "Chained CTEs (3 levels), time-series forecasting, trend extrapolation, projection models, window functions",
      "expected_output": "Market forecasts with trend projections, future predictions, and a categorical forecast confidence rating (high/moderate/low).",
      "sql": "WITH historical_market_trends AS (\n    -- First CTE: Build historical trend data\n    SELECT\n        DATE_TRUNC('month', jp.posted_date) AS trend_month,\n        c.industry,\n        jp.location_state,\n        COUNT(DISTINCT jp.job_id) AS monthly_job_postings,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS monthly_avg_salary,\n        COUNT(DISTINCT jp.company_id) AS monthly_companies,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'remote' THEN jp.job_id END) AS monthly_remote_jobs\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND jp.posted_date >= CURRENT_DATE - INTERVAL '2 years'\n    GROUP BY DATE_TRUNC('month', jp.posted_date), c.industry, jp.location_state\n),\ntrend_calculations AS (\n    -- Second CTE: Calculate trends\n    SELECT\n        hmt.trend_month,\n        hmt.industry,\n        hmt.location_state,\n        hmt.monthly_job_postings,\n        hmt.monthly_avg_salary,\n        hmt.monthly_companies,\n        hmt.monthly_remote_jobs,\n        -- Moving averages\n        AVG(hmt.monthly_job_postings) OVER (\n            PARTITION BY hmt.industry, hmt.location_state\n            ORDER BY hmt.trend_month\n            ROWS BETWEEN 2 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_3month,\n        AVG(hmt.monthly_job_postings) OVER (\n            PARTITION BY hmt.industry, hmt.location_state\n            ORDER BY hmt.trend_month\n            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW\n        ) AS moving_avg_6month,\n        -- Growth rates\n        LAG(hmt.monthly_job_postings, 1) OVER (\n            PARTITION BY hmt.industry, hmt.location_state\n            ORDER BY hmt.trend_month\n        ) AS prev_month_postings,\n        LAG(hmt.monthly_job_postings, 12) OVER (\n            PARTITION BY hmt.industry, hmt.location_state\n            ORDER BY hmt.trend_month\n        ) AS prev_year_postings\n    FROM historical_market_trends 
hmt\n),\nforecast_projections AS (\n    -- Third CTE: Generate forecasts\n    SELECT\n        tc.trend_month,\n        tc.industry,\n        tc.location_state,\n        tc.monthly_job_postings,\n        tc.monthly_avg_salary,\n        tc.moving_avg_3month,\n        tc.moving_avg_6month,\n        -- Calculate growth rate\n        CASE\n            WHEN tc.prev_month_postings > 0 THEN\n                ((tc.monthly_job_postings - tc.prev_month_postings)::NUMERIC / tc.prev_month_postings) * 100\n            ELSE NULL\n        END AS monthly_growth_rate,\n        -- Project next month (simple linear projection)\n        CASE\n            WHEN tc.prev_month_postings > 0 THEN\n                ROUND(tc.monthly_job_postings * (1 + ((tc.monthly_job_postings - tc.prev_month_postings)::NUMERIC / tc.prev_month_postings)), 0)\n            ELSE tc.monthly_job_postings\n        END AS projected_next_month,\n        -- Project 3 months ahead\n        CASE\n            WHEN tc.prev_month_postings > 0 THEN\n                ROUND(tc.monthly_job_postings * POWER(1 + ((tc.monthly_job_postings - tc.prev_month_postings)::NUMERIC / tc.prev_month_postings), 3), 0)\n            ELSE tc.monthly_job_postings\n        END AS projected_3months,\n        -- Project 6 months ahead\n        CASE\n            WHEN tc.prev_month_postings > 0 THEN\n                ROUND(tc.monthly_job_postings * POWER(1 + ((tc.monthly_job_postings - tc.prev_month_postings)::NUMERIC / tc.prev_month_postings), 6), 0)\n            ELSE tc.monthly_job_postings\n        END AS projected_6months\n    FROM trend_calculations tc\n)\nSELECT\n    fp.trend_month,\n    fp.industry,\n    fp.location_state,\n    fp.monthly_job_postings,\n    ROUND(CAST(fp.monthly_avg_salary AS NUMERIC), 0) AS monthly_avg_salary,\n    ROUND(CAST(fp.moving_avg_6month AS NUMERIC), 0) AS moving_avg_6month,\n    ROUND(CAST(fp.monthly_growth_rate AS NUMERIC), 2) AS monthly_growth_rate_pct,\n    fp.projected_next_month,\n    fp.projected_3months,\n    
fp.projected_6months,\n    -- Forecast confidence\n    CASE\n        WHEN ABS(fp.monthly_growth_rate) < 5 THEN 'high_confidence'\n        WHEN ABS(fp.monthly_growth_rate) < 15 THEN 'moderate_confidence'\n        ELSE 'low_confidence'\n    END AS forecast_confidence\nFROM forecast_projections fp\nWHERE fp.trend_month >= CURRENT_DATE - INTERVAL '6 months'\nORDER BY fp.trend_month DESC, fp.industry, fp.location_state\nLIMIT 100;",
      "line_number": 5386,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005394,
        "row_count": 8,
        "column_count": 11,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 29,
      "title": "Cross-Database Job Matching with Redundancy Architecture and Multi-Source Integration",
      "description": "Description: Comprehensive cross-database job matching that integrates data from multiple sources (USAJobs.gov, BLS, aggregated sources), handles redundancy architecture, and provides unified matching results. Uses cross-source integration patterns. Use Case: Integration Intelligence - Cross-Database Matching and Multi-Source Integration. Business Value: Integrates job data from multiple sources, handles redundancy architecture, and provides unified matching results. Ensures comprehensive job coverage...",
      "complexity": "Chained CTEs (4 levels), cross-source integration, redundancy handling, unified matching, data source reconciliation",
      "expected_output": "Cross-database job matching with unified results, source attribution, and redundancy handling.",
      "sql": "WITH multi_source_job_aggregation AS (\n    -- First CTE: Aggregate jobs from all sources\n    SELECT\n        jp.job_id,\n        jp.job_title,\n        jp.company_id,\n        c.industry,\n        jp.location_state,\n        jp.location_city,\n        jp.work_model,\n        jp.salary_min,\n        jp.salary_max,\n        jp.posted_date,\n        jp.data_source,\n        jp.usajobs_id,\n        jp.is_federal_job,\n        c.company_name,\n        -- Normalize job title for matching\n        LOWER(TRIM(jp.job_title)) AS normalized_title,\n        -- Create composite key for deduplication\n        MD5(CONCAT(jp.job_title, jp.company_id, jp.location_state, jp.location_city)) AS job_fingerprint\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n),\nsource_deduplication AS (\n    -- Second CTE:
    Deduplicate across sources\n    SELECT\n        msja.job_id,\n        msja.job_title,\n        msja.company_id,\n        msja.industry,\n        msja.location_state,\n        msja.location_city,\n        msja.work_model,\n        msja.salary_min,\n        msja.salary_max,\n        msja.posted_date,\n        msja.data_source,\n        msja.usajobs_id,\n        msja.is_federal_job,\n        msja.company_name,\n        msja.normalized_title,\n        msja.job_fingerprint,\n        -- Source priority (federal jobs from USAJobs have priority)\n        CASE\n            WHEN msja.is_federal_job = TRUE AND msja.data_source = 'usajobs' THEN 1\n            WHEN msja.data_source = 'usajobs' THEN 2\n            WHEN msja.data_source = 'bls' THEN 3\n            ELSE 4\n        END AS source_priority,\n        -- Count sources for same job\n        COUNT(*) OVER (PARTITION BY msja.job_fingerprint) AS source_count,\n        -- Primary source\n        FIRST_VALUE(msja.data_source) OVER (\n            PARTITION BY msja.job_fingerprint\n            ORDER BY\n                CASE\n                    WHEN msja.is_federal_job = TRUE AND msja.data_source = 'usajobs' THEN 1\n                    WHEN msja.data_source = 'usajobs' THEN 2\n                    WHEN msja.data_source = 'bls' THEN 3\n                    ELSE 4\n                END\n        ) AS primary_source\n    FROM multi_source_job_aggregation msja\n),\nunified_job_matching AS (\n    -- Third CTE: Unified matching across sources\n    SELECT\n        sd.job_id,\n        sd.job_title,\n        sd.company_id,\n        sd.company_name,\n        sd.industry,\n        sd.location_state,\n        sd.location_city,\n        sd.work_model,\n        sd.salary_min,\n        sd.salary_max,\n        sd.posted_date,\n        sd.data_source,\n        sd.primary_source,\n        sd.source_count,\n        sd.is_federal_job,\n        sd.usajobs_id,\n        -- User matching (if user_id provided)\n        (\n            SELECT COUNT(*)\n 
           FROM user_skills us\n            INNER JOIN job_skills_requirements jsr ON us.skill_id = jsr.skill_id\n            WHERE jsr.job_id = sd.job_id\n                AND us.user_id = 'USER_ID_PLACEHOLDER'  -- Replace with actual user_id\n        ) AS user_skill_matches,\n        -- Match score components\n        CASE\n            WHEN sd.source_count > 1 THEN 10  -- Bonus for multi-source confirmation\n            ELSE 0\n        END AS redundancy_bonus\n    FROM source_deduplication sd\n    WHERE sd.source_priority = (\n        SELECT MIN(sd2.source_priority)\n        FROM source_deduplication sd2\n        WHERE sd2.job_fingerprint = sd.job_fingerprint\n    )\n),\ncross_source_analytics AS (\n    -- Fourth CTE:
    Analyze cross-source patterns\n    SELECT\n        ujm.job_id,\n        ujm.job_title,\n        ujm.company_name,\n        ujm.industry,\n        ujm.location_state,\n        ujm.work_model,\n        ujm.salary_min,\n        ujm.salary_max,\n        ujm.posted_date,\n        ujm.data_source,\n        ujm.primary_source,\n        ujm.source_count,\n        ujm.is_federal_job,\n        ujm.user_skill_matches,\n        ujm.redundancy_bonus,\n        -- Source coverage\n        ujm.source_count AS source_coverage_count,\n        -- Unified match score\n        (\n            ujm.user_skill_matches * 5 +\n            ujm.redundancy_bonus +\n            CASE WHEN ujm.is_federal_job = TRUE THEN 5 ELSE 0 END\n        ) AS unified_match_score\n    FROM unified_job_matching ujm\n)\nSELECT\n    csa.job_id,\n    csa.job_title,\n    csa.company_name,\n    csa.industry,\n    csa.location_state,\n    csa.work_model,\n    csa.salary_min,\n    csa.salary_max,\n    csa.posted_date,\n    csa.data_source,\n    csa.primary_source,\n    csa.source_count,\n    csa.source_coverage_count,\n    csa.is_federal_job,\n    csa.user_skill_matches,\n    csa.unified_match_score,\n    RANK() OVER (ORDER BY csa.unified_match_score DESC) AS match_rank\nFROM cross_source_analytics csa\nORDER BY csa.unified_match_score DESC, csa.posted_date DESC\nLIMIT 100;",
      "line_number": 5508,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.005773,
        "row_count": 10,
        "column_count": 17,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    },
    {
      "number": 30,
      "title": "Comprehensive Market Intelligence Dashboard with Multi-Dimensional Analytics and Executive Summary",
      "description": "Description: Comprehensive market intelligence dashboard aggregating all key metrics, providing executive summary, multi-dimensional analytics, and holistic market view. Uses dashboard aggregation patterns and executive reporting. Use Case: Executive Intelligence - Comprehensive Market Dashboard and Executive Summary. Business Value: Provides comprehensive market intelligence dashboard with all key metrics, executive summary, and multi-dimensional analytics. Delivers holistic market view for strategic...",
      "complexity": "Multiple CTEs (7 total), dashboard aggregation, executive summary, multi-dimensional analytics, comprehensive reporting",
      "expected_output": "Comprehensive market intelligence dashboard with executive summary, key metrics, and multi-dimensional analytics.",
      "sql": "WITH market_overview_metrics AS (\n    -- First CTE: Overall market metrics\n    SELECT\n        COUNT(DISTINCT jp.job_id) AS total_active_jobs,\n        COUNT(DISTINCT jp.company_id) AS total_companies,\n        COUNT(DISTINCT c.industry) AS total_industries,\n        COUNT(DISTINCT jp.location_state) AS total_states,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS market_avg_salary,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'remote' THEN jp.job_id END) AS remote_jobs_count,\n        COUNT(DISTINCT CASE WHEN jp.posted_date >= CURRENT_DATE - INTERVAL '30 days' THEN jp.job_id END) AS recent_jobs_30d\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n),\nuser_engagement_metrics AS (\n    -- Second CTE: User engagement metrics\n    SELECT\n        COUNT(DISTINCT up.user_id) AS total_active_users,\n        COUNT(DISTINCT ja.application_id) AS total_applications,\n        COUNT(DISTINCT CASE WHEN ja.application_status IN ('interview', 'offer') THEN ja.application_id END) AS successful_applications,\n        COUNT(DISTINCT jr.recommendation_id) AS total_recommendations,\n        COUNT(DISTINCT ujsh.search_id) AS total_searches,\n        AVG(up.profile_completeness_score) AS avg_profile_completeness\n    FROM user_profiles up\n    LEFT JOIN job_applications ja ON up.user_id = ja.user_id\n    LEFT JOIN job_recommendations jr ON up.user_id = jr.user_id\n    LEFT JOIN user_job_search_history ujsh ON up.user_id = ujsh.user_id\n    WHERE up.is_active = TRUE\n),\nskill_market_metrics AS (\n    -- Third CTE: Skill market metrics\n    SELECT\n        COUNT(DISTINCT s.skill_id) AS total_skills,\n        COUNT(DISTINCT jsr.job_id) AS jobs_with_skills,\n        AVG(jsr.importance_score) AS avg_skill_importance,\n        COUNT(DISTINCT CASE WHEN jsr.requirement_type = 'required' THEN jsr.skill_id END) AS required_skills_count\n    FROM skills s\n    LEFT JOIN job_skills_requirements jsr ON 
s.skill_id = jsr.skill_id\n    LEFT JOIN job_postings jp ON jsr.job_id = jp.job_id\n    WHERE jp.is_active = TRUE OR jp.is_active IS NULL\n),\nindustry_breakdown AS (\n    -- Fourth CTE: Industry breakdown\n    SELECT\n        c.industry,\n        COUNT(DISTINCT jp.job_id) AS industry_jobs,\n        COUNT(DISTINCT jp.company_id) AS industry_companies,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS industry_avg_salary,\n        COUNT(DISTINCT CASE WHEN jp.work_model = 'remote' THEN jp.job_id END) AS industry_remote_jobs\n    FROM job_postings jp\n    INNER JOIN companies c ON jp.company_id = c.company_id\n    WHERE jp.is_active = TRUE\n        AND c.industry IS NOT NULL\n    GROUP BY c.industry\n),\ngeographic_breakdown AS (\n    -- Fifth CTE: Geographic breakdown\n    SELECT\n        jp.location_state,\n        COUNT(DISTINCT jp.job_id) AS state_jobs,\n        COUNT(DISTINCT jp.company_id) AS state_companies,\n        AVG((jp.salary_min + jp.salary_max) / 2) AS state_avg_salary,\n        COUNT(DISTINCT jp.location_city) AS state_cities\n    FROM job_postings jp\n    WHERE jp.is_active = TRUE\n        AND jp.location_state IS NOT NULL\n    GROUP BY jp.location_state\n),\ndata_source_health AS (\n    -- Sixth CTE: Data source health\n    SELECT\n        dsm.source_name,\n        COUNT(DISTINCT dsm.metadata_id) AS total_extractions,\n        COUNT(DISTINCT CASE WHEN dsm.extraction_status = 'success' THEN dsm.metadata_id END) AS successful_extractions,\n        SUM(dsm.records_extracted) AS total_records,\n        MAX(dsm.extraction_date) AS last_extraction_date\n    FROM data_source_metadata dsm\n    GROUP BY dsm.source_name\n),\nexecutive_summary AS (\n    -- Seventh CTE: Executive summary\n    SELECT\n        mom.total_active_jobs,\n        mom.total_companies,\n        mom.total_industries,\n        mom.total_states,\n        ROUND(CAST(mom.market_avg_salary AS NUMERIC), 0) AS market_avg_salary,\n        mom.remote_jobs_count,\n        
ROUND((mom.remote_jobs_count::NUMERIC / NULLIF(mom.total_active_jobs, 0)) * 100, 2) AS remote_jobs_pct,\n        mom.recent_jobs_30d,\n        uem.total_active_users,\n        uem.total_applications,\n        uem.successful_applications,\n        CASE\n            WHEN uem.total_applications > 0 THEN\n                ROUND((uem.successful_applications::NUMERIC / uem.total_applications) * 100, 2)\n            ELSE NULL\n        END AS application_success_rate_pct,\n        uem.total_recommendations,\n        uem.total_searches,\n        ROUND(CAST(uem.avg_profile_completeness AS NUMERIC), 2) AS avg_profile_completeness,\n        smm.total_skills,\n        smm.jobs_with_skills,\n        ROUND(CAST(smm.avg_skill_importance AS NUMERIC), 2) AS avg_skill_importance,\n        smm.required_skills_count,\n        -- Top industries\n        (\n            SELECT JSON_OBJECT_AGG(ib.industry, ib.industry_jobs)\n            FROM (\n                SELECT industry, industry_jobs\n                FROM industry_breakdown\n                ORDER BY industry_jobs DESC\n                LIMIT 5\n            ) ib\n        ) AS top_industries,\n        -- Top states\n        (\n            SELECT JSON_OBJECT_AGG(gb.location_state, gb.state_jobs)\n            FROM (\n                SELECT location_state, state_jobs\n                FROM geographic_breakdown\n                ORDER BY state_jobs DESC\n                LIMIT 5\n            ) gb\n        ) AS top_states,\n        -- Data source status\n        (\n            SELECT JSON_OBJECT_AGG(dsh.source_name, JSON_BUILD_OBJECT(\n                'success_rate', ROUND((dsh.successful_extractions::NUMERIC / NULLIF(dsh.total_extractions, 0)) * 100, 2),\n                'total_records', dsh.total_records,\n                'last_extraction', dsh.last_extraction_date\n            ))\n            FROM data_source_health dsh\n        ) AS data_source_status\n    FROM market_overview_metrics mom\n    CROSS JOIN user_engagement_metrics uem\n    
CROSS JOIN skill_market_metrics smm\n)\nSELECT\n    es.total_active_jobs,\n    es.total_companies,\n    es.total_industries,\n    es.total_states,\n    es.market_avg_salary,\n    es.remote_jobs_count,\n    es.remote_jobs_pct,\n    es.recent_jobs_30d,\n    es.total_active_users,\n    es.total_applications,\n    es.successful_applications,\n    es.application_success_rate_pct,\n    es.total_recommendations,\n    es.total_searches,\n    es.avg_profile_completeness,\n    es.total_skills,\n    es.jobs_with_skills,\n    es.avg_skill_importance,\n    es.required_skills_count,\n    es.top_industries,\n    es.top_states,\n    es.data_source_status,\n    -- Market health score\n    ROUND(\n        (\n            LEAST(es.total_active_jobs / 10000.0, 1) * 25 +\n            LEAST(es.recent_jobs_30d / 1000.0, 1) * 25 +\n            LEAST(es.application_success_rate_pct / 30.0, 1) * 25 +\n            LEAST(es.avg_profile_completeness / 100.0, 1) * 25\n        ) * 100,\n        2\n    ) AS market_health_score\nFROM executive_summary es;",
      "line_number": 5679,
      "execution": {
        "success": true,
        "execution_time_seconds": 0.00754,
        "row_count": 1,
        "column_count": 23,
        "tested_at": "2026-02-08T21:06:12.489823"
      }
    }
  ],
  "execution_test_results": {
    "test_timestamp": "2026-02-08T21:06:12.489823",
    "total_queries": 30,
    "passed": 30,
    "failed": 0,
    "success_rate": 100.0,
    "average_execution_time": 0.006608266666666667,
    "total_execution_time": 0.198248
  }
}
# Pull the query list out of the embedded QUERIES_DATA payload and print a
# short overview (first five titles, truncated to 60 characters).
queries = QUERIES_DATA.get('queries', [])
total_queries = len(queries)
print("="*80)
print("EMBEDDED QUERIES LOADED")
print("="*80)
print(f"Total Queries: {total_queries}")
print(f"Source: Embedded in notebook (no file dependency)")
if queries:
    print(f"\nQuery Overview:")
    for q in queries[:5]:
        title = q.get('title', 'N/A')[:60]
        print(f"  Query {q.get('number')}: {title}...")
    # Only mention the remainder when the overview above was truncated.
    if total_queries > 5:
        print(f"  ... and {total_queries - 5} more queries")
print("="*80)
print("‚úÖ Queries ready to execute!")
print("="*80)


In [None]:
# ============================================================================
# LOAD QUERIES (FROM EMBEDDED DATA)
# ============================================================================
# The embedded QUERIES_DATA cell is expected to have defined `queries`; this
# cell only verifies that and prints a short overview. It does not load
# anything itself.
if 'queries' not in globals():
    print("‚ö†Ô∏è  Queries not found. Run the 'Embedded Queries' cell first.")
    print("   Looking for embedded queries...")
    # `notebook` is not defined in this cell, so scan it only when some earlier
    # cell has put the notebook JSON in the namespace; otherwise skip the scan
    # instead of failing with a NameError.
    notebook_json = globals().get('notebook')
    if notebook_json is not None:
        for cell in notebook_json['cells']:
            cell_text = ''.join(cell.get('source', []))
            if 'EMBEDDED QUERIES.JSON' in cell_text or 'QUERIES_DATA' in cell_text:
                print(f"   ‚úÖ Found embedded queries in cell")
                break
else:
    print("="*80)
    print("QUERIES LOADED")
    print("="*80)
    print(f"Total Queries: {len(queries)}")
    if queries:
        print(f"\nQuery Overview:")
        for q in queries[:5]:
            title = q.get('title', 'N/A')[:60]
            print(f"  Query {q.get('number')}: {title}...")
        # Only mention the remainder when the overview above was truncated.
        if len(queries) > 5:
            print(f"  ... and {len(queries) - 5} more queries")
    print("="*80)


## Step 5: Query Execution Function

In [None]:
# ============================================================================
# POSTGRESQL DATABASE CONNECTION (Colab Only)
# ============================================================================
import psycopg2
from pathlib import Path

# Database name
DB_NAME = "db-8"


def create_postgresql_connection():
    """Create PostgreSQL connection for Colab.

    Returns:
        The connection object returned by ``psycopg2.connect`` for the local
        server's default ``postgres`` database.

    Raises:
        RuntimeError: If ``IS_COLAB`` is falsy.
        Exception: Any error raised by ``psycopg2.connect`` is re-raised after
            troubleshooting hints have been printed.
    """
    if not IS_COLAB:
        raise RuntimeError("This notebook requires Google Colab")

    # Colab PostgreSQL defaults
    try:
        conn = psycopg2.connect(
            host='localhost',
            port=5432,
            user='postgres',
            password='postgres',  # Default Colab PostgreSQL password
            database='postgres'  # Connect to default database first
        )
        print("‚úÖ Connected to PostgreSQL")
        return conn
    except Exception as e:
        print(f"‚ùå PostgreSQL connection failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure PostgreSQL is installed (run the installation cell above)")
        print("2. Check if PostgreSQL service is running: !service postgresql status")
        print("3. Try restarting PostgreSQL: !service postgresql restart")
        # Re-raise so later cells do not run against a missing connection.
        raise


# Create connection
conn = create_postgresql_connection()
print(f"\nDatabase connection: PostgreSQL (Colab)")
print(f"Host: localhost")
print(f"Port: 5432")
print(f"User: postgres")

## Step 6: Execute All Queries

## Step 5: Query Execution Function

In [None]:
# ============================================================================
# QUERY EXECUTION FUNCTION WITH METRICS
# ============================================================================

import time
import pandas as pd

def execute_query_with_metrics(db_name: str, query_sql: str, query_num: int, db_config: dict = None):
    """
    Execute a SQL query and collect execution metrics.

    Errors are never propagated: any exception raised while connecting,
    executing or fetching is captured in the returned dict.

    Args:
        db_name: Database name. Accepted for interface compatibility; it is
            not used to choose the connection.
        query_sql: SQL query string.
        query_num: Query number, echoed back in the result.
        db_config: Keyword arguments for ``psycopg2.connect``. When None, the
            module-level ``conn`` variable is used instead.

    Returns:
        dict: Query execution results with metrics. Keys: ``query_number``,
        ``success``, ``execution_time`` (seconds, measured from just before
        the cursor is created), ``row_count``, ``column_count``, ``columns``
        (list of column names), ``dataframe`` (pandas DataFrame, or None on
        failure) and ``error`` (message string, or None on success).
    """
    result = {
        'query_number': query_num,
        'success': False,
        'execution_time': 0.0,
        'row_count': 0,
        'column_count': 0,
        'columns': [],
        'dataframe': None,
        'error': None
    }

    owns_connection = db_config is not None
    exec_conn = None
    cursor = None
    start_time = None

    try:
        if owns_connection:
            # Imported here so the function works even when only the shared
            # connection path is used and psycopg2 is not in the namespace.
            import psycopg2
            exec_conn = psycopg2.connect(**db_config)
        else:
            if 'conn' not in globals():
                raise RuntimeError("Database connection not available. Run connection cell first.")
            exec_conn = globals()['conn']

        # Start timing
        start_time = time.time()

        # Execute query
        cursor = exec_conn.cursor()
        cursor.execute(query_sql)

        # Statements without a result set have no description; calling
        # fetchall() on them would raise, so treat them as zero rows.
        if cursor.description:
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
        else:
            columns = []
            rows = []

        # Calculate execution time
        execution_time = time.time() - start_time

        # Keep the column names even when the query returned no rows.
        df = pd.DataFrame(rows, columns=columns) if columns else pd.DataFrame()

        result['success'] = True
        result['execution_time'] = execution_time
        result['row_count'] = len(df)
        result['column_count'] = len(columns)
        result['columns'] = columns
        result['dataframe'] = df

    except Exception as e:
        result['success'] = False
        result['error'] = str(e)
        result['execution_time'] = time.time() - start_time if start_time is not None else 0.0
        # A failed statement leaves the transaction aborted; roll back so a
        # shared connection stays usable for the next query.
        if exec_conn is not None:
            try:
                exec_conn.rollback()
            except Exception:
                # Best-effort cleanup: the original error is already recorded.
                pass

    finally:
        if cursor is not None:
            try:
                cursor.close()
            except Exception:
                # Best-effort cleanup; must not mask the query result.
                pass
        # Only close connections this call opened; the shared one is reused.
        if owns_connection and exec_conn is not None:
            try:
                exec_conn.close()
            except Exception:
                pass

    return result

# PostgreSQL connection parameters. Kept for reference: when no config is
# passed, execute_query_with_metrics falls back to the global `conn`.
DB_CONFIG = dict(
    host='localhost',
    port=5432,
    user='postgres',
    password='postgres',
    database='postgres',
)

# Confirm the helper is available and remind the reader of its signature.
print(
    "‚úÖ Query execution function loaded",
    "   Function: execute_query_with_metrics(db_name, query_sql, query_num, db_config=None)",
    sep="\n",
)


In [None]:
# ============================================================================
# EXECUTE ALL QUERIES - END-TO-END TESTING
# ============================================================================
# Runs every entry of `queries` through execute_query_with_metrics, collects
# the result dicts in `all_results`, and prints one status line per query
# followed by a pass/fail summary.

all_results = []

print("="*80)
print("EXECUTING ALL QUERIES")
print("="*80)

for query_info in queries:
    query_num = query_info.get('number')
    query_sql = query_info.get('sql', '')
    query_title = query_info.get('title', f'Query {query_num}')

    result = execute_query_with_metrics(DB_NAME, query_sql, query_num, DB_CONFIG)
    # Attach the query metadata so later cells can report on it.
    result['query_number'] = query_num
    result['query_title'] = query_title
    result['query_info'] = query_info

    all_results.append(result)

    status = "‚úÖ" if result['success'] else "‚ùå"
    print(f"{status} Query {query_num:2d}: {query_title[:50]:<50} ({result['execution_time']:.3f}s, {result['row_count']:4d} rows)")

# Summary
passed = sum(1 for r in all_results if r['success'])
failed = sum(1 for r in all_results if not r['success'])
# Guard the percentage so an empty query list does not divide by zero.
success_rate = passed / total_queries * 100 if total_queries > 0 else 0

print(f"\n{'='*80}")
print(f"EXECUTION SUMMARY")
print(f"{'='*80}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {success_rate:.1f}%")
print(f"{'='*80}")

## Step 7: Performance Visualization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# ============================================================================
# PERFORMANCE VISUALIZATION
# ============================================================================
# Builds a per-query metrics table from `all_results`, draws a 2x2 overview
# figure, and prints summary statistics.

# Create performance metrics DataFrame
perf_columns = ['Query', 'Title', 'Execution Time (s)', 'Row Count', 'Column Count', 'Status']
perf_data = []
for r in all_results:
    perf_data.append({
        'Query': r['query_number'],
        # Titles longer than 40 characters are truncated with an ellipsis.
        'Title': (r['query_title'][:40] + '...') if len(r['query_title']) > 40 else r['query_title'],
        'Execution Time (s)': r['execution_time'],
        'Row Count': r['row_count'],
        'Column Count': r['column_count'],
        'Status': 'Passed' if r['success'] else 'Failed'
    })

# Naming the columns explicitly keeps the lookups below valid even when
# `all_results` is empty.
perf_df = pd.DataFrame(perf_data, columns=perf_columns)

# Visualization: Execution Time Distribution
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Execution time bar chart
axes[0, 0].bar(perf_df['Query'], perf_df['Execution Time (s)'], color='steelblue', alpha=0.7)
axes[0, 0].set_xlabel('Query Number')
axes[0, 0].set_ylabel('Execution Time (seconds)')
axes[0, 0].set_title('Query Execution Time by Query Number')
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].grid(True, alpha=0.3)

# Execution time histogram
axes[0, 1].hist(perf_df['Execution Time (s)'], bins=20, color='coral', alpha=0.7, edgecolor='black')
axes[0, 1].set_xlabel('Execution Time (seconds)')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('Distribution of Execution Times')
axes[0, 1].grid(True, alpha=0.3)

# Row count bar chart
axes[1, 0].bar(perf_df['Query'], perf_df['Row Count'], color='green', alpha=0.7)
axes[1, 0].set_xlabel('Query Number')
axes[1, 0].set_ylabel('Row Count')
axes[1, 0].set_title('Rows Returned by Query')
axes[1, 0].tick_params(axis='x', rotation=45)
axes[1, 0].grid(True, alpha=0.3)

# Status pie chart
status_counts = perf_df['Status'].value_counts()
axes[1, 1].pie(status_counts.values, labels=status_counts.index, autopct='%1.1f%%', startangle=90)
axes[1, 1].set_title('Query Execution Status')

plt.tight_layout()
plt.show()

# Display performance summary
print("\n" + "="*80)
print("PERFORMANCE SUMMARY")
print("="*80)
print(f"Average execution time: {perf_df['Execution Time (s)'].mean():.3f}s")
print(f"Median execution time: {perf_df['Execution Time (s)'].median():.3f}s")
print(f"Max execution time: {perf_df['Execution Time (s)'].max():.3f}s")
print(f"Min execution time: {perf_df['Execution Time (s)'].min():.3f}s")
print(f"Total rows returned: {perf_df['Row Count'].sum():,}")
print(f"Average rows per query: {perf_df['Row Count'].mean():.1f}")
print("="*80)

## Step 8: Individual Query Documentation and Visualization

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML, Markdown

# ============================================================================
# INDIVIDUAL QUERY DOCUMENTATION AND VISUALIZATION
# ============================================================================

def document_and_visualize_query(query_result: dict, query_num: int):
    """Create comprehensive documentation and visualization for a single query.

    Renders a markdown summary (status, metrics, query metadata, the first
    1000 characters of the SQL), then for successful results shows the first
    10 rows, histograms for up to three numeric columns, and a correlation
    heatmap when there are several numeric columns.

    Args:
        query_result: Result dict for one query. Reads the keys
            ``query_info``, ``success``, ``execution_time``, ``row_count``,
            ``column_count``, ``dataframe`` and ``error``.
        query_num: Query number used in headings and messages.

    Returns:
        None. All output goes to the notebook display / stdout.
    """
    query_info = query_result['query_info']

    # Create markdown documentation
    doc = f"""
## Query {query_num}: {query_info.get('title', 'N/A')}

### Execution Status
- **Status:** {'‚úÖ PASSED' if query_result['success'] else '‚ùå FAILED'}
- **Execution Time:** {query_result['execution_time']:.3f} seconds
- **Rows Returned:** {query_result['row_count']:,}
- **Columns Returned:** {query_result['column_count']}

### Query Information
- **Description:** {query_info.get('description', 'N/A')[:300]}...
- **Use Case:** {query_info.get('use_case', 'N/A')}
- **Business Value:** {query_info.get('business_value', 'N/A')}
- **Complexity:** {query_info.get('complexity', 'N/A')}
- **Expected Output:** {query_info.get('expected_output', 'N/A')}

### SQL Query
```sql
{query_info.get('sql', '')[:1000]}...
```

### Results Preview
"""

    # Fall back to plain text when rich display is unavailable.
    try:
        display(Markdown(doc))
    except Exception:
        print(doc)

    if query_result['success'] and query_result['dataframe'] is not None:
        df = query_result['dataframe']

        if len(df) > 0:
            print(f"\nFirst 10 rows of Query {query_num}:")
            try:
                display(df.head(10))
            except Exception:
                print(df.head(10).to_string())

            # Create visualizations if numeric data exists
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            if len(numeric_cols) > 0:
                num_plots = min(3, len(numeric_cols))
                # squeeze=False always yields a 2-D axes array, so the
                # single-plot case needs no special handling.
                fig, axes = plt.subplots(1, num_plots, figsize=(15, 4), squeeze=False)

                for ax, col in zip(axes[0], numeric_cols[:num_plots]):
                    # Columns that are entirely null leave their axes empty.
                    if df[col].notna().sum() > 0:
                        ax.hist(df[col].dropna(), bins=min(20, len(df)), alpha=0.7, edgecolor='black')
                        ax.set_title(f'Distribution of {col[:30]}')
                        ax.set_xlabel(col[:30])
                        ax.set_ylabel('Frequency')
                        ax.grid(True, alpha=0.3)

                plt.tight_layout()
                plt.show()
                # This function runs once per query; release each figure so
                # open figures do not accumulate.
                plt.close(fig)

                # Create correlation heatmap if multiple numeric columns.
                # Correlation is undefined for fewer than two rows, so skip
                # the all-NaN matrix in that case.
                if len(numeric_cols) > 1 and len(df) > 1:
                    fig, ax = plt.subplots(figsize=(10, 8))
                    corr_matrix = df[numeric_cols].corr()
                    sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax)
                    ax.set_title('Correlation Matrix of Numeric Columns')
                    plt.tight_layout()
                    plt.show()
                    plt.close(fig)
        else:
            print(f"\nQuery {query_num} returned 0 rows.")
    else:
        if query_result.get('error'):
            print(f"\n‚ùå Error: {query_result['error'][:500]}")

# Walk every collected result and render its documentation and plots,
# separating consecutive queries with a ruled line.
print("=" * 80, "INDIVIDUAL QUERY DOCUMENTATION", "=" * 80, sep="\n")

for query_result in all_results:
    query_num = query_result['query_number']
    document_and_visualize_query(query_result, query_num)
    print(f"\n{'=' * 80}\n")

## Step 9: Generate Comprehensive Report

In [None]:
# ============================================================================
# GENERATE COMPREHENSIVE REPORT
# ============================================================================
# Summarises `all_results` into a JSON report under DB_DIR / 'results' and
# prints the headline numbers.

# Imported here so the cell does not depend on an earlier cell's imports.
import json
from datetime import datetime

# Guard the percentage so an empty query list does not divide by zero.
success_rate = passed / total_queries * 100 if total_queries > 0 else 0

# Create comprehensive report
report_data = {
    'database': DB_NAME,
    'test_timestamp': datetime.now().isoformat(),
    'total_queries': total_queries,
    'passed': passed,
    'failed': failed,
    'success_rate': success_rate,
    'average_execution_time': perf_df['Execution Time (s)'].mean(),
    'total_execution_time': perf_df['Execution Time (s)'].sum(),
    'queries': []
}

for r in all_results:
    # Result dicts may not carry a 'columns' key; derive the names from the
    # result DataFrame in that case instead of raising KeyError.
    result_df = r.get('dataframe')
    if 'columns' in r:
        column_names = list(r['columns'])
    elif result_df is not None:
        column_names = [str(c) for c in result_df.columns]
    else:
        column_names = []

    query_report = {
        'number': r['query_number'],
        'title': r['query_title'],
        'success': r['success'],
        'execution_time': r['execution_time'],
        'row_count': r['row_count'],
        'column_count': r['column_count'],
        'columns': column_names
    }
    if not r['success']:
        query_report['error'] = r['error']

    report_data['queries'].append(query_report)

# Save report
report_file = DB_DIR / 'results' / f'{DB_NAME}_comprehensive_report.json'
# parents=True so the write also works when DB_DIR itself does not exist yet.
report_file.parent.mkdir(parents=True, exist_ok=True)

with open(report_file, 'w') as f:
    # default=str serialises values json cannot encode natively.
    json.dump(report_data, f, indent=2, default=str)

print("="*80)
print("COMPREHENSIVE TEST REPORT")
print("="*80)
print(f"Database: {DB_NAME}")
print(f"Total Queries: {total_queries}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
print(f"Success Rate: {success_rate:.1f}%")
print(f"Average Execution Time: {perf_df['Execution Time (s)'].mean():.3f}s")
print(f"Total Execution Time: {perf_df['Execution Time (s)'].sum():.3f}s")
print(f"\n‚úÖ Report saved to: {report_file}")
print("="*80)

print("\n" + "="*80)
print("END-TO-END TESTING COMPLETE")
print("="*80)
print(f"‚úÖ Database '{DB_NAME}' initialized and tested")
print(f"‚úÖ All {total_queries} queries executed")
print(f"‚úÖ Performance metrics collected")
print(f"‚úÖ Comprehensive report generated")
print("="*80)