In [1]:
import os
import sys

# Critical environment setup for 1,773 algorithms
os.environ['LD_LIBRARY_PATH'] = "/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib"
os.environ['SAGA_CMD'] = '/opt/saga/bin/saga_cmd'
os.environ['SAGA_MLB'] = '/opt/saga/lib/saga'
os.environ['GISBASE'] = '/opt/grass'
os.environ['OTB_APPLICATION_PATH'] = '/opt/otb/lib/otb/applications'

# Add GRASS to Python path
sys.path.insert(0, '/opt/grass/etc/python')

# Initialize QGIS
from qgis.core import QgsApplication
QgsApplication.setPrefixPath('/opt/conda/envs/pygile', True)
qgs = QgsApplication([], False)
qgs.initQgis()

import processing
from processing.core.Processing import Processing
Processing.initialize()

print(" PyGILE-Plus initialized with 1,773+ algorithms!")

 PyGILE-Plus initialized with 1,773+ algorithms!


In [2]:
# SAGA GIS Integration for PyGILE-Plus Environment
import subprocess
import os
from pathlib import Path

class SAGAInterface:
    def __init__(self, saga_cmd_path="/opt/saga/bin/saga_cmd"):
        self.saga_cmd = saga_cmd_path
        self.env = os.environ.copy()
        self.env['SAGA_MLB'] = "/opt/saga/lib/saga"
        self.env['LD_LIBRARY_PATH'] = "/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib"
        
    def run_tool(self, library, tool, parameters):
        """Execute SAGA tool with parameters"""
        cmd = [self.saga_cmd, library, str(tool)] + parameters
        result = subprocess.run(cmd, capture_output=True, text=True, env=self.env)
        if result.returncode != 0:
            raise RuntimeError(f"SAGA error: {result.stderr}")
        return result
    
    def get_tools(self, library):
        """List available tools in library"""
        result = subprocess.run([self.saga_cmd, library], capture_output=True, text=True, env=self.env)
        print(f"Available tools in {library}:")
        print(result.stdout)
        
    def morphometry_slope(self, input_dem, output_slope):
        """Calculate slope from DEM using morphometry library"""
        params = [f"-ELEVATION={input_dem}", f"-SLOPE={output_slope}", "-METHOD=4"]
        return self.run_tool("ta_morphometry", 0, params)

# Initialize SAGA interface
saga = SAGAInterface()
print("SAGA interface initialized")

# Test SAGA functionality - Fixed library path issue
try:
    result = subprocess.run(["/opt/saga/bin/saga_cmd", "--help"], 
                          capture_output=True, text=True, 
                          env={'LD_LIBRARY_PATH': '/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib',
                               'SAGA_MLB': '/opt/saga/lib/saga'})
    if result.returncode == 0:
        print("SAGA command line working")
    else:
        print(f"SAGA error: {result.stderr}")
except Exception as e:
    print(f"SAGA test failed: {e}")

# Method 2: Direct subprocess approach (most reliable)
def saga_direct(library, tool, **kwargs):
    """Direct SAGA execution via subprocess"""
    cmd = ["/opt/saga/bin/saga_cmd", library, str(tool)]
    for key, value in kwargs.items():
        cmd.append(f"-{key.upper()}={value}")
    
    env = os.environ.copy()
    env['SAGA_MLB'] = "/opt/saga/lib/saga"
    
    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
    if result.returncode == 0:
        print("SAGA execution successful")
    return result

# Method 3: PySAGA-cmd wrapper (if installed) - Fixed version
try:
    from PySAGA_cmd import SAGA
    saga_py = SAGA('/opt/saga/bin/saga_cmd', version='9.3.2')
    morphometry = saga_py / 'ta_morphometry'
    slope_tool = morphometry / 0
    print("PySAGA-cmd interface ready with version 9.3.2")
except ImportError:
    print("PySAGA-cmd not available, using direct methods")

SAGA interface initialized
SAGA command line working
PySAGA-cmd interface ready with version 9.3.2


In [3]:
# SAGA GIS Integration for PyGILE-Plus Environment
import subprocess
import os
from pathlib import Path

class SAGAInterface:
    def __init__(self, saga_cmd_path="/opt/saga/bin/saga_cmd"):
        self.saga_cmd = saga_cmd_path
        self.env = os.environ.copy()
        self.env['SAGA_MLB'] = "/opt/saga/lib/saga"
        self.env['LD_LIBRARY_PATH'] = "/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib"
        
    def run_tool(self, library, tool, parameters):
        """Execute SAGA tool with parameters"""
        cmd = [self.saga_cmd, library, str(tool)] + parameters
        result = subprocess.run(cmd, capture_output=True, text=True, env=self.env)
        if result.returncode != 0:
            raise RuntimeError(f"SAGA error: {result.stderr}")
        return result
    
    def get_tools(self, library):
        """List available tools in library"""
        result = subprocess.run([self.saga_cmd, library], capture_output=True, text=True, env=self.env)
        print(f"Available tools in {library}:")
        print(result.stdout)
        
    def morphometry_slope(self, input_dem, output_slope):
        """Calculate slope from DEM using morphometry library"""
        params = [f"-ELEVATION={input_dem}", f"-SLOPE={output_slope}", "-METHOD=4"]
        return self.run_tool("ta_morphometry", 0, params)

# Initialize SAGA interface
saga = SAGAInterface()
print("SAGA interface initialized")

# Test SAGA functionality - Fixed library path issue
try:
    result = subprocess.run(["/opt/saga/bin/saga_cmd", "--help"], 
                          capture_output=True, text=True, 
                          env={'LD_LIBRARY_PATH': '/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib',
                               'SAGA_MLB': '/opt/saga/lib/saga'})
    if result.returncode == 0:
        print("SAGA command line working")
    else:
        print(f"SAGA error: {result.stderr}")
except Exception as e:
    print(f"SAGA test failed: {e}")

# Method 2: Direct subprocess approach (most reliable)
def saga_direct(library, tool, **kwargs):
    """Direct SAGA execution via subprocess"""
    cmd = ["/opt/saga/bin/saga_cmd", library, str(tool)]
    for key, value in kwargs.items():
        cmd.append(f"-{key.upper()}={value}")
    
    env = os.environ.copy()
    env['SAGA_MLB'] = "/opt/saga/lib/saga"
    
    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
    if result.returncode == 0:
        print("SAGA execution successful")
    return result

# Quick SAGA demonstration - Terrain Analysis
def demonstrate_saga():
    """Demonstrate SAGA terrain analysis capabilities"""
    
    # List available morphometry tools
    result = subprocess.run(["/opt/saga/bin/saga_cmd", "ta_morphometry"], 
                          capture_output=True, text=True, 
                          env={'LD_LIBRARY_PATH': '/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib',
                               'SAGA_MLB': '/opt/saga/lib/saga'})
    
    print("Available SAGA Morphometry Tools:")
    print("="*40)
    lines = result.stdout.split('\n')[:15]  # Show first 15 tools
    for line in lines:
        if line.strip() and not line.startswith('_'):
            print(line.strip())
    
    print("\nSAGA is ready for geospatial analysis!")
    return True

# Run demonstration
demonstrate_saga()

SAGA interface initialized
SAGA command line working
Available SAGA Morphometry Tools:
#####   ##   #####    ##
###     ###  ##       ###
###   # ## ##  #### # ##
### ##### ##    # #####
##### #   ##  ##### #   ##
SAGA Version: 9.3.2
Library    : Morphometry
Category   : Terrain Analysis
File       : /opt/saga/lib/saga/libta_morphometry.so

SAGA is ready for geospatial analysis!


True

In [7]:
import re
import subprocess
import csv

def export_all_saga_algorithms_fixed():
    """Use the working method but process ALL libraries"""
    
    env = {
        'LD_LIBRARY_PATH': '/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib',
        'SAGA_MLB': '/opt/saga/lib/saga'
    }
    
    # Get libraries (your working code)
    result = subprocess.run(["/opt/saga/bin/saga_cmd"], 
                          capture_output=True, text=True, env=env)
    
    libraries = []
    lines = result.stdout.split('\n')
    
    for line in lines:
        if line.startswith(' - '):
            lib_name = line[3:].strip()
            if lib_name.endswith(' *'):
                lib_name = lib_name[:-2]
            if lib_name and not lib_name.startswith('_'):
                libraries.append(lib_name)
    
    unique_libraries = list(dict.fromkeys(libraries))  # Remove duplicates
    
    print(f"Processing ALL {len(unique_libraries)} libraries...")
    
    algorithms_data = []
    
    # Process ALL libraries (not just first 15)
    for i, lib in enumerate(unique_libraries):
        print(f"Processing {i+1}/{len(unique_libraries)}: {lib}")
        
        # Your working parsing code
        result = subprocess.run(["/opt/saga/bin/saga_cmd", lib], 
                              capture_output=True, text=True, env=env)
        
        lines = result.stdout.split('\n')
        for line in lines:
            match = re.match(r'^\s*\[(\d+)\]\s+(.+)', line)
            if match:
                tool_id = match.group(1)
                tool_name = match.group(2).strip()
                algorithms_data.append([lib, tool_id, tool_name])
    
    # Save CSV
    with open('/workspace/saga_all_algorithms.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Library', 'Tool_ID', 'Algorithm_Name'])
        writer.writerows(algorithms_data)
    
    print(f"Total: {len(algorithms_data)} algorithms")
    return len(algorithms_data)

# Run this
total = export_all_saga_algorithms_fixed()

Processing ALL 73 libraries...
Processing 1/73: climate_tools
Processing 2/73: contrib_perego
Processing 3/73: db_odbc
Processing 4/73: db_pgsql
Processing 5/73: docs_html
Processing 6/73: docs_pdf
Processing 7/73: garden_fractals
Processing 8/73: garden_webservices
Processing 9/73: grid_analysis
Processing 10/73: grid_calculus
Processing 11/73: grid_calculus_bsl
Processing 12/73: grid_filter
Processing 13/73: grid_gridding
Processing 14/73: grid_spline
Processing 15/73: grid_tools
Processing 16/73: grid_visualisation
Processing 17/73: grids_tools
Processing 18/73: imagery_classification
Processing 19/73: imagery_isocluster
Processing 20/73: imagery_maxent
Processing 21/73: imagery_opencv
Processing 22/73: imagery_photogrammetry
Processing 23/73: imagery_segmentation
Processing 24/73: imagery_svm
Processing 25/73: imagery_tools
Processing 26/73: io_esri_e00
Processing 27/73: io_gdal
Processing 28/73: io_gps
Processing 29/73: io_grid
Processing 30/73: io_grid_image
Processing 31/73: io_

In [8]:
#!/usr/bin/env python3
"""
Complete GRASS GIS algorithm extraction from all directories
Reference: Neteler & Mitasova (2008) - Open Source GIS: A GRASS GIS Approach
"""
import subprocess
import csv
import os

def get_all_grass_modules():
    # Complete GRASS module discovery from all directories
    algorithms = []
    
    # Module prefixes for categorization
    module_categories = {
        'r.': 'Raster',
        'v.': 'Vector', 
        'g.': 'General',
        'i.': 'Imagery',
        't.': 'Temporal',
        'd.': 'Display',
        'db.': 'Database',
        'ps.': 'PostScript'
    }
    
    # All possible GRASS directories
    grass_directories = [
        '/opt/grass/grass84/bin',      # Core compiled modules
        '/opt/grass/grass84/scripts',  # Python scripts
        '/opt/grass/bin',              # Main binaries
        '/opt/grass/grass84/etc/python/grass/script',  # Python modules
    ]
    
    print("Scanning all GRASS directories...")
    
    for grass_dir in grass_directories:
        if os.path.exists(grass_dir):
            print(f"Checking: {grass_dir}")
            try:
                files = os.listdir(grass_dir)
                found_modules = 0
                
                for filename in sorted(files):
                    # Check if it's a GRASS module by prefix
                    for prefix, category in module_categories.items():
                        if filename.startswith(prefix):
                            # Avoid duplicates
                            if not any(alg['algorithm_id'] == filename for alg in algorithms):
                                # Get basic description
                                desc = get_module_description_direct(filename, grass_dir)
                                algorithms.append({
                                    'tool': 'GRASS',
                                    'provider': 'GRASS',
                                    'algorithm_id': filename,
                                    'display_name': desc,
                                    'group': category,
                                    'location': grass_dir
                                })
                                found_modules += 1
                            break
                
                print(f"  Found {found_modules} new modules")
                
            except Exception as e:
                print(f"  Error accessing {grass_dir}: {e}")
        else:
            print(f"  Directory not found: {grass_dir}")
    
    print(f"Total GRASS algorithms extracted: {len(algorithms)}")
    return algorithms

def get_module_description_direct(module_name, module_dir):
    # Get module description from multiple possible locations
    
    # Try different execution paths
    possible_paths = [
        f'{module_dir}/{module_name}',
        f'/opt/grass/bin/{module_name}',
        f'/opt/grass/grass84/bin/{module_name}',
        f'/opt/grass/grass84/scripts/{module_name}'
    ]
    
    for exec_path in possible_paths:
        if os.path.exists(exec_path):
            try:
                env = os.environ.copy()
                env['GISBASE'] = '/opt/grass'
                env['PATH'] = '/opt/grass/bin:/opt/grass/grass84/bin:' + env.get('PATH', '')
                
                # Try to get help from the module
                result = subprocess.run([exec_path, '--help'], 
                                      capture_output=True, text=True, env=env, timeout=5)
                
                if result.returncode == 0 or result.stderr:
                    # Parse help output for description
                    output = result.stdout + result.stderr
                    lines = output.split('\n')
                    
                    for line in lines:
                        line = line.strip()
                        if 'Description:' in line:
                            return line.split('Description:', 1)[1].strip()
                        elif line and not line.startswith(('Usage:', 'Flags:', 'Parameters:', 'ERROR:', 'WARNING:')):
                            if len(line) > 10 and not line.startswith(('-', 'GRASS', 'grass')):
                                return line
                
            except Exception:
                continue
    
    # Fallback: return module name
    return module_name

def export_all_grass_algorithms():
    # Generate comprehensive GRASS algorithm inventory
    print("=== Complete GRASS Algorithm Extraction ===")
    
    algorithms = get_all_grass_modules()
    
    if algorithms:
        # Write to CSV
        csv_path = '/workspace/grass_all_algorithms.csv'
        with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['tool', 'provider', 'algorithm_id', 'display_name', 'group', 'location']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(algorithms)
        
        print(f"GRASS algorithms exported: {len(algorithms)} modules")
        print(f"CSV saved to: {csv_path}")
        
        # Category summary
        categories = {}
        for alg in algorithms:
            cat = alg['group']
            categories[cat] = categories.get(cat, 0) + 1
        
        print("\nCategories:")
        for cat, count in sorted(categories.items()):
            print(f"  {cat}: {count}")
        
        # Location summary
        locations = {}
        for alg in algorithms:
            loc = alg['location']
            locations[loc] = locations.get(loc, 0) + 1
        
        print("\nLocations:")
        for loc, count in sorted(locations.items()):
            print(f"  {loc}: {count}")
    
    return len(algorithms)

if __name__ == "__main__":
    total = export_all_grass_algorithms()
    print(f"\nTotal GRASS modules catalogued: {total}")
    print("Complete CSV saved as: grass_all_algorithms.csv")

=== Complete GRASS Algorithm Extraction ===
Scanning all GRASS directories...
Checking: /opt/grass/grass84/bin
  Found 357 new modules
Checking: /opt/grass/grass84/scripts
  Found 143 new modules
Checking: /opt/grass/bin
  Found 0 new modules
Checking: /opt/grass/grass84/etc/python/grass/script
  Found 1 new modules
Total GRASS algorithms extracted: 501
GRASS algorithms exported: 501 modules
CSV saved to: /workspace/grass_all_algorithms.csv

Categories:
  Database: 19
  Display: 43
  General: 43
  Imagery: 51
  PostScript: 1
  Raster: 167
  Temporal: 51
  Vector: 126

Locations:
  /opt/grass/grass84/bin: 357
  /opt/grass/grass84/etc/python/grass/script: 1
  /opt/grass/grass84/scripts: 143

Total GRASS modules catalogued: 501
Complete CSV saved as: grass_all_algorithms.csv


In [9]:
#!/usr/bin/env python3
"""
WhiteboxTools algorithm extraction using multiple detection methods
Reference: WhiteboxTools documentation and CLI interface
"""

import csv
import os
import sys
import subprocess
import json
import glob
import re
import warnings
from pathlib import Path

# Suppress all warnings
warnings.filterwarnings("ignore")
os.environ['PYTHONWARNINGS'] = 'ignore'

def find_whitebox_executable():
    """Find WhiteboxTools executable in common locations"""
    
    # Common installation paths
    possible_paths = [
        '/usr/local/bin/whitebox_tools',
        '/usr/bin/whitebox_tools',
        '/opt/WhiteboxTools/whitebox_tools',
        '/opt/whitebox/whitebox_tools',
        './whitebox_tools',
        'whitebox_tools',
        '/usr/local/bin/whiteboxtools',
        '/usr/bin/whiteboxtools',
        'whiteboxtools'
    ]
    
    # Check PATH first
    try:
        result = subprocess.run(['which', 'whitebox_tools'], capture_output=True, text=True)
        if result.returncode == 0 and result.stdout.strip():
            path = result.stdout.strip()
            print(f" Found WhiteboxTools via PATH: {path}")
            return path
    except:
        pass
    
    # Check alternative name
    try:
        result = subprocess.run(['which', 'whiteboxtools'], capture_output=True, text=True)
        if result.returncode == 0 and result.stdout.strip():
            path = result.stdout.strip()
            print(f" Found WhiteboxTools via PATH: {path}")
            return path
    except:
        pass
    
    # Check specific paths
    for path in possible_paths:
        if os.path.isfile(path) and os.access(path, os.X_OK):
            print(f" Found WhiteboxTools at: {path}")
            return path
    
    print(" WhiteboxTools executable not found")
    return None

def get_whitebox_tools_list():
    """Get all WhiteboxTools using the --listtools flag"""
    algorithms = []
    
    wb_exec = find_whitebox_executable()
    if not wb_exec:
        return []
    
    try:
        # Run whitebox_tools --listtools
        result = subprocess.run([wb_exec, '--listtools'], 
                              capture_output=True, 
                              text=True, 
                              timeout=30)
        
        if result.returncode == 0 and result.stdout:
            output = result.stdout
            
            # Parse the tool list output
            lines = output.split('\n')
            
            for line in lines:
                line = line.strip()
                
                # Skip empty lines and headers
                if not line or line.startswith('Available') or line.startswith('='):
                    continue
                
                # Parse tool entries (format varies)
                # Common formats:
                # "ToolName: Description"
                # "ToolName - Description"
                # "ToolName Description"
                
                tool_name = ""
                description = ""
                
                if ':' in line:
                    parts = line.split(':', 1)
                    tool_name = parts[0].strip()
                    description = parts[1].strip() if len(parts) > 1 else tool_name
                elif ' - ' in line:
                    parts = line.split(' - ', 1)
                    tool_name = parts[0].strip()
                    description = parts[1].strip() if len(parts) > 1 else tool_name
                elif line and not line.startswith(' '):
                    # Single word tool name
                    tool_name = line.split()[0]
                    description = line
                
                if tool_name and len(tool_name) > 1:
                    algorithms.append({
                        'tool': 'WhiteboxTools',
                        'provider': 'WhiteboxTools',
                        'algorithm_id': tool_name,
                        'display_name': description,
                        'group': categorize_whitebox_tool(tool_name),
                        'detection_method': 'listtools_command'
                    })
            
            print(f" List tools command: Found {len(algorithms)} algorithms")
        else:
            print(f" List tools command failed: {result.stderr[:100]}...")
            
    except Exception as e:
        print(f" List tools command failed: {str(e)[:50]}...")
    
    return algorithms

def get_whitebox_help_output():
    """Get WhiteboxTools using the help output"""
    algorithms = []
    
    wb_exec = find_whitebox_executable()
    if not wb_exec:
        return []
    
    try:
        # Run whitebox_tools --help or just whitebox_tools
        for cmd_args in [['--help'], ['-h'], []]:
            try:
                result = subprocess.run([wb_exec] + cmd_args, 
                                      capture_output=True, 
                                      text=True, 
                                      timeout=10)
                
                output = result.stdout + result.stderr
                
                if output and ('tools' in output.lower() or 'available' in output.lower()):
                    # Parse help output for tool names
                    lines = output.split('\n')
                    
                    in_tools_section = False
                    
                    for line in lines:
                        line = line.strip()
                        
                        # Detect tools section
                        if any(keyword in line.lower() for keyword in ['available tools', 'tool list', 'supported tools']):
                            in_tools_section = True
                            continue
                        
                        if in_tools_section and line:
                            # Extract tool names from help output
                            if line.startswith('-') or line.startswith('*'):
                                continue
                            
                            # Look for tool name patterns
                            tool_match = re.match(r'^([a-zA-Z][a-zA-Z0-9_]*)', line)
                            if tool_match:
                                tool_name = tool_match.group(1)
                                if len(tool_name) > 2:
                                    algorithms.append({
                                        'tool': 'WhiteboxTools',
                                        'provider': 'WhiteboxTools',
                                        'algorithm_id': tool_name,
                                        'display_name': line,
                                        'group': categorize_whitebox_tool(tool_name),
                                        'detection_method': 'help_output'
                                    })
                    
                    if algorithms:
                        print(f" Help output: Found {len(algorithms)} algorithms")
                        break
                        
            except:
                continue
                
    except Exception as e:
        print(f" Help output method failed: {str(e)[:50]}...")
    
    if not algorithms:
        print(" Help output: No algorithms found")
    
    return algorithms

def get_whitebox_python_bindings():
    """Try to get WhiteboxTools via Python whitebox package"""
    algorithms = []
    
    try:
        # Try to import whitebox package
        import whitebox
        
        # Check if whitebox has tool listing capabilities
        wbt = whitebox.WhiteboxTools()
        
        # Try different methods to get tool list
        methods_to_try = [
            ('list_tools', lambda: wbt.list_tools()),
            ('tool_help', lambda: wbt.tool_help()),
            ('help', lambda: wbt.help())
        ]
        
        for method_name, method_func in methods_to_try:
            try:
                result = method_func()
                
                if result and isinstance(result, str):
                    # Parse tool names from the result
                    lines = result.split('\n')
                    
                    for line in lines:
                        line = line.strip()
                        
                        # Look for tool patterns in Python output
                        if ':' in line:
                            tool_name = line.split(':')[0].strip()
                            description = line.split(':', 1)[1].strip()
                            
                            if len(tool_name) > 2 and tool_name.replace('_', '').isalnum():
                                algorithms.append({
                                    'tool': 'WhiteboxTools',
                                    'provider': 'WhiteboxTools',
                                    'algorithm_id': tool_name,
                                    'display_name': description,
                                    'group': categorize_whitebox_tool(tool_name),
                                    'detection_method': 'python_bindings'
                                })
                
                if algorithms:
                    print(f" Python bindings ({method_name}): Found {len(algorithms)} algorithms")
                    break
                    
            except:
                continue
        
        if not algorithms:
            print(" Python bindings: No algorithms found")
            
    except ImportError:
        print(" Python bindings: whitebox package not available")
    except Exception as e:
        print(f" Python bindings failed: {str(e)[:50]}...")
    
    return algorithms

def get_whitebox_individual_tools():
    """Get individual tool help to extract more details"""
    algorithms = []
    
    wb_exec = find_whitebox_executable()
    if not wb_exec:
        return []
    
    # First get a basic list of tools
    base_tools = get_whitebox_tools_list()
    
    if not base_tools:
        # Try to discover some common tools
        common_tools = [
            'slope', 'aspect', 'hillshade', 'contours', 'watershed',
            'flow_direction', 'flow_accumulation', 'stream_network',
            'fill_depressions', 'breach_depressions', 'gaussian_filter',
            'median_filter', 'buffer', 'clip', 'reclass', 'mosaic'
        ]
        
        base_tools = [{'algorithm_id': tool} for tool in common_tools]
    
    print(f"Getting detailed help for {len(base_tools)} tools...")
    
    for i, tool_info in enumerate(base_tools):
        tool_name = tool_info['algorithm_id']
        
        if i % 20 == 0:  # Progress indicator
            print(f"  Progress: {i+1}/{len(base_tools)}")
        
        try:
            # Get help for individual tool
            result = subprocess.run([wb_exec, f'--run={tool_name}', '--help'], 
                                  capture_output=True, 
                                  text=True, 
                                  timeout=5)
            
            if result.returncode != 0:
                # Try alternative help format
                result = subprocess.run([wb_exec, '--tool=' + tool_name], 
                                      capture_output=True, 
                                      text=True, 
                                      timeout=5)
            
            output = result.stdout + result.stderr
            description = tool_name
            
            if output:
                # Extract description from tool help
                desc_patterns = [
                    r'Description:\s*(.+?)(?:\n\n|\nUsage:|\nParameters:)',
                    r'DESCRIPTION:\s*(.+?)(?:\n\n|\nUSAGE:|\nPARAMETERS:)',
                    r'Purpose:\s*(.+?)(?:\n)',
                    r'Brief:\s*(.+?)(?:\n)',
                    r'Summary:\s*(.+?)(?:\n)'
                ]
                
                for pattern in desc_patterns:
                    match = re.search(pattern, output, re.DOTALL | re.IGNORECASE)
                    if match:
                        desc = match.group(1).strip()
                        if len(desc) > len(description) and len(desc) < 200:
                            description = desc
                            break
                
                # Clean up description
                description = ' '.join(description.split())
            
            algorithms.append({
                'tool': 'WhiteboxTools',
                'provider': 'WhiteboxTools',
                'algorithm_id': tool_name,
                'display_name': description,
                'group': categorize_whitebox_tool(tool_name),
                'detection_method': 'individual_help'
            })
            
        except:
            # Add basic entry if help fails
            algorithms.append({
                'tool': 'WhiteboxTools',
                'provider': 'WhiteboxTools',
                'algorithm_id': tool_name,
                'display_name': tool_name,
                'group': categorize_whitebox_tool(tool_name),
                'detection_method': 'individual_help'
            })
    
    print(f" Individual help: Found {len(algorithms)} algorithms")
    return algorithms

def get_whitebox_json_output():
    """Try to get tool list in JSON format if supported"""
    algorithms = []
    
    wb_exec = find_whitebox_executable()
    if not wb_exec:
        return []
    
    try:
        # Try JSON output formats
        json_commands = [
            ['--listtools', '--json'],
            ['--list', '--json'],
            ['--tools', '--json']
        ]
        
        for cmd_args in json_commands:
            try:
                result = subprocess.run([wb_exec] + cmd_args, 
                                      capture_output=True, 
                                      text=True, 
                                      timeout=10)
                
                if result.returncode == 0 and result.stdout:
                    try:
                        data = json.loads(result.stdout)
                        
                        # Parse JSON structure (format may vary)
                        if isinstance(data, dict):
                            if 'tools' in data:
                                tools = data['tools']
                            elif 'available_tools' in data:
                                tools = data['available_tools']
                            else:
                                tools = data
                        elif isinstance(data, list):
                            tools = data
                        else:
                            continue
                        
                        for tool_info in tools:
                            if isinstance(tool_info, dict):
                                tool_name = tool_info.get('name', tool_info.get('tool', ''))
                                description = tool_info.get('description', tool_info.get('desc', tool_name))
                            elif isinstance(tool_info, str):
                                tool_name = tool_info
                                description = tool_info
                            else:
                                continue
                            
                            if tool_name and len(tool_name) > 1:
                                algorithms.append({
                                    'tool': 'WhiteboxTools',
                                    'provider': 'WhiteboxTools',
                                    'algorithm_id': tool_name,
                                    'display_name': description,
                                    'group': categorize_whitebox_tool(tool_name),
                                    'detection_method': 'json_output'
                                })
                        
                        if algorithms:
                            print(f" JSON output: Found {len(algorithms)} algorithms")
                            break
                            
                    except json.JSONDecodeError:
                        continue
                        
            except:
                continue
                
    except Exception as e:
        print(f" JSON output method failed: {str(e)[:50]}...")
    
    if not algorithms:
        print(" JSON output: No algorithms found")
    
    return algorithms

def categorize_whitebox_tool(tool_name):
    """Categorize WhiteboxTools based on tool name"""
    
    tool_name_lower = tool_name.lower()
    
    # Terrain analysis
    if any(keyword in tool_name_lower for keyword in ['slope', 'aspect', 'hillshade', 'curvature', 'elevation', 'terrain', 'topographic', 'ruggedness']):
        return 'Terrain Analysis'
    
    # Hydrology
    elif any(keyword in tool_name_lower for keyword in ['flow', 'watershed', 'stream', 'drainage', 'accumulation', 'direction', 'basin', 'pour', 'outlet', 'hydro']):
        return 'Hydrological Analysis'
    
    # Image processing and filtering
    elif any(keyword in tool_name_lower for keyword in ['filter', 'smooth', 'gaussian', 'median', 'bilateral', 'edge', 'enhance', 'sharpen', 'blur']):
        return 'Image Processing'
    
    # Geomorphometry
    elif any(keyword in tool_name_lower for keyword in ['geomorphon', 'landform', 'relative', 'position', 'morphometry', 'relief']):
        return 'Geomorphometry'
    
    # Vector tools
    elif any(keyword in tool_name_lower for keyword in ['vector', 'polygon', 'line', 'point', 'buffer', 'clip', 'overlay', 'intersection', 'union']):
        return 'Vector Analysis'
    
    # Raster operations
    elif any(keyword in tool_name_lower for keyword in ['raster', 'grid', 'resample', 'reproject', 'mosaic', 'merge', 'clip', 'extract']):
        return 'Raster Processing'
    
    # Classification and clustering
    elif any(keyword in tool_name_lower for keyword in ['classify', 'cluster', 'kmeans', 'isodata', 'segment', 'region']):
        return 'Classification'
    
    # Mathematical operations
    elif any(keyword in tool_name_lower for keyword in ['math', 'calculator', 'statistics', 'zonal', 'histogram', 'sum', 'mean', 'max', 'min']):
        return 'Mathematical Analysis'
    
    # LiDAR processing
    elif any(keyword in tool_name_lower for keyword in ['lidar', 'las', 'point_cloud', 'dtm', 'dsm', 'canopy', 'height']):
        return 'LiDAR Processing'
    
    # Input/Output
    elif any(keyword in tool_name_lower for keyword in ['import', 'export', 'convert', 'read', 'write', 'format', 'ascii']):
        return 'Input/Output'
    
    # Preprocessing
    elif any(keyword in tool_name_lower for keyword in ['fill', 'breach', 'depression', 'sink', 'correct', 'preprocess']):
        return 'Preprocessing'
    
    # Change detection
    elif any(keyword in tool_name_lower for keyword in ['change', 'difference', 'delta', 'temporal', 'time']):
        return 'Change Detection'
    
    else:
        return 'General'

def merge_algorithms(algorithm_lists):
    """Merge algorithm lists and remove duplicates"""
    seen = {}
    merged = []
    
    for alg_list in algorithm_lists:
        for alg in alg_list:
            alg_id = alg['algorithm_id']
            
            if alg_id not in seen:
                seen[alg_id] = alg
                merged.append(alg)
            else:
                # Update existing entry with better description and additional detection method
                existing = seen[alg_id]
                
                # Use longer, more descriptive display name
                if len(alg['display_name']) > len(existing['display_name']):
                    existing['display_name'] = alg['display_name']
                
                # Combine detection methods
                if alg['detection_method'] not in existing['detection_method']:
                    existing['detection_method'] += f", {alg['detection_method']}"
    
    return merged

def export_whitebox_algorithms():
    """Export all WhiteboxTools algorithms to CSV"""
    
    print("=== WhiteboxTools Algorithm Extraction ===\n")
    
    # Detection methods
    detection_methods = [
        ("List Tools Command", get_whitebox_tools_list),
        ("Help Output", get_whitebox_help_output),
        ("Python Bindings", get_whitebox_python_bindings),
        ("JSON Output", get_whitebox_json_output),
        ("Individual Help", get_whitebox_individual_tools)
    ]
    
    all_algorithms = []
    
    for method_name, method_func in detection_methods:
        try:
            algorithms = method_func()
            if algorithms:
                all_algorithms.append(algorithms)
        except Exception as e:
            print(f" {method_name}: Failed ({str(e)[:50]}...)")
    
    # Merge all results
    if all_algorithms:
        merged_algorithms = merge_algorithms(all_algorithms)
        
        # Write to CSV
        csv_path = '/workspace/whitebox_algorithms.csv'
        with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['tool', 'provider', 'algorithm_id', 'display_name', 'group', 'detection_method']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(merged_algorithms)
        
        print(f"\n=== Results ===")
        print(f" Total algorithms found: {len(merged_algorithms)}")
        print(f" CSV saved to: {csv_path}")
        
        # Category summary
        categories = {}
        for alg in merged_algorithms:
            cat = alg['group']
            categories[cat] = categories.get(cat, 0) + 1
        
        print(f"\nCategories:")
        sorted_cats = sorted(categories.items(), key=lambda x: x[1], reverse=True)
        for cat, count in sorted_cats:
            print(f"  {cat}: {count}")
        
        # Detection method summary
        detection_stats = {}
        for alg in merged_algorithms:
            methods = [m.strip() for m in alg['detection_method'].split(',')]
            for method in methods:
                detection_stats[method] = detection_stats.get(method, 0) + 1
        
        print(f"\nDetection methods:")
        for method, count in sorted(detection_stats.items()):
            print(f"  {method}: {count}")
        
        return len(merged_algorithms)
    else:
        print(" No WhiteboxTools algorithms found")
        return 0

if __name__ == "__main__":
    total = export_whitebox_algorithms()
    print(f"\nFinal count: {total} WhiteboxTools algorithms catalogued")

=== WhiteboxTools Algorithm Extraction ===

 Found WhiteboxTools via PATH: /opt/conda/envs/pygile/bin/whitebox_tools
 List tools command: Found 460 algorithms
 Found WhiteboxTools via PATH: /opt/conda/envs/pygile/bin/whitebox_tools
 Help output: Found 1 algorithms
 Python bindings (tool_help): Found 1377 algorithms
 Found WhiteboxTools via PATH: /opt/conda/envs/pygile/bin/whitebox_tools
 JSON output: No algorithms found
 Found WhiteboxTools via PATH: /opt/conda/envs/pygile/bin/whitebox_tools
 Found WhiteboxTools via PATH: /opt/conda/envs/pygile/bin/whitebox_tools
 List tools command: Found 460 algorithms
Getting detailed help for 460 tools...
  Progress: 1/460
  Progress: 21/460
  Progress: 41/460
  Progress: 61/460
  Progress: 81/460
  Progress: 101/460
  Progress: 121/460
  Progress: 141/460
  Progress: 161/460
  Progress: 181/460
  Progress: 201/460
  Progress: 221/460
  Progress: 241/460
  Progress: 261/460
  Progress: 281/460
  Progress: 301/460
  Progress: 321/460
  Progress: 341

In [10]:
#!/usr/bin/env python3
"""
Python geospatial libraries algorithm extraction
Extracts functions/algorithms from major Python GIS and remote sensing libraries
"""

import csv
import os
import sys
import inspect
import importlib
import warnings
import pkgutil
from pathlib import Path

# Nuclear option - suppress everything
warnings.filterwarnings("ignore")
os.environ['PYTHONWARNINGS'] = 'ignore'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Try to suppress IPython/Jupyter warnings
try:
    from IPython.core.interactiveshell import InteractiveShell
    InteractiveShell.showtraceback = lambda self, exc_tuple=None, filename=None, tb_offset=None, exception_only=False, running_compiled_code=False: None
except:
    pass

# Redirect file descriptors at OS level
import contextlib
@contextlib.contextmanager
def suppress_fd_output():
    """Suppress output by redirecting file descriptors"""
    with open(os.devnull, 'w') as devnull:
        old_stdout = os.dup(1)
        old_stderr = os.dup(2)
        try:
            os.dup2(devnull.fileno(), 1)
            os.dup2(devnull.fileno(), 2)
            yield
        finally:
            os.dup2(old_stdout, 1)
            os.dup2(old_stderr, 2)
            os.close(old_stdout)
            os.close(old_stderr)

def safe_import(module_name):
    """Safely import a module and return it, or None if import fails"""
    try:
        with suppress_fd_output():
            return importlib.import_module(module_name)
    except:
        return None

def get_module_functions(module, module_name, exclude_private=True, exclude_classes=False):
    """Extract functions from a module"""
    functions = []
    
    if not module:
        return functions
    
    try:
        with suppress_fd_output():
            for name in dir(module):
                if exclude_private and name.startswith('_'):
                    continue
                
                try:
                    obj = getattr(module, name)
                    
                    # Check if it's a function or method
                    if inspect.isfunction(obj) or inspect.ismethod(obj):
                        # Get function signature and docstring
                        try:
                            sig = str(inspect.signature(obj))
                        except:
                            sig = ""
                        
                        try:
                            doc = inspect.getdoc(obj) or ""
                            # Take first line of docstring as description
                            description = doc.split('\n')[0] if doc else name
                            if len(description) > 200:
                                description = description[:200] + "..."
                        except:
                            description = name
                        
                        functions.append({
                            'tool': 'Python',
                            'provider': module_name,
                            'algorithm_id': f"{module_name}.{name}",
                            'display_name': description,
                            'group': categorize_python_function(module_name, name),
                            'detection_method': 'introspection'
                        })
                    
                    # Optionally include classes with callable methods
                    elif not exclude_classes and inspect.isclass(obj):
                        try:
                            # Get class methods that are likely algorithms
                            for method_name in dir(obj):
                                if not method_name.startswith('_'):
                                    method_obj = getattr(obj, method_name)
                                    if callable(method_obj):
                                        try:
                                            doc = inspect.getdoc(method_obj) or ""
                                            description = doc.split('\n')[0] if doc else f"{name}.{method_name}"
                                            if len(description) > 200:
                                                description = description[:200] + "..."
                                        except:
                                            description = f"{name}.{method_name}"
                                        
                                        functions.append({
                                            'tool': 'Python',
                                            'provider': module_name,
                                            'algorithm_id': f"{module_name}.{name}.{method_name}",
                                            'display_name': description,
                                            'group': categorize_python_function(module_name, method_name),
                                            'detection_method': 'class_methods'
                                        })
                        except:
                            continue
                            
                except:
                    continue
                    
    except Exception as e:
        pass  # Silently skip problematic modules
    
    return functions

def get_submodule_functions(module, module_name, max_depth=2, current_depth=0):
    """Recursively get functions from submodules"""
    functions = []
    
    if not module or current_depth >= max_depth:
        return functions
    
    # Get functions from current module
    functions.extend(get_module_functions(module, module_name))
    
    # Get submodules
    try:
        if hasattr(module, '__path__'):
            for importer, modname, ispkg in pkgutil.iter_modules(module.__path__):
                try:
                    submodule_name = f"{module_name}.{modname}"
                    submodule = safe_import(submodule_name)
                    if submodule:
                        functions.extend(get_submodule_functions(
                            submodule, submodule_name, max_depth, current_depth + 1
                        ))
                except:
                    continue
    except:
        pass
    
    return functions

def get_gdal_algorithms():
    """Extract GDAL/OGR algorithms"""
    algorithms = []
    
    # GDAL modules to check
    gdal_modules = [
        'osgeo.gdal',
        'osgeo.ogr', 
        'osgeo.osr',
        'osgeo.gdal_array',
        'osgeo.gdalconst'
    ]
    
    for module_name in gdal_modules:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name.replace('osgeo.', 'gdal.'))
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def get_rasterio_algorithms():
    """Extract Rasterio algorithms"""
    algorithms = []
    
    rasterio_modules = [
        'rasterio',
        'rasterio.features',
        'rasterio.mask',
        'rasterio.merge',
        'rasterio.plot',
        'rasterio.sample',
        'rasterio.transform',
        'rasterio.warp',
        'rasterio.windows',
        'rasterio.enums',
        'rasterio.crs'
    ]
    
    for module_name in rasterio_modules:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name)
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def get_shapely_algorithms():
    """Extract Shapely algorithms"""
    algorithms = []
    
    shapely_modules = [
        'shapely.geometry',
        'shapely.ops',
        'shapely.affinity',
        'shapely.algorithms',
        'shapely.predicates',
        'shapely.validation',
        'shapely.prepared'
    ]
    
    for module_name in shapely_modules:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name)
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def get_geopandas_algorithms():
    """Extract GeoPandas algorithms"""
    algorithms = []
    
    geopandas_modules = [
        'geopandas',
        'geopandas.tools',
        'geopandas.datasets'
    ]
    
    for module_name in geopandas_modules:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name)
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def get_skimage_algorithms():
    """Extract Scikit-image algorithms"""
    algorithms = []
    
    skimage_modules = [
        'skimage.filters',
        'skimage.morphology',
        'skimage.segmentation',
        'skimage.feature',
        'skimage.transform',
        'skimage.restoration',
        'skimage.exposure',
        'skimage.measure',
        'skimage.color',
        'skimage.util'
    ]
    
    for module_name in skimage_modules:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name)
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def get_opencv_algorithms():
    """Extract OpenCV algorithms with improved error handling"""
    algorithms = []
    
    opencv_modules = [
        'cv2'
    ]
    
    for module_name in opencv_modules:
        try:
            module = safe_import(module_name)
            if module:
                functions = get_module_functions(module, module_name)
                algorithms.extend(functions)
                print(f" {module_name}: Found {len(functions)} functions")
            else:
                print(f" {module_name}: Not available")
        except Exception:
            print(f" {module_name}: Import failed")
    
    return algorithms

def get_scipy_algorithms():
    """Extract SciPy algorithms relevant to geospatial analysis"""
    algorithms = []
    
    scipy_modules = [
        'scipy.ndimage',
        'scipy.spatial',
        'scipy.interpolate',
        'scipy.signal',
        'scipy.stats'
    ]
    
    for module_name in scipy_modules:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name)
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def get_numpy_algorithms():
    """Extract NumPy algorithms relevant to geospatial analysis with error suppression"""
    algorithms = []
    
    numpy_modules = [
        'numpy',
        'numpy.ma',  # Masked arrays
        'numpy.linalg'  # Linear algebra
    ]
    
    for module_name in numpy_modules:
        try:
            module = safe_import(module_name)
            if module:
                functions = get_module_functions(module, module_name)
                algorithms.extend(functions)
                print(f" {module_name}: Found {len(functions)} functions")
            else:
                print(f" {module_name}: Not available")
        except Exception:
            print(f" {module_name}: Import failed (compatibility issue)")
    
    return algorithms

def get_other_gis_libraries():
    """Extract algorithms from other common GIS libraries"""
    algorithms = []
    
    other_libraries = [
        'fiona',
        'pyproj',
        'cartopy',
        'folium',
        'xarray',
        'rioxarray',
        'geopy',
        'contextily',
        'earthpy',
        'rasterstats',
        'geoplot',
        'plotly.express'
    ]
    
    for module_name in other_libraries:
        module = safe_import(module_name)
        if module:
            functions = get_module_functions(module, module_name)
            algorithms.extend(functions)
            print(f" {module_name}: Found {len(functions)} functions")
        else:
            print(f" {module_name}: Not available")
    
    return algorithms

def categorize_python_function(module_name, function_name):
    """Categorize Python functions based on module and function name"""
    
    module_lower = module_name.lower()
    function_lower = function_name.lower()
    
    # GDAL/OGR specific
    if 'gdal' in module_lower or 'ogr' in module_lower:
        if any(keyword in function_lower for keyword in ['read', 'open', 'create', 'write', 'driver']):
            return 'GDAL I/O'
        elif any(keyword in function_lower for keyword in ['transform', 'warp', 'reproject', 'geotransform']):
            return 'GDAL Geometric'
        elif any(keyword in function_lower for keyword in ['raster', 'band', 'pixel']):
            return 'GDAL Raster'
        elif any(keyword in function_lower for keyword in ['vector', 'feature', 'geometry', 'layer']):
            return 'GDAL Vector'
        else:
            return 'GDAL General'
    
    # Rasterio specific
    elif 'rasterio' in module_lower:
        if 'mask' in module_lower or any(keyword in function_lower for keyword in ['mask', 'clip']):
            return 'Rasterio Masking'
        elif 'warp' in module_lower or any(keyword in function_lower for keyword in ['warp', 'reproject', 'transform']):
            return 'Rasterio Warping'
        elif 'merge' in module_lower or any(keyword in function_lower for keyword in ['merge', 'mosaic']):
            return 'Rasterio Merging'
        elif any(keyword in function_lower for keyword in ['read', 'write', 'open']):
            return 'Rasterio I/O'
        else:
            return 'Rasterio General'
    
    # Shapely specific
    elif 'shapely' in module_lower:
        if 'ops' in module_lower or any(keyword in function_lower for keyword in ['union', 'intersection', 'difference', 'buffer']):
            return 'Shapely Operations'
        elif 'affinity' in module_lower or any(keyword in function_lower for keyword in ['rotate', 'scale', 'translate', 'skew']):
            return 'Shapely Transformations'
        elif any(keyword in function_lower for keyword in ['point', 'line', 'polygon', 'geometry']):
            return 'Shapely Geometry'
        else:
            return 'Shapely General'
    
    # GeoPandas specific
    elif 'geopandas' in module_lower:
        if any(keyword in function_lower for keyword in ['overlay', 'sjoin', 'spatial']):
            return 'GeoPandas Spatial'
        elif any(keyword in function_lower for keyword in ['read', 'write', 'to_']):
            return 'GeoPandas I/O'
        else:
            return 'GeoPandas General'
    
    # Scikit-image specific
    elif 'skimage' in module_lower:
        if 'filters' in module_lower or any(keyword in function_lower for keyword in ['filter', 'smooth', 'gaussian', 'median']):
            return 'Skimage Filtering'
        elif 'morphology' in module_lower or any(keyword in function_lower for keyword in ['erosion', 'dilation', 'opening', 'closing']):
            return 'Skimage Morphology'
        elif 'segmentation' in module_lower or any(keyword in function_lower for keyword in ['segment', 'watershed', 'region']):
            return 'Skimage Segmentation'
        elif 'feature' in module_lower or any(keyword in function_lower for keyword in ['edge', 'corner', 'blob', 'peak']):
            return 'Skimage Features'
        elif 'transform' in module_lower or any(keyword in function_lower for keyword in ['resize', 'rotate', 'warp']):
            return 'Skimage Transform'
        elif 'measure' in module_lower or any(keyword in function_lower for keyword in ['label', 'properties', 'area', 'perimeter']):
            return 'Skimage Measurement'
        else:
            return 'Skimage General'
    
    # OpenCV specific
    elif 'cv2' in module_lower:
        if any(keyword in function_lower for keyword in ['filter', 'blur', 'smooth', 'gaussian', 'bilateral']):
            return 'OpenCV Filtering'
        elif any(keyword in function_lower for keyword in ['morphology', 'erode', 'dilate', 'opening', 'closing']):
            return 'OpenCV Morphology'
        elif any(keyword in function_lower for keyword in ['edge', 'canny', 'sobel', 'laplacian']):
            return 'OpenCV Edge Detection'
        elif any(keyword in function_lower for keyword in ['transform', 'warp', 'perspective', 'affine']):
            return 'OpenCV Transform'
        elif any(keyword in function_lower for keyword in ['feature', 'corner', 'keypoint', 'descriptor']):
            return 'OpenCV Features'
        elif any(keyword in function_lower for keyword in ['contour', 'hull', 'moments']):
            return 'OpenCV Contours'
        else:
            return 'OpenCV General'
    
    # SciPy specific
    elif 'scipy' in module_lower:
        if 'ndimage' in module_lower:
            return 'SciPy Image Processing'
        elif 'spatial' in module_lower:
            return 'SciPy Spatial'
        elif 'interpolate' in module_lower:
            return 'SciPy Interpolation'
        elif 'signal' in module_lower:
            return 'SciPy Signal Processing'
        elif 'stats' in module_lower:
            return 'SciPy Statistics'
        else:
            return 'SciPy General'
    
    # NumPy specific
    elif 'numpy' in module_lower:
        if any(keyword in function_lower for keyword in ['array', 'matrix', 'reshape', 'transpose']):
            return 'NumPy Arrays'
        elif any(keyword in function_lower for keyword in ['math', 'sum', 'mean', 'std', 'var', 'max', 'min']):
            return 'NumPy Math'
        elif 'linalg' in module_lower:
            return 'NumPy Linear Algebra'
        elif 'ma' in module_lower:
            return 'NumPy Masked Arrays'
        else:
            return 'NumPy General'
    
    # Other libraries
    elif any(lib in module_lower for lib in ['fiona', 'pyproj', 'cartopy', 'folium']):
        return f"{module_name.split('.')[0].title()} Tools"
    
    # Generic categorization
    elif any(keyword in function_lower for keyword in ['read', 'write', 'load', 'save', 'open']):
        return 'I/O Operations'
    elif any(keyword in function_lower for keyword in ['plot', 'show', 'display', 'visualize']):
        return 'Visualization'
    elif any(keyword in function_lower for keyword in ['transform', 'warp', 'project', 'convert']):
        return 'Transformations'
    elif any(keyword in function_lower for keyword in ['filter', 'smooth', 'enhance', 'process']):
        return 'Processing'
    elif any(keyword in function_lower for keyword in ['analyze', 'calculate', 'compute', 'measure']):
        return 'Analysis'
    else:
        return 'General'

def merge_algorithms(algorithm_lists):
    """Merge algorithm lists and remove duplicates"""
    seen = set()
    merged = []
    
    for alg_list in algorithm_lists:
        for alg in alg_list:
            alg_id = alg['algorithm_id']
            
            if alg_id not in seen:
                seen.add(alg_id)
                merged.append(alg)
    
    return merged

def export_python_algorithms():
    """Export all Python geospatial algorithms to CSV"""
    
    print("=== Python Geospatial Libraries Algorithm Extraction ===\n")
    
    # Detection methods for different library groups
    detection_methods = [
        ("GDAL/OGR", get_gdal_algorithms),
        ("Rasterio", get_rasterio_algorithms),
        ("Shapely", get_shapely_algorithms),
        ("GeoPandas", get_geopandas_algorithms),
        ("Scikit-image", get_skimage_algorithms),
        ("OpenCV", get_opencv_algorithms),
        ("SciPy", get_scipy_algorithms),
        ("NumPy", get_numpy_algorithms),
        ("Other GIS Libraries", get_other_gis_libraries)
    ]
    
    all_algorithms = []
    
    for method_name, method_func in detection_methods:
        print(f"\n--- Processing {method_name} ---")
        try:
            algorithms = method_func()
            if algorithms:
                all_algorithms.append(algorithms)
                print(f" {method_name}: Total {len(algorithms)} functions extracted")
            else:
                print(f" {method_name}: No functions found")
        except Exception as e:
            print(f" {method_name}: Failed (compatibility issue)")
    
    # Merge all results
    if all_algorithms:
        merged_algorithms = merge_algorithms(all_algorithms)
        
        # Write to CSV
        csv_path = '/workspace/python_gis_algorithms.csv'
        with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['tool', 'provider', 'algorithm_id', 'display_name', 'group', 'detection_method']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(merged_algorithms)
        
        print(f"\n=== Results ===")
        print(f" Total algorithms found: {len(merged_algorithms)}")
        print(f" CSV saved to: {csv_path}")
        
        # Provider summary
        providers = {}
        for alg in merged_algorithms:
            provider = alg['provider']
            providers[provider] = providers.get(provider, 0) + 1
        
        print(f"\nTop providers:")
        sorted_providers = sorted(providers.items(), key=lambda x: x[1], reverse=True)
        for provider, count in sorted_providers[:10]:
            print(f"  {provider}: {count}")
        
        # Category summary
        categories = {}
        for alg in merged_algorithms:
            cat = alg['group']
            categories[cat] = categories.get(cat, 0) + 1
        
        print(f"\nTop categories:")
        sorted_cats = sorted(categories.items(), key=lambda x: x[1], reverse=True)
        for cat, count in sorted_cats[:15]:
            print(f"  {cat}: {count}")
        
        return len(merged_algorithms)
    else:
        print(" No Python algorithms found")
        return 0

if __name__ == "__main__":
    total = export_python_algorithms()
    print(f"\nFinal count: {total} Python geospatial algorithms catalogued")

=== Python Geospatial Libraries Algorithm Extraction ===


--- Processing GDAL/OGR ---
 osgeo.gdal: Found 620 functions
 osgeo.ogr: Found 518 functions
 osgeo.osr: Found 187 functions
 osgeo.gdal_array: Found 32 functions
 osgeo.gdalconst: Found 0 functions
 GDAL/OGR: Total 1357 functions extracted

--- Processing Rasterio ---
 rasterio: Found 131 functions
 rasterio.features: Found 103 functions
 rasterio.mask: Found 5 functions
 rasterio.merge: Found 71 functions
 rasterio.plot: Found 50 functions
 rasterio.sample: Found 14 functions
 rasterio.transform: Found 51 functions
 rasterio.warp: Found 39 functions
 rasterio.windows: Found 47 functions
 rasterio.enums: Found 0 functions
 rasterio.crs: Found 30 functions
 Rasterio: Total 541 functions extracted

--- Processing Shapely ---
 shapely.geometry: Found 280 functions
 shapely.ops: Found 266 functions
 shapely.affinity: Found 6 functions
 shapely.algorithms: Found 0 functions
 shapely.predicates: Found 32 functions
 shapely.validatio

In [12]:
#!/usr/bin/env python3
"""
QGIS Processing Framework algorithm extraction
Extracts algorithms from QGIS Processing providers including QGIS, GDAL, GRASS, SAGA, etc.
"""

import csv
import os
import sys
import warnings
import subprocess
import json
import re
from pathlib import Path

# Suppress all warnings
warnings.filterwarnings("ignore")
os.environ['PYTHONWARNINGS'] = 'ignore'

def setup_qgis_environment():
    """Setup QGIS Python environment"""
    
    # Common QGIS Python paths
    qgis_python_paths = [
        '/usr/share/qgis/python',
        '/usr/lib/python3/dist-packages/qgis',
        '/opt/conda/envs/pygile/share/qgis/python',
        '/Applications/QGIS.app/Contents/Resources/python',
        'C:/Program Files/QGIS 3.*/apps/qgis/python',
        '/usr/local/share/qgis/python'
    ]
    
    # Add QGIS Python paths
    paths_added = 0
    for path in qgis_python_paths:
        if os.path.exists(path) and path not in sys.path:
            sys.path.insert(0, path)
            paths_added += 1
    
    # Set QGIS environment variables
    qgis_prefixes = ['/usr', '/opt/conda/envs/pygile', '/Applications/QGIS.app/Contents/MacOS']
    
    for prefix in qgis_prefixes:
        if os.path.exists(f"{prefix}/share/qgis"):
            os.environ['QGIS_PREFIX_PATH'] = prefix
            break
    
    # Set additional QGIS paths
    if 'QGIS_PREFIX_PATH' in os.environ:
        qgis_prefix = os.environ['QGIS_PREFIX_PATH']
        os.environ['QT_QPA_PLATFORM'] = 'offscreen'  # For headless operation
        
        # Add to Python path
        qgis_python = f"{qgis_prefix}/share/qgis/python"
        if os.path.exists(qgis_python) and qgis_python not in sys.path:
            sys.path.insert(0, qgis_python)
            paths_added += 1
    
    print(f"QGIS environment configured ({paths_added} Python paths added)")
    return paths_added > 0

def initialize_qgis():
    """Initialize QGIS application for processing"""
    
    try:
        from qgis.core import QgsApplication, QgsProcessingRegistry
        from qgis.analysis import QgsNativeAlgorithms
        
        # Initialize QGIS Application
        QgsApplication.setPrefixPath(os.environ.get('QGIS_PREFIX_PATH', '/usr'), True)
        qgs = QgsApplication([], False)
        qgs.initQgis()
        
        # Initialize processing
        from processing.core.Processing import Processing
        Processing.initialize()
        
        print(" QGIS application initialized successfully")
        return qgs, QgsProcessingRegistry.algorithmRegistry()
        
    except ImportError as e:
        print(f" QGIS import failed: {str(e)[:100]}...")
        return None, None
    except Exception as e:
        print(f" QGIS initialization failed: {str(e)[:100]}...")
        return None, None

def get_qgis_processing_algorithms():
    """Extract algorithms from QGIS Processing framework"""
    algorithms = []
    
    if not setup_qgis_environment():
        print(" QGIS environment setup failed")
        return algorithms
    
    qgs, registry = initialize_qgis()
    
    if not registry:
        print(" QGIS Processing registry not available")
        return algorithms
    
    try:
        # Get all algorithm providers
        providers = registry.providers()
        
        print(f"Found {len(providers)} processing providers")
        
        for provider in providers:
            provider_id = provider.id()
            provider_name = provider.name()
            
            # Get algorithms from this provider
            alg_ids = provider.algorithmIds()
            
            provider_algorithms = []
            
            for alg_id in alg_ids:
                try:
                    algorithm = registry.algorithmById(alg_id)
                    
                    if algorithm:
                        # Get algorithm details
                        display_name = algorithm.displayName()
                        group = algorithm.group()
                        short_help = ""
                        
                        try:
                            short_help = algorithm.shortHelpString()
                        except:
                            pass
                        
                        # Use short help as description, fallback to display name
                        description = short_help if short_help else display_name
                        if len(description) > 200:
                            description = description[:200] + "..."
                        
                        # Categorize based on provider and group
                        category = categorize_qgis_algorithm(provider_id, group, alg_id)
                        
                        provider_algorithms.append({
                            'tool': 'QGIS',
                            'provider': provider_id,
                            'algorithm_id': alg_id,
                            'display_name': description,
                            'group': category,
                            'detection_method': 'processing_registry'
                        })
                
                except Exception as e:
                    # Add basic entry if detailed extraction fails
                    provider_algorithms.append({
                        'tool': 'QGIS',
                        'provider': provider_id,
                        'algorithm_id': alg_id,
                        'display_name': alg_id.split(':')[-1] if ':' in alg_id else alg_id,
                        'group': categorize_qgis_algorithm(provider_id, "", alg_id),
                        'detection_method': 'processing_registry'
                    })
            
            if provider_algorithms:
                algorithms.extend(provider_algorithms)
                print(f" {provider_name} ({provider_id}): Found {len(provider_algorithms)} algorithms")
            else:
                print(f" {provider_name} ({provider_id}): No algorithms found")
        
        # Clean up QGIS
        if qgs:
            qgs.exitQgis()
            
    except Exception as e:
        print(f"Error extracting QGIS algorithms: {str(e)[:100]}...")
    
    return algorithms

def get_qgis_cli_algorithms():
    """Extract QGIS algorithms using CLI tools"""
    algorithms = []
    
    # Try to find QGIS processing CLI tools
    qgis_commands = [
        'qgis_process',
        'qgis3_process', 
        'qgis-ltr_process',
        '/usr/bin/qgis_process',
        '/opt/conda/envs/pygile/bin/qgis_process'
    ]
    
    qgis_cmd = None
    for cmd in qgis_commands:
        try:
            result = subprocess.run([cmd, '--version'], capture_output=True, text=True, timeout=10)
            if result.returncode == 0:
                qgis_cmd = cmd
                print(f" Found QGIS CLI: {cmd}")
                break
        except:
            continue
    
    if not qgis_cmd:
        print(" QGIS CLI tools not found")
        return algorithms
    
    try:
        # Get list of algorithms
        result = subprocess.run([qgis_cmd, 'list'], capture_output=True, text=True, timeout=30)
        
        if result.returncode == 0 and result.stdout:
            lines = result.stdout.split('\n')
            
            current_provider = ""
            
            for line in lines:
                line = line.strip()
                
                if not line:
                    continue
                
                # Check if this is a provider header
                if line.endswith(':') and not line.startswith(' '):
                    current_provider = line.rstrip(':')
                    continue
                
                # Check if this is an algorithm
                if line.startswith(' ') or ':' in line:
                    # Parse algorithm line
                    if ':' in line:
                        alg_id = line.strip()
                        provider_id = alg_id.split(':')[0] if ':' in alg_id else current_provider
                        alg_name = alg_id.split(':')[-1] if ':' in alg_id else alg_id
                    else:
                        alg_name = line.strip()
                        alg_id = f"{current_provider}:{alg_name}" if current_provider else alg_name
                        provider_id = current_provider
                    
                    if alg_name and len(alg_name) > 1:
                        algorithms.append({
                            'tool': 'QGIS',
                            'provider': provider_id,
                            'algorithm_id': alg_id,
                            'display_name': alg_name,
                            'group': categorize_qgis_algorithm(provider_id, "", alg_id),
                            'detection_method': 'cli_list'
                        })
            
            print(f" CLI list: Found {len(algorithms)} algorithms")
        else:
            print(f" CLI list failed: {result.stderr[:100]}...")
            
    except Exception as e:
        print(f" CLI extraction failed: {str(e)[:50]}...")
    
    return algorithms

def get_qgis_help_detailed():
    """Get detailed help for QGIS algorithms"""
    algorithms = []
    
    # First get basic list
    base_algorithms = get_qgis_cli_algorithms()
    
    if not base_algorithms:
        return algorithms
    
    qgis_cmd = None
    qgis_commands = ['qgis_process', 'qgis3_process', 'qgis-ltr_process']
    
    for cmd in qgis_commands:
        try:
            result = subprocess.run([cmd, '--version'], capture_output=True, text=True, timeout=5)
            if result.returncode == 0:
                qgis_cmd = cmd
                break
        except:
            continue
    
    if not qgis_cmd:
        return algorithms
    
    print(f"Getting detailed help for {len(base_algorithms)} algorithms...")
    
    # Limit to reasonable number for detailed extraction
    sample_algorithms = base_algorithms[:100] if len(base_algorithms) > 100 else base_algorithms
    
    for i, alg_info in enumerate(sample_algorithms):
        alg_id = alg_info['algorithm_id']
        
        if i % 20 == 0:
            print(f"  Progress: {i+1}/{len(sample_algorithms)}")
        
        try:
            # Get help for individual algorithm
            result = subprocess.run([qgis_cmd, 'help', alg_id], 
                                  capture_output=True, 
                                  text=True, 
                                  timeout=10)
            
            if result.returncode == 0 and result.stdout:
                output = result.stdout
                
                # Extract description from help output
                description = alg_id.split(':')[-1] if ':' in alg_id else alg_id
                
                # Look for description patterns
                desc_patterns = [
                    r'Description:\s*(.+?)(?:\n\n|\nUsage:|\nArguments:)',
                    r'DESCRIPTION:\s*(.+?)(?:\n\n|\nUSAGE:|\nARGUMENTS:)',
                    r'Summary:\s*(.+?)(?:\n)',
                    r'Purpose:\s*(.+?)(?:\n)'
                ]
                
                for pattern in desc_patterns:
                    match = re.search(pattern, output, re.DOTALL | re.IGNORECASE)
                    if match:
                        desc = match.group(1).strip()
                        if len(desc) > len(description) and len(desc) < 300:
                            description = desc
                            break
                
                # Clean up description
                description = ' '.join(description.split())
                
                algorithms.append({
                    'tool': 'QGIS',
                    'provider': alg_info['provider'],
                    'algorithm_id': alg_id,
                    'display_name': description,
                    'group': alg_info['group'],
                    'detection_method': 'cli_detailed_help'
                })
            else:
                # Keep original if help fails
                algorithms.append(alg_info)
                
        except:
            # Keep original if help fails
            algorithms.append(alg_info)
    
    print(f" Detailed help: Found {len(algorithms)} algorithms")
    return algorithms

def get_qgis_config_files():
    """Extract algorithms from QGIS configuration and plugin files"""
    algorithms = []
    
    # Look for QGIS plugin directories
    qgis_plugin_paths = [
        '/usr/share/qgis/python/plugins',
        '/opt/conda/envs/pygile/share/qgis/python/plugins',
        os.path.expanduser('~/.local/share/QGIS/QGIS3/profiles/default/python/plugins'),
        '/Applications/QGIS.app/Contents/Resources/python/plugins'
    ]
    
    algorithm_files_found = 0
    
    for plugin_path in qgis_plugin_paths:
        if os.path.exists(plugin_path):
            try:
                # Look for processing provider files
                for root, dirs, files in os.walk(plugin_path):
                    for file in files:
                        if file.endswith('.py') and any(keyword in file.lower() for keyword in ['algorithm', 'processing', 'provider']):
                            file_path = os.path.join(root, file)
                            algorithm_files_found += 1
                            
                            try:
                                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                                    content = f.read()
                                    
                                    # Look for algorithm class definitions
                                    algorithm_patterns = [
                                        r'class\s+(\w+)\s*\(\s*QgsProcessingAlgorithm',
                                        r'class\s+(\w+)Algorithm',
                                        r'def\s+createAlgorithm.*name.*["\']([^"\']+)["\']',
                                        r'ALGORITHM_NAME\s*=\s*["\']([^"\']+)["\']'
                                    ]
                                    
                                    for pattern in algorithm_patterns:
                                        matches = re.findall(pattern, content, re.IGNORECASE)
                                        for match in matches:
                                            if len(match) > 2:
                                                plugin_name = os.path.basename(os.path.dirname(file_path))
                                                
                                                algorithms.append({
                                                    'tool': 'QGIS',
                                                    'provider': f'plugin_{plugin_name}',
                                                    'algorithm_id': f'{plugin_name}:{match}',
                                                    'display_name': match,
                                                    'group': 'Plugin Algorithms',
                                                    'detection_method': 'config_files'
                                                })
                            except:
                                continue
                
                if algorithms:
                    print(f" Config files ({plugin_path}): Found {len(algorithms)} algorithms")
                    break
                    
            except Exception as e:
                continue
    
    if not algorithms:
        print(f" Config files: No algorithms found (checked {algorithm_files_found} files)")
    
    return algorithms

def categorize_qgis_algorithm(provider_id, group, algorithm_id):
    """Categorize QGIS algorithms based on provider and algorithm details"""
    
    provider_lower = provider_id.lower() if provider_id else ""
    group_lower = group.lower() if group else ""
    alg_lower = algorithm_id.lower() if algorithm_id else ""
    
    # Provider-specific categorization
    if provider_lower == 'qgis' or provider_lower == 'native':
        if any(keyword in group_lower for keyword in ['vector', 'geometry']):
            return 'QGIS Vector'
        elif any(keyword in group_lower for keyword in ['raster', 'analysis']):
            return 'QGIS Raster'
        elif any(keyword in group_lower for keyword in ['database', 'table']):
            return 'QGIS Database'
        elif any(keyword in group_lower for keyword in ['cartography', 'layout']):
            return 'QGIS Cartography'
        else:
            return 'QGIS General'
    
    elif provider_lower == 'gdal':
        if any(keyword in alg_lower for keyword in ['raster', 'grid', 'dem']):
            return 'GDAL Raster'
        elif any(keyword in alg_lower for keyword in ['vector', 'ogr', 'layer']):
            return 'GDAL Vector'
        elif any(keyword in alg_lower for keyword in ['warp', 'transform', 'project']):
            return 'GDAL Transform'
        else:
            return 'GDAL General'
    
    elif provider_lower == 'grass' or provider_lower.startswith('grass'):
        if any(keyword in alg_lower for keyword in ['r.', 'raster']):
            return 'GRASS Raster'
        elif any(keyword in alg_lower for keyword in ['v.', 'vector']):
            return 'GRASS Vector'
        elif any(keyword in alg_lower for keyword in ['i.', 'imagery']):
            return 'GRASS Imagery'
        elif any(keyword in alg_lower for keyword in ['d.', 'display']):
            return 'GRASS Display'
        else:
            return 'GRASS General'
    
    elif provider_lower == 'saga' or provider_lower.startswith('saga'):
        if any(keyword in alg_lower for keyword in ['grid', 'raster']):
            return 'SAGA Grid'
        elif any(keyword in alg_lower for keyword in ['shapes', 'vector']):
            return 'SAGA Vector'
        elif any(keyword in alg_lower for keyword in ['terrain', 'morphometry']):
            return 'SAGA Terrain'
        elif any(keyword in alg_lower for keyword in ['imagery', 'classification']):
            return 'SAGA Imagery'
        else:
            return 'SAGA General'
    
    elif provider_lower == 'otb':
        return 'OTB Processing'
    
    elif provider_lower == 'r' or provider_lower.startswith('r:'):
        return 'R Scripts'
    
    elif 'plugin' in provider_lower:
        return 'Plugin Algorithms'
    
    # Generic categorization based on algorithm name
    elif any(keyword in alg_lower for keyword in ['buffer', 'clip', 'intersect', 'union', 'difference']):
        return 'Geoprocessing'
    elif any(keyword in alg_lower for keyword in ['interpolat', 'grid', 'surface']):
        return 'Interpolation'
    elif any(keyword in alg_lower for keyword in ['network', 'routing', 'shortest']):
        return 'Network Analysis'
    elif any(keyword in alg_lower for keyword in ['statistic', 'summary', 'count']):
        return 'Statistics'
    elif any(keyword in alg_lower for keyword in ['join', 'merge', 'append']):
        return 'Data Management'
    elif any(keyword in alg_lower for keyword in ['export', 'import', 'convert']):
        return 'Data Conversion'
    else:
        return f'{provider_id.title()} General' if provider_id else 'General'

def merge_algorithms(algorithm_lists):
    """Merge algorithm lists and remove duplicates"""
    seen = {}
    merged = []
    
    for alg_list in algorithm_lists:
        for alg in alg_list:
            alg_id = alg['algorithm_id']
            
            if alg_id not in seen:
                seen[alg_id] = alg
                merged.append(alg)
            else:
                # Update existing entry with better description and additional detection method
                existing = seen[alg_id]
                
                # Use longer, more descriptive display name
                if len(alg['display_name']) > len(existing['display_name']):
                    existing['display_name'] = alg['display_name']
                
                # Combine detection methods
                if alg['detection_method'] not in existing['detection_method']:
                    existing['detection_method'] += f", {alg['detection_method']}"
    
    return merged

def export_qgis_algorithms():
    """Export all QGIS algorithms to CSV"""
    
    print("=== QGIS Processing Framework Algorithm Extraction ===\n")
    
    # Detection methods
    detection_methods = [
        ("Processing Registry", get_qgis_processing_algorithms),
        ("CLI List", get_qgis_cli_algorithms),
        ("Detailed Help", get_qgis_help_detailed),
        ("Config Files", get_qgis_config_files)
    ]
    
    all_algorithms = []
    
    for method_name, method_func in detection_methods:
        print(f"\n--- Trying {method_name} ---")
        try:
            algorithms = method_func()
            if algorithms:
                all_algorithms.append(algorithms)
                print(f" {method_name}: Found {len(algorithms)} algorithms")
            else:
                print(f" {method_name}: No algorithms found")
        except Exception as e:
            print(f" {method_name}: Failed ({str(e)[:50]}...)")
    
    # Merge all results
    if all_algorithms:
        merged_algorithms = merge_algorithms(all_algorithms)
        
        # Write to CSV
        csv_path = '/workspace/qgis_algorithms.csv'
        with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['tool', 'provider', 'algorithm_id', 'display_name', 'group', 'detection_method']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(merged_algorithms)
        
        print(f"\n=== Results ===")
        print(f" Total algorithms found: {len(merged_algorithms)}")
        print(f" CSV saved to: {csv_path}")
        
        # Provider summary
        providers = {}
        for alg in merged_algorithms:
            provider = alg['provider']
            providers[provider] = providers.get(provider, 0) + 1
        
        print(f"\nTop providers:")
        sorted_providers = sorted(providers.items(), key=lambda x: x[1], reverse=True)
        for provider, count in sorted_providers[:10]:
            print(f"  {provider}: {count}")
        
        # Category summary
        categories = {}
        for alg in merged_algorithms:
            cat = alg['group']
            categories[cat] = categories.get(cat, 0) + 1
        
        print(f"\nTop categories:")
        sorted_cats = sorted(categories.items(), key=lambda x: x[1], reverse=True)
        for cat, count in sorted_cats[:15]:
            print(f"  {cat}: {count}")
        
        # Detection method summary
        detection_stats = {}
        for alg in merged_algorithms:
            methods = [m.strip() for m in alg['detection_method'].split(',')]
            for method in methods:
                detection_stats[method] = detection_stats.get(method, 0) + 1
        
        print(f"\nDetection methods:")
        for method, count in sorted(detection_stats.items()):
            print(f"  {method}: {count}")
        
        return len(merged_algorithms)
    else:
        print(" No QGIS algorithms found")
        return 0

if __name__ == "__main__":
    total = export_qgis_algorithms()
    print(f"\nFinal count: {total} QGIS algorithms catalogued")
    


=== QGIS Processing Framework Algorithm Extraction ===


--- Trying Processing Registry ---
QGIS environment configured (0 Python paths added)
 QGIS environment setup failed
 Processing Registry: No algorithms found

--- Trying CLI List ---
 Found QGIS CLI: qgis_process
 CLI list: Found 379 algorithms
 CLI List: Found 379 algorithms

--- Trying Detailed Help ---
 Found QGIS CLI: qgis_process
 CLI list: Found 379 algorithms
Getting detailed help for 379 algorithms...
  Progress: 1/100
  Progress: 21/100
  Progress: 41/100
  Progress: 61/100
  Progress: 81/100
 Detailed help: Found 100 algorithms
 Detailed Help: Found 100 algorithms

--- Trying Config Files ---
 Config files (/opt/conda/envs/pygile/share/qgis/python/plugins): Found 30 algorithms
 Config Files: Found 30 algorithms

=== Results ===
 Total algorithms found: 398
 CSV saved to: /workspace/qgis_algorithms.csv

Top providers:
  native: 266
  gdal: 57
  qgis: 38
  pdal: 17
  plugin_tests: 7
  plugin_qgis: 3
  plugin_gui: 3
  plu