In [None]:
#!/usr/bin/env python3
"""
PyGILE-Plus Environment Setup
"""
import os
import sys

# Point the process environment at the bundled SAGA, GRASS and OTB installs.
os.environ.update({
    'LD_LIBRARY_PATH': "/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib",
    'SAGA_CMD': '/opt/saga/bin/saga_cmd',
    'SAGA_MLB': '/opt/saga/lib/saga',
    'GISBASE': '/opt/grass',
    'OTB_APPLICATION_PATH': '/opt/otb/lib/otb/applications',
})

# Put the GRASS Python directory at the front of the import search path.
sys.path[:0] = ['/opt/grass/etc/python']

print("PyGILE-Plus initialized with direct tool access!")

PyGILE-Plus initialized with 1,773+ algorithms


In [None]:
#!/usr/bin/env python3
"""
SAGA GIS Algorithm Extraction - Minimal Version
"""
import re
import subprocess
import csv

def export_saga_algorithms():
    """Enumerate SAGA tools through ``saga_cmd`` and dump them to CSV.

    ``saga_cmd`` is run once without arguments to list the libraries, then
    once per library to list that library's tools. One row of
    ``[library, tool_id, tool_name]`` per tool is written to
    ``/workspace/saga_all_algorithms.csv``.

    Returns:
        int: Number of tool rows written.
    """
    saga_cmd = "/opt/saga/bin/saga_cmd"
    saga_env = {
        'LD_LIBRARY_PATH': '/opt/saga/lib:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:/opt/conda/envs/pygile/lib',
        'SAGA_MLB': '/opt/saga/lib/saga'
    }

    def saga_stdout(*args):
        """Run saga_cmd with *args* and return its captured standard output."""
        completed = subprocess.run([saga_cmd, *args], capture_output=True, text=True, env=saga_env)
        return completed.stdout

    # Lines starting with ' - ' are treated as library entries. Dict keys
    # drop repeats while keeping first-seen order.
    first_seen = {}
    for raw in saga_stdout().split('\n'):
        if not raw.startswith(' - '):
            continue
        name = raw[3:].strip().rstrip(' *')
        if name and not name.startswith('_'):
            first_seen[name] = None
    unique_libraries = list(first_seen)
    print(f"Processing {len(unique_libraries)} SAGA libraries...")

    # Tool lines carry a bracketed numeric id followed by the tool name.
    tool_line = re.compile(r'^\s*\[(\d+)\]\s+(.+)')
    algorithms_data = [
        [lib, hit.group(1), hit.group(2).strip()]
        for lib in unique_libraries
        for hit in map(tool_line.match, saga_stdout(lib).split('\n'))
        if hit
    ]

    # Save CSV
    with open('/workspace/saga_all_algorithms.csv', 'w', newline='', encoding='utf-8') as csvfile:
        out = csv.writer(csvfile)
        out.writerow(['Library', 'Tool_ID', 'Algorithm_Name'])
        out.writerows(algorithms_data)

    row_count = len(algorithms_data)
    print(f"SAGA extraction complete: {row_count} algorithms")
    return row_count

# Run the SAGA export only when executed directly (not when imported);
# the returned row count is kept in `total`.
if __name__ == "__main__":
    total = export_saga_algorithms()

Processing 73 SAGA libraries...
SAGA extraction complete: 733 algorithms


In [None]:
#!/usr/bin/env python3
"""
GRASS GIS Algorithm Extraction - Minimal Version
"""
import subprocess
import csv
import os

def export_grass_algorithms():
    """Scan the GRASS install directories and write a module inventory CSV.

    Every file whose name starts with a known module prefix (``r.``, ``v.``,
    ...) is recorded once; the first directory in which a name is seen wins.
    The description for each module comes from ``get_module_description``.
    Rows are written to ``/workspace/grass_all_algorithms.csv``.

    Returns:
        int: Number of modules written.
    """
    # Filename prefix -> human-readable group.
    module_categories = {
        'r.': 'Raster', 'v.': 'Vector', 'g.': 'General', 'i.': 'Imagery',
        't.': 'Temporal', 'd.': 'Display', 'db.': 'Database', 'ps.': 'PostScript'
    }

    # GRASS directories, in lookup priority order.
    grass_directories = [
        '/opt/grass/grass84/bin',
        '/opt/grass/grass84/scripts',
        '/opt/grass/bin'
    ]

    print("Processing GRASS directories...")

    algorithms = []
    # Set of already-recorded module names: O(1) duplicate checks instead of
    # rescanning the whole result list for every file.
    seen_ids = set()

    for grass_dir in grass_directories:
        # isdir (not exists) so a stray regular file cannot break listdir.
        if not os.path.isdir(grass_dir):
            continue

        for filename in sorted(os.listdir(grass_dir)):
            category = next(
                (group for prefix, group in module_categories.items()
                 if filename.startswith(prefix)),
                None,
            )
            if category is None or filename in seen_ids:
                continue

            seen_ids.add(filename)
            algorithms.append({
                'tool': 'GRASS',
                'provider': 'GRASS',
                'algorithm_id': filename,
                'display_name': get_module_description(filename, grass_dir),
                'group': category,
                'location': grass_dir
            })

    # Save CSV
    csv_path = '/workspace/grass_all_algorithms.csv'
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['tool', 'provider', 'algorithm_id', 'display_name', 'group', 'location']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(algorithms)

    print(f"GRASS extraction complete: {len(algorithms)} algorithms")
    return len(algorithms)

def get_module_description(module_name, module_dir):
    """Return a one-line description for a GRASS module.

    The module is run with ``--help`` (3 second timeout) from the first
    existing candidate location, and its combined stdout/stderr is scanned
    line by line:

    * a line containing ``Description:`` yields the text after the marker,
      or, when the marker stands alone on its line, the next non-empty
      line that is not a section header;
    * otherwise the first line longer than 10 characters that is not a
      ``Usage:``/``Flags:``/``Parameters:`` header is used.

    Args:
        module_name: File name of the module, e.g. ``r.slope.aspect``.
        module_dir: Directory in which the module was found.

    Returns:
        str: The description, or ``module_name`` when none could be obtained.
    """
    # De-duplicate candidates so the same executable is never run twice
    # (module_dir may itself be one of the fixed locations).
    candidates = list(dict.fromkeys([
        f'{module_dir}/{module_name}',
        f'/opt/grass/bin/{module_name}',
        f'/opt/grass/grass84/bin/{module_name}'
    ]))
    env = {'GISBASE': '/opt/grass', 'PATH': '/opt/grass/bin:/opt/grass/grass84/bin:' + os.environ.get('PATH', '')}
    section_headers = ('Usage:', 'Flags:', 'Parameters:')

    for exec_path in candidates:
        if not os.path.exists(exec_path):
            continue

        try:
            result = subprocess.run([exec_path, '--help'], capture_output=True, text=True, env=env, timeout=3)
        except (OSError, subprocess.SubprocessError, UnicodeError):
            # Best effort: not executable, timed out, or undecodable output.
            continue

        awaiting_description = False
        for line in (result.stdout + result.stderr).split('\n'):
            line = line.strip()

            if awaiting_description:
                if not line:
                    continue
                awaiting_description = False
                if not line.startswith(section_headers):
                    return line
                continue

            if 'Description:' in line:
                text = line.split('Description:', 1)[1].strip()
                if text:
                    return text
                # Header on its own line: the description follows below.
                awaiting_description = True
            elif line and len(line) > 10 and not line.startswith(section_headers):
                return line

    return module_name

# Run the GRASS export only when executed directly (not when imported);
# the returned module count is kept in `total`.
if __name__ == "__main__":
    total = export_grass_algorithms()

Processing GRASS directories...
GRASS extraction complete: 500 algorithms


In [None]:
#!/usr/bin/env python3
import csv

# Packages extracted directly from the project Dockerfile, grouped by the
# section they were listed under. Some names appear more than once
# (e.g. "geojson", "pykrige", "census", "us", "sklearn-xarray");
# duplicates are removed further down before the CSV is written.
dockerfile_packages = [
    # Core geospatial
    "numpy", "gdal", "proj", "geos", "libspatialindex", "boost-cpp",
    "fiona", "shapely", "pyproj", "pandas", "scipy", "matplotlib", 
    "seaborn", "scikit-learn", "geopandas", "rasterio",
    
    # Data formats
    "xarray", "netcdf4", "h5py", "h5netcdf", "zarr",
    
    # Jupyter
    "jupyter", "jupyterlab", "ipywidgets",
    
    # Visualization
    "plotly", "bokeh", "folium", "contextily", "mapclassify",
    "holoviews", "hvplot", "pythreejs",
    
    # Geospatial analysis
    "osmnx", "earthpy", "geoplot",
    
    # Image processing
    "scikit-image", "tifffile", "imageio-ffmpeg", "opencv",
    
    # Web mapping
    "localtileserver", "rio-cogeo", "rioxarray", "ipyleaflet", 
    "owslib", "geemap", "leafmap",
    
    # Optional packages
    "census", "us", "pykrige", "palettable", "geojson",
    
    # Cloud tools
    "pystac", "stackstac", "planetary-computer",
    
    # GIS platforms
    "whitebox_tools",
    
    # Deep Learning
    "pytorch-cpu", "torchvision", "torchaudio", "pytorch-lightning",
    "tensorflow", "keras", "albumentations", "timm",
    
    # Additional packages (from pip installs)
    "pygis", "earthengine-api", "sklearn-xarray", "sphinx", 
    "sphinx_sitemap", "sphinxcontrib.bibtex", "sphinx_inline_tabs", 
    "pydata-sphinx-theme", "sankee", "overturemaps", "whiteboxgui",
    "jupyter-book", "ghp-import", "numpy-groupies", "sympy",
    "geojson", "dask-geopandas", "pykrige", "cenpy", "census", 
    "us", "sklearn-xarray", "whitebox", "PySAGA-cmd", "pyspatialml"
]

# Category -> member packages. The groups are disjoint, so lookup order
# does not matter.
_PACKAGE_CATEGORIES = {
    "Geospatial Core": ("gdal", "proj", "geos", "fiona", "shapely", "pyproj", "geopandas", "rasterio"),
    "Scientific Computing": ("numpy", "pandas", "scipy", "matplotlib", "seaborn", "scikit-learn"),
    "Data Formats": ("xarray", "netcdf4", "h5py", "h5netcdf", "zarr"),
    "Jupyter Environment": ("jupyter", "jupyterlab", "ipywidgets"),
    "Visualization": ("plotly", "bokeh", "folium", "contextily", "holoviews", "hvplot", "pythreejs"),
    "Web Mapping": ("geemap", "leafmap", "ipyleaflet", "owslib"),
    "Image Processing": ("scikit-image", "opencv", "tifffile", "imageio-ffmpeg"),
    "Geospatial Analysis": ("osmnx", "earthpy", "geoplot"),
    "Cloud/Remote Sensing": ("pystac", "stackstac", "planetary-computer", "earthengine-api"),
    "GIS Platforms": ("whitebox_tools", "PySAGA-cmd", "pyspatialml"),
    "Deep Learning": ("pytorch-cpu", "torchvision", "torchaudio", "pytorch-lightning",
                      "tensorflow", "keras", "albumentations", "timm"),
    "Documentation": ("sphinx", "sphinx_sitemap", "jupyter-book", "ghp-import"),
    "Specialized Tools": ("census", "us", "pykrige", "palettable", "geojson"),
}

# Flat, lower-cased package -> category index, built once at import time.
_CATEGORY_BY_PACKAGE = {
    member.lower(): category
    for category, members in _PACKAGE_CATEGORIES.items()
    for member in members
}


def categorize_package(pkg_name):
    """Return the category label for a package name.

    Matching is case-insensitive, so ``"GDAL"`` and ``"gdal"`` map to the
    same category. Names not present in any group yield ``"Other"``.

    Args:
        pkg_name: Package name as listed in the Dockerfile.

    Returns:
        str: The category label, or ``"Other"`` for unknown packages.
    """
    return _CATEGORY_BY_PACKAGE.get(pkg_name.lower(), "Other")

# Remove duplicates while preserving first-seen order (dict keys are ordered).
unique_packages = list(dict.fromkeys(dockerfile_packages))

# One CSV row per package: Tool, Provider, Algorithm_ID, Display_Name, Group.
# The package name doubles as both the id and the display name.
package_data = [
    ['Python', 'Package', pkg, pkg, categorize_package(pkg)]
    for pkg in unique_packages
]

# Save CSV. The encoding is explicit so the file does not depend on the
# platform default, matching the other exporters in this notebook.
with open('/workspace/python_packages_dockerfile.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Tool', 'Provider', 'Algorithm_ID', 'Display_Name', 'Group'])
    writer.writerows(package_data)

print(f"Extracted {len(package_data)} packages from Dockerfile")

# Count packages per category (the category is the last column of each row).
categories = {}
for row in package_data:
    cat = row[4]
    categories[cat] = categories.get(cat, 0) + 1

# Largest categories first; ties keep their first-seen order (stable sort).
print("\nCategories:")
for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True):
    print(f"  {cat}: {count}")

print(f"\nTotal Python packages from Dockerfile: {len(package_data)}")

Extracted 82 packages from Dockerfile

Categories:
  Other: 19
  Geospatial Core: 8
  Deep Learning: 8
  Scientific Computing: 6
  Visualization: 6
  Data Formats: 5
  Specialized Tools: 5
  Image Processing: 4
  Web Mapping: 4
  Cloud/Remote Sensing: 4
  Documentation: 4
  Jupyter Environment: 3
  Geospatial Analysis: 3
  GIS Platforms: 3

Total Python packages from Dockerfile: 82


In [None]:
import csv
import glob
import os

def export_otb_algorithms():
    """Export OTB algorithms to CSV from both the bin and applications directories.

    Two sources are scanned:

    * ``/opt/otb/bin/otbcli_*`` launchers, recorded with provider ``OTB_CLI``;
    * ``/opt/otb/lib/otb/applications/otbapp_*.so`` libraries, recorded with
      provider ``OTB_APP``.

    Matches are sorted so the CSV row order is reproducible (``glob`` itself
    returns results in arbitrary order). Rows are written to
    ``/workspace/otb_algorithms.csv``.

    Returns:
        int: Total number of rows written (CLI entries plus library entries).
    """
    cli_prefix = 'otbcli_'
    lib_prefix = 'otbapp_'
    lib_suffix = '.so'
    algorithms = []

    print("=== OTB Algorithm Extraction ===")

    # Check CLI binaries
    otb_bin_dir = '/opt/otb/bin'
    if os.path.exists(otb_bin_dir):
        otb_apps = sorted(glob.glob(os.path.join(otb_bin_dir, cli_prefix + '*')))

        for app_path in otb_apps:
            # Slice off only the leading prefix; str.replace would also eat
            # any later occurrence inside the application name.
            app_name = os.path.basename(app_path)[len(cli_prefix):]
            algorithms.append(['OTB', 'OTB_CLI', app_name, app_name])

        print(f"Found {len(otb_apps)} OTB CLI algorithms")

    # Check application libraries
    otb_app_dir = '/opt/otb/lib/otb/applications'
    if os.path.exists(otb_app_dir):
        otb_libs = sorted(glob.glob(os.path.join(otb_app_dir, lib_prefix + '*' + lib_suffix)))

        for lib_path in otb_libs:
            # The glob pattern guarantees both the prefix and the suffix.
            lib_name = os.path.basename(lib_path)[len(lib_prefix):-len(lib_suffix)]
            algorithms.append(['OTB', 'OTB_APP', lib_name, lib_name])

        print(f"Found {len(otb_libs)} OTB application libraries")

    total_algorithms = len(algorithms)
    print(f"Total OTB algorithms: {total_algorithms}")

    # Save CSV
    with open('/workspace/otb_algorithms.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Tool', 'Provider', 'Algorithm_ID', 'Algorithm_Name'])
        writer.writerows(algorithms)

    print(f"CSV saved with {total_algorithms} algorithms")
    return total_algorithms

# Run the OTB export immediately; the call returns the total row count.
export_otb_algorithms()

=== OTB Algorithm Extraction ===
Found 115 OTB CLI algorithms
Found 115 OTB application libraries
Total OTB algorithms: 230
CSV saved with 230 algorithms


230

In [None]:
#!/usr/bin/env python3
import csv
import subprocess
import os

def find_whitebox_executable():
    """Locate the WhiteboxTools executable.

    The known conda-environment locations are checked first; if neither is
    an executable file, ``whitebox_tools`` is looked up on ``PATH``.

    Returns:
        str | None: Path to the executable, or ``None`` when it is not found.
    """
    # Function-scope import so the block does not rely on a file-level one.
    import shutil

    # Try conda-forge installation path first
    conda_paths = (
        '/opt/conda/envs/pygile/bin/whitebox_tools',
        '/opt/conda/envs/pygile/bin/whiteboxtools',
    )
    for path in conda_paths:
        if os.path.isfile(path) and os.access(path, os.X_OK):
            return path

    # Try system PATH. shutil.which performs the lookup in-process, so it
    # works even where no external `which` binary is installed, and it
    # returns None when nothing is found.
    return shutil.which('whitebox_tools')

def categorize_tool(tool_name):
    """Map a WhiteboxTools tool name to a coarse category label.

    The lower-cased name is tested against keyword groups in priority
    order; the first group with a keyword contained in the name wins.
    Names matching no group are labelled ``'General'``.
    """
    keyword_groups = (
        ('Terrain Analysis', ('slope', 'aspect', 'hillshade', 'curvature')),
        ('Hydrological Analysis', ('flow', 'watershed', 'stream', 'drainage')),
        ('Image Processing', ('filter', 'smooth', 'gaussian', 'median')),
        ('Vector Analysis', ('buffer', 'clip', 'vector')),
        ('Raster Processing', ('raster', 'grid', 'resample')),
    )

    lowered = tool_name.lower()
    for label, keywords in keyword_groups:
        for word in keywords:
            if word in lowered:
                return label
    return 'General'

# Find executable
wb_exec = find_whitebox_executable()
if not wb_exec:
    print("WhiteboxTools executable not found")
    # SystemExit rather than exit(): the exit() helper is injected by the
    # site module for interactive use and is not guaranteed to be defined.
    raise SystemExit(1)

print(f"Found WhiteboxTools at: {wb_exec}")

# Extract algorithms using --listtools
algorithms = []
try:
    result = subprocess.run([wb_exec, '--listtools'], capture_output=True, text=True, timeout=30)
except (OSError, subprocess.SubprocessError, UnicodeError) as e:
    print(f"Error extracting algorithms: {e}")
else:
    if result.returncode != 0:
        # Report the failure instead of silently writing an empty CSV.
        print(f"Error extracting algorithms: --listtools exited with status {result.returncode}")
    else:
        for line in result.stdout.split('\n'):
            line = line.strip()
            # Skip blank lines, the 'Available ...' banner and '=' rules.
            if not line or line.startswith(('Available', '=')):
                continue

            # Accepted layouts: "Name: description", "Name - description",
            # or a bare line whose first word is used as the name.
            if ':' in line:
                tool_name, sep, description = line.partition(':')
            elif ' - ' in line:
                tool_name, sep, description = line.partition(' - ')
            else:
                tool_name, description = line.split()[0], line
            tool_name = tool_name.strip()
            description = description.strip()

            if tool_name and len(tool_name) > 1:
                algorithms.append([
                    'WhiteboxTools',
                    'WhiteboxTools',
                    tool_name,
                    description,
                    categorize_tool(tool_name)
                ])

# Save CSV. The encoding is explicit so descriptions are written identically
# on every platform.
with open('/workspace/whitebox_algorithms.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Tool', 'Provider', 'Algorithm_ID', 'Display_Name', 'Group'])
    writer.writerows(algorithms)

print(f"Exported {len(algorithms)} WhiteboxTools algorithms")

Found WhiteboxTools at: /opt/conda/envs/pygile/bin/whitebox_tools
Exported 460 WhiteboxTools algorithms
