# PyForge CLI Databricks Extension - Functional Testing

This notebook runs functional tests of the PyForge CLI Databricks extension in Databricks environments (serverless or classic compute).

## Key Features Tested
- **Plugin Discovery**: Test extension loading and discovery
- **Environment Detection**: Serverless vs Classic compute detection
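- **API Methods**: Test forge.convert(), forge.install_datasets() (currently a placeholder: the notebook checks the core CLI version, the supported-format registry, and whether the Databricks extension was discovered)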
- **Fallback Behavior**: Test core converter fallback when PySpark unavailable
- **Error Handling**: Test graceful degradation and error scenarios

## Test Configuration
- **Environment**: Databricks notebook (serverless or classic)
- **Installation Source**: PyPI or Unity Catalog Volume
- **Test Data**: Sample datasets from collection
- **Output Format**: Parquet with performance metrics

## Prerequisites
1. Databricks workspace with compute cluster
2. PyForge CLI with Databricks extension available
3. Sample datasets installed or available
4. Write permissions for test output

## How to Use This Notebook
1. Configure widgets with environment parameters
2. Run all cells in sequence
3. Review test results and performance metrics
4. Check error handling and fallback scenarios

In [None]:
# Widget Configuration
# =============================================================================
# CONFIGURATION SECTION
# =============================================================================

# Create widgets for test configuration.
dbutils.widgets.text("pyforge_version", "latest", "PyForge Version")
dbutils.widgets.dropdown("install_source", "pypi", ["pypi", "volume", "wheel"], "Installation Source")
dbutils.widgets.text("volume_path", "/Volumes/main/default/pyforge", "Volume Path (if using volume)")
dbutils.widgets.dropdown("test_environment", "auto", ["auto", "serverless", "classic"], "Test Environment")
dbutils.widgets.dropdown("test_scope", "basic", ["basic", "comprehensive", "performance"], "Test Scope")
# dbutils.widgets only provides text, dropdown, combobox and multiselect widgets;
# there is no checkbox, so boolean flags are modelled as "true"/"false" dropdowns.
dbutils.widgets.dropdown("force_reinstall", "false", ["true", "false"], "Force Reinstall")
dbutils.widgets.dropdown("verbose_logging", "true", ["true", "false"], "Verbose Logging")

# Read widget values. Widgets always return strings; free-text values are stripped
# so stray whitespace does not break the "latest" comparison or the wheel path.
PYFORGE_VERSION = dbutils.widgets.get("pyforge_version").strip() or "latest"
INSTALL_SOURCE = dbutils.widgets.get("install_source")
VOLUME_PATH = dbutils.widgets.get("volume_path").strip()
TEST_ENVIRONMENT = dbutils.widgets.get("test_environment")
TEST_SCOPE = dbutils.widgets.get("test_scope")
FORCE_REINSTALL = dbutils.widgets.get("force_reinstall").strip().lower() == "true"
VERBOSE_LOGGING = dbutils.widgets.get("verbose_logging").strip().lower() == "true"

# Display the effective configuration so the run is self-describing.
print("🔧 Databricks Extension Functional Test Configuration:")
print(f"   PyForge Version: {PYFORGE_VERSION}")
print(f"   Installation Source: {INSTALL_SOURCE}")
print(f"   Volume Path: {VOLUME_PATH}")
print(f"   Test Environment: {TEST_ENVIRONMENT}")
print(f"   Test Scope: {TEST_SCOPE}")
print(f"   Force Reinstall: {FORCE_REINSTALL}")
print(f"   Verbose Logging: {VERBOSE_LOGGING}")

# Initialize test tracking. Later cells fill one entry each; the summary cell
# reads all of them.
test_results = {
    'environment_detection': None,
    'plugin_discovery': None,
    'extension_loading': None,
    'api_methods': {},
    'fallback_behavior': None,
    'error_handling': None,
    'performance_metrics': {}
}

In [None]:
# Environment Detection and Setup
# =============================================================================
# ENVIRONMENT DETECTION SECTION
# =============================================================================

import sys
import os
import time
from pathlib import Path

print("🔍 Detecting Databricks environment...")

# Detect environment type
def detect_environment():
    """Detect whether the notebook runs on serverless or classic compute.

    Detection order:
      1. The ``DATABRICKS_RUNTIME_VERSION`` environment variable: if it
         contains "serverless" (case-insensitive) the result is 'serverless'.
      2. The notebook's global ``spark`` session: if any Spark conf key or
         value contains "serverless" the result is 'serverless', otherwise
         'classic'.

    Returns:
        str: 'serverless', 'classic', or 'unknown' when no Spark session/conf
        could be inspected.
    """
    try:
        # Check for serverless indicators in the runtime version string.
        runtime_version = os.environ.get('DATABRICKS_RUNTIME_VERSION', '')
        if 'serverless' in runtime_version.lower():
            return 'serverless'

        # Inspect the Spark configuration of the notebook-provided `spark` session.
        try:
            conf_all = spark.conf.getAll
            # Depending on the PySpark version, `getAll` may be a method or a
            # property; it may yield a dict or an iterable of (key, value) pairs.
            if callable(conf_all):
                conf_all = conf_all()
            conf_pairs = conf_all.items() if hasattr(conf_all, 'items') else conf_all
            for key, value in conf_pairs:
                if 'serverless' in str(key).lower() or 'serverless' in str(value).lower():
                    return 'serverless'
            return 'classic'
        except Exception:
            # No `spark` global (NameError) or conf not readable: cannot decide.
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
            return 'unknown'
    except Exception as e:
        print(f"Environment detection error: {e}")
        return 'unknown'

# Resolve the environment under test: an explicit widget choice overrides
# whatever was auto-detected; "auto" defers to detection.
detected_env = detect_environment()
if TEST_ENVIRONMENT == 'auto':
    environment_type = detected_env
else:
    environment_type = TEST_ENVIRONMENT

print(f"✅ Environment detected: {detected_env}")
print(f"✅ Test environment: {environment_type}")

# Probe for PySpark. `pyspark_version` is only bound when the import succeeds,
# so readers must guard on `pyspark_available` first.
pyspark_available = False
try:
    import pyspark
except ImportError:
    print("⚠️ PySpark not available - will test fallback behavior")
else:
    pyspark_version = pyspark.__version__
    pyspark_available = True
    print(f"✅ PySpark available: {pyspark_version}")

# Record the outcome for the summary cell.
test_results['environment_detection'] = dict(
    detected=detected_env,
    configured=environment_type,
    pyspark_available=pyspark_available,
    pyspark_version=(pyspark_version if pyspark_available else None),
)

In [None]:
# PyForge Installation
# =============================================================================
# INSTALLATION SECTION
# =============================================================================

print("📦 Installing PyForge CLI with Databricks extension...")

# Uninstall existing version if force reinstall
if FORCE_REINSTALL:
    print("🔄 Force reinstall enabled - removing existing installation...")
    %pip uninstall -y pyforge-cli

# Install based on source
install_start_time = time.time()
install_success = False

try:
    if INSTALL_SOURCE == "pypi":
        if PYFORGE_VERSION == "latest":
            %pip install --no-cache-dir pyforge-cli[databricks]
        else:
            %pip install --no-cache-dir pyforge-cli[databricks]=={PYFORGE_VERSION}
    elif INSTALL_SOURCE == "volume":
        wheel_path = f"{VOLUME_PATH}/pyforge_cli-*.whl"
        %pip install --no-cache-dir {wheel_path}[databricks]
    elif INSTALL_SOURCE == "wheel":
        # Assume wheel is in current directory or provided path
        %pip install --no-cache-dir ./pyforge_cli-*.whl[databricks]
    
    install_success = True
    print("✅ PyForge CLI installed successfully")
    
except Exception as e:
    print(f"❌ Installation failed: {e}")
    install_success = False

install_time = time.time() - install_start_time
test_results['performance_metrics']['install_time'] = install_time

print(f"⏱️ Installation time: {install_time:.2f} seconds")

if not install_success:
    print("❌ Cannot proceed without successful installation")
    dbutils.notebook.exit("Installation failed")

In [None]:
# Plugin Discovery and Extension Loading Test
# =============================================================================
# PLUGIN SYSTEM TESTING SECTION
# =============================================================================

print("🔌 Testing plugin discovery and extension loading...")

# Outcome holders; they keep their "failed" defaults unless the whole
# discovery + initialization sequence below completes.
loaded_extensions = []
discovery_success = False
discovery_start_time = time.time()

try:
    # Discover the installed extensions through the plugin system.
    import pyforge_cli.plugin_system.discovery as discovery

    plugin_discovery = discovery.PluginDiscovery()
    extensions = plugin_discovery.discover_extensions()

    print(f"✅ Plugin discovery successful - found {len(extensions)} extensions")

    # Report each discovered extension and remember its name for later cells.
    for ext_name, _ext_info in extensions.items():
        print(f"   📋 Extension: {ext_name}")
        loaded_extensions.append(ext_name)

    # Initialize the extensions and report the per-extension outcome.
    init_results = plugin_discovery.initialize_extensions()

    print(f"✅ Extension initialization results:")
    for ext_name, initialized in init_results.items():
        if initialized:
            icon, outcome = "✅", "Initialized"
        else:
            icon, outcome = "❌", "Failed"
        print(f"   {icon} {ext_name}: {outcome}")

except Exception as e:
    print(f"❌ Plugin discovery failed: {e}")
else:
    # Reached only when nothing above raised.
    discovery_success = True

discovery_time = time.time() - discovery_start_time
test_results['performance_metrics']['discovery_time'] = discovery_time
test_results['extension_loading'] = loaded_extensions
test_results['plugin_discovery'] = discovery_success

print(f"⏱️ Plugin discovery time: {discovery_time:.2f} seconds")

In [None]:
# Test Databricks Extension API Methods
# =============================================================================
# API METHODS TESTING SECTION
# =============================================================================

print("🧪 Testing Databricks extension API methods...")

api_test_results = {}

# Start the timer before anything can fail, so `api_time` is always defined
# when it is reported below (previously an import failure left it unbound).
api_start_time = time.time()

try:
    # Test forge.convert() method
    print("Testing forge.convert() method...")

    # Import the main API
    import pyforge_cli

    try:
        # This would be the Databricks extension API once implemented
        # For now, test the core CLI

        # Test basic import and version
        version = pyforge_cli.__version__
        print(f"✅ PyForge CLI version: {version}")
        api_test_results['version_check'] = True

        # Test plugin registry
        from pyforge_cli.plugins import registry
        formats = registry.list_supported_formats()
        print(f"✅ Supported formats: {len(formats)}")
        api_test_results['formats_check'] = True

        # Test if Databricks extension is available (exact, case-insensitive
        # name match against the extensions found by the discovery cell).
        databricks_available = any(ext.lower() == 'databricks' for ext in loaded_extensions)
        print(f"✅ Databricks extension available: {databricks_available}")
        api_test_results['databricks_extension'] = databricks_available

    except Exception as e:
        # A failing check is recorded but does not abort the notebook.
        print(f"❌ API method test failed: {e}")
        api_test_results['api_error'] = str(e)

except Exception as e:
    # Reached when pyforge_cli itself cannot be imported.
    print(f"❌ API testing failed: {e}")
    api_test_results['import_error'] = str(e)

finally:
    # Record the timing on every path, including import failure.
    api_time = time.time() - api_start_time
    test_results['performance_metrics']['api_test_time'] = api_time

test_results['api_methods'] = api_test_results
print(f"⏱️ API testing time: {api_time:.2f} seconds")

In [None]:
# Test Fallback Behavior
# =============================================================================
# FALLBACK BEHAVIOR TESTING SECTION
# =============================================================================

print("🔄 Testing fallback behavior...")

fallback_results = {}

try:
    # The core converters must be importable for any fallback to work.
    from pyforge_cli.converters import csv_converter, excel_converter

    print("✅ Core converters available:")
    print("   📊 CSV converter loaded")
    print("   📈 Excel converter loaded")
    fallback_results['core_converters'] = True

    # pandas is one of the fallback libraries; a missing install is recorded, not fatal.
    try:
        import pandas as pd
    except ImportError:
        print("❌ Pandas not available")
        fallback_results['pandas_available'] = False
    else:
        pandas_version = pd.__version__
        print(f"✅ Pandas available: {pandas_version}")
        fallback_results['pandas_available'] = True

    # pyarrow is the other fallback library; handled the same way.
    try:
        import pyarrow as pa
    except ImportError:
        print("❌ PyArrow not available")
        fallback_results['pyarrow_available'] = False
    else:
        pyarrow_version = pa.__version__
        print(f"✅ PyArrow available: {pyarrow_version}")
        fallback_results['pyarrow_available'] = True

    # Record which conversion path is preferred: PySpark only on serverless
    # with PySpark importable, pandas in every other case.
    use_pyspark_path = environment_type == 'serverless' and pyspark_available
    if not use_pyspark_path:
        print("✅ Classic environment or no PySpark - testing pandas path")
        fallback_results['preferred_path'] = 'pandas'
    else:
        print("✅ Serverless environment with PySpark - testing PySpark path")
        fallback_results['preferred_path'] = 'pyspark'

except Exception as e:
    print(f"❌ Fallback testing failed: {e}")
    fallback_results['error'] = str(e)

test_results['fallback_behavior'] = fallback_results

In [None]:
# Error Handling and Edge Cases
# =============================================================================
# ERROR HANDLING TESTING SECTION
# =============================================================================

print("⚠️ Testing error handling and edge cases...")

error_handling_results = {}

try:
    # Test handling of missing dependencies
    print("Testing missing dependency handling...")

    # Test graceful degradation
    try:
        # Simulate missing optional dependency
        import sys

        # Temporarily hide databricks modules. Only the first match is hidden
        # to avoid breaking the environment.
        databricks_modules = [name for name in list(sys.modules) if 'databricks' in name.lower()]
        hidden_modules = {}
        try:
            for module in databricks_modules[:1]:
                if module in sys.modules:
                    hidden_modules[module] = sys.modules.pop(module)

            print("✅ Graceful degradation test completed")
            error_handling_results['missing_dependency'] = True
        finally:
            # Always put the hidden modules back, even if the check above raised,
            # so later cells never see a half-removed package.
            sys.modules.update(hidden_modules)

    except Exception as e:
        print(f"⚠️ Dependency test error: {e}")
        error_handling_results['missing_dependency'] = False

    # Test invalid parameter handling
    print("Testing invalid parameter handling...")

    try:
        # Test with invalid configuration (placeholder: not yet passed to any API)
        invalid_config = {
            'invalid_param': 'invalid_value',
            'bad_path': '/nonexistent/path',
            'bad_format': 'unsupported_format'
        }

        # This would test the actual API once implemented
        print("✅ Invalid parameter handling test prepared")
        error_handling_results['invalid_params'] = True

    except Exception as e:
        print(f"⚠️ Parameter validation error: {e}")
        error_handling_results['invalid_params'] = False

    # Test timeout handling
    print("Testing timeout scenarios...")
    try:
        # Simulate timeout scenario with a short sleep and measure it
        import time
        start_time = time.time()

        # Quick timeout test
        timeout_duration = 0.1
        time.sleep(timeout_duration)

        elapsed = time.time() - start_time
        print(f"✅ Timeout handling test completed in {elapsed:.3f}s")
        error_handling_results['timeout_handling'] = True

    except Exception as e:
        print(f"⚠️ Timeout test error: {e}")
        error_handling_results['timeout_handling'] = False

except Exception as e:
    print(f"❌ Error handling testing failed: {e}")
    error_handling_results['test_error'] = str(e)

test_results['error_handling'] = error_handling_results

In [None]:
# Test Results Summary and Reporting
# =============================================================================
# RESULTS SUMMARY SECTION
# =============================================================================

print("📊 Generating test results summary...")

import json
from datetime import datetime

# Add metadata to results
test_results['metadata'] = {
    'test_type': 'functional',
    'notebook_name': '01-databricks-extension-functional',
    'timestamp': datetime.now().isoformat(),
    'environment': environment_type,
    'configuration': {
        'pyforge_version': PYFORGE_VERSION,
        'install_source': INSTALL_SOURCE,
        'test_scope': TEST_SCOPE,
        'force_reinstall': FORCE_REINSTALL
    }
}

# Normalise the per-section results so a skipped cell (value still None) is
# reported as a failure instead of crashing the summary.
summary_extensions = test_results['extension_loading'] or []
summary_api = test_results['api_methods'] or {}
summary_fallback = test_results['fallback_behavior'] or {}
summary_error_handling = test_results['error_handling'] or {}

# Calculate overall test success
critical_tests = [
    test_results['environment_detection'] is not None,
    test_results['plugin_discovery'] is True,
    len(summary_extensions) > 0,
    summary_api.get('version_check', False)
]

overall_success = all(critical_tests)
test_results['overall_success'] = overall_success

# Only a literal True counts as a pass: the API cell stores error messages as
# strings under 'api_error'/'import_error', and those must not look like passes.
api_passed = sum(1 for outcome in summary_api.values() if outcome is True)
# The fallback cell records an 'error' key when it fails as a whole.
fallback_ok = bool(summary_fallback) and 'error' not in summary_fallback
# Every error-handling check stores True on success; anything else
# (False or a 'test_error' message) is a failure.
error_handling_ok = bool(summary_error_handling) and all(
    outcome is True for outcome in summary_error_handling.values()
)

# Display summary
print("\n" + "="*60)
print("🎯 DATABRICKS EXTENSION FUNCTIONAL TEST SUMMARY")
print("="*60)

status_icon = "✅" if overall_success else "❌"
print(f"{status_icon} Overall Test Status: {'PASSED' if overall_success else 'FAILED'}")
print(f"🕐 Test Duration: {sum(test_results['performance_metrics'].values()):.2f} seconds")
print(f"🌐 Environment: {environment_type}")
print(f"📦 PyForge Version: {PYFORGE_VERSION}")

print("\n📋 Test Results:")
print(f"   Environment Detection: {'✅' if test_results['environment_detection'] else '❌'}")
print(f"   Plugin Discovery: {'✅' if test_results['plugin_discovery'] else '❌'}")
print(f"   Extensions Loaded: {len(summary_extensions)}")
print(f"   API Methods: {api_passed}/{len(summary_api)} passed")
print(f"   Fallback Behavior: {'✅' if fallback_ok else '❌'}")
print(f"   Error Handling: {'✅' if error_handling_ok else '❌'}")

print("\n⏱️ Performance Metrics:")
for metric, value in test_results['performance_metrics'].items():
    print(f"   {metric.replace('_', ' ').title()}: {value:.2f}s")

# Serialise results for analysis; default=str covers any non-JSON values.
results_json = json.dumps(test_results, indent=2, default=str)
print("\n💾 Test results saved to test_results variable")

if VERBOSE_LOGGING:
    print("\n📝 Detailed Results:")
    # Truncate long output to keep the notebook readable.
    print(results_json[:1000] + "..." if len(results_json) > 1000 else results_json)

print("\n" + "="*60)
print("🏁 Functional testing completed!")
print("="*60)