# Development Utilities Notebook

This notebook provides helpers for testing and debugging during development, with a focus on reloading modules and exercising the project's utility functions.

## Contents
1. Setup and Path Configuration
2. Module Reloading Utilities
3. Test Cases
4. Example Workflows

In [None]:
import importlib
import os
import pkgutil
import sys
from pathlib import Path
from typing import List

# Make project packages importable from this notebook: resolve the directory
# two levels above the current working directory and register it on sys.path
# (only once, so re-running the cell does not add duplicates).
project_root = Path("../..").resolve()
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print(f"Project root added to path: {project_root}")

In [None]:
def get_all_modules(package_name: str) -> List[str]:
    """Get all modules in a package recursively.

    The package itself is imported, and ``pkgutil.walk_packages`` imports every
    subpackage it descends into, so calling this function runs package
    ``__init__`` code. A subpackage that fails to import is reported on stdout
    and skipped (its submodules are not listed); the scan then continues.

    Args:
        package_name: Name of the package to scan (e.g., 'utils')

    Returns:
        Sorted list of full module paths for leaf modules only
        (e.g., ['utils.aws.opensearch_utils', ...]). Packages themselves are
        not included. Empty if ``package_name`` has no ``__path__`` (i.e. it
        is a plain module rather than a package).

    Raises:
        Exception: Whatever importing ``package_name`` itself raises
            (typically ``ImportError``); only subpackage failures are
            tolerated.
    """
    package = importlib.import_module(package_name)

    search_path = getattr(package, '__path__', None)
    if search_path is None:
        return []

    def report_import_error(failed_name: str) -> None:
        # walk_packages invokes this from inside its own ``except`` block, so
        # the exception being handled is still available via sys.exc_info().
        error = sys.exc_info()[1]
        print(f"Error importing {failed_name}: {str(error)}")

    # Without ``onerror`` walk_packages silently drops ImportError and lets any
    # other exception raised by a subpackage abort the whole scan.
    modules = [
        name
        for _, name, is_pkg in pkgutil.walk_packages(
            search_path, package_name + '.', onerror=report_import_error
        )
        if not is_pkg  # Only keep leaf modules, not packages
    ]

    return sorted(modules)  # Sort for consistent order

def reload_module(module_name: str) -> None:
    """Import a module, or reload it if it is already in ``sys.modules``.

    Progress is printed to stdout. Any exception raised while importing or
    reloading is caught and printed instead of being propagated, so a single
    broken module does not stop a batch reload.

    Args:
        module_name: Full module path (e.g., 'utils.notebook_utils.dataset_utils')
    """
    try:
        if module_name not in sys.modules:
            # First use in this session: a plain import is enough.
            print(f"Importing {module_name}...")
            importlib.import_module(module_name)
        else:
            # Already cached: re-execute the module in place.
            print(f"Reloading {module_name}...")
            cached_module = sys.modules[module_name]
            importlib.reload(cached_module)
    except Exception as exc:
        print(f"Error reloading {module_name}: {str(exc)}")

def clear_module_cache(module_prefix: str = 'utils') -> None:
    """Remove a package and its submodules from the ``sys.modules`` cache.

    Matching is done on a package boundary: with the prefix ``'utils'`` the
    entries ``'utils'`` and ``'utils.<anything>'`` are removed, while unrelated
    modules whose names merely begin with the same characters (for example
    ``'utils_extra'``) are left alone. A prefix that already ends with a dot
    (``'utils.'``) removes only the submodules and keeps the package itself.
    An empty prefix matches nothing.

    The number of removed entries is printed to stdout.

    Args:
        module_prefix: Dotted name of the package (or subpackage) to clear
    """
    if module_prefix.endswith('.'):
        dotted_prefix = module_prefix
    else:
        dotted_prefix = module_prefix + '.'

    # Snapshot the keys first so the filter never iterates a dict that is
    # being modified.
    modules_to_clear = [
        name
        for name in list(sys.modules)
        if name == module_prefix or name.startswith(dotted_prefix)
    ]
    for name in modules_to_clear:
        sys.modules.pop(name, None)
    print(f"Cleared {len(modules_to_clear)} modules from cache")

def reload_all_utils() -> None:
    """Drop every cached ``utils`` module and import the whole package afresh.

    Steps, each reported on stdout:
      1. Evict cached ``utils`` modules from ``sys.modules``.
      2. Discover the leaf modules of the ``utils`` package.
      3. Import (or reload) each discovered module.
    """
    package = 'utils'

    # Start from a clean slate so stale module objects are not reused.
    clear_module_cache(package)

    discovered = get_all_modules(package)
    print(f"\nFound {len(discovered)} modules to reload:")
    if discovered:
        print("\n".join(f"- {module_name}" for module_name in discovered))

    print("\nReloading modules...")
    for module_name in discovered:
        reload_module(module_name)

    print("\nAll utility modules reloaded")

In [None]:
def test_dataset_utils():
    """Smoke-test the dataset utilities: list the registry, then load a dataset.

    Prints every entry of ``DATASET_REGISTRY`` with its description, then tries
    to load the ``covid19_origin`` labeled dataset located under
    ``project_root``. A failure while loading is printed rather than raised.
    """
    # Imported at call time, so the names are looked up on every run of the test.
    from utils.notebook_utils.dataset_utils import DATASET_REGISTRY, load_labeled_dataset

    print("Available datasets in registry:")
    for key, meta in DATASET_REGISTRY.items():
        summary = meta['description']
        print(f"- {key}: {summary}")

    # Test dataset loading
    target_dir = project_root / "datasets/rag_evaluation/labeled/covid19_origin"
    print(f"\nTesting dataset loading from {target_dir}")

    try:
        loaded, docs = load_labeled_dataset(target_dir, download_if_missing=True)
        example_count = len(loaded.examples)
        document_count = len(docs)
        print(f"Successfully loaded dataset with {example_count} examples and {document_count} documents")
    except Exception as exc:
        print(f"Error loading dataset: {str(exc)}")

def run_all_tests():
    """Run every test function defined in this notebook, announcing each one."""
    # (label, callable) pairs; each suite is announced before it runs.
    suites = (
        ("dataset utils", test_dataset_utils),
    )
    for label, suite in suites:
        print(f"Running {label} tests...\n")
        suite()
    print("\nAll tests completed")

In [None]:
# Example: Update and Test Utils
# Clear and re-import every module under the `utils` package so that recent
# source edits take effect, then run the test functions defined in this notebook.
reload_all_utils()
run_all_tests()

In [None]:
# Example: Debug Dataset Loading
# Re-import the `utils` package first so the import below binds to fresh code.
reload_all_utils()

from utils.notebook_utils.dataset_utils import load_labeled_dataset

# Location of the labeled dataset, relative to the project root.
dataset_dir = project_root / "datasets/rag_evaluation/labeled/covid19_origin"
try:
    # NOTE(review): download_if_missing=True presumably fetches the dataset
    # when it is absent locally -- confirm against load_labeled_dataset.
    dataset, documents = load_labeled_dataset(dataset_dir, download_if_missing=True)
    print(f"Success! Loaded {len(dataset.examples)} examples")
except Exception as e:
    print(f"Error: {str(e)}")
    
    # On failure, show whether the directory exists and what it contains to
    # help diagnose the problem.
    print("\nChecking directory structure:")
    print(f"Dataset directory exists: {dataset_dir.exists()}")
    if dataset_dir.exists():
        print("Contents:")
        # Top-level entries only; glob("*") does not descend into subdirectories.
        for item in dataset_dir.glob("*"):
            print(f"- {item.name}")