#  Multinode Inference
> Multinode Inference is a workflow for running inference on a large number of images across multiple nodes.


In [None]:
#| default_exp inference.multinode_inference


In [None]:
#| hide
%load_ext autoreload
%autoreload 2


In [None]:
#| export
from pathlib import Path
from typing import (
    Union, List, Dict, Any, 
    Tuple, Optional,Set,
    Callable,Tuple
)
import sys
import inspect
import json

import numpy as np
import cloudpickle

In [None]:
#| export
from be_vision_ad_tools.inference.multinode_from_aiop_tool import *

# just checking whether multinode_from_aiop_tool is working

In [None]:
# Test 1: Normal command execution and submission to node
#print("\n‚úÖ Test 1: Normal command execution and submission to node")
#dist1 = DistributeHPC(worker=2)
#job_ls = HPC_Job(cmd=["ls", "-la"])
#dist1.set_jobs([job_ls])
#print(f"   ‚úì Created HPC_Job with command: {' '.join(job_ls.command)}")
#print(f"   ‚úì Job state: {job_ls.state}")
#print(f"   ‚úì Submitted job to DistributeHPC with {dist6.worker} workers")
#dist1.start()
#print(f"   ‚úì Submitted job to node")

## Test Imports from Both Notebooks


In [None]:
#| export
from be_vision_ad_tools.inference.multinode_from_aiop_tool import (
    HPC_Job, 
    DistributeHPC,
    print_status)
from be_vision_ad_tools.inference.prediction_system import (
    predict_image_list_from_file
)


In [None]:
MODEL_PATH= Path(
    r'/home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt')
DATA_PATH= Path(
    r'/home/ai_dsx.work/data/projects/AD_tool_test/images/')
GOOD_PATH= DATA_PATH / 'good'
BAD_PATH= DATA_PATH / 'bad'
good_images = list(GOOD_PATH.ls())
bad_images = list(BAD_PATH.ls())




In [None]:
#| export
def test_multinode_imports() -> Dict[str, bool]:
    """Test that all required imports from notebooks 10 and 11 work correctly."""
    results = {}
    
    # Test imports from notebook 11 (multinode infrastructure)
    try:
        from be_vision_ad_tools.inference.multinode_from_aiop_tool import (
            HPC_Job, DistributeHPC, print_status
        )
        results['HPC_Job'] = True
        results['DistributeHPC'] = True
        results['print_status'] = True
        print("‚úÖ Successfully imported from multinode_from_aiop_tool:")
        print(f"   - HPC_Job: {HPC_Job}")
        print(f"   - DistributeHPC: {DistributeHPC}")
        print(f"   - print_status: {print_status}")
    except Exception as e:
        results['multinode_imports'] = False
        print(f"‚ùå Failed to import from multinode_from_aiop_tool: {e}")
        return results
    
    # Test imports from notebook 10 (prediction system)
    try:
        from be_vision_ad_tools.inference.multinode_infer import (
            create_inference_command, create_batch_list_file
        )
        results['create_inference_command'] = True
        results['create_batch_list_file'] = True
        print("\n‚úÖ Successfully imported from multinode_infer:")
        print(f"   - create_inference_command: {create_inference_command}")
        print(f"   - create_batch_list_file: {create_batch_list_file}")
    except Exception as e:
        results['prediction_imports'] = False
        print(f"‚ùå Failed to import from multinode_infer: {e}")
        return results
    
    # Test predict_image_list_from_file
    try:
        from be_vision_ad_tools.inference.prediction_system import (
            predict_image_list_from_file
        )
        results['predict_image_list_from_file'] = True
        print("\n‚úÖ Successfully imported from prediction_system:")
        print(f"   - predict_image_list_from_file: {predict_image_list_from_file}")
    except Exception as e:
        results['predict_func'] = False
        print(f"‚ùå Failed to import predict_image_list_from_file: {e}")
        return results
    
    print("\nüéâ All imports successful!")
    return results


In [None]:
# Test the imports immediately
import_results = test_multinode_imports()
print(f"\nüìä Import Results: {import_results}")


‚úÖ Successfully imported from multinode_from_aiop_tool:
   - HPC_Job: <class 'be_vision_ad_tools.inference.multinode_from_aiop_tool.HPC_Job'>
   - DistributeHPC: <class 'be_vision_ad_tools.inference.multinode_from_aiop_tool.DistributeHPC'>
   - print_status: <function print_status>

‚úÖ Successfully imported from multinode_infer:
   - create_inference_command: <function create_inference_command>
   - create_batch_list_file: <function create_batch_list_file>

‚úÖ Successfully imported from prediction_system:
   - predict_image_list_from_file: <function predict_image_list_from_file>

üéâ All imports successful!

üìä Import Results: {'HPC_Job': True, 'DistributeHPC': True, 'print_status': True, 'create_inference_command': True, 'create_batch_list_file': True, 'predict_image_list_from_file': True}


## Smart Folder Scanner
Scans folder structure to detect flat vs nested patterns


In [None]:
from nbdev.showdoc import doc

In [None]:
#| export
def get_image_extensions() -> Set[str]:
    """Get set of supported image file extensions."""
    return {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}


def is_image_file(path: Path) -> bool:
    """Check if file is a supported image format."""
    return path.is_file() and path.suffix.lower() in get_image_extensions()


In [None]:
sm_img = bad_images[0]
print(sm_img)
is_image_file(sm_img)

/home/ai_dsx.work/data/projects/AD_tool_test/images/bad/3042400443552714.png


True

In [None]:
#| export
def get_subdirectories(root: Path) -> List[Path]:
    """Get list of subdirectories in root path."""
    return [p for p in root.iterdir() if p.is_dir()]


In [None]:
get_subdirectories(DATA_PATH)

[Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/inference_results'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/test_hyperparameter_results'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/test_hyperparameter_models'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/bad'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/test_hyperparameter_posters'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/hyperparameter_models'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/multinode_results_20250917_080243')]

In [None]:
get_subdirectories(GOOD_PATH)

[]

In [None]:
structure = {
        "type": "nested",
        "total_images": 0,
        "folders": {},
        "all_image_paths": []
    }
for sb in get_subdirectories(DATA_PATH):
    print(sb.name)
    lt_nm = sb.name
    lt_imgs = []
    for img in sb.iterdir():
        if is_image_file(img):
            lt_imgs.append(img)
            structure["all_image_paths"].append(lt_imgs)

        if lt_imgs:
            structure["folders"][lt_nm] = lt_imgs
            structure["total_images"] += len(lt_imgs)
    print(f"   {lt_nm}: {len(lt_imgs)} images")
#| export

inference_results
   inference_results: 0 images
test_hyperparameter_results
   test_hyperparameter_results: 0 images
test_hyperparameter_models
   test_hyperparameter_models: 0 images
good
   good: 85 images
bad
   bad: 2 images
test_hyperparameter_posters
   test_hyperparameter_posters: 0 images
hyperparameter_models
   hyperparameter_models: 1 images
multinode_results_20250917_080243
   multinode_results_20250917_080243: 0 images


In [None]:
print(structure)

{'type': 'nested', 'total_images': 3659, 'folders': {'good': [Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401113552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401115552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401118552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401129552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401133552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401146552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401172552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401185552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401186552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/2462401221552714.png'), Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/good/30424

In [None]:
#| export
def scan_nested_structure(
    root: Path,  # Root folder to scan
) -> Dict[str, Any]:  # Returns folder structure
    """Scan nested folder structure (folder/lot/images)."""
    structure = {
        "type": "nested",
        "total_images": 0,
        "folders": {},
        "all_image_paths": []
    }
    
    print(f"üìÅ Detected NESTED structure in: {root}")
    
    for subdir in get_subdirectories(root):
        lot_name = subdir.name
        lot_images = []
        
        for img_path in subdir.iterdir():
            if is_image_file(img_path):
                lot_images.append(img_path)
                structure["all_image_paths"].append(img_path)
        
        if lot_images:
            structure["folders"][lot_name] = lot_images
            structure["total_images"] += len(lot_images)
            print(f"   üìÇ Lot '{lot_name}': {len(lot_images)} images")
    
    return structure



In [None]:
rs = scan_nested_structure(
    DATA_PATH,
)

üìÅ Detected NESTED structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images
   üìÇ Lot 'good': 85 images
   üìÇ Lot 'bad': 2 images
   üìÇ Lot 'hyperparameter_models': 1 images


In [None]:
#| export
def scan_flat_structure(
    root: Path,  # Root folder to scan
) -> Dict[str, Any]:  # Returns folder structure
    """Scan flat folder structure (all images in root)."""
    structure = {
        "type": "flat",
        "total_images": 0,
        "folders": {},
        "all_image_paths": []
    }
    
    print(f"üìÑ Detected FLAT structure in: {root}")
    
    for img_path in root.iterdir():
        if is_image_file(img_path):
            structure["all_image_paths"].append(img_path)
    
    structure["total_images"] = len(structure["all_image_paths"])
    print(f"   üì∑ Total images: {structure['total_images']}")
    
    return structure


In [None]:
rs = scan_flat_structure(
    GOOD_PATH,
)



üìÑ Detected FLAT structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
   üì∑ Total images: 85


In [None]:
#| export
def scan_folder_structure(
    root_path: Union[str, Path]  # Root path to scan
) -> Dict[str, Any]:  # Returns folder structure
    """Recursively scan folder structure and categorize as flat or nested."""
    root = Path(root_path)
    
    if not root.exists():
        raise ValueError(f"Path does not exist: {root_path}")
    
    image_extensions = get_image_extensions()
    subdirs = get_subdirectories(root)
    
    if subdirs:
        structure = scan_nested_structure(root)
    else:
        structure = scan_flat_structure(root)
    
    print(f"\n‚úÖ Scan complete: {structure['total_images']} total images")
    return structure


In [None]:
rs_n = scan_folder_structure(
    DATA_PATH,
)


üìÅ Detected NESTED structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images
   üìÇ Lot 'good': 85 images
   üìÇ Lot 'bad': 2 images
   üìÇ Lot 'hyperparameter_models': 1 images

‚úÖ Scan complete: 88 total images


In [None]:
rs = scan_folder_structure(
    GOOD_PATH,
)


üìÑ Detected FLAT structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
   üì∑ Total images: 85

‚úÖ Scan complete: 85 total images


In [None]:
# Test the folder scanner with a real path
test_folder = "/home/ai_dsx.work/data/projects/AD_tool_test/images/good"
try:
    folder_info = scan_folder_structure(test_folder)
    print(f"\nüìä Structure Summary:")
    print(f"   Type: {folder_info['type']}")
    print(f"   Total Images: {folder_info['total_images']}")
    print(f"   Folders: {len(folder_info['folders'])}")
except Exception as e:
    print(f"‚ö†Ô∏è Test failed (may need actual data): {e}")


üìÑ Detected FLAT structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
   üì∑ Total images: 85

‚úÖ Scan complete: 85 total images

üìä Structure Summary:
   Type: flat
   Total Images: 85
   Folders: 0


## Smart Batch Creator
Creates balanced batches from folder structure, keeping lots together when possible


In [None]:
btch=40
all_images = rs['all_image_paths']
nm_btchs = (len(all_images) + btch - 1) // btch
print(nm_btchs)

3


In [None]:
btchs = [[] for _ in range(nm_btchs)]
print(btchs)
for i,pth in enumerate(all_images):
    idx_ = i % nm_btchs
    btchs[idx_].append(pth)
print(len(btchs[0]),len(btchs[1]),len(btchs[2]))

[[], [], []]
29 28 28


In [None]:
#| export
def _create_batches_from_flat_structure(
    folder_info: Dict[str, Any],  # Folder structure information
    batch_size: int  # Maximum images per batch
) -> List[List[Path]]:  # Returns list of batches
    """Create batches from flat folder structure using round-robin distribution."""
    all_images = folder_info['all_image_paths']
    num_batches = (len(all_images) + batch_size - 1) // batch_size
    batches = [[] for _ in range(num_batches)]
    
    for i, img_path in enumerate(all_images):
        batch_idx = i % num_batches
        batches[batch_idx].append(img_path)
    
    return batches


In [None]:
batches = _create_batches_from_flat_structure(
    rs,
    40
)
len(batches[2])


28

## In case of nested structure

1. split large lot into chunks
2. add lot to batch if it fits
3. if it doesn't fit, start new batch
4. add final batch if it exists


In [None]:
for k, v in rs_n['folders'].items():
    print(k, len(v))


good 85
bad 2
hyperparameter_models 1


In [None]:
#| export
def _split_large_lot(
    lot_images: List[Path],  # Images in the lot
    batch_size: int,  # Maximum images per batch
    lot_name: str  # Name of the lot for logging
) -> List[List[Path]]:  # Returns list of batches
    """Split a large lot into multiple batches."""
    batches = []
    lot_size = len(lot_images)
    
    for i in range(0, lot_size, batch_size):
        chunk = lot_images[i:i + batch_size]
        batches.append(chunk)
    
    return batches



In [None]:
splt_lot = _split_large_lot(rs_n['folders']['good'], 10, 'good')
len(splt_lot[0]), len(splt_lot[1]), len(splt_lot[2])






(10, 10, 10)

In [None]:
#| export
def _add_lot_to_batch(
    current_batch: List[Path],  # Current batch being built
    lot_images: List[Path],  # Images to add
    batch_size: int  # Maximum batch size
) -> bool:  # Returns True if lot was added, False if it doesn't fit
    """Add lot to current batch if it fits."""
    if len(current_batch) + len(lot_images) <= batch_size:
        current_batch.extend(lot_images)
        return True
    return False


In [None]:
#| export

def _create_batches_from_nested_structure(
    folder_info: Dict[str, Any],  # Folder structure information
    batch_size: int  # Maximum images per batch
) -> List[List[Path]]:  # Returns list of batches
    """Create batches from nested folder structure keeping lots together."""
    batches = []
    current_batch = []
    
    for lot_name, lot_images in folder_info['folders'].items():
        lot_size = len(lot_images)
        
        if lot_size > batch_size:
            # Large lot: split across multiple batches
            if current_batch:
                batches.append(current_batch)
                current_batch = []
            
            # Split large lot into chunks
            lot_batches = _split_large_lot(lot_images, batch_size, lot_name)
            batches.extend(lot_batches)
            
        elif _add_lot_to_batch(current_batch, lot_images, batch_size):
            # Lot fits in current batch
            pass
            
        else:
            # Lot doesn't fit, start new batch
            if current_batch:
                batches.append(current_batch)
            current_batch = lot_images.copy()
    
    # Add final batch
    if current_batch:
        batches.append(current_batch)
    
    return batches

In [None]:
batches_n = _create_batches_from_nested_structure(rs_n, 40)
len(batches_n)
batches_n[-1]

[Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/bad/3042400443552714.png'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/bad/3042400444552714.png'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/hyperparameter_models/test_hyperparameter_comparison.png')]

In [None]:
#| export
def create_smart_batches(
    folder_info: Dict[str, Any],  # Folder structure from scan_folder_structure
    batch_size: int  # Maximum images per batch
) -> List[List[Path]]:  # Returns list of batches (each batch is list of image paths)
    """Create balanced batches from folder structure with smart lot handling."""
    
    structure_type = folder_info['type']
    
    if structure_type == "nested":
        batches = _create_batches_from_nested_structure(folder_info, batch_size)
    else:
        batches = _create_batches_from_flat_structure(folder_info, batch_size)
    
    return batches


In [None]:
btches = create_smart_batches(rs_n, 40)

In [None]:
len(btches)

4

In [None]:
btches[-1]

[Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/bad/3042400443552714.png'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/bad/3042400444552714.png'),
 Path('/home/ai_dsx.work/data/projects/AD_tool_test/images/hyperparameter_models/test_hyperparameter_comparison.png')]

In [None]:
# Test batch creation
try:
    batches = create_smart_batches(rs_n, batch_size=50)
    print(f"\nüìä Batch Summary:")
    for i, batch in enumerate(batches):
        print(f"   Batch {i+1}: {len(batch)} images")
except Exception as e:
    print(f"‚ö†Ô∏è Test skipped (needs folder_info from previous test): {e}")



üìä Batch Summary:
   Batch 1: 50 images
   Batch 2: 35 images
   Batch 3: 3 images


## Multinode Job Factory
Converts image batches into HPC_Job objects


In [None]:
#| export
def create_batch_id(
    batch_index: int#batch index
    ) -> str:
    """Generate formatted batch ID from index."""
    return f"batch_{batch_index+1:04d}"


In [None]:
#| export
def create_batch_list_file_path(
    output_dir: Path,  # output directory
    batch_id: str  # batch id
     
    ) -> Path:
    """Generate batch list file path."""
    return output_dir / "batch_lists" / f"{batch_id}_images.txt"


In [None]:
#| export
def setup_output_directory(
    output_dir: Path  # output directory
     
    ) -> Path:
    """Create output directory structure."""
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    return output_path


In [None]:
#| export
def create_batch_list_file(
    batch: List[Path], # list of image paths
    batch_list_file: Path # path to save batch list file
    ) -> None:
    """Create text file with image paths for batch processing."""
    batch_list_file.parent.mkdir(parents=True, exist_ok=True)
    with open(batch_list_file, 'w') as f:
        for img_path in batch:
            f.write(f"{img_path}\n")

In [None]:
MODEL_PATH

Path('/home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt')

In [None]:
#| export
def _serialize_preprocessing_fn(preprocessing_fn: Callable) -> str:
    """Serialize preprocessing function to importable module path."""
    if preprocessing_fn is None:
        return None
    
    # Get the module and function name
    module = inspect.getmodule(preprocessing_fn)
    if module is None:
        raise ValueError(f"Cannot serialize function {preprocessing_fn}: module not found")
    
    module_name = module.__name__
    function_name = preprocessing_fn.__name__
    
    # Return as "module.path.function_name" format
    # If function is in __main__ (notebook), we need to include its source code
    if module_name == "__main__":
        try:
            source_code = inspect.getsource(preprocessing_fn)
            return ("source", source_code)
        except (OSError, TypeError):
            raise ValueError(f"Cannot get source code for function {preprocessing_fn}. "
                           f"Please define it in a proper module for HPC execution.")
    
    # Otherwise, return as importable module path
    return ("import", f"{module_name}.{function_name}")

In [None]:
def create_half_black_image(
    image: np.ndarray, # Input image as numpy array
) -> np.ndarray:
    """Create an image with half of it blacked out."""
    side='left'
    if not isinstance(image, np.ndarray):
        raise TypeError("image must be a numpy array")
    
    if side not in ["left", "right", "top", "bottom"]:
        raise ValueError(f"Invalid side: {side}. Must be one of: left, right, top, bottom")
    
    # Create a copy to avoid modifying the original
    result = image.copy()
    
    height, width = image.shape[:2]
    
    if side == "left":
        result[:, :width//2] = 0
    elif side == "right":
        result[:, width//2:] = 0
    elif side == "top":
        result[:height//2, :] = 0
    elif side == "bottom":
        result[height//2:, :] = 0
    
    return result

In [None]:
def serialize_preprocessing_function(
    preprocessing_fn: Callable,
    output_dir: Path,
    batch_id: str
) -> Optional[Path]:
    """Serialize preprocessing function for HPC execution."""
    if preprocessing_fn is None:
        return None
    
    try:
        preprocess_path = output_dir / "preprocessing" / f"{batch_id}_fn.pkl"
        preprocess_path.parent.mkdir(parents=True, exist_ok=True)
        
        with open(preprocess_path, 'wb') as f:
            cloudpickle.dump(preprocessing_fn, f)
        
        return preprocess_path
    except Exception as e:
        print(f"‚ö†Ô∏è  Failed to serialize preprocessing function: {e}")
        return None

In [None]:
fn_path = serialize_preprocessing_function(
    preprocessing_fn=create_half_black_image,
    output_dir=Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode"),
    batch_id="batch_0001"
)

In [None]:
import cloudpickle
with open(fn_path, 'rb') as f:
    fn = cloudpickle.load(f)
fn


<function __main__.create_half_black_image(image: numpy.ndarray) -> numpy.ndarray>

In [None]:
preprocessing_kwargs = {'side':'left'}
print(json.dumps(preprocessing_kwargs))
# Escape single quotes in JSON string for shell command compatibility
# This prevents shell parsing errors when the JSON string is embedded in a command
print(json.dumps(preprocessing_kwargs).replace("'", "\\'"))

{"side": "left"}
{"side": "left"}


In [None]:
#| export  
def create_inference_command_from_file(
    model_path: Path,# Path to trained model
    batch_list_file: Path, # Path to batch list file
    batch_id: str, # Batch ID
    output_dir: Path, # Output directory
    save_heatmap: bool = True, # Save heatmaps
    heatmap_style: str = "combined", # Heatmap style
    compress: bool = True, # Whether to compress the image (JPEG format)
    jpeg_quality: int = 95, # JPEG compression quality (0-100, higher is better)
    preprocessing_fn: Callable = None, # Preprocessing function
    preprocessing_kwargs: dict = None, # Preprocessing function kwargs
    **kwargs# Additional keyword arguments, e.g. preprocess_fn, preprocess_fn_kwargs
) -> List[str]:  # Returns list of command arguments for proper HPC execution
    """Create Python command list for batch inference with optional preprocessing parameters."""

    # Build Python code that reconstructs the preprocessing function
    python_code_parts = [

        "from be_vision_ad_tools.inference.prediction_system import *",
        "import importlib",
        "import numpy as np",
        "import torch",
        "import inspect",
        "from typing import Tuple",
        "import json"
    ]
    
    # Serialize preprocessing function and kwargs for HPC execution
    if preprocessing_fn is not None:
        serialization = _serialize_preprocessing_fn(preprocessing_fn)
        # Check if serialization failed (function couldn't be serialized)
        if serialization is None:
            # No preprocessing function available, set both to None
            python_code_parts.append("preprocessing_fn = None")
            python_code_parts.append("preprocessing_kwargs = None")
        else:
            # Unpack the serialization result into type and value
            serialization_type, serialization_value = serialization
            
            # Check if we need to embed the function's source code
            if serialization_type == "source":
                # Function is defined in __main__ (notebook), include source code
                # Use base64 encoding to safely embed source code
                import base64
                
                # Convert the function source code string to bytes
                source_bytes = serialization_value.encode('utf-8')
                
                # Encode the bytes to base64 for safe transmission
                source_b64 = base64.b64encode(source_bytes).decode('ascii')
                
                # Add code to decode and execute the function source in the subprocess
                python_code_parts.append(
                    "import base64; "  # Import base64 module
                    f"source_code = base64.b64decode('{source_b64}').decode('utf-8'); "  # Decode the function source
                    "exec(source_code)"  # Execute the source code to define the function
                )
                
                # Add code to assign the function by its name
                python_code_parts.append(f"preprocessing_fn = {preprocessing_fn.__name__}")
            else:
                # Function is in an importable module (not from notebook)
                # Split the full module path to get module and function name
                module_path, function_name = serialization_value.rsplit('.', 1)
                
                # Add code to import the module and get the function
                python_code_parts.append(
                    f"module = importlib.import_module('{module_path}'); "  # Import the module
                    f"preprocessing_fn = getattr(module, '{function_name}')"  # Get the function from module
                )
            
            # Handle preprocessing function keyword arguments
            if preprocessing_kwargs:
                # Convert kwargs dict to JSON string
                kwargs_json = json.dumps(preprocessing_kwargs).replace("'", "\\'")
                
                # Add code to parse the JSON string back to a dict
                python_code_parts.append(f"preprocessing_kwargs = json.loads('{kwargs_json}')")
            else:
                # No kwargs provided, set to None
                python_code_parts.append("preprocessing_kwargs = None")
    else:
        # No preprocessing function was provided at all
        python_code_parts.append("preprocessing_fn = None")
        python_code_parts.append("preprocessing_kwargs = None")
    
    # Build list of function arguments (note: this args list is not used in final command)
    args = [
        f"model_path='{model_path}'",  # Path to the trained model file
        f"image_list_file='{batch_list_file}'",  # Path to file containing list of images
        f"batch_id='{batch_id}'",  # Unique identifier for this batch
        f"output_dir='{output_dir}'",  # Directory where results will be saved
        f"save_heatmap={save_heatmap}",  # Boolean flag to save heatmap visualizations
        f"heatmap_style='{heatmap_style}'",  # Style of heatmap (e.g., "combined", "side_by_side")
        f"compress={compress}",  # Boolean flag to compress the image (JPEG format)
        f"jpeg_quality={jpeg_quality}"  # JPEG compression quality (0-100, higher is better)    
    ]
    for key, value in kwargs.items():
        if value is None:
            args.append(f"{key}=None")
        elif isinstance(value, str):
            args.append(f"{key}='{value}'")
        elif isinstance(value, (int, float, bool)):
            args.append(f"{key}={value}")
        elif isinstance(value, dict):
            kwargs_json = json.dumps(value).replace("'", "\\'")
            args.append(f"{key}='{kwargs_json}'")
        else:
            # For other types, convert to string representation
            args.append(f"{key}={repr(value)}")
    
    # Build the final command
    call_args = ', '.join([
        f"model_path='{model_path}'",
        f"image_list_file='{batch_list_file}'",
        f"batch_id='{batch_id}'",
        f"output_dir='{output_dir}'",
        f"save_heatmap={save_heatmap}",
        f"heatmap_style='{heatmap_style}'",
        f"compress={compress}",
        f"jpeg_quality={jpeg_quality}",
        "preprocessing_fn=preprocessing_fn",
        "preprocessing_kwargs=preprocessing_kwargs"
    ])
    
    python_code = "; ".join(python_code_parts) + f"; predict_image_list_from_file_enhanced({call_args})"
    
    # Return proper command list format for HPC execution
    return ["python", "-c", python_code]

In [None]:
#| export  
def create_inference_command_from_filev03(
    model_path: Path,# Path to trained model
    batch_list_file: Path, # Path to batch list file
    batch_id: str, # Batch ID
    output_dir: Path, # Output directory
    save_heatmap: bool = True, # Save heatmaps
    heatmap_style: str = "combined", # Heatmap style
    compress: bool = True, # Whether to compress the image (JPEG format)
    jpeg_quality: int = 95, # JPEG compression quality (0-100, higher is better)
    preprocessing_fn: Callable = None, # Preprocessing function
    preprocessing_fn_path: str = None, # Module path to preprocessing function (e.g. "my_module.my_function")
    preprocessing_kwargs: dict = None, # Preprocessing function kwargs
    **kwargs# Additional keyword arguments
) -> List[str]:  # Returns list of command arguments for proper HPC execution
    """Create Python command using function name string - simple and reliable for HPC."""

    # Ensure all paths are absolute for bsub execution
    model_path = Path(model_path).resolve()
    batch_list_file = Path(batch_list_file).resolve()
    output_dir = Path(output_dir).resolve()

    # Import statements
    python_code_parts = [
        "from be_vision_ad_tools.inference.prediction_system import *",
        "import importlib"
    ]
    
    # Handle preprocessing function as string import
    if preprocessing_fn_path:
        # User provided explicit module path (e.g. "my_module.preprocessing.my_func")
        module_name, func_name = preprocessing_fn_path.rsplit('.', 1)
        python_code_parts.append(
            f"_mod = importlib.import_module('{module_name}'); "
            f"preprocessing_fn = getattr(_mod, '{func_name}')"
        )
    elif preprocessing_fn is not None:
        # Try to get module path from function object
        module = inspect.getmodule(preprocessing_fn)
        if module and module.__name__ != "__main__":
            func_import_path = f"{module.__name__}.{preprocessing_fn.__name__}"
            module_name, func_name = func_import_path.rsplit('.', 1)
            python_code_parts.append(
                f"_mod = importlib.import_module('{module_name}'); "
                f"preprocessing_fn = getattr(_mod, '{func_name}')"
            )
            print(f"‚úÖ Using function import path: {func_import_path}")
        else:
            # Function is in __main__ - can't import by path
            print("‚ö†Ô∏è  Function is defined in notebook/script (__main__)")
            print("   Please move it to a module or pass preprocessing_fn_path explicitly")
            print("   Example: preprocessing_fn_path='my_module.preprocessing.my_function'")
            preprocessing_fn = None
    
    # Set to None if we couldn't resolve it
    if preprocessing_fn is None and not preprocessing_fn_path:
        python_code_parts.append("preprocessing_fn = None")
    
    # Build function call arguments
    args = [
        f"model_path='{model_path}'",
        f"image_list_file='{batch_list_file}'",
        f"batch_id='{batch_id}'",
        f"output_dir='{output_dir}'",
        f"save_heatmap={save_heatmap}",
        f"heatmap_style='{heatmap_style}'",
        f"compress={compress}",
        f"jpeg_quality={jpeg_quality}",
    ]

    # Add preprocessing if available
    if preprocessing_fn_path or (preprocessing_fn and inspect.getmodule(preprocessing_fn).__name__ != "__main__"):
        args.append("preprocessing_fn=preprocessing_fn")
        if preprocessing_kwargs:
            import json
            kwargs_json = json.dumps(preprocessing_kwargs)
            python_code_parts.append(f"preprocessing_kwargs = {kwargs_json}")
            args.append("preprocessing_kwargs=preprocessing_kwargs")

    func_call = f"predict_image_list_from_file_enhanced({', '.join(args)})"
    python_code_parts.append(func_call)

    # Join all parts with semicolons
    python_code = "; ".join(python_code_parts)

    # Return proper command list format for HPC execution
    return ["python", "-c", python_code]

In [None]:
batch_fl = batches[0][:5]
batch_idx = 0
batch_id = create_batch_id(batch_idx)
print(batch_id)
output_path = setup_output_directory(Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode"))
bs_fl = create_batch_list_file_path(
    output_path,
    batch_id
)

batch_0001


In [None]:

output_path

Path('/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode')

In [None]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
job = HPC_Job(cmd=cmd, cores=4)
dist = DistributeHPC(worker=1)
dist.set_jobs([job])
dist.start()


[1m[36mRUNNING:0, DONE:3[0m: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:08<00:00,  2.68s/it][0m
[0m: : 0it [00:08, ?it/s]


In [None]:
import torch
from typing import Tuple
def preprocess_blur_(
    image,
    kernel_size:int=5,
    #sigma:Tuple[float,float]=(0.1, 2.0)
    sigma=(0.1, 2.0)
    ) -> torch.Tensor:
    """Resize preprocessing - takes image and image_path."""
    from torchvision import transforms
    transform = transforms.Compose([
        transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma)
    ])
    return transform(image)



In [None]:
cmd = create_inference_command_from_file(
    model_path=Path(MODEL_PATH),
    batch_list_file=bs_fl,
    batch_id=batch_id,
    output_dir=Path(output_path),
    heatmap_style="side_by_side",
    save_heatmap=True,
    compress=True,
    jpeg_quality=95,
    #preprocessing_fn=create_half_black_image,
    #preprocessing_kwargs={'side':'left'})    
    #preprocessing_fn=preprocess_blur_,
    #preprocessing_kwargs={'kernel_size':5, 'sigma':(0.1, 2.0)})
)
cmd

['python',
 '-c',
 "from be_vision_ad_tools.inference.prediction_system import *; import importlib; import numpy as np; import torch; import inspect; from typing import Tuple; import json; preprocessing_fn = None; preprocessing_kwargs = None; predict_image_list_from_file_enhanced(model_path='/home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt', image_list_file='/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode/batch_lists/batch_0001_images.txt', batch_id='batch_0001', output_dir='/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode', save_heatmap=True, heatmap_style='side_by_side', compress=True, jpeg_quality=95, preprocessing_fn=preprocessing_fn, preprocessing_kwargs=preprocessing_kwargs)"]

In [None]:
# Test to debug the command generation
batch_fl = batches[0][:5]
batch_idx = 0
batch_id = create_batch_id(batch_idx)
output_path = setup_output_directory(Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode"))
bs_fl = create_batch_list_file_path(output_path, batch_id)

# Test WITH preprocessing_fn to see the generated command
cmd_with_preprocessing = create_inference_command_from_filev02(
    model_path=MODEL_PATH,
    batch_list_file=bs_fl,
    batch_id=batch_id,
    output_dir=output_path,
    heatmap_style="side_by_side",
    save_heatmap=True,
    compress=True,
    jpeg_quality=95,
    preprocessing_fn=create_half_black_image,
)

print("‚úÖ Command generated successfully with v02!")
print(f"Command length: {len(cmd_with_preprocessing)}")
print(f"\nCommand preview:")
print(f"cmd[0]: {cmd_with_preprocessing[0]}")
print(f"cmd[1]: {cmd_with_preprocessing[1]}")
print(f"cmd[2] (first 200 chars): {cmd_with_preprocessing[2][:200]}...")


 Serialized preprocessing function to: /home/ai_dsx.work/data/projects/AD_tool_test/test_multinode/preprocessing/batch_0001_fn.pkl
‚úÖ Command generated successfully with v02!
Command length: 3

Command preview:
cmd[0]: python
cmd[1]: -c
cmd[2] (first 200 chars): from be_vision_ad_tools.inference.prediction_system import *; import cloudpickle; with open('/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode/preprocessing/batch_0001_fn.pkl', 'rb') as _f: ...


In [None]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
job = HPC_Job(cmd=cmd, cores=4)
dist = DistributeHPC(worker=1)
dist.set_jobs([job])
dist.start()


[1m[36mRUNNING:0, DONE:2[0m: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:30<00:00, 15.19s/it][0m
[0m: : 0it [00:30, ?it/s]


In [None]:
#| export
def create_hpc_job_object(
    command: List[str],  # Inference command list ["python", "-c", "code"]
    cores: int,  # Number of cores for job
    description: str  # Job description
) -> Any:
    """Create HPC_Job object with command and configuration."""
    from be_vision_ad_tools.inference.multinode_from_aiop_tool import HPC_Job
    job = HPC_Job(cmd=command, cores=cores)
    job.description = description
    return job


In [None]:
# ‚úÖ Test the new v03 with function string approach
print("="*70)
print("‚úÖ TESTING V03 - FUNCTION STRING APPROACH")
print("="*70)

# Test WITHOUT preprocessing (should work)
cmd_no_preproc = create_inference_command_from_filev03(
    model_path=MODEL_PATH,
    batch_list_file=Path("/tmp/test_images.txt"),
    batch_id="test_batch",
    output_dir=Path("/tmp/test_output"),
)

print("\n1Ô∏è‚É£ WITHOUT preprocessing:")
print(f"   Command length: {len(cmd_no_preproc[2])} chars")
print(f"   Generated: {cmd_no_preproc[2][:150]}...")

# Test WITH preprocessing using function string path
# Example: using a torch function that exists in a module
cmd_with_string = create_inference_command_from_filev03(
    model_path=MODEL_PATH,
    batch_list_file=Path("/tmp/test_images.txt"),
    batch_id="test_batch",
    output_dir=Path("/tmp/test_output"),
    preprocessing_fn_path="torchvision.transforms.functional.to_grayscale",
    preprocessing_kwargs={'num_output_channels': 3}
)

print("\n2Ô∏è‚É£ WITH preprocessing (string path):")
print(f"   Command length: {len(cmd_with_string[2])} chars")
print(f"   Generated: {cmd_with_string[2][:300]}...")

print("\n‚úÖ Both commands generated successfully!")
print("="*70)


In [None]:
batch_fl = batches[0][:5]
batch_idx = 0
batch_id = create_batch_id(batch_idx)
print(batch_id)
output_path = setup_output_directory(Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode"))
bs_fl = create_batch_list_file_path(
    output_path,
    batch_id
)

batch_0001


In [None]:
bs_fl

Path('/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode/batch_lists/batch_0001_images.txt')

In [None]:
create_batch_list_file(
    batch=batch_fl,
    batch_list_file=bs_fl
)

In [None]:
job = create_hpc_job_object(
    command=cmd,
    cores=4,
    description=f"inference_{batch_id}"
)



In [None]:
cmd

['python',
 '-c',
 "from be_vision_ad_tools.inference.prediction_system import predict_image_list_from_file_enhanced; import importlib; import numpy as np; import json; preprocessing_fn = None; preprocessing_kwargs = None; predict_image_list_from_file_enhanced(model_path='/home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt', image_list_file='/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode/batch_lists/batch_0001_images.txt', batch_id='batch_0001', output_dir='/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode', save_heatmap=True, heatmap_style='side_by_side', compress=True, jpeg_quality=95, preprocessing_fn=preprocessing_fn, preprocessing_kwargs=preprocessing_kwargs)"]

In [None]:
#| export

def create_single_inference_job(
    batch: List[Path],  # List of image paths in batch
    batch_index: int,  # Index of batch
    model_path: Path,  # Path to trained model
    output_dir: Path,  # Output directory for results
    cores: int = 4,  # Number of cores for HPC job
    save_heatmap:bool=True,#save heatmaps
    heatmap_style:str="side_by_side",#heatmap style
    compress:bool=True,#compress images
    jpeg_quality:int=95,#jpeg quality
    preprocessing_fn:Callable=None,#preprocessing function
    preprocessing_kwargs:dict=None,#preprocessing function kwargs
    **kwargs#additional kwargs
) -> Any:
    """Create single HPC inference job from batch."""
    batch_id = create_batch_id(batch_index)
    batch_list_file = create_batch_list_file_path(output_dir, batch_id)
    
    create_batch_list_file(batch, batch_list_file)
    
    command = create_inference_command_from_filev03(
        model_path=model_path,
        batch_list_file=batch_list_file,
        batch_id=batch_id,
        output_dir=output_dir,
        save_heatmap=save_heatmap,
        heatmap_style=heatmap_style,
        compress=compress,
        jpeg_quality=jpeg_quality,
        preprocessing_fn=preprocessing_fn,
        preprocessing_kwargs=preprocessing_kwargs,
        **kwargs
    )
    
    job = create_hpc_job_object(
        command=command,
        cores=cores,
        description=f"inference_{batch_id}"
    )
    
    return job



In [None]:
output_dir = Path("/home/ai_dsx.work/data/projects/AD_tool_test/HPC_JOB_TEST")
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
jobs = create_single_inference_job(
    btches[0], 
    0, 
    MODEL_PATH, 
    output_dir,
    4,
    heatmap_style="side_by_side",
    save_heatmaps=True,
    compress=True,
    jpeg_quality=95,
    )
   

In [None]:
print(job.command)

['python', '-c', "from be_vision_ad_tools.inference.prediction_system import *; import importlib; preprocessing_fn = None; predict_image_list_from_file_enhanced(model_path='/home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt', image_list_file='/home/ai_dsx.work/data/projects/AD_tool_test/HPC_JOB_TEST/batch_lists/batch_0001_images.txt', batch_id='batch_0001', output_dir='/home/ai_dsx.work/data/projects/AD_tool_test/HPC_JOB_TEST', save_heatmap=True, heatmap_style='side_by_side', compress=True, jpeg_quality=95)"]


In [None]:
#jobs = [sn_job]
num_nodes = 2
print(f"\nüöÄ Submitting {len([jobs])} jobs to {num_nodes} nodes")
distributor = DistributeHPC(
    worker=20)
distributor.set_jobs(
    [jobs], num_cpu=1)
    
# Start execution
print(f"‚ñ∂Ô∏è Starting multinode execution...")
distributor.start()


üöÄ Submitting 1 jobs to 2 nodes
‚ñ∂Ô∏è Starting multinode execution...


Total:   0%|                                              | 0/7 [00:00<?, ?it/s]
[A



[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A










[A[A[A[A[A[A[A[A[A[A[A











[A[A[A[A[A[A[A[A[A[A[A[A












[A[A[A[A[A[A[A[A[A[A[A[A[A













[A[A[A[A[A[A[A[A[A[A[A[A[A[A














[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A
















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

















[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


















[1m[36mRUNNING:1, DONE:3[0m:  43%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè              | 3/7 [00:00<00:00, 19.84it/s][0m
[1m[36mRUNNING:2, DONE:3[0m:  43%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè              | 3/7 [00:00<00:00, 19.84it/s][0m

[1m[36mRUNNING:3, DONE:2[0m:  29%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

In [None]:
#| export
def create_multinode_inference_jobs(
    model_path: Path,  # Path to trained model
    batches: List[List[Path]],  # List of image batches
    output_dir: Path,  # Output directory for results
    cores: int = 4,  # Number of cores per job
    save_heatmap:bool=True,
    heatmap_style:str="side_by_side",
    compress:bool=True,
    jpeg_quality:int=95,
    preprocess_fn:Callable=None,
    preprocess_fn_kwargs:dict=None,
    **kwargs
) -> List:  # Returns list of HPC_Job objects
    """Convert image batches into HPC_Job objects for multinode execution."""
    import logging
    
    logger = logging.getLogger(__name__)
    
    logger.info(f"Creating {len(batches)} HPC jobs")
    logger.info(f"Model: {model_path}")
    logger.info(f"Output: {output_dir}")
    
    output_path = setup_output_directory(output_dir)
    
    jobs = []
    for i, batch in enumerate(batches):
        job = create_single_inference_job(
            batch=batch,
            batch_index=i,
            model_path=model_path,
            output_dir=output_path,
            cores=cores,
            save_heatmap=save_heatmap,
            heatmap_style=heatmap_style,
            compress=compress,
            jpeg_quality=jpeg_quality,
            preprocess_fn=preprocess_fn,
            preprocess_fn_kwargs=preprocess_fn_kwargs,
            **kwargs
        )
        jobs.append(job)
        logger.info(f"Job {i+1}: {job.description} with {len(batch)} images")
    
    logger.info(f"Created {len(jobs)} jobs total")
    return jobs


In [None]:
# Test job creation
test_model = Path("/home/ai_dsx.work/data/projects/AD_tool_test/models/best_model.pth")
test_output = Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_multinode_integration")

try:
    jobs = create_multinode_inference_jobs(test_model, batches[:2], test_output)
    print(f"\nüìä Job Details:")
    for i, job in enumerate(jobs):
        print(f"   Job {i+1}:")
        print(f"      Description: {job.description}")
        print(f"      Command length: {len(job.command)}")
        print(f"      State: {job.state}")
except Exception as e:
    print(f"‚ö†Ô∏è Test skipped (needs batches from previous test): {e}")



üìä Job Details:
   Job 1:
      Description: inference_batch_0001
      Command length: 3
      State: 4
   Job 2:
      Description: inference_batch_0002
      Command length: 3
      State: 4


## Small-Scale Integration Test
End-to-end test with small dataset


## Nested Folder Handler
Main entry point for handling nested folder structures


In [None]:
#| export
def handle_nested_folders(
    root_path: Path,  # Root folder to process
    batch_size: int = 100  # Maximum images per batch
) -> List[List[Path]]:  # Returns batches ready for job creation
    """Main entry point for nested folder processing (folder ‚Üí lot ‚Üí images)."""
    
    print(f"üéØ Processing nested folder structure")
    print(f"   Root: {root_path}")
    print(f"   Batch size: {batch_size}")
    
    # Step 1: Scan folder structure
    print(f"\nüì° Step 1: Scanning folder structure...")
    folder_info = scan_folder_structure(root_path)
    
    # Step 2: Create smart batches
    print(f"\nüî® Step 2: Creating smart batches...")
    batches = create_smart_batches(folder_info, batch_size)
    
    print(f"\n‚úÖ Nested folder processing complete!")
    print(f"   Total images: {folder_info['total_images']}")
    print(f"   Total batches: {len(batches)}")
    
    return batches


In [None]:
# Test the complete nested folder handler
try:
    batches_from_handler = handle_nested_folders(
        root_path=Path(test_folder),
        batch_size=50
    )
    print(f"\nüìä Handler Results:")
    print(f"   Total batches: {len(batches_from_handler)}")
    for i, batch in enumerate(batches_from_handler[:3]):  # Show first 3
        print(f"   Batch {i+1}: {len(batch)} images")
except Exception as e:
    print(f"‚ö†Ô∏è Test failed: {e}")


üéØ Processing nested folder structure
   Root: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
   Batch size: 50

üì° Step 1: Scanning folder structure...
üìÑ Detected FLAT structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
   üì∑ Total images: 85

‚úÖ Scan complete: 85 total images

üî® Step 2: Creating smart batches...

‚úÖ Nested folder processing complete!
   Total images: 85
   Total batches: 2

üìä Handler Results:
   Total batches: 2
   Batch 1: 43 images
   Batch 2: 42 images


## Input Validation
Validate parameters before job creation to fail fast


In [None]:
#| export
def validate_inference_inputs(
    model_path: Union[str, Path],  # Path to trained model
    root_path: Union[str, Path],  # Root folder containing images
    batch_size: int,  # Batch size for processing
    num_nodes: int  # Number of HPC nodes
) -> Tuple[Path, Path]:  # Returns validated (model_path, root_path) as Path objects
    """Validate all inputs before starting inference job creation."""
    
    # Validate model path
    model_path = Path(model_path)
    if not model_path.exists():
        raise FileNotFoundError(f"‚ùå Model not found: {model_path}")
    print(f"‚úÖ Model found: {model_path}")
    
    # Validate root path
    root_path = Path(root_path)
    if not root_path.exists():
        raise FileNotFoundError(f"‚ùå Root path not found: {root_path}")
    if not root_path.is_dir():
        raise ValueError(f"‚ùå Root path must be a directory: {root_path}")
    print(f"‚úÖ Root path valid: {root_path}")
    
    # Validate batch size
    if batch_size <= 0:
        raise ValueError(f"‚ùå Batch size must be positive, got: {batch_size}")
    print(f"‚úÖ Batch size valid: {batch_size}")
    
    # Validate num_nodes
    if num_nodes <= 0:
        raise ValueError(f"‚ùå Number of nodes must be positive, got: {num_nodes}")
    print(f"‚úÖ Number of nodes valid: {num_nodes}")
    
    return model_path, root_path


## üß™ Clean Test: HPC_Job with preprocessing_fn_path (String Import)


## Execution Summary Printer
Pretty print results after job distribution


In [None]:
#| export
def print_execution_summary(
    folder_info: Dict[str, Any],  # Folder structure info from scan
    batches: List[List[Path]],  # Created batches
    jobs: List,  # Created HPC jobs
    num_nodes: int,  # Number of nodes used
    output_dir: Path  # Output directory
) -> None:  # Prints summary to console
    """Print comprehensive summary of job distribution setup."""
    
    print("\n" + "="*70)
    print("üéØ INFERENCE JOB DISTRIBUTION SUMMARY")
    print("="*70)
    
    # Folder structure info
    print(f"\nüìÅ Data Structure:")
    print(f"   Type: {folder_info['type'].upper()}")
    print(f"   Total Images: {folder_info['total_images']:,}")
    if folder_info['type'] == 'nested':
        print(f"   Number of Lots: {len(folder_info['folders'])}")
    
    # Batch info
    print(f"\nüì¶ Batch Configuration:")
    print(f"   Total Batches: {len(batches)}")
    batch_sizes = [len(b) for b in batches]
    print(f"   Batch Sizes: min={min(batch_sizes)}, max={max(batch_sizes)}, avg={sum(batch_sizes)//len(batch_sizes)}")
    
    # Job info
    print(f"\nüè≠ HPC Jobs:")
    print(f"   Total Jobs Created: {len(jobs)}")
    print(f"   Jobs per Node (approx): {len(jobs) / num_nodes:.1f}")
    
    # Node info
    print(f"\nüñ•Ô∏è  Compute Resources:")
    print(f"   Number of Nodes: {num_nodes}")
    print(f"   Cores per Job: 4")  # Hardcoded in create_multinode_inference_jobs
    
    # Output info
    print(f"\nüíæ Output:")
    print(f"   Directory: {output_dir}")
    print(f"   Batch Lists: {output_dir / 'batch_lists'}")
    
    print("\n" + "="*70)
    print("‚úÖ Setup Complete - Ready for Execution!")
    print("="*70 + "\n")


## Main Entry Point - Smart Folder Inference Distribution
One function to orchestrate the entire workflow


In [None]:
#| export
def distribute_folder_inference(
    root_path: Union[str, Path],  # Root folder with images (flat or nested structure)
    model_path: Union[str, Path],  # Path to trained anomaly detection model
    output_dir: Union[str, Path],  # Output directory for results
    batch_size: int = 100,  # Maximum images per batch
    num_nodes: int = 10,  # Number of HPC nodes to use
    dry_run: bool = False,  # If True, setup jobs but don't execute
    save_heatmap:bool=True,
    heatmap_style:str="side_by_side",
    compress:bool=True,
    jpeg_quality:int=95,
    preprocess_fn:Callable=None,
    preprocess_fn_kwargs:dict=None,
    **kwargs
) -> Dict[str, Any]:  # Returns execution summary
    """Smart inference distribution - automatically handles flat and nested folder structures."""
    
    from be_vision_ad_tools.inference.multinode_from_aiop_tool import DistributeHPC
    
    print("üöÄ SMART FOLDER INFERENCE DISTRIBUTION")
    print("="*70)
    
    # Step 1: Validate inputs (fail fast)
    print("\nüìã Step 1: Validating inputs...")
    model_path, root_path = validate_inference_inputs(
        model_path=model_path,
        root_path=root_path,
        batch_size=batch_size,
        num_nodes=num_nodes
    )
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"‚úÖ Output directory: {output_dir}")
    
    # Step 2: Scan folder structure (auto-detect flat vs nested)
    print(f"\nüì° Step 2: Scanning folder structure...")
    folder_info = scan_folder_structure(root_path)
    
    # Step 3: Create smart batches
    print(f"\nüî® Step 3: Creating smart batches...")
    batches = create_smart_batches(folder_info, batch_size)
    
    # Step 4: Create HPC jobs
    print(f"\nüè≠ Step 4: Creating HPC jobs...")
    jobs = create_multinode_inference_jobs(
        model_path=model_path,
        batches=batches,
        output_dir=output_dir,
        save_heatmap=save_heatmap,
        heatmap_style=heatmap_style,
        compress=compress,
        jpeg_quality=jpeg_quality,
        preprocess_fn=preprocess_fn,
        preprocess_fn_kwargs=preprocess_fn_kwargs,
        **kwargs
    )
    
    # Step 5: Print summary
    print_execution_summary(
        folder_info=folder_info,
        batches=batches,
        jobs=jobs,
        num_nodes=num_nodes,
        output_dir=output_dir
    )
    
    # Step 6: Execute or dry run
    if dry_run:
        print("üîç DRY RUN MODE - Jobs created but not executed")
        print(f"   To execute, call with dry_run=False")
    else:
        print(f"\n‚ñ∂Ô∏è  Step 5: Submitting jobs to {num_nodes} nodes...")
        distributor = DistributeHPC(worker=num_nodes)
        distributor.set_jobs(jobs)
        distributor.start()
        print("\n‚úÖ Job execution completed!")
    
    # Return summary
    return {
        "structure_type": folder_info['type'],
        "total_images": folder_info['total_images'],
        "num_batches": len(batches),
        "num_jobs": len(jobs),
        "num_nodes": num_nodes,
        "output_dir": str(output_dir),
        "dry_run": dry_run
    }


## End-to-End Integration Tests
Comprehensive tests demonstrating the complete workflow


In [None]:
# Test 1: Dry run with nested structure
print("="*70)
print("TEST 1: DRY RUN WITH NESTED STRUCTURE")
print("="*70)

try:
    test_model = MODEL_PATH
    test_data = Path("/home/ai_dsx.work/data/projects/AD_tool_test/images")
    test_output = Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_distribute_inference")
    
    results = distribute_folder_inference(
        root_path=test_data,
        model_path=test_model,
        output_dir=test_output,
        batch_size=50,
        num_nodes=2,
        dry_run=True  # Don't actually execute
    )
    
    print("\nüìä Test Results:")
    for key, value in results.items():
        print(f"   {key}: {value}")
    
    print("\n‚úÖ TEST 1 PASSED - Dry run completed successfully!")
    
except Exception as e:
    print(f"\n‚ö†Ô∏è TEST 1 SKIPPED: {e}")
    print("   (This is expected if test data doesn't exist on Windows)")


TEST 1: DRY RUN WITH NESTED STRUCTURE
üöÄ SMART FOLDER INFERENCE DISTRIBUTION

üìã Step 1: Validating inputs...
‚úÖ Model found: /home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt
‚úÖ Root path valid: /home/ai_dsx.work/data/projects/AD_tool_test/images
‚úÖ Batch size valid: 50
‚úÖ Number of nodes valid: 2
‚úÖ Output directory: /home/ai_dsx.work/data/projects/AD_tool_test/test_distribute_inference

üì° Step 2: Scanning folder structure...
üìÅ Detected NESTED structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images
   üìÇ Lot 'good': 85 images
   üìÇ Lot 'bad': 2 images
   üìÇ Lot 'hyperparameter_models': 1 images

‚úÖ Scan complete: 88 total images

üî® Step 3: Creating smart batches...

üè≠ Step 4: Creating HPC jobs...

‚ö†Ô∏è TEST 1 SKIPPED: name 'create_multinode_inference_jobs' is not defined
   (This is expected if test data doesn't exist on Windows)


In [None]:
# Test 2: Dry run with flat structure
print("\n" + "="*70)
print("TEST 2: DRY RUN WITH FLAT STRUCTURE")
print("="*70)

try:
    test_model = MODEL_PATH
    test_data_flat = Path("/home/ai_dsx.work/data/projects/AD_tool_test/images/good")  # Flat folder
    test_output_flat = Path("/home/ai_dsx.work/data/projects/AD_tool_test/test_distribute_flat")
    
    results = distribute_folder_inference(
        root_path=test_data_flat,
        model_path=test_model,
        output_dir=test_output_flat,
        batch_size=30,
        num_nodes=3,
        dry_run=True
    )
    
    print("\nüìä Test Results:")
    for key, value in results.items():
        print(f"   {key}: {value}")
    
    print("\n‚úÖ TEST 2 PASSED - Flat structure handled correctly!")
    
except Exception as e:
    print(f"\n‚ö†Ô∏è TEST 2 SKIPPED: {e}")
    print("   (This is expected if test data doesn't exist on Windows)")



TEST 2: DRY RUN WITH FLAT STRUCTURE
üöÄ SMART FOLDER INFERENCE DISTRIBUTION

üìã Step 1: Validating inputs...
‚úÖ Model found: /home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt
‚úÖ Root path valid: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
‚úÖ Batch size valid: 30
‚úÖ Number of nodes valid: 3
‚úÖ Output directory: /home/ai_dsx.work/data/projects/AD_tool_test/test_distribute_flat

üì° Step 2: Scanning folder structure...
üìÑ Detected FLAT structure in: /home/ai_dsx.work/data/projects/AD_tool_test/images/good
   üì∑ Total images: 85

‚úÖ Scan complete: 85 total images

üî® Step 3: Creating smart batches...

üè≠ Step 4: Creating HPC jobs...

‚ö†Ô∏è TEST 2 SKIPPED: name 'create_multinode_inference_jobs' is not defined
   (This is expected if test data doesn't exist on Windows)


In [None]:
#| hide
import nbdev; nbdev.nbdev_export('12_test_multinode_integration.ipynb')
