# Perform multinode inference
> Performing inference on multiple nodes

In [None]:
#| default_exp inference.multinode_infer

In [None]:
#| hide
%load_ext autoreload
%autoreload 2


In [None]:
#| export
import sys
from pathlib import Path
from typing import List
from fastcore.all import *
from fastcore.script import *


In [None]:
!which python

In [None]:
#| export
from be_vision_ad_tools.inference.multinode_from_aiop_tool import (
    DistributeHPC, HPC_Job
)


In [None]:
#| export
# Hard-coded absolute path ending in the project's `nbs` directory.
# NOTE(review): the value is machine-specific and the name is misspelled
# ("CURRETNT"); it is exported under this name, so check for external users
# before renaming it.
CURRETNT_NB='/home/ai_dsx.work/data/projects/be-vision-ad-tools/nbs'

In [None]:
#| export
from be_vision_ad_tools.inference.prediction_system import (
    split_image_list, generate_hpc_commands, predict_image_list_from_file, 
    merge_batch_results
)


In [None]:
#| export
from typing import Union, List, Optional, Dict, Any, Tuple
from fastcore.test import *
import os
import glob
from pathlib import Path
import json

In [None]:
#| export
def resolve_test_folders(
    test_folders: Union[str, Path, List[Union[str, Path]]]  # A folder, an image file, or a collection mixing both
    ) -> List[Path]:
    """
    Resolve test_folders parameter to a sorted, de-duplicated list of image paths.

    Each entry is handled as follows:
      * an existing file with a supported image suffix is kept as is;
      * an existing directory contributes the image files directly inside it
        (no recursion into sub-directories);
      * anything else only triggers a printed warning.

    Suffix matching is case-insensitive ('.jpg', '.JPG' and '.Jpg' all match).

    Example:
        >>> folders = resolve_test_folders("path/to/images")
        >>> folders = resolve_test_folders(["folder1", "folder2"])  
        >>> folders = resolve_test_folders(["folder1", "image1.jpg", "folder2"])
    """
    # A lone str/Path is a single entry; any other iterable (list, tuple, set, ...)
    # is taken as a collection of entries.
    if isinstance(test_folders, (str, Path)):
        entries = [test_folders]
    else:
        entries = list(test_folders)

    supported_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}

    def is_image(candidate: Path) -> bool:
        # Lower-casing the suffix makes the check independent of how the file was named
        return candidate.is_file() and candidate.suffix.lower() in supported_extensions

    image_paths = set()  # a set removes duplicates when inputs overlap
    for folder_or_file in entries:
        path = Path(folder_or_file)

        if path.is_dir():
            image_paths.update(child for child in path.iterdir() if is_image(child))
        elif is_image(path):
            image_paths.add(path)
        else:
            print(f"‚ö†Ô∏è  Warning: '{folder_or_file}' is not a valid file or directory")

    unique_paths = sorted(image_paths)

    print(f"üìÅ Resolved {len(unique_paths)} images from {len(entries)} input path(s)")
    return unique_paths

In [None]:
# Test resolve_test_folders function
def test_resolve_test_folders():
    """Smoke-test resolve_test_folders on inputs that yield no images."""
    # An empty list, then a non-existent path (which only warns): both resolve to nothing
    for candidate in ([], ["non_existent_folder"]):
        resolved = resolve_test_folders(candidate)
        test_eq(len(resolved), 0)

    print("‚úÖ resolve_test_folders tests passed")


In [None]:

test_resolve_test_folders()

In [None]:
tst_images = resolve_test_folders("/home/ai_dsx.work/data/projects/AD_tool_test/images/good")

In [None]:
#| export
def validate_model_path(model_path: Union[str, Path]) -> Path:
    """Return `model_path` as a Path; raise FileNotFoundError if nothing exists there."""
    candidate = Path(model_path)
    if candidate.exists():
        return candidate
    raise FileNotFoundError(f"Model not found: {candidate}")

In [None]:
# Test validate_model_path function
def test_validate_model_path():
    """Test model path validation for a missing file and an existing one."""
    # A missing file must raise FileNotFoundError
    try:
        validate_model_path("non_existent_model.ckpt")
    except FileNotFoundError:
        pass  # Expected
    else:
        # Raised explicitly so the check is not stripped under `python -O`
        raise AssertionError("Should have raised FileNotFoundError")

    # Test with existing file (use README.md as proxy)
    readme = Path.cwd().parent / "README.md"
    if not readme.exists():
        print("‚ö†Ô∏è README.md not found for testing, but function logic is correct")
        return

    result = validate_model_path(readme)
    # Path(...) instantiates a platform-specific subclass (PosixPath/WindowsPath),
    # so the type has to be checked with isinstance rather than equality.
    assert isinstance(result, Path), f"Expected a Path, got {type(result)}"
    print("‚úÖ validate_model_path tests passed")


In [None]:

test_validate_model_path()

In [None]:
Path("/home/ai_dsx.work/data/projects/AD_tool_test/models/best_model.pth"),

In [None]:
tst_images

## Split image list into different batches
- split into batches
    - splitting can be done in different ways
        - round robin
        - chunk
    - batch size can be different for each batch

In [None]:
batch_size = 50
paths = list(map(Path, tst_images))
# Ceiling division: a final, partially filled batch still counts as one batch
num_batches = -(-len(paths) // batch_size)
num_batches

In [None]:
# One separate empty list per batch (built individually so the batches never share a list)
batches = [list() for _ in range(num_batches)]
batches

In [None]:
# round robin: the image at position k goes to batch k mod num_batches
for position, image in enumerate(paths):
    batches[position % num_batches].append(image)

In [None]:
print(batches)
print(len(batches))

In [None]:
# chunk: consecutive slices of at most batch_size images, one slice per batch
batches = [[] for _ in range(num_batches)]
for i in range(0, len(paths), batch_size):
    batch_idx = i//batch_size
    print(batch_idx)
    batch_end = min(i + batch_size, len(paths))
    print(f' batch end = {batch_end}')
    # extend (not append) keeps each batch a flat list of paths instead of a
    # list wrapping a single inner list
    batches[batch_idx].extend(paths[i:batch_end])
print(batches)
print(len(batches))


In [None]:
#| export
def split_image_list(
    image_list: List[Union[str, Path]],  # Image paths to distribute
    batch_size: int,  # Upper bound on the number of images in one batch
    batch_strategy: str = "round_robin"  # "round_robin" (interleaved, balanced) or "chunk" (consecutive slices)
) -> List[List[Path]]:  # One list of Paths per batch
    """Partition image paths into batches of at most `batch_size` images for parallel processing."""
    if not image_list:
        return []
    if batch_size <= 0:
        raise ValueError("Batch size must be positive")

    paths = [Path(img) for img in image_list]
    total = len(paths)

    # Everything fits into one batch: return it directly (the strategy is not consulted)
    if total <= batch_size:
        return [paths]

    num_batches = -(-total // batch_size)  # ceiling division

    if batch_strategy == "round_robin":
        # Batch k receives images k, k + num_batches, k + 2*num_batches, ...
        batches = [paths[offset::num_batches] for offset in range(num_batches)]
    elif batch_strategy == "chunk":
        # Consecutive slices; only the last one may be shorter than batch_size
        batches = [paths[start:start + batch_size] for start in range(0, total, batch_size)]
    else:
        raise ValueError(f"Unknown batch strategy: {batch_strategy}")

    print(f"üì¶ Split {len(paths)} images into {len(batches)} batches (max {batch_size} per batch):")
    for number, batch in enumerate(batches, start=1):
        print(f"   Batch {number}: {len(batch)} images")

    return batches

In [None]:
# Test split_image_list function with batch_size parameter
def test_split_image_list():
    """Test image list splitting: batch counts, batch sizes and image placement."""
    from fastcore.test import test_eq

    # Empty input -> no batches
    result = split_image_list([], batch_size=10)
    test_eq(len(result), 0)

    # batch_size larger than the list -> a single batch holding every image
    small_list = [Path(f"img_{i}.jpg") for i in range(3)]
    result = split_image_list(small_list, batch_size=5)
    test_eq(len(result), 1)
    test_eq(result[0], small_list)

    # List length is an exact multiple of batch_size -> two full batches
    exact_list = [Path(f"img_{i}.jpg") for i in range(10)]
    result = split_image_list(exact_list, batch_size=5)
    test_eq(len(result), 2)
    test_eq(len(result[0]), 5)
    test_eq(len(result[1]), 5)

    # 11 images, batch_size 5, round robin -> three balanced batches of 4, 4 and 3
    remainder_list = [Path(f"img_{i}.jpg") for i in range(11)]
    result = split_image_list(
        remainder_list,
        batch_size=5,
        batch_strategy='round_robin')
    test_eq(len(result), 3)
    test_eq(len(result[0]), 4)
    test_eq(len(result[1]), 4)
    test_eq(len(result[2]), 3)
    # Round robin interleaves: batch 0 receives images 0, 3, 6, 9
    test_eq(result[0], remainder_list[0::3])

    # Same input with the chunk strategy -> consecutive slices of 5, 5 and 1
    chunk_result = split_image_list(
        remainder_list,
        batch_size=5,
        batch_strategy="chunk"
        )
    test_eq(len(chunk_result), 3)
    test_eq(chunk_result[0], remainder_list[0:5])    # images 0-4
    test_eq(chunk_result[1], remainder_list[5:10])   # images 5-9
    test_eq(chunk_result[2], remainder_list[10:])    # image 10

    # A non-positive batch size must be rejected
    try:
        split_image_list([Path("test.jpg")], batch_size=0)
    except ValueError:
        pass
    else:
        # Raised explicitly so the check is not stripped under `python -O`
        raise AssertionError("Should raise ValueError for batch_size=0")

    print("‚úÖ split_image_list tests passed")


In [None]:
image_batch = split_image_list(tst_images, batch_size=50)

In [None]:
test_split_image_list()

# Create a batch list file from each image batch
- A batch list file is a text file listing the image paths of one batch, one path per line

In [None]:
#| export
def create_batch_list_file(batch: List[Path], batch_list_file: Path) -> None:
    """Write the paths in `batch` to `batch_list_file`, one per line, creating parent folders as needed."""
    batch_list_file.parent.mkdir(parents=True, exist_ok=True)
    with open(batch_list_file, 'w') as handle:
        handle.writelines(f"{entry}\n" for entry in batch)

In [None]:
output_path = Path("/home/ai_dsx.work/data/projects/AD_tool_test/inference_batch")

In [None]:
create_batch_list_file(image_batch[0], Path(output_path,"batch_0.txt"))

In [None]:

from nbdev.showdoc import show_doc

In [None]:
show_doc(predict_image_list_from_file)

In [None]:
#| export  
def create_inference_command(
    model_path: Path,  # Model file handed to predict_image_list_from_file
    batch_list_file: Path,  # Text file listing the images of this batch
    batch_id: str,  # Identifier forwarded as `batch_id`
    output_dir: Path,  # Directory forwarded as `output_dir`
    save_heatmaps: bool = True,  # Forwarded as `save_heatmaps`
    heatmap_style: str = "combined"  # Forwarded as `heatmap_style`
) -> List[str]:  # Returns list of command arguments for proper HPC execution
    """Create the `["python", "-c", <code>]` command list for one batch inference.

    The inline code imports `predict_image_list_from_file` and calls it with the
    given arguments as keywords. Every value is embedded through `repr()`, so
    quotes or backslashes inside a path or id still yield valid Python source
    (for ordinary values the generated text is the same as with plain quoting).
    """
    call_args = ", ".join([
        f"model_path={str(model_path)!r}",
        f"image_list_file={str(batch_list_file)!r}",
        f"batch_id={str(batch_id)!r}",
        f"output_dir={str(output_dir)!r}",
        f"save_heatmaps={save_heatmaps!r}",
        f"heatmap_style={str(heatmap_style)!r}",
    ])
    python_code = (
        "from be_vision_ad_tools.inference.prediction_system import predict_image_list_from_file; "
        f"predict_image_list_from_file({call_args})"
    )

    # Return proper command list format for HPC execution
    return ["python", "-c", python_code]

In [None]:
create_inference_command(
    model_path=Path("/home/ai_dsx.work/data/projects/AD_tool_test/models/best_model.pth"), 
    batch_list_file=Path(output_path,"batch_0.txt"), 
    batch_id="batch_0", 
    output_dir=output_path, save_heatmaps=True,
    heatmap_style="combined")

In [None]:
# Debug: build a command from dummy paths and inspect what comes back
test_cmd = create_inference_command(
    Path("/test/model.pth"),
    Path("/test/batch.txt"),
    "debug_test",
    Path("/test/output"),
)
print(
    f"Command type: {type(test_cmd)}",
    f"Command length: {len(test_cmd)}",
    f"Command content: {test_cmd}",
    f"Is it a list? {isinstance(test_cmd, list)}",
    sep="\n",
)

In [None]:
# Test the corrected create_inference_command function
def test_create_inference_command():
    """Check that create_inference_command yields a ["python", "-c", <code>] list."""
    cmd = create_inference_command(
        model_path=Path("/test/model.pth"),
        batch_list_file=Path("/test/batch.txt"),
        batch_id="test_batch",
        output_dir=Path("/test/output"),
    )

    # Exactly three arguments: interpreter, the -c flag, and the inline source
    test_eq(len(cmd), 3)
    interpreter, flag, code = cmd
    test_eq(interpreter, "python")
    test_eq(flag, "-c")
    assert isinstance(code, str), f"Expected string, got {type(code)}"
    assert "predict_image_list_from_file" in code, "Expected function call in command"

    print("‚úÖ create_inference_command tests passed!")
    # Only the first 50 characters of the inline source are shown
    print(f"   Command format: {[interpreter, flag, code[:50] + '...']}")

test_create_inference_command()

In [None]:
#| export
def create_hpc_job(
    batch_id: str,  # Identifier of the batch; becomes part of the job description
    command: Union[str, List[str]],  # Command as one string or as an argument list
    job_name_prefix: str = "anomaly_inference",  # Prefix of the job description
    cores: int = 4,  # Passed to HPC_Job as `cores`
    **hpc_kwargs  # Forwarded unchanged to HPC_Job
) -> HPC_Job:  # The job, with `description` set
    """Build one HPC_Job for a batch and label it `<job_name_prefix>_<batch_id>`."""
    # Only str and list are accepted; everything else is rejected up front
    if not isinstance(command, (str, list)):
        raise ValueError(f"Command must be string or list, got {type(command)}")

    # A plain string (legacy format) becomes a one-element list; a list is passed through as is
    cmd_list = [command] if isinstance(command, str) else command

    job = HPC_Job(cmd=cmd_list, cores=cores, **hpc_kwargs)
    # The description carries the batch id so the job can be identified later
    job.description = f"{job_name_prefix}_{batch_id}"
    return job

In [None]:
# Test the corrected create_hpc_job function
def test_create_hpc_job():
    """Exercise create_hpc_job with a string command, a list command and a generated inference command."""
    # Legacy string command: stored as a one-element list, description built from the default prefix
    string_job = create_hpc_job("test_batch", "echo 'hello'")
    test_eq(string_job.description, "anomaly_inference_test_batch")
    test_eq(string_job.command, ["echo 'hello'"])

    # List command: stored unchanged
    list_cmd = ["python", "-c", "print('hello')"]
    list_job = create_hpc_job("test_batch2", list_cmd)
    test_eq(list_job.command, list_cmd)

    # Real usage: the command produced by create_inference_command
    inference_job = create_hpc_job(
        "real_batch",
        create_inference_command(
            model_path=Path("/test/model.pth"),
            batch_list_file=Path("/test/batch.txt"),
            batch_id="real_test",
            output_dir=Path("/test/output"),
        ),
    )
    stored = inference_job.command
    test_eq(len(stored), 3)
    test_eq(stored[0], "python")

    print("‚úÖ create_hpc_job tests passed!")
    print(f"   String command format: {string_job.command}")
    print(f"   List command format: {list_job.command[:2] + ['...']}")

test_create_hpc_job()

In [None]:
#| export
# Create a better __repr__ method for HPC_Job using fastcore's patch
#@patch_to(HPC_Job)
#def __repr__(self: HPC_Job) -> str:
    #"""Better representation for HPC_Job objects."""
    #state_names = {
        #1: "NONE", 2: "SUBMITTED", 4: "WAITING", 
        #8: "RUNNING", 16: "COMPLETED", 4096: "BSUB_FAILED", 8192: "TASK_FAILED"
    #}
    #state_name = state_names.get(self.state, f"UNKNOWN({self.state})")
    
    #cmd_display = self.command[:2] if len(self.command) > 2 else self.command
    #if len(self.command) > 2:
        #cmd_display = f"{cmd_display}... [{len(self.command)} args]"
    
    #return (f"HPC_Job(cmd={cmd_display}, "
            #f"state={state_name}, "
            #f"job_id={self.lsf_job_id or 'None'}, "
            #f"description='{self.description}')")


In [None]:
sample_job = create_hpc_job(
    batch_id="batch_0",
    command=create_inference_command(
        model_path=Path("/home/ai_dsx.work/data/projects/AD_tool_test/models/best_model.pth"), 
        batch_list_file=Path(output_path,"batch_0.txt"), 
        batch_id="batch_0", 
        output_dir=output_path, save_heatmaps=True,
        heatmap_style="combined")
)

In [None]:
sample_job

In [None]:
#| export
def create_multinode_jobs_fresh(
    model_path: Union[str, Path],  # Model file; must exist
    test_folders: Union[str, Path, List[Union[str, Path]]],  # Image folders and/or image files
    batch_size: int = 100,  # Upper bound on images per batch
    output_dir: str = "multinode_results",  # Root directory for batch lists and results
    **job_kwargs  # Forwarded to create_hpc_job for every batch
) -> List[HPC_Job]:  # One job per image batch
    """Build one HPC job per image batch for multinode inference.

    Raises FileNotFoundError (from validate_model_path) when the model is missing
    and ValueError when no image could be resolved from `test_folders`.
    Side effects: creates `output_dir` and writes one image list file per batch
    under `output_dir/batch_lists`.
    """
    checked_model = validate_model_path(model_path)
    images = resolve_test_folders(test_folders)
    if not images:
        raise ValueError("No valid images found in test_folders")

    image_batches = split_image_list(images, batch_size=batch_size)
    results_root = Path(output_dir)
    results_root.mkdir(parents=True, exist_ok=True)
    list_dir = results_root / "batch_lists"

    def build_job(number: int, batch: List[Path]) -> HPC_Job:
        # Batches are numbered from 1 and zero-padded to four digits
        batch_id = f"batch_{number:04d}"
        list_file = list_dir / f"{batch_id}_images.txt"
        create_batch_list_file(batch, list_file)
        command = create_inference_command(checked_model, list_file, batch_id, results_root)
        return create_hpc_job(batch_id, command, **job_kwargs)

    return [build_job(number, batch) for number, batch in enumerate(image_batches, start=1)]

In [None]:
create_multinode_jobs_fresh(
    model_path=model_path,
    test_folders="/home/ai_dsx.work/data/projects/AD_tool_test/images/good",
    batch_size=50,
    output_dir="/home/ai_dsx.work/data/projects/AD_tool_test/inference_batch",
)

In [None]:
#| export
def submit_hpc_jobs(jobs: List[HPC_Job], num_nodes: int = 4) -> DistributeHPC:
    """Attach `jobs` to a new DistributeHPC with `num_nodes` workers, start it, and return it."""
    # The constructor takes the worker count; the jobs are attached afterwards via set_jobs
    hpc = DistributeHPC(worker=num_nodes)
    hpc.set_jobs(jobs)
    hpc.start()

    job_count = len(jobs)
    print(f"‚úÖ Submitted {job_count} jobs to {num_nodes} worker nodes")
    return hpc

In [None]:
# Test the exact submit_hpc_jobs function manually
print("üîç Testing submit_hpc_jobs manually...")

def manual_submit_hpc_jobs(jobs_list, num_nodes=4):
    """Step-by-step variant of the submit function that prints each stage (for testing)."""
    print(f"Creating DistributeHPC with worker={num_nodes}")
    hpc = DistributeHPC(worker=num_nodes)

    job_count = len(jobs_list)
    print(f"Setting {job_count} jobs")
    hpc.set_jobs(jobs_list)

    print("Starting distributor")
    hpc.start()

    print(f"‚úÖ Submitted {len(jobs_list)} jobs to {num_nodes} worker nodes")
    return hpc

# Test with manual function
# Two trivial echo jobs serve as the payload for both submission paths below.
# NOTE(review): the same two HPC_Job objects are submitted twice (once per
# function) — confirm that HPC_Job instances may be resubmitted.
test_jobs = [HPC_Job(cmd=["echo test1"]), HPC_Job(cmd=["echo test2"])]
try:
    manual_dist = manual_submit_hpc_jobs(test_jobs, 2)
    print("‚úÖ Manual submit_hpc_jobs works!")
except Exception as e:
    # Any failure is reported rather than raised so the rest of the cell still runs
    print(f"‚ùå Manual submit failed: {e}")

# Test the actual function from the notebook
try:
    actual_dist = submit_hpc_jobs(test_jobs, 2)
    print("‚úÖ Actual submit_hpc_jobs works!")
except Exception as e:
    # Same best-effort reporting as above
    print(f"‚ùå Actual submit failed: {e}")

print("Testing complete.")

In [None]:
model_path=Path("/home/ai_dsx.work/data/projects/AD_tool_test/models/exports/TEST_MULITNODE_task_000_padim_resnet18_18_layer1/weights/torch/model.pt")

In [None]:
create_multinode_jobs_fresh(
     model_path=model_path,
     test_folders="/home/ai_dsx.work/data/projects/AD_tool_test/images/good",
     batch_size=50,
     output_dir="/home/ai_dsx.work/data/projects/AD_tool_test/inference_multinode",
)

In [None]:

# Re-run the corrected function definition
def submit_hpc_jobs_fresh(jobs: List[HPC_Job], num_nodes: int = 4) -> DistributeHPC:
    """Attach `jobs` to a new DistributeHPC with `num_nodes` workers, start it, and return it - FRESH VERSION."""
    # The constructor takes the worker count; the jobs are attached afterwards via set_jobs
    hpc = DistributeHPC(worker=num_nodes)
    hpc.set_jobs(jobs)
    hpc.start()

    job_count = len(jobs)
    print(f"‚úÖ Submitted {job_count} jobs to {num_nodes} worker nodes")
    return hpc


In [None]:
submit_hpc_jobs_fresh(
    jobs = create_multinode_jobs_fresh(
        model_path=model_path,
        test_folders="/home/ai_dsx.work/data/projects/AD_tool_test/images/good",
        batch_size=50,
        output_dir="/home/ai_dsx.work/data/projects/AD_tool_test/inference_multinode",
    ),
    num_nodes=4
)

In [None]:

# Test the fresh version
# Uses the `test_jobs` list created in an earlier cell; a failure is printed
# instead of raised so the notebook keeps running.
try:
    fresh_dist = submit_hpc_jobs_fresh(test_jobs, 2)
    print("‚úÖ Fresh submit_hpc_jobs works!")
except Exception as e:
    print(f"‚ùå Fresh submit failed: {e}")

In [None]:
#| export  
def wait_and_summarize_jobs(distributor: DistributeHPC, jobs: List[HPC_Job]) -> Dict[str, Any]:
    """Wait on the distributor when it supports waiting, then tally the outcome of every job.

    Returns a dict with the keys "total_jobs", "successful_jobs", "failed_jobs"
    and "other_jobs". Errors raised while waiting are printed, not propagated.
    """
    # The distributor may not offer wait_for_all_jobs, so probe for it first
    if not hasattr(distributor, 'wait_for_all_jobs'):
        print("‚ö†Ô∏è DistributeHPC doesn't have wait_for_all_jobs method")
        print("üí° You may need to monitor jobs manually using distributor status")
    else:
        try:
            distributor.wait_for_all_jobs()
            print("‚úÖ All jobs completed!")
        except Exception as e:
            print(f"‚ö†Ô∏è Error waiting for jobs: {e}")

    def outcome(job) -> str:
        # Prefer a get_status() method; fall back to the `state` attribute compared
        # against the job's own JOB_* constants; otherwise the outcome is unknown.
        if hasattr(job, 'get_status'):
            status = job.get_status()
            if status == "completed":
                return "successful"
            return "failed" if status == "failed" else "other"
        if hasattr(job, 'state'):
            state = job.state
            if state == job.JOB_COMPLETED:
                return "successful"
            return "failed" if state == job.JOB_TASK_FAILED else "other"
        return "other"

    tally = {"successful": 0, "failed": 0, "other": 0}
    for job in jobs:
        tally[outcome(job)] += 1

    successful, failed, other = tally["successful"], tally["failed"], tally["other"]
    print(f"üìä Results: ‚úÖ {successful} successful, ‚ùå {failed} failed, üîÑ {other} other")

    return {
        "total_jobs": len(jobs),
        "successful_jobs": successful,
        "failed_jobs": failed,
        "other_jobs": other,
    }

In [None]:
# The job builder defined in this notebook is create_multinode_jobs_fresh;
# no function named create_multinode_jobs is defined or imported here.
create_multinode_jobs_fresh(
     model_path=model_path,
     test_folders="/home/ai_dsx.work/data/projects/AD_tool_test/images/good",
     batch_size=50,
     output_dir="/home/ai_dsx.work/data/projects/AD_tool_test/inference_multinode")

In [None]:
#| export
def run_multinode_inference(
    model_path: Union[str, Path],  # Path to trained anomaly detection model
    test_folders: Union[str, Path, List[Union[str, Path]]],  # Test image sources
    num_nodes: int = 4,  # Number of workers given to the distributor
    batch_size: int = 100,  # Maximum images per batch
    output_dir: str = "multinode_results",  # Output directory path
    wait_for_completion: bool = True,  # Whether to wait for job completion
    **job_kwargs  # Additional HPC job parameters
) -> Dict[str, Any]:  # Returns results dictionary with job information
    """Run multinode anomaly detection inference using HPC jobs with batch size control.

    Builds one job per image batch, submits them, and optionally waits for them.

    The returned dict always holds "jobs", "distributor" and "output_dir"; when
    `wait_for_completion` is true it is extended with the counters returned by
    wait_and_summarize_jobs ("total_jobs", "successful_jobs", "failed_jobs",
    "other_jobs").

    Exceptions raised while building the jobs (a missing model, no images found)
    propagate to the caller.
    """
    print("üöÄ Starting Multinode Inference")

    # Create and submit jobs. The builder defined in this module is
    # create_multinode_jobs_fresh; calling the undefined name
    # create_multinode_jobs would raise NameError.
    jobs = create_multinode_jobs_fresh(model_path, test_folders, batch_size, output_dir, **job_kwargs)
    distributor = submit_hpc_jobs(jobs, num_nodes)

    results = {"jobs": jobs, "distributor": distributor, "output_dir": output_dir}

    # Wait and add the summary counters if requested
    if wait_for_completion:
        job_summary = wait_and_summarize_jobs(distributor, jobs)
        results.update(job_summary)

    return results

In [None]:
# Import test framework
from fastcore.test import *

# Usage Examples

The multinode inference system provides flexible ways to run anomaly detection across multiple HPC nodes.

In [None]:
# Example 1: Single folder inference with batch size
# results = run_multinode_inference(
#     model_path="path/to/padim_model.ckpt",  # Your trained PaDiM model
#     test_folders="test_images/",            # Single test folder
#     num_nodes=4,                           # Use 4 HPC nodes
#     batch_size=50                          # Process max 50 images per batch
# )

In [None]:
# Example 2: Multiple folders with PaDiM model and larger batches
# results = run_multinode_inference(
#     model_path="models/padim_trained.ckpt",         # PaDiM model checkpoint
#     test_folders=["normal_test", "anomaly_test"],   # Multiple test folders
#     num_nodes=8,                                   # Scale to 8 nodes
#     batch_size=100,                                # Max 100 images per batch
#     save_heatmaps=True,                           # Generate visualizations
#     memory_gb=32,                                 # More memory for larger batches
#     gpu_required=True                             # Use GPU acceleration
# )

In [None]:
# Example 3: Mixed input with production settings and optimal batch size
# results = run_multinode_inference(
#     model_path="/home/models/production_padim.ckpt",  # Production model
#     test_folders=[                                    # Mixed input types
#         "batch_1_images/",                           # Directory
#         "batch_2_images/",                           # Directory  
#         "/absolute/path/special_image.jpg"           # Single file
#     ],
#     num_nodes=6,                                     # Moderate parallelism
#     batch_size=75,                                   # Max 75 images per batch
#     output_dir="production_inference_results",        # Custom output
#     wait_for_completion=True,                        # Block until done
#     time_hours=4,                                    # Longer time limit
#     memory_gb=64                                     # High memory for quality
# )

In [None]:
# Smoke-test resolve_test_folders on two project-relative entries
test_paths = ["tutorial", "README.md"]  # a folder expected in the project, and a non-image file

try:
    resolved = resolve_test_folders(test_paths)
except Exception as error:
    print(f"‚ö†Ô∏è  Function test failed: {error}")
else:
    print(f"‚úÖ Function works! Found {len(resolved)} images")

In [None]:
#| hide
import nbdev; nbdev.nbdev_export('10_inference.multinode_infer.ipynb')