In [None]:
#| default_exp inference.anomaly_score_organizer

# Anomaly Score Organizer

> Organize and save images based on their anomaly scores into customizable threshold folders

In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import os
import json
import shutil
import random
from pathlib import Path
from functools import lru_cache
from typing import Union, List, Dict, Any, Optional, Tuple
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import cv2
import matplotlib.patches as patches

from functools import lru_cache
from fastcore.all import *
from fastcore.test import *

## Data Setup for Trial and Error

> Helper functions to set up training and testing data for experimentation


In [None]:
#| export
def save_image_with_metadata(
    image: Image.Image,  # PIL Image to save
    output_path: Union[str, Path],  # Output path
    metadata: Optional[Dict[str, Any]] = None,  # Optional metadata dict
    format: str = "JPEG",  # Image format
    quality: int = 95,  # JPEG quality (1-100)
    optimize: bool = True  # Whether to optimize image
) -> Path:  # Returns saved path
    """
    Write `image` to `output_path` and, when `metadata` is given, a JSON sidecar next to it.

    Missing parent directories are created. The sidecar is named
    `<image stem>_metadata.json` and lives in the same directory as the image.
    `quality` is only forwarded to the encoder when `format` is "JPEG".

    Example:
        img = Image.open("test.jpg")
        save_image_with_metadata(
            img, "output/test.jpg",
            metadata={"anomaly_score": 0.75, "model": "padim"}
        )
        # Creates output/test.jpg and output/test_metadata.json
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Collect encoder options once; only JPEG receives a quality setting
    save_options = {'format': format, 'optimize': optimize}
    if format == "JPEG":
        save_options['quality'] = quality
    image.save(target, **save_options)

    # Nothing more to do when no metadata was supplied
    if metadata is None:
        return target

    sidecar_path = target.with_name(f"{target.stem}_metadata.json")
    with open(sidecar_path, 'w') as handle:
        json.dump(metadata, handle, indent=2)

    return target


In [None]:
from be_vision_ad_tools.inference.prediction_system import *

In [None]:
#| export
# Import from existing modules
from be_vision_ad_tools.inference.prediction_system import (
    predict_image_list_from_file_enhanced,
    predict_image_list
)

from be_vision_ad_tools.inference.multinode_inference import (
    create_smart_batches,
    scan_folder_structure,
    create_batch_list_file
)

# Data

In [None]:
import os
# All sample data lives under the directory named by the DATA_PATH environment variable.
DATA_ROOT = os.getenv('DATA_PATH')
if not DATA_ROOT:
    # Without this guard Path(None, ...) below fails with an opaque TypeError
    raise RuntimeError("Set the DATA_PATH environment variable to the data root before running this notebook")
good_im_path= Path(DATA_ROOT,'malacca','g_imgs')
bad_im_path= Path(DATA_ROOT,'malacca','b_imgs')
MODEL_PATH = Path(DATA_ROOT, 'malacca','model.pt')
print(MODEL_PATH)
print(MODEL_PATH.exists())
# First image of the "good" folder, reused as the sample image in later cells
sm_img = Path(good_im_path).ls()[0]
print(sm_img)
OUTPUT_DIR = Path(DATA_ROOT,'malacca','output')
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
print(f'OUTPUT_DIR: {OUTPUT_DIR.exists()}')


In [None]:
good_im_path.exists(),bad_im_path.exists()


## Core Functions

In [None]:
score_thrs = [0.5, 1]
sorted_score_thrs = sorted(score_thrs)
sorted_score_thrs

In [None]:
anomaly_score = 0.788
# Default to the highest threshold: scores above every threshold land in the last folder
fn_name = str(sorted_score_thrs[-1])
for score_thr in sorted_score_thrs:
    if anomaly_score <= score_thr:
        # The first threshold that is >= the score names the folder; stop searching
        fn_name = str(score_thr)
        break
print(fn_name)

In [None]:
#| export
def determine_score_folder(
    anomaly_score: float,  # Anomaly score (0.0 to 1.0)
    score_thresholds: List[float]  # List of score thresholds (e.g., [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
) -> str:  # Returns the folder name based on the score
    """
    Map an anomaly score to the name of its score folder.

    The thresholds are normalized (sorted) first. The folder is named after the
    smallest threshold that is greater than or equal to the score; a score above
    every threshold goes to the folder of the largest threshold.

    Example:
        score_thresholds = [0.5, 1.0]
        - score 0.3 -> folder "0.5"
        - score 0.7 -> folder "1.0"
    """
    ordered = normalize_score_thresholds(score_thresholds)

    # First matching threshold wins; the largest threshold is the catch-all bucket
    bucket = next(
        (limit for limit in ordered if anomaly_score <= limit),
        ordered[-1],
    )
    return str(bucket)

In [None]:
#| export
def normalize_score_thresholds(
    score_thresholds: Optional[List[float]]  # List of score thresholds or None
) -> List[float]:  # Returns sorted list of thresholds
    """
    Return the thresholds in ascending order, or the defaults when None is given.

    The input list is never modified; a new sorted list is returned.
    Defaults are 0.3 to 1.0 in steps of 0.1.
    """
    if score_thresholds is not None:
        # Sort a copy so the caller's list keeps its original order
        ordered = list(score_thresholds)
        ordered.sort()
        return ordered
    return [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]


In [None]:
normalize_score_thresholds(
	score_thresholds=[0.5,1]
)

In [None]:
score_thresholds = [0.5, 1]
determine_score_folder(anomaly_score, score_thresholds)

In [None]:
sm_img

In [None]:
Path(sm_img).parent.name

In [None]:
#| export
def get_image_parent_folder(
    image_path: Union[str, Path]  # Path to the image
) -> str:  # Returns parent folder name
    """
    Return the name of the directory that directly contains the image.

    Example: 'first/second/image.png' -> 'second'
    A bare file name such as 'image.png' yields an empty string.
    """
    containing_dir = Path(image_path).parent
    return containing_dir.name


In [None]:
print(sm_img)
get_image_parent_folder(sm_img)

In [None]:
parent_dir = Path(sm_img).parent.name
parent_dir

In [None]:
folder_name = '0.5'
Path(OUTPUT_DIR,parent_dir,folder_name)

In [None]:
#| export
def build_target_folder_path(
    output_dir: Path,  # Base output directory
    parent_folder: str,  # Parent folder name from image path
    folder_name: str  # Score-based folder name
) -> Path:  # Returns target folder path
    """
    Join the three components into the destination folder for an image.

    Result: output_dir/parent_folder/folder_name
    The path is only constructed here; nothing is created on disk.
    """
    return Path(output_dir) / parent_folder / folder_name


In [None]:
parent_folder = get_image_parent_folder(sm_img)
folder_name = '0.5'

fn_path = build_target_folder_path(
	OUTPUT_DIR, parent_folder, folder_name)
print(f'fn_path: {fn_path}')

In [None]:
src_im = sm_img
dest_im = fn_path
print(f'src_im: {src_im}')
print(f'dest_im: {dest_im}')


In [None]:
# dest_im is a folder path; create it first, otherwise copy2 would write a *file* named after the folder
Path(dest_im).mkdir(parents=True, exist_ok=True)
shutil.copy2(src_im, dest_im)

In [None]:
#| export

def copy_or_move_file(
    source_path: Union[str, Path],  # Source file path
    dest_path: Union[str, Path],    # Destination file path
    copy_mode: bool = True,         # If True, copy; if False, move
    dry_run: bool = False           # If True, print action instead of executing
) -> None:
    """
    Transfer one file to `dest_path`, either by copying or by moving it.

    In dry-run mode only a description of the action is printed and the
    filesystem is left untouched. Otherwise the destination's parent
    directory is created when missing before the file is transferred.

    Example:
        copy_or_move_file('a.png', 'b/c.png', copy_mode=True, dry_run=True)
        # Dry run: Would copy a.png to b/c.png
    """
    src, dst = Path(source_path), Path(dest_path)

    if dry_run:
        verb = "copy" if copy_mode else "move"
        print(f"Dry run: Would {verb} {src} to {dst}")
        return

    dst.parent.mkdir(parents=True, exist_ok=True)

    if not copy_mode:
        shutil.move(str(src), str(dst))
        return
    # copy2 also carries over file metadata such as timestamps
    shutil.copy2(src, dst)


In [None]:
copy_or_move_file(
	src_im,
	dest_im,
	copy_mode=True,
	dry_run=True)

### For testing the prediction system

In [None]:
rs = predict_image(
	model_path=MODEL_PATH,
	image_path=sm_img,
	heatmap_style='side_by_side',
	save_heatmap=False,
	show_heatmap=False,
	output_dir=OUTPUT_DIR,
	compress=True,
	jpeg_quality=95,
	device='cpu'

)

In [None]:
im_path = rs.get('image_path')
print(im_path)

In [None]:
anomaly_score = rs.get('anomaly_score')
print(anomaly_score)

In [None]:
#| export
def validate_prediction_result(
    result: Dict[str, Any]  # Prediction result dictionary
) -> Tuple[Optional[str], Optional[float]]:  # Returns (image_path, anomaly_score) or (None, None) if invalid
    """
    Pull 'image_path' and 'anomaly_score' out of a prediction result.

    A result counts as valid only when both keys are present and not None.
    Valid results yield (image_path, anomaly_score); anything else yields (None, None).
    """
    path, score = result.get('image_path'), result.get('anomaly_score')
    # Explicit `is None` checks: a score of 0.0 is falsy but perfectly valid
    incomplete = path is None or score is None
    return (None, None) if incomplete else (path, score)


In [None]:
im_p, a_s = validate_prediction_result(rs)
print(im_p)
print(a_s)



In [None]:
#| export
def initialize_folder_stats(
    score_thresholds: List[float]  # List of score thresholds
) -> Dict[str, Dict[str, Any]]:  # Returns initialized stats dictionary
    """
    Create an empty statistics record for every score threshold.

    Keys are the thresholds converted with str(); each value is a fresh
    {'count': 0, 'images': [], 'scores': []} dictionary.
    """
    stats: Dict[str, Dict[str, Any]] = {}
    for threshold in score_thresholds:
        # New list objects per folder so records never share state
        stats[str(threshold)] = {'count': 0, 'images': [], 'scores': []}
    return stats

#| export
def update_folder_stats(
    folder_stats: Dict[str, Dict[str, Any]],  # Folder statistics dictionary
    folder_name: str,  # Folder name
    dest_path: str,  # Destination path
    anomaly_score: float  # Anomaly score
) -> None:
    """
    Record one saved image in the statistics of its folder (modifies `folder_stats` in place).

    Increments the folder's count and appends the destination path and score.
    Raises KeyError when `folder_name` has no record in `folder_stats`.
    """
    record = folder_stats[folder_name]
    record['count'] += 1
    record['images'].append(dest_path)
    record['scores'].append(anomaly_score)


In [None]:
score_thresholds = [0.5, 1]
parent_folder = get_image_parent_folder(im_p)
print(parent_folder)
folder_name = determine_score_folder(a_s, score_thresholds)
print(folder_name)
target_folder = build_target_folder_path(
	OUTPUT_DIR, parent_folder, folder_name)
print(target_folder)


In [None]:
#| export
def save_image_by_score(
    image_path: Union[str, Path],  # Path to the source image
    anomaly_score: float,  # Anomaly score for the image
    output_dir: Path,  # Base output directory
    score_thresholds: List[float],  # List of score thresholds
    dry_run: bool = False,  # If True, do not move or copy files
    copy_mode: bool = True  # If True, copy files; if False, move files
) -> Path:  # Returns the destination path
    """
    Save (copy or move) an image to the appropriate score folder.

    The destination is output_dir/<image parent folder>/<score folder>/<image name>,
    where the score folder comes from `determine_score_folder`.

    With `dry_run=True` the planned action is printed and nothing is written:
    no file is copied or moved and no directory is created.

    Returns the destination path where the image was (or would be) saved.
    Raises FileNotFoundError when the source image does not exist.
    """
    image_path = Path(image_path)

    if not image_path.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Destination: output_dir / parent folder of the image / score folder / file name
    folder_name = determine_score_folder(anomaly_score, score_thresholds)
    target_folder = build_target_folder_path(
        output_dir, get_image_parent_folder(image_path), folder_name
    )
    dest_path = target_folder / image_path.name

    # copy_or_move_file creates the destination directory itself and already
    # implements dry-run reporting, so a dry run leaves the filesystem untouched.
    copy_or_move_file(image_path, dest_path, copy_mode=copy_mode, dry_run=dry_run)

    return dest_path

In [None]:
dest = save_image_by_score(
	im_p,
	a_s,
	OUTPUT_DIR,
	score_thresholds,
	dry_run=True,
	copy_mode=True
)

In [None]:
dest

In [None]:
determine_score_folder(a_s, score_thresholds)

In [None]:
#| export
def process_single_image_result(
    result: Dict[str, Any],  # Prediction result dictionary
    output_dir: Path,  # Base output directory
    score_thresholds: List[float],  # List of score thresholds
    copy_mode: bool,  # Whether to copy or move
    dry_run: bool = False # If True, do not move or copy files
) -> Optional[Dict[str, Any]]:  # Returns dict with folder_name and dest_path, or None if failed
    """
    Save the image of one prediction result into its score folder.

    On success returns a dict with 'folder_name', 'dest_path' (as str) and
    'anomaly_score' (as float). Returns None, after printing a message, when
    the result lacks an image path or score, or when saving raises an exception.
    """
    image_path, anomaly_score = validate_prediction_result(result)

    has_required_data = image_path is not None and anomaly_score is not None
    if not has_required_data:
        print(f"‚ö†Ô∏è  Skipping result with missing data: {result}")
        return None

    # Best effort: one bad image must not abort a whole batch, so any error is reported and skipped
    try:
        saved_to = save_image_by_score(
            image_path=image_path,
            anomaly_score=anomaly_score,
            output_dir=output_dir,
            score_thresholds=score_thresholds,
            copy_mode=copy_mode,
            dry_run=dry_run
        )
        outcome = {
            'folder_name': determine_score_folder(anomaly_score, score_thresholds),
            'dest_path': str(saved_to),
            'anomaly_score': float(anomaly_score)
        }
    except Exception as e:
        print(f"‚ùå Error processing {image_path}: {e}")
        return None

    return outcome

In [None]:
sn_rs_dict = process_single_image_result(
	rs,
	OUTPUT_DIR,
	score_thresholds,
	copy_mode=True,
	dry_run=True
)


In [None]:
sn_rs_dict

### Next: draw the index number on the image

In [None]:

# cv2.imread takes a string path and returns pixels in BGR order;
# convert to RGB before handing the array to PIL, which assumes RGB.
image = cv2.cvtColor(cv2.imread(str(sm_img)), cv2.COLOR_BGR2RGB)
image = Image.fromarray(image)
type(image)

In [None]:
print(image.size)

In [None]:
img_copy = image.copy().convert("RGBA")
print(img_copy.size)

In [None]:
# transparent overlay
overlay = Image.new('RGBA', img_copy.size, (255, 255, 255, 0))
draw = ImageDraw.Draw(overlay)
font = ImageFont.load_default()
index = 1
text = f"#{index}"
text

In [None]:
bbox = draw.textbbox(
	(0,0),
	text,
	font=font
)
bbox

In [None]:
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
text_width, text_height

In [None]:
# padding
padding = 10
box_width = text_width + 2 * padding
box_height = text_height + 2 * padding
box_width, box_height


In [None]:
# different positions
# PIL's Image.size is (width, height), so unpack width first
im_w, im_h = img_copy.size
print(im_h, im_w)
# top left
top_left_x, top_left_y = padding, padding
print(f'top_left: {top_left_x}, {top_left_y}')
# top right
top_right_x, top_right_y = im_w - box_width - padding, padding
print(f'top_right: {top_right_x}, {top_right_y}')
# bottom left
bottom_left_x, bottom_left_y = padding, im_h - box_height - padding
print(f'bottom_left: {bottom_left_x}, {bottom_left_y}')
# bottom right
bottom_right_x, bottom_right_y = im_w - box_width - padding, im_h - box_height - padding
print(f'bottom_right: {bottom_right_x}, {bottom_right_y}')


In [None]:
# Yellow color for text
txt_color = (255, 255, 0)
# RGBA color for background (semi-transparent black)
bg_color= (0, 0, 0, 180)
draw.rectangle(
	[
		top_left_x,
		top_left_y,
		top_left_x + box_width,
		top_left_y + box_height],
	fill=bg_color
)
 # Draw text
draw.text(
    (top_left_x + padding, top_left_y + padding),
    text,
    font=font,
    fill=txt_color
)

result = Image.alpha_composite(img_copy, overlay)
rs_img = result.convert("RGB")
rs_img


In [None]:
#| export
def annotate_image_with_index(
    image: Union[Image.Image, np.ndarray],  # PIL Image or numpy array
    index: int,  # Index number to display
    font_size: int = 40,  # Font size for the index number
    position: str = "top_left",  # Position: "top_left", "top_right", "bottom_left", "bottom_right"
    text_color: Tuple[int, int, int] = (255, 0, 0),  # RGB color for text (default to red)
) -> Image.Image:  # Returns annotated PIL Image
    """
    Add an index number to an image without a background box.

    The input is not modified: the number is drawn on a transparent overlay
    that is composited onto an RGBA copy, and the result is returned as RGB.
    The text is kept 10 pixels away from the image edges. An unrecognized
    `position` falls back to "top_left".
    """
    # Convert to PIL Image if numpy array
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Work on an RGBA copy so the overlay can be alpha-composited
    img_copy = image.copy().convert("RGBA")
    overlay = Image.new('RGBA', img_copy.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)

    font = _load_index_font(font_size)

    # The label is the bare number (no '#' prefix)
    text = f"{index}"

    # Measure the rendered text; bbox is (left, top, right, bottom) relative to the draw origin
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]

    # Spacing between the text and the image edges
    padding = 10
    img_width, img_height = img_copy.size
    right_x = img_width - text_width - padding
    bottom_y = img_height - text_height - padding

    corners = {
        "top_left": (padding, padding),
        "top_right": (right_x, padding),
        "bottom_left": (padding, bottom_y),
        "bottom_right": (right_x, bottom_y),
    }
    x, y = corners.get(position, corners["top_left"])

    # The glyph box usually starts at a small offset from the draw origin (bbox[0], bbox[1]).
    # Subtract it so the visible text sits exactly `padding` pixels from the edges;
    # otherwise bottom/right labels end up closer to the border than intended.
    draw.text(
        (x - bbox[0], y - bbox[1]),
        text,
        font=font,
        fill=text_color
    )

    # Composite the overlay (with text) onto the image copy and drop the alpha channel
    result = Image.alpha_composite(img_copy, overlay)
    return result.convert("RGB")


def _load_index_font(font_size: int):
    """Return a bold TrueType font at `font_size`, falling back to Pillow's built-in default font."""
    candidate_fonts = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
    )
    for font_file in candidate_fonts:
        try:
            return ImageFont.truetype(font_file, font_size)
        except (OSError, ImportError):
            # Font file missing/unreadable or FreeType unavailable: try the next candidate
            continue

    try:
        # Newer Pillow releases can scale the built-in font; honor font_size when possible
        return ImageFont.load_default(size=font_size)
    except (TypeError, OSError, ImportError):
        # Older Pillow (no `size` argument) or no FreeType: fixed-size bitmap font
        return ImageFont.load_default()

In [None]:
annotate_image_with_index(
	font_size=60,
	image=image,
	index=1,
)

### Single-image functionality (organize each image by score)

In [None]:
f_map = {}
for i in score_thresholds:
	nm = str(i)
	f_p = Path(OUTPUT_DIR,nm)
	f_map[nm] = f_p
f_map

In [None]:
#| export
def create_score_folders(
    output_dir: Path,  # Base output directory
    score_thresholds: List[float],  # List of score thresholds
) -> Dict[str, Path]:  # Returns dict mapping threshold strings to folder paths
    """
    Make one sub-folder of `output_dir` per score threshold.

    Folder names are the thresholds converted with str(). Existing folders are
    reused. A short listing of the folders is printed, and the mapping from
    folder name to folder path is returned.
    """
    base_dir = Path(output_dir)
    folder_map = {
        str(threshold): Path(base_dir, str(threshold))
        for threshold in score_thresholds
    }

    for folder in folder_map.values():
        folder.mkdir(parents=True, exist_ok=True)

    print(f"‚úÖ Created {len(folder_map)} score folders in {base_dir}")
    # Listing is ordered by folder name (string order)
    for name in sorted(folder_map):
        print(f"   üìÅ {name}: {folder_map[name]}")

    return folder_map

In [None]:
output_dir = Path('output_dir')
create_score_folders(output_dir, score_thresholds)

In [None]:
{str(i):{'count':0,'images':[],'scores':[]} for i in score_thresholds}

In [None]:
#| export
def initialize_folder_stats(
    score_thresholds: List[float]  # List of score thresholds
) -> Dict[str, Dict[str, Any]]:  # Returns initialized stats dictionary
    """
    Build the starting statistics: one empty record per score threshold.

    The result maps str(threshold) to {'count': 0, 'images': [], 'scores': []}.
    """
    folder_names = [str(threshold) for threshold in score_thresholds]
    # dict() over pairs keeps first-seen order and gives each folder its own lists
    return dict(
        (name, {'count': 0, 'images': [], 'scores': []})
        for name in folder_names
    )


In [None]:
m_fst = initialize_folder_stats(score_thresholds)
m_fst


In [None]:
#| export
def update_folder_stats(
    folder_stats: Dict[str, Dict[str, Any]],  # Folder statistics dictionary
    folder_name: str,  # Folder name
    dest_path: str,  # Destination path
    anomaly_score: float  # Anomaly score
) -> None:
    """
    Add one image to a folder's statistics, updating `folder_stats` in place.

    The folder's 'count' grows by one and the destination path and score are
    appended to its 'images' and 'scores' lists. A `folder_name` without an
    existing record raises KeyError.
    """
    entry = folder_stats[folder_name]
    entry['count'] = entry['count'] + 1
    for key, value in (('images', dest_path), ('scores', anomaly_score)):
        entry[key].append(value)

In [None]:
mock_folder_name = '0.5'
mock_dest_path = 'path/to/image.png'
mock_as = 0.5
update_folder_stats(
	folder_stats=m_fst,
	folder_name=mock_folder_name,
	dest_path=mock_dest_path,
	anomaly_score=mock_as
)
m_fst

In [None]:
#| export
def print_organization_summary(
    score_thresholds: List[float],  # List of score thresholds
    folder_stats: Dict[str, Dict[str, Any]],  # Folder statistics dictionary
    failed_count: int  # Count of failed image processing
) -> None:
    """
    Print totals and a per-folder breakdown of the organization run.

    Folders are listed in ascending threshold order together with the score
    range they cover, their image count and their mean score. A threshold
    with no record in `folder_stats` is reported with count 0 and score 0.
    """
    print("\nüìä ORGANIZATION SUMMARY")
    print("="*70)

    processed_total = sum(record['count'] for record in folder_stats.values())
    print(f"Total images processed successfully: {processed_total}")
    print(f"Total images failed: {failed_count}")

    print("\n--- Folder Statistics ---")
    ordered = sorted(score_thresholds)

    # Pair each threshold with the one below it; the lowest threshold has no lower bound
    for lower, upper in zip([None, *ordered], ordered):
        folder_name = str(upper)
        # Thresholds that received no images may be absent from folder_stats
        record = folder_stats.get(folder_name, {'count': 0, 'scores': []})
        scores = record['scores']
        mean_score = float(np.mean(scores)) if scores else 0.0

        if lower is None:
            score_range = f"score <= {upper}"
        else:
            score_range = f"{lower} < score <= {upper}"

        print(f"Folder '{folder_name}' ({score_range}): Count = {record['count']}, Avg Score = {mean_score:.4f}")


In [None]:
m_fst
print_organization_summary(
	score_thresholds,
	m_fst,
	0
)


In [None]:
failed_count =1
total_prediction_results = 4
m_fst = {
	'0.5': {'count': 1, 'images': [], 'scores': [0.1]},
	'1.0': {'count': 2, 'images': [], 'scores': [0.6, 0.8]}
}
output_dir = Path('output_dir')
score_thresholds = [0.5, 1.0]
fs_agg = {}
for fl,st in m_fst.items():
	print(fl,st)
	avg_score = float(np.mean(st['scores'])) if st['scores'] else 0.0
	print(avg_score)
	fs_agg[fl] = {
		'count': st['count'],
		'avg_score': avg_score
	}
print(fs_agg)








In [None]:
#| export
def build_organization_stats(
    output_dir: Path,  # Base output directory
    score_thresholds: List[float],  # List of score thresholds
    folder_stats: Dict[str, Dict[str, Any]],  # Folder statistics dictionary
    total_prediction_results: int,  # Total number of prediction results initially
    failed_count: int  # Count of failed image processing
) -> Dict[str, Any]:  # Returns comprehensive organization statistics
    """
    Condense the per-folder records into one summary dictionary.

    Each folder is reduced to its 'count' and 'avg_score' (0.0 when it holds no
    scores). 'total_processed' is the sum of all folder counts.

    Note: `total_prediction_results` is accepted but not used in the result.
    """
    per_folder = {
        name: {
            'count': record['count'],
            'avg_score': float(np.mean(record['scores'])) if record['scores'] else 0.0
        }
        for name, record in folder_stats.items()
    }

    processed_total = 0
    for summary in per_folder.values():
        processed_total += summary['count']

    return {
        'output_dir': str(output_dir),
        'score_thresholds': score_thresholds,
        'folder_stats': per_folder,
        'total_processed': processed_total,
        'failed_count': failed_count
    }


In [None]:
build_organization_stats(
	output_dir=output_dir,
	score_thresholds=score_thresholds,
	folder_stats=m_fst,
	total_prediction_results=total_prediction_results,
	failed_count=failed_count
)

In [None]:
import tempfile
m_fst = {
	'0.5': {'count': 1, 'images': ['/path/to/img1.png'], 'scores': [0.1]},
	'1.0': {'count': 2, 'images': ['/path/to/img2.png', '/path/to/img3.png'], 'scores': [0.6, 0.8]}
}
with tempfile.TemporaryDirectory() as tmpdir:
	o_b = Path(tmpdir)/"meta_test"
	print(o_b)
	o_b.mkdir()
	folder_05 = o_b / "0.5"
	folder_10 = o_b / "1.0"
	folder_05.mkdir(); folder_10.mkdir()
	print(folder_05,folder_10)

	folder_map = {
		'0.5': folder_05,
		'1.0': folder_10
	}
	for k, v in m_fst.items():
		print(k,v)
		f_n = folder_map.get(k)
		print(f_n)
		meta_path = f_n / "metadata.json"
		print(meta_path)
		avg_score = float(np.mean(v['scores'])) if v['scores'] else 0.0
		print(avg_score)

		metadata = {
			'folder_name': k,
			'count': v['count'],
			'avg_score': avg_score,
			'images': v['images'] # List of destination paths
		}
		with open(meta_path, 'w') as f:
			json.dump(metadata, f, indent=4)
		print(metadata)




In [None]:
import tempfile
m_fst = {
	'0.5': {'count': 1, 'images': ['/path/to/img1.png'], 'scores': [0.1]},
	'1.0': {'count': 2, 'images': ['/path/to/img2.png', '/path/to/img3.png'], 'scores': [0.6, 0.8]}
}
with tempfile.TemporaryDirectory() as tmpdir:
	o_b = Path(tmpdir)/"meta_test"
	print(o_b)
	o_b.mkdir()
	folder_05 = o_b / "0.5"
	folder_10 = o_b / "1.0"
	folder_05.mkdir(); folder_10.mkdir()
	print(folder_05,folder_10)

	folder_map = {
		'0.5': folder_05,
		'1.0': folder_10
	}
	for k, v in m_fst.items():
		print(k,v)
		f_n = folder_map.get(k)
		print(f_n)
		meta_path = f_n / "metadata.json"
		print(meta_path)
		avg_score = float(np.mean(v['scores'])) if v['scores'] else 0.0
		print(avg_score)

		metadata = {
			'folder_name': k,
			'count': v['count'],
			'avg_score': avg_score,
			'images': v['images'] # List of destination paths
		}
		with open(meta_path, 'w') as f:
			json.dump(metadata, f, indent=4)
		print(metadata)




In [None]:
#| export
def save_all_folder_metadata(
    folder_stats: Dict[str, Dict[str, Any]],  # Folder statistics dictionary
    folder_map: Dict[str, Path]  # Map of folder names to their paths

) -> None:
    """
    Write a metadata.json into every score folder that has statistics.

    Each file holds the folder name, image count, mean score (0.0 when there
    are no scores) and the list of image destination paths. Folders without
    an entry in `folder_map` are skipped with a warning; a failure to write
    one file is logged and does not stop the remaining folders.
    """
    # Imported here because the module does not import logging at the top
    import logging
    log = logging.getLogger(__name__)

    # Only set up default logging when this logger has no handler of its own
    if not log.handlers:
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    log.info("Saving metadata for each folder...")
    for name, record in folder_stats.items():
        destination = folder_map.get(name)
        if not destination:
            log.warning(f"Skipping metadata save for unknown folder: {name}")
            continue

        json_path = destination / "metadata.json"
        scores = record['scores']
        payload = {
            'folder_name': name,
            'count': record['count'],
            'avg_score': float(np.mean(scores)) if scores else 0.0,
            'images': record['images'] # List of destination paths
        }

        try:
            with open(json_path, 'w') as handle:
                json.dump(payload, handle, indent=4)
            log.info(f"Saved metadata for '{name}' to {json_path}")
        except Exception as e:
            log.error(f"Failed to save metadata for folder '{name}': {e}")


In [None]:
ot_dir = Path('output_dir')
score_thresholds = [0.5, 1.0]
folder_map = create_score_folders(ot_dir, score_thresholds)
folder_map


In [None]:
thresholds = [0.5, 1.0]
thr_list = {}
for i in thresholds:
	fn_ = folder_map.get(str(i))
	thr_list[str(i)] = [fn_ / f"img_{j}.png" for j in range(10)]



In [None]:
thr_list

In [None]:

m_fst = {
	'0.5': {'count': 10, 'images': [i.as_posix() for i in thr_list['0.5']], 'scores': [0.1 for _ in range(10)]},
	'1.0': {'count': 10, 'images': [i.as_posix() for i in thr_list['1.0']], 'scores': [0.6 for _ in range(10)]}
}
save_all_folder_metadata(
	folder_stats=m_fst,
	folder_map=folder_map
)

In [None]:

save_all_folder_metadata(
	folder_stats=m_fst,
	folder_map=folder_map
)

In [None]:
rs

In [None]:
#| export
def organize_images_by_score(
    prediction_results: List[Dict[str, Any]],  # List of prediction results from predict_image_list
    output_dir: Union[str, Path],  # Base output directory
    score_thresholds: Optional[List[float]] = None,  # Score thresholds (None -> defaults 0.3 ... 1.0)
    copy_mode: bool = True,  # If True, copy files; if False, move files
    dry_run: bool = False,  # If True, don't actually move/copy files
    save_metadata: bool = True  # If True, save metadata JSON for each folder
) -> Dict[str, Any]:  # Returns organization statistics
    """
    Organize images into folders based on their anomaly scores.

    Every prediction result is handed to `process_single_image_result`, which
    saves the image below `output_dir` in the folder matching its score.
    Results that cannot be processed are counted as failures and skipped.

    With `dry_run=True` nothing is written: no folders are created, no files
    are copied or moved and no metadata files are saved; only the planned
    actions and the summary are printed.

    Returns the dictionary produced by `build_organization_stats`.
    """
    output_dir = Path(output_dir)

    # Resolve None to the default thresholds and work on a sorted copy. The default
    # used to be a shared list literal that was handed back to the caller in the
    # returned statistics, where mutating it would have changed later calls.
    thresholds = normalize_score_thresholds(score_thresholds)

    print("\nüóÇÔ∏è  ORGANIZING IMAGES BY ANOMALY SCORE")
    print("="*70)
    print(f"üìÇ Output directory: {output_dir}")
    print(f"üìä Score thresholds: {thresholds}")
    print(f"üìã Total images: {len(prediction_results)}")
    print(f"üîÑ Mode: {'COPY' if copy_mode else 'MOVE'}")

    # A dry run must not touch the filesystem, so folders are only created for real runs.
    folder_map: Dict[str, Path] = {}
    if not dry_run:
        output_dir.mkdir(parents=True, exist_ok=True)
        folder_map = create_score_folders(output_dir, thresholds)

    # Initialize statistics
    folder_stats = initialize_folder_stats(thresholds)
    failed_count = 0

    print("\nüì¶ Processing images...")

    # Process each image
    for result in tqdm(prediction_results, desc="Organizing images"):
        processed = process_single_image_result(
            result=result,
            output_dir=output_dir,
            score_thresholds=thresholds,
            copy_mode=copy_mode,
            dry_run=dry_run
        )

        if processed is None:
            failed_count += 1
            continue

        # Update statistics
        update_folder_stats(
            folder_stats=folder_stats,
            folder_name=processed['folder_name'],
            dest_path=processed['dest_path'],
            anomaly_score=processed['anomaly_score']
        )

    # Metadata would list files that a dry run never wrote, so it is skipped then.
    # NOTE(review): metadata goes to output_dir/<threshold>/ (folder_map), while images are
    # saved under output_dir/<image parent folder>/<threshold>/ — confirm this is intended.
    if save_metadata and not dry_run:
        save_all_folder_metadata(
            folder_stats=folder_stats,
            folder_map=folder_map
        )

    # Print summary
    print_organization_summary(
        score_thresholds=thresholds,
        folder_stats=folder_stats,
        failed_count=failed_count
    )

    # Build and return statistics; the total is the number of results received,
    # failures included, as the parameter's description states.
    return build_organization_stats(
        output_dir, thresholds, folder_stats,
        len(prediction_results), failed_count
    )

In [None]:
len(rs)

In [None]:
im_p, a_s = validate_prediction_result(rs)
im_p, a_s

In [None]:
save_image_by_score(
	image_path=im_p,
	anomaly_score=a_s,
	output_dir=Path('output_dir'),
	score_thresholds=[0.5, 1.0],
	copy_mode=True,
	dry_run=True
)

In [None]:
organize_images_by_score(
	prediction_results=[rs],
	output_dir=Path('output_dir'),
	score_thresholds=[0.5, 1.0],
	copy_mode=True,
	dry_run=True,
	save_metadata=True
)

# Dataframe

In [None]:
#| export
def create_image_index_dataframe(
    image_list: Union[List[Union[str, Path]], str, Path]  # List of images or path to text file
) -> pd.DataFrame:  # Returns dataframe with index and image paths
    """
    Build a dataframe that assigns a running index to every image.

    `image_list` is either a sequence of image paths or the path of a text
    file with one image path per line (blank lines and lines starting with
    '#' are ignored). The dataframe has the columns 'index' (0-based),
    'image_path' and 'image_name', and is used to reference images by index
    number when creating posters.

    Raises FileNotFoundError when a text-file path is given that is not an existing file.
    """
    if not isinstance(image_list, (str, Path)):
        # Already a collection of paths: normalize every entry to str
        images = [str(img) for img in image_list]
    else:
        list_file = Path(image_list)
        if not (list_file.exists() and list_file.is_file()):
            raise FileNotFoundError(f"Image list file not found: {image_list}")
        with open(list_file, 'r') as handle:
            stripped_lines = (raw.strip() for raw in handle)
            # Keep real entries only: no blanks, no '#' comments
            images = [entry for entry in stripped_lines if entry and not entry.startswith('#')]

    df = pd.DataFrame({
        'index': range(len(images)),
        'image_path': images,
        'image_name': [Path(img).name for img in images]
    })

    print(f"üìä Created image index dataframe with {len(df)} images")

    return df

In [None]:
df=create_image_index_dataframe(
	image_list=thr_list['0.5'])
df

## Poster creation

In [None]:
#| export
def load_image_fast(
	image_path: Union[str, Path], # path to the image
	cache: bool = True, # whether to cache the image
)-> Image.Image: # returns the image
	"""
	Load an image from disk as an RGB PIL image.

	With `cache=True` the decoded image is taken from the `_load_image_cached`
	LRU cache, so repeated requests for the same path skip disk access. A copy
	of the cached image is returned, so in-place edits made by the caller
	(for example drawing an annotation) cannot alter the cached original.

	With `cache=False` the file is opened, converted and closed on every call.
	"""
	image_path = Path(image_path)
	if cache:
		# The cached object is shared between all callers; hand out a copy.
		return _load_image_cached(image_path).copy()
	# `Image.open` is lazy; the context manager releases the file handle as
	# soon as the converted RGB image has been produced.
	with Image.open(image_path) as img:
		return img.convert('RGB')




In [None]:
#| export
@lru_cache(maxsize=1000)
def _load_image_cached(image_path_str: Union[str, Path]) -> Image.Image:
	"""
	Decode the image at `image_path_str` to RGB and memoize the result.

	Backing store for `load_image_fast(..., cache=True)`: up to 1000 decoded
	images are kept, keyed by the (hashable) path argument. All callers get
	the same cached object back, so the result should be treated as read-only.
	"""
	# Pillow opens files lazily; the context manager releases the handle
	# deterministically instead of leaving it to garbage collection.
	with Image.open(image_path_str) as img:
		return img.convert('RGB')


In [None]:
sm_img

In [None]:
%%time
load_image_fast(sm_img, cache=True)

In [None]:
%%time
load_image_fast(sm_img, cache=False)

In [None]:
images=[]
im_path = Path('output_dir/0.5')
for i in ['.png']:
	images.extend(im_path.glob(f"*{i}"))
sorted(set(images))

In [None]:
#| export
def get_images_from_score_folder(
	path: Union[str, Path], # path to the score folder
)->List[Path]:
	"""
	List the image files that sit directly inside a score folder.

	A file counts as an image when its extension, compared case-insensitively,
	is one of .jpg, .jpeg, .png, .bmp, .tiff or .tif, so `a.jpg`, `b.PNG` and
	`c.Jpeg` are all found. Sub-folders are not searched, and directories are
	never returned even when their name ends in an image extension.

	Returns the matching paths in sorted order; the list is empty when `path`
	is not an existing directory.
	"""
	path = Path(path)
	if not path.is_dir():
		return []
	image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
	# One directory scan with a case-folded suffix test replaces the twelve
	# lower/upper-case glob passes and also catches mixed-case extensions.
	return sorted(
		entry for entry in path.iterdir()
		if entry.is_file() and entry.suffix.lower() in image_extensions
	)



In [None]:
get_images_from_score_folder(Path('output_dir/0.5'))

In [None]:
num_images = 10
images_per_poster = 3
print((num_images/images_per_poster))
print(int(np.ceil(num_images/images_per_poster)))

In [None]:
#| export
def calculate_num_posters_needed(
	num_images: int, # number of images in the folder
	images_per_poster: int, # number of images per poster
)->int:
	"""
	Return how many posters are required to show `num_images` images when
	each poster holds at most `images_per_poster` of them.

	The quotient is rounded up, so a partially filled last poster counts.
	"""
	posters_exact = num_images / images_per_poster
	return int(np.ceil(posters_exact))

In [None]:
test_eq(calculate_num_posters_needed(10, 3), 4)
test_eq(calculate_num_posters_needed(11, 3), 4)
test_eq(calculate_num_posters_needed(12, 3), 4)
test_eq(calculate_num_posters_needed(13, 3), 5)
test_eq(calculate_num_posters_needed(14, 3), 5)
test_eq(calculate_num_posters_needed(15, 3), 5)


In [None]:
im_list = thr_list['0.5']
print(f' Number of images: {len(im_list)}')
images_per_poster=3
poster_idx =3
print(f' Poster index: {poster_idx}')
start_idx = poster_idx * images_per_poster
end_idx = start_idx + images_per_poster
end_idx_clipped = min(end_idx, len(im_list))
print(f'start_idx: {start_idx}')
print(f'end_idx: {end_idx}')
print(f'end_idx_clipped: {end_idx_clipped}')

poster_images = im_list[start_idx:end_idx_clipped]
print(f' Number of images in poster: {len(poster_images)}')




In [None]:
#| export
def get_poster_image_subset(
	im_list: List[Path], # list of image paths
	poster_idx: int, # index of the poster (starts at 0)
	images_per_poster: int, # number of images per poster
)->List[Path]:
	"""
	Return the slice of `im_list` that belongs on poster number `poster_idx`.

	Posters are filled consecutively with `images_per_poster` images each.
	When the list length is not a multiple of `images_per_poster`, the last
	poster receives the remaining, fewer images.
	"""
	first = poster_idx * images_per_poster
	# Slicing stops at the end of the list by itself, so no explicit clip is needed.
	return im_list[first:first + images_per_poster]



In [None]:
test_eq(get_poster_image_subset(im_list, 3, 3), im_list[9:12])
test_eq(get_poster_image_subset(im_list, poster_idx=4, images_per_poster=3), im_list[12:15])

In [None]:
# number of rows in the poster
num_images = 10
poster_cols = 3
print(f'num_images/poster_cols: {num_images/poster_cols}')
print('\nposter rows:')
print(int(np.ceil(num_images/poster_cols)))


In [None]:
#| export
def get_poster_row_col(
	num_images: int, # number of images in the folder
	poster_cols: int, # number of columns in the poster
)->Tuple[int, int]:
	"""
	Return the poster grid shape as `(rows, cols)`.

	The column count is passed through unchanged; the row count is the number
	of rows needed to place `num_images` images, rounded up.
	"""
	rows_exact = num_images / poster_cols
	return int(np.ceil(rows_exact)), poster_cols


In [None]:
test_eq(get_poster_row_col(num_images=10, poster_cols=3), (4, 3))
test_eq(get_poster_row_col(num_images=11, poster_cols=3), (4, 3))
test_eq(get_poster_row_col(num_images=12, poster_cols=3), (4, 3))
test_eq(get_poster_row_col(num_images=13, poster_cols=3), (5, 3))
test_eq(get_poster_row_col(num_images=14, poster_cols=3), (5, 3))
test_eq(get_poster_row_col(num_images=15, poster_cols=3), (5, 3))


In [None]:
#| export
def create_test_image_with_block(
    img_width: int,  # Width of the image
    img_height: int, # Height of the image
    block_size: int,   # Size of the white square block
    open_cv: bool = True # if True, return open_cv array
) -> Union[np.ndarray, "Image.Image"]:
    """
    Create a black grayscale test image with a white square in the centre.

    The square is `block_size` pixels on a side and is clipped to the image
    bounds when it is larger than the image.

    Returns:
        With `open_cv=True` (the default) the raw 2-D `uint8` array of shape
        `(img_height, img_width)`; otherwise the same pixels wrapped in a PIL
        image of mode "L".
    """
    canvas = np.zeros((img_height, img_width), dtype=np.uint8)

    # Centre the block, then clip each edge to the canvas.
    raw_left = (img_width - block_size) // 2
    raw_top = (img_height - block_size) // 2
    left, right = max(0, raw_left), min(img_width, raw_left + block_size)
    top, bottom = max(0, raw_top), min(img_height, raw_top + block_size)

    canvas[top:bottom, left:right] = 255

    if open_cv:
        return canvas
    # Only build the PIL wrapper when it is actually requested.
    return Image.fromarray(canvas, mode='L')


In [None]:
sm_img_ = create_test_image_with_block(200, 150, 50)
sm_img_.shape

In [None]:
f_w = sm_img_.shape[1]/100
f_w

In [None]:
f_h = sm_img_.shape[0]/100
f_h

In [None]:
### Working on figure subplots

In [None]:
fig, axs = plt.subplots(1, 1, figsize=(f_w, f_h+1))
axs.imshow(sm_img_)

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(f_w, f_h+1))
axs.imshow(sm_img_)

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(f_w, f_h+1))
axs[0].imshow(sm_img_)

In [None]:
fig, axs = plt.subplots(2, 2, figsize=(f_w, f_h+1))
axs[0].imshow(sm_img_)

In [None]:
r, c = 2, 2
r, c

In [None]:
# Scratch check of the axes normalisation later used in create_poster_figure:
# whatever shape plt.subplots returns, `axes` ends up indexable as axes[row][col].
r, c=1, 1
fig, axs = plt.subplots(r, c, figsize=(f_w, f_h+1))
if r ==1 and c ==1:
	axes = [[axs]]
elif r==1:
	# must be `elif`: a plain `if` here would overwrite the 1x1 case above
	axes = [axs]
elif c==1:
	axes = [[ax] for ax in axs]
else:
	axes = axs
# `axes` is a nested container, so index into it before drawing
axes[0][0].imshow(sm_img_)


In [None]:
#| export
def create_poster_figure(
    grid_rows: int,  # Number of rows in the grid
    grid_cols: int,  # Number of columns in the grid
    image_size: Tuple[int, int]  # Size of each image (width, height)
) -> Tuple[plt.Figure, List[List[plt.Axes]]]:  # Returns (figure, axes array)
    """
    Create a matplotlib figure with a `grid_rows` x `grid_cols` grid of subplots.

    Each cell is sized as `image_size / 100` inches (width, height); one extra
    inch of height is added to leave room for a title.

    Returns:
        `(fig, axes)` where `axes` is always two-dimensional and indexed as
        `axes[row][col]`, including for single-row, single-column and 1x1
        grids.
    """
    cell_width = image_size[0] / 100
    cell_height = image_size[1] / 100
    fig_width = grid_cols * cell_width
    fig_height = grid_rows * cell_height + 1  # Extra space for title

    # squeeze=False makes matplotlib return a 2-D axes array for every grid
    # shape, so no per-shape normalisation is required afterwards.
    fig, axes = plt.subplots(
        grid_rows, grid_cols,
        figsize=(fig_width, fig_height),
        squeeze=False,
    )

    return fig, axes

In [None]:
fig, axes_ = create_poster_figure(2, 2, (24, 24))

In [None]:
#| export
def set_poster_title(
    fig: plt.Figure,  # Matplotlib figure
    title: Optional[str],  # Title text (optional)
    poster_index: int  # Index of the poster (0-based)
) -> None:
    """
    Put a bold heading of the form "<title> - Poster <n>" on the figure.

    `n` is `poster_index + 1`, i.e. posters are numbered from 1 in the
    heading. Nothing happens when `title` is empty or None.
    """
    if not title:
        return
    heading = f"{title} - Poster {poster_index + 1}"
    fig.suptitle(heading, fontsize=14, weight='bold')

In [None]:
r, c, = 2, 4
fig, axes_ = create_poster_figure(r, c, (24, 24))
set_poster_title(fig, "Score Folder 0.5", poster_index=0)

In [None]:
image_name = 'img_1.png'
df_m = df.query('image_name == @image_name').copy()
df_m


In [None]:
df_m.iloc[0]['index']

In [None]:
#| export
def find_image_index_in_dataframe(
    image_name: str,  # Name of the image file
    df: pd.DataFrame  # DataFrame with image indices
) -> Optional[int]:  # Returns index if found, otherwise None
    """
    Look up the `index` value of the row whose `image_name` equals `image_name`.

    When several rows match, the value of the first one is returned; when no
    row matches, the result is None.
    """
    matches = df.query('image_name == @image_name')
    if matches.empty:
        return None
    first_row = matches.iloc[0]
    return first_row['index']



In [None]:
image_name = 'img_0.png'
test_eq(find_image_index_in_dataframe(image_name, df), 0)
image_name='img_2.png'
test_eq(find_image_index_in_dataframe(image_name, df), 2)
image_name='img_3.png'
test_eq(find_image_index_in_dataframe(image_name, df), 3)
image_name='img_4.png'
test_eq(find_image_index_in_dataframe(image_name, df), 4)
image_name='img_5.png'


In [None]:
im_list = sm_img.parent.ls()
df_l = create_image_index_dataframe(im_list)
df_l

In [None]:
img_ = load_image_fast(im_list[0], cache=False)
img_ = img_.resize((224, 224), Image.Resampling.LANCZOS)
idx = find_image_index_in_dataframe(im_list[0].name, df_l)
print(f'idx: {idx}')
img_a = annotate_image_with_index(img_, idx, font_size=20)
plt.imshow(img_a)


In [None]:
#| export
def load_and_prepare_image(
    img_path: Path,  # Path to the image file
    image_size: Tuple[int, int],  # Target size for the image
    image_index_df: pd.DataFrame,  # DataFrame with image indices
    annotate_with_index: bool,  # Whether to annotate with index
    font_size: int  # Font size for annotations
) -> Image.Image:  # Returns processed PIL Image
    """
    Load one image (uncached), resize it to `image_size` with LANCZOS
    resampling and, if requested, annotate it with its dataframe index.

    The index is looked up by file name in `image_index_df`; when the name is
    not found there, the resized image is returned without annotation.
    """
    resized = load_image_fast(img_path, cache=False).resize(
        image_size, Image.Resampling.LANCZOS
    )

    if not annotate_with_index:
        return resized

    position = find_image_index_in_dataframe(img_path.name, image_index_df)
    if position is None:
        return resized

    return annotate_image_with_index(resized, position, font_size=font_size)

In [None]:
load_and_prepare_image(im_list[0], (224, 224), df_l, True, 5)

In [None]:
#| export
def display_image_on_axis(
    ax: plt.Axes,  # Matplotlib axis
    img: Image.Image,  # PIL Image to display
    img_path: Path  # Path to the image (for title)
) -> None:
    """
    Draw `img` on `ax`, caption it with the file stem and hide the axis frame.

    The caption is cut to the first 20 characters of the stem so long file
    names do not overflow the cell.
    """
    pixels = np.array(img)
    ax.imshow(pixels)
    caption = img_path.stem[:20]
    ax.set_title(caption, fontsize=8)
    ax.axis('off')

def display_error_on_axis(
    ax: plt.Axes,  # Matplotlib axis
    img_path: Path  # Path to the image that failed
) -> None:
    """
    Fill a grid cell with a red "Error" notice naming the image that could
    not be shown, centred in the axis, and hide the axis frame.
    """
    message = f"Error\n{img_path.name}"
    ax.text(
        0.5, 0.5, message,
        ha='center', va='center', transform=ax.transAxes,
        fontsize=8, color='red',
    )
    ax.axis('off')

In [None]:
#| export
def hide_empty_grid_cells(
    axes: List[List[plt.Axes]],  # 2D array of axes
    num_images: int,  # Number of images actually displayed
    grid_cols: int  # Number of columns in the grid
) -> None:
    """
    Switch off the axis frame of every grid cell that holds no image.

    Cells are counted row by row; the first `num_images` cells are taken to
    be occupied and every cell after them is turned off.
    """
    cell_count = len(axes) * grid_cols
    for cell in range(num_images, cell_count):
        r, c = divmod(cell, grid_cols)
        axes[r][c].axis('off')

In [None]:
#| export
def save_poster_figure(
    fig: plt.Figure,  # Matplotlib figure
    output_path: Path  # Path to save the poster
) -> Path:  # Returns the saved path
    """
    Save a poster figure to disk and close it.

    The parent directory of `output_path` is created when missing. The figure
    is written at 150 dpi with a tight bounding box on a white background.
    All operations target `fig` itself rather than pyplot's "current figure",
    so the right figure is saved even when other figures are open.

    Returns:
        The path the figure was written to.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches='tight', facecolor='white')
    finally:
        # Release the figure even when saving fails.
        plt.close(fig)
    return output_path

In [None]:
#| export
def create_poster_from_folder(
    folder_path: Union[str, Path],  # Folder containing images
    image_index_df: pd.DataFrame,  # DataFrame with image indices
    output_path: Union[str, Path],  # Base path to save posters
    images_per_poster: Optional[int] = 20,  # Number of images per poster. If None, all images in one poster.
    image_size: Tuple[int, int] = (224, 224),  # Size of each image
    grid_cols: int = 5,  # Number of columns
    annotate_with_index: bool = True,  # Whether to annotate with index
    font_size: int = 30,  # Font size for annotations
    title: Optional[str] = None,  # Poster title
    show_poster: bool = False,  # Whether to display the poster
    save_poster: bool = True   # Whether to save the poster to disk
) -> List[Path]:  # Returns list of paths to saved posters
    """
    Render the images of one folder as one or more grid posters.

    The images found by `get_images_from_score_folder` are split into
    consecutive batches of `images_per_poster` (all images in a single poster
    when it is None). Each batch is drawn on its own figure with `grid_cols`
    columns; an image that cannot be processed is logged and replaced by an
    error notice in its cell.

    Output location (only when `save_poster` is True):
        - one poster in total: the figure is written to `output_path` itself;
        - several posters: `output_path` is treated as a directory (created
          when missing) and poster `i` is written to
          `<output_path>/<output_path.stem>_<i>.jpg`.

    Example:
        create_poster_from_folder('images', df, 'out/poster.png', images_per_poster=10)

    Returns:
        List[Path]: paths of the posters that were saved. Empty when the
        folder does not exist, contains no images, or `save_poster` is False.
    """
    # Local import: `logging` is not among the module-level imports of this notebook.
    import logging

    folder_path = Path(folder_path)
    output_path = Path(output_path)
    saved_paths = []

    if not folder_path.exists():
        print(f"‚ö†Ô∏è  Folder not found: {folder_path}")
        return saved_paths

    images = get_images_from_score_folder(folder_path)
    if not images:
        print(f"‚ö†Ô∏è  No images found in {folder_path}")
        return saved_paths

    if images_per_poster is None:
        images_per_poster = len(images)
    # Integer ceiling division: a partially filled last poster still counts.
    num_posters = (len(images) + images_per_poster - 1) // images_per_poster

    for poster_index in range(num_posters):
        start_idx = poster_index * images_per_poster
        batch = images[start_idx:start_idx + images_per_poster]

        # Compute grid for this poster
        grid_rows, grid_cols_eff = get_poster_row_col(len(batch), grid_cols)
        fig, axes = create_poster_figure(grid_rows, grid_cols_eff, image_size)

        # try/finally guarantees the figure is closed even if drawing,
        # showing or saving raises; otherwise figures pile up in memory.
        try:
            set_poster_title(fig, title, poster_index)

            for idx, img_path in enumerate(batch):
                row, col = divmod(idx, grid_cols_eff)
                ax = axes[row][col]
                try:
                    img = load_and_prepare_image(
                        img_path=img_path,
                        image_size=image_size,
                        image_index_df=image_index_df,
                        annotate_with_index=annotate_with_index,
                        font_size=font_size
                    )
                    display_image_on_axis(ax, img, img_path)
                except Exception as e:
                    # Best effort: one broken image must not abort the poster.
                    logging.error(f"Error processing image {img_path.name}: {e}")
                    display_error_on_axis(ax, img_path)

            hide_empty_grid_cells(axes, len(batch), grid_cols_eff)

            if show_poster:
                fig.tight_layout()
                plt.show(block=True)

            if save_poster:
                if num_posters == 1:
                    this_output_path = output_path
                else:
                    # NOTE(review): the extension is fixed to ".jpg" and
                    # `output_path` becomes a directory even when it carries a
                    # file suffix (e.g. "out/poster.png") -- confirm this is
                    # the intended layout.
                    base = output_path.stem
                    ext = '.jpg'
                    if not output_path.exists():
                        output_path.mkdir(parents=True, exist_ok=True)
                    this_output_path = output_path / f"{base}_{poster_index}{ext}"

                this_output_path.parent.mkdir(parents=True, exist_ok=True)

                # Use the figure's own methods so the poster being built is
                # the one that gets saved, whatever pyplot's current figure is.
                fig.tight_layout()
                fig.savefig(this_output_path, dpi=150, bbox_inches='tight', facecolor='white')
                saved_paths.append(this_output_path)
        finally:
            plt.close(fig)

    return saved_paths

In [None]:
score_folder = sm_img.parent
image_index_df = create_image_index_dataframe(im_list)
images_per_poster = None
poster_index = 0
image_size = (224, 224)
grid_cols = 3
annotate_with_index = True

In [None]:
output_path

In [None]:
posters_ = create_poster_from_folder(
	folder_path=score_folder,
	image_index_df=image_index_df,
	output_path=output_path,
	images_per_poster=2,
	image_size=(224, 224),
	grid_cols=2,
	annotate_with_index=True,
	font_size=30,
	title='test_poster',
	show_poster=False,
	save_poster=True)

# Now all posters

In [None]:
images_in_folder = get_images_from_score_folder(sm_img.parent)
images_in_folder

In [None]:
#| export
def print_poster_creation_summary(
    poster_paths: Dict[str, List[Path]]  # Dictionary mapping folder names to poster paths
) -> None:
    """
    Print a three-line report after poster creation: a completion banner, the
    number of folders in `poster_paths` and the total number of posters
    across all folders.
    """
    print("\n‚úÖ Poster creation complete!")
    folder_count = len(poster_paths)
    print(f"   Total folders processed: {folder_count}")
    poster_count = sum(map(len, poster_paths.values()))
    print(f"   Total posters created: {poster_count}")

In [None]:
# lets assume I already copied images from normal folder to thr folder
# dataframe is created
thresholds = [0.5, 1.0]
im_path = Path(r'/home/hasan/Schreibtisch/projects/data/malacca/thr')
output_path = sm_img.parent.parent / "posters_thr"
print(f'  {output_path}')
print(f' Images in {im_path}')
poster_paths = {}
for thr in thresholds:
	folder_path = im_path / str(thr)

	output_path_=Path(fr'{output_path}/{thr}')

	folder_poster_paths = create_poster_from_folder(
		folder_path=folder_path,
		image_index_df=image_index_df,
		output_path=output_path_,
		images_per_poster=2,
		image_size=(224, 224),
		grid_cols=2,
		annotate_with_index=True,
		font_size=30,
		title=f'Score Folder {thr}',
		show_poster=False,
		save_poster=True
	)
	if folder_poster_paths:
		poster_paths[str(thr)] = folder_poster_paths

In [None]:
print_poster_creation_summary(poster_paths)

In [None]:
def create_posters_for_all_score_folders(
    image_dir: Union[str, Path],  # directory with images
    poster_dir: Union[str, Path],  # directory to save posters
    image_index_df: pd.DataFrame,  # Dataframe with image indices
    score_thresholds: List[float],  # List of score thresholds
    images_per_poster: int = 20,  # Number of images per poster
    image_size: Tuple[int, int] = (224, 224),  # Size of each image in the poster
    grid_cols: int = 5,  # Number of columns in the grid
    annotate_with_index: bool = True,  # Whether to add index numbers
    font_size: int = 30,  # Font size for index numbers
    poster_title: str = 'Score Folder', # title of the poster
    show_poster: bool = False,  # Whether to show the poster
    save_poster: bool = True  # Whether to save the poster
) -> Dict[str, List[Path]]:  # Maps threshold folder name to its poster paths
    """
    Generate posters for all score folders.

    For every value in `score_thresholds`, the sub-folder
    `image_dir/<threshold>` is turned into posters by
    `create_poster_from_folder`, using `poster_dir/<threshold>` as the output
    path and "<poster_title>_<threshold>" as the title. All layout options
    (`images_per_poster`, `image_size`, `grid_cols`, `annotate_with_index`,
    `font_size`) are forwarded unchanged. Thresholds whose folder does not
    exist are reported and skipped.

    Returns:
        Dict mapping `str(threshold)` to the list of poster paths returned
        for that folder; folders that produced no posters are left out.
    """
    image_dir = Path(image_dir)
    poster_dir = Path(poster_dir)
    poster_paths = {}

    print("\nüñºÔ∏è  CREATING POSTERS FOR SCORE FOLDERS")
    print("="*70)

    for threshold in score_thresholds:
        folder_name = str(threshold)
        folder_path = image_dir / folder_name

        if not folder_path.exists():
            print(f"‚ö†Ô∏è  Folder {folder_name} does not exist, skipping...")
            continue

        # Forward the caller's layout settings instead of fixed values.
        folder_poster_paths = create_poster_from_folder(
            folder_path=folder_path,
            image_index_df=image_index_df,
            output_path=poster_dir / folder_name,
            images_per_poster=images_per_poster,
            image_size=image_size,
            grid_cols=grid_cols,
            annotate_with_index=annotate_with_index,
            font_size=font_size,
            title=f'{poster_title}_{threshold}',
            show_poster=show_poster,
            save_poster=save_poster
        )
        if folder_poster_paths:
            poster_paths[folder_name] = folder_poster_paths

    # print summary
    print_poster_creation_summary(poster_paths)
    return poster_paths

In [None]:
poster_dir = sm_img.parent.parent / "posters_thr"
poster_dir



In [None]:
image_dir = Path(r'/home/hasan/Schreibtisch/projects/data/malacca/thr')
image_dir

In [None]:
score_thresholds

In [None]:
poster_paths = create_posters_for_all_score_folders(
	image_dir=image_dir,
	poster_dir=poster_dir,
	image_index_df=image_index_df,
	score_thresholds=score_thresholds,
	images_per_poster=2,
	image_size=(224, 224),
	grid_cols=2,
	show_poster=False,
	save_poster=True)

In [None]:
sm_img.parent

# Now predict + organize + create posters
-> See `predict_and_organize_by_score` in the Python script.

In [None]:
#| export
def predict_and_organize_by_score(
    model_path: Union[str, Path],  # Path to the trained model
    image_list_file: Union[str, Path],  # Text file with image paths (one per line)
    output_dir: Union[str, Path],  # Base output directory for organized images
    score_thresholds: List[float] = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],  # Score thresholds
    batch_id: Optional[str] = None,  # Optional batch identifier
    copy_mode: bool = True,  # If True, copy files; if False, move files
    save_metadata: bool = True,  # If True, save metadata JSON for each folder
    create_posters: bool = True,  # If True, create posters for each score folder
    images_per_poster: int = 20,  # Number of images per poster
    image_size: Tuple[int, int] = (224, 224),  # Size of each image in the poster
    grid_cols: int = 5,  # Number of columns in poster grid
    annotate_with_index: bool = True,  # Whether to add index numbers to images in posters
    font_size: int = 30,  # Font size for index annotations
    device: str = "auto",  # Device for inference ("auto", "cpu", "cuda")
    **kwargs  # Additional arguments passed to prediction function
) -> Dict[str, Any]:  # Returns combined prediction and organization results
    """
    Complete workflow: predict anomaly scores, organize images, create indexed posters.

    Steps performed, in order:
    0. Build the image index dataframe from `image_list_file` and save it as
       `<output_dir>/image_index.csv`
    1. Run `predict_image_list_from_file_enhanced`
    2. Organize the images into score folders with `organize_images_by_score`
    3. Create posters for the score folders (only when `create_posters` is True)

    NOTE(review): a second `predict_and_organize_by_score` without the poster
    options is defined further down in this notebook; being later, it replaces
    this definition -- confirm which one should be exported.

    Args:
        model_path: Path to the trained anomaly detection model
        image_list_file: Text file containing paths to images (one per line)
        output_dir: Directory where score-based folders will be created
        score_thresholds: List of threshold values (customize to your needs)
            Examples:
            - [0.5, 1.0] for simple two-folder setup
            - [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] for fine-grained organization
        batch_id: Optional identifier for this batch
        copy_mode: Whether to copy (True) or move (False) images
        save_metadata: Whether to save JSON metadata for each folder
        create_posters: Whether to create image posters for each score folder
        images_per_poster: Number of images to include in each poster
        image_size: Size to resize each image to in posters
        grid_cols: Number of columns in the poster grid
        annotate_with_index: Whether to annotate images with their dataframe index
        font_size: Font size for index annotations
        device: Device to use for inference
        **kwargs: Forwarded unchanged to `predict_image_list_from_file_enhanced`

    Returns:
        Dictionary with the same keys on every path:
        - image_index_df: DataFrame with image indices
        - image_index_df_path: Path (as str) of the saved index CSV
        - prediction_results: Full output of the prediction function
        - organization_stats: Result of `organize_images_by_score`, or None
          when there were no prediction results
        - poster_paths: Poster paths, or None when posters were not created
    """
    print("\nüöÄ PREDICT AND ORGANIZE BY ANOMALY SCORE WITH INDEXED POSTERS")
    print("="*70)

    # Step 0: Create image index dataframe
    print("\nüìä Step 0: Creating image index dataframe...")
    image_index_df = create_image_index_dataframe(image_list_file)

    # Save the dataframe
    output_dir_path = Path(output_dir)
    output_dir_path.mkdir(parents=True, exist_ok=True)
    df_path = output_dir_path / "image_index.csv"
    image_index_df.to_csv(df_path, index=False)
    print(f"üíæ Saved image index dataframe to {df_path}")

    # Step 1: Run predictions
    print("\nüìä Step 1: Running predictions...")
    prediction_output = predict_image_list_from_file_enhanced(
        model_path=model_path,
        image_list_file=image_list_file,
        batch_id=batch_id,
        output_dir=output_dir,
        device=device,
        save_results=True,
        **kwargs
    )

    # Extract results
    prediction_results = prediction_output.get('results', [])

    if not prediction_results:
        print("‚ö†Ô∏è  No prediction results to organize!")
        # Same key set as the normal return below: the index CSV has already
        # been written, so its path is reported here as well.
        return {
            'image_index_df': image_index_df,
            'image_index_df_path': str(df_path),
            'prediction_results': prediction_output,
            'organization_stats': None,
            'poster_paths': None
        }

    print(f"‚úÖ Predictions complete: {len(prediction_results)} images processed")

    # Step 2: Organize images by score
    print("\nüìÅ Step 2: Organizing images by score...")
    organization_stats = organize_images_by_score(
        prediction_results=prediction_results,
        output_dir=output_dir,
        score_thresholds=score_thresholds,
        copy_mode=copy_mode,
        save_metadata=save_metadata
    )

    # Step 3: Create posters (optional)
    poster_paths = None
    if create_posters:
        print("\nüñºÔ∏è  Step 3: Creating indexed posters...")
        # NOTE(review): `create_posters_for_score_folders` is not defined in
        # the part of the notebook visible alongside this function; the poster
        # helper defined above is `create_posters_for_all_score_folders`
        # (image_dir, poster_dir, ...). Confirm which one is intended.
        poster_paths = create_posters_for_score_folders(
            output_dir=output_dir,
            image_index_df=image_index_df,
            score_thresholds=score_thresholds,
            images_per_poster=images_per_poster,
            image_size=image_size,
            grid_cols=grid_cols,
            annotate_with_index=annotate_with_index,
            font_size=font_size
        )

    print("\nüéâ WORKFLOW COMPLETE!")
    print("="*70)
    print(f"üìä Image index dataframe: {df_path}")
    print(f"üìÅ Organized images: {output_dir}")
    if poster_paths:
        total_posters = sum(len(p) for p in poster_paths.values())
        print(f"üñºÔ∏è  Created {total_posters} poster(s)")

    return {
        'image_index_df': image_index_df,
        'image_index_df_path': str(df_path),
        'prediction_results': prediction_output,
        'organization_stats': organization_stats,
        'poster_paths': poster_paths
    }

## Image Indexing and Poster Creation

## High-Level Workflow Function

In [None]:
#| export
def predict_and_organize_by_score(
    model_path: Union[str, Path],  # Path to the trained model
    image_list_file: Union[str, Path],  # Text file with image paths (one per line)
    output_dir: Union[str, Path],  # Base output directory for organized images
    score_thresholds: List[float] = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],  # Score thresholds
    batch_id: Optional[str] = None,  # Optional batch identifier
    copy_mode: bool = True,  # If True, copy files; if False, move files
    save_metadata: bool = True,  # If True, save metadata JSON for each folder
    device: str = "auto",  # Device for inference ("auto", "cpu", "cuda")
    **kwargs  # Additional arguments passed to prediction function
) -> Dict[str, Any]:  # Returns combined prediction and organization results
    """
    Predict anomaly scores for a list of images, then sort the images into
    score-based folders.

    NOTE(review): this re-uses the name of the poster-enabled
    `predict_and_organize_by_score` defined earlier in this notebook and,
    being later, replaces it -- confirm which one should be exported.

    Workflow:
    1. `predict_image_list_from_file_enhanced` is run with `save_results=True`;
       `**kwargs` are forwarded to it unchanged.
    2. The entries under the `results` key of its output are handed to
       `organize_images_by_score`. When there are none, step 2 is skipped.

    Args:
        model_path: Path to the trained anomaly detection model
        image_list_file: Text file containing paths to images (one per line)
        output_dir: Directory where score-based folders will be created
        score_thresholds: Threshold values, e.g. [0.5, 1.0] for a two-folder
            setup or [0.3, 0.4, ..., 1.0] for fine-grained organization
        batch_id: Optional identifier for this batch
        copy_mode: Whether to copy (True) or move (False) images
        save_metadata: Whether to save JSON metadata for each folder
        device: Device to use for inference
        **kwargs: Additional prediction arguments (save_heatmap, heatmap_style, etc.)

    Returns:
        Dictionary containing:
        - prediction_results: Full output of the prediction function
        - organization_stats: Result of the organization step, or None when
          there was nothing to organize
    """
    print("\nüöÄ PREDICT AND ORGANIZE BY ANOMALY SCORE")
    print("="*70)

    # --- Step 1: inference -------------------------------------------------
    print("\nüìä Step 1: Running predictions...")
    raw_output = predict_image_list_from_file_enhanced(
        model_path=model_path,
        image_list_file=image_list_file,
        batch_id=batch_id,
        output_dir=output_dir,
        device=device,
        save_results=True,
        **kwargs
    )

    per_image_results = raw_output.get('results', [])
    summary = {
        'prediction_results': raw_output,
        'organization_stats': None
    }

    if not per_image_results:
        print("‚ö†Ô∏è  No prediction results to organize!")
        return summary

    print(f"‚úÖ Predictions complete: {len(per_image_results)} images processed")

    # --- Step 2: sort images into threshold folders --------------------------
    print("\nüìÅ Step 2: Organizing images by score...")
    summary['organization_stats'] = organize_images_by_score(
        prediction_results=per_image_results,
        output_dir=output_dir,
        score_thresholds=score_thresholds,
        copy_mode=copy_mode,
        save_metadata=save_metadata
    )

    print("\nüéâ WORKFLOW COMPLETE!")
    print("="*70)

    return summary

## Example Usage

```python
# Example 1: Simple two-folder organization (low vs high anomaly)
results = predict_and_organize_by_score(
    model_path="path/to/model.ckpt",
    image_list_file="path/to/images.txt",
    output_dir="organized_output",
    score_thresholds=[0.5, 1.0],  # Two folders: 0.5 (normal) and 1.0 (anomaly)
    copy_mode=True
)

# Example 2: Fine-grained organization with 8 score folders
results = predict_and_organize_by_score(
    model_path="path/to/model.ckpt",
    image_list_file="path/to/images.txt",
    output_dir="organized_output",
    score_thresholds=[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    copy_mode=True,
    save_heatmap=True,
    heatmap_style="side_by_side"
)

# Example 3: Custom thresholds
results = predict_and_organize_by_score(
    model_path="path/to/model.ckpt",
    image_list_file="path/to/images.txt",
    output_dir="organized_output",
    score_thresholds=[0.25, 0.5, 0.75, 1.0],  # Four folders
    copy_mode=False  # Move files instead of copying
)
```

## Tests

In [None]:
#| hide
# Test determine_score_folder
test_eq(determine_score_folder(0.3, [0.5, 1.0]), "0.5")
test_eq(determine_score_folder(0.7, [0.5, 1.0]), "1.0")
test_eq(determine_score_folder(0.45, [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]), "0.5")
test_eq(determine_score_folder(0.85, [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]), "0.9")
print("‚úÖ All tests passed!")

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()