In [2]:
from ultralytics import YOLO

# --- 1. DEFINE PATHS ---
# CRITICAL: This should point to the BEST model file saved from your previous training run.
# Ultralytics saves this automatically in your run directory.
#BEST_WEIGHTS_PATH = 'my_finetune_project/run_ladas_1280_l_v14/weights/best.pt'
BEST_WEIGHTS_PATH = 'models/ladas-1280-l.pt'
# CRITICAL: Path to your dataset configuration file (the one used for training).
# This file MUST have a 'test:' path defined for this to work correctly.
DATA_YAML_PATH = 'RIPA-ft/data.yaml'

# --- 2. LOAD THE BEST MODEL ---
# Load the model with the highest performance from the training run
print(f"Loading best model weights from: {BEST_WEIGHTS_PATH}")
model = YOLO(BEST_WEIGHTS_PATH)

# --- 3. RUN VALIDATION ON THE TEST SET ---
print("\nRunning unbiased validation on the TEST set...")

# The model.val() method runs the evaluation logic.
# The 'split="test"' argument specifically instructs Ultralytics to use the 
# directory defined under the 'test:' key in the DATA_YAML_PATH file.
metrics = model.val(
    data=DATA_YAML_PATH,     # The dataset configuration file
    imgsz=640,               # Should match the image size used during training
    split='test',            # CRITICAL: Forces evaluation on the data specified by the 'test:' path
    save_json=True           # Optional: Save metrics in a JSON file for easy processing
)

# --- 4. EXTRACT AND DISPLAY KEY METRICS ---
print("\n--- TEST SET RESULTS ---")

# Access the dictionary of results
results_dict = metrics.results_dict

# Mean Average Precision (mAP)
# mAP50 is common, mAP50-95 is more stringent (average across multiple IoU thresholds)
mAP50 = results_dict['metrics/mAP50(B)']
mAP50_95 = results_dict['metrics/mAP50-95(B)']

print(f"mAP@50 (Test Set): {mAP50:.4f}")
print(f"mAP@50-95 (Test Set): {mAP50_95:.4f}")
print(f"Precision (Test Set): {results_dict['metrics/precision(B)']:.4f}")
print(f"Recall (Test Set): {results_dict['metrics/recall(B)']:.4f}")

# The full results, curves, and confusion matrix plots are saved to the 
# 'runs/detect/val' directory by default.
#print(f"\nDetailed metrics saved to: {model.validator.save_dir}")

Loading best model weights from: models/ladas-1280-l.pt

Running unbiased validation on the TEST set...
Ultralytics 8.3.205  Python-3.11.5 torch-2.2.2 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLO11l summary (fused): 190 layers, 25,307,068 parameters, 0 gradients, 86.7 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 493.182.3 MB/s, size: 295.2 KB)
[K[34m[1mval: [0mScanning C:\Users\lucia\Documents\GitHub\Layout-Analysis-and-OCR\RIPA-ft\test\labels.cache... 86 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 86/86 84.8Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 6/6 0.9it/s 6.5s0.8s
                   all         86        969      0.191     0.0809     0.0724     0.0456
     AdvertisementZone          4          4          1          0          0          0
DigitizationArtefactZone         35         82          0          0          0          0
            FigureZone    

In [1]:
import os
import glob
import shutil
from tqdm import tqdm
import re
def restructure_flat_output(flat_input_dir: str, structured_output_dir: str):
    """
    Restructures a flat directory of YOLO outputs into a nested structure.

    Assumes flat_input_dir contains files like:
        - 'BookName_1.txt'
        - 'BookName_1.png'
        - 'Another_Book_10.txt'
        - 'Another_Book_10.png'

    Will create a structured_output_dir like:
        - 'BookName/txt/page_0001.txt'
        - 'BookName/png/page_0001.png'
        - 'Another_Book/txt/page_0010.txt'
        - 'Another_Book/png/page_0010.png'

    Args:
        flat_input_dir (str): The path to the existing flat output directory.
        structured_output_dir (str): The path to the new root directory for the structured output.
    """
    
    print(f"Scanning directory: {flat_input_dir}")
    print(f"Output will be saved to: {structured_output_dir}")

    # Find all files in the flat directory
    file_paths = glob.glob(os.path.join(flat_input_dir, '*.*'))
    
    if not file_paths:
        print("No files found to process.")
        return

    # Regex to safely parse "Book_Name_with_underscores_page_123.txt"
    # It captures the book name (non-greedy) and the page number at the end
    # This matches the f"{book_name}_page_{page_number_int}" format
    filename_pattern = re.compile(r'^(.*?)_page_(\d+)(\.(?:png|jpg|jpeg|txt))$', re.IGNORECASE)

    moved_files_count = 0
    skipped_files = []

    for old_file_path in tqdm(file_paths, desc="Restructuring files"):
        filename = os.path.basename(old_file_path)
        
        match = filename_pattern.match(filename)
        
        if not match:
            skipped_files.append(filename)
            continue
            
        try:
            # --- 1. Parse Filename ---
            book_name = match.group(1)
            page_number_str = match.group(2)
            extension = match.group(3) # e.g., ".png"
            
            page_number_int = int(page_number_str)
            
            # --- 2. Determine New Structure ---
            
            # Recreate the "page_0001" format
            new_file_basename = f"page_{page_number_int:04d}{extension}"
            
            # Determine subdirectory ('txt' or 'png')
            if extension.lower() == '.txt':
                file_type_dir = 'txt'
            else:
                file_type_dir = 'png' # Group all images under 'png'
                
            # --- 3. Create Target Path and Move File ---
            
            # Create the full target directory: output/BookName/txt
            target_dir = os.path.join(structured_output_dir, book_name, file_type_dir)
            os.makedirs(target_dir, exist_ok=True)
            
            # Create the final file path: output/BookName/txt/page_0001.txt
            new_file_path = os.path.join(target_dir, new_file_basename)
            
            # Move the file
            shutil.move(old_file_path, new_file_path)
            moved_files_count += 1

        except Exception as e:
            print(f"Error processing file {filename}: {e}")
            skipped_files.append(filename)

    # --- 4. Final Report ---
    print("\nRestructuring complete.")
    print(f"Successfully moved {moved_files_count} files.")
    
    if skipped_files:
        print(f"\nSkipped {len(skipped_files)} files (unrecognized format):")
        for f in skipped_files[:10]: # Print a sample
            print(f"  - {f}")
        if len(skipped_files) > 10:
            print(f"  ... and {len(skipped_files) - 10} more.")

In [2]:
restructure_flat_output("inference_output_batch_all", "structured_inference_output")

Scanning directory: inference_output_batch_all
Output will be saved to: structured_inference_output


Restructuring files: 100%|██████████| 36784/36784 [01:04<00:00, 567.09it/s] 


Restructuring complete.
Successfully moved 36784 files.



