In [1]:
import os
import traceback
import random
import numpy as np
import cv2
import json
from pathlib import Path
import openslide
from tqdm import tqdm
import concurrent.futures
import matplotlib.pyplot as plt
import yaml

In [2]:
# ---------------------------------------------------------------------------
# Pipeline configuration
# ---------------------------------------------------------------------------

# Input folder holding the .svs slides and their .geojson annotation files,
# plus the three output roots written by this notebook.
INPUT_DIR = "svs_exp"
YOLO_OUTPUT_DIR = "yolo_dataset"
SEG_OUTPUT_DIR = "segmentation_dataset"
DEBUG_DIR = "debug_samples"

# Classes that receive labels/masks, and the full list read from the GeoJSON
# (the training classes followed by the extra 'Tissue' class).
TRAINING_CLASSES = ['eos', 'eosg']
CLASSES = [*TRAINING_CLASSES, 'Tissue']

# Train / validation / test split fractions.
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

# Tissue patches should make up 10% of the dataset.
TISSUE_DATASET_RATIO = 0.1

# Patch geometry, all in pixels.
HPF_SIZE = 2144      # side length of a high-power field
PATCH_SIZE = 448     # side length of a deep-learning patch
PATCH_STRIDE = 424   # step between consecutive patches inside an HPF

In [3]:
def ensure_dirs_exist():
    """Build the output directory trees; safe to call repeatedly.

    Every root (YOLO, segmentation, debug) receives ``<split>/images`` and
    ``<split>/labels`` for the train, val and test splits. The segmentation
    root additionally receives ``<split>/masks``. Three visualization folders
    are then created directly under the debug root.
    """
    split_names = ('train', 'val', 'test')

    for root in (YOLO_OUTPUT_DIR, SEG_OUTPUT_DIR, DEBUG_DIR):
        os.makedirs(root, exist_ok=True)

        # Only the segmentation dataset stores mask files.
        subfolders = ['images', 'labels']
        if root == SEG_OUTPUT_DIR:
            subfolders.append('masks')

        for split_name in split_names:
            for subfolder in subfolders:
                os.makedirs(os.path.join(root, split_name, subfolder), exist_ok=True)

    for debug_folder in ('patches', 'yolo_visualization', 'segmentation_visualization'):
        os.makedirs(os.path.join(DEBUG_DIR, debug_folder), exist_ok=True)

def load_geojson_annotations(geojson_path):
    """Load polygon annotations whose class name is listed in ``CLASSES``.

    Args:
        geojson_path: Path to a GeoJSON file. Either a FeatureCollection
            (an object with a ``features`` list) or a bare list of features
            is accepted.

    Returns:
        A list of dicts, one per polygon, with keys:
            ``class_idx``   - index of the class name within ``CLASSES``
            ``coordinates`` - the polygon's exterior ring, a list of positions

    Notes:
        * Features without a classification, or whose class is not in
          ``CLASSES``, are skipped.
        * ``Polygon`` geometries contribute their exterior ring (holes are
          ignored). ``MultiPolygon`` geometries contribute one annotation per
          member polygon. Any other geometry type (points, lines, ...) is
          skipped because it cannot be turned into a polygon label.
    """
    # GeoJSON is specified as UTF-8; do not depend on the platform default.
    with open(geojson_path, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)

    if isinstance(geojson_data, list):
        features = geojson_data
    else:
        features = geojson_data.get('features', [])

    annotations = []
    for feature in features:
        # 'properties', 'classification' and 'geometry' may be missing or null.
        properties = feature.get('properties') or {}
        classification = properties.get('classification') or {}
        class_name = classification.get('name', '')
        if class_name not in CLASSES:
            continue

        geometry = feature.get('geometry') or {}
        raw_coordinates = geometry.get('coordinates') or []
        # A geometry with no 'type' is treated as a Polygon, which is what the
        # pipeline has always assumed.
        geometry_type = geometry.get('type', 'Polygon')

        if geometry_type == 'Polygon':
            polygons = [raw_coordinates]
        elif geometry_type == 'MultiPolygon':
            polygons = raw_coordinates
        else:
            continue

        class_idx = CLASSES.index(class_name)
        for polygon in polygons:
            if not polygon:
                continue
            annotations.append({
                'class_idx': class_idx,
                'coordinates': polygon[0],  # exterior ring only
            })

    return annotations

In [4]:
def extract_patches(slide, annotations, base_filename, hpf_stride=500):
    """Enumerate patch locations on a slide and sort them into two groups.

    The slide is tiled into high-power fields (HPFs) of ``HPF_SIZE`` pixels
    placed every ``hpf_stride`` pixels; each HPF is tiled into patches of
    ``PATCH_SIZE`` pixels placed every ``PATCH_STRIDE`` pixels. No pixel data
    is read here - only ``slide.dimensions`` is used.

    Args:
        slide: Object exposing ``dimensions`` as ``(width, height)``.
        annotations: Annotation dicts as produced by ``load_geojson_annotations``.
        base_filename: Prefix used when building each patch name.
        hpf_stride: Step in pixels between HPF origins (default 500). With the
            default, consecutive HPFs overlap because the stride is smaller
            than ``HPF_SIZE``.

    Returns:
        ``(patches_info, tissue_patches_info)``. Each entry is a dict with
        ``patch_left``, ``patch_top``, ``patch_name``, ``patch_annotations``
        and ``is_tissue``. The first list holds patches that contain at least
        one annotation; the second holds a random ~5% sample of the remaining
        patches, flagged ``is_tissue=True`` with an empty annotation list.
    """
    width, height = slide.dimensions

    hpfs_x = max(1, (width - HPF_SIZE) // hpf_stride + 1)
    hpfs_y = max(1, (height - HPF_SIZE) // hpf_stride + 1)

    # Patch offsets inside an HPF are the same for both axes and every HPF.
    patch_offsets = range(0, HPF_SIZE - PATCH_SIZE + 1, PATCH_STRIDE)

    patches_info = []
    tissue_patches_info = []

    for hpf_x in range(hpfs_x):
        # Clamp to the slide; the lower bound keeps the origin non-negative
        # when the slide is smaller than one HPF.
        hpf_left = max(0, min(hpf_x * hpf_stride, width - HPF_SIZE))

        for hpf_y in range(hpfs_y):
            hpf_top = max(0, min(hpf_y * hpf_stride, height - HPF_SIZE))

            for patch_x in patch_offsets:
                for patch_y in patch_offsets:
                    patch_left = hpf_left + patch_x
                    patch_top = hpf_top + patch_y

                    patch_info = {
                        'patch_left': patch_left,
                        'patch_top': patch_top,
                        'patch_name': f"{base_filename}_hpf_{hpf_x}_{hpf_y}_patch_{patch_x}_{patch_y}"
                    }

                    # Decide from the annotations that will actually be stored.
                    # A separate pre-check with different boundary rules could
                    # mark a patch as annotated while leaving this list empty.
                    patch_annotations = get_patch_annotations(
                        patch_left, patch_top, PATCH_SIZE, annotations)

                    if patch_annotations:
                        patch_info['patch_annotations'] = patch_annotations
                        patch_info['is_tissue'] = False
                        patches_info.append(patch_info)
                    elif random.random() < 0.05:  # Sample a subset of patches as potential Tissue
                        patch_info['patch_annotations'] = []
                        patch_info['is_tissue'] = True
                        tissue_patches_info.append(patch_info)

    return patches_info, tissue_patches_info

In [5]:
def extract_and_save_patch_image(slide, patch_info, dataset_split):
    """Read one patch from the slide and write its image, YOLO label and mask.

    Args:
        slide: Open slide object; ``read_region`` is called at level 0.
        patch_info: Patch dict with ``patch_left``, ``patch_top``,
            ``patch_name``, ``patch_annotations`` and optionally ``is_tissue``.
        dataset_split: Split name passed through to the save helpers.

    For patches flagged ``is_tissue`` no annotation is processed, so the label
    list stays empty and the masks stay all-zero. Roughly 5% of the non-tissue
    patches also get debug visualizations.
    """
    origin = (patch_info['patch_left'], patch_info['patch_top'])
    region = slide.read_region(origin, 0, (PATCH_SIZE, PATCH_SIZE))
    patch_array = np.array(region.convert('RGB'))

    is_tissue_patch = patch_info.get('is_tissue', False)
    n_train_classes = len(TRAINING_CLASSES)

    # One binary mask per training class; Tissue never gets a mask.
    class_masks = [
        np.zeros((PATCH_SIZE, PATCH_SIZE), dtype=np.uint8)
        for _ in range(n_train_classes)
    ]
    yolo_lines = []

    annotations = [] if is_tissue_patch else patch_info['patch_annotations']
    for annotation in annotations:
        label = annotation['class_idx']
        # Class indices beyond the training classes (i.e. Tissue) are ignored.
        if label >= n_train_classes:
            continue

        polygon = annotation['normalized_coordinates']
        # A polygon needs at least three vertices.
        if len(polygon) < 3:
            continue

        # YOLO segmentation line: "<class> x1 y1 x2 y2 ..." with normalized values.
        fields = [f"{label}"] + [f"{x:.6f} {y:.6f}" for x, y in polygon]
        yolo_lines.append(" ".join(fields))

        try:
            # Scale the normalized vertices back to pixel positions in the patch.
            pixel_points = np.array(
                [(int(x * PATCH_SIZE), int(y * PATCH_SIZE)) for x, y in polygon],
                dtype=np.int32,
            ).reshape(-1, 2)
            cv2.fillPoly(class_masks[label], [pixel_points], 1)
        except Exception as e:
            print(f"Error creating mask for {patch_info['patch_name']}: {str(e)}")

    save_yolo_annotation(patch_info['patch_name'], yolo_lines, dataset_split)
    save_images_and_masks(patch_array, class_masks, patch_info['patch_name'], dataset_split)

    # The random draw happens for every patch, tissue or not.
    selected_for_debug = random.random() < 0.05
    if selected_for_debug and not is_tissue_patch:
        create_debug_visualization(patch_array, patch_info, class_masks)

In [6]:
def create_dataset_yaml():
    """Write ``dataset.yaml`` into the YOLO output directory.

    The file records the absolute dataset path, the per-split image folders,
    and the number and names of the training classes (Tissue is not listed).
    """
    config_path = os.path.join(YOLO_OUTPUT_DIR, 'dataset.yaml')

    dataset_config = dict(
        path=os.path.abspath(YOLO_OUTPUT_DIR),
        train='train/images',
        val='val/images',
        test='test/images',
        nc=len(TRAINING_CLASSES),
        names=TRAINING_CLASSES,
    )

    with open(config_path, 'w') as yaml_file:
        yaml.dump(dataset_config, yaml_file, default_flow_style=False)

In [7]:
def _split_patches(patches):
    """Shuffle ``patches`` in place and slice it into (train, val, test) lists."""
    random.shuffle(patches)
    num_train = int(len(patches) * TRAIN_RATIO)
    num_val = int(len(patches) * VAL_RATIO)
    return (
        patches[:num_train],
        patches[num_train:num_train + num_val],
        patches[num_train + num_val:],  # remainder, so no patch is dropped
    )


def process_slide_file(svs_filename):
    """Process one SVS slide together with its same-named GeoJSON file.

    Steps: load the annotations, enumerate patches, cap the number of tissue
    patches so they make up about ``TISSUE_DATASET_RATIO`` of the slide's
    patches, split annotated and tissue patches separately into
    train/val/test, then extract and save every patch.

    Args:
        svs_filename: File name (not path) of a slide inside ``INPUT_DIR``.

    Returns:
        None. Problems are reported with ``print`` and the slide is skipped;
        exceptions raised during processing are caught and printed with a
        traceback rather than propagated.

    NOTE(review): the split is made per patch. ``extract_patches`` places HPFs
    with a stride smaller than the HPF size, so overlapping patches can land in
    different splits - confirm whether that is acceptable for evaluation.
    """
    svs_path = os.path.join(INPUT_DIR, svs_filename)
    base_filename = os.path.splitext(svs_filename)[0]
    geojson_path = os.path.join(INPUT_DIR, f"{base_filename}.geojson")

    if not os.path.exists(geojson_path):
        print(f"Geojson file not found for {svs_filename}")
        return

    # Sentinel so the ``finally`` clause knows whether the slide was opened.
    slide = None
    try:
        slide = openslide.OpenSlide(svs_path)
        annotations = load_geojson_annotations(geojson_path)

        if not annotations:
            print(f"No 'eos' or 'eosg' annotations found in {geojson_path}")
            return

        patches_info, tissue_patches_info = extract_patches(slide, annotations, base_filename)

        if not patches_info:
            print(f"No valid patches found in {svs_filename}")
            return

        # tissue / (tissue + normal) == TISSUE_DATASET_RATIO, solved for tissue.
        total_normal_patches = len(patches_info)
        target_tissue_patches = int(
            total_normal_patches * TISSUE_DATASET_RATIO / (1 - TISSUE_DATASET_RATIO))

        # Down-sample whenever there are too many tissue patches. This includes
        # a target of 0 (very few annotated patches); otherwise every sampled
        # tissue patch would be kept and would swamp the annotated ones.
        if len(tissue_patches_info) > target_tissue_patches:
            tissue_patches_info = random.sample(tissue_patches_info, target_tissue_patches)

        # Annotated and tissue patches are split separately so each split
        # receives its share of both kinds.
        train_patches, val_patches, test_patches = _split_patches(patches_info)
        train_tissue, val_tissue, test_tissue = _split_patches(tissue_patches_info)

        train_patches.extend(train_tissue)
        val_patches.extend(val_tissue)
        test_patches.extend(test_tissue)

        for patches, split in zip([train_patches, val_patches, test_patches], ['train', 'val', 'test']):
            for patch_info in tqdm(patches, desc=f"Processing {split} patches for {base_filename}"):
                extract_and_save_patch_image(slide, patch_info, split)

    except Exception as e:
        print(traceback.format_exc())
        print(f"Error processing {svs_filename}: {str(e)}")
    finally:
        if slide is not None:
            slide.close()

In [8]:
def has_annotations(patch_left, patch_top, patch_size, annotations):
    """Tell whether any single annotation has at least 3 vertices in the patch.

    The patch covers the half-open square
    ``[patch_left, patch_left + patch_size) x [patch_top, patch_top + patch_size)``,
    the same rule ``get_patch_annotations`` uses. A patch reported here as
    annotated is therefore guaranteed to yield at least one annotation there.

    Args:
        patch_left: X coordinate of the patch's left edge.
        patch_top: Y coordinate of the patch's top edge.
        patch_size: Side length of the square patch.
        annotations: Iterable of dicts with a ``coordinates`` entry, either a
            flat ring ``[[x, y], ...]`` or a nested one ``[[[x, y], ...]]``.

    Returns:
        True as soon as one annotation reaches 3 vertices inside the patch,
        otherwise False.
    """
    patch_right = patch_left + patch_size
    patch_bottom = patch_top + patch_size

    for anno in annotations:
        coords = anno['coordinates']
        # Unwrap the nested format [[[x, y], ...]] to its first ring.
        if coords and isinstance(coords[0], list) and coords[0] and isinstance(coords[0][0], list):
            coords = coords[0]

        points_inside = 0
        for coord in coords:
            # Index instead of unpacking so positions carrying a third value
            # (e.g. altitude) are accepted.
            x, y = coord[0], coord[1]

            if patch_left <= x < patch_right and patch_top <= y < patch_bottom:
                points_inside += 1
                # Three vertices are the minimum for a polygon.
                if points_inside >= 3:
                    return True
    return False

In [9]:
def get_patch_annotations(patch_left, patch_top, patch_size, annotations):
    """Collect the annotations that have at least 3 vertices inside a patch.

    A vertex is inside when it lies in the half-open square starting at
    ``(patch_left, patch_top)`` with side ``patch_size``. Vertices outside are
    simply dropped (the polygon is not geometrically clipped); the kept ones
    are rescaled so that the patch spans 0-1 on both axes.

    Returns:
        A list of dicts with ``class_idx``, ``coordinates`` (the original ring,
        unwrapped from the nested format if needed) and
        ``normalized_coordinates`` (list of ``(x, y)`` tuples).
    """
    right_edge = patch_left + patch_size
    bottom_edge = patch_top + patch_size

    selected = []
    for annotation in annotations:
        ring = annotation['coordinates']
        # Nested format [[[x, y], ...]]: work on the first ring.
        if ring and isinstance(ring[0], list) and ring[0] and isinstance(ring[0][0], list):
            ring = ring[0]

        normalized = [
            ((x - patch_left) / patch_size, (y - patch_top) / patch_size)
            for x, y in ring
            if patch_left <= x < right_edge and patch_top <= y < bottom_edge
        ]

        # Fewer than three vertices cannot describe a polygon.
        if len(normalized) < 3:
            continue

        selected.append({
            'class_idx': annotation['class_idx'],
            'coordinates': ring,
            'normalized_coordinates': normalized,
        })

    return selected

In [10]:
def save_yolo_annotation(patch_name, yolo_lines, dataset_split):
    """Write the YOLO label lines for one patch to ``<split>/labels/<patch_name>.txt``.

    The lines are joined with newlines and no trailing newline is added; an
    empty ``yolo_lines`` produces an empty file.
    """
    labels_dir = os.path.join(YOLO_OUTPUT_DIR, dataset_split, 'labels')
    label_file = os.path.join(labels_dir, f"{patch_name}.txt")
    with open(label_file, 'w') as handle:
        handle.write('\n'.join(yolo_lines))

In [11]:
def save_images_and_masks(patch_array, class_masks, patch_name, dataset_split):
    """Write the patch image to both datasets and the label mask to the segmentation one.

    Args:
        patch_array: RGB image array of the patch.
        class_masks: One 2D mask per training class; non-zero marks the class.
        patch_name: File stem used for every output file.
        dataset_split: Split sub-directory to write into.

    The mask file is a single-channel PNG where 0 is background and class
    ``i`` is stored as ``i + 1``. Where masks overlap, the later class wins.
    """
    image_name = f"{patch_name}.jpg"

    # OpenCV writes BGR, so convert once and reuse for both image files.
    bgr_patch = cv2.cvtColor(patch_array, cv2.COLOR_RGB2BGR)
    cv2.imwrite(os.path.join(YOLO_OUTPUT_DIR, dataset_split, 'images', image_name), bgr_patch)
    cv2.imwrite(os.path.join(SEG_OUTPUT_DIR, dataset_split, 'images', image_name), bgr_patch)

    label_mask = np.zeros((PATCH_SIZE, PATCH_SIZE), dtype=np.uint8)
    for class_position, _ in enumerate(TRAINING_CLASSES):
        label_mask[class_masks[class_position] > 0] = class_position + 1

    mask_file = os.path.join(SEG_OUTPUT_DIR, dataset_split, 'masks', f"{patch_name}.png")
    cv2.imwrite(mask_file, label_mask)

In [12]:
def create_debug_visualization(patch_array, patch_info, class_masks):
    """Write quality-control images and a text summary for one patch.

    Args:
        patch_array: RGB image array of the patch.
        patch_info: Patch dict with ``patch_name``, ``patch_left``,
            ``patch_top`` and ``patch_annotations``.
        class_masks: One 2D uint8 mask per training class.

    Files written under ``DEBUG_DIR``:
        yolo_visualization/<name>_yolo_debug.jpg          polygon outlines + class names
        segmentation_visualization/<name>_seg_debug.jpg   colored mask overlay + counts
        segmentation_visualization/<name>_<class>_mask.jpg one image per class
        segmentation_visualization/<name>_side_by_side.jpg original next to overlay
        segmentation_visualization/<name>_boundaries.jpg   mask contours
        segmentation_visualization/<name>_summary.txt      key: value summary
        patches/<name>.jpg                                 the unmodified patch

    Color handling: every image is composed in RGB and converted to BGR exactly
    once, when it is written. Colors are therefore RGB tuples - green for the
    first training class, red for the second.
    """
    patch_name = patch_info['patch_name']
    seg_vis_dir = os.path.join(DEBUG_DIR, 'segmentation_visualization')

    # RGB colors, cycled if there are ever more classes than palette entries.
    palette_rgb = [(0, 255, 0), (255, 0, 0)]
    class_colors = [palette_rgb[i % len(palette_rgb)] for i in range(len(TRAINING_CLASSES))]

    def write_rgb(path, rgb_image):
        """Save an RGB array with OpenCV, which expects BGR channel order."""
        cv2.imwrite(path, cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR))

    # --- YOLO polygon visualization -------------------------------------
    yolo_debug_img = patch_array.copy()
    for anno in patch_info['patch_annotations']:
        class_idx = anno['class_idx']
        # Skip Tissue class annotations
        if class_idx >= len(TRAINING_CLASSES):
            continue

        coords = anno['normalized_coordinates']
        # Skip invalid polygons
        if len(coords) < 3:
            continue

        try:
            # Convert normalized points to pixel coordinates in the patch
            points = np.array([(int(x * PATCH_SIZE), int(y * PATCH_SIZE)) for x, y in coords],
                              dtype=np.int32)

            color = class_colors[class_idx]
            cv2.polylines(yolo_debug_img, [points], isClosed=True, color=color, thickness=2)

            # Put the class name at the mean of the vertices
            centroid_x = int(np.mean(points[:, 0]))
            centroid_y = int(np.mean(points[:, 1]))
            cv2.putText(yolo_debug_img, TRAINING_CLASSES[class_idx],
                        (centroid_x, centroid_y), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, color, 1)
        except Exception as e:
            print(f"Error drawing polygon for {patch_name}: {str(e)}")

    write_rgb(os.path.join(DEBUG_DIR, 'yolo_visualization', f"{patch_name}_yolo_debug.jpg"),
              yolo_debug_img)

    # --- Segmentation overlay and per-class instance counts --------------
    combined_mask_vis = np.zeros_like(patch_array)
    counts = []
    for i, class_name in enumerate(TRAINING_CLASSES):
        class_mask = class_masks[i]

        colored_mask = np.zeros_like(patch_array)
        colored_mask[class_mask > 0] = class_colors[i]

        # Saturating add, so overlapping classes show a mixed color
        combined_mask_vis = cv2.addWeighted(combined_mask_vis, 1, colored_mask, 1, 0)

        # Connected components stand in for individual objects
        if np.any(class_mask):
            num_labels, _, _, _ = cv2.connectedComponentsWithStats(class_mask)
            # Subtract 1 because the first component is the background
            counts.append(num_labels - 1)
        else:
            counts.append(0)

    alpha = 0.5
    seg_debug_img = cv2.addWeighted(patch_array, 1, combined_mask_vis, alpha, 0)

    count_text = ", ".join(f"{name}: {count}" for name, count in zip(TRAINING_CLASSES, counts))
    cv2.putText(seg_debug_img, count_text,
                (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

    write_rgb(os.path.join(seg_vis_dir, f"{patch_name}_seg_debug.jpg"), seg_debug_img)

    # --- One image per class mask ---------------------------------------
    for i, class_name in enumerate(TRAINING_CLASSES):
        class_mask_vis = np.zeros((PATCH_SIZE, PATCH_SIZE, 3), dtype=np.uint8)
        class_mask_vis[class_masks[i] > 0] = class_colors[i]
        write_rgb(os.path.join(seg_vis_dir, f"{patch_name}_{class_name}_mask.jpg"), class_mask_vis)

    # --- Original next to overlay (both halves RGB until written) --------
    side_by_side = np.hstack([patch_array, seg_debug_img])
    write_rgb(os.path.join(seg_vis_dir, f"{patch_name}_side_by_side.jpg"), side_by_side)

    # --- Unmodified patch ------------------------------------------------
    write_rgb(os.path.join(DEBUG_DIR, 'patches', f"{patch_name}.jpg"), patch_array)

    # --- Mask boundaries, to spot malformed masks -------------------------
    boundary_vis = patch_array.copy()
    for i, class_name in enumerate(TRAINING_CLASSES):
        contours, _ = cv2.findContours(class_masks[i], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(boundary_vis, contours, -1, class_colors[i], 2)

    write_rgb(os.path.join(seg_vis_dir, f"{patch_name}_boundaries.jpg"), boundary_vis)

    # --- Text summary ----------------------------------------------------
    summary_info = {
        'patch_name': patch_name,
        'patch_location': f"({patch_info['patch_left']}, {patch_info['patch_top']})",
        'patch_size': f"{PATCH_SIZE}x{PATCH_SIZE}",
    }
    for class_name, count in zip(TRAINING_CLASSES, counts):
        summary_info[f"{class_name}_count"] = count
    summary_info['total_annotations'] = len(patch_info['patch_annotations'])

    with open(os.path.join(seg_vis_dir, f"{patch_name}_summary.txt"), 'w') as f:
        for key, value in summary_info.items():
            f.write(f"{key}: {value}\n")

In [13]:
def create_distribution_plots(file_counts, class_counts, output_dir):
    """Save two bar charts describing the dataset.

    Args:
        file_counts: Mapping ``split -> number of patches``.
        class_counts: Mapping ``class name -> {split -> number of instances}``
            with an entry for every name in ``CLASSES``.
        output_dir: Existing directory that receives ``file_distribution.png``
            and ``class_distribution.png``.
    """
    splits = ['train', 'val', 'test']

    # Patches per split
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(splits, [file_counts[split] for split in splits])
    ax.set_title('Dataset Split Distribution')
    ax.set_xlabel('Split')
    ax.set_ylabel('Number of Patches')
    fig.savefig(os.path.join(output_dir, 'file_distribution.png'))
    plt.close(fig)

    # Instances per class, one bar per split inside each class group
    fig, ax = plt.subplots(figsize=(12, 6))
    # Class groups sit one unit apart; let the bars of a group fill 80% of
    # that unit so neighbouring groups never overlap.
    bar_width = 0.8 / len(splits)
    index = np.arange(len(CLASSES))

    for i, split in enumerate(splits):
        counts = [class_counts[cls][split] for cls in CLASSES]
        ax.bar(index + i * bar_width, counts, bar_width, label=split)

    ax.set_xlabel('Class')
    ax.set_ylabel('Number of Instances')
    ax.set_title('Class Distribution Across Splits')
    # Centre each tick under its group of bars
    ax.set_xticks(index + bar_width * (len(splits) - 1) / 2)
    ax.set_xticklabels(CLASSES)
    ax.legend()
    fig.savefig(os.path.join(output_dir, 'class_distribution.png'))
    plt.close(fig)

In [14]:
def create_dataset_summary():
    """Count the YOLO dataset's files and labels, then write a report and plots.

    For each split the ``.jpg`` files under ``images`` are counted and every
    ``.txt`` file under ``labels`` is parsed; the first token of each
    non-blank line is taken as the class index. Results are written to
    ``<DEBUG_DIR>/dataset_summary/dataset_summary.txt`` and passed to
    ``create_distribution_plots``.

    Everything currently present in the output folders is counted, including
    files left over from earlier runs.
    """
    summary_dir = os.path.join(DEBUG_DIR, 'dataset_summary')
    os.makedirs(summary_dir, exist_ok=True)

    splits = ['train', 'val', 'test']
    file_counts = {}
    class_counts = {cls: {'train': 0, 'val': 0, 'test': 0} for cls in CLASSES}

    for split in splits:
        image_dir = os.path.join(YOLO_OUTPUT_DIR, split, 'images')
        label_dir = os.path.join(YOLO_OUTPUT_DIR, split, 'labels')

        image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
        file_counts[split] = len(image_files)

        for label_file in os.listdir(label_dir):
            if not label_file.endswith('.txt'):
                continue

            with open(os.path.join(label_dir, label_file), 'r') as f:
                for line in f:
                    fields = line.split()
                    # Blank lines carry no label
                    if not fields:
                        continue
                    class_idx = int(fields[0])
                    if class_idx < len(CLASSES):
                        class_counts[CLASSES[class_idx]][split] += 1

    with open(os.path.join(summary_dir, 'dataset_summary.txt'), 'w') as f:
        f.write("Dataset Summary\n")
        f.write("==============\n\n")

        f.write("File Distribution:\n")
        for split in splits:
            f.write(f"  {split}: {file_counts[split]} files\n")
        f.write("\n")

        f.write("Class Distribution:\n")
        for cls in CLASSES:
            f.write(f"  {cls}:\n")
            for split in splits:
                f.write(f"    {split}: {class_counts[cls][split]} instances\n")
        f.write("\n")

        f.write("Total Statistics:\n")
        total_files = sum(file_counts.values())
        total_annotations = sum(sum(class_counts[cls][split] for split in splits) for cls in CLASSES)
        f.write(f"  Total patches: {total_files}\n")
        f.write(f"  Total annotations: {total_annotations}\n")
        # No patches at all (e.g. every slide was skipped) must not crash the report.
        if total_files:
            f.write(f"  Annotations per patch: {total_annotations / total_files:.2f}\n")
        else:
            f.write("  Annotations per patch: n/a\n")

    create_distribution_plots(file_counts, class_counts, summary_dir)

In [15]:
def main():
    """Run the whole pipeline: directories, per-slide processing, YAML, summary.

    Slides are the ``.svs`` files directly inside ``INPUT_DIR``. They are
    processed in sorted name order, because ``os.listdir`` returns entries in
    arbitrary order and the processing order drives the sequence of random
    draws used for sampling and splitting.

    ``process_slide_file`` prints its own errors and does not raise, so the
    closing messages are printed even when individual slides were skipped.
    """
    ensure_dirs_exist()

    svs_files = sorted(f for f in os.listdir(INPUT_DIR) if f.endswith('.svs'))

    if not svs_files:
        print(f"No SVS files found in {INPUT_DIR}")
        return

    print(f"Found {len(svs_files)} SVS files")
    for svs_file in tqdm(svs_files,
                 total=len(svs_files), desc="Processing slides"):
        process_slide_file(svs_file)

    # Create dataset.yaml
    create_dataset_yaml()

    create_dataset_summary()

    print("Processing completed successfully!")
    print(f"YOLO dataset saved to: {YOLO_OUTPUT_DIR}")
    print(f"Segmentation dataset saved to: {SEG_OUTPUT_DIR}")
    print(f"Debug samples saved to: {DEBUG_DIR}")

if __name__ == "__main__":
    main()

Found 2 SVS files


Processing slides:   0%|                                                                         | 0/2 [00:00<?, ?it/s]
Processing train patches for 1007260:   0%|                                                  | 0/12236 [00:00<?, ?it/s][A
Processing train patches for 1007260:   0%|                                          | 9/12236 [00:00<02:22, 86.03it/s][A
Processing train patches for 1007260:   0%|                                         | 19/12236 [00:00<02:09, 94.01it/s][A
Processing train patches for 1007260:   0%|                                        | 31/12236 [00:00<01:59, 102.43it/s][A
Processing train patches for 1007260:   0%|▏                                        | 42/12236 [00:00<02:14, 90.60it/s][A
Processing train patches for 1007260:   0%|▏                                        | 52/12236 [00:00<02:10, 93.05it/s][A
Processing train patches for 1007260:   1%|▏                                       | 64/12236 [00:00<02:01, 100.43it/s][A
Processing train pa

Processing completed successfully!
YOLO dataset saved to: yolo_dataset
Segmentation dataset saved to: segmentation_dataset
Debug samples saved to: debug_samples
