# Micro-SAM Training from OMERO Data

Train micro-SAM models using annotation tables from OMERO with automated data preparation.

## 1. Setup

In [1]:
# Import the package with training convenience functions
from omero_annotate_ai import (
    create_omero_connection_widget,
    create_training_data_widget,
    prepare_training_data_from_table,
    setup_training,    # Convenience function for training setup
    run_training       # Convenience function for training execution
)

# Additional imports
import datetime
from pathlib import Path
import torch

# Report which compute device PyTorch can see in this kernel
device_label = "CUDA" if torch.cuda.is_available() else "CPU"
print(f"Device available: {device_label}")


Device available: CPU


## 2. OMERO Connection

In [None]:
# Create and display OMERO connection widget.
# NOTE(review): the following cell reads the connection back with
# conn_widget.get_connection() and raises if it is None, so the form presumably
# has to be completed interactively before continuing — confirm.
conn_widget = create_omero_connection_widget()
conn_widget.display()

Loaded configuration from connection history: root@localhost
Password loaded from keychain


VBox(children=(HTML(value="\n                <h3>🔌 OMERO Server Connection</h3>\n                <div style='f…

In [3]:
# Pull the live connection out of the widget and fail fast when there is none
conn = conn_widget.get_connection()
if conn is None:
    raise ConnectionError("No OMERO connection established.")

# Show the account name as a quick sanity check of the session
omero_user = conn.getUser()
print(f"Connected to OMERO as: {omero_user.getName()}")

Connected to OMERO as: root


## 3. Training Data Selection

In [None]:
# Create training data selection widget, bound to the open OMERO connection.
# NOTE(review): the following cell reads the selection back from this widget
# and raises if no table id is returned, so a table presumably has to be picked
# interactively before continuing — confirm.
training_widget = create_training_data_widget(connection=conn)
training_widget.display()

VBox(children=(HTML(value='<h3>🎯 Training Data Selection</h3>', layout=Layout(margin='0 0 20px 0')), HTML(valu…

In [5]:
# Read back the table chosen in the selection widget; stop if nothing was picked
selected_table_id = training_widget.get_selected_table_id()
selected_table_info = training_widget.get_selected_table_info()

if not selected_table_id:
    raise ValueError("No training table selected. Please select a table above.")

# Summarise the selection; metadata fields missing from the info dict show as 'Unknown'
print("Selected training table:")
print(f"  Table ID: {selected_table_id}")
for label, field in (("Table Name", "name"), ("Created", "created")):
    print(f"  {label}: {selected_table_info.get(field, 'Unknown')}")

Selected training table:
  Table ID: 2107
  Table Name: testscreen_20260218_183216
  Created: Unknown


## 4. Setup Training Directory

In [7]:
# Create a fresh, timestamped output directory for this training run under
# ~/omero-annotate-ai/micro-sam_models.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
home_dir = Path.home()
models_dir = home_dir / "omero-annotate-ai" / "micro-sam_models"
# parents=True: on a first run ~/omero-annotate-ai does not exist yet, and a
# plain mkdir() would raise FileNotFoundError for the nested folder.
models_dir.mkdir(parents=True, exist_ok=True)

# One sub-folder per run, named after the start time, so runs never collide
folder_name = f"micro-sam-{timestamp}"
output_directory = models_dir / folder_name
output_directory.mkdir(parents=True, exist_ok=True)

print(f"Training output directory: {output_directory}")

Training output directory: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501


## 5. Automated Data Preparation

Use the automated data preparation function to download and organize training data.

In [8]:
# Download the annotated images/labels referenced by the selected table and
# lay them out on disk for training. Any failure is reported and re-raised.
try:
    training_result = prepare_training_data_from_table(
        conn=conn,
        table_id=selected_table_id,
        training_name=selected_table_info.get('name', f"training_table_{selected_table_id}"),
        output_dir=output_directory,
        clean_existing=True,
    )

    print("\nTraining data preparation completed successfully!")
    print("\nDataset statistics:")
    for stat_name, stat_value in training_result['stats'].items():
        print(f"  {stat_name}: {stat_value}")

    # Keep the four data directories under their own names for later cells
    training_input_dir = training_result['training_input']
    training_label_dir = training_result['training_label']
    val_input_dir = training_result['val_input']
    val_label_dir = training_result['val_label']

    print("\nDirectory structure created:")
    directory_report = (
        ("Training images", training_input_dir),
        ("Training labels", training_label_dir),
        ("Validation images", val_input_dir),
        ("Validation labels", val_label_dir),
    )
    for caption, directory in directory_report:
        print(f"  {caption}: {directory}")

except Exception as exc:
    # Surface a short message, then let the original traceback propagate
    print(f"Error during data preparation: {exc}")
    raise

INFO:omero_annotate_ai.training:Starting training data preparation from table 2107
DEBUG:omero_annotate_ai.training:Parameters: output_dir=/var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501, validation_split=0.2, clean_existing=True
INFO:omero_annotate_ai.training:Loaded table with 5 rows
INFO:omero_annotate_ai.training:Table saved to: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501/table_2107.csv
INFO:omero_annotate_ai.training:Using 5 processed rows for training
DEBUG:omero_annotate_ai.training:Optional columns found: ['is_volumetric']
INFO:omero_annotate_ai.training:Table schema validated for processing
INFO:omero_annotate_ai.training:Using existing train/validate split from table
INFO:omero_annotate_ai.training:Using 3 training images and 2 validation images
INFO:omero_annotate_ai.training:Preparing training dataset: 3 items to process
Preparing training data:   0%|          | 0/3 [00:00<?, ?it/s]DEBUG:omero_annotat


Training data preparation completed successfully!

Dataset statistics:
  n_training_images: 3
  n_training_labels: 3
  n_val_images: 2
  n_val_labels: 2
  total_rows_processed: 5

Directory structure created:
  Training images: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501/training_input
  Training labels: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501/training_label
  Validation images: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501/val_input
  Validation labels: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501/val_label


## 6. Micro-SAM Training Setup

Configure and run micro-SAM training using the prepared data.

In [9]:
# ✨ Assemble the training configuration via the convenience function
training_options = dict(
    model_name=f"{selected_table_info.get('name', 'micro_sam_training')}_{timestamp}",
    epochs=10,                # Primary parameter: number of epochs (use 50+ for real training)
    batch_size=1,             # Adjust based on GPU memory
    learning_rate=1e-5,       # Conservative learning rate
    patch_shape=(512, 512),   # Input patch size
    model_type="vit_b_lm",    # SAM model variant
    n_objects_per_batch=25,   # Objects per batch for sampling
)
training_config = setup_training(training_result, **training_options)

# Echo the values that setup_training resolved, including the derived iteration count
print("Training configuration prepared!")
config_report = (
    ("Model name", "model_name"),
    ("Output directory", "output_dir"),
    ("Training epochs", "epochs"),
    ("Calculated iterations", "n_iterations"),
)
for caption, config_key in config_report:
    print(f"{caption}: {training_config[config_key]}")


Training configuration prepared!
Model name: testscreen_20260218_183216_20260218_194501
Output directory: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501
Training epochs: 10
Calculated iterations: 30


In [10]:
# ✨ Execute training with the convenience function and summarise the outcome.
# NOTE(review): the saved output of this cell ends in
#   TypeError: torch_em.segmentation.default_segmentation_loader() got multiple
#   values for keyword argument 'is_seg_dataset'
# which is raised inside run_training, not in this cell. This is likely a
# version mismatch between omero_annotate_ai and torch_em / micro-sam — confirm
# the installed versions before relying on this cell.
print("Starting micro-SAM training...")

training_results = run_training(training_config, framework="microsam")

# "final_model_path" and "checkpoints" are read with .get() and fall back to
# placeholders when absent; "model_name" and "output_dir" are required keys.
print('🎉 Training completed successfully!')
print('Training Results:')
print(f'  Model name: {training_results["model_name"]}')
print(f'  Final model: {training_results.get("final_model_path", "Not available")}')
print(f'  Checkpoints saved: {len(training_results.get("checkpoints", []))}')
print(f'  Output directory: {training_results["output_dir"]}')


Starting micro-SAM training...
Starting micro-SAM training...
Model name: testscreen_20260218_183216_20260218_194501
Model type: vit_b_lm
Training configuration:
  Patch shape: (512, 512)
  Batch size: 1
  Learning rate: 1e-05
  Epochs: 10
  Objects per batch: 25
  Checkpoint folder: /var/home/maartenpaul/omero-annotate-ai/micro-sam_models/micro-sam-20260218_194501/checkpoints
  Using patch shape: (1, 512, 512)
Training device: cpu


TypeError: torch_em.segmentation.default_segmentation_loader() got multiple values for keyword argument 'is_seg_dataset'

## 7. Model Export and Summary

In [None]:
# Locate the most recent checkpoint written by training, copy it to a stable
# path for inference, and print a short summary of the run.
import shutil

checkpoint_folder = Path(training_results.get("output_dir", output_directory)) / "checkpoints"
model_name = training_config["model_name"]

# Search recursively: the BioImage.IO export cell reads
# checkpoints/<model_name>/best.pt, which a flat "*.pt" glob would never find.
checkpoints = list(checkpoint_folder.rglob("*.pt")) if checkpoint_folder.exists() else []
if checkpoints:
    # Choose by modification time; sorting by file name is not chronological
    latest_checkpoint = max(checkpoints, key=lambda ckpt: ckpt.stat().st_mtime)
    print(f"Latest checkpoint: {latest_checkpoint}")

    # Export model for inference: actually write the file the message announces
    export_path = output_directory / f"{model_name}_final.pt"
    shutil.copy2(latest_checkpoint, export_path)
    print(f"Model exported to: {export_path}")
else:
    print("No checkpoints found.")

print("\nTraining summary:")
print(f"  Output directory: {output_directory}")
print(f"  Model name: {model_name}")
print(f"  Dataset statistics: {training_result['stats']}")

In [None]:
from micro_sam.bioimageio.model_export import export_sam_model
import imageio.v3 as imageio
import numpy as np
import os

# Take the validation image/label folders produced during data preparation
val_input_path = training_result["val_input"]
val_label_path = training_result["val_label"]

# Sorted listings so that the first image and the first label line up by name
val_images = sorted(os.listdir(val_input_path))
val_labels = sorted(os.listdir(val_label_path))

if not (val_images and val_labels):
    print("No validation data available for model export.")
else:
    # A single image/label pair is handed to the exporter as sample data
    test_image = imageio.imread(os.path.join(val_input_path, val_images[0]))
    test_label = imageio.imread(os.path.join(val_label_path, val_labels[0]))

    bioimageio_model_path = output_directory / "bioimage_io_model"

    # Expected location of the best checkpoint: checkpoints/<model_name>/best.pt
    checkpoint_dir = checkpoint_folder / model_name
    best_checkpoint = checkpoint_dir / "best.pt"

    if not best_checkpoint.exists():
        print(f"Checkpoint not found at: {best_checkpoint}")
    else:
        export_options = {
            "image": test_image,
            "label_image": test_label,
            "model_type": training_config["model_type"],
            "name": model_name,
            "output_path": str(bioimageio_model_path),
            "checkpoint_path": str(best_checkpoint),
            # Placeholder author metadata: edit before publishing the model
            "authors": [{"name": "Your Name", "affiliation": "Your Institution"}],
            "description": "Micro-SAM model trained on microscopy images",
        }
        export_sam_model(**export_options)
        print(f"BioImage.IO model exported to: {bioimageio_model_path}")

## 8. Cleanup

In [None]:
# Release the OMERO session, if one was opened earlier in the notebook
if conn is None:
    print("No active OMERO connection to close.")
else:
    conn.close()
    print("OMERO connection closed.")