# Micro-SAM Training from OMERO Data

Train micro-SAM models using annotation tables from OMERO with automated data preparation.

## 1. Setup

In [1]:
# Import the package with training convenience functions
from omero_annotate_ai import (
    create_omero_connection_widget,
    create_training_data_widget,
    prepare_training_data_from_table,
    setup_training,    # Convenience function for training setup
    run_training       # Convenience function for training execution
)

# Additional imports
import datetime
import shutil
from pathlib import Path
import torch

# Confirm that the import cell ran; a plain string literal is enough here (nothing to interpolate)
print("Available functions: Connection, Training Data, Training Utils")
# Optional device report, left disabled:
#print(f"Device available: {'CUDA' if torch.cuda.is_available() else 'CPU'}")


Available functions: Connection, Training Data, Training Utils


## 2. OMERO Connection

In [None]:
# Build the OMERO connection widget and render it below this cell.
# The next code cell reads the connection back from `conn_widget`, so complete
# the connection in the widget before running it.
conn_widget = create_omero_connection_widget()
conn_widget.display()

📄 Loaded configuration from connection history: root@localhost
🔐 Password loaded from keychain (no expiration)


VBox(children=(HTML(value='<h3>🔌 OMERO Server Connection</h3>', layout=Layout(margin='0 0 20px 0')), HTML(valu…

In [4]:
# Read the connection object back from the widget; `conn` is reused by the
# data-selection, data-preparation and cleanup cells further down.
conn = conn_widget.get_connection()

# Fail fast when the widget returned no connection (None), so later cells do not
# run against a missing session.
if conn is None:
    raise ConnectionError("No OMERO connection established.")

# Echo the logged-in user name as a quick sanity check of the session.
print(f"Connected to OMERO as: {conn.getUser().getName()}")

Connected to OMERO as: root


## 3. Training Data Selection

In [5]:
# Build the training-data selection widget on top of the open OMERO connection
# and render it below this cell.
# The next code cell reads the selected table back from `training_widget`, so
# pick a table in the widget before running it.
training_widget = create_training_data_widget(connection=conn)
training_widget.display()

VBox(children=(HTML(value='<h3>🎯 Training Data Selection</h3>', layout=Layout(margin='0 0 20px 0')), HTML(valu…

In [9]:
# Read the user's choice back from the selection widget
selected_table_id = training_widget.get_selected_table_id()
selected_table_info = training_widget.get_selected_table_info()

# Stop right away when nothing was picked; every later cell needs a table ID
if not selected_table_id:
    raise ValueError("No training table selected. Please select a table above.")

# Summarise the selection; missing metadata fields fall back to 'Unknown'
print("Selected training table:")
print(f"  Table ID: {selected_table_id}")
print(f"  Table Name: {selected_table_info.get('name', 'Unknown')}")
print(f"  Created: {selected_table_info.get('created', 'Unknown')}")

Selected training table:
  Table ID: 1417
  Table Name: micro_sam_training_micro_sam_foci_test_20250825_103304
  Created: Unknown


## 4. Setup Training Directory

In [10]:
# Build a unique, timestamped run folder under <home>/micro-sam_models
timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
home_dir = Path.home()
models_dir = home_dir / "micro-sam_models"
folder_name = f"micro-sam-{timestamp}"
output_directory = models_dir / folder_name

# Create the shared models folder first, then the run folder inside it
for new_dir in (models_dir, output_directory):
    new_dir.mkdir(exist_ok=True)

print(f"Training output directory: {output_directory}")

Training output directory: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055


## 5. Automated Data Preparation

Use the automated data preparation function to download and organize training data.

In [11]:
# Fetch the images and labels referenced by the selected table and split them into train/val sets
try:
    training_result = prepare_training_data_from_table(
        conn=conn,
        table_id=selected_table_id,
        output_dir=output_directory,
        validation_split=0.2,  # 20% for validation
        clean_existing=True,
    )

    print("\nTraining data preparation completed successfully!")
    print("\nDataset statistics:")
    for stat_name, stat_value in training_result['stats'].items():
        print(f"  {stat_name}: {stat_value}")

    # Keep the four data directories in named variables for later use in training
    training_input_dir = training_result['training_input']
    training_label_dir = training_result['training_label']
    val_input_dir = training_result['val_input']
    val_label_dir = training_result['val_label']

    print("\nDirectory structure created:")
    for description, directory in (
        ("Training images", training_input_dir),
        ("Training labels", training_label_dir),
        ("Validation images", val_input_dir),
        ("Validation labels", val_label_dir),
    ):
        print(f"  {description}: {directory}")

except Exception as exc:
    # Print a short message, then re-raise so the notebook stops here with the full traceback
    print(f"Error during data preparation: {exc}")
    raise

Loaded table with 4 rows
Table saved to: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\table_1417.csv
✅ Using 4 processed rows for training
Optional columns found: ['is_volumetric']
Table schema validated for processing
Using 2 training images and 2 validation images


Preparing training data:   0%|          | 0/2 [00:00<?, ?it/s]INFO:omero.gateway:Registered 96174e1f-f0ed-4eb8-9590-0018d8261487/ba32c4d2-b17c-46cb-bdaf-4323232e4481omero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000


Item 0 - Image ID: 455, Patch: True, Dimensions: 512x512 at (201,245), Volumetric: False
  2D Patch Request - start_coords: (201, 245, 0, 0, 0), dimensions: 512x512


INFO:omero.gateway:Unregistered 96174e1f-f0ed-4eb8-9590-0018d8261487/ba32c4d2-b17c-46cb-bdaf-4323232e4481omero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000
Preparing training data:  50%|█████     | 1/2 [00:00<00:00,  2.28it/s]

  Returned array shape: (512, 512, 1, 1, 1)
  Extracted 2D shape: (512, 512)
  Saved 2D TIFF to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\training_input\input_00000.tif with shape (512, 512)
  Attempting to download label with ID: 1413
  File annotation found: seg_00000.tif
  Label shape: (512, 512) saved to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\training_label\label_00000.tif


INFO:omero.gateway:Registered 96174e1f-f0ed-4eb8-9590-0018d8261487/6b8c3913-d800-4dd2-a3c9-8bddb0665b59omero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000
INFO:omero.gateway:Unregistered 96174e1f-f0ed-4eb8-9590-0018d8261487/6b8c3913-d800-4dd2-a3c9-8bddb0665b59omero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000


Item 1 - Image ID: 456, Patch: True, Dimensions: 512x512 at (326,217), Volumetric: False
  2D Patch Request - start_coords: (326, 217, 0, 0, 0), dimensions: 512x512


Preparing training data: 100%|██████████| 2/2 [00:00<00:00,  2.47it/s]


  Returned array shape: (512, 512, 1, 1, 1)
  Extracted 2D shape: (512, 512)
  Saved 2D TIFF to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\training_input\input_00001.tif with shape (512, 512)
  Attempting to download label with ID: 1414
  File annotation found: seg_00001.tif
  Label shape: (512, 512) saved to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\training_label\label_00001.tif


Preparing val data:   0%|          | 0/2 [00:00<?, ?it/s]

Item 0 - Image ID: 452, Patch: True, Dimensions: 512x512 at (70,490), Volumetric: False
  2D Patch Request - start_coords: (70, 490, 0, 0, 0), dimensions: 512x512


INFO:omero.gateway:Registered 96174e1f-f0ed-4eb8-9590-0018d8261487/3c420281-1b0c-43af-a377-b9eb7752d13domero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000
INFO:omero.gateway:Unregistered 96174e1f-f0ed-4eb8-9590-0018d8261487/3c420281-1b0c-43af-a377-b9eb7752d13domero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000


  Returned array shape: (512, 512, 1, 1, 1)
  Extracted 2D shape: (512, 512)


Preparing val data:  50%|█████     | 1/2 [00:00<00:00,  2.71it/s]INFO:omero.gateway:Registered 96174e1f-f0ed-4eb8-9590-0018d8261487/e987d8c1-d22d-45fd-b798-203c54f7a622omero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000


  Saved 2D TIFF to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\val_input\input_00000.tif with shape (512, 512)
  Attempting to download label with ID: 1415
  File annotation found: seg_00002.tif
  Label shape: (512, 512) saved to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\val_label\label_00000.tif
Item 1 - Image ID: 457, Patch: True, Dimensions: 512x512 at (213,332), Volumetric: False
  2D Patch Request - start_coords: (213, 332, 0, 0, 0), dimensions: 512x512


INFO:omero.gateway:Unregistered 96174e1f-f0ed-4eb8-9590-0018d8261487/e987d8c1-d22d-45fd-b798-203c54f7a622omero.api.RawPixelsStore -t -e 1.1:tcp -h 172.19.0.8 -p 34317 -t 60000
Preparing val data: 100%|██████████| 2/2 [00:00<00:00,  2.90it/s]

  Returned array shape: (512, 512, 1, 1, 1)
  Extracted 2D shape: (512, 512)
  Saved 2D TIFF to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\val_input\input_00001.tif with shape (512, 512)
  Attempting to download label with ID: 1416
  File annotation found: seg_00003.tif
  Label shape: (512, 512) saved to C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\val_label\label_00001.tif
✅ Training data prepared successfully in: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055
Statistics: {'n_training_images': 2, 'n_training_labels': 2, 'n_val_images': 2, 'n_val_labels': 2, 'total_rows_processed': 4}

Training data preparation completed successfully!

Dataset statistics:
  n_training_images: 2
  n_training_labels: 2
  n_val_images: 2
  n_val_labels: 2
  total_rows_processed: 4

Directory structure created:
  Training images: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\training_input
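  Training labels: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\training_label
  Validation images: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\val_input
  Validation labels: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\val_label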




## 6. Micro-SAM Training Setup

Configure and run micro-SAM training using the prepared data.

In [12]:
# ✨ Training hyperparameters, gathered in one place so they are easy to tune
training_options = {
    "epochs": 10,                # Primary parameter: number of epochs (use 50+ for real training)
    "batch_size": 1,             # Adjust based on GPU memory
    "learning_rate": 1e-5,       # Conservative learning rate
    "patch_shape": (512, 512),   # Input patch size
    "model_type": "vit_b",       # SAM model variant
    "n_objects_per_batch": 25,   # Objects per batch for sampling
}

# Combine the prepared data with the hyperparameters via the convenience function
training_config = setup_training(training_result, **training_options)

print("Training configuration prepared!")
for caption, config_key in (
    ("Model name", "model_name"),
    ("Output directory", "output_dir"),
    ("Training epochs", "epochs"),
    ("Calculated iterations", "n_iterations"),
):
    print(f"{caption}: {training_config[config_key]}")


Training configuration prepared!
Model name: micro_sam_training_20250825_114101
Output directory: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055
Training epochs: 10
Calculated iterations: 20


In [13]:
# ✨ Launch the training run through the convenience function
print("Starting micro-SAM training...")

training_results = run_training(training_config, framework="microsam")

# Report the outcome; optional entries fall back to placeholder values
print("🎉 Training completed successfully!")
print("Training Results:")
print(f'  Model name: {training_results["model_name"]}')
final_model_path = training_results.get("final_model_path", "Not available")
print(f'  Final model: {final_model_path}')
checkpoint_count = len(training_results.get("checkpoints", []))
print(f'  Checkpoints saved: {checkpoint_count}')
print(f'  Output directory: {training_results["output_dir"]}')


Starting micro-SAM training...
Starting micro-SAM training...
Model name: micro_sam_training_20250825_114101
Model type: vit_b
Training configuration:
  Patch shape: (512, 512)
  Batch size: 1
  Learning rate: 1e-05
  Epochs: 10
  Objects per batch: 25
  Checkpoint folder: C:\Users\Maarten\micro-sam_models\micro-sam-20250825_114055\checkpoints
  Using patch shape: (1, 512, 512)
Training device: cpu
Data loaders created successfully!


Verifying labels in 'train' dataloader: 100%|██████████| 50/50 [00:06<00:00,  7.61it/s]
Verifying labels in 'val' dataloader:   6%|▌         | 3/50 [00:00<00:04,  9.49it/s]


KeyboardInterrupt: 

## 7. Model Export and Summary

In [None]:
# Derive everything this cell needs from earlier results. Previously it read
# `checkpoint_folder`, `model_name` and `n_iterations`, which no cell defines, so it
# failed with NameError on a fresh kernel.
model_name = training_config["model_name"]
n_iterations = training_config["n_iterations"]
# The training log reports "<output directory>/checkpoints" as the checkpoint folder.
checkpoint_folder = output_directory / "checkpoints"

# Find the most recently written checkpoint.
# NOTE(review): searched recursively in case the trainer nests checkpoints in
# sub-folders of the checkpoint folder — confirm the actual layout.
checkpoints = list(checkpoint_folder.rglob("*.pt"))
if checkpoints:
    # Pick by modification time; sorting by file name does not yield the newest file.
    latest_checkpoint = max(checkpoints, key=lambda path: path.stat().st_mtime)
    print(f"Latest checkpoint: {latest_checkpoint}")

    # Export model for inference: a plain copy of the checkpoint file (no format
    # conversion), so the message below refers to a file that really exists.
    export_path = output_directory / f"{model_name}_final.pt"
    shutil.copy2(latest_checkpoint, export_path)
    print(f"Model exported to: {export_path}")
else:
    print("No checkpoints found.")

print("\nTraining summary:")
print(f"  Output directory: {output_directory}")
print(f"  Model name: {model_name}")
print(f"  Training completed with {n_iterations} iterations")
print(f"  Dataset statistics: {training_result['stats']}")

## 8. Cleanup

In [None]:
# Release the OMERO session once all work is done
if conn is None:
    print("No active OMERO connection to close.")
else:
    conn.close()
    print("OMERO connection closed.")