# Prepare OMERO Data for BiaPy training

Prepare training data for AI models from OMERO annotation tables using automated data preparation; the resulting folders can then be used for BiaPy training.

## 1. Setup

In [None]:
# Import the package with training convenience functions
from omero_annotate_ai import (
    create_omero_connection_widget,
    create_training_data_widget,
    prepare_training_data_from_table,

)

# Additional imports
import datetime
from pathlib import Path
import torch

# Ask PyTorch whether a CUDA device is usable and report the result
device_label = 'CUDA' if torch.cuda.is_available() else 'CPU'
print(f"Check if GPU is available: {device_label}")


## 2. OMERO Connection

In [None]:
# Create and display OMERO connection widget
# create_omero_connection_widget() is imported from omero_annotate_ai.
# The widget object is kept in `conn_widget` so that the connection can be
# read back from it (via get_connection()) once it has been displayed.
conn_widget = create_omero_connection_widget()
conn_widget.display()

In [None]:
# Read the connection object back from the connection widget
conn = conn_widget.get_connection()

# Fail fast when the widget did not yield a connection
if conn is None:
    raise ConnectionError("No OMERO connection established.")

# Show which account the session belongs to
omero_user = conn.getUser()
print(f"Connected to OMERO as: {omero_user.getName()}")

## 3. Training Data Selection

In [None]:
# Create training data selection widget
# The OMERO connection is handed over through the `connection` keyword;
# presumably the widget uses it to list the available annotation tables
# (NOTE(review): confirm against omero_annotate_ai).
training_widget = create_training_data_widget(connection=conn)
training_widget.display()

In [None]:
# Read the selection (ID and metadata) back from the training data widget
selected_table_id = training_widget.get_selected_table_id()
selected_table_info = training_widget.get_selected_table_info()

# Stop here when no table was picked in the widget
if not selected_table_id:
    raise ValueError("No training table selected. Please select a table above.")

# Summarise the selection; missing metadata entries are shown as 'Unknown'
print(f"Selected training table:")
print(f"  Table ID: {selected_table_id}")
table_name = selected_table_info.get('name', 'Unknown')
print(f"  Table Name: {table_name}")
created_on = selected_table_info.get('created', 'Unknown')
print(f"  Created: {created_on}")

## 4. Setup Training Directory

In [None]:
# Parent folder for all prepared datasets: ~/dl4mic_data (created if missing)
home_dir = Path.home()
models_dir = home_dir.joinpath("dl4mic_data")
models_dir.mkdir(exist_ok=True)

# Each run gets its own sub-folder, named with a timestamp (second resolution)
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
folder_name = f"training_data-{timestamp}"
output_directory = models_dir.joinpath(folder_name)
output_directory.mkdir(exist_ok=True)

print(f"Training output directory: {output_directory}")

## 5. Automated Data Preparation

Use the automated data preparation function to download and organize training data.

In [None]:
# Run automated data preparation


def _to_biapy_path(path):
    """Return *path* as a POSIX-style string for pasting into BiaPy.

    A path that starts with a Windows drive letter (e.g. ``C:/data``) is
    rewritten to the corresponding WSL mount point (``/mnt/c/data``).
    A path without a drive letter (Linux/macOS) is returned in POSIX form
    unchanged, so no leading characters of the real path are cut off.
    """
    path = Path(path)
    drive = path.drive
    posix = path.as_posix()
    # Only a plain "<letter>:" drive maps onto /mnt/<letter>; anything else
    # (no drive, UNC share) is left as it is.
    if len(drive) == 2 and drive.endswith(":"):
        return f"/mnt/{drive[0].lower()}{posix[len(drive):]}"
    return posix


try:
    # Download and organise the data referenced by the selected table
    # into output_directory.
    training_result = prepare_training_data_from_table(
        conn=conn,
        table_id=selected_table_id,
        training_name=selected_table_info.get('name', f"training_table_{selected_table_id}"),
        output_dir=output_directory,
        clean_existing=True,
        verbose=False  # Set to True for detailed debug output
    )

    print("\nTraining data preparation completed successfully!")
    print("\nDataset statistics:")
    for key, value in training_result['stats'].items():
        print(f"  {key}: {value}")

    # Store paths for later use in training
    training_input_dir = training_result['training_input']
    training_label_dir = training_result['training_label']
    val_input_dir = training_result['val_input']
    val_label_dir = training_result['val_label']

    print("\nDirectory structure created:")
    print(f"  Training images: {training_input_dir}")
    print(f"  Training labels: {training_label_dir}")
    print(f"  Validation images: {val_input_dir}")
    print(f"  Validation labels: {val_label_dir}")

    # Print the data and output folders in a form that can be pasted into
    # BiaPy. The drive letter is taken from the path that is actually
    # printed, and paths without a drive letter are printed unchanged.
    biapy_output_dir = output_directory / 'output'
    print("\nTo use this data for BiaPy training use the following paths:")
    print("These paths can be pasted directly into the BiaPy input fields.")
    print(f"  Data folder: \n {_to_biapy_path(output_directory)}")
    biapy_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"  Output folder: \n {_to_biapy_path(biapy_output_dir)}")

except Exception as e:
    # Surface a readable message in the notebook, then let the original
    # exception propagate so the cell is still marked as failed.
    print(f"Error during data preparation: {e}")
    raise