In [1]:
import datetime as dt
import os
import shutil
from pathlib import Path

import geopandas as gpd

import seabeepy as sb
from seabeepy.config import SETTINGS

In [2]:
# Open a MinIO session with the credentials held in the seabeepy settings.
# The client is reused by the copy steps further down the notebook.
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID,
    password=SETTINGS.MINIO_SECRET_KEY,
)

# Classify SeaBee orthomosaics (production mode)

This notebook uses NR's machine learning workflow to classify orthomosaics from SeaBee missions. It is designed to run as a "cron job" that will scan and process all flight folders within the specified `base_dirs`.

Orthomosaics for each flight folder must be organised according to the specification [here](https://seabee-no.github.io/documentation/data-upload.html) and already published on the GeoNode.

**This workflow focuses on applying existing (i.e. pre-trained) models to new data, not on training new models**. The available pre-trained models are stored in the `models` bucket on MinIO.

For the time being, training of new models will be handled in-house by NR.

## 1. User input

In [3]:
# Parent directories that are scanned recursively for flight folders.
# Uncomment a year to include it in the run.
base_dirs = [
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2017",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2018",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2019",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2020",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2021",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2022",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2023",
    # "/home/notebook/shared-seabee-ns9879k/seabirds/2024",
    "/home/notebook/shared-seabee-ns9879k/seabirds/2025",
]

# Scratch location for per-mission working folders and staged GeoPackages
temp_dir = "/home/notebook/ml_temp/"

# Minimum confidence score (inclusive) a detection needs in order to be
# included in the published, filtered layer
det_pub_thresh = 0.7

In [4]:
# Timestamp marking the start of this run (naive local time)
run_date = dt.datetime.now()
print(f"Processing started: {run_date}")

Processing started: 2025-05-09 12:39:33.555484


## 2. Identify missions for processing

In [5]:
# Walk every base directory looking for 'config.seabee.yaml' files. The folder
# holding each config file is a candidate; it is queued only if it passes all
# of the checks below, evaluated in this order with the first failure
# rejecting the folder.
classify_list = []
for base_dir in base_dirs:
    for config_path in Path(base_dir).rglob("config.seabee.yaml"):
        flight_dir = config_path.parent

        # The config must request classification
        if not sb.ortho.parse_config(flight_dir)["classify"]:
            continue
        if not sb.ml.is_classification_ready(flight_dir):
            continue
        if not sb.ortho.check_config_valid(flight_dir):
            continue
        # Skip folders for which results already exist
        if sb.ml.check_results_exist(flight_dir):
            continue

        classify_list.append(flight_dir)

print("\nThe following folders are ready to be processed:")
print(classify_list)


The following folders are ready to be processed:
[PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2017/Oslo-Oslo_Oestensjoevannet-nordvest_201705200511'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2017/Oslo-Oslo_Oestensjoevannet-nordoest_201705200442'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2017/Oslo-Oslo_Oestensjoevannet-soervest_201705200508'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2017/Akershus-Nesodden_Soendre-Skjaelholmen_201705091455'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2017/Akershus-Asker_Store-Bjerkoeyskjaer_201705051232'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2018/Oslo-Oslo_Oestensjoevannet-nordvest_201805280608'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2018/Akershus-Baerum_Dokkskjaer_201806241900'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabirds/2018/Oslo-Oslo_Killingen-soer_201805191449'), PosixPath('/home/notebook/shared-seabee-ns9879k/seabir

In [6]:
# Number of flight folders queued for classification (bare expression so the
# notebook displays the value)
len(classify_list)

1571

## 3. Run classifications

In [None]:
%%time

# Classify every queued mission. All per-mission work happens inside the 'try'
# so that a single problematic mission (e.g. unreadable ML options or a config
# that cannot be written) is reported and skipped instead of aborting the
# whole batch. The working folder is removed in 'finally' so that failed
# missions do not leave temporary data behind.
for mission_dir in classify_list:
    # Fallback label used in the error message if the layer name cannot be
    # determined
    mission_name = str(mission_dir)
    mission_temp_dir = None

    try:
        mission_name = sb.ortho.get_layer_name(mission_dir)
        print(f"\n################\nProcessing: {mission_name}")
        orthophoto_path = os.path.join(
            mission_dir, "orthophoto", f"{mission_name}.tif"
        )
        mission_temp_dir = os.path.join(temp_dir, f"{mission_name}-workdir")

        # Get user settings for ML
        ml_options = sb.ml.get_ml_options(mission_dir)
        print(f"Using model '{ml_options['model']}'.")

        # Configure ML pipeline
        sb.ml.write_config_production(
            orthophoto_path,
            mission_temp_dir,
            ml_options["model"],
            ml_options["task"],
        )

        # Run classification
        sb.ml.run_classification(
            mission_name, os.path.join(mission_temp_dir, "config"), ml_options["task"]
        )

        # Copy results to MinIO
        sb.storage.copy_folder(
            os.path.join(mission_temp_dir, "results"),
            str(mission_dir),
            minio_client,
            overwrite=True,
        )

        # # Write detections to PostGIS
        # if ml_options["task"] == "detection":
        #     sb.ml.write_seabird_detections_to_postgis(
        #         mission_dir, SETTINGS.DB_USER, SETTINGS.DB_PASSWORD
        #     )
    except Exception as e:
        print(f"ERROR: Classification failed for '{mission_name}'.")
        print(str(e))
    finally:
        # Clean the temp folder whether or not the mission succeeded.
        # 'ignore_errors' also covers the case where the folder was never
        # created.
        if mission_temp_dir is not None:
            shutil.rmtree(mission_temp_dir, ignore_errors=True)

## 4. Publish results

In [None]:
# Build the list of flight folders whose results should be published. Each
# folder containing a 'config.seabee.yaml' is tested against the checks below,
# in order, stopping at the first one that fails. The config is parsed once
# per folder and reused for both the 'classify' and 'publish' flags.
publish_list = []
for base_dir in base_dirs:
    for config_path in Path(base_dir).rglob("config.seabee.yaml"):
        flight_dir = config_path.parent
        config = sb.ortho.parse_config(flight_dir)
        if (
            config["classify"]
            and sb.ortho.check_config_valid(flight_dir)
            # ml.check_results_exist requires published layer
            and sb.ml.is_classification_ready(flight_dir)
            and sb.ml.check_results_exist(flight_dir)
            and config["publish"]
            and not sb.ml.is_classification_published(flight_dir)
        ):
            publish_list.append(flight_dir)

print("The following missions will be published to GeoNode:")
print(publish_list)

In [None]:
# GeoPackages are staged in 'temp_dir' before being copied to MinIO. Make sure
# the folder exists even when no classification ran earlier in this session.
os.makedirs(temp_dir, exist_ok=True)

# Publish the results for each mission: convert class codes to names, write a
# GeoPackage holding all detections plus a confidence-filtered layer, copy it
# to MinIO, upload the filtered layer to GeoServer, publish it to GeoNode and
# update its metadata. Errors are reported per mission so that one failure
# does not stop the remaining missions.
for mission_dir in publish_list:
    # Fallback label so the error message never refers to an undefined name
    # or to the name of the previous mission
    mission_name = str(mission_dir)
    try:
        mission_name = sb.ortho.get_layer_name(mission_dir)
        print(f"\n################\nProcessing: {mission_name}")

        ml_options = sb.ml.get_ml_options(mission_dir)
        if ml_options["task"] != "detection":
            # Publishing of segmentation results is not implemented yet. Skip
            # explicitly: falling through would reuse the layer names and
            # filtered detections left over from the previous mission.
            # TODO: Process segmentation results
            # layer_name = f"{mission_name}_classifications"
            # style_dict = {
            #     mission_name: f"annotation_classes_v{class_version}_level{style_level}.sld",
            # }
            print(
                f"Skipping '{mission_name}': publishing is only implemented "
                "for detection results."
            )
            continue

        print("Converting class IDs to class names.")
        res_dir = sb.ml.get_latest_results_dir(mission_dir)
        res_path = os.path.join(res_dir, "out.gpkg")
        gdf = gpd.read_file(res_path)

        # Seabirds mission
        layer_name = f"{mission_name}_detections"
        layer_name_filt = f"{mission_name}_filtered-detections"
        gdf = sb.ml.convert_seabird_class_codes_to_names(
            gdf, SETTINGS.DB_USER, SETTINGS.DB_PASSWORD
        )
        # Keep only detections at or above the publication threshold
        gdf_filt = gdf.query("score_species >= @det_pub_thresh").copy()
        # NOTE(review): the style is keyed on 'layer_name', but only
        # 'layer_name_filt' is uploaded below - confirm which key
        # 'upload_geopackage_layers_to_geoserver' expects.
        style_dict = {
            layer_name: "red_outline.sld",
        }

        print("Uploading to GeoServer.")
        temp_gpkg = os.path.join(temp_dir, f"{layer_name}.gpkg")
        gpkg_path = os.path.join(
            mission_dir,
            "results",
            ml_options["task"],
            ml_options["model"],
            f"{layer_name}.gpkg",
        )
        try:
            # Save locally: all detections first, then append the filtered
            # layer to the same GeoPackage
            gdf.to_file(temp_gpkg, layer=layer_name)
            gdf_filt.to_file(temp_gpkg, layer=layer_name_filt, mode="a")

            # Copy to MinIO
            sb.storage.copy_file(temp_gpkg, gpkg_path, minio_client, overwrite=True)
        finally:
            # Delete the local version even if writing or copying failed
            if os.path.exists(temp_gpkg):
                os.remove(temp_gpkg)

        # Upload filtered results layer to GeoServer
        store_name = sb.geo.upload_geopackage_layers_to_geoserver(
            gpkg_path,
            [layer_name_filt],
            SETTINGS.GEOSERVER_USER,
            SETTINGS.GEOSERVER_PASSWORD,
            workspace="geonode",
            style_dict=style_dict,
        )

        print("Publishing to GeoNode.")
        sb.geo.publish_to_geonode(
            layer_name_filt,
            SETTINGS.GEONODE_USER,
            SETTINGS.GEONODE_PASSWORD,
            store_name=store_name,
            workspace="geonode",
        )

        print("Updating metadata.")
        metadata = {
            "abstract": sb.geo.get_detection_abstract(
                gdf, layer_name_filt, ml_options["model"], str(gpkg_path)
            ),
            # The layer date is taken from the first row's 'datetimereg' value
            "date": dt.datetime.strptime(
                gdf["datetimereg"].iloc[0], "%Y-%m-%d %H:%M:%S"
            ).isoformat(),
            "date_type": "creation",
            "attribution": "SeaBee",
        }
        sb.geo.update_geonode_metadata(
            layer_name_filt,
            SETTINGS.GEONODE_USER,
            SETTINGS.GEONODE_PASSWORD,
            metadata,
        )
    except Exception as e:
        print(f"**** Error processing {mission_name}:\n{e}\n")