In [None]:
import datetime as dt
import os
import shutil
from pathlib import Path

import geopandas as gpd
import pandas as pd
from config import SETTINGS
from sqlalchemy import create_engine

import seabeepy as sb

In [None]:
# Log in to MinIO with the access ID / secret key held in SETTINGS (imported from
# the local `config` module). The resulting client is handed to
# sb.storage.copy_folder further down, when classification results are copied
# to the mission folders.
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID, password=SETTINGS.MINIO_SECRET_KEY
)

# Classify SeaBee orthomosaics (production mode)

This notebook uses NR's machine learning workflow to classify orthomosaics from SeaBee missions. It is designed to run as a "cron job" that will scan and process all flight folders within the specified `base_dirs`.

Orthomosaics for each flight folder must be organised according to the specification [here](https://seabee-no.github.io/documentation/data-upload.html) and already published on the GeoNode.

**This workflow focuses on applying existing (i.e. pre-trained) models to new data, not on training new models**. The available pre-trained models are stored in the `models` bucket on MinIO.

For the time being, training of new models will be handled in-house by NR.

## 1. User input

In [None]:
# Root folders that are searched recursively for 'config.seabee.yaml' files;
# the folder containing each such file is treated as a candidate flight folder.
base_dirs = [
    r"/home/notebook/shared-seabee-ns9879k/hub-test-bucket",
]

# Scratch location. Each mission gets its own '<mission_name>-workdir'
# sub-folder here while it is being processed.
temp_dir = r"/home/notebook/ml_temp/"

In [None]:
# Record the (naive, local) timestamp at which this run began and echo it to
# the cell output so that it appears in the job log.
run_date = dt.datetime.now()
print(f"Processing started: {run_date}")

## 2. Identify missions for processing

In [None]:
# Pass 1: walk every base directory looking for 'config.seabee.yaml' and keep
# the containing folder when it is ready for classification, has a valid
# config, and does not already have results. The checks are applied in that
# order and stop at the first one that rules the folder out.
ready_dirs = []
for base_dir in base_dirs:
    for config_path in Path(base_dir).rglob("config.seabee.yaml"):
        flight_dir = config_path.parent
        if not sb.ml.is_classification_ready(flight_dir):
            continue
        if not sb.ortho.check_config_valid(flight_dir):
            continue
        if sb.ml.check_results_exist(flight_dir):
            continue
        ready_dirs.append(flight_dir)

# Pass 2: of those, keep only the folders whose parsed config has a truthy
# 'classify' entry.
classify_list = []
for flight_dir in ready_dirs:
    if sb.ortho.parse_config(flight_dir)["classify"]:
        classify_list.append(flight_dir)

print("\nThe following folders are ready to be processed:")
print(classify_list)

## 3. Run classifications

In [None]:
%%time

# Classify each mission in turn. Every mission uses its own working directory
# under `temp_dir`, which is removed again once the mission has been handled.
for mission_dir in classify_list:
    mission_name = sb.ortho.get_layer_name(mission_dir)
    print(f"\n################\nProcessing: {mission_name}")
    orthophoto_path = os.path.join(mission_dir, "orthophoto", f"{mission_name}.tif")
    mission_temp_dir = os.path.join(temp_dir, f"{mission_name}-workdir")

    # Get user settings for ML ('model' and 'task' are the entries used below)
    ml_options = sb.ml.get_ml_options(mission_dir)

    # Get 'annotations' info to pass to NR application
    # TODO: Not sure exactly what this does or what else can be included. Ask Jarle.
    annotations = {
        "crs": sb.geo.get_geotiff_info(orthophoto_path)["crs"],
        "column_main_class": "species",
        "columns_subtasks": ["activity", "sex", "age"],
    }

    try:
        # Presumably writes the NR configuration into the working directory;
        # the 'config' sub-folder is what run_classification is pointed at.
        sb.ml.write_config_production(
            orthophoto_path,
            mission_temp_dir,
            ml_options["model"],
            ml_options["task"],
            annotations,
        )
        sb.ml.run_classification(
            mission_name, os.path.join(mission_temp_dir, "config"), ml_options["task"]
        )
        # Hand the 'results' sub-folder of the working directory to copy_folder
        # together with the mission folder (presumably the destination).
        sb.storage.copy_folder(
            os.path.join(mission_temp_dir, "results"),
            str(mission_dir),
            minio_client,
            overwrite=True,
        )
    finally:
        # Always clean up, even if one of the steps above raised: otherwise a
        # failed mission leaves its working directory behind, where it takes up
        # disk space and is still present when the mission is retried. The
        # exception itself still propagates. The existence check keeps a
        # FileNotFoundError from masking the original error when the directory
        # was never created.
        if os.path.isdir(mission_temp_dir):
            shutil.rmtree(mission_temp_dir)