In [1]:
import os
import subprocess
from datetime import datetime
from pathlib import Path
from urllib.parse import quote_plus

import geopandas as gpd
import pandas as pd
import yaml
from sqlalchemy import create_engine

from config import SETTINGS
import seabeepy as sb

In [2]:
# Directories that are searched recursively for `config.seabee.yaml` files.
base_dirs = ["/home/notebook/shared-seabee-ns9879k/notebook-logs"]

In [3]:
# Log in once; the resulting client is handed to the sb.storage copy helpers below.
minio_credentials = {
    "user": SETTINGS.MINIO_ACCESS_ID,
    "password": SETTINGS.MINIO_SECRET_KEY,
}
minio_client = sb.storage.minio_login(**minio_credentials)

## Create configurations

In [4]:
# Create the configuration directory (and any missing parents) under the scratch area.
# NOTE(review): dump_config writes to "./config" relative to the working directory —
# confirm the kernel is started from /home/notebook/tmp_detection.
%mkdir -p /home/notebook/tmp_detection/config

In [5]:
# Task and model names; together they form the model id "<TASK>/<MODEL>" and
# the results sub-path "results/<TASK>/<MODEL>" used further down.
TASK = "detection"
MODEL = "2022_nina_birds_20230817"

# Shared storage root and local scratch directory for intermediate files.
root_path = "/home/notebook/shared-seabee-ns9879k"
tmp_path = "/home/notebook/tmp_detection"

`hub.yaml` configures the program and its paths for the hub. The hub has no GPU, so the device defaults to `cpu` (override it with the `DEVICE` environment variable), and the data is already mounted, so MinIO is switched off.

In [6]:
# Device written to hub.yaml; falls back to "cpu" when DEVICE is not set in the environment.
DEVICE = os.getenv("DEVICE", "cpu")


In [7]:
def dump_config(orthophoto_file: str, root_path: str, tmpdir: str, model: "str | None" = None):
    """Write the two YAML configuration files used by the detection commands.

    Both files are written relative to the current working directory:

    * `./config/hub.yaml` - hub settings: work/results/pretrained directories
      under `tmpdir`, the models directory under `root_path`, MinIO disabled
      and the test device taken from the module-level `DEVICE`.
    * `./config/image_detection.yaml` - a production-mode run description for
      a single orthophoto.

    Args:
        orthophoto_file: Path of the orthophoto; stored as the only entry of
            `dataset.test_filenames`.
        root_path: Used as `dataset.root` and as the parent of `models`.
        tmpdir: Scratch directory that receives `work`, `results` and
            `pretrained`.
        model: Model name combined with `TASK` into `model.id`. When omitted,
            the module-level `MODEL` is used.
    """
    if model is None:
        model = MODEL

    # The files are opened with relative paths, so make sure the directory
    # exists wherever the notebook happens to be running.
    os.makedirs("./config", exist_ok=True)

    hub_settings = {
        "DPATH_WORK": f"{tmpdir}/work",
        "DPATH_MODELS": f"{root_path}/models",
        "DPATH_RESULTS": f"{tmpdir}/results",
        "DPATH_PRETRAINED": f"{tmpdir}/pretrained",
        "MINIO": {"USE": False},
        "TEST": {"DEVICE": DEVICE},
    }
    with open("./config/hub.yaml", "w") as f:
        yaml.dump(hub_settings, f)

    detection_settings = {
        "mode": "production",
        "task": TASK,
        "score_threshold": 0.1,
        "model": {"id": f"{TASK}/{model}"},
        "dataset": {
            "root": root_path,
            "annotations": {
                "crs": "epsg:32632",
                "column_main_class": "species",
                "columns_subtasks": ["activity", "sex", "age"],
            },
            "test_filenames": [orthophoto_file],
        },
    }
    with open("./config/image_detection.yaml", "w") as f:
        yaml.dump(detection_settings, f)


def pretty_run(cmd_list: list[str]):
    """Run a command, print what it wrote, and fail loudly on a bad exit code.

    The captured stderr is printed first, then the captured stdout.

    Args:
        cmd_list: Program and arguments, passed to `subprocess.run` as-is.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (raised after the output has been printed).
    """
    completed = subprocess.run(
        cmd_list,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    for captured in (completed.stderr, completed.stdout):
        print(captured)
    completed.check_returncode()


def run_detection(mission_name: str):
    """Run the three detection commands for one mission, in order.

    Stages:
        1. `nrseabee_det_preproc`  - pre-process the images
        2. `nrseabee_det_test`     - the main detection run
        3. `nrseabee_det_postproc` - post-process, generating the gpkg data product

    Every stage is invoked as `<command> -c config/hub.yaml -o <output yaml>
    <input yaml>`; the output yaml of one stage is the input of the next, and
    the first stage reads `config/image_detection.yaml`.

    Args:
        mission_name: Used as the prefix of the per-stage yaml files in
            `config/`.

    Raises:
        subprocess.CalledProcessError: Propagated from `pretty_run` when a
            stage exits with a non-zero status; later stages are not run.
    """
    hub_config = "config/hub.yaml"
    preproc_yaml = f"config/{mission_name}_det_preproc.yaml"
    test_yaml = f"config/{mission_name}_det_test.yaml"
    postproc_yaml = f"config/{mission_name}_postproc.yaml"

    # (banner, command, output yaml, input yaml)
    stages = (
        ("1. preproc", "nrseabee_det_preproc", preproc_yaml, "config/image_detection.yaml"),
        ("2. process", "nrseabee_det_test", test_yaml, preproc_yaml),
        ("3. post-process", "nrseabee_det_postproc", postproc_yaml, test_yaml),
    )

    for banner, command, output_yaml, input_yaml in stages:
        print(banner)
        pretty_run([command, "-c", hub_config, "-o", output_yaml, input_yaml])

    print("fin")

In [8]:
# Collect the mission folders (parents of each config.seabee.yaml) for which
# sb.ortho.is_detection_ready() is true and results/<TASK>/<MODEL> does not exist yet.
detect_list = []
for base_dir in base_dirs:
    for config_file in Path(base_dir).rglob("config.seabee.yaml"):
        mission_dir = config_file.parent
        if not sb.ortho.is_detection_ready(mission_dir):
            continue
        task_results_dir = os.path.join(mission_dir, "results", TASK)
        if sb.ortho.check_subdir_exists(task_results_dir, MODEL):
            continue
        detect_list.append(mission_dir)

## 1. Run the bird detection


In [10]:
# For every pending mission: write the configs, run the three detection stages
# and copy the produced results directory to the mission folder.
for mission_fold in detect_list:
    mission_name = mission_fold.name
    orthophoto_path = os.path.join(
        mission_fold, "orthophoto", f"{mission_name}.tif"
    )
    tmp_fold = os.path.join(tmp_path, f"{mission_name}-workdir")
    # MODEL is deliberately not passed: dump_config picks up the module-level
    # MODEL itself, and a fourth positional argument does not fit its
    # (orthophoto_file, root_path, tmpdir) signature.
    dump_config(orthophoto_path, root_path, tmp_fold)
    run_detection(mission_name)
    sb.storage.copy_folder(
        os.path.join(tmp_fold, "results"), mission_fold, minio_client, overwrite=True
    )

1. preproc

Created log file: oslofjorden_lyseren-zoom_202306271117-workdir/work/dataset/LOG_preprocess.txt
Started: 20240313-143522

 file: /home/kim/work/seabee/test-detection/seabirds/2023/oslofjorden_lyseren-zoom_202306271117/orthophoto/oslofjorden_lyseren-zoom_202306271117.tif
 spatial sample rate: 4.157321079158024 mm x 4.157094088842256 mm
x_res: 0.004157321079158023
y_res: 0.004157094088842256
scale_factor_x: 0.8314642158316047
scale_factor_y: 0.8314188177684512
height: 8577
width: 6746
 original data of shape (8577, 6746) re-sampled to (7131, 5609)
 Doing tile row 1/3
 saving oslofjorden_lyseren-zoom_202306271117-workdir/work/dataset/images/_home_kim_work_seabee_test_detection_seabirds_2023_oslofjorden_lyseren_zoom_202306271117_orthophoto_oslofjorden_lyseren_zoom_202306271117_tif_1_1.jpg and the accompanying .jgw
Save complete
 saving oslofjorden_lyseren-zoom_202306271117-workdir/work/dataset/images/_home_kim_work_seabee_test_detection_seabirds_2023_oslofjorden_lyseren_zoom_20

## 2. Look up encoded class labels and publish

In [None]:
# Collect the mission folders that already have results/<TASK>/<MODEL>, whose
# parsed config has a truthy "publish" entry, and for which
# sb.ortho.is_detection_published() is false.
publish_list = []
for base_dir in base_dirs:
    for config_file in Path(base_dir).rglob("config.seabee.yaml"):
        mission_dir = config_file.parent
        task_results_dir = os.path.join(mission_dir, "results", TASK)
        if not sb.ortho.check_subdir_exists(task_results_dir, MODEL):
            continue
        if not sb.ortho.parse_config(mission_dir)["publish"]:
            continue
        if sb.ortho.is_detection_published(mission_dir, TASK, MODEL):
            continue
        publish_list.append(mission_dir)

In [None]:
# Credentials are percent-encoded before being placed in the URL query string,
# so characters such as '&', '%' or '+' in them cannot corrupt the URL.
engine = create_engine(
    "postgresql:///seabee?host=geonode-postgresql&port=5432"
    f"&user={quote_plus(str(SETTINGS.DB_USER))}"
    f"&password={quote_plus(str(SETTINGS.DB_PASSWORD))}"
)

# One lookup table per encoded column; every column of a table is prefixed with
# the table name (e.g. "species_id") so the tables can be merged side by side.
table_names = ["species", "activity", "sex", "age"]
try:
    # Table names come from the fixed list above, never from user input.
    table_lookup = {
        table: pd.read_sql_query(f"SELECT * FROM {table}", engine).add_prefix(f"{table}_")
        for table in table_names
    }
finally:
    # Release pooled connections even when one of the queries fails.
    engine.dispose()

In [None]:
# For every mission awaiting publication: decode the latest detection result,
# write it to a local geopackage, upload it to GeoServer, publish it to GeoNode
# and copy the geopackage next to the results.
for mission_fold in publish_list:
    mission_name = mission_fold.name
    print(f"\n################\nPublishing: {mission_name}")
    print("Preparing geopackage for publishing.")

    layer_name = sb.ortho.get_layer_name(mission_fold, MODEL)
    model_dir = mission_fold / "results" / TASK / MODEL

    # Run directories are named with a '%Y-%m-%d-%H%M' timestamp (parsed below),
    # so the greatest name is the most recent run.
    run_dirs = [p for p in model_dir.iterdir() if p.is_dir()]
    if not run_dirs:
        print(f"No detection runs found in {model_dir}; skipping.")
        continue
    last_run_date = max(run_dirs).name

    gdf = gpd.read_file(str(model_dir / last_run_date / "out.gpkg"))
    for table in table_names:
        # Replace each integer code by the columns of its lookup table; the
        # "<table>_id" join key is dropped again after the merge.
        gdf[table] = gdf[table].astype(int)
        gdf = gdf.merge(
            table_lookup[table], left_on=table, right_on=f"{table}_id", how="left"
        ).drop(f"{table}_id", axis=1)

    # The geopackage is first written to the working directory under the layer name.
    gpkg_file = f"{layer_name}.gpkg"
    gdf.to_file(gpkg_file, layer=layer_name)

    # Build the metadata before touching any server, so a malformed run
    # directory name fails here rather than after a partial publication.
    stan_path = os.path.join(model_dir, gpkg_file)
    metadata = {
        "abstract": sb.geo.get_detection_abstract(
            gdf, sb.ortho.get_layer_name(mission_fold), MODEL, str(stan_path)
        ),
        "date": datetime.strptime(last_run_date, "%Y-%m-%d-%H%M").isoformat(),
        "date_type": "creation",
        "attribution": "SeaBee",
    }

    print("Uploading geopackage to GeoServer.")
    sb.geo.upload_geopackage_to_geoserver(
        gpkg_file, SETTINGS.GEOSERVER_USER, SETTINGS.GEOSERVER_PASSWORD
    )

    print("Publishing to GeoNode.")
    sb.geo.publish_to_geonode(
        layer_name,
        SETTINGS.GEONODE_USER,
        SETTINGS.GEONODE_PASSWORD,
        workspace="geonode",
    )

    sb.geo.update_geonode_metadata(
        layer_name,
        SETTINGS.GEONODE_USER,
        SETTINGS.GEONODE_PASSWORD,
        metadata,
    )
    sb.storage.copy_file(gpkg_file, stan_path, minio_client, overwrite=False)