In [None]:
import datetime as dt
import logging
import os
import time
from glob import glob

import yaml
from config import SETTINGS
from pyodm import Node

import seabeepy as sb

# NINA seabird surveys 2023

Initial notebook for processing data from Sindre's fieldwork campaign during spring/summer 2023. The code performs the following operations:

 1. Mosaics raw RGB images (in JPEG format) to create orthophotos using NodeODM.
 
 2. Optimises the orthophotos for display on GeoNode.
 
 3. Adds the optimised orthophotos to GeoServer, publishes them to GeoNode, and adds (very) basic metadata.
 
 4. A log file recording progress and any errors is saved to `temp_dir`.

Assumptions:

 * Mission folders are named `Region_Area_YYYYMMDD` or `Region_Area_YYYYMMDDhhmm`.
 
 * Raw JPEG images are contained in a subfolder named `images`.
 
 * The total number of files expected in the `images` folder is specified in the `nfiles` attribute of `config.yaml`, which is located in the top level of the mission folder. To ensure all data has finished uploading before starting any processing, the code first checks that the number of files in `images` is equal to the value specified in `config.yaml`. 
 
 * If the mission folder already contains a folder named `orthophoto`, the code skips ahead to the next mission (i.e. it assumes the folder has already been processed).
 
 * If available, ground control points must be stored in a file named `gcp_list-ODM.txt` within a subfolder named `gcp` (i.e. `./gcp/gcp_list-ODM.txt`).
 
**To do:** It looks like Sindre's most recent uploads also include multi-spectral data in TIFF format (also in the `images` folder). The script here should be extended to process these too, if desired.

In [None]:
# User input
# Quality settings forwarded to NodeODM as the "pc-quality" and
# "feature-quality" task options (ultra | high | medium | low | lowest)
pc_quality = "high"
feature_quality = "high"
# Parent folder whose immediate subfolders are treated as mission folders
base_dir = r"/home/notebook/shared-seabee-ns9879k/seabirds/2023/"
# Local folder for the log file and for temporary standardised orthophotos
temp_dir = r"/home/notebook/cogs/"

In [None]:
def check_folder_structure(dir_path):
    """Decide whether a mission folder is ready to be processed.

    A folder is rejected if it already contains an 'orthophoto' subdirectory
    (treated as already processed), or if it lacks either an 'images'
    subdirectory or a 'config.yaml' file. The reason for a rejection is
    printed and written to the log.

    Args
        dir_path: Str. Path to mission folder.

    Returns
        Bool. True if folder can be processed, else False.
    """
    # Each entry is (skip condition, logging function, message). Conditions
    # are evaluated lazily and in order, so only the first failing check is
    # reported.
    skip_rules = (
        (
            lambda: os.path.isdir(os.path.join(dir_path, "orthophoto")),
            logging.info,
            "Skipping. 'orthophoto' subdirectory already exists.",
        ),
        (
            lambda: not os.path.isdir(os.path.join(dir_path, "images")),
            logging.warning,
            "Skipping. Could not identify 'images' subdirectory.",
        ),
        (
            lambda: not os.path.isfile(os.path.join(dir_path, "config.yaml")),
            logging.error,
            "Skipping. Could not find file 'config.yaml'.",
        ),
    )

    for should_skip, write_log, msg in skip_rules:
        if should_skip():
            print(msg)
            write_log(msg)
            return False

    return True


def parse_mission_data(mission_name):
    """Extract region, area and date from folder name.

    Args
        mission_name: Str. Name of mission folder

    Returns
        Tuple or Bool. (region, area, date) if name can be parsed, else False.
    """
    try:
        region, area, date = mission_name.split("_")
    except ValueError:
        msg = f"Skipping. Could not parse mission name '{mission_name}' (expected 'region_area_date')"
        print(msg)
        logging.error(msg)

        return False

    try:
        date = dt.datetime.strptime(date, "%Y%m%d")
    except ValueError:
        try:
            date = dt.datetime.strptime(date, "%Y%m%d%H%M")
        except ValueError:
            msg = f"Skipping. Could not parse date '{date}' (expected 'YYYYMMDD' or 'YYYYMMDDhhmm')"
            print(msg)
            logging.error(msg)

            return False

    msg = f"Data collected at {area} ({region}) on {date}."
    print(msg)
    logging.info(msg)

    return (region, area, date)


def check_file_count(dir_path):
    """Check that the upload of raw files to a mission folder is complete.

    Counts the regular files directly inside the 'images' subfolder and
    compares the count with the 'nfiles' value in 'config.yaml'. A config
    file that cannot be parsed as YAML, is not a mapping, or has no 'nfiles'
    entry is reported and treated as a failed check, so that one bad mission
    folder does not abort processing of the others.

    Args
        dir_path: Str. Path to mission folder.

    Returns
        Bool. True if count agrees, else False.
    """
    image_fold = os.path.join(dir_path, "images")
    with os.scandir(image_fold) as entries:
        nfiles_found = sum(1 for entry in entries if entry.is_file())

    config_path = os.path.join(dir_path, "config.yaml")
    try:
        with open(config_path, "r") as stream:
            data = yaml.safe_load(stream)
    except yaml.YAMLError:
        # E.g. a config file that is still being uploaded
        msg = "Skipping. Could not parse 'config.yaml'."
        print(msg)
        logging.error(msg)

        return False

    # safe_load returns None for an empty file and may return a non-mapping
    if not isinstance(data, dict) or "nfiles" not in data:
        msg = "Skipping. 'config.yaml' does not define 'nfiles'."
        print(msg)
        logging.error(msg)

        return False

    if nfiles_found == data["nfiles"]:
        return True

    msg = "Skipping. Number of files in 'images' does not match 'config.yaml'."
    print(msg)
    logging.error(msg)

    return False

In [None]:
# Check NodeODM is available
# Connect to the NodeODM instance at http://nodeodm. 'node' is reused by the
# processing cell to submit tasks; printing its info here surfaces connection
# problems before any processing starts.
node = Node.from_url("http://nodeodm")
print(node.info())

In [None]:
# Login to MinIO
# Credentials are read from the SETTINGS object imported from 'config'. The
# returned client is passed to the seabeepy storage helpers when results are
# copied.
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID, password=SETTINGS.MINIO_SECRET_KEY
)

In [None]:
# Setup logging
# The log file is written to 'temp_dir' and named after the start time.
# NOTE(review): 'log_date' is formatted with the default datetime str(), so
# the file name contains a space and colons - confirm this is acceptable on
# the target file system.
log_date = dt.datetime.today()
log_file = os.path.join(temp_dir, f"seabirds_logging_{log_date}.txt")
# filemode="w" starts a fresh file rather than appending to an existing one
logging.basicConfig(
    filename=log_file,
    filemode="w",
    level=logging.INFO,
    format="%(levelname)-9s : %(message)s",
)

# Record the start time and the user options at the top of the log
msg = f"""
Processing started: {log_date}
    
User options
base_dir:           {base_dir}
temp_dir:           {temp_dir}
pc_quality:         {pc_quality}
feature_quality:    {feature_quality}        
"""
print(msg)
logging.info(msg)

In [None]:
# Process mission data in 'base_dir'
# Each immediate subfolder of 'base_dir' is treated as one mission. Folders
# that fail any of the pre-checks are skipped (the check functions print and
# log the reason); all others are mosaiced with NodeODM, standardised, and
# published to GeoServer/GeoNode. Any unhandled exception is written to the
# log with its traceback and then re-raised, which stops the run.
try:
    dir_list = sorted(glob(os.path.join(base_dir, "*/")))
    for mission_fold in dir_list:
        msg = f"\n###################################################\nProcessing: {mission_fold}"
        print(msg)
        logging.info(msg)

        # Check whether folder can be processed
        if not check_folder_structure(mission_fold):
            continue

        # 'mission_fold' ends with a path separator (from the "*/" pattern),
        # so strip it with dirname before taking the folder name
        mission = os.path.basename(os.path.dirname(mission_fold))
        mission_data = parse_mission_data(mission)
        if not mission_data:
            continue
        region, area, date = mission_data

        # Check that data upload is complete
        if not check_file_count(mission_fold):
            continue

        start_time = time.time()
        msg = "Orthomosaicing."
        print(msg)
        logging.info(msg)

        # Get images to process
        image_fold = os.path.join(mission_fold, "images")
        image_files = sb.ortho.list_images(image_fold, ext="JPG", verbose=False)
        msg = f"{len(image_files)} images to process."
        print(msg)
        logging.info(msg)

        # Get GCPs, if available
        gcp_path = os.path.join(mission_fold, "gcp", "gcp_list-ODM.txt")
        if os.path.isfile(gcp_path):
            gcps = gcp_path
            msg = "GCP file identified."
        else:
            gcps = None
            msg = "GCP file not found."
        print(msg)
        logging.info(msg)

        # Send task to NodeODM. Options are documented here:
        # https://docs.opendronemap.org/arguments/
        nodeodm_options = {
            "dsm": True,
            "dtm": True,
            "cog": True,
            "orthophoto-compression": "LZW",
            "orthophoto-resolution": 0.1,  # cm/pixel. If set very small, output will be auto-limited by data to max sensible value
            "dem-resolution": 0.1,  # cm/pixel. If set very small, output will be auto-limited by data to max sensible value
            "max-concurrency": 16,
            "auto-boundary": True,
            "use-3dmesh": True,
            "fast-orthophoto": False,
            # "split": 100,
            # "split-overlap": 50,
            "feature-quality": feature_quality,  # ultra | high | medium | low | lowest
            "pc-quality": pc_quality,  # ultra | high | medium | low | lowest
            "gcp": gcps,
        }
        task = node.create_task(
            image_files,
            nodeodm_options,
        )
        sb.ortho.show_nodeodm_task_progress_bar(task)

        msg = "Reorganising results."
        print(msg)
        logging.info(msg)

        task_id = task.info().uuid
        sb.storage.copy_nodeodm_results(task_id, mission_fold, minio_client)

        msg = "Preparing orthophoto for publishing."
        print(msg)
        logging.info(msg)

        # Standardise and save locally
        ortho_path = os.path.join(
            mission_fold, "orthophoto", "odm_orthophoto.original.tif"
        )
        temp_path = os.path.join(temp_dir, mission + ".tif")
        sb.geo.standardise_orthophoto(
            ortho_path,
            temp_path,
            red_band=1,
            green_band=2,
            blue_band=3,
            nodata=255,
        )

        # Copy to MinIo and delete local version. The local copy is removed
        # even if the upload fails, so failed runs do not leave large files
        # behind in 'temp_dir'.
        stan_path = os.path.join(mission_fold, "orthophoto", mission + ".tif")
        try:
            sb.storage.copy_file(temp_path, stan_path, minio_client, overwrite=False)
        finally:
            os.remove(temp_path)

        msg = "Uploading to GeoServer."
        print(msg)
        logging.info(msg)

        layer_name = sb.geo.upload_raster_to_geoserver(
            stan_path,
            SETTINGS.GEOSERVER_USER,
            SETTINGS.GEOSERVER_PASSWORD,
            workspace="geonode",
        )

        msg = "Publishing to GeoNode."
        print(msg)
        logging.info(msg)

        sb.geo.publish_to_geonode(
            layer_name,
            SETTINGS.GEONODE_USER,
            SETTINGS.GEONODE_PASSWORD,
            workspace="geonode",
        )

        msg = "Updating metadata."
        print(msg)
        logging.info(msg)

        metadata = {
            "abstract": f"RGB mosaic collected by NINA at {area} ({region}) on {date}.",
            "date": date.isoformat(),
            "date_type": "creation",
            "attribution": "SeaBee",
        }
        sb.geo.update_geonode_metadata(
            layer_name,
            SETTINGS.GEONODE_USER,
            SETTINGS.GEONODE_PASSWORD,
            metadata,
        )

        end_time = time.time()
        elapsed = dt.timedelta(seconds=end_time - start_time)
        msg = f"Finished in {elapsed}."
        print(msg)
        logging.info(msg)
except Exception:
    # Write the full traceback to the log file, then let the error propagate
    logging.exception("Processing aborted due to an unhandled exception.")
    raise