In [None]:
import datetime as dt
import os

import yaml
from config import SETTINGS
from schema import And, Optional, Or, Schema, SchemaError

import seabeepy as sb

# Refactor `config.yaml` for seabirds missions

In [this issue](https://github.com/SeaBee-no/documentation/issues/23), we agreed to change the folder structure so that all metadata is read from `config.yaml`, instead of trying to parse the mission folder name (see especially the post [here](https://github.com/SeaBee-no/documentation/issues/23#issuecomment-1586830338)).

For backwards compatibility, this notebook updates all the old seabirds `config.yaml` files to use the new standard. The files are also renamed `config.seabee.yaml`, as discussed with Kim on Teams.

In [None]:
# Authenticate against MinIO with the credentials held in SETTINGS. The
# resulting client is passed to the 'sb.storage' helpers used further down.
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID,
    password=SETTINGS.MINIO_SECRET_KEY,
)

In [None]:
def check_old_config_exists(dir_path, config_name="config.seabee.yaml"):
    """Check whether a config file is present in a mission folder.

    NOTE: Despite the function name, the default file name checked is
    'config.seabee.yaml'. Pass config_name='config.yaml' to test for a legacy
    config file instead.

    Args
        dir_path:    Str or path-like. Path to mission folder.
        config_name: Str. Default 'config.seabee.yaml'. Name of the config file
                     to look for directly inside 'dir_path'.

    Returns
        Bool. True if 'config_name' exists as a regular file in 'dir_path',
        else False.
    """
    # 'isfile' already returns a bool, so no if/else wrapper is needed
    return os.path.isfile(os.path.join(dir_path, config_name))


def parse_config(dir_path, config_name="config.seabee.yaml"):
    """Read a YAML config file from a mission folder.

    Args
        dir_path:    Str or path-like. Path to mission folder.
        config_name: Str. Default 'config.seabee.yaml'. Name of the config file
                     inside 'dir_path'. Pass 'config.yaml' to read a legacy
                     file.

    Returns
        The content returned by 'yaml.safe_load'. This is a dict for a
        well-formed config file (and None if the file is empty).

    Raises
        OSError if the file cannot be opened (e.g. it does not exist).
    """
    config_path = os.path.join(dir_path, config_name)

    # Read explicitly as UTF-8 rather than relying on the locale's default
    # encoding, so that non-ASCII values are decoded the same way everywhere
    with open(config_path, "r", encoding="utf-8") as stream:
        return yaml.safe_load(stream)


def parse_old_mission_data(dir_path):
    """ """
    mission_name = os.path.split(dir_path)[-1]

    try:
        group, area, date = mission_name.split("_")
    except ValueError:
        print(f"Could not parse '{mission_name}'. Expected (grouping_area_date).")
        return False

    try:
        dt.datetime.strptime(date, "%Y%m%d")
    except ValueError:
        try:
            dt.datetime.strptime(date, "%Y%m%d%H%M")
        except ValueError:
            print(
                f"Could not parse date '{date}'. Expected 'yyyymmdd' or 'yyyymmddHHMM."
            )
            return False

    return (group, area, date)


def check_old_config_valid(dir_path, verbose=False):
    """Check that an old-style 'config.yaml' can be parsed and matches the old schema.

    The old schema requires 'nfiles', 'organisation', 'mosaic', 'publish' and
    'theme', and optionally allows 'creator_name', 'project' and 'odm_options'.

    Args
        dir_path: Str or path-like. Path to mission folder containing
                  'config.yaml'.
        verbose:  Bool. Default False. Whether to print error details if the
                  file is not valid.

    Returns
        Bool. True if 'config.yaml' is valid, else False (including when the
        file is not well-formed YAML).

    Raises
        OSError if 'config.yaml' cannot be opened (e.g. it does not exist).
    """
    # Full list of ODM options here: https://docs.opendronemap.org/arguments/
    schema = Schema(
        {
            "nfiles": And(int, lambda n: n > 0),
            "organisation": str,
            "mosaic": bool,
            "publish": bool,
            "theme": lambda s: s in ("Seabirds", "Mammals", "Habitat"),
            Optional("creator_name"): Or(str, None),
            Optional("project"): Or(str, None),
            Optional("odm_options"): {
                Optional("dsm"): bool,
                Optional("dtm"): bool,
                Optional("cog"): bool,
                Optional("orthophoto-compression"): lambda s: s
                in ("JPEG", "LZW", "PACKBITS", "DEFLATE", "LZMA", "NONE"),
                Optional("orthophoto-resolution"): Or(int, float),
                Optional("dem-resolution"): Or(int, float),
                Optional("max-concurrency"): int,
                Optional("auto-boundary"): bool,
                Optional("use-3dmesh"): bool,
                Optional("fast-orthophoto"): bool,
                Optional("pc-rectify"): bool,
                Optional("split"): int,
                Optional("split-overlap"): int,
                Optional("crop"): And(Or(int, float), lambda x: x >= 0),
                Optional("pc-quality"): lambda s: s
                in ("ultra", "high", "medium", "low", "lowest"),
                Optional("feature-quality"): lambda s: s
                in ("ultra", "high", "medium", "low", "lowest"),
            },
        }
    )

    config_path = os.path.join(dir_path, "config.yaml")

    try:
        # Read explicitly as UTF-8 rather than relying on the locale default
        with open(config_path, "r", encoding="utf-8") as stream:
            data = yaml.safe_load(stream)
        schema.validate(data)
    except (yaml.YAMLError, SchemaError) as e:
        # Malformed YAML is reported as "not valid" instead of crashing the
        # caller, which uses this function as a filter
        if verbose:
            print("Could not parse 'config.yaml':")
            print(e)

        return False

    return True


def check_new_config_valid(dir_path, verbose=False):
    """Check that 'config.seabee.yaml' can be parsed and matches the new schema.

    Compared to the old schema, the new one additionally requires 'grouping',
    'area' and 'datetime' (a string formatted 'yyyymmdd' or 'yyyymmddHHMM'),
    accepts 'theme' case-insensitively, and allows the optional fields
    'spectrum_type' and 'elevation'.

    Args
        dir_path: Str or path-like. Path to directory containing
                  'config.seabee.yaml'.
        verbose:  Bool. Default False. Whether to print error details if the
                  file is not valid.

    Returns
        Bool. True if 'config.seabee.yaml' is valid, else False (including when
        the file is not well-formed YAML).

    Raises
        OSError if 'config.seabee.yaml' cannot be opened (e.g. it does not
        exist).
    """
    # Define valid schema for 'config.seabee.yaml'
    # Full list of ODM options here: https://docs.opendronemap.org/arguments/
    schema = Schema(
        {
            "grouping": str,
            "area": str,
            # 'strptime' raises ValueError on a mismatch, which 'schema'
            # reports as a failed validator, so 'Or' then tries the next format
            "datetime": And(
                str,
                Or(
                    lambda date: dt.datetime.strptime(date, "%Y%m%d"),
                    lambda date: dt.datetime.strptime(date, "%Y%m%d%H%M"),
                ),
            ),
            "nfiles": And(int, lambda n: n > 0),
            "organisation": str,
            "mosaic": bool,
            "publish": bool,
            "theme": lambda s: s.lower() in ("seabirds", "mammals", "habitat"),
            Optional("spectrum_type"): Or(
                lambda s: s.lower() in ("rgb", "ms", "hsi"), None
            ),
            Optional("elevation"): Or(And(int, lambda x: x >= 0), None),
            Optional("creator_name"): Or(str, None),
            Optional("project"): Or(str, None),
            Optional("odm_options"): {
                Optional("dsm"): bool,
                Optional("dtm"): bool,
                Optional("cog"): bool,
                Optional("orthophoto-compression"): lambda s: s
                in ("JPEG", "LZW", "PACKBITS", "DEFLATE", "LZMA", "NONE"),
                Optional("orthophoto-resolution"): Or(int, float),
                Optional("dem-resolution"): Or(int, float),
                Optional("max-concurrency"): int,
                Optional("auto-boundary"): bool,
                Optional("use-3dmesh"): bool,
                Optional("fast-orthophoto"): bool,
                Optional("pc-rectify"): bool,
                Optional("split"): int,
                Optional("split-overlap"): int,
                Optional("crop"): And(Or(int, float), lambda x: x >= 0),
                Optional("pc-quality"): lambda s: s
                in ("ultra", "high", "medium", "low", "lowest"),
                Optional("feature-quality"): lambda s: s
                in ("ultra", "high", "medium", "low", "lowest"),
            },
        }
    )

    config_path = os.path.join(dir_path, "config.seabee.yaml")

    try:
        # Read explicitly as UTF-8 rather than relying on the locale default
        with open(config_path, "r", encoding="utf-8") as stream:
            data = yaml.safe_load(stream)
        schema.validate(data)
    except (yaml.YAMLError, SchemaError) as e:
        # Malformed YAML is reported as "not valid" instead of crashing the
        # caller
        if verbose:
            print("Could not parse 'config.seabee.yaml':")
            print(e)

        return False

    return True


def write_new_config(dir_path, data):
    """Write config data to 'config.seabee.yaml' in the specified folder.

    Any existing 'config.seabee.yaml' in 'dir_path' is overwritten.

    Args
        dir_path: Str or path-like. Path to folder where the file is written.
        data:     Dict. Config data to serialise as YAML.

    Returns
        None. The file is written to disk.
    """
    config_path = os.path.join(dir_path, "config.seabee.yaml")

    # 'allow_unicode=True' writes non-ASCII characters unescaped, so the file
    # must be opened as UTF-8. With the locale's default encoding the write
    # could fail or produce a file that is not valid YAML.
    with open(config_path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False, allow_unicode=True)

In [None]:
# # Migrate old 'config.yaml' files to the new 'config.seabee.yaml' standard
# base_dir = r"/home/notebook/shared-seabee-ns9879k/seabirds"
# temp_dir = r"/home/notebook/temp"

# dir_list = [f for f in os.scandir(base_dir) if f.is_dir()]
# dir_list = [f for f in dir_list if check_old_config_exists(f)]
# dir_list = [
#     f for f in dir_list if check_old_config_valid(f) and parse_old_mission_data(f)
# ]

# for dir_path in dir_list:
#     # Update config
#     data = parse_config(dir_path)
#     group, area, date = parse_old_mission_data(dir_path)
#     data["grouping"] = group
#     data["area"] = area
#     data["datetime"] = date

#     # Remove blank fields
#     data = {k: v for k, v in data.items() if v is not None}

#     # Save locally and validate
#     write_new_config(temp_dir, data)
#     assert check_new_config_valid(temp_dir)

#     # Copy to MinIO
#     src_path = os.path.join(temp_dir, "config.seabee.yaml")
#     dst_path = os.path.join(dir_path, "config.seabee.yaml")
#     sb.storage.copy_file(src_path, dst_path, minio_client, overwrite=True)
#     os.remove(src_path)

#     # Delete old config
#     old_path = os.path.join(dir_path, "config.yaml")
#     sb.storage.delete_file(old_path, minio_client)

In [None]:
# # Add 'classify' to all seabirds missions
# base_dir = r"/home/notebook/shared-seabee-ns9879k/seabirds/2022/"
# temp_dir = r"/home/notebook/temp"

# dir_list = [f for f in os.scandir(base_dir) if f.is_dir()]
# dir_list = [f for f in dir_list if check_old_config_exists(f)]

# for dir_path in dir_list:
#     # Update config
#     data = parse_config(dir_path)
#     data["classify"] = True
#     data["ml_options"] = {"task": "detection"}

#     # Save locally and validate
#     write_new_config(temp_dir, data)
#     assert sb.ortho.check_config_valid(temp_dir)

#     # Copy to MinIO
#     src_path = os.path.join(temp_dir, "config.seabee.yaml")
#     dst_path = os.path.join(dir_path, "config.seabee.yaml")
#     sb.storage.copy_file(src_path, dst_path, minio_client, overwrite=True)
#     os.remove(src_path)