In [None]:
import os

import yaml
from seabeepy.config import SETTINGS
from pathlib import Path
import pandas as pd

import seabeepy as sb
import requests

# Refactor `config.seabee.yaml` for niva-tidy missions

In [None]:
# Authenticate against MinIO with the credentials held in the seabeepy settings
minio_client = sb.storage.minio_login(
    password=SETTINGS.MINIO_SECRET_KEY,
    user=SETTINGS.MINIO_ACCESS_ID,
)

In [None]:
# Parent directories whose immediate sub-folders are the flight folders to process
base_dirs = list(
    map(
        Path,
        (
            r"/home/notebook/shared-seabee-ns9879k/niva-tidy/2022",
            r"/home/notebook/shared-seabee-ns9879k/niva-tidy/2023",
        ),
    )
)

In [None]:
def get_spectrum_type(spec: str):
    """Normalise a spectrum-type label.

    Matching is case-insensitive. ``rgb``, ``msi`` and ``hsi`` are returned in
    lower case and ``ms`` is mapped to ``msi``. ``None`` and every other label
    give ``None``.
    """
    if spec is None:
        return None
    label = spec.lower()
    if label == "ms":
        return "msi"
    return label if label in ("rgb", "msi", "hsi") else None


def template_config() -> dict:
    """Return a fresh config skeleton.

    The descriptive fields start out as ``None``; the remaining fields carry
    fixed defaults. The key order is significant: callers use it to select and
    order DataFrame columns.
    """
    config = dict.fromkeys(
        (
            "grouping",
            "area",
            "datetime",
            "spectrum_type",
            "elevation",
            "project",
            "nfiles",
            "organisation",
            "creator_name",
        )
    )
    config.update(theme="habitat", mosaic=False, publish=True, classify=True)
    return config


def niva_name_to_config(name: str):
    """Derive config fields from a flight folder name.

    The name is split on ``_`` and read positionally as
    ``org_date_group_area[_spectrum[_elevation[_extra]]]``. A seventh part is
    ignored; missing optional parts become ``None``.

    Args:
        name: Folder name of the mission.

    Returns:
        dict with the keys ``grouping``, ``area``, ``datetime``,
        ``spectrum_type``, ``organisation`` and ``elevation``. ``elevation`` is
        an ``int`` when the part consists of decimal digits only, otherwise the
        raw string (or ``None`` when absent).

    Raises:
        ValueError: If the name does not have between 4 and 7 parts.
    """
    parts = name.split("_")

    if not 4 <= len(parts) <= 7:
        # Without this guard an unexpected name surfaces as an opaque
        # UnboundLocalError further down.
        raise ValueError(
            f"Cannot parse folder name {name!r}: expected 4 to 7 '_'-separated "
            f"parts, got {len(parts)}"
        )

    # Pad the optional spectrum/elevation slots with None and drop a 7th part.
    org, date, group, area, spect, elev = (parts + [None, None])[:6]

    # isdecimal() only accepts strings that int() can actually convert.
    if elev is not None and elev.isdecimal():
        elev = int(elev)

    return dict(
        grouping=group,
        area=area,
        datetime=date,
        spectrum_type=get_spectrum_type(spect),
        organisation=org,
        elevation=elev,
    )


def parse_config(dir_path: Path):
    """Read ``config.seabee.yaml`` from ``dir_path``.

    Args:
        dir_path: Mission folder expected to hold the config file.

    Returns:
        The parsed YAML content, or an empty dict when the file is missing or
        empty.
    """
    config_path = dir_path / "config.seabee.yaml"
    if not config_path.exists():
        return {}
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; callers expect a dict.
        return yaml.safe_load(f) or {}


def write_new_config(path: Path, data):
    """Serialise ``data`` as block-style YAML to ``path``, overwriting it.

    The file is written as UTF-8 explicitly: ``allow_unicode=True`` emits
    non-ASCII characters verbatim, so the result must not depend on the
    platform's default encoding.
    """
    with open(path, "w", encoding="utf-8") as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False, allow_unicode=True)


def merge_conf(folder_config: dict, existing_config: dict) -> dict:
    """Merge the config derived from the folder name with the config file.

    Starts from the template, applies every folder-derived value, then lets a
    fixed set of keys from the existing file override them. Placeholder or
    missing values in the file fall back to the folder-derived value. The
    result is passed through ``apply_rules`` before it is returned.

    Args:
        folder_config: Values parsed from the folder name; must contain
            ``area`` and ``grouping``.
        existing_config: Content of the existing config file (may be empty or
            ``None``).

    Returns:
        The merged config, including the extra key ``folder_grouping``.
    """
    existing_config = existing_config or {}

    new_config = template_config()
    new_config.update(folder_config)

    for key in ["creator_name", "project", "mosaic", "publish", "theme", "area", "spectrum_type"]:
        if key not in existing_config:
            continue
        value = existing_config[key]
        if key == "creator_name" and isinstance(value, str):
            # Stored with underscores in some files; use spaces instead.
            value = value.replace("_", " ")
        new_config[key] = value

    # If file config is a placeholder (or empty) switch back to the folder name
    area = new_config["area"]
    if area is None or area.lower() in ["site", "oslo"]:
        new_config["area"] = folder_config["area"]

    # The file may have overridden the folder's spectrum type with None;
    # restore the folder-derived value in that case.
    if new_config["spectrum_type"] is None:
        new_config["spectrum_type"] = folder_config.get("spectrum_type")

    new_config["folder_grouping"] = folder_config["grouping"]
    return apply_rules(new_config)


def apply_rules(config: dict, folder_name: str = "") -> dict:
    """Apply the hand-written clean-up rules to a mission config.

    The rules run in a fixed order and later rules read values set by earlier
    ones, so the order matters. ``config`` is modified in place and the same
    dict is returned.

    Args:
        config: Mission config. The rules read and/or write the keys
            ``spectrum_type``, ``elevation``, ``area``, ``project``,
            ``organisation`` and ``grouping``; ``folder_grouping`` is read when
            the project is ``"seabee"``.
        folder_name: Optional mission folder name. When given, it can override
            the area and spectrum type and decides whether the ``-odm-test``
            grouping suffix is used.

    Returns:
        The same ``config`` dict after all rules have been applied.
    """
    # Normalise the spectrum type: lower-case, and "ms" becomes "msi"
    config["spectrum_type"] = get_spectrum_type(config["spectrum_type"])
    # Anything that is not a plain int is dropped
    if not isinstance(config["elevation"], int):
        config["elevation"] = None
    # No underscore in attributes, this destroys the layer name
    if "_" in config["area"]:
        config["area"] = config["area"].replace("_", "-")
    
    # Spelling correction
    if "oldberg" in config["area"]:
        config["area"] = config["area"].replace("oldberg", "olberg")
    
    # Drop a leading "larvik-" / "runde-" prefix from the area
    if config["area"].startswith("larvik-"):
        config["area"] = config["area"].split("-", 1)[-1]

    if config["area"].startswith("runde-"):
        config["area"] = config["area"].split("-", 1)[-1]

    if config["project"] is None:
        # Around three missions with missing project, use seabee
        config["project"] = "seabee"
    
    config["project"] = config["project"].lower()
    
    # Known misspellings of the runde project
    if config["project"] in ["runderunderunderunde", "srunde"]:
        config["project"] = "runde"
    
    if config["area"] == "runde":
        config["project"] = "runde"

    # Second pass of the same prefix strip as above
    if config["area"].startswith("runde-"):
        config["area"] = config["area"].split("-", 1)[-1]
    
    if folder_name: 
        # For area "olberg" the area parsed from the folder name wins
        if config["area"] == "olberg":
            config["area"] = niva_name_to_config(folder_name)["area"]
        # A spectrum marker in the folder name overrides the config value
        if "_msi_" in folder_name:
             config["spectrum_type"]="msi"
        elif "_hsi_" in folder_name:
            config["spectrum_type"]="hsi"
        elif "_rgb_" in folder_name:
            config["spectrum_type"]="rgb" 
    # Check folder grouping
    if config["project"] in ["seabee"]:
        # try to use naming from folder name for project
        config["project"] = config["folder_grouping"]
    
    if config["project"] in ["oslo", "io"]:
        config["project"] = "io23"

    # io23 areas are prefixed with "oslo-" unless "oslo" already occurs in them
    if config["project"] == "io23" and "oslo" not in config["area"]:
        config["area"] = "oslo-" + config["area"]

    # Spelling correction
    if config["project"] == "plastinoland":
        config["project"] = "plastnoland"

    if config["project"] == "larvik":
        # special rule for larvik ending up as project?
        config["project"] = "zosmap"
        

    # Spelling correction
    if config["organisation"] in ["spectorfly"]:
        config["organisation"] = "spectrofly"
    
    # Grouping: organisation only for stege-nor areas, otherwise
    # "<organisation>-<project>", with "-odm-test" appended for "_odm" folders
    if "stege-nor" in config["area"].lower():
        config["grouping"] = f"{config['organisation'].lower()}"
        config["area"] = config["area"].lower()
        if config["area"].lower() == "stege-nor" :
            config["area"] = "stege-nor-full"
    elif folder_name.endswith("_odm"):
        config["grouping"] = f"{config['organisation'].lower()}-{config['project'].lower()}-odm-test"
    else:
        config["grouping"] = f"{config['organisation'].lower()}-{config['project'].lower()}"

    return config


def to_seabee_config(df: pd.DataFrame, tmp_dir):
    """Apply the naming rules to every mission in ``df`` and report the outcome.

    For each row the rules are applied, the resulting config is validated and
    written to ``tmp_dir / "config.seabee.yaml"``, and the layer name is derived
    from it. Valid configs are additionally saved as
    ``tmp_dir / "<folder_name>.yaml"``. Failures are printed and recorded with
    layer name ``"error"`` and ``valid=False``.

    Returns a new dataframe with one row per mission, so different rule changes
    and the layer names they produce can be compared.
    """
    config_keys = template_config().keys()
    columns = [
        "folder_name",
        "layer_name",
        "valid",
        "dir_path",
        *config_keys,
        "folder_grouping",
        "old_layer_name",
    ]
    mission_dict = {column: [] for column in columns}
    selected = list(config_keys) + ["folder_grouping"]

    for _, row in df.iterrows():
        mission_name = row["folder_name"]
        config = apply_rules(row[selected].to_dict(), mission_name)
        # Only fields that carry a value are validated and written
        clean_config = {k: config[k] for k in config_keys if config[k] is not None}
        nfiles = len(sb.ortho.list_images(row.dir_path / "images", verbose=False))
        mission_dict["folder_name"].append(mission_name)
        # The reported config carries the current image count on both paths
        config["nfiles"] = nfiles

        try:
            sb.ortho.CONFIG_SCHEMA.validate({**clean_config, "nfiles": 1})
            write_new_config(tmp_dir / "config.seabee.yaml", clean_config)
            layer_name = sb.ortho.get_layer_name(tmp_dir)
        except Exception as e:
            print(f"Error in {mission_name}: {e}")
            layer_name = "error"
            is_valid = False
        else:
            is_valid = True

        mission_dict["layer_name"].append(layer_name)
        mission_dict["valid"].append(is_valid)
        if is_valid:
            write_new_config(tmp_dir / f"{mission_name}.yaml", config)

        for field, value in config.items():
            mission_dict[field].append(value)
        mission_dict["dir_path"].append(row.dir_path)
        mission_dict["old_layer_name"].append(row.old_layer_name)

    return pd.DataFrame(mission_dict).astype({"elevation": "Int16"})

In [None]:
# Flight folders: every sub-directory of a base directory that has a config file
dir_list = [
    flight_dir
    for base_dir in base_dirs
    for flight_dir in base_dir.iterdir()
    if flight_dir.is_dir() and (flight_dir / "config.seabee.yaml").exists()
]

In [None]:
# Scratch directory, relative to the current working directory, that holds the
# temporary config files written by the cells below
tmp_dir = Path("./tmp")
tmp_dir.mkdir(exist_ok=True)

In [None]:
# Collect one record per flight folder as column -> list of values
mission_dict = {
    column: []
    for column in (
        "folder_name",
        "dir_path",
        *template_config(),
        "folder_grouping",
        "old_layer_name",
    )
}
for dir_path in dir_list:
    mission_name = dir_path.name
    # Layer name produced by the config as it is stored today
    existing_conf = parse_config(dir_path)
    write_new_config(tmp_dir / "config.seabee.yaml", existing_conf)
    mission_dict["old_layer_name"].append(sb.ortho.get_layer_name(tmp_dir))
    # New config: folder name merged with the existing file
    folder_conf = niva_name_to_config(mission_name)
    new_conf = merge_conf(folder_conf, existing_conf)
    mission_dict["folder_name"].append(mission_name)
    nfiles = len(sb.ortho.list_images(dir_path / "images", verbose=False))
    new_conf["nfiles"] = nfiles
    for k, v in new_conf.items():
        mission_dict[k].append(v)
    mission_dict["dir_path"].append(dir_path)

In [None]:
# Nullable integer dtype so missions without an elevation keep a missing value
df = pd.DataFrame(mission_dict).astype({"elevation": "Int16"})
df

In [None]:
# Apply the rules and validate; note that this rebinds `df` to the result
df = to_seabee_config(df=df, tmp_dir=tmp_dir)
df

# Valid config

In [None]:
df.loc[df["valid"]]

# Save the current config

In case something goes wrong, keep a local copy of the current state.

In [None]:
keys = list(template_config())
# Overview of the valid missions, ordered by their new layer name
(
    df.loc[df["valid"], ["layer_name", "folder_name", "old_layer_name"] + keys]
    .sort_values("layer_name")
    .to_csv("niva-naming-overview.csv", index=False)
)

# Replace config files

In [None]:
keys = template_config().keys()
# One scratch file, derived from tmp_dir, used for clearing, writing and the
# (disabled) upload. Previously the cleared path was hard-coded to a notebook
# directory while the write went to the cwd-relative tmp_dir, so the two only
# matched when the kernel ran in that directory.
tmp_path = Path.cwd() / tmp_dir / "config.seabee.yaml"
for _, data in df[df.valid].iterrows():
    tmp_path.unlink(missing_ok=True)
    config = data[list(keys)].to_dict()

    # Where possible also allow zero files if they will be uploaded later
    sb.ortho.CONFIG_SCHEMA.validate({**config, "nfiles": 1})
    write_new_config(tmp_path, config)
    dst_path = os.path.join(data.dir_path, "config.seabee.yaml")
    print(f"Updating {data.folder_name}")
    
    # Dry run: the upload that overwrites the mission's config is disabled.
    #sb.storage.copy_file(str(tmp_path), dst_path, minio_client, overwrite=True)

# Delete ortho files

In [None]:
# Remove the orthophoto stored under the old layer name, where one exists
for _, data in df.iterrows():
    file_path = data.dir_path.joinpath("orthophoto", data.old_layer_name + ".tif")
    if not file_path.exists():
        continue
    print(f"Deleting {file_path}")
    sb.storage.delete_file(str(file_path), minio_client)
        

# Delete geonode datasets

In [None]:
# GeoNode credentials and API base URL used for the delete requests below
auth = (SETTINGS.GEONODE_USER, SETTINGS.GEONODE_PASSWORD)

base_url = sb.geo.GEONODE_URL

In [None]:
# Best-effort removal of the GeoNode dataset published under the old layer name
for _, data in df.iterrows():
    try:
        ds = sb.geo.get_dataset_by_title(data.old_layer_name)
    except Exception as err:
        # Presumably raised when no dataset has this title (verify against
        # seabeepy). Skip the mission, but say why instead of hiding it, and
        # let KeyboardInterrupt/SystemExit through.
        print(f"Skipping {data.old_layer_name}: {err}")
        continue
    print(f"Deleting {data.old_layer_name}")
    r = requests.delete(f"{base_url}resources/{ds['pk']}", auth=auth)
    print(r.status_code)