In [24]:
import os

import yaml
from config import SETTINGS
from pathlib import Path
import pandas as pd

import seabeepy as sb

# Refactor `config.seabee.yaml` for niva-tidy missions

In [25]:
# Login to MinIO
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID, password=SETTINGS.MINIO_SECRET_KEY
)

In [26]:
# Parent directories containing flight folders to process
base_dirs = [
    Path(r"/home/notebook/shared-seabee-ns9879k/niva-tidy/2022"),
    Path(r"/home/notebook/shared-seabee-ns9879k/niva-tidy/2023"),
]

In [27]:
def get_spectrum_type(spec: str):
    if spec is None:
        return None
    if spec.lower() in ["rgb", "ms", "hsi"]:
        return spec.lower()
    if spec.lower() == "msi":
        return "ms"


def template_config() -> dict:
    return dict(
        grouping=None,
        area=None,
        datetime=None,
        spectrum_type=None,
        elevation=None,
        project=None,
        nfiles=None,
        organisation=None,
        creator_name=None,
        theme="habitat",
        mosaic=False,
        publish=True,
        # classify=True,
    )


def niva_name_to_config(name: str):

    parts = name.split("_")

    if len(parts) == 7:
        org, date, group, area, spect, elev, _ = parts
    elif len(parts) == 6:
        org, date, group, area, spect, elev = parts
    elif len(parts) == 5:
        org, date, group, area, spect = parts
        elev = None
    elif len(parts) == 4:
        org, date, group, area = parts
        spect = None
        elev = None
    return dict(
        grouping=group,
        area=area,
        datetime=date,
        spectrum_type=get_spectrum_type(spect),
        organisation=org,
        elevation=int(elev) if elev is not None and elev.isnumeric() else elev,
    )


def parse_config(dir_path: Path):
    conf = {}
    config_path = dir_path / "config.seabee.yaml"
    if config_path.exists():
        with open(config_path, "r") as f:
            conf = yaml.safe_load(f)
    return conf


def write_new_config(path: Path, data):
    with open(path, "w") as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False, allow_unicode=True)


def merge_conf(folder_config: dict, existing_config: dict) -> dict:
    """Merge the folder_config with the existing_config

    Use various rules some times use folder name is best, other times the config
    """

    new_config = template_config()
    for key, val in folder_config.items():
        new_config[key] = val
    for key in ["creator_name", "project", "mosaic", "publish", "theme", "area"]:
        if key in existing_config:
            if key == "creator_name" and "_" in existing_config[key]:
                new_config[key] = " ".join(existing_config[key].split("_"))
            else:
                new_config[key] = existing_config[key]
    
    # If file config is placeholder switch back to config based on folder name
    if new_config["area"].lower() in ["site", "oslo"]:
        new_config["area"] = folder_config["area"]

    if new_config["spectrum_type"] is None:
        new_config["spectrum_type"] = existing_config["spectrum_type"]
    new_config["folder_grouping"] = folder_config["grouping"]
    return apply_rules(new_config)


def apply_rules(config: dict, folder_name=""):
    # msi -> ms + lower
    config["spectrum_type"] = get_spectrum_type(config["spectrum_type"])
    if not isinstance(config["elevation"], int):
        config["elevation"] = None
    # No underscore in attributes, this destroys the layer name
    if "_" in config["area"]:
        config["area"] = config["area"].replace("_", "-")
    
    if "oldberg" in config["area"]:
        config["area"] = config["area"].replace("oldberg", "olberg")
    
    if config["area"].startswith("larvik-"):
        config["area"] = config["area"].split("-", 1)[-1]

    if config["area"].startswith("runde-"):
        config["area"] = config["area"].split("-", 1)[-1]

    if config["project"] is None:
        # Around three missions with missing project, use seabee
        config["project"] = "seabee"
    
    config["project"] = config["project"].lower()
    
    if config["project"] in ["runderunderunderunde", "srunde"]:
        config["project"] = "runde"
    
    if config["area"] == "runde":
        config["project"] = "runde"

    if config["area"].startswith("runde-"):
        config["area"] = config["area"].split("-", 1)[-1]
    
    if folder_name and config["area"] == "olberg":
        config["area"] = niva_name_to_config(folder_name)["area"]
    # Check folder grouping
    if config["project"] in ["seabee"]:
        # try to use naming from folder name for project
        config["project"] = config["folder_grouping"]
    
    if config["project"] in ["oslo", "io"]:
        config["project"] = "io23"

    if config["project"] == "io23" and "oslo" not in config["area"]:
        config["area"] = "oslo-" + config["area"]

    if config["project"] == "plastinoland":
        config["project"] = "plastnoland"

    if config["project"] == "larvik":
        # spresial rule for larvik ending up as project?
        config["project"] = "zosmap"
        

    if config["organisation"] in ["spectorfly"]:
        config["organisation"] = "spectrofly"
    
    if "stege-nor" in config["area"].lower():
        config["grouping"] = f"{config['organisation'].lower()}"
        config["area"] = config["area"].lower()
        if config["area"].lower() == "stege-nor" :
            config["area"] = "stege-nor-full"
    else:
        config["grouping"] = f"{config['organisation'].lower()}-{config['project'].lower()}"

    return config


def to_seabee_config(df: pd.DataFrame, tmp_dir):
    """Try to convert the dataframe to seabee config files

    Try to save df to seabee config files and resturn a new dataframe with the results
    this way we can play a bit with various changes and resulting layer name. The config files
    are saved in tmp_dir.
    """
    mission_dict = dict(folder_name=[], layer_name=[], valid=[], dir_path=[])
    keys = template_config().keys()
    mission_dict.update({k: [] for k in keys})
    mission_dict["folder_grouping"] = []
    for _, data in df.iterrows():
        mission_name = data["folder_name"]
        config = apply_rules(data[list(keys) + ["folder_grouping"]].to_dict(), mission_name)
        clean_config = {k: config[k] for k in keys if config[k] is not None}
        nfiles = len(sb.ortho.list_images(data.dir_path / "images", verbose=False))
        mission_dict["folder_name"].append(mission_name)
        try:
            sb.ortho.CONFIG_SCHEMA.validate({**clean_config, "nfiles": 1})
            write_new_config(tmp_dir / "config.seabee.yaml", config)
            layer_name = sb.ortho.get_layer_name(tmp_dir)
        except Exception as e:
            print(f"Error in {mission_name}: {e}")
            config["nfiles"] = nfiles
            mission_dict["layer_name"].append("error")
            mission_dict["valid"].append(False)
        else:
            config["nfiles"] = nfiles
            mission_dict["layer_name"].append(layer_name)
            mission_dict["valid"].append(True)
            tmp_file = tmp_dir / f"{mission_name}.yaml"
            write_new_config(tmp_file, config)

        for k, v in config.items():
            mission_dict[k].append(v)
        mission_dict["dir_path"].append(data.dir_path)        
        

    return pd.DataFrame(mission_dict).astype(dict(elevation="Int16"))

In [28]:
dir_list = [
    p
    for p in base_dirs
    for p in p.iterdir()
    if p.is_dir() and (p / "config.seabee.yaml").exists()
]

In [29]:
tmp_dir = Path("./tmp")
tmp_dir.mkdir(exist_ok=True)

In [30]:
mission_dict = dict(folder_name=[], dir_path=[])
mission_dict.update({k: [] for k in template_config().keys()})
mission_dict["folder_grouping"] = []
for dir_path in dir_list:
    # Update config
    mission_name = os.path.split(dir_path)[-1]
    existing_conf = parse_config(dir_path)
    folder_conf = niva_name_to_config(mission_name)
    new_conf = merge_conf(folder_conf, existing_conf)
    mission_dict["folder_name"].append(mission_name)
    nfiles = len(sb.ortho.list_images(dir_path / "images", verbose=False))
    new_conf["nfiles"] = nfiles
    for k, v in new_conf.items():
        mission_dict[k].append(v)
    mission_dict["dir_path"].append(dir_path)

In [31]:
df = pd.DataFrame(mission_dict).astype(dict(elevation="Int16"))
df

Unnamed: 0,folder_name,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
0,niva_202208181236_kelpmap_vega-s_msi_60,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
1,niva_202205180845_plastinoland_drammen_documen...,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-plastnoland,drammen,202205180845,ms,0,plastnoland,112,niva,Medyan Ghareeb,Habitat,False,True,plastinoland
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
3,niva_202208191055_kelpmap_vega-n_msi_120,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208191055,ms,120,kelpmap,4956,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
4,spectrofly_202208311309_runde_remoy_msi_120,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan Ghareeb,Habitat,False,True,runde
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan Ghareeb,Habitat,False,True,halden
184,niva_202310031115_io_husviksbaen_rgb_80,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-io23,oslo-husviksbaen,202310031115,rgb,80,io23,196,niva,Medyan Ghareeb,Habitat,False,True,io
185,niva_202308301044_halden_h23_msi_80,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan Ghareeb,Habitat,False,True,halden
186,niva_202309271230_ascomap_bringnes_msi_100,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan Ghareeb,Habitat,False,True,ascomap


In [32]:
df = to_seabee_config(df, tmp_dir)
df

Error in niva_202209_runde_runde_otter: Key 'datetime' error:
Or(<function <lambda> at 0x7faa2ce97250>, <function <lambda> at 0x7faa2cddecb0>) did not validate '202209'
<lambda>('202209') raised ValueError("time data '202209' does not match format '%Y%m%d'")
<lambda>('202209') raised ValueError("time data '202209' does not match format '%Y%m%d%H%M'")
Error in niva_yyyymmddhhmm_area_site_typ_alt: Key 'datetime' error:
Or(<function <lambda> at 0x7faa2ce97250>, <function <lambda> at 0x7faa2cddecb0>) did not validate 'yyyymmddhhmm'
<lambda>('yyyymmddhhmm') raised ValueError("time data 'yyyymmddhhmm' does not match format '%Y%m%d'")
<lambda>('yyyymmddhhmm') raised ValueError("time data 'yyyymmddhhmm' does not match format '%Y%m%d%H%M'")
Error in niva_yyyymmddhhmm_area_site_typ_alt - Copy: Key 'datetime' error:
Or(<function <lambda> at 0x7faa2ce97250>, <function <lambda> at 0x7faa2cddecb0>) did not validate 'yyyymmddhhmm'
<lambda>('yyyymmddhhmm') raised ValueError("time data 'yyyymmddhhmm' d

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
1,niva_202205180845_plastinoland_drammen_documen...,niva-plastnoland_drammen_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-plastnoland,drammen,202205180845,ms,0,plastnoland,112,niva,Medyan Ghareeb,Habitat,False,True,plastinoland
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-kelpmap_vega-n_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208191055,ms,120,kelpmap,4956,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan Ghareeb,Habitat,False,True,runde
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan Ghareeb,Habitat,False,True,halden
184,niva_202310031115_io_husviksbaen_rgb_80,niva-io23_oslo-husviksbaen_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-io23,oslo-husviksbaen,202310031115,rgb,80,io23,196,niva,Medyan Ghareeb,Habitat,False,True,io
185,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan Ghareeb,Habitat,False,True,halden
186,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan Ghareeb,Habitat,False,True,ascomap


# Valid config

In [33]:
df[df.valid][df.organisation=="spectrofly"]

  df[df.valid][df.organisation=="spectrofly"]


Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120.0,kelpmap,4850,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120.0,runde,0,spectrofly,Medyan Ghareeb,Habitat,False,True,runde
5,spectrofly_202209011343_runde_runde_rgb_120,spectrofly-runde_runde_202209011343_rgb_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,runde,202209011343,rgb,120.0,runde,572,spectrofly,Medyan Ghareeb,Habitat,False,True,runde
6,spectrofly_202208181238_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208181238_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208181238,ms,120.0,kelpmap,8979,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
12,spectrofly_202209010804_runde_runde_rgb_120,spectrofly-runde_runde_202209010804_rgb_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,runde,202209010804,rgb,120.0,runde,535,spectrofly,Medyan Ghareeb,Habitat,False,False,runde
13,spectrofly_202208181038_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208181038_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208181038,ms,120.0,kelpmap,7802,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
15,spectrofly_20220624_kelpmap_sverrehavn_uuv_usv,spectrofly-kelpmap_sverrehavn_20220624_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,sverrehavn,20220624,ms,,kelpmap,0,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
18,spectrofly_202209011550_runde_runde_rgb_195,spectrofly-runde_runde_202209011550_rgb_195m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,runde,202209011550,rgb,195.0,runde,172,spectrofly,Medyan Ghareeb,Habitat,False,True,runde
28,spectrofly_202208181139_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208181139_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208181139,ms,120.0,kelpmap,7885,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
29,spectrofly_20220820_kelpmap_sola_uuv_usv,spectrofly-kelpmap_sola_20220820_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,sola,20220820,ms,,kelpmap,0,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap


# Area naming used

In [34]:
df.sort_values(by="area").area.value_counts(sort=False)

drammen                3
finnmark-bringnes      8
finnmark-vassbukta     7
halden-h20             2
halden-h21             2
halden-h22             2
halden-h23             2
halden-h24             2
halden-h25             2
halden-h28             2
halden-h29             2
halden-h30             2
halden-h31             2
halden-h33             2
halden-h34             2
halden-h35             2
halden-h36             2
halden-h37             2
hellviktangen          2
juvika                 1
knerten                3
landsteilene           3
mfs                    1
olberg-all            35
olberg-n               7
olberg-s              21
oslo-bekkensten        2
oslo-bonnebukta        2
oslo-fagerstrand       2
oslo-fornebu           1
oslo-husviksbaen       2
oslo-ulvoya            2
remoy                 15
runde                 15
site                   3
sola                   1
stege-nor-fast         1
stege-nor-full         1
sverrehavn             1
vega                   2


In [35]:
df.groupby("area").first()

Unnamed: 0_level_0,folder_name,layer_name,valid,dir_path,grouping,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
drammen,niva_202205180845_plastinoland_drammen_documen...,niva-plastnoland_drammen_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-plastnoland,202205180845,ms,0.0,plastnoland,112,niva,Medyan Ghareeb,Habitat,False,True,plastinoland
finnmark-bringnes,niva_202309271147_ascomap_bringnes_msi_30,niva-ascomap_finnmark-bringnes_202309271147_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,202309271147,ms,30.0,ascomap,2149,niva,Medyan Ghareeb,Habitat,False,True,ascomap
finnmark-vassbukta,niva_202309281024_ascomap_vassbukta_msi_100,niva-ascomap_finnmark-vassbukta_202309281024_m...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,202309281024,ms,100.0,ascomap,3073,niva,Medyan Ghareeb,Habitat,False,True,ascomap
halden-h20,niva_202308301136_halden_h20_msi_80,niva-sabicas_halden-h20_202308301136_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308301136,ms,80.0,sabicas,301,niva,Medyan Ghareeb,Habitat,False,True,halden
halden-h21,niva_202308301118_halden_h21_msi_80,niva-sabicas_halden-h21_202308301118_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308301118,ms,80.0,sabicas,770,niva,Medyan Ghareeb,Habitat,False,True,halden
halden-h22,niva_202308300752_halden_h22_msi_80,niva-sabicas_halden-h22_202308300752_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308300752,ms,80.0,sabicas,378,niva,Medyan Ghareeb,Habitat,False,True,halden
halden-h23,niva_202308301032_halden_h23_rgb_80,niva-sabicas_halden-h23_202308301032_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308301032,rgb,80.0,sabicas,76,niva,Medyan Ghareeb,Habitat,False,True,halden
halden-h24,niva_202308291402_halden_h24_msi_80,niva-sabicas_halden-h24_202308291402_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308291402,ms,80.0,sabicas,896,niva,Medyan Ghareeb,Habitat,False,True,halden
halden-h25,niva_202308291253_halden_h25_rgb_80,niva-sabicas_halden-h25_202308291253_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308291253,rgb,80.0,sabicas,99,niva,Medyan Ghareeb,Habitat,False,True,halden
halden-h28,niva_202308291436_halden_h28_msi_80,niva-sabicas_halden-h28_202308291436_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308291436,ms,80.0,sabicas,1372,niva,Medyan Ghareeb,Habitat,False,True,halden


# Grouping attribute

Grouping is formed by `{org}-{project}` 

In [36]:
gr = df.sort_values(by="grouping").grouping.value_counts(sort=False)

In [37]:
gr

niva-area               3
niva-ascomap           15
niva-io23              11
niva-kelpmap           12
niva-plastnoland        3
niva-runde             10
niva-sabicas           30
niva-seabee             9
niva-zosmap            63
ntnu-runde              8
spectrofly              2
spectrofly-kelpmap      9
spectrofly-massimal     1
spectrofly-runde       12
Name: grouping, dtype: int64

In [38]:
df.groupby("grouping").first()

Unnamed: 0_level_0,folder_name,layer_name,valid,dir_path,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
grouping,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
niva-area,niva_yyyymmddhhmm_area_site_typ_alt,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,site,yyyymmddhhmm,ms,,area,0,niva,Medyan Ghareeb,Habitat,False,True,area
niva-ascomap,niva_202309271147_ascomap_bringnes_msi_30,niva-ascomap_finnmark-bringnes_202309271147_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,finnmark-bringnes,202309271147,ms,30.0,ascomap,2149,niva,Medyan Ghareeb,Habitat,False,True,ascomap
niva-io23,niva_202310030959_io_bonnebukta_rgb_90,niva-io23_oslo-bonnebukta_202310030959_rgb_90m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,oslo-bonnebukta,202310030959,rgb,90.0,io23,289,niva,Medyan Ghareeb,Habitat,False,True,io
niva-kelpmap,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,vega-s,202208181236,ms,60.0,kelpmap,4272,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
niva-plastnoland,niva_202205180845_plastinoland_drammen_documen...,niva-plastnoland_drammen_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,drammen,202205180845,ms,0.0,plastnoland,112,niva,Medyan Ghareeb,Habitat,False,True,plastinoland
niva-runde,niva_202209010650_runde_runde_msi_60,niva-runde_runde_202209010650_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,runde,202209010650,ms,60.0,runde,6786,niva,Medyan Ghareeb,Habitat,False,True,runde
niva-sabicas,niva_202308291402_halden_h24_msi_80,niva-sabicas_halden-h24_202308291402_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,halden-h24,202308291402,ms,80.0,sabicas,896,niva,Medyan Ghareeb,Habitat,False,True,halden
niva-seabee,niva_202203151403_seabee_landsteilene_msi_100,niva-seabee_landsteilene_202203151403_ms_100m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,landsteilene,202203151403,ms,100.0,seabee,3702,niva,Medyan Ghareeb,Habitat,False,True,seabee
niva-zosmap,niva_202305051123_larvik_olberg-all_rgb_60,niva-zosmap_olberg-all_202305051123_rgb_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,olberg-all,202305051123,rgb,60.0,zosmap,873,niva,Medyan Ghareeb,Habitat,False,True,larvik
ntnu-runde,ntnu_202209011115_runde_runde_hsi_40,ntnu-runde_runde_202209011115_hsi_40m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,runde,202209011115,hsi,40.0,runde,0,ntnu,Medyan Ghareeb,Habitat,False,True,runde


# Kelpmap naming

In [39]:
df[df.grouping == "niva-kelpmap"]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60.0,kelpmap,4272,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-kelpmap_vega-n_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208191055,ms,120.0,kelpmap,4956,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
9,niva_202208180814_kelpmap_vega-s_msi_115,niva-kelpmap_vega-s_202208180814_ms_115m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208180814,ms,115.0,kelpmap,13710,niva,Medyan Ghareeb,Habitat,False,False,kelpmap
11,niva_202208181012_kelpmap_vega-s_rgb_60,niva-kelpmap_vega-s_202208181012_rgb_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181012,rgb,60.0,kelpmap,494,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
19,niva_20220819_kelpmap_vega_documentation_0,niva-kelpmap_vega_20220819_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega,20220819,ms,0.0,kelpmap,14,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
22,niva_202208190944_kelpmap_vega-n_rgb_120,niva-kelpmap_vega-n_202208190944_rgb_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208190944,rgb,120.0,kelpmap,444,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
26,niva_202208190944_kelpmap_vega-n_msi_120,niva-kelpmap_vega-n_202208190944_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208190944,ms,120.0,kelpmap,5034,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
37,niva_202208181236_kelpmap_vega-s_rgb_60,niva-kelpmap_vega-s_202208181236_rgb_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,rgb,60.0,kelpmap,491,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
40,niva_202208181012_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181012_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181012,ms,60.0,kelpmap,4308,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
45,niva_202208180814_kelpmap_vega-s_rgb_115,niva-kelpmap_vega-s_202208180814_rgb_115m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208180814,rgb,115.0,kelpmap,1804,niva,Medyan Ghareeb,Habitat,False,True,kelpmap


# Project attribute

Seems to be standardized to uppercase, should change all to uppercase?

In [40]:
df.sort_values(by="project").project.value_counts(sort=False)

area            3
ascomap        15
io23           11
kelpmap        21
massimal        1
plastnoland     3
runde          30
sabicas        30
seabee         11
zosmap         63
Name: project, dtype: int64

In [41]:
df.groupby("project").first().layer_name

project
area                                                       error
ascomap        niva-ascomap_finnmark-bringnes_202309271147_ms...
io23              niva-io23_oslo-bonnebukta_202310030959_rgb_90m
kelpmap                  niva-kelpmap_vega-s_202208181236_ms_60m
massimal         spectrofly-massimal_juvika_202206241415_rgb_60m
plastnoland             niva-plastnoland_drammen_202205180845_ms
runde                spectrofly-runde_remoy_202208311309_ms_120m
sabicas              niva-sabicas_halden-h24_202308291402_ms_80m
seabee             niva-seabee_landsteilene_202203151403_ms_100m
zosmap               niva-zosmap_olberg-all_202305051123_rgb_60m
Name: layer_name, dtype: object

In [42]:
df[df.project=="larvik"]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping


# Not valid

In [43]:
df[~df.valid]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
35,niva_202209_runde_runde_otter,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-runde,runde,202209,ms,,runde,0,niva,Medyan Ghareeb,Habitat,False,True,runde
38,niva_yyyymmddhhmm_area_site_typ_alt,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-area,site,yyyymmddhhmm,ms,,area,0,niva,Medyan Ghareeb,Habitat,False,True,area
41,niva_yyyymmddhhmm_area_site_typ_alt - Copy,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-area,site,yyyymmddhhmm,ms,,area,0,niva,Medyan Ghareeb,Habitat,False,True,area
44,niva_202209_runde_runde_documentation_0,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-runde,runde,202209,ms,0.0,runde,6,niva,Medyan Ghareeb,Habitat,False,True,runde
132,niva_yyyymmddhhmm_area_site_typ_alt_compare,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-area,site,yyyymmddhhmm,ms,,area,0,niva,Medyan Ghareeb,Habitat,False,True,area


In [44]:
df

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish,folder_grouping
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
1,niva_202205180845_plastinoland_drammen_documen...,niva-plastnoland_drammen_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-plastnoland,drammen,202205180845,ms,0,plastnoland,112,niva,Medyan Ghareeb,Habitat,False,True,plastinoland
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan Ghareeb,Habitat,False,True,kelpmap
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-kelpmap_vega-n_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208191055,ms,120,kelpmap,4956,niva,Medyan Ghareeb,Habitat,False,True,kelpmap
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan Ghareeb,Habitat,False,True,runde
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan Ghareeb,Habitat,False,True,halden
184,niva_202310031115_io_husviksbaen_rgb_80,niva-io23_oslo-husviksbaen_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-io23,oslo-husviksbaen,202310031115,rgb,80,io23,196,niva,Medyan Ghareeb,Habitat,False,True,io
185,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan Ghareeb,Habitat,False,True,halden
186,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan Ghareeb,Habitat,False,True,ascomap


# Save config files

Also store a sample where nfiles are invalid

In [45]:
df = to_seabee_config(df[df.valid], tmp_dir)

In [46]:
keys = [k for k in template_config()]
df[df.valid][["layer_name", "folder_name"] + keys].sort_values("layer_name").to_csv("niva-naming-overview.csv", index=False)