In [90]:
import os

import yaml
from config import SETTINGS
from pathlib import Path
import pandas as pd

import seabeepy as sb

# Refactor `config.seabee.yaml` for niva-tidy missions

In [91]:
# Login to MinIO
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID, password=SETTINGS.MINIO_SECRET_KEY
)

In [92]:
# Parent directories containing flight folders to process
base_dirs = [
    Path(r"/home/notebook/shared-seabee-ns9879k/niva-tidy/2022"),
    Path(r"/home/notebook/shared-seabee-ns9879k/niva-tidy/2023"),
]

In [223]:
def get_spectrum_type(spec: str):
    if spec is None:
        return None
    if spec.lower() in ["rgb", "ms", "hsi"]:
        return spec.lower()
    if spec.lower() == "msi":
        return "ms"


def template_config() -> dict:
    return dict(
        grouping=None,
        area=None,
        datetime=None,
        spectrum_type=None,
        elevation=None,
        project=None,
        nfiles=None,
        organisation=None,
        creator_name=None,
        theme="habitat",
        mosaic=False,
        publish=True,
        # classify=True,
    )


def niva_name_to_config(name: str):

    parts = name.split("_")

    if len(parts) == 7:
        org, date, group, area, spect, elev, _ = parts
    elif len(parts) == 6:
        org, date, group, area, spect, elev = parts
    elif len(parts) == 5:
        org, date, group, area, spect = parts
        elev = None
    elif len(parts) == 4:
        org, date, group, area = parts
        spect = None
        elev = None
    return dict(
        grouping=group,
        area=area,
        datetime=date,
        spectrum_type=get_spectrum_type(spect),
        organisation=org,
        elevation=int(elev) if elev is not None and elev.isnumeric() else elev,
    )


def parse_config(dir_path: Path):
    conf = {}
    config_path = dir_path / "config.seabee.yaml"
    if config_path.exists():
        with open(config_path, "r") as f:
            conf = yaml.safe_load(f)
    return conf


def write_new_config(path: Path, data):
    with open(path, "w") as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False, allow_unicode=True)


def merge_conf(folder_config: dict, existing_config: dict) -> dict:
    """Merge the folder_config with the existing_config

    Use various rules some times use folder name is best, other times the config
    """

    new_config = template_config()
    for key, val in folder_config.items():
        new_config[key] = val
    for key in ["creator_name", "project", "mosaic", "publish", "theme", "area"]:
        if key in existing_config:
            if key == "creator_name" and "_" in existing_config[key]:
                new_config[key] = " ".join(existing_config[key].split("_"))
            if key == "area" and existing_config[key] != "SITE":
                new_config[key] = existing_config[key]
            else:
                new_config[key] = existing_config[key]
    if new_config["spectrum_type"] is None:
        new_config["spectrum_type"] = existing_config["spectrum_type"]
    return apply_rules(new_config)


def apply_rules(config: dict):
    # msi -> ms + lower
    config["spectrum_type"] = get_spectrum_type(config["spectrum_type"])
    if not isinstance(config["elevation"], int):
        config["elevation"] = None
    # No underscore in attributes, this destroys the layer name
    if "_" in config["area"]:
        config["area"] = config["area"].replace("_", "-")
    
    if "oldberg" in config["area"]:
        config["area"] = config["area"].replace("oldberg", "olberg")

    if config["project"] is None:
        # Around three missions with missing project, use seabee
        config["project"] = "seabee"
    
    config["project"] = config["project"].lower()
    
    if config["project"] in ["runderunderunderunde", "srunde"]:
        config["project"] = "runde"

    if config["organisation"] in ["spectorfly"]:
        config["organisation"] = "spectrofly"
    config["grouping"] = f"{config['organisation'].lower()}-{config['project'].lower()}"
    return config


def to_seabee_config(df: pd.DataFrame, tmp_dir):
    """Try to convert the dataframe to seabee config files

    Try to save df to seabee config files and resturn a new dataframe with the results
    this way we can play a bit with various changes and resulting layer name. The config files
    are saved in tmp_dir.
    """
    mission_dict = dict(folder_name=[], layer_name=[], valid=[], dir_path=[])
    keys = template_config().keys()
    mission_dict.update({k: [] for k in keys})

    for _, data in df.iterrows():
        mission_name = data["folder_name"]
        config = apply_rules(data[keys].to_dict())
        clean_config = {k: config[k] for k in keys if config[k] is not None}
        nfiles = len(sb.ortho.list_images(data.dir_path / "images", verbose=False))
        mission_dict["folder_name"].append(mission_name)
        try:
            sb.ortho.CONFIG_SCHEMA.validate({**clean_config, "nfiles": 1})
            write_new_config(tmp_dir / "config.seabee.yaml", config)
            layer_name = sb.ortho.get_layer_name(tmp_dir)
        except Exception as e:
            print(f"Error in {mission_name}: {e}")
            config["nfiles"] = nfiles
            mission_dict["layer_name"].append("error")
            mission_dict["valid"].append(False)
        else:
            config["nfiles"] = nfiles
            mission_dict["layer_name"].append(layer_name)
            mission_dict["valid"].append(True)
            tmp_file = tmp_dir / f"{mission_name}.yaml"
            write_new_config(tmp_file, config)

        for k, v in config.items():
            mission_dict[k].append(v)
        mission_dict["dir_path"].append(data.dir_path)

    return pd.DataFrame(mission_dict).astype(dict(elevation="Int16"))

In [224]:
dir_list = [
    p
    for p in base_dirs
    for p in p.iterdir()
    if p.is_dir() and (p / "config.seabee.yaml").exists()
]

In [225]:
tmp_dir = Path("./tmp")
tmp_dir.mkdir(exist_ok=True)

In [226]:
mission_dict = dict(folder_name=[], dir_path=[])
mission_dict.update({k: [] for k in template_config().keys()})

for dir_path in dir_list:
    # Update config
    mission_name = os.path.split(dir_path)[-1]
    existing_conf = parse_config(dir_path)
    folder_conf = niva_name_to_config(mission_name)
    new_conf = merge_conf(folder_conf, existing_conf)
    mission_dict["folder_name"].append(mission_name)
    nfiles = len(sb.ortho.list_images(dir_path / "images", verbose=False))
    new_conf["nfiles"] = nfiles
    for k, v in new_conf.items():
        mission_dict[k].append(v)
    mission_dict["dir_path"].append(dir_path)

In [227]:
df = pd.DataFrame(mission_dict).astype(dict(elevation="Int16"))

In [228]:
df = to_seabee_config(df, tmp_dir)
df

Error in niva_202209_runde_runde_otter: Key 'datetime' error:
Or(<function <lambda> at 0x7f7da5f42830>, <function <lambda> at 0x7f7da5e8e290>) did not validate '202209'
<lambda>('202209') raised ValueError("time data '202209' does not match format '%Y%m%d'")
<lambda>('202209') raised ValueError("time data '202209' does not match format '%Y%m%d%H%M'")
Error in niva_yyyymmddhhmm_area_site_typ_alt: Key 'datetime' error:
Or(<function <lambda> at 0x7f7da5f42830>, <function <lambda> at 0x7f7da5e8e290>) did not validate 'yyyymmddhhmm'
<lambda>('yyyymmddhhmm') raised ValueError("time data 'yyyymmddhhmm' does not match format '%Y%m%d'")
<lambda>('yyyymmddhhmm') raised ValueError("time data 'yyyymmddhhmm' does not match format '%Y%m%d%H%M'")
Error in niva_yyyymmddhhmm_area_site_typ_alt - Copy: Key 'datetime' error:
Or(<function <lambda> at 0x7f7da5f42830>, <function <lambda> at 0x7f7da5e8e290>) did not validate 'yyyymmddhhmm'
<lambda>('yyyymmddhhmm') raised ValueError("time data 'yyyymmddhhmm' d

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
1,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan_Ghareeb,Habitat,False,True
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-seabee_SITE_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202208191055,ms,120,seabee,4956,niva,Medyan_Ghareeb,Habitat,False,True
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan_Ghareeb,Habitat,False,True
184,niva_202310031115_io_husviksbaen_rgb_80,niva-seabee_oslo_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,oslo,202310031115,rgb,80,seabee,196,niva,Medyan_Ghareeb,Habitat,False,True
185,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan_Ghareeb,Habitat,False,True
186,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan_Ghareeb,Habitat,False,True


# Valid config

In [229]:
df[df.valid]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
1,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan_Ghareeb,Habitat,False,True
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-seabee_SITE_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202208191055,ms,120,seabee,4956,niva,Medyan_Ghareeb,Habitat,False,True
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan_Ghareeb,Habitat,False,True
184,niva_202310031115_io_husviksbaen_rgb_80,niva-seabee_oslo_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,oslo,202310031115,rgb,80,seabee,196,niva,Medyan_Ghareeb,Habitat,False,True
185,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan_Ghareeb,Habitat,False,True
186,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan_Ghareeb,Habitat,False,True


# Area naming used

In [230]:
df.sort_values(by="area").area.value_counts(sort=False)

SITE                  34
Stege-nor              1
Stege-nor-fast         1
finnmark-bringnes      8
finnmark-vassbukta     7
fornebu                1
halden-h20             2
halden-h21             2
halden-h22             2
halden-h23             2
halden-h24             2
halden-h25             2
halden-h28             2
halden-h29             2
halden-h30             2
halden-h31             2
halden-h33             2
halden-h34             2
halden-h35             2
halden-h36             2
halden-h37             2
hellviktangen          2
knerten                1
landsteilene           1
larvik-olberg-all     30
larvik-olberg-n        1
larvik-olberg-s       10
mfs                    1
olberg                14
olberg-all             2
olberg-n               1
olberg-s               5
oslo                  10
remoy                  7
runde                  7
vega-n                 4
vega-s                10
Name: area, dtype: int64

In [231]:
df.groupby("area").first()

Unnamed: 0_level_0,folder_name,layer_name,valid,dir_path,grouping,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
SITE,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
Stege-nor,spectorfly_20230911_seabee_stegenor_rgb_120_full,spectrofly-seabee_Stege-nor_20230911_rgb_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-seabee,20230911,rgb,120,seabee,6163,spectrofly,nhj,Habitat,True,True
Stege-nor-fast,spectorfly_20230911_seabee_stegenor_rgb_120_fast,spectrofly-seabee_Stege-nor-fast_20230911_rgb_...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-seabee,20230911,rgb,120,seabee,6163,spectrofly,nhj,Habitat,True,True
finnmark-bringnes,niva_202309271147_ascomap_bringnes_msi_30,niva-ascomap_finnmark-bringnes_202309271147_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,202309271147,ms,30,ascomap,2149,niva,Medyan_Ghareeb,Habitat,False,True
finnmark-vassbukta,niva_202309281024_ascomap_vassbukta_msi_100,niva-ascomap_finnmark-vassbukta_202309281024_m...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,202309281024,ms,100,ascomap,3073,niva,Medyan_Ghareeb,Habitat,False,True
fornebu,niva_202310121354_oslo_fornebu_rgb_80,niva-seabee_fornebu_202310121354_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,202310121354,rgb,80,seabee,322,niva,Medyan Ghareeb,Habitat,True,True
halden-h20,niva_202308301136_halden_h20_msi_80,niva-sabicas_halden-h20_202308301136_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308301136,ms,80,sabicas,301,niva,Medyan_Ghareeb,Habitat,False,True
halden-h21,niva_202308301118_halden_h21_msi_80,niva-sabicas_halden-h21_202308301118_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308301118,ms,80,sabicas,770,niva,Medyan_Ghareeb,Habitat,False,True
halden-h22,niva_202308300752_halden_h22_msi_80,niva-sabicas_halden-h22_202308300752_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308300752,ms,80,sabicas,378,niva,Medyan_Ghareeb,Habitat,False,True
halden-h23,niva_202308301032_halden_h23_rgb_80,niva-sabicas_halden-h23_202308301032_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,202308301032,rgb,80,sabicas,76,niva,Medyan_Ghareeb,Habitat,False,True


# Grouping attribute

Grouping is formed by `{org}-{project}` 

In [232]:
gr = df.sort_values(by="grouping").grouping.value_counts(sort=False)

In [233]:
gr

niva-ascomap           15
niva-kelpmap            8
niva-runde              2
niva-sabicas           30
niva-seabee           101
ntnu-seabee             8
spectrofly-kelpmap      6
spectrofly-runde       11
spectrofly-seabee       7
Name: grouping, dtype: int64

In [234]:
df.groupby("grouping").first()

Unnamed: 0_level_0,folder_name,layer_name,valid,dir_path,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
grouping,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
niva-ascomap,niva_202309271147_ascomap_bringnes_msi_30,niva-ascomap_finnmark-bringnes_202309271147_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,finnmark-bringnes,202309271147,ms,30,ascomap,2149,niva,Medyan_Ghareeb,Habitat,False,True
niva-kelpmap,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
niva-runde,niva_202208311315_runde_remoy_msi_60,niva-runde_remoy_202208311315_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,remoy,202208311315,ms,60,runde,4878,niva,Medyan_Ghareeb,Habitat,False,True
niva-sabicas,niva_202308291402_halden_h24_msi_80,niva-sabicas_halden-h24_202308291402_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,halden-h24,202308291402,ms,80,sabicas,896,niva,Medyan_Ghareeb,Habitat,False,True
niva-seabee,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,SITE,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
ntnu-seabee,ntnu_202209011115_runde_runde_hsi_40,ntnu-seabee_SITE_202209011115_hsi_40m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,SITE,202209011115,hsi,40,seabee,0,ntnu,Medyan_Ghareeb,Habitat,False,True
spectrofly-kelpmap,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan_Ghareeb,Habitat,False,True
spectrofly-runde,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
spectrofly-seabee,spectrofly_202208181038_kelpmap_vega-s_msi_120,spectrofly-seabee_SITE_202208181038_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,SITE,202208181038,ms,120,seabee,7802,spectrofly,Medyan_Ghareeb,Habitat,False,True


# Kelpmap naming

In [235]:
df[df.grouping == "niva-kelpmap"]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
9,niva_202208180814_kelpmap_vega-s_msi_115,niva-kelpmap_vega-s_202208180814_ms_115m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208180814,ms,115,kelpmap,13710,niva,Medyan_Ghareeb,Habitat,False,True
11,niva_202208181012_kelpmap_vega-s_rgb_60,niva-kelpmap_vega-s_202208181012_rgb_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181012,rgb,60,kelpmap,494,niva,Medyan_Ghareeb,Habitat,False,True
26,niva_202208190944_kelpmap_vega-n_msi_120,niva-kelpmap_vega-n_202208190944_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208190944,ms,120,kelpmap,5034,niva,Medyan_Ghareeb,Habitat,False,True
37,niva_202208181236_kelpmap_vega-s_rgb_60,niva-kelpmap_vega-s_202208181236_rgb_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,rgb,60,kelpmap,491,niva,Medyan_Ghareeb,Habitat,False,True
40,niva_202208181012_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181012_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181012,ms,60,kelpmap,4308,niva,Medyan_Ghareeb,Habitat,False,True
45,niva_202208180814_kelpmap_vega-s_rgb_115,niva-kelpmap_vega-s_202208180814_rgb_115m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208180814,rgb,115,kelpmap,1804,niva,Medyan_Ghareeb,Habitat,False,True
55,niva_202208191055_kelpmap_vega-n_rgb_120,niva-kelpmap_vega-n_202208191055_rgb_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-n,202208191055,rgb,120,kelpmap,405,niva,Medyan_Ghareeb,Habitat,False,True


# Project attribute

Seems to be standardized to uppercase, should change all to uppercase?

In [236]:
df.sort_values(by="project").project.value_counts(sort=False)

ascomap     15
kelpmap     14
runde       13
sabicas     30
seabee     116
Name: project, dtype: int64

In [237]:
df.groupby("project").first()

Unnamed: 0_level_0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,nfiles,organisation,creator_name,theme,mosaic,publish
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ascomap,niva_202309271147_ascomap_bringnes_msi_30,niva-ascomap_finnmark-bringnes_202309271147_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271147,ms,30,2149,niva,Medyan_Ghareeb,Habitat,False,True
kelpmap,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,4272,niva,Medyan_Ghareeb,Habitat,False,True
runde,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
sabicas,niva_202308291402_halden_h24_msi_80,niva-sabicas_halden-h24_202308291402_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h24,202308291402,ms,80,896,niva,Medyan_Ghareeb,Habitat,False,True
seabee,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202205180845,ms,0,112,niva,Medyan_Ghareeb,Habitat,False,True


In [238]:
df[df.valid]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
1,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan_Ghareeb,Habitat,False,True
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-seabee_SITE_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202208191055,ms,120,seabee,4956,niva,Medyan_Ghareeb,Habitat,False,True
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan_Ghareeb,Habitat,False,True
184,niva_202310031115_io_husviksbaen_rgb_80,niva-seabee_oslo_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,oslo,202310031115,rgb,80,seabee,196,niva,Medyan_Ghareeb,Habitat,False,True
185,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan_Ghareeb,Habitat,False,True
186,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan_Ghareeb,Habitat,False,True


# Not valid

In [239]:
df[~df.valid]

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
35,niva_202209_runde_runde_otter,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202209,ms,,seabee,0,niva,Medyan_Ghareeb,Habitat,False,True
38,niva_yyyymmddhhmm_area_site_typ_alt,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,yyyymmddhhmm,ms,,seabee,0,niva,Medyan_Ghareeb,Habitat,False,True
41,niva_yyyymmddhhmm_area_site_typ_alt - Copy,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,yyyymmddhhmm,ms,,seabee,0,niva,Medyan_Ghareeb,Habitat,False,True
44,niva_202209_runde_runde_documentation_0,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202209,ms,0.0,seabee,6,niva,Medyan_Ghareeb,Habitat,False,True
132,niva_yyyymmddhhmm_area_site_typ_alt_compare,error,False,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,yyyymmddhhmm,ms,,seabee,0,niva,Medyan_Ghareeb,Habitat,False,True


# Modify various naming issues

In [240]:
df

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
1,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan_Ghareeb,Habitat,False,True
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-seabee_SITE_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202208191055,ms,120,seabee,4956,niva,Medyan_Ghareeb,Habitat,False,True
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan_Ghareeb,Habitat,False,True
184,niva_202310031115_io_husviksbaen_rgb_80,niva-seabee_oslo_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,oslo,202310031115,rgb,80,seabee,196,niva,Medyan_Ghareeb,Habitat,False,True
185,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan_Ghareeb,Habitat,False,True
186,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan_Ghareeb,Habitat,False,True


# Save config files

Also store a sample where nfiles are invalid

In [241]:
to_seabee_config(df[df.valid], tmp_dir)

Unnamed: 0,folder_name,layer_name,valid,dir_path,grouping,area,datetime,spectrum_type,elevation,project,nfiles,organisation,creator_name,theme,mosaic,publish
0,niva_202208181236_kelpmap_vega-s_msi_60,niva-kelpmap_vega-s_202208181236_ms_60m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-kelpmap,vega-s,202208181236,ms,60,kelpmap,4272,niva,Medyan_Ghareeb,Habitat,False,True
1,niva_202205180845_plastinoland_drammen_documen...,niva-seabee_SITE_202205180845_ms,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202205180845,ms,0,seabee,112,niva,Medyan_Ghareeb,Habitat,False,True
2,spectrofly_202208180945_kelpmap_vega-s_msi_120,spectrofly-kelpmap_vega-s_202208180945_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-kelpmap,vega-s,202208180945,ms,120,kelpmap,4850,spectrofly,Medyan_Ghareeb,Habitat,False,True
3,niva_202208191055_kelpmap_vega-n_msi_120,niva-seabee_SITE_202208191055_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,SITE,202208191055,ms,120,seabee,4956,niva,Medyan_Ghareeb,Habitat,False,True
4,spectrofly_202208311309_runde_remoy_msi_120,spectrofly-runde_remoy_202208311309_ms_120m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,spectrofly-runde,remoy,202208311309,ms,120,runde,0,spectrofly,Medyan_Ghareeb,Habitat,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,niva_202308290947_halden_h29_rgb_80,niva-sabicas_halden-h29_202308290947_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h29,202308290947,rgb,80,sabicas,72,niva,Medyan_Ghareeb,Habitat,False,True
179,niva_202310031115_io_husviksbaen_rgb_80,niva-seabee_oslo_202310031115_rgb_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-seabee,oslo,202310031115,rgb,80,seabee,196,niva,Medyan_Ghareeb,Habitat,False,True
180,niva_202308301044_halden_h23_msi_80,niva-sabicas_halden-h23_202308301044_ms_80m,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-sabicas,halden-h23,202308301044,ms,80,sabicas,693,niva,Medyan_Ghareeb,Habitat,False,True
181,niva_202309271230_ascomap_bringnes_msi_100,niva-ascomap_finnmark-bringnes_202309271230_ms...,True,/home/notebook/shared-seabee-ns9879k/niva-tidy...,niva-ascomap,finnmark-bringnes,202309271230,ms,100,ascomap,4116,niva,Medyan_Ghareeb,Habitat,False,True
