In [9]:
import os
import pandas as pd
import yaml
import shutil
from tqdm.auto import tqdm

In [10]:
def find_data_roots(project_root):
    """Yield the data root directories found at or below ``project_root``.

    A directory counts as a data root when it directly contains both a
    "Pictures" and a "Telemetrie" subdirectory. Once a data root has been
    yielded, its subdirectories are not searched any further.

    Args:
        project_root: Directory at which the search starts.

    Yields:
        The path of each matching directory, as reported by ``os.walk``.
    """
    print("Detecting project folders...")
    required_subdirs = ("Pictures", "Telemetrie")
    for current_dir, subdirs, _files in os.walk(project_root):
        if not all(name in subdirs for name in required_subdirs):
            continue
        yield current_dir
        # Emptying the list in place stops os.walk from descending further.
        del subdirs[:]

def load_meta(raw_data_root="/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)"):
    """Yield one metadata record per data root that has a ``meta.yaml`` file.

    Args:
        raw_data_root: Directory that is searched with ``find_data_roots``.
            Defaults to the path that was previously hard-coded here.

    Yields:
        dict: ``{"project_root": <data root path>}`` merged with the
        top-level mapping stored in that data root's ``meta.yaml``. Data roots
        without a ``meta.yaml`` are skipped; an empty ``meta.yaml`` yields a
        record that only contains ``project_root``.
    """
    for project_root in tqdm(find_data_roots(raw_data_root)):
        print(project_root)
        meta_fn = os.path.join(project_root, "meta.yaml")

        if not os.path.isfile(meta_fn):
            continue

        # Read the file completely and close it before yielding, so no file
        # handle stays open while the consumer processes the record.
        with open(meta_fn) as f:
            # NOTE(review): yaml.unsafe_load can construct arbitrary Python
            # objects. Switch to yaml.safe_load if the meta.yaml files are not
            # fully trusted and only contain plain YAML types.
            meta = yaml.unsafe_load(f)

        # An empty YAML document parses to None, which cannot be unpacked.
        if meta is None:
            meta = {}

        # Update with additional metadata
        yield {"project_root": project_root, **meta}

# Materialize all records (one per data root with a meta.yaml) and tabulate
# them; the bare name on the last line displays the table as the cell output.
metadata = pd.DataFrame(list(load_meta()))
metadata

0it [00:00, ?it/s]

Detecting project folders...
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0059_PS122_5-59-337/Haul 4/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0059_PS122_5-59-337/Haul 3/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0059_PS122_5-59-337/Haul 2/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0059_PS122_5-59-337/Haul 1/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0062_PS122_5-62-9/Haul 7/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0062_PS122_5-62-9/Haul 8/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0062_PS122_5-62-36/Haul 9/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0062_PS122_5-62-36/Haul 8/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_2020)/Leg5/0062_PS122_5-62-75/Haul 9/LOKI_10001.01
/isibhv/projects/loki/_raw_data/PS122 (MOSAiC 2019_

Unnamed: 0,project_root,sample_bottomdepth,sample_detail_location,sample_latitude,sample_longitude,sample_region
0,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4313.6,Amundsen Basin,88.02494,108.6722,Arctic Ocean
1,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4313.6,Amundsen Basin,88.02494,108.6722,Arctic Ocean
2,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4313.6,Amundsen Basin,88.02494,108.6722,Arctic Ocean
3,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4313.6,Amundsen Basin,88.02494,108.6722,Arctic Ocean
4,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4292.2,Amundsen Basin,89.00293,101.22303,Arctic Ocean
5,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4292.2,Amundsen Basin,89.00293,101.22303,Arctic Ocean
6,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4275.1,Amundsen Basin,89.07169,107.3592,Arctic Ocean
7,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4275.1,Amundsen Basin,89.07169,107.3592,Arctic Ocean
8,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4272.3,Amundsen Basin,89.07728,109.18302,Arctic Ocean
9,/isibhv/projects/loki/_raw_data/PS122 (MOSAiC ...,4295.1,Amundsen Basin,88.71107,107.36427,Arctic Ocean


In [11]:
# Save the collected metadata table; the row index is not written to the file.
metadata.to_csv(path_or_buf="project_metadata.csv", index=False)

In [12]:
# Flip sign of sample_bottomdepth
#
# Each meta.yaml is re-read from disk and only its own sample_bottomdepth value
# is changed. Writing the DataFrame row back instead would add a NaN entry for
# every column that exists in other projects' files but not in this one.
# The decision is based on the value currently stored in the file, so running
# this cell again leaves already corrected files untouched.
for project_root in metadata["project_root"]:
    meta_fn = os.path.join(project_root, "meta.yaml")

    with open(meta_fn) as f:
        # NOTE(review): unsafe_load mirrors load_meta(); switch both to
        # yaml.safe_load if the files only contain plain YAML types.
        data = yaml.unsafe_load(f)

    depth = data.get("sample_bottomdepth") if isinstance(data, dict) else None

    # Only a strictly negative number needs correcting. Missing, non-numeric,
    # NaN, zero and positive values are left alone.
    if not isinstance(depth, (int, float)) or not depth < 0:
        continue

    print(meta_fn)

    data["sample_bottomdepth"] = -depth

    # Serialize before opening the file for writing, so a serialization error
    # cannot leave a truncated meta.yaml behind.
    serialized = yaml.dump(data)
    with open(meta_fn, "w") as f:
        f.write(serialized)