In [1]:
import os
import shutil
from itertools import product

import fiona
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import rasterize
from rasterio.windows import Window

import seabeepy as sb
from seabeepy.config import SETTINGS

In [2]:
# Authenticate against MinIO using the credentials held in the seabeepy settings
minio_client = sb.storage.minio_login(
    user=SETTINGS.MINIO_ACCESS_ID,
    password=SETTINGS.MINIO_SECRET_KEY,
)

# Generate ML training data for habitat missions

This notebook rasterises vector annotation for use in training ML algorithms. If you are running `seabeepy` from a local installation, it can also be used to download relevant datasets to your local machine (e.g. to an NR server for training).

The main input is an Excel file mapping annotation folders within `niva-tidy/annotation` to the relevant orthophotos.

## 1. User input

In [3]:
# Local working directory: downloads and rasterised outputs are written here
local_dir = r"/home/notebook/temp"

# Excel workbook mapping each annotation group to its raster mosaics
anno_mission_xlsx = (
    r"/home/notebook/shared-seabee-ns9879k/niva-tidy/annotation/"
    r"habitat_mission_annotation.xlsx"
)

# Version of the annotation class scheme used in the .xlsx
anno_version = "1-1"

# Location of the annotation folders on MinIO
anno_base_dir = "niva-tidy/annotation"

# Set to False to keep the rasterised annotation local instead of copying it
# back to MinIO
upload_results = True

## 2. List of annotation to process

In [4]:
# Load the annotation-to-mosaic mapping, then discard every row that has at
# least one missing value
df = pd.read_excel(anno_mission_xlsx)
df = df.dropna(how="any")
df

Unnamed: 0,anno_group,spectrum,tiff_path
0,olberg_2021,rgb,niva-tidy/2021/niva_202106160744_seabee_olberg...
1,olberg_2021,msi,niva-tidy/2021/niva_202106170926_seabee_olberg...
2,olberg_June_2023_v2,rgb,niva-tidy/2023/niva_202306230844_larvik_olberg...
3,olberg_June_2023_v2,msi,niva-tidy/2023/niva_202306231147_larvik_olberg...
4,olberg_Sept_2023,rgb,niva-tidy/2023/niva_202309131140_larvik_olberg...
5,olberg_Sept_2023,msi,niva-tidy/2023/niva_202309131140_larvik_olberg...
6,remoy_2022,rgb,niva-tidy/2022/spectrofly_202208310730_runde_r...
7,remoy_2022,msi,niva-tidy/2022/spectrofly_202208311043_runde_r...
8,vega-n_2022,rgb,niva-tidy/2022/spectrofly_202208191500_kelpmap...
9,vega-n_2022,msi,niva-tidy/2022/spectrofly_202208190819_kelpmap...


## 3. Process data

Performs the following steps:

 1. Downloads vector annotation and raster orthophotos to the user's local machine (or a folder within their `HOME` directory on Sigma2).
      
 2. Rasterises the annotation to match the orthophotos.
    
 3. Copies the rasterised versions back to MinIO and then deletes the local copy (if `upload_results` is `True`); otherwise the outputs are left in `local_dir`.

In [5]:
# For every (annotation group, spectrum, orthomosaic) row in `df`: download the
# vector annotation and the orthomosaic, rasterise the region of interest, the
# subareas and the three annotation levels against the orthomosaic, write a
# class-code CSV and optionally copy the whole dataset folder back to MinIO.

# Values that depend only on the user settings are prepared once, up front,
# instead of being recomputed for every dataset.
class_df = sb.anno.get_class_codes(anno_version)

# MinIO object keys always use "/" as the separator, so remote paths are built
# explicitly rather than with os.path.join (which emits "\" on Windows).
remote_anno_dir = anno_base_dir.rstrip("/")
dst_fold = f"{remote_anno_dir}/ml_training_data_annotation_v{anno_version}"

# Loop over datasets
for anno_grp, spec, src_tiff_path in df.itertuples(index=False, name=None):
    print(f"\nProcessing: {anno_grp} ({spec})")

    # Make local folder for data (no error if it already exists)
    local_anno_dir = os.path.join(local_dir, f"{anno_grp}_{spec}")
    os.makedirs(local_anno_dir, exist_ok=True)

    print("  Downloading data...")
    # Annotation
    gpkg_name = f"{anno_grp}_annotation.gpkg"
    src_gpkg_path = f"{remote_anno_dir}/{anno_grp}/{gpkg_name}"
    dst_gpkg_path = os.path.join(local_anno_dir, gpkg_name)
    minio_client.get(src_gpkg_path, dst_gpkg_path)

    # Orthomosaic (also passed to every rasterisation call below)
    tiff_name = os.path.basename(src_tiff_path)
    dst_tiff_path = os.path.join(local_anno_dir, tiff_name)
    minio_client.get(src_tiff_path, dst_tiff_path)

    print("  Rasterising region of interest...")
    roi_gdf = gpd.read_file(dst_gpkg_path, layer=f"{anno_grp}_region_of_interest")
    roi_tiff_path = os.path.join(local_anno_dir, f"{anno_grp}_region_of_interest.tif")
    sb.geo.geodataframe_to_raster(roi_gdf, "region_id", dst_tiff_path, roi_tiff_path)

    print("  Rasterising subareas...")
    sub_gdf = gpd.read_file(dst_gpkg_path, layer=f"{anno_grp}_subareas")
    sub_tiff_path = os.path.join(local_anno_dir, f"{anno_grp}_subareas.tif")
    sb.geo.geodataframe_to_raster(sub_gdf, "subarea_id", dst_tiff_path, sub_tiff_path)

    print("  Rasterising annotation...")
    anno_gdf = gpd.read_file(
        dst_gpkg_path, layer=f"{anno_grp}_merged_annotation_v{anno_version}"
    )
    for level in (1, 2, 3):
        print(f"    Level {level}")
        name_col = f"lev{level}_name"
        code_col = f"lev{level}_code"

        # Keep only features labelled at this level; codes are cast to int
        # before being handed to the rasteriser
        lev_gdf = anno_gdf.dropna(subset=name_col).copy()
        lev_gdf[code_col] = lev_gdf[code_col].astype(int)
        anno_tiff_path = os.path.join(
            local_anno_dir, f"{anno_grp}_level{level}_annotation_v{anno_version}.tif"
        )
        sb.geo.geodataframe_to_raster(lev_gdf, code_col, dst_tiff_path, anno_tiff_path)

    # Add a CSV mapping class codes to names
    class_csv = os.path.join(
        local_anno_dir, f"class_codes_annotation_v{anno_version}.csv"
    )
    class_df.to_csv(class_csv, index=False)

    # Copy back to MinIO if desired
    if upload_results:
        sb.storage.copy_folder(
            local_anno_dir, dst_fold, minio_client, containing_folder=True
        )

        # Delete local version (only once it has been copied to MinIO)
        shutil.rmtree(local_anno_dir)

print("Done.")


Processing: olberg_2021 (rgb)
  Downloading data...
  Rasterising region of interest...
  Rasterising subareas...
  Rasterising annotation...
    Level 1
    Level 2
    Level 3

Processing: olberg_2021 (msi)
  Downloading data...
  Rasterising region of interest...
  Rasterising subareas...
  Rasterising annotation...
    Level 1
    Level 2
    Level 3

Processing: olberg_June_2023_v2 (rgb)
  Downloading data...
  Rasterising region of interest...
  Rasterising subareas...
  Rasterising annotation...
    Level 1
    Level 2
    Level 3

Processing: olberg_June_2023_v2 (msi)
  Downloading data...
  Rasterising region of interest...
  Rasterising subareas...
  Rasterising annotation...
    Level 1
    Level 2
    Level 3

Processing: olberg_Sept_2023 (rgb)
  Downloading data...
  Rasterising region of interest...
  Rasterising subareas...
  Rasterising annotation...
    Level 1
    Level 2
    Level 3

Processing: olberg_Sept_2023 (msi)
  Downloading data...
  Rasterising region of in