In [1]:
import datetime as dt
import os
import subprocess
from glob import glob
from pathlib import Path

import requests
from seabeepy.config import SETTINGS
from geo.Geoserver import Geoserver
from tqdm.notebook import tqdm

# Upload NINA 'seabirds' datasets

Processing and uploading NINA's 'seabirds' datasets to GeoNode. The code does the following:

 * Searches for files named `odm_orthophoto.original.tif` in the `odm_orthophoto` sub-directory for each seabird mission
 
 * Discards the `alpha` band, and ensures the remaining bands are converted to 8-bit and saved with internal overviews (i.e. as COGs) using LZW compression (with the `predictor=2` flag enabled)
 
 * Adds the processed mosaics to GeoServer
 
 * Updates basic metadata
 
## 1. Raster processing

In [2]:
# Folder holding one sub-folder per 2022 seabird mission
base_dir = "/home/notebook/shared-seabee-ns9879k/seabirds/2022/"

# Folder where the processed COGs are written
cog_fold = "/home/notebook/cogs/"

# Value passed to GDAL's NUM_THREADS creation option
n_threads = 4

# Maximum number of mission folders to process (the first ones in sorted order)
n2process = 50

In [3]:
# Every immediate sub-folder of base_dir is treated as one mission
mission_pattern = os.path.join(base_dir, "*/")
dir_list = glob(mission_pattern)
dir_list.sort()
print(len(dir_list))

131


In [4]:
# Convert each mission's ODM orthophoto to an 8-bit, LZW-compressed COG.

# gdal_translate does not create missing output folders, so make sure the
# destination exists before starting
os.makedirs(cog_fold, exist_ok=True)

# Options shared by every conversion (hoisted out of the loop)
gdal_opts = [
    # Keep bands 1-3 only, which discards the alpha band
    "-b",
    "1",
    "-b",
    "2",
    "-b",
    "3",
    # Write a Cloud Optimized GeoTIFF with 8-bit pixels
    "-of",
    "COG",
    "-ot",
    "Byte",
    "-co",
    "COMPRESS=LZW",
    "-co",
    "PREDICTOR=2",
    "-co",
    f"NUM_THREADS={n_threads}",
    # Rebuild the overviews instead of reusing any stored in the source file
    "-co",
    "OVERVIEWS=IGNORE_EXISTING",
    # No explicit ranges are given; per the GDAL docs the input range is then
    # computed from the data and the output range defaults to 0-255
    "-scale",
]

missing = []  # Missions with no orthophoto at the expected location
for dir_path in dir_list[:n2process]:
    mission_name = Path(dir_path).name
    fpath = os.path.join(dir_path, "odm_orthophoto", "odm_orthophoto.original.tif")
    if not os.path.isfile(fpath):
        missing.append(mission_name)
        continue

    cog_path = os.path.join(cog_fold, mission_name + ".tif")
    cmd = ["gdal_translate", *gdal_opts, fpath, cog_path]
    # check_call raises CalledProcessError if gdal_translate exits non-zero
    subprocess.check_call(cmd)

# Report skipped missions so that gaps in the output are not silent
if missing:
    print(f"No orthophoto found for {len(missing)} mission(s): {missing}")

Input file size is 10015, 8584
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 17263, 14856
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 8928, 12831
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 25790, 21508
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 21470, 17668
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 25680, 21143
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 27002, 23648
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 25142, 26277
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 25562, 27416
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 25638, 26436
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 25874, 26613
0...10...20...30...40...50...60...70...8

## 2. Upload to GeoServer

In [5]:
# Create the GeoServer client; credentials are read from the seabeepy settings
geoserver_url = "https://geonode.seabee.sigma2.no/geoserver"
geo = Geoserver(
    geoserver_url,
    username=SETTINGS.GEOSERVER_USER,
    password=SETTINGS.GEOSERVER_PASSWORD,
)

In [6]:
# Publish every COG in the output folder as a coverage store in GeoServer
workspace = "geonode"

search_path = os.path.join(cog_fold, "*.tif")
flist = glob(search_path)
for fpath in flist:
    # The layer is named after the file, minus its extension
    layer_name = Path(fpath).stem

    # Note (from the original author): an existing layer will be overwritten
    status = geo.create_coveragestore(
        path=fpath, workspace=workspace, layer_name=layer_name
    )

## 3. Update GeoNode

To get the new datasets to appear in GeoNode, log in to the GeoNode administration panel and navigate to

    Home > Management Commands Over HTTP > Management command jobs
    
Choose `Add management command job` and set the **Command** to `updatelayers`. Check the **Autostart** box and click **Save**. If you have added a lot of data, the update process may take a while. When it is finished, the status should be updated and the new image datasets should be visible in GeoNode.

## 4. Update metadata

In [7]:
# Set the abstract, date and attribution of each uploaded dataset via the
# GeoNode v2 REST API.
base_url = "https://geonode.seabee.sigma2.no/api/v2/"

# NOTE(review): the GeoServer user name is paired with the GeoNode password
# here - confirm that both services share the same user name.
auth = (SETTINGS.GEOSERVER_USER, SETTINGS.GEONODE_PASSWORD)

# Seconds to wait for GeoNode before giving up, so that a stalled request
# cannot hang the notebook indefinitely
request_timeout = 60

search_path = os.path.join(cog_fold, "*.tif")
flist = glob(search_path)
for fpath in tqdm(flist):
    fname = os.path.basename(fpath)
    layer_name = os.path.splitext(fname)[0]

    # Find resource ID. The query goes through `params` so that the layer
    # name is URL-encoded. This search is sent without credentials.
    response = requests.get(
        base_url + "resources",
        params={"search": layer_name, "search_fields": "title"},
        timeout=request_timeout,
    )
    response.raise_for_status()
    search_result = response.json()
    n_found = search_result["total"]
    assert (
        n_found == 1
    ), f"Expected exactly one dataset with title '{layer_name}', found {n_found}."
    dataset_id = search_result["resources"][0]["pk"]

    # Extract metadata. The layer name must have exactly three '_'-separated
    # parts (area, site, YYYYMMDD); otherwise the unpacking raises ValueError.
    area, site, date_str = layer_name.split("_")
    date = dt.datetime.strptime(date_str, "%Y%m%d")
    # Format the date explicitly so the abstract does not end in '00:00:00'
    abstract = f"RGB mosaic collected by NINA (Sindre Molværsmyr) at {site} ({area}) on {date:%Y-%m-%d}."

    # Update metadata
    payload = {
        "abstract": abstract,
        "date": date.isoformat(),
        "date_type": "creation",
        "attribution": "SeaBee",
    }
    update_url = base_url + f"datasets/{dataset_id}"
    response = requests.patch(
        update_url, auth=auth, json=payload, timeout=request_timeout
    )
    response.raise_for_status()

  0%|          | 0/37 [00:00<?, ?it/s]