# Dependencies

Use extra UbuntuGIS repository to get _GDAL_ version 3.0.4 or higher, since _Colab_'s native version 2.2.x is too old for the pipeline.

> If the reported _GDAL_ version is still 2.2.x after the installation, restart the runtime.

In [None]:
# Check container OS version (for correct UbuntuGIS package version).
# The APT package versions pinned in the GDAL cell below carry a `~bionic0`
# suffix, so compare the release printed here against them.
!lsb_release -a

## GDAL

> If _GDAL 3.0.4_ or above (library and _Python_ bindings) is already installed in the system, skip or comment out the cell below: it is intended for _Google Colab_, which ships _GDAL 2.2.x_ only.

In [None]:
# Dark magic happens here: installing dependencies for GDAL 3.0.4
# build process via APT and install GDAL itself via PyPI
# Steps (chained with `&&`, so a failing step stops the rest; `time` reports
# how long the whole chain took):
#   1. add the ubuntugis-unstable PPA;
#   2. install python3-gdal 3.0.4 from APT;
#   3. purge python3-gdal again (with --autoremove);
#   4. install the gdal 3.0.4 Python package via pip;
#   5. install gdal-bin 3.0.4 from APT.
# NOTE(review): the `apt` calls carry no `-y`, so they may stop at a
# confirmation prompt — confirm this works in a non-interactive runtime.
!time (add-apt-repository -y ppa:ubuntugis/ubuntugis-unstable && \
 apt install python3-gdal=3.0.4+dfsg-1~bionic0 && \
 apt purge --autoremove python3-gdal && \
 pip install gdal==3.0.4 && \
 apt install gdal-bin=3.0.4+dfsg-1~bionic0)

# Import the bindings and print their version to verify the installation
from osgeo import gdal; print(f"\nGDAL version {(gdal.__version__)}")

## OpenVINO

Install Intel ® OpenVINO

> OpenVINO DLDT will be accessible as `import openvino`

In [None]:
# Install the OpenVINO developer tools with the ONNX extra (quiet mode)
%pip install -q openvino-dev[onnx]

# Print Model Optimizer help to confirm that the `mo` command is available
!mo -h

# Google Drive

Mount _Google Drive_ for SAR images (recommended for storing input and output images).

> If _Google Colab_ is not used, then this cell may be commented out.

In [None]:
from os import path as osp

from google.colab import drive


# Mount point of Google Drive inside the container
PATH_DRIVE = osp.join('/', 'content', 'drive')

# Mount only when the mount point is missing; otherwise just report it
if osp.exists(PATH_DRIVE):
    print("Google Drive has been already mounted!")
else:
    print("Mounting Google Drive...")
    drive.mount(PATH_DRIVE)

# Paths

Paths to be used by preprocessing steps. Use `PATH_STORAGE` as a subdirectory hierarchy to store processed GeoTIFFs/Shapefiles right in _Google Drive_ (an empty string saves them to the `input`/`output` folders in the root of _Google Drive_). `PATH_STORAGE` is used only together with the `PATH_DRIVE` variable.

`PATH_TEMP` is used to store intermediate GeoTIFFs while processing.

`PATH_INPUT` is used as a source of GeoTIFFs (it differs from the _dataset_ directory in that it has **no masks**).

`PATH_OUTPUT` is used to save images (for example, inference on a test subset).

`PATH_MODELS` is used to save model weights for later inference.

`PATH_DATASET` is a source of GeoTIFF **images** and **masks** for training and inference (for example, test subset).

`PATH_RESOURCES` is used as a source of auxiliary files such as GeoJSON search area or Shapefile cutline.

In [None]:
import os
from os import path as osp


# Arbitrary subpath in Google Drive (if any)
PATH_STORAGE = osp.join('ods', 'soc')
# Drive-relative prefix; stays empty when PATH_DRIVE was never defined
PREFIX_DRIVE = (osp.join(osp.basename(PATH_DRIVE), 'MyDrive', PATH_STORAGE)
                if 'PATH_DRIVE' in locals() else '')

# Locations that never go through the Drive prefix
PATH_TEMP, PATH_RESOURCES = (osp.join('/', 'content', leaf)
                             for leaf in ('temp', 'resources'))
# Locations placed under the Drive prefix (when there is one)
PATH_INPUT, PATH_OUTPUT, PATH_MODELS, PATH_DATASET, PATH_OPENVINO = (
    osp.join('/', 'content', PREFIX_DRIVE, leaf)
    for leaf in ('input', 'output', 'models', 'dataset', 'vino')
)

# One path per line
print(PATH_STORAGE, PATH_TEMP, PATH_INPUT, PATH_OUTPUT, PATH_MODELS,
      PATH_DATASET, PATH_OPENVINO, PATH_RESOURCES, sep='\n')

# Export two of the paths as environment variables
os.environ.update(PATH_OPENVINO=PATH_OPENVINO, PATH_TEMP=PATH_TEMP)

# Functions

Auxiliary functions (for example, drawing/plotting data).

In [None]:
import os

from matplotlib import pyplot as plt


def draw_one_row(*images, size=1024, output=None):
    """Show the given images side by side in one row and optionally save them.

    Args:
        *images: Images accepted by ``Axes.imshow``; at least one is required.
        size: Figure size in pixels at 72 DPI. Either a single number (used
            for both width and height) or a sequence whose first two items
            are ``(width, height)``.
        output: Optional path of the file to save the figure to. Missing
            parent directories are created. A failure to save is reported
            with a message and does not prevent the figure from being shown.

    Raises:
        ValueError: If no images are given.
    """
    if not images:
        raise ValueError("draw_one_row() requires at least one image")
    try:
        size = size[:2]
    except (TypeError, IndexError):
        # Scalars cannot be sliced (Python numbers raise TypeError, NumPy
        # scalars raise IndexError): use the value for both dimensions
        size = (size, size)
    # squeeze=False keeps `axes` two-dimensional, so a single image works too
    figure, axes = plt.subplots(1, len(images), dpi=72, squeeze=False,
                                figsize=(size[0] / 72, size[1] / 72))
    for axis, image in zip(axes[0], images):
        axis.imshow(image)
    if output is not None:
        try:
            directory = os.path.dirname(output)
            # A bare file name has no directory part; makedirs('') would fail
            if directory:
                os.makedirs(directory, exist_ok=True)
            figure.savefig(output)
        except (OSError, ValueError) as error:
            # Saving is best effort: report the problem and still show the plot
            print(f"Could not save figure to '{output}': {error}")
    plt.show()

# W&B

An API token is required in order to authorize in WandB:
https://wandb.ai/authorize

In [None]:
# Install the W&B client, upgrading it if already present (quiet mode)
%pip install --quiet --upgrade wandb

In [None]:
import os

from getpass import getpass


# ATTENTION: do not forget to set proper WandB token here (as string)
# The key is requested interactively only when the environment has none
if os.getenv('WANDB_API_KEY') is None:
    os.environ['WANDB_API_KEY'] = getpass('https://wandb.ai/authorize :')
# os.environ['WANDB_MODE'] = 'dryrun'  # 'offline' / do not sync immediately

# W&B is optional: on any import/login failure `wandb` is set to None
try:
    import wandb
    wandb.login()
except Exception as error:
    # `Exception` instead of a bare `except` lets KeyboardInterrupt and
    # SystemExit propagate; the reason of the failure is reported
    print(f"W&B is unavailable: {error}")
    wandb = None

In [None]:
# Identification of the W&B run
WANDB_ENTITY = 'maritimeai'
WANDB_PROJECT = 'sea-ice-segmentation'
WANDB_GROUP = 'OpenVINO'
WANDB_NAME = '/'.join(('OpenVINO', 'Baseline'))

# No run unless a usable `wandb` module is present in the namespace
experiment = None
if locals().get('wandb') is not None:
    experiment = wandb.init(
        entity=WANDB_ENTITY,  # config=config,
        project=WANDB_PROJECT,
        group=WANDB_GROUP,
        name=WANDB_NAME,
        notes='OpenVINO debug pipeline',
    )

In [None]:
# Names of the W&B artifacts to fetch
artifacts_baseline = [
    'Baseline.Fold1.2021-09-13-09-15-27:latest',
    'Baseline.Fold2.2021-10-12-07-54-33:latest',
    'Baseline.Fold3.2021-09-17-10-37-26:latest',
    'Baseline.Fold4.2021-09-24-22-44-24:latest',
    'Baseline.Fold5.2021-09-23-09-45-27:latest',
]
# Artifact name -> {'data': artifact object, 'path': absolute download path}.
# Defined unconditionally, so that cells iterating over `artifacts` do not
# fail with NameError when there is no W&B run.
artifacts = {}
if experiment is not None:
    try:
        for artifact_name in artifacts_baseline:
            artifact = experiment.use_artifact(artifact_name)
            artifacts[artifact_name] = {
                'data': artifact,
                'path': osp.abspath(artifact.download())
            }

        print(f"Downloaded {len(artifacts)} artifacts")
    finally:
        # Close the run even when a download fails
        experiment.finish()

In [None]:
# Display the mapping of downloaded artifacts (name -> 'data' and 'path')
artifacts

In [None]:
# List everything matching `artifacts/*` relative to the working directory
%ls artifacts/*

# Data loading


In [None]:
# Images/masks paths
# Directories with HH/HV images and with 2-class masks
path_images_hh, path_images_hv = (osp.join(PATH_DATASET, 'images', channel)
                                  for channel in ('hh', 'hv'))
path_masks = osp.join(PATH_DATASET, 'masks', '2-class')

# Entry names found in the HH image directory and in the mask directory
items_images = set(os.listdir(path_images_hh))
items_masks = set(os.listdir(path_masks))
# Images that have no mask with the same name, in sorted order
items_test = sorted(items_images.difference(items_masks))
dict(enumerate(items_test))

# ONNX

## Shapefile cutline

In [None]:
# Clone the MaritimeAI `resources` repository into the working directory
!git clone https://github.com/MaritimeAI/resources.git

> Cutline shape may also be checked with `ogrinfo` utility from _GDAL_ library.

In [None]:
# Cutline shapefile expected under the resources directory
FILE_SHAPEFILE = osp.join(PATH_RESOURCES, 'clustering', 'cutline',
                          'Start_Ice_Map_UTMz40WGS84f_r.shp')

# Resolve the cutline to an absolute real path, or fall back to None
if osp.isfile(FILE_SHAPEFILE):
    shape = osp.abspath(osp.realpath(FILE_SHAPEFILE))
else:
    print(f"Shapefile '{FILE_SHAPEFILE}' does not exist!")
    shape = None
print(f"Available shape is {shape}")
# !ogrinfo "{shape}"

## Inference and visualization (ONNX)

Use vectorized cutline from repository to crop `NoData` area.

The inference part itself does not depend on _PyTorch_ or any model code — it needs just the exported _ONNX_ model and _ONNX Runtime_.

> HH + HV polarizations are being combined during inference, just like in `DatasetSAR` class.

> _Python_'s garbage collector is used intensively here, because the images are really large.

In [None]:
# Install ONNX Runtime (quiet mode)
%pip install --quiet onnxruntime

In [None]:
import gc

import cv2 as cv
import numpy as np
import onnxruntime

from tempfile import TemporaryDirectory
from time import time

from osgeo import gdal

PLOT = True
INFERENCE_ONNX = False  # optionally avoid inferencing with ONNX Runtime

# Artifact name -> list of per-image inference times in seconds
time_inference_onnx = {}

if INFERENCE_ONNX:
    for artifact in artifacts:
        session_ort = onnxruntime.InferenceSession(
            osp.join(artifacts[artifact]['path'], 'model.onnx'))
        time_inference_onnx[artifact] = []
        # Target directory for test inference
        path_target = osp.join(PATH_TEMP, 'onnx', artifact)
        os.makedirs(path_target, exist_ok=True)
        # Batch size is supposed to be 1
        for item_test in items_test:
            source = osp.join(path_images_hh, item_test)
            target = osp.join(path_target, item_test)
            with TemporaryDirectory() as path_temp:
                # Single-band copy of the HH image that will receive the mask
                temp = osp.join(path_temp, item_test)
                gdal.Translate(temp, source,
                               options=['-b', '1', '-colorinterp', 'gray',
                                        '-co', 'COMPRESS=DEFLATE'])

                # Input images must be 8-bit GeoTIFFs
                image_hh = cv.imread(source, cv.IMREAD_LOAD_GDAL)
                image_hv = cv.imread(osp.join(path_images_hv, item_test),
                                     cv.IMREAD_LOAD_GDAL)

                # Make HH and HV sizes match (sizes mismatch should never happen)
                # image_hv = cv.resize(image_hv, image_hh.shape[::-1],
                #                      interpolation=cv.INTER_NEAREST)
                # Stack HV, HH, HV as three channels and scale to [0, 1]
                image = (np.dstack((image_hv, image_hh, image_hv)) /
                         np.float32(255))
                del image_hh, image_hv
                gc.collect()

                # `interpolation` must be passed by keyword: the third
                # positional parameter of cv.resize is `dst`
                image = cv.resize(image, (1024, 1024),
                                  interpolation=cv.INTER_LINEAR)
                # Output image (GeoTIFF copy of image_hh)
                dataset = gdal.Open(temp, gdal.GA_Update)
                band = dataset.GetRasterBand(1)
                # Raster size is taken from the dataset header instead of
                # reading the whole band just to get its shape
                w, h = dataset.RasterXSize, dataset.RasterYSize

                # Inference
                time_start = time()
                inputs_ort = {
                    session_ort.get_inputs()[0].name:
                        np.moveaxis(image, -1, 0)[None, ...]  # HWC -> NCHW
                }
                outputs_ort = session_ort.run(None, inputs_ort)
                time_stop = time()
                # Log inference time
                time_inference_onnx[artifact].append(time_stop - time_start)

                # Process inference result into a mask
                # WARNING: this resize part works just because there are 3 classes
                # NOTE(review): INTER_NEAREST used to be passed positionally
                # (as `dst`) and was therefore ignored; it is applied now
                mask = cv.resize(np.moveaxis(outputs_ort[0][0], 0, -1),
                                 (w, h), interpolation=cv.INTER_NEAREST)
                # mask[..., 2] *= 2  # change 'ice' class weight
                mask = mask.argmax(-1).clip(0, 255).astype('uint8')
                # Save the mask to GDAL dataset
                band.WriteArray(mask)
                dataset.FlushCache()
                del band, dataset
                gc.collect()

                # Write mask as GeoTIFF (assume NoData value is always zero).
                # `shape` is passed as is: formatting None into a string
                # would request a cutline dataset literally named "None"
                gdal.Warp(target, temp, dstNodata=0, xRes=40, yRes=40,
                          cutlineDSName=shape, cropToCutline=bool(shape),
                          creationOptions=['COMPRESS=DEFLATE'])
                if PLOT:
                    mask = cv.imread(target, cv.IMREAD_LOAD_GDAL)
                    draw_one_row(cv.resize(image, (w, h),
                                           interpolation=cv.INTER_LINEAR),
                                 mask)
                del image, mask
            gc.collect()

time_inference_onnx

# OpenVINO

## Convert ONNX models to OpenVINO

Model optimizer is being called via command-line: `mo --input_model <path/to/model.onnx> --output_dir <path/to/output>`.

In [None]:
# Artifact name -> conversion info: 'source' (ONNX file) and 'target'
# (directory receiving the converted model)
models_openvino = {}

# NOTE(review): any exception raised below (including a NameError when
# `artifacts` is not defined) is only printed, so `models_openvino` may be
# left incomplete for the following cells.
try:
    for artifact in artifacts:
        models_openvino[artifact] = {}

        # Absolute path of the ONNX model file inside the downloaded artifact
        path_source = osp.abspath(osp.join(artifacts[artifact]['path'],
                                           'model.onnx'))
        models_openvino[artifact]['source'] = path_source
        print("Model source:", models_openvino[artifact]['source'])

        # Target directory for the inference; the part of the artifact name
        # after the first ':' (e.g. ':latest') is dropped from the path
        path_target = osp.abspath(osp.join(PATH_TEMP, 'openvino',
                                           artifact.split(':')[0]))
        models_openvino[artifact]['target'] = path_target
        os.makedirs(models_openvino[artifact]['target'], exist_ok=True)
        print("Model target:", models_openvino[artifact]['target'])

        # Convert an ONNX model into OpenVINO IR (intermediate representation)
        # NOTE(review): the interpolated paths are not quoted, so a path
        # containing spaces would presumably break the command — confirm
        !mo --input_model {models_openvino[artifact]['source']} \
                      --output_dir {models_openvino[artifact]['target']} \
                      --framework onnx --batch 1 --data_type half \
                      --enable_concat_optimization
except Exception as e:
    print(e)

# models_openvino

Check out target paths.

In [None]:
# Print every model's target directory followed by its listing
for name_model in models_openvino:
    print(models_openvino[name_model]['target'])
    %ls {models_openvino[name_model]['target']}

## Inference and visualization (OpenVINO)

Cycle through all the models (from artifacts), cycling each model through the whole image set (150 HH+HV images).

In [None]:
import cv2 as cv
import numpy as np

from openvino.inference_engine import IECore, StatusCode


IE_EXTENSION = None
IE_DEVICE = 'CPU'
IE_CONFIG = None

PLOT = True

# Artifact name -> list of per-image inference times in seconds
time_inference_openvino = {}

# Stage 1: read every converted model and load it onto the device
for artifact in artifacts:
    ie = IECore()

    if IE_EXTENSION:
        ie.add_extension(IE_EXTENSION, IE_DEVICE)
    if IE_CONFIG:
        ie.set_config({'CONFIG_FILE': IE_CONFIG})

    net = ie.read_network(model=osp.join(models_openvino[artifact]['target'],
                                         'model.xml'))
    # net = ie.read_network(model=osp.join(models_openvino[artifact]['source'])\
    #                       .replace('latest', 'v0'))

    assert len(net.input_info) == 1, "Only single input topologies!"
    assert len(net.outputs) == 1, "Only single output topologies!"

    blob_input = next(iter(net.input_info))
    # blob_output = next(iter(net.outputs))

    # Configure Inference Engine (sync)
    # net.input_info[blob_input].precision = 'U8'
    # net.outputs[blob_output].precision = 'FP32'

    # NOTE(review): one infer request per dataset item is allocated although
    # only synchronous `infer()` calls are made below — confirm it is needed
    num_inputs = len(items_images)  # dataset
    # num_classes = max(net.outputs[blob_output].shape)
    # print(f"Classes = {num_classes}")

    net_exec = ie.load_network(network=net, device_name=IE_DEVICE,
                               num_requests=num_inputs)

    models_openvino[artifact]['network'] = net
    models_openvino[artifact]['session'] = net_exec
    models_openvino[artifact]['blob_input'] = blob_input
    # models_openvino[artifact]['blob_output'] = blob_output

# Stage 2: run the loaded models over the image set and write the masks
for artifact in artifacts:
    model = models_openvino[artifact]
    blob_input = model['blob_input']
    # Spatial size expected by the network (input is fed as NCHW below)
    _, _, net_h, net_w = (model['network'].input_info[blob_input]
                          .input_data.shape)
    # Timings are keyed by the artifact itself (not by the literal string
    # 'artifact') and accumulated over all images instead of being reset
    times = time_inference_openvino.setdefault(artifact, [])

    path_target = osp.join(model['target'], 'output')
    os.makedirs(path_target, exist_ok=True)
    for item in items_images:
        source = osp.join(path_images_hh, item)
        target = osp.join(path_target, item)
        with TemporaryDirectory() as path_temp:
            # Single-band copy of the HH image that will receive the mask
            temp = osp.join(path_temp, item)
            gdal.Translate(temp, source,
                           options=['-b', '1', '-colorinterp', 'gray',
                                    '-co', 'COMPRESS=DEFLATE'])

            # Input images must be 8-bit GeoTIFFs
            image_hh = cv.imread(source, cv.IMREAD_LOAD_GDAL)
            image_hv = cv.imread(osp.join(path_images_hv, item),
                                 cv.IMREAD_LOAD_GDAL)

            # Make HH and HV sizes match (sizes mismatch should never happen)
            # image_hv = cv.resize(image_hv, image_hh.shape[::-1],
            #                      interpolation=cv.INTER_NEAREST)
            # Stack HV, HH, HV as three channels and scale to [0, 1]
            image = np.dstack((image_hv, image_hh, image_hv)) / np.float32(255)
            del image_hh, image_hv
            gc.collect()

            # `interpolation` must be passed by keyword: the third positional
            # parameter of cv.resize is `dst`
            image = cv.resize(image, (net_w, net_h),
                              interpolation=cv.INTER_LINEAR)
            image = image.transpose((2, 0, 1))  # HWC -> CHW
            image = np.expand_dims(image, axis=0)  # CHW -> NCHW

            # Output image (GeoTIFF copy of image_hh)
            dataset = gdal.Open(temp, gdal.GA_Update)
            band = dataset.GetRasterBand(1)
            # Raster size is taken from the dataset header instead of reading
            # the whole band just to get its shape
            w, h = dataset.RasterXSize, dataset.RasterYSize

            # Inference
            time_start = time()
            result = model['session'].infer(inputs={blob_input: image})
            time_stop = time()
            # Log inference time
            times.append(time_stop - time_start)

            # Process inference result into a mask
            # WARNING: this resize part works just because there are 3 classes
            # NOTE(review): INTER_NEAREST used to be passed positionally (as
            # `dst`) and was therefore ignored; it is applied now
            mask_raw = np.moveaxis(result['output'][0], 0, -1)
            mask = cv.resize(mask_raw, (w, h), interpolation=cv.INTER_NEAREST)
            # mask[..., 2] *= 2  # change 'ice' class weight
            mask = mask.argmax(-1).clip(0, 255).astype('uint8')
            # Save the mask to GDAL dataset
            band.WriteArray(mask)
            dataset.FlushCache()
            del band, dataset
            gc.collect()

            # Write mask as GeoTIFF (assume NoData value is always zero).
            # `shape` is passed as is: formatting None into a string would
            # request a cutline dataset literally named "None"
            gdal.Warp(target, temp, dstNodata=0, xRes=40, yRes=40,
                      cutlineDSName=shape, cropToCutline=bool(shape),
                      creationOptions=['COMPRESS=DEFLATE'])
            if PLOT:
                mask = cv.imread(target, cv.IMREAD_LOAD_GDAL)
                # print(f"Nodata: min = {mask_raw[..., 0].min()},",
                #       f"max = {mask_raw[..., 0].max()}")
                # print(f"Water: min = {mask_raw[..., 1].min()},",
                #       f"max = {mask_raw[..., 1].max()}")
                # print(f"Ice: min = {mask_raw[..., 2].min()},",
                #       f"max = {mask_raw[..., 2].max()}")
                draw_one_row(cv.resize(image[0].transpose((1, 2, 0)), (w, h),
                                       interpolation=cv.INTER_LINEAR), mask)
            del image, mask
        gc.collect()

    model['times'] = times
    # NOTE(review): only the first artifact is processed because of this
    # `break` — confirm whether it is a deliberate limit or a debug leftover
    break

time_inference_openvino