### Importing icevision for dataset loading and model training, and other libraries for COCO creation

In [None]:
import numpy as np
import ceruleanml.data as data
import json
import os
from PIL import Image
from pathlib import Path
import skimage.io as skio

# Resolve the example dataset directory relative to the current working directory.
ml_data_path = os.path.join(os.path.abspath(os.getcwd()),"../../data/cerulean_v2_example/")
path = Path(ml_data_path)
# Path.glob yields entries in arbitrary, filesystem-dependent order; sort the
# listing so positional lookups such as layer_pths[1] pick the same file on every run.
layer_pths = sorted(path.glob("assets_test1_png/*"))

In [None]:
# Read the second listed file and pass it through data.reshape_split with a
# (512, 512) size argument (tiling, judging by the names -- confirm).
img_fname = layer_pths[1]
arr = skio.imread(img_fname)
tile_shape = (512, 512)
tiled_arr = data.reshape_split(arr, tile_shape)

### Making a COCO dataset for Mask-RCNN: converting the set of instance layers per scene from annotations to COCO labels

In [None]:
from pycococreatortools import pycococreatortools

In [None]:
# Top-level COCO "info" record for this dataset.
info = dict(
    description="Cerulean Dataset V2",
    url="none",
    version="1.0",
    year=2021,
    contributor="Skytruth",
    date_created="2022/2/23",
)

# One license entry; its url is the literal string "none".
licenses = [dict(url="none", id=1, name="CeruleanDataset V2")]

# Category names in id order: ids run 1..6 and every entry shares the "slick" supercategory.
_slick_category_names = (
    "infra_slick",
    "natural_seep",
    "coincident_vessel",
    "recent_vessel",
    "old_vessel",
    "ambiguous",
)
categories = [
    {"supercategory": "slick", "id": category_id, "name": category_name}
    for category_id, category_name in enumerate(_slick_category_names, start=1)
]

# Skeleton of the COCO document; "images" and "annotations" start out empty.
coco_output = dict(
    info=info,
    licenses=licenses,
    images=[],
    annotations=[],
    categories=categories,
)

In [None]:
# Output directory for the tile images and the COCO json; created (with any
# missing parents) if it does not exist yet.
outdir = "../../data/cerulean_v2_example/tiled_data_example"
Path(outdir).mkdir(parents=True, exist_ok=True)

In [None]:
# Build the COCO document: read every input image, tile it, hand "Background"
# tiles to save_tiles_from_3d, and turn each "Layer" tile into one COCO image
# entry plus (when pycococreatortools returns one) an annotation.

# Running COCO ids; both start at 1 and advance once per processed label tile.
image_id = 1
segmentation_id = 1

# Per-tile file records collected next to coco_output (not written out in this cell).
images_d = []

# NOTE(review): `instance_paths`, `save_tiles_from_3d` and `rgbalpha_to_binary` are not
# defined or imported in the visible cells -- presumably `instance_paths` is `layer_pths`
# and the helpers live in `ceruleanml.data`; confirm before a Restart & Run All.
for instance_path in instance_paths:
    # instance_path may be a pathlib.Path, for which `"text" in path` raises
    # TypeError, so the substring checks run on the string form of the path.
    instance_name = str(instance_path)
    arr = skio.imread(instance_path)
    tiled_arr = data.reshape_split(arr, (512, 512))
    if "Background" in instance_name: # its the vv image
        save_tiles_from_3d(tiled_arr, instance_path, outdir)
    elif "Layer" in instance_name: # its an instance label
        # The parent directory name prefixes every tile file name of this layer.
        scene_name = os.path.basename(os.path.dirname(instance_path))
        # Visit every tile along the first axis of tiled_arr (same elements as tiled_arr[i]).
        for i, instance_tile in enumerate(tiled_arr):
            fname = os.path.join(outdir, scene_name+f"_label-image_tile_{i}.png")
            images_d.append({"file_name": fname, "height": 512, "width": 512, "id":i})

            # go through each label image to extract annotation
            image_info = pycococreatortools.create_image_info(
                image_id, os.path.basename(instance_path), (512,512))
            coco_output["images"].append(image_info)
            class_id = data.get_layer_cls(instance_tile, data.class_mapping_photopea, data.class_mapping_coco)
            # Compare by value: `is not 0` tests object identity, which is not a
            # reliable integer comparison (and emits a SyntaxWarning on Python 3.8+).
            if class_id != 0:
                category_info = {"id":class_id,"is_crowd":True} # forces compressed RLE format
            else:
                category_info = {"id":class_id,"is_crowd":False}
            r,g,b = data.class_mapping_photopea[data.class_mapping_coco_inv[class_id]]
            binary_mask = rgbalpha_to_binary(instance_tile, r,g,b).astype(np.uint8)

            annotation_info = pycococreatortools.create_annotation_info(
                segmentation_id, image_id, category_info, binary_mask,
                instance_tile.shape, tolerance=0)

            # create_annotation_info may return None; only real annotations are stored.
            if annotation_info is not None:
                coco_output["annotations"].append(annotation_info)

            segmentation_id = segmentation_id + 1

            image_id = image_id + 1
    else:
        # Belongs to the if/elif above. Attached to the `for` loop it would run
        # after every fully processed Layer file and abort the whole conversion.
        raise ValueError(f"The file doesn't have 'Background' or 'Layer' in the title, got {instance_path} instead.")

# Persist the assembled COCO document next to the tiles.
with open(f'{outdir}/instances_slick_train_v2.json', 'w') as output_json_file:
    json.dump(coco_output, output_json_file)

In [None]:
# Class map built from a single class name, "oil_slick".
slick_class_names = ["oil_slick"]
class_map = ClassMap(slick_class_names)
class_map # https://airctic.github.io/icedata/dataset_voc_nb/#define-class_map

In [None]:
# COCO-format annotation file located directly under the example data directory.
annotations_json = f'{path}/instances_slick_train.json'
# NOTE(review): `img_dir` is not assigned in any visible cell -- confirm where it comes from.
parser = parsers.COCOMaskParser(annotations_json, img_dir)

### Parsing works! we're just trying to test if this trains and evaluates correctly, it's ok if many of these instances don't look like instances for now since the dataset was made for semantic segmentation

It's possible icevision is filtering out all negative samples here during autofixing, which we can check. Our most useful samples will include non-background class hard negatives and positives anyway

In [None]:
# Parse the annotation file into training and validation records.
train_records, valid_records = parser.parse()
# Preview the first three training records in three columns and save the figure.
preview_records = train_records[:3]
x = show_records(preview_records, ncols=3, class_map=class_map)
plt.savefig("train_slick_examples.png")

Normalizing is best practice and necessary for icevision to properly display prediction results.

In [None]:
# Training pipeline: a single Normalize transform wrapped in the Adapter.
train_steps = [tfms.A.Normalize()]
train_tfms = tfms.A.Adapter(train_steps)

In [None]:
# Validation pipeline: the resize_and_pad(size=512) transforms followed by Normalize.
valid_steps = [
    *tfms.A.resize_and_pad(size=512),
    tfms.A.Normalize(),
]
valid_tfms = tfms.A.Adapter(valid_steps)

sourced from: https://airctic.com/0.8.1/getting_started_instance_segmentation/

In [None]:
# Development-time source lookup, disabled so the notebook survives Restart & Run All:
# `train_ds` is only created in the following cell, so executing this line here on a
# fresh kernel raises NameError. Run it manually once `train_ds` exists.
# train_ds.records.autofix??

In [None]:
# Pair each record split with its transform pipeline.
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

# Loader settings shared by both splits; adjust num_workers for your processor count.
loader_kwargs = dict(batch_size=8, num_workers=6)
train_dl = model_type.train_dl(train_ds, shuffle=True, **loader_kwargs)
valid_dl = model_type.valid_dl(valid_ds, shuffle=False, **loader_kwargs)

# Backbone is built with pretrained=False; the class count comes from the parser's class map.
net_backbone = backbone(pretrained=False)
n_classes = len(parser.class_map)
model = model_type.model(backbone=net_backbone, num_classes=n_classes)

# Mask-type COCO metric with its summary printout switched off.
mask_metric = COCOMetric(metric_type=COCOMetricType.mask, print_summary=False)
metrics = [mask_metric]

learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)

# Keep the learning-rate finder's return value for inspection in the next cell.
lr = learn.lr_find()

In [None]:
# Display the value returned by learn.lr_find() in the previous cell.
lr

The suggested learning rate makes getting to higher confidence predictions take too long. We picked the learning rate arbitrarily below to speed up getting to losses closer to .5 instead of greater than 1. 

In [None]:
# Hand-picked schedule: 30 epochs with a manually chosen learning rate
# instead of the lr_find suggestion.
n_epochs = 30
chosen_lr = 2.511886486900039e-03
learn.fine_tune(n_epochs, chosen_lr)

TODO: debug the COCOMetric; it should not be -1 given that we are now achieving detections that intersect with the ground truth.

In [None]:
# 25 * 30 / 60 gives minutes for 30 epochs; presumably 25 is seconds per epoch -- confirm.
secs_per_epoch, n_timed_epochs = 25, 30
approx_minutes = secs_per_epoch * n_timed_epochs / 60
print(f"approximate time to train 30 epochs in minutes: {approx_minutes}")


The predictions above .7 confidence that roughly line up with the ground truth demonstrate that icevision-trained models can produce predictions that look like they are headed in the correct direction, even for an imperfect training set.

In [None]:
# IPython `??` source lookup for model_type.show_results; a development aid
# that only works inside IPython/Jupyter, not as plain Python.
model_type.show_results??

In [None]:
# Show results on the validation set with detection_threshold=0.6, then save the current figure.
threshold = .6
x = model_type.show_results(model, valid_ds, detection_threshold=threshold)
plt.savefig("inference_results.png")

In [None]:
# IPython `??` source lookup for show_results; a development aid that only
# works inside IPython/Jupyter, not as plain Python.
show_results??