In [None]:
from ceruleanml import data
from ceruleanml import evaluation
from ceruleanml import preprocess
from fastai.data.block import DataBlock
from fastai.vision.data import ImageBlock, MaskBlock
from fastai.vision.augment import aug_transforms, Resize
from fastai.vision.learner import unet_learner
from fastai.data.transforms import IndexSplitter
from fastai.metrics import DiceMulti
from ceruleanml.coco_load_fastai import record_collection_to_record_ids, get_image_path, record_to_mask
from torchvision.models import resnet18, resnet34, resnet50
from fastai.callback.fp16 import MixedPrecision
from fastai.callback.tensorboard import TensorBoardCallback
from fastai.vision.core import PILImageBW
from datetime import datetime
from pathlib import Path
import os, random
from icevision.visualize import show_data
import torch

In [None]:
from fastai.callback.tracker import EarlyStoppingCallback, SaveModelCallback

### Parsing COCO Dataset with Icevision

In [None]:
# Swap keys and values of the project's COCO class mapping.
class_map = dict((value, key) for key, value in data.class_mapping_coco_inv.items())
# Integer codes 1 .. len(class_map) - 1 (one fewer code than there are class_map entries).
class_ints = [code for code in range(1, len(class_map))]

In [None]:
# Active configuration: the "no-context" partitions (the alternative "with aux files"
# configuration is kept commented out further down in this cell).
with_context = False
mount_path = "/root/"
partitions_root = f"{mount_path}/partitions"

# Training partition: COCO annotation file and directory of tiled images.
train_set = "train-no-context-512"
json_name_train = "instances_TiledCeruleanDatasetV2NoContextFiles.json"
coco_json_path_train = f"{partitions_root}/{train_set}/{json_name_train}"
tiled_images_folder_train = f"{partitions_root}/{train_set}/tiled_images_no_context"

# Validation partition: same layout as the training partition.
val_set = "val-no-context-512"
json_name_val = "instances_TiledCeruleanDatasetV2NoContextFiles.json"
coco_json_path_val = f"{partitions_root}/{val_set}/{json_name_val}"
tiled_images_folder_val = f"{partitions_root}/{val_set}/tiled_images_no_context"

#with aux files
# with_context=True
# mount_path = "/root/"
# train_set = "train-with-context-512"
# tiled_images_folder_train = "tiled_images"
# json_name_train = "instances_TiledCeruleanDatasetV2.json"

# coco_json_path_train = f"{mount_path}/partitions/{train_set}/{json_name_train}"
# tiled_images_folder_train = f"{mount_path}/partitions/{train_set}/{tiled_images_folder_train}"
# val_set = "val-with-context-512"
# tiled_images_folder_val= "tiled_images"
# json_name_val = "instances_TiledCeruleanDatasetV2.json"
# coco_json_path_val= f"{mount_path}/partitions/{val_set}/{json_name_val}"
# tiled_images_folder_val = f"{mount_path}/partitions/{val_set}/{tiled_images_folder_val}"

# Run settings: batch size for the DataLoaders, `size` handed to aug_transforms,
# the "n chips" label that is printed before training, and the key into `archs`.
bs, size = 16, 128
n, arch = "all", 34

# Swap keys and values of the project's COCO class mapping.
class_map = dict((value, key) for key, value in data.class_mapping_coco_inv.items())
# Integer codes 1 .. len(class_map) - 1 (one fewer code than there are class_map entries).
class_ints = [code for code in range(1, len(class_map))]
# Values forwarded to preprocess.load_set_record_collection for the train / val sets.
negative_sample_count, negative_sample_count_val = 0, 40
area_thresh = 10

# f"{mount_path}/partitions/val/instances_tiled_cerulean_train_v2.json"

## looking at area distribution to find area threshold

In [None]:
# df = preprocess.get_area_df(coco_json_path_train, tiled_images_folder_train)
# df

In [None]:
# Load the training record collection from its COCO json and tile folder.
# NOTE(review): judging by the variable names, area_thresh filters small annotations and
# negative_sample_count controls how many negative tiles are kept — confirm in ceruleanml.preprocess.
record_collection_with_negative_small_filtered_train = preprocess.load_set_record_collection(
    coco_json_path_train,
    tiled_images_folder_train,
    area_thresh,
    negative_sample_count,
    preprocess=False,
)
# Ids used as the DataBlock items for the training split.
record_ids_train = record_collection_to_record_ids(
    record_collection_with_negative_small_filtered_train
)

In [None]:
# Load the validation record collection from its COCO json and tile folder.
# NOTE(review): judging by the variable names, area_thresh filters small annotations and
# negative_sample_count_val controls how many negative tiles are kept — confirm in ceruleanml.preprocess.
record_collection_with_negative_small_filtered_val = preprocess.load_set_record_collection(
    coco_json_path_val,
    tiled_images_folder_val,
    area_thresh,
    negative_sample_count_val,
    preprocess=False,
)
# Ids used as the DataBlock items for the validation split.
record_ids_val = record_collection_to_record_ids(
    record_collection_with_negative_small_filtered_val
)

In [None]:
# Record ids must be unique within each split AND across the two splits: both collections
# are concatenated below and images/masks are then looked up by record id, so an id shared
# by train and val would be ambiguous. The previous check (sum of the two set sizes) only
# caught duplicates inside a single split and could not detect train/val overlap.
_all_record_ids = record_ids_train + record_ids_val
assert len(set(_all_record_ids)) == len(_all_record_ids), (
    "record ids are duplicated within or across the train and validation sets"
)

In [None]:
# Concatenate train first, then val: get_val_indices relies on the validation entries
# being the trailing items of the combined id list.
train_val_record_ids = record_ids_train + record_ids_val
combined_record_collection = record_collection_with_negative_small_filtered_train + record_collection_with_negative_small_filtered_val

In [None]:
def get_val_indices(combined_ids, val_ids):
    """Return the positions of the validation items inside the combined id list.

    The validation ids are assumed to be the trailing ``len(val_ids)`` entries of
    ``combined_ids`` (i.e. ``combined_ids`` was built as ``train_ids + val_ids``).

    Args:
        combined_ids: sequence holding the train ids followed by the validation ids.
        val_ids: sequence of validation ids; only its length is used.

    Returns:
        A list of integer indices into ``combined_ids``. It is empty when ``val_ids``
        is empty, and covers every index when ``val_ids`` is at least as long as
        ``combined_ids``.
    """
    n_total = len(combined_ids)
    # Clamp so an over-long val_ids selects everything instead of producing negative indices.
    n_val = min(len(val_ids), n_total)
    # Compute the start explicitly: slicing with [-0:] would return the whole list
    # when there are no validation ids.
    return list(range(n_total - n_val, n_total))

In [None]:
#show_data.show_records(random.choices(combined_train_records, k=9), ncols=3)

### Constructing a fastai DataBlock from the COCO dataset parsed by icevision. Note: `aug_transforms` can only be used when `with_context=True`.

In [None]:
# Positions of the validation items within train_val_record_ids; passed to IndexSplitter below.
val_indices = get_val_indices(train_val_record_ids, record_ids_val)

In [None]:
def get_image_by_record_id(record_id):
    """DataBlock ``get_x``: resolve a record id via ``get_image_path`` on the combined collection."""
    image_path = get_image_path(combined_record_collection, record_id)
    return image_path

def get_mask_by_record_id(record_id):
    """DataBlock ``get_y``: resolve a record id via ``record_to_mask`` on the combined collection."""
    mask = record_to_mask(combined_record_collection, record_id)
    return mask

# Batch-level augmentations produced by fastai's aug_transforms.
batch_transfms = list(aug_transforms(flip_vert=True, max_warp=0.1, size=size))

# Items are record ids; images and masks are resolved from the combined record collection,
# and the train/validation split is fixed by the precomputed validation indices.
coco_seg_dblock = DataBlock(
    blocks=(ImageBlock, MaskBlock(codes=class_ints)),  # ImageBlock is RGB by default, uses PIL
    n_inp=1,
    get_x=get_image_by_record_id,
    get_y=get_mask_by_record_id,
    splitter=IndexSplitter(val_indices),
    batch_tfms=batch_transfms,
)

dls = coco_seg_dblock.dataloaders(
    source=train_val_record_ids,
    batch_size=bs,
)

In [None]:
# Visual sanity check of one batch from the DataLoaders.
dls.show_batch()

### Fastai2 Trainer

In [None]:
# Create a timestamped directory for this run; it is later used as the learner's
# model_dir and as the destination for the saved model files.
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d_%b_%Y_%H_%M_%S")
experiment_dir = Path(f'{mount_path}/experiments/cv2/' + timestampStr + '_fastai_unet/')
# parents=True: without it mkdir raises FileNotFoundError when the intermediate
# experiments/cv2 directories do not exist yet (e.g. on a fresh mount).
experiment_dir.mkdir(parents=True, exist_ok=True)
print(experiment_dir)

In [None]:
# Lookup from the integer `arch` setting to the torchvision ResNet constructor.
archs = {18: resnet18, 34: resnet34, 50: resnet50}

In [None]:
# Grab a single batch from the DataLoaders for inspection.
b = dls.one_batch()

In [None]:
# Shape of the first element of the batch (presumably the image tensor, since the DataBlock uses n_inp=1).
b[0].shape

In [None]:
# removed these callbacks since they cause this error: https://forums.fast.ai/t/learner-object-has-no-attribute-recorder/46328/18
# SaveModelCallback(monitor="valid_loss", with_opt=True), 
# EarlyStoppingCallback(monitor='valid_loss', min_delta=0.005, patience=5)

In [None]:
from fastai.metrics import Dice, DiceMulti

In [None]:
# Callbacks: TensorBoard logging, checkpointing monitored on valid_loss, and early
# stopping monitored on valid_loss.
cbs = [
    TensorBoardCallback(projector=False, trace_model=False),
    SaveModelCallback(monitor="valid_loss", with_opt=True),
    EarlyStoppingCallback(monitor="valid_loss", min_delta=0.005, patience=5),
]

# U-Net on the ResNet backbone selected by `arch`; model files go to experiment_dir.
# NOTE(review): n_out=7 is hard-coded — confirm it matches the number of classes in the masks.
learner = unet_learner(
    dls,
    archs[arch],
    n_out=7,
    metrics=[DiceMulti, Dice],
    model_dir=experiment_dir,
    cbs=cbs,
)  # cbs=cbs# SaveModelCallback saves model when there is improvement
# lr = learner.lr_find()

In [None]:
# lr

Open question: `dice_multi` changes a little when training with fewer samples, but not when training with more samples. Why?

In [None]:
# Re-state the run settings that are printed in the next cell (same values as the
# configuration cell near the top of the notebook).
size, n = 128, "all"
bs, arch = 16, 34

In [None]:
# Log the run settings, one "label value" line each.
for label, value in (
    ("size", size),
    ("batch size", bs),
    ("arch", arch),
    ("n chips", n),
    ("epoch", 5),
):
    print(label, value)

In [None]:

# Train the learner. NOTE(review): the literal 5 is duplicated in the "epoch" print
# in the previous cell — keep the two in sync.
learner.fine_tune(5, 2e-4, freeze_epochs=1) # cbs=cbs

In [None]:
# Display sample results from the trained learner.
learner.show_results()

## Everything below here still needs to be debugged (confusion matrix error)

In [None]:
# validation = learner.validate()  
# There is a strange bug here: an internal method of the Callback class that should exist is not found.
# This looks like a version-mismatch issue; it happens whenever any callback is included.

In [None]:
# NOTE(review): this overwrites the global `size` (set to 128 in the earlier cells) with 512,
# so the file name below records 512 and any cell re-run afterwards sees size=512 —
# confirm which size the name is meant to encode.
size=512
# savename = f'test_6batch_{arch}_{size}_{round(validation[1],3)}.pt'
savename = f'test_6batch_{arch}_{size}.pt'

We save the best model in a variety of formats for loading later

In [None]:
from ceruleanml.inference import save_fastai_model_state_dict_and_tracing, load_tracing_model, test_tracing_model_one_batch, logits_to_classes

# Save the learner under `savename` inside experiment_dir; the helper returns three paths
# (named here for the state dict, the GPU traced model and the CPU traced model).
state_dict_pth, tracing_model_gpu_pth, tracing_model_cpu_pth  = save_fastai_model_state_dict_and_tracing(learner, dls, savename, experiment_dir)

In [None]:
#import pdb
#learn.show_results(max_n=4, figsize=(20,20), vmin=0, vmax=3)


# Model Inference and Result Evaluation

import skimage.io as skio
import numpy as np
val_record_ids = record_collection_to_record_ids(record_collection_with_negative_small_filtered_val)
pred_arrs, val_arrs = [], []
# Walk the validation records in order; `position` indexes the record collection and
# `record_id` is the matching id used to resolve the image path.
for position, record_id in enumerate(record_ids_val):
    record = record_collection_with_negative_small_filtered_val[position]
    # Only the first mask of the record is rasterised.
    # NOTE(review): confirm that records always carry at least one mask (negative samples?).
    truth_mask = record.detection.masks[0].to_mask(record.common.height, record.common.width).data
    image_path = get_image_path(record_collection_with_negative_small_filtered_val, record_id)
    image = skio.imread(image_path)
    # necessary for 1 channel input since fastai uses PIL during predict
    prediction = learner.predict(np.squeeze(image))
    # Keep the first element of the predict() result as a numpy array.
    predicted_classes = prediction[0].cpu().detach().numpy()
    pred_arrs.append(predicted_classes)
    val_arrs.append(truth_mask)

In [None]:
# Confusion matrix and F1 from the collected ground-truth masks and predictions.
# NOTE(review): the literal 6 is presumably the class count, while the learner is built
# with n_out=7 — confirm the two are consistent.
cm, f1 = evaluation.cm_f1(val_arrs, pred_arrs, 6, mount_path) # todo add normalize false

In [None]:
# SECURITY NOTE(review): torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints from a trusted source. The absolute path is machine-specific and
# overwrites the learner trained above.
learner = torch.load("/root/data/experiments/cv2/10_May_2022_18_02_59_fastai_unet/18_64_0.493.pkl")


import skimage.io as skio
# `valid_records` was never defined in this notebook (NameError on a fresh kernel);
# use the validation record collection loaded earlier instead.
val_record_ids = record_collection_to_record_ids(record_collection_with_negative_small_filtered_val)
pred_arrs = []
# Run predict() on every validation image with learner logging suppressed; each entry
# appended to pred_arrs is the full value returned by learner.predict.
with learner.no_logging():
    for i in val_record_ids:
        p = get_image_path(record_collection_with_negative_small_filtered_val, i)
        arr = skio.imread(p)
        pred_arr = learner.predict(arr)
        pred_arrs.append(pred_arr)

In [None]:
# this results in vm dying, not just kernel crash
# coco_seg_dblock = DataBlock(
#     blocks=(ImageBlock, MaskBlock(codes=class_ints)),
#     get_x=get_image_by_record_id,
#     get_y=get_mask_by_record_id,
#     n_inp=1,
# )

# dls = coco_seg_dblock.dataloaders(source=record_ids, batch_size=5)


In [None]:
# Predictions over the first DataLoader in `dls`.
# NOTE(review): dls[0] is presumably the training loader — confirm that predicting on it
# (rather than on the validation loader) is intended.
result = learner.get_preds(dl=dls[0])

In [None]:
# Debugging aid, disabled: `??` is IPython-only source introspection and is not valid
# Python outside an interactive kernel.
# learner.get_preds??

In [None]:
# Number of collected predictions.
len(pred_arrs)

In [None]:
# Unpack the first collected prediction into three parts.
# NOTE(review): pred_arrs entries come from learner.predict(...), so `target_label` is
# presumably a model output rather than a ground-truth label — confirm the naming.
target_label,prediction_arr, activations = pred_arrs[0]

In [None]:
# Move the tensor to CPU, convert it to numpy and display it.
skio.imshow(target_label.cpu().detach().numpy())

In [None]:
# Display index 0 of base_img.
# NOTE(review): `base_img` is not defined anywhere in this notebook — this cell fails on a fresh kernel.
skio.imshow(base_img.cpu().detach().numpy()[0])

In [None]:
# Display index 1 of base_img.
# NOTE(review): `base_img` is not defined anywhere in this notebook — this cell fails on a fresh kernel.
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
# NOTE(review): exact duplicate of the previous cell (index 1 again) — possibly index 2 was intended; confirm.
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
# Display base_img as a whole.
# NOTE(review): `base_img` is not defined anywhere in this notebook — this cell fails on a fresh kernel.
skio.imshow(base_img.cpu().detach().numpy())

In [None]:
# Recorded statistics pasted from an earlier run. The bare `array(...)` reprs raised
# NameError (`array` is never imported), so they are bound to names via numpy instead.
# NOTE(review): three values each — presumably one per image channel; confirm what "stats" denotes.
channel_means = np.array([60.73, 190.3, 4.3598])  # means
channel_stats = np.array([16.099, 17.846, 9.603])  # stats
channel_means, channel_stats