In [41]:
import os, random
from datetime import datetime
from pathlib import Path

import skimage.io as skio
import torch
from fastai.callback.fp16 import MixedPrecision
from fastai.callback.tensorboard import TensorBoardCallback
from fastai.data.block import DataBlock
from fastai.data.transforms import IndexSplitter, Normalize
from fastai.metrics import Dice
from fastai.vision.augment import aug_transforms
from fastai.vision.data import ImageBlock, MaskBlock
from fastai.vision.learner import unet_learner
from icevision.visualize import show_data
from torchvision.models import resnet18, resnet34, resnet50

from ceruleanml import data
from ceruleanml import evaluation
from ceruleanml import preprocess
from ceruleanml.coco_load_fastai import record_collection_to_record_ids, get_image_path, record_to_mask

### Parsing COCO Dataset with Icevision

In [2]:
# Invert ceruleanml's COCO class mapping: its values become the keys here.
class_map = {value: key for key, value in data.class_mapping_coco_inv.items()}
# Integer codes 1 .. len(class_map) - 1, i.e. one fewer code than there are
# entries in class_map.
class_ints = list(range(1, len(class_map)))

In [3]:
# Filesystem roots.
data_path = "/root/"
mount_path = "/root/data"

# Partition names and the COCO annotation file stored in each partition.
train_set = "train-no-context"
val_set = "val-no-context"
json_name_train = "instances_TiledCeruleanDatasetV2NoContextFiles.json"
json_name_val = "instances_TiledCeruleanDatasetV2NoContextFiles.json"

# Resolve annotation files and tile folders under <mount_path>/partitions/<set>/.
partitions_dir = f"{mount_path}/partitions"
coco_json_path_train = f"{partitions_dir}/{train_set}/{json_name_train}"
coco_json_path_val = f"{partitions_dir}/{val_set}/{json_name_val}"
tiled_images_folder_train = f"{partitions_dir}/{train_set}/tiled_images_no_context"
tiled_images_folder_val = f"{partitions_dir}/{val_set}/tiled_images_no_context"

# Class lookup (inverse of ceruleanml's mapping) and the integer mask codes.
class_map = {value: key for key, value in data.class_mapping_coco_inv.items()}
class_ints = list(range(1, len(class_map)))

# Arguments handed to preprocess.load_set_record_collection further down.
negative_sample_count = 100
negative_sample_count_val = 50
area_thresh = 10

# Per-channel statistics passed to Normalize.from_stats.
mean = [60.73, 190.3, 4.3598]
std = [16.099, 17.846, 9.603]
# f"{mount_path}/partitions/val/instances_tiled_cerulean_train_v2.json"

## Looking at the area distribution to choose an area threshold

In [4]:
# Table of annotations for the training partition (columns: record_id, label,
# area); inspected to pick a sensible value for `area_thresh`.
df = preprocess.get_area_df(coco_json_path_train, tiled_images_folder_train)
df

  0%|          | 0/3863 [00:00<?, ?it/s]

Unnamed: 0,record_id,label,area
0,857,infra_slick,5918
1,4261,infra_slick,3366
2,4247,infra_slick,468
3,4248,infra_slick,326
4,7857,infra_slick,319
...,...,...,...
366,72482,ambiguous,145185
367,72483,ambiguous,2
368,72496,ambiguous,134272
369,72497,ambiguous,48306


In [5]:
# Parse the training partition into an icevision record collection.
# NOTE(review): judging by the variable name, the helper presumably drops
# annotations below `area_thresh` and adds `negative_sample_count` negative
# tiles — confirm in preprocess.load_set_record_collection.
record_collection_with_negative_small_filtered_train = preprocess.load_set_record_collection(
    coco_json_path_train, tiled_images_folder_train, area_thresh, negative_sample_count
)
# Record ids of the parsed training records, used later as DataBlock items.
record_ids_train = record_collection_to_record_ids(record_collection_with_negative_small_filtered_train)

  0%|          | 0/3863 [00:00<?, ?it/s]

100%|██████████| 3014/3014 [00:00<00:00, 95783.00it/s]


  0%|          | 0/100 [00:00<?, ?it/s]

[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m122[0m


  0%|          | 0/100 [00:00<?, ?it/s]

In [6]:
# Parse the validation partition the same way as the training partition, but
# with `negative_sample_count_val` as the negative-sample argument.
record_collection_with_negative_small_filtered_val = preprocess.load_set_record_collection(
    coco_json_path_val, tiled_images_folder_val, area_thresh, negative_sample_count_val
)
# Record ids of the parsed validation records.
record_ids_val = record_collection_to_record_ids(record_collection_with_negative_small_filtered_val)

  0%|          | 0/352 [00:00<?, ?it/s]

100%|██████████| 266/266 [00:00<00:00, 91848.59it/s]


  0%|          | 0/50 [00:00<?, ?it/s]

[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m122[0m


  0%|          | 0/50 [00:00<?, ?it/s]

In [7]:
# Record ids must be unique within each split AND across the two splits: the
# combined collection is later queried by record id, so an id shared between
# train and val would alias samples. Comparing per-split set sizes alone (as
# before) cannot detect ids that appear in both splits.
_train_id_set = set(record_ids_train)
_val_id_set = set(record_ids_val)
assert len(_train_id_set) == len(record_ids_train), "duplicate record ids in the training set"
assert len(_val_id_set) == len(record_ids_val), "duplicate record ids in the validation set"
assert _train_id_set.isdisjoint(_val_id_set), "record ids shared between training and validation sets"

In [44]:
# Concatenate training first, validation second: the validation items therefore
# occupy the tail of both the id list and the combined record collection.
train_val_record_ids = record_ids_train + record_ids_val
combined_record_collection = record_collection_with_negative_small_filtered_train + record_collection_with_negative_small_filtered_val

In [45]:
def get_val_indices(combined_ids, val_ids):
    """Return the positions of the validation items inside ``combined_ids``.

    ``combined_ids`` is expected to be the training ids followed by the
    validation ids, so the validation items are the last ``len(val_ids)``
    positions of the combined sequence.

    Args:
        combined_ids: sequence of all ids (training ids first, validation last).
        val_ids: sequence of the validation ids.

    Returns:
        A list of integer indices into ``combined_ids``; empty when ``val_ids``
        is empty.

    Raises:
        ValueError: if ``val_ids`` is longer than ``combined_ids``.
    """
    n_total = len(combined_ids)
    n_val = len(val_ids)
    if n_val > n_total:
        raise ValueError("val_ids cannot be longer than combined_ids")
    # An explicit range (rather than a negative slice) keeps the empty
    # validation case correct: [-0:] would select everything.
    return list(range(n_total - n_val, n_total))

In [46]:
#show_data.show_records(random.choices(combined_train_records, k=9), ncols=3)

### Constructing a FastAI DataBlock that uses parsed COCO Dataset from icevision parser and applies transformations

In [47]:
mean = [60.73,       190.3,      4.3598]
std = [16.099,      17.846,       9.603]

In [48]:
# aug_transforms() returns a *list* of transforms; unpack it so the batch
# transforms stay a flat list instead of a list nested inside a list.
batch_transfms = [*aug_transforms(), Normalize.from_stats(mean, std)]

In [49]:
val_indices = get_val_indices(train_val_record_ids, record_ids_val)

In [51]:
def get_image_by_record_id(record_id):
    """Look up ``record_id`` in the combined train+val record collection via ``get_image_path``.

    Used as the DataBlock ``get_x`` getter.
    """
    return get_image_path(combined_record_collection, record_id)

def get_mask_by_record_id(record_id):
    """Look up ``record_id`` in the combined train+val record collection via ``record_to_mask``.

    Used as the DataBlock ``get_y`` getter.
    """
    return record_to_mask(combined_record_collection, record_id)

#size = 64  # Progressive resizing could happen here
# NOTE(review): `augs` is built here but never handed to the DataBlock below,
# so only normalization is applied per batch — confirm whether that is intended.
augs = aug_transforms(flip_vert=True, max_warp=0.1) #, size=size)

# Items are record ids; the getters resolve each id to its image and mask.
normalize_tfm = Normalize.from_stats(mean, std)
coco_seg_dblock = DataBlock(
    blocks=(ImageBlock, MaskBlock(codes=class_ints)),
    n_inp=1,
    get_x=get_image_by_record_id,
    get_y=get_mask_by_record_id,
    splitter=IndexSplitter(val_indices),
    batch_tfms=[normalize_tfm],
)

dls = coco_seg_dblock.dataloaders(source=train_val_record_ids, batch_size=1)

  ret = func(*args, **kwargs)


### Fastai2 Trainer

In [52]:
# Timestamped output directory for this run:
# <mount_path>/experiments/cv2/<timestamp>_fastai_unet
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d_%b_%Y_%H_%M_%S")
experiment_dir =  Path(f'{mount_path}/experiments/cv2/'+timestampStr+'_fastai_unet/')
# parents=True so the cell also works on a fresh volume where
# experiments/cv2 has not been created yet (mkdir would otherwise raise
# FileNotFoundError).
experiment_dir.mkdir(parents=True, exist_ok=True)

In [53]:
arch = 18
archs = {18: resnet18, 34: resnet34, 50: resnet50}

In [54]:
# U-Net learner on the ResNet backbone selected by `arch`, trained with mixed
# precision and scored with Dice.
# NOTE(review): n_out=7 is hard-coded — confirm it matches the number of mask classes.
learner = unet_learner(
    dls,
    archs[arch],
    n_out=7,
    metrics=[Dice()],
    cbs=[MixedPrecision],
    model_dir=experiment_dir,
)

#lr = learner.lr_find()

# TensorBoard logging is attached for the fine-tuning run only.
cbs = [TensorBoardCallback(projector=False, trace_model=False)]

learner.fine_tune(1, 2e-4, cbs=cbs)  #, cbs=SaveModelCallback(monitor='dice'))



epoch,train_loss,valid_loss,dice,time


KeyboardInterrupt: 

In [None]:
validation = learner.validate()

In [None]:
size=512
savename = f'test_1batch_{arch}_{size}_{round(validation[1],3)}.pt'

In [None]:
from ceruleanml.inference import save_fastai_model_state_dict_and_tracing, load_tracing_model, test_tracing_model_one_batch, logits_to_classes

In [None]:
state_dict_pth, tracing_model_gpu_pth, tracing_model_cpu_pth  = save_fastai_model_state_dict_and_tracing(learner, dls, savename, experiment_dir)

In [None]:
import torch
# NOTE(review): the two assignments below overwrite `experiment_dir` (a Path
# created earlier for the current run) and `savename` with hard-coded values
# from a previous run.
experiment_dir = '/root/data/experiments/cv2/24_May_2022_01_49_56_fastai_unet/'
savename = "tracing_cpu_test_1batch_18_512_0.082.pt"
# Load the traced model from disk and run it on one batch with the
# dataloaders moved to CPU.
tracing_model = load_tracing_model(os.path.join(experiment_dir, savename))
out_batch_logits = test_tracing_model_one_batch(dls.to('cpu'), tracing_model)

In [None]:
conf, classes = logits_to_classes(out_batch_logits)

In [None]:
classes.shape

In [None]:
conf.shape

In [None]:
# The trained object in this notebook is `learner`; `learn` is never defined.
learner.show_results(max_n=4, figsize=(20,20), vmin=0, vmax=3)

Default path for tensorboard logs is `./runs/`

In [None]:
f'{mount_path}/experiments/cv2/'

In [None]:
!ls '/root/data/experiments/cv2/20_May_2022_19_29_39_fastai_unet'

In [None]:
!ls './runs/'

Copy the logs to the appropriate experiments folder in the mounted GCS volume.

In [None]:
!cp -R './runs/' {modelpath}'/tensorboard/'

Now, run the following from any machine that is authenticated with GCS:

In [None]:
!tensorboard --logdir="./runs"


# Model Inference and Result Evaluation

In [None]:
# Predict on the first 10 validation records and collect the ground-truth and
# predicted masks side by side for evaluation.
# `valid_records` was never defined; the validation record collection parsed
# earlier in this notebook is the one to use.
valid_records = record_collection_with_negative_small_filtered_val
val_record_ids = record_collection_to_record_ids(valid_records)
pred_arrs = []
val_arrs = []
# `learner` (not the undefined `learn`) is the trained fastai Learner.
with learner.no_logging():
    for position, record_id in enumerate(val_record_ids[0:10]):
        record = valid_records[position].as_dict()
        # NOTE(review): only the first mask of each record is used, and a record
        # without masks (e.g. a negative sample) would raise IndexError here — confirm.
        true_mask = record['detection']['masks'][0].to_mask(
            record['common']['height'], record['common']['width']
        ).data
        image_path = get_image_path(valid_records, record_id)
        image_arr = skio.imread(image_path)
        # predict() returns a tuple; its first element is kept as the predicted mask.
        prediction = learner.predict(image_arr)
        pred_arrs.append(prediction[0].cpu().detach().numpy())
        val_arrs.append(true_mask)

In [None]:
# `eval` is Python's builtin and has no `cm_f1`; the helper lives in the
# `evaluation` module imported from ceruleanml at the top of the notebook.
cm, f1 = evaluation.cm_f1(val_arrs, pred_arrs, 6, mount_path)

In [None]:
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source. This also rebinds
# `learner`, replacing the model trained above.
learner = torch.load("/root/data/experiments/cv2/10_May_2022_18_02_59_fastai_unet/18_64_0.493.pkl")

In [None]:
learner.predict??

In [None]:
import skimage.io as skio
# `valid_records` was never defined; use the validation record collection
# parsed earlier in this notebook.
valid_records = record_collection_with_negative_small_filtered_val
val_record_ids = record_collection_to_record_ids(valid_records)
# Keep the full tuple returned by predict() for every validation record.
pred_arrs = []
with learner.no_logging():
    for record_id in val_record_ids:
        image_path = get_image_path(valid_records, record_id)
        image_arr = skio.imread(image_path)
        pred_arrs.append(learner.predict(image_arr))

In [None]:
# this results in vm dying, not just kernel crash
# coco_seg_dblock = DataBlock(
#     blocks=(ImageBlock, MaskBlock(codes=class_ints)),
#     get_x=get_image_by_record_id,
#     get_y=get_mask_by_record_id,
#     n_inp=1,
# )

# dls = coco_seg_dblock.dataloaders(source=record_ids, batch_size=5)


In [None]:
result = learner.get_preds(dl=dls[0])

In [None]:
learner.get_preds??

In [None]:
len(pred_arrs)

In [None]:
target_label,prediction_arr, activations = pred_arrs[0]

In [None]:
skio.imshow(target_label.cpu().detach().numpy())

In [None]:
skio.imshow(base_img.cpu().detach().numpy()[0])

In [None]:
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
skio.imshow(base_img.cpu().detach().numpy())

In [None]:
# Per-channel normalization statistics (same values as `mean` / `std` in the
# config cell), kept for reference. The original lines were pasted output that
# called an undefined `array`, and the second line was mislabelled "stats".
#   means: [60.73, 190.3, 4.3598]
#   stds:  [16.099, 17.846, 9.603]