In [1]:
from ceruleanml import data
from icevision.parsers import COCOMaskParser
from icevision.data import SingleSplitSplitter
from fastai.data.block import DataBlock
from fastai.vision.data import ImageBlock, MaskBlock
from fastai.vision.augment import aug_transforms
from fastai.vision.learner import unet_learner
from fastai.data.transforms import RandomSplitter, Normalize
from fastai.metrics import Dice
from ceruleanml.coco_load_fastai import record_collection_to_record_ids, get_image_path, record_to_mask
from torchvision.models import resnet18, resnet34, resnet50
from fastai.callback.fp16 import MixedPrecision
from fastai.callback.tensorboard import TensorBoardCallback
from datetime import datetime
from pathlib import Path
import os

[1m[1mINFO    [0m[1m[0m - [1mDownloading default `.ttf` font file - SpaceGrotesk-Medium.ttf from https://raw.githubusercontent.com/airctic/storage/master/SpaceGrotesk-Medium.ttf to /root/.icevision/fonts/SpaceGrotesk-Medium.ttf[0m | [36micevision.visualize.utils[0m:[36mget_default_font[0m:[36m67[0m


Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...


### Parsing COCO Dataset with Icevision

In [2]:
# Root folder holding the COCO annotation JSON (used as a prefix when the parser is built).
# NOTE: the trailing slash produces a double slash once "/<dataset>/..." is appended to it.
data_path = "/root/"
# Mounted data volume: tiled images are read from here and experiment outputs are written here.
mount_path = "/root/data"

In [5]:
# Flip the project's inverse class mapping so keys and values swap roles.
class_map = {value: key for key, value in data.class_mapping_coco_inv.items()}
# Integer mask codes 1 .. len(class_map) - 1 (one mapping entry gets no code of its own).
class_ints = list(range(1, len(class_map)))

# COCO annotations come from `data_path`, the tiled images from the mounted volume.
parser = COCOMaskParser(
    annotations_filepath=f"{data_path}/tile-cerulean-v2-partial-with-context/instances_Tiled Cerulean Dataset V2.json",
    img_dir=f"{mount_path}/tile-cerulean-v2-partial-with-context/tiled_images",
)
# parse() hands back two record collections, unpacked here as the train and validation splits.
train_records, valid_records = parser.parse(autofix=False)

  0%|          | 0/3883 [00:00<?, ?it/s]

### Importing functions for returning an image sample and a semantic segmentation label for each sample

In [6]:
# Collect the record ids of the training records; these ids are the items fed to the DataBlock.
record_ids = record_collection_to_record_ids(train_records)

### Constructing a FastAI DataBlock that uses parsed COCO Dataset from icevision parser and applies transformations

In [7]:
def get_image_by_record_id(record_id):
    """Return the image location for `record_id`, looked up in the global `train_records`.

    Used as the DataBlock `get_x` getter.
    """
    image_path = get_image_path(train_records, record_id)
    return image_path
def get_mask_by_record_id(record_id):
    """Return the segmentation mask for `record_id`, built from the global `train_records`.

    Used as the DataBlock `get_y` getter.
    """
    mask = record_to_mask(train_records, record_id)
    return mask

In [8]:
# Per-channel statistics as measured on the raw pixel scale (values exceed 1, i.e. 0-255 range).
_mean_255 = [60.73, 190.3, 4.3598]
_std_255 = [16.099, 17.846, 9.603]

# fastai's ImageBlock applies IntToFloatTensor (divide by 255) *before* Normalize runs,
# so the statistics handed to Normalize.from_stats must live on the same [0, 1] scale.
mean = [channel / 255 for channel in _mean_255]
std = [channel / 255 for channel in _std_255]

In [9]:
# aug_transforms() already returns a list of transforms; unpack it so the result is one flat
# list of transforms instead of a list nested inside a list.
batch_transfms = [*aug_transforms(), Normalize.from_stats(mean, std)]

In [10]:
#size = 64  # Progressive resizing could happen here
# NOTE(review): `augs` is built here but never passed to the DataBlock, so no augmentation
# is applied during training — confirm whether that is intentional.
augs = aug_transforms(flip_vert=True, max_warp=0.1) #, size=size)
coco_seg_dblock = DataBlock(
    blocks=(ImageBlock, MaskBlock(codes=class_ints)),
    get_x=get_image_by_record_id,
    get_y=get_mask_by_record_id,
    # Fixed seed: the train/validation split is identical on every run, which keeps
    # validation metrics comparable between experiments.
    splitter=RandomSplitter(seed=42),
    batch_tfms=[Normalize.from_stats(mean, std)],
    n_inp=1,
)

# Items are record ids; the getters above resolve each id to an image and a mask.
dls = coco_seg_dblock.dataloaders(source=record_ids, batch_size=5)

  ret = func(*args, **kwargs)


In [8]:
# Debug aid: walk the DataBlock pipeline on the record ids and print each setup step.
coco_seg_dblock.summary(record_ids)

Setting-up type transforms pipelines
Collecting items from [30477, 66481, 15704, 33144, 49895, 22086, 31863, 63334, 51044, 31480, 54136, 30504, 77054, 54822, 3115, 32915, 31024, 19683, 47035, 15826, 19682, 55872, 5276, 51749, 29087, 7293, 44622, 24519, 56022, 67646, 19844, 8076, 54279, 25627, 49696, 49693, 24870, 31937, 28516, 42301, 27093, 70866, 68064, 39139, 23451, 66082, 37502, 19667, 46713, 18865, 77673, 53899, 25100, 35504, 17075, 64430, 22901, 21465, 53250, 33273, 27649, 44013, 5419, 34301, 16425, 44029, 22440, 51270, 43292, 74108, 62090, 58729, 30723, 11707, 54643, 3130, 31330, 22492, 19827, 42236, 18133, 15720, 64254, 20665, 27622, 32476, 11008, 7279, 72481, 13281, 29018, 37925, 74293, 45840, 31279, 34729, 65017, 46360, 34254, 22084, 47701, 37924, 18684, 55886, 26836, 72468, 11874, 36916, 36500, 64651, 77674, 9420, 10881, 55472, 37853, 44056, 26264, 26707, 59110, 68275, 8659, 49424, 65818, 22100, 65031, 36523, 20474, 13037, 50470, 28, 23100, 66095, 63063, 8091, 47019, 38258, 1

### Fastai2 Trainer

In [16]:
# Timestamped output folder so each training run writes into its own directory.
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d_%b_%Y_%H_%M_%S")
experiment_dir = Path(f'{mount_path}/experiments/cv2/{timestampStr}_fastai_unet/')
# parents=True also creates `experiments/cv2/` when it is missing; without it mkdir raises
# FileNotFoundError on a fresh volume.
experiment_dir.mkdir(parents=True, exist_ok=True)

In [15]:
# ResNet depth used for the U-Net encoder; must be one of the keys of `archs`.
arch = 50
# Maps a depth to the torchvision model constructor that is handed to unet_learner.
archs = {18: resnet18, 34: resnet34, 50: resnet50}

In [17]:
# U-Net on the selected ResNet backbone, trained with mixed precision; model files are
# written to the experiment directory.
# NOTE(review): n_out=7 is hard-coded — confirm it matches the number of mask classes.
learn = unet_learner(
    dls,
    archs[arch],
    metrics=[Dice()],
    model_dir=experiment_dir,
    n_out=7,
    cbs=[MixedPrecision],
)

# Learning-rate range test; the result is kept in `lr`, while fine_tune uses a fixed 2e-4.
lr = learn.lr_find()

cbs = [TensorBoardCallback(projector=False, trace_model=False)]

learn.fine_tune(2, 2e-4, cbs=cbs)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

RuntimeError: CUDA out of memory. Tried to allocate 728.00 MiB (GPU 0; 14.76 GiB total capacity; 11.34 GiB already allocated; 209.75 MiB free; 13.32 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Run validation; element [1] of the result is used later when naming the exported model.
validation = learn.validate()

In [None]:
# `size` only exists when the progressive-resizing line in the DataBlock cell is enabled.
# Fall back to a "full" tag so this cell does not raise NameError on a fresh kernel.
size_tag = globals().get("size", "full")
# File name pattern: <resnet depth>_<tile size>_<validation[1] rounded to 3 decimals>.pkl
savename = f'{arch}_{size_tag}_{round(validation[1],3)}.pkl'
learn.export(f'{experiment_dir}/{savename}')

In [None]:
# Show up to 4 sample results on a 20x20 figure.
# NOTE(review): vmin/vmax presumably fix the colour range of the plotted masks; vmax=3 while
# the learner was built with n_out=7 — confirm the intended range.
learn.show_results(max_n=4, figsize=(20,20), vmin=0, vmax=3)

Default path for tensorboard logs is `./runs/`

In [None]:
# List the TensorBoard log folders written under ./runs/.
!ls './runs/'

Copy the logs to the appropriate experiment folder in the mounted GCS volume.

In [None]:
!cp -R './runs/' {modelpath}'/tensorboard/'

Now run the following from any machine that is authenticated with GCS:

In [None]:
# Launch TensorBoard on the local log directory.
!tensorboard --logdir="./runs"


# Model Inference and Result Evaluation

In [9]:
import torch

In [10]:
# Load a previously exported learner from a hard-coded experiment path.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code — only load
# files from a trusted source. The path also pins one specific past run.
learner = torch.load("/root/data/experiments/cv2/10_May_2022_18_02_59_fastai_unet/18_64_0.493.pkl")

In [11]:
# Development-time introspection: print the source of learner.predict (not needed for a re-run).
learner.predict??

[0;31mSignature:[0m [0mlearner[0m[0;34m.[0m[0mpredict[0m[0;34m([0m[0mitem[0m[0;34m,[0m [0mrm_type_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mwith_input[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m Prediction on `item`, fully decoded, loss function decoded and probabilities
[0;31mSource:[0m   
    [0;32mdef[0m [0mpredict[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mitem[0m[0;34m,[0m [0mrm_type_tfms[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mwith_input[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0mdl[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mdls[0m[0;34m.[0m[0mtest_dl[0m[0;34m([0m[0;34m[[0m[0mitem[0m[0;34m][0m[0;34m,[0m [0mrm_type_tfms[0m[0;34m=[0m[0mrm_type_tfms[0m[0;34m,[0m [0mnum_workers[0m[0;34m=[0m[0;36m0[0m[0;34m)[0m[0;34m[0m
[0;34m[0m        [0minp[0m[0;34m,[0m[0mpreds[0m[0;34m,[0m[0m_[0m[0;34m,[0m[0mdec_p

In [None]:
import skimage.io as skio

val_record_ids = record_collection_to_record_ids(valid_records)

# Predict tile by tile over the validation records with the learner's logging switched off;
# each entry of `pred_arrs` is the tuple returned by learner.predict for one image.
with learner.no_logging():
    pred_arrs = [
        learner.predict(skio.imread(get_image_path(valid_records, record_id)))
        for record_id in val_record_ids
    ]

In [None]:
# WARNING: running the DataBlock below (no splitter, no batch_tfms) took down the whole VM, not just the kernel.
# coco_seg_dblock = DataBlock(
#     blocks=(ImageBlock, MaskBlock(codes=class_ints)),
#     get_x=get_image_by_record_id,
#     get_y=get_mask_by_record_id,
#     n_inp=1,
# )

# dls = coco_seg_dblock.dataloaders(source=record_ids, batch_size=5)


In [None]:
# Batch inference over the first DataLoader in `dls`.
# NOTE(review): dls[0] is presumably the training loader — confirm the validation loader
# (dls[1]) is not what was meant for evaluation.
result = learner.get_preds(dl=dls[0])

In [None]:
# Development-time introspection: print the source of learner.get_preds (not needed for a re-run).
learner.get_preds??

In [None]:
# Sanity check: number of collected predictions (one per validation record id).
len(pred_arrs)

In [None]:
# Unpack the first tuple returned by learner.predict.
# NOTE(review): predict returns predictions only (fully decoded, loss-function decoded,
# probabilities), so `target_label` is presumably the decoded predicted mask rather than a
# ground-truth label — confirm and consider renaming.
target_label,prediction_arr, activations = pred_arrs[0]

In [None]:
# Move the first element of the prediction tuple to CPU/NumPy and display it.
skio.imshow(target_label.cpu().detach().numpy())

In [None]:
# Display index 0 along the first axis of `base_img`.
# NOTE(review): `base_img` is not defined in any visible cell — this fails on a fresh kernel.
skio.imshow(base_img.cpu().detach().numpy()[0])

In [None]:
# Display index 1 along the first axis of `base_img`.
# NOTE(review): `base_img` is not defined in any visible cell — this fails on a fresh kernel.
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
# NOTE(review): exact duplicate of the previous cell (index 1 again) — index 2 may have been
# intended; `base_img` is also not defined in any visible cell.
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
# Display the whole `base_img` array at once.
# NOTE(review): `base_img` is not defined in any visible cell — this fails on a fresh kernel.
skio.imshow(base_img.cpu().detach().numpy())

In [None]:
# `array` was never imported in this notebook; bring it into scope so the cell runs.
from numpy import array

# Reference copy of the per-channel dataset statistics (values presumably on the raw 0-255 pixel scale).
array([60.73, 190.3, 4.3598])    # per-channel means
array([16.099, 17.846, 9.603])   # per-channel standard deviations