In [None]:
import os
import random
from datetime import datetime
from pathlib import Path
from shutil import copyfile

import numpy as np
from ceruleanml import data
from icevision.parsers import COCOMaskParser
from icevision.data import SingleSplitSplitter
from fastai.data.block import DataBlock
from fastai.vision.data import ImageBlock, MaskBlock
from fastai.vision.data import SegmentationDataLoaders
from fastai.vision.augment import aug_transforms
from fastai.data.transforms import RandomSplitter
from fastai.data.transforms import FuncSplitter, get_image_files
from fastai.vision.learner import unet_learner
from fastai.vision.models import resnet18, resnet34, resnet50
from fastai.metrics import Dice
from fastai.callback.fp16 import MixedPrecision
from fastai.callback.tensorboard import TensorBoardCallback

### Parsing COCO Dataset with Icevision

In [None]:
# Target size handed to the augmentations; progressive resizing could hook in here.
size = 64
augs = aug_transforms(size=size, max_warp=0.1, flip_vert=True)

# Swap keys and values of the project's inverse COCO class mapping.
class_map = {
    value: key for key, value in data.class_mapping_coco_inv.items()
}
# Consecutive integer codes starting at 1, one fewer than the number of map entries.
class_ints = list(range(1, len(class_map)))

parser = COCOMaskParser(
    img_dir="./tiled_images",
    annotations_filepath="./instances_slicks_test_v2_small.json",
)

# Parse with a single-split splitter; downstream cells index the result with [0].
train_records = parser.parse(data_splitter=SingleSplitSplitter())

### Importing functions for returning an image sample and a semantic segmentation label for each sample

In [None]:
from ceruleanml.coco_load_fastai import (
    get_image_path,
    record_collection_to_record_ids,
    record_to_mask,
)

# Record ids for the first parsed record collection; these become the DataBlock items.
record_ids = record_collection_to_record_ids(train_records[0])

### Constructing a FastAI DataBlock that uses the parsed COCO Dataset from the icevision parser

In [None]:
# Image -> mask segmentation pipeline driven by record ids: each getter looks
# the id up in the first parsed record collection.
coco_seg_dblock = DataBlock(
    blocks=(ImageBlock, MaskBlock(codes=class_ints)),
    n_inp=1,
    get_x=lambda rid: get_image_path(train_records[0], rid),
    get_y=lambda rid: record_to_mask(train_records[0], rid),
    splitter=RandomSplitter(),
    batch_tfms=aug_transforms(),
)

dls = coco_seg_dblock.dataloaders(batch_size=7, source=record_ids)

In [None]:
# Preview up to 7 samples from the current `dls`.
dls.show_batch(max_n=7)

In [None]:
# NOTE(review): `seg_db` and `path` are first assigned in later cells of this
# notebook, so this cell raises NameError on a fresh top-to-bottom run. It also
# rebinds `dls`, replacing the COCO dataloaders built above. The identical call
# appears again after `seg_db` is defined - consider removing this copy.
dls = SegmentationDataLoaders.from_dblock(
    dblock = seg_db,
    source = path,
    path = path,
    bs = 4)

## CV1 Parser

In [None]:
# Root of the mounted CV1 transfer data and its labeled-data subfolder.
mount_path = "/root/data/cv1_transfer/"
ml_data_path = os.path.join(mount_path, "labeled_data")
path = Path(ml_data_path)

# Three whitespace-delimited text files, each loaded as an array of strings.
oil_chps, codes, valid_names = (
    np.loadtxt(path / txt_name, dtype=str)
    for txt_name in ("oil_chps.txt", "codes.txt", "valid_names.txt")
)

In [None]:
import skimage.io as skio
import numpy as np
import dask

labels = path/"lbl"
labels_no_3 = path/"lbl_no3"

os.makedirs(labels_no_3, exist_ok=True)

# NOTE(review): only the first 100 label files (in glob order) are rewritten,
# while get_lbls later reads every chip's label from lbl_no3 - confirm the cap
# is intentional and not a debugging leftover.
max_labels = 100

lazy_results = []
for label_path in list(labels.glob("*.png"))[:max_labels]:
    arr = skio.imread(label_path)
    # I think this was setting a class we didn't need to 0
    # for the purposes of recreating Jona's binary classifier
    arr[arr == 3] = 0
    # dask.delayed only builds the task here; imsave itself runs (and can fail)
    # inside dask.compute below, so wrapping this line in try/except would not
    # guard the write.
    lazy_results.append(
        dask.delayed(skio.imsave)(labels_no_3 / label_path.name, arr, check_contrast=False)
    )
computed_results = dask.compute(*lazy_results)

In [None]:
# Fixed seed so the sampled oil-free chips are the same on every run.
random.seed(42)

# np.loadtxt returns a 0-d array for a one-line file; normalise to a list of
# plain str so iteration and set membership both work.
oil_chp_names = np.atleast_1d(oil_chps).tolist()
oil_chp_set = set(oil_chp_names)

oilless_chps = [chp for chp in get_image_files(path/'chp') if chp.stem not in oil_chp_set]

# Number of oil-free chips to add per oil chip (0 = oil chips only).
density = 0
# random.sample needs an int no larger than the population, so truncate a
# fractional density and cap the request at the number of available chips.
num_oilless = min(int(density * len(oil_chp_names)), len(oilless_chps))
mixed_chps = random.sample(oilless_chps, num_oilless) + [path/'chp'/(chp+'.png') for chp in oil_chp_names]
len(mixed_chps)

In [None]:
# Display the class codes loaded from codes.txt.
codes

In [None]:
def get_chps(path):
    """Return the module-level ``mixed_chps`` list; ``path`` is accepted but unused."""
    return mixed_chps


def get_lbls(fn):
    """Map a chip path to the same-named file in the ``lbl_no3`` folder beside its parent."""
    label_dir = fn.parent.parent / "lbl_no3"
    return label_dir / fn.name


def splitter_func(fn):
    """Return True when the file name, minus its last ``_``-suffix, is in ``valid_names``.

    fastai's ``FuncSplitter`` sends items for which this returns True to the
    validation set, so True-for-valid is the expected convention.
    """
    name_prefix = fn.name.rsplit('_', 1)[0]
    return name_prefix in valid_names

# get_chps ignores its argument, so this is simply the mixed_chps list.
drive_files = get_chps(path)

In [None]:
# Create a timestamped experiment directory; it doubles as the model output dir.
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d_%b_%Y_%H_%M_%S")
lcl = Path('/root/data/experiments/cv1/'+timestampStr+'/')
#lcl = Path("../out_data")
# parents=True: the experiments/cv1 parent folders may not exist yet on a fresh mount.
lcl.mkdir(parents=True, exist_ok=True)
modelpath = lcl

# Optional: mirror chips and labels into the experiment directory and train
# from there. When enabled this rebinds `path` to the local copy.
use_lcl = False
if use_lcl:
    path = lcl
    lcl_chp = path/"chp"
    lcl_lbl = path/"lbl_no3"
    lcl_chp.mkdir(exist_ok=True)
    lcl_lbl.mkdir(exist_ok=True)
    for i, f in enumerate(drive_files): # 10 minutes?!
        # Progress line every 50 files.
        if not i%50: print(i, f)
        # Skip files that were already copied so the cell can be re-run.
        if not (lcl_chp/f.name).exists():
            copyfile(f, lcl_chp/f.name)
        if not (lcl_lbl/f.name).exists():
            copyfile(get_lbls(f), lcl_lbl/f.name)

In [None]:
# Segmentation DataBlock for the CV1 chips: items come from get_chps, masks
# from get_lbls, and splitter_func decides the train/validation split.
seg_db = DataBlock(
    blocks=(ImageBlock, MaskBlock(codes=codes)),
    get_items=get_chps,
    get_y=get_lbls,
    splitter=FuncSplitter(splitter_func),
    batch_tfms=augs,
)

In [None]:
# Build the CV1 dataloaders from seg_db with a batch size of 4.
dls = SegmentationDataLoaders.from_dblock(
    dblock=seg_db, source=path, path=path, bs=4
)

In [None]:
# Run the DataBlock's summary on `path` to sanity-check the pipeline.
seg_db.summary(path)

In [None]:
# Preview a batch; vmin/vmax presumably pin the mask colour range to 0-3 - TODO confirm.
dls.show_batch(vmin=0, vmax=3)

In [None]:
# TODO: add a callback that saves the best model
# TODO: write to the drive, not to lcl

In [None]:
# Available backbones keyed by ResNet depth, and the depth used for this run.
archs = {
    18: resnet18,
    34: resnet34,
    50: resnet50,
}
arch = 18

In [None]:
# U-Net learner on the selected backbone, scored with Dice; models are written
# under modelpath and the MixedPrecision callback is attached.
learn = unet_learner(
    dls,
    archs[arch],
    metrics=[Dice()],
    model_dir=modelpath,
    cbs=[MixedPrecision],
)

In [None]:
# Run the learning-rate finder.
# NOTE(review): the result is only displayed; fine_tune below uses a hard-coded 2e-4.
lr = learn.lr_find()

In [None]:
# Display the value returned by lr_find.
lr

In [None]:
# TensorBoard logging callback with the projector and model tracing switched off.
cbs = [TensorBoardCallback(projector=False, trace_model=False)]


In [None]:
# Fine-tune for 5 epochs at a base learning rate of 2e-4 with TensorBoard logging.
learn.fine_tune(5, 2e-4, cbs=cbs)  # alternative: cbs=SaveModelCallback(monitor='dice')

In [None]:
# Optional: uncomment to load previously saved weights named "model".
# learn.load("model")

In [None]:
# Display the learner object.
learn

In [None]:
# validate() returns the loss followed by the metrics; index 1 is the first
# (and only) metric configured on the learner.
val_metric = round(learn.validate()[1], 3)
# File name encodes density, backbone depth, image size and the validation score.
savename = f'{density}_{arch}_{size}_{val_metric}.pkl'
learn.export(f'{modelpath}/{savename}')

In [None]:
# List the experiment directory (bare `ls` relies on IPython's automagic alias).
ls {modelpath}

In [None]:
# Show up to 4 results on a 20x20 figure, with the same vmin/vmax as show_batch.
learn.show_results(max_n=4, figsize=(20,20), vmin=0, vmax=3)

Default path for tensorboard logs is `./runs/`

In [None]:
# List the local TensorBoard run directory.
!ls './runs/'

Copy the logs to the appropriate experiments folder in the mounted GCS volume.

In [None]:
# Recursively copy ./runs/ into a tensorboard/ folder under modelpath.
!cp -R './runs/' {modelpath}'/tensorboard/'

Now run the following from any machine with GCS authenticated:

In [None]:
!tensorboard --logdir="/root/data/experiments/cv1/09_Mar_2022_18_32_17/tensorboard"
