# Experiment Notebook for CV2

This notebook is a development workspace for adding or removing features during model development. Once a set of changes is ready to be run as an experiment, clean the notebook up, copy it, and save it as a new notebook that runs end to end without errors; commit that new notebook in its own git commit. For example, "fastai2_unet_trainer_cv2-1channel-baseline.ipynb" is an experiment notebook: once committed, it should not be changed in version control, although you may edit its cells on your local machine to inspect the results.

In [None]:
from torchvision.models import resnet18, resnet34, resnet50
# Per-image-size lookup tables; the keys are image sizes in pixels.
bs_d ={512:4, 256:32, 224:32, 128:64, 64:256} # Batch Size for each image size
lr_d = {512:3e-4, 256:1e-3, 224:3e-3, 128:3e-3, 64:1e-2} # Learning Rate for each image size
mins_d = {512:5.88, 256:1.5, 224:1.15, 128:0.4, 64:0.2} # Duration of one epoch, in minutes, for each image size (used to turn a time budget into an epoch count)
arch_d = {18: resnet18, 34: resnet34, 50: resnet50} # ResNet Architectures, keyed by the value of `arch`

# List of [px size, training time in minutes] pairs, one per training stage.
# `*20` repeats the same inner list object 20 times; that is fine here because
# the training loop only reads the entries.
run_list = [[224, 11]]*20
with_context = False  # selects the "with-context" vs "no-context" dataset partitions
fp16 = False  # when True the learner is converted with learner.to_fp16()
n = "all"  # only printed as "n chips" in the training loop
arch = 34  # key into arch_d
negative_sample_count = 0  # passed to preprocess.load_set_record_collection for the train set
negative_sample_count_val = 0  # passed to preprocess.load_set_record_collection for the val set
area_thresh = 10  # passed to preprocess.load_set_record_collection (presumably a minimum annotation area -- confirm)
# Passed as `remap_dict` to preprocess.load_set_record_collection.
# NOTE(review): mapping a class name to None presumably drops that class -- confirm against ceruleanml.preprocess.
remap_dict = {
    "ambiguous": None,
}

In [None]:
from ceruleanml import data
from ceruleanml import evaluation
from ceruleanml import preprocess
from ceruleanml import inference
from fastai.data.block import DataBlock
from fastai.vision.data import ImageBlock, MaskBlock
from fastai.vision.augment import aug_transforms, Resize
from fastai.vision.learner import unet_learner
from fastai.data.transforms import IndexSplitter
from fastai.metrics import DiceMulti, Dice, accuracy_multi, PrecisionMulti, RecallMulti
from ceruleanml.coco_load_fastai import record_collection_to_record_ids, get_image_path, record_to_mask
from fastai.callback.fp16 import MixedPrecision
from fastai.callback.tensorboard import TensorBoardCallback
from fastai.vision.core import PILImageBW
from datetime import datetime
from pathlib import Path
import os, random
from icevision.visualize import show_data
import torch
from fastai.callback.tracker import EarlyStoppingCallback, SaveModelCallback
import skimage.io as skio
import numpy as np
from math import log
import glob
from google.cloud import storage


In [None]:
### Parsing COCO Dataset
# Choose partition names, tile folder and annotation file for the requested
# context mode. Train and val share the same folder and JSON file names and
# differ only in the partition directory.
if with_context:
    train_set, val_set = "train-with-context-512", "val-with-context-512"
    tiled_images_folder_train = tiled_images_folder_val = "tiled_images"
    json_name_train = json_name_val = "instances_TiledCeruleanDatasetV2.json"
else:
    train_set, val_set = "train-no-context-512", "val-no-context-512"
    tiled_images_folder_train = tiled_images_folder_val = "tiled_images_no_context"
    json_name_train = json_name_val = "instances_TiledCeruleanDatasetV2NoContextFiles.json"

# Root directory under which the dataset partitions are read (it is also
# reused later in the notebook for experiment outputs).
mount_path = "/root/"
# NOTE(review): mount_path already ends with "/", so every path built below
# contains a double slash ("/root//partitions/...") -- confirm this is acceptable.
coco_json_path_train = f"{mount_path}/partitions/{train_set}/{json_name_train}"
# The folder variables are rebound from a bare folder name to a full path.
tiled_images_folder_train = f"{mount_path}/partitions/{train_set}/{tiled_images_folder_train}"
coco_json_path_val= f"{mount_path}/partitions/{val_set}/{json_name_val}"
tiled_images_folder_val = f"{mount_path}/partitions/{val_set}/{tiled_images_folder_val}"

# Invert data.class_mapping_coco_inv (swap its keys and values).
class_map = {v: k for k, v in data.class_mapping_coco_inv.items()}
# Integers 1..len(class_map)-1: one code per class_map key except the last one.
class_ints = list(range(1, len(list(class_map.keys())[:-1]) + 1))

# Load the training records. The exact filtering done with area_thresh,
# negative_sample_count and remap_dict lives in ceruleanml.preprocess.
record_collection_with_negative_small_filtered_train = preprocess.load_set_record_collection(
    coco_json_path_train, tiled_images_folder_train, area_thresh, negative_sample_count, preprocess=False, remap_dict=remap_dict
)
record_ids_train = record_collection_to_record_ids(record_collection_with_negative_small_filtered_train)

# Same for the validation partition, with its own negative sample count.
record_collection_with_negative_small_filtered_val = preprocess.load_set_record_collection(
    coco_json_path_val, tiled_images_folder_val, area_thresh, negative_sample_count_val, preprocess=False, remap_dict=remap_dict
)
record_ids_val = record_collection_to_record_ids(record_collection_with_negative_small_filtered_val)

# Sanity check: no record id is repeated within the train list or within the val list.
# NOTE(review): an id that appears once in train AND once in val still passes
# this check -- confirm whether cross-split uniqueness was the intent.
assert len(set(record_ids_train)) + len(set(record_ids_val)) == len(record_ids_train) + len(record_ids_val)

# Train first, val last: get_val_indices relies on the validation records
# occupying the tail of the combined list.
train_val_record_ids = record_ids_train + record_ids_val
combined_record_collection = record_collection_with_negative_small_filtered_train + record_collection_with_negative_small_filtered_val

def get_val_indices(combined_ids, val_ids):
    """Return the positions of the validation records inside ``combined_ids``.

    The validation ids are assumed to be the last ``len(val_ids)`` entries of
    ``combined_ids`` (the combined list is built as train ids + val ids).

    Args:
        combined_ids: sequence of all record ids, train first and val last.
        val_ids: sequence of validation record ids.

    Returns:
        A list of integer indices into ``combined_ids`` covering the trailing
        ``len(val_ids)`` positions. An empty ``val_ids`` yields an empty list.
    """
    total = len(combined_ids)
    # Compute the start index explicitly: the previous `[-len(val_ids):]`
    # slice became `[-0:]` for an empty val set and returned every index.
    # Clamp at 0 so an oversized val_ids still yields all indices, as before.
    first_val_index = max(total - len(val_ids), 0)
    return list(range(first_val_index, total))

#show_data.show_records(random.choices(combined_train_records, k=9), ncols=3)

### Constructing a FastAI DataBlock that uses parsed COCO Dataset from icevision parser. aug_transforms can only be used with_context=True

val_indices = get_val_indices(train_val_record_ids, record_ids_val)

def get_image_by_record_id(record_id):
    """DataBlock ``get_x``: resolve ``record_id`` against the combined train+val records.

    Delegates to ``get_image_path`` with the module-level
    ``combined_record_collection`` and returns its result unchanged.
    """
    image_for_record = get_image_path(combined_record_collection, record_id)
    return image_for_record

def get_mask_by_record_id(record_id):
    """DataBlock ``get_y``: build the target mask for ``record_id``.

    Delegates to ``record_to_mask`` with the module-level
    ``combined_record_collection`` and returns its result unchanged.
    """
    mask_for_record = record_to_mask(combined_record_collection, record_id)
    return mask_for_record

### Fastai2 Trainer

In [None]:
# Create a fresh, timestamped output directory for this run under
# {mount_path}/experiments/cv2/.
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%Y-%m-%d_%H-%M-%S")
experiment_dir = Path(f'{mount_path}/experiments/cv2/{timestampStr}_fastai_unet/')
# parents=True: on a fresh machine the intermediate "experiments/cv2"
# directories may not exist yet, and a bare mkdir() would raise FileNotFoundError.
experiment_dir.mkdir(parents=True, exist_ok=True)
print(experiment_dir)

In [None]:
# Build the initial DataLoaders at the image size of the first run_list entry;
# the learner is created from these and the training loop later swaps in
# DataLoaders rebuilt per stage.
init_size = run_list[0][0]
# Batch-level augmentations from fastai's aug_transforms.
# NOTE(review): an earlier comment in this notebook says aug_transforms can
# only be used with with_context=True, yet they are applied here regardless
# of with_context -- confirm.
batch_transfms = [*aug_transforms(flip_vert=True, max_rotate=180, max_warp=0.1, size=init_size)]
coco_seg_dblock = DataBlock(
        blocks=(ImageBlock, MaskBlock(codes=class_ints)), # ImageBlock is RGB by default, uses PIL
        get_x=get_image_by_record_id,  # record id -> image
        splitter=IndexSplitter(val_indices),  # validation items are selected by position in the source list
        get_y=get_mask_by_record_id,  # record id -> mask
        batch_tfms=batch_transfms,
        item_tfms = Resize(init_size),
        n_inp=1
    )
# The source items are record ids (train ids followed by val ids).
dls = coco_seg_dblock.dataloaders(source=train_val_record_ids, batch_size=bs_d[init_size])


In [None]:
# Callbacks attached to the learner. Only TensorBoard logging is active;
# model checkpointing and early stopping are left commented out.
cbs = [TensorBoardCallback(projector=False, trace_model=False), 
       # SaveModelCallback(monitor="valid_loss", with_opt=True),
       # EarlyStoppingCallback(monitor='valid_loss', min_delta=0.005, patience=10) 
       ]

# U-Net learner on the ResNet backbone chosen by `arch`; models are written
# under experiment_dir.
# NOTE(review): n_out=7 is hard-coded here while get_cm_for_torchscript_model
# passes 6 to evaluation.cm_f1 -- confirm both agree with the class mapping.
learner = unet_learner(dls, arch_d[arch], metrics=[DiceMulti],
                       model_dir=experiment_dir, n_out=7,
                       cbs=cbs) # if SaveModelCallback is re-enabled above, it saves the model when there is improvement

# Optional mixed-precision training, controlled by the fp16 flag.
if fp16:
       learner = learner.to_fp16()

# Maps image size -> total number of epochs trained so far at that size.
running_total_epochs = {}

In [None]:
# Run one training stage per run_list entry. The loop variables (size, bs, lr,
# epochs) and `dls` are intentionally left in the notebook namespace: later
# cells read them to name the saved model files.
for size, total_train_time in run_list:
    # Convert the time budget (minutes) into whole epochs using the measured
    # minutes-per-epoch for this image size; always train at least one epoch.
    epochs = max(int(total_train_time/mins_d[size]), 1)
    bs = bs_d[size]
    lr = lr_d[size]

    # Rebuild the DataBlock / DataLoaders for this stage's image size and batch size.
    batch_transfms = [*aug_transforms(flip_vert=True, max_rotate=180, max_warp=0.1, size=size)]
    coco_seg_dblock = DataBlock(
            blocks=(ImageBlock, MaskBlock(codes=class_ints)), # ImageBlock is RGB by default, uses PIL
            get_x=get_image_by_record_id,
            splitter=IndexSplitter(val_indices),
            get_y=get_mask_by_record_id,
            batch_tfms=batch_transfms,
            item_tfms = Resize(size),
            n_inp=1
        )
    dls = coco_seg_dblock.dataloaders(source=train_val_record_ids, batch_size=bs)
    # Swap the new DataLoaders into the existing learner so the model weights carry over between stages.
    learner.dls = dls
    print(f"Training time is: {total_train_time} minutes")
    print("starting from running total", running_total_epochs)
    print("image size", size)
    print("batch size", bs)
    print("arch", arch)
    print("lr", lr)
    print("n chips", n)
    print("context", with_context)
    print("epochs", epochs)

    # freeze_epochs=0: no epochs are requested for fine_tune's frozen phase.
    learner.fine_tune(epochs, lr, freeze_epochs=0) # cbs=cbs

    # Accumulate the epochs trained at this size. filter(None, ...) drops the
    # None that .get() returns the first time a size is seen.
    running_total_epochs[size] = sum(filter(None,[running_total_epochs.get(size),epochs]))

In [None]:
# Save *.pt file locally
# The file name encodes, in order: image size, total epochs trained at that
# size, batch size, arch, learning rate, and element 1 of learner.validate()
# rounded to 3 decimals (presumably the DiceMulti metric, with element 0 being
# the loss -- confirm). size, bs and lr are the values left over from the last
# iteration of the training loop.
savename = f'{size}_{running_total_epochs[size]}_{bs}_{arch}_{lr}_{round(learner.validate()[1],3)}.pt'
# Export the whole learner under the same stem with a .pkl extension
# (savename[:-3] strips the trailing ".pt").
learner.export(experiment_dir/(savename[:-3]+".pkl"))
# Also write the state dict and traced model files into experiment_dir.
inference.save_fastai_model_state_dict_and_tracing(learner, dls, savename, experiment_dir)

In [None]:
# Upload *.pt folder to GCS
def upload_from_directory(directory_path: str, dest_bucket_name: str, dest_blob_name: str):
    """Upload every regular file under ``directory_path`` to a GCS bucket.

    The directory is walked recursively and each file is uploaded to
    ``{dest_blob_name}/{path relative to directory_path}``.

    Args:
        directory_path: local directory whose files are uploaded.
        dest_bucket_name: name of the destination bucket.
        dest_blob_name: prefix prepended to every uploaded object's name.
    """
    bucket = storage.Client().bucket(dest_bucket_name)
    for local_file in glob.glob(directory_path + '/**', recursive=True):
        # The glob also yields directories; only files can be uploaded.
        if not os.path.isfile(local_file):
            continue
        # Derive the relative path with os.path.relpath instead of slicing by
        # len(directory_path)+1, which chopped the first character of every
        # name when directory_path ended with a path separator. Object names
        # always use "/" regardless of the local OS separator.
        relative_path = os.path.relpath(local_file, directory_path).replace(os.sep, '/')
        blob = bucket.blob(f'{dest_blob_name}/{relative_path}')
        blob.upload_from_filename(local_file)


upload_from_directory(str(experiment_dir), 'ceruleanml', str(experiment_dir.relative_to('/root')))

In [None]:
# tensorboard --logdir=/root/work/notebooks/runs

In [None]:
learner.show_results()

In [None]:
run_list = [[512, 120]] # List of tuples, where the tuples are [px size, training time in minutes]

# Model Inference and Result Evaluation

In [None]:
evaluation.get_cm_for_learner(dls, learner, mount_path)

In [None]:
validation = learner.validate()

# We save the best model in a variety of formats for loading later. Evaluation of the TorchScript model is still being debugged.

In [None]:
save_template = f'test_{bs}_{arch}_{size}_{round(validation[1],3)}_{epochs}.pt'

In [None]:
from ceruleanml.inference import save_fastai_model_state_dict_and_tracing, load_tracing_model, test_tracing_model_one_batch, logits_to_classes

state_dict_pth, tracing_model_gpu_pth, tracing_model_cpu_pth  = save_fastai_model_state_dict_and_tracing(learner, dls, save_template, experiment_dir)

In [None]:
# Load the CPU traced model written by save_fastai_model_state_dict_and_tracing.
# NOTE(review): this assumes the file is a TorchScript archive (the notebook
# refers to it as the "torchscript model"); torch.jit.load is the loader for
# such archives, whereas torch.load is meant for pickled objects/state dicts.
model = torch.jit.load(tracing_model_cpu_pth)

In [None]:
def get_cm_for_torchscript_model(dls, model, save_path):
    """Evaluate a loaded TorchScript model on the validation DataLoader.

    The TorchScript model operates on whole batches, not individual images.
    Evaluation on negative samples is not supported if they are in ``dls``:
    validation masks do not exist for negative samples and would need to be
    constructed with ``np.zeros``.

    Args:
        dls: DataLoaders whose ``valid`` loader yields ``(images, masks)`` batches.
        model: callable applied to a CPU image batch.
        save_path: forwarded to ``evaluation.cm_f1``.

    Returns:
        Whatever ``evaluation.cm_f1`` returns (per the original note: the
        confusion matrix and the F1 score).
    """
    val_arrs = []
    class_preds = []
    for batch_tuple in dls.valid:
        semantic_masks_batch = batch_tuple[1].cpu().detach().numpy()
        # Inference runs on CPU, so the image batch is moved there first.
        class_pred_batch = model(batch_tuple[0].cpu())
        class_pred_batch = class_pred_batch.cpu().detach().numpy()
        # NOTE(review): masks are added per sample (extend) while predictions
        # are added per batch (append) -- confirm this is what cm_f1 expects.
        val_arrs.extend(semantic_masks_batch)
        class_preds.append(class_pred_batch)
    # 6 is passed positionally to cm_f1 (presumably the number of classes -- confirm).
    return evaluation.cm_f1(val_arrs, class_preds, 6, save_path) # todo add normalize false

In [None]:
get_cm_for_torchscript_model(dls, model, mount_path)

In [None]:
# this results in vm dying, not just kernel crash
# coco_seg_dblock = DataBlock(
#     blocks=(ImageBlock, MaskBlock(codes=class_ints)),
#     get_x=get_image_by_record_id,
#     get_y=get_mask_by_record_id,
#     n_inp=1,
# )

# dls = coco_seg_dblock.dataloaders(source=record_ids, batch_size=5)


In [None]:
result = learner.get_preds(dl=dls[0])

In [None]:
learner.get_preds??

In [None]:
len(pred_arrs)

In [None]:
target_label,prediction_arr, activations = pred_arrs[0]

In [None]:
skio.imshow(target_label.cpu().detach().numpy())

In [None]:
skio.imshow(base_img.cpu().detach().numpy()[0])

In [None]:
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
skio.imshow(base_img.cpu().detach().numpy()[1])

In [None]:
skio.imshow(base_img.cpu().detach().numpy())

In [None]:
array([      60.73,       190.3,      4.3598]) # means
array([     16.099,      17.846,       9.603]) # stats