# Goal

Run on Docker

In [1]:
import os
import torch

gpu_count = torch.cuda.device_count()
cpu_count = os.cpu_count()
print("#GPU = {0:d}, #CPU = {1:d}".format(gpu_count, cpu_count))

#GPU = 1, #CPU = 64


In [2]:
# Dataloader params

subset_size = 330

test_pct  = 1 - float(subset_size)/335
bs        = 20
nepochs   = 3
num_workers = 32

iso       = 3
maxs      = [87, 90, 90]

In [1]:
# PATHS

# CODE (DeepPit, OBELISK, etc)
code_src     = "/workspace"
deepPit_src  = f"{code_src}/DeepPit"
obelisk_src  = f"{code_src}/OBELISK"

# DATA (training, ABIDE, etc)
todd_data_src = "../../../../..//media/labcomputer/e33f6fe0-5ede-4be4-b1f2-5168b7903c7a/home/rachel/"
olab_data_src = "/gpfs/data/oermannlab/private_data/DeepPit/"
docker_data_src = "../PitMRdata/Labels/ABIDE"

code_src      = "/gpfs/home/gologr01/DeepPit/"
curr_data_src = olab_data_src

def change_src(fn, old_prefix=todd_data_src, new_prefix=olab_data_src):
    return new_prefix + fn[len(old_prefix):]

# path to training data
train_src = "/gpfs/data/oermannlab/private_data/DeepPit/PitMRdata/samir_labels"

In [4]:
# clear cache
import os
import gc
import torch

gc.collect()
torch.cuda.empty_cache()

print("#GPU = {0:d}, #CPU = {1:d}".format(torch.cuda.device_count(), os.cpu_count()))

# print GPU stats
import GPUtil as GPU
GPUs = GPU.getGPUs()
for i,gpu in enumerate(GPUs):
    print("Index {0:d}, GPU {1:20s} RAM Free: {2:.0f}MB | Used: {3:.0f}MB | Util {4:3.0f}% | Total {5:.0f}MB".format(i, gpu.name, gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

#GPU = 1, #CPU = 64
Index 0, GPU GeForce RTX 3090     RAM Free: 23872MB | Used: 393MB | Util   2% | Total 24265MB


# Imports

In [5]:
# imports
from transforms import AddChannel, Iso, PadSz

# Utilities
import os
import time
import pickle
from pathlib import Path

# Fastai
from fastai import *
from fastai.torch_basics import *
from fastai.basics import *

# PyTorch
from torchvision.models.video import r3d_18
from fastai.callback.all import SaveModelCallback
from torch import nn

# 3D extension to FastAI
# from faimed3d.all import *

# Input IO
import SimpleITK as sitk
import meshio

# Numpy and Pandas
import numpy as np
import pandas as pd
from pandas import DataFrame as DF

# Helper functions
from helpers.preprocess import get_data_dict, paths2objs, folder2objs, seg2mask, mask2bbox, print_bbox, get_bbox_size, print_bbox_size
from helpers.general import sitk2np, np2sitk, print_sitk_info, round_tuple, lrange, lmap, get_roi_range, numbers2groups
from helpers.viz import viz_axis

import sys
sys.path.append(deepPit_src)
sys.path.append(obelisk_src)

# OBELISK
from utils import *

# Distributed Training

In [6]:
from fastai.distributed import *
# import argparse
# parser = argparse.ArgumentParser()
# parser.add_argument("--local_rank", type=int)
# args = parser.parse_args()
# torch.cuda.set_device(args.local_rank)
# torch.distributed.init_process_group(backend='nccl', init_method='env://')

# Data

1. Source = path to labels (segmentation)
2. data dict[foldername] = (path to MR, path to Segm tensor)
    
Special subsets:
1. *training*: small subset of all labelled items (quick epoch w/ 100 instead of 335 items).
2. *unique*: subset of items with unique size, spacing, and orientation (quickly evaluate resize vs. istropic)

In [7]:
import time
model_time = time.ctime() # 'Mon Oct 18 13:35:29 2010'
print(f"Time: {model_time}")

Time: Fri Jun 18 17:30:58 2021


In [8]:
# labelled train data
train_src = docker_data_src

# print
print("Folders in train path: ", end=""); print(*os.listdir(train_src), sep=", ")

# get data
data = {}
folders = os.listdir(train_src)

# filter .zip
folders = [folder for folder in folders if not folder.endswith(".zip")]
print("Folders ", folders)

for folder in folders: data.update(get_data_dict(f"{train_src}/{folder}/{folder}"))

# all items
items = list(data.values())

# MR files: unique sz, sp, dir
with open(f'{deepPit_src}/saved_metadata/unique_sz_sp_dir.pkl', 'rb') as f:
    unique = pickle.load(f)

# Create (MR path, Segm path) item from MR path
def get_folder_name(s):
    start = s.index("samir_labels/")
    s = s[start + len("samir_labels/50373-50453/"):]
    return s[0:s.index("/")]

# get unique
unique = [(change_src(mr), data[get_folder_name(mr)][1]) for mr in unique]

# subset
subset_idxs, test_idxs = RandomSplitter(valid_pct=test_pct)(items)
subset = [items[i] for i in subset_idxs]
test   = [items[i] for i in test_idxs]

# print
print(f"Total {len(items)} items in dataset.")
print(f"Training subset of {len(subset)} items.")
print(f"Test subset of {len(test)} items.")

# model name
model_name = f"iso_{iso}mm_pad_{maxs[0]}_{maxs[1]}_{maxs[2]}_bs_{bs}_subset_{len(subset)}_epochs_{nepochs}_time_{model_time}"
print(f"Model name: {model_name}")

# save test set indices
with open(f'{deepPit_src}/model_test_sets/{model_name}_test_items.pkl', 'wb') as f:
    pickle.dump(list(test), f)
    
# print
print(f"Total {len(items)} items in dataset.")
print(f"Training subset of {len(subset)} items.")
print(f"Unique subset of {len(unique)} items.")

Folders in train path: 50213-50312.zip, 50373-50453, 50313-50372, 50155-50212.zip, 50213-50312, 50155-50212, 50313-50372.zip, 50373-50453.zip, 50002-50153, 50002-50153.zip
Folders  ['50373-50453', '50313-50372', '50213-50312', '50155-50212', '50002-50153']
Total 335 items in dataset.
Training subset of 330 items.
Test subset of 5 items.
Model name: iso_3mm_pad_87_90_90_bs_20_subset_330_epochs_20_time_Fri Jun 18 17:30:58 2021
Total 335 items in dataset.
Training subset of 330 items.
Unique subset of 28 items.


In [9]:
# model name
model_name = f"iso_3mm_pad_87_90_90_bs_{bs}_subset_{len(subset)}_epochs_{nepochs}_time_{model_time}"
print(f"Model name: {model_name}"),

# save test set indices\n",
with open(f'{deepPit_src}/model_test_sets/{model_name}_test_items.pkl', 'wb') as f:
    pickle.dump(list(test), f)
      
# with open(f"model_test_sets/{model_name}_test_items.pkl", 'rb') as f:
#     test = pickle.load(f)
# print(test[0]), print(len(test))

Model name: iso_3mm_pad_87_90_90_bs_20_subset_330_epochs_20_time_Fri Jun 18 17:30:58 2021


# Transforms

1. Isotropic 3mm or Resize to 50x50x50 dimensions
2. Crop/Pad to common dimensions

In [10]:
# # test

# tfms = [Iso(3)]
# tls = TfmdLists(unique, tfms)

# start = time.time()
# iso_szs = [mr.shape for mr,mk in tls]
# elapsed = time.time() - start

# print(f"Elapsed: {elapsed} s for {len(unique)} items.")

In [11]:
# start = time.time()
# iso_szs = [mr.shape for mr,mk in tls]
# elapsed = time.time() - start

# print(f"Elapsed: {elapsed} s for {len(unique)} items.")

In [12]:
# print(*[f"{get_folder_name(mr)}: {tuple(sz)}" for (mr,mk),sz in zip(unique, iso_szs)], sep="\n")

In [13]:
# maxs = [int(x) for x in torch.max(torch.tensor(iso_szs), dim=0).values]
# print("Maxs: ", maxs)

# Crop

In [14]:
# # test
# iso_items = list(tls[0:2])

# # tfms
# pad_tfms = [PadSz(maxs)]

# # tls
# pad_tls = TfmdLists(iso_items, pad_tfms)

# pad_tls[0][0].shape, pad_tls[1][0].shape

# Dataloaders

TODO augmentations.

- dset = tfms applied to items
- splits into training/valid
- bs

In [15]:
# time it
start = time.time()

# splits
splits = RandomSplitter(seed=42)(subset)
print(f"Training: {len(splits[0])}, Valid: {len(splits[1])}")

# tfms
tfms = [Iso(3), PadSz(maxs)]

# tls
tls = TfmdLists(items, tfms, splits=splits)

# dls
dls = tls.dataloaders(bs=bs, after_batch=AddChannel(), num_workers=num_workers)

# GPU
dls = dls.cuda()

# end timer
elapsed = time.time() - start
print(f"Elapsed time: {elapsed} s for {len(subset)} items")

# test get one batch
b = dls.one_batch()
print(type(b), b[0].shape, b[1].shape)
print(len(dls.train), len(dls.valid))

Training: 264, Valid: 66
Elapsed time: 2.2636847496032715 s for 330 items
<class 'tuple'> torch.Size([20, 1, 87, 90, 90]) torch.Size([20, 1, 87, 90, 90])
13 4


# Metric

Linear combination of Dice and Cross Entropy

In [16]:
def dice(input, target):
    iflat = input.contiguous().view(-1)
    tflat = target.contiguous().view(-1)
    intersection = (iflat * tflat).sum()
    return ((2. * intersection) /
           (iflat.sum() + tflat.sum()))

def dice_score(input, target):
    return dice(input.argmax(1), target)

def dice_loss(input, target): 
    return 1 - dice(input.softmax(1)[:, 1], target)

def loss(input, target):
    return dice_loss(input, target) + nn.CrossEntropyLoss()(input, target[:, 0])

## OBELISK

In [17]:
# start = time.time()

# segs = torch.cat([tl[1] for tl in dls.train],0)
# print(segs.shape)

# elapsed = time.time() - start

# print(f"Elapsed time: {elapsed} s for {len(segs)} items")

In [18]:
# class_weight = torch.sqrt(1.0/(torch.bincount(segs.view(-1)).float()))
# class_weight = class_weight/class_weight.mean()
# class_weight[0] = 0.5
# np.set_printoptions(formatter={'float': '{: 0.2f}'.format})
# print('inv sqrt class_weight',class_weight.data.cpu().numpy())

In [19]:
from utils import my_ohem

In [20]:
# pos_weight = torch.load("saved_metadata/class_weights.pt")
# class_weights = [0, pos_weight]

# # inv
# class_weights [1.0/x for x in class_wei]
# my_criterion = my_ohem(.25,[0, pos_weight]) #.cuda())#0.25 

In [21]:
def obelisk_loss_fn(predict, target): return my_criterion(F.log_softmax(predict,dim=1),target)

In [22]:
# ipython nbconvert --to python  '6 - Dataloaders- NB - Simple-Copy1.ipynb'

# Learner

In [23]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [24]:
# OBELISK-NET from github
from models import obelisk_visceral, obeliskhybrid_visceral

In [25]:
full_res = maxs

learn = Learner(dls=dls, \
                model=obeliskhybrid_visceral(num_labels=2, full_res=full_res), \
                loss_func= loss, #DiceLoss(), #nn.CrossEntropyLoss(), \
                metrics = dice_score, \
                model_dir = code_src + "models", \
                cbs = [SaveModelCallback(monitor='dice_score', fname=model_name, with_opt=True)])

# SaveModelCallback: model_dir = "./models", cbs = [SaveModelCallback(monitor='dice_score')]

# GPU
learn.model = learn.model.cuda()

#learn = learn.to_distributed(args.local_rank)


In [26]:
# # test:

# #dls.device = "cpu"

# start = time.time()

# x,y = dls.one_batch()
# #x,y = to_cpu(x), to_cpu(y)

# pred = learn.model(x)
# loss = learn.loss_func(pred, y)

# elapsed = time.time() - start

# print(f"Elapsed: {elapsed} s")
# print("Batch: x,y")
# print(type(x), x.shape, x.dtype, "\n", type(y), y.shape, y.dtype)

# print("Pred shape")
# print(type(pred), pred.shape, pred.dtype)

# print("Loss")
# print(loss)
# print(learn.loss_func)

# LR Finder

In [27]:
# learn.lr_find()

In [28]:
print("PRE learn.fit one cycle")
with learn.distrib_ctx():
    learn.fit_one_cycle(1, 3e-3, wd = 1e-4)

PRE learn.fit one cycle


epoch,train_loss,valid_loss,dice_score,time
0,1.552306,1.504593,0.011449,00:14


Better model found at epoch 0 with dice_score value: 0.011448514647781849.


In [None]:
print("unfreeze, learn 50")
learn.unfreeze()
with learn.distrib_ctx():
    learn.fit_one_cycle(nepochs, 3e-3, wd = 1e-4)

In [None]:
# learn.save('iso_3mm_pad_87_90_90_subset_50_epochs_50')

In [None]:
# learn.lr_find()

In [None]:
# print("unfreeze, learn 50")
# learn.unfreeze()
# learn.fit_one_cycle(50, 1e-3, wd = 1e-4)

In [None]:
# testmask = torch.tensor([[[False, False, False], [False, False, False], [True, True, True]],
#                        [[False, False, False], [False, False, True], [True, True, True]],
#                        [[False, False, False], [False, False, False], [False, False, False]]])
# testmask

In [None]:
# testmaskN = np.array(testmask)
# testmaskN

In [None]:
# maskT = testmask.type(torch.BoolTensor)

# iT = torch.any(maskT, dim=(1,2))
# jT = torch.any(maskT, dim=(0,2))
# kT = torch.any(maskT, dim=(0,1))

# iminT, imaxT = torch.where(iT)[0][[0, -1]]
# jminT, jmaxT = torch.where(jT)[0][[0, -1]]
# kminT, kmaxT = torch.where(kT)[0][[0, -1]]

In [None]:
# maskN = np.array(testmask).astype(bool)
    
# iN = np.any(maskN, axis=(1, 2))
# jN = np.any(maskN, axis=(0, 2))
# kN = np.any(maskN, axis=(0, 1))

# iminN, imaxN = np.where(iN)[0][[0, -1]]
# jminN, jmaxN = np.where(jN)[0][[0, -1]]
# kminN, kmaxN = np.where(kN)[0][[0, -1]]

In [None]:
# maskT.shape, maskN.shape

In [None]:
# print(iT)
# print(jT)
# print(kT)
# print([x for x in (iminT, imaxT, jminT, jmaxT, kminT, kmaxT)])

In [None]:
# print(iN)
# print(jN)
# print(kN)
# print([int(x) for x in (iminN, imaxN, jminN, jmaxN, kminN, kmaxN)])

In [None]:
#     def torch_mask2bbox(mask):
#         mask = mask.type(torch.BoolTensor)

#         i = torch.any(mask, dim=0)
#         j = torch.any(mask, dim=1)
#         k = torch.any(mask, dim=2)

#         imin, imax = torch.where(i)[0][[0, -1]]
#         jmin, jmax = torch.where(j)[0][[0, -1]]
#         kmin, kmax = torch.where(k)[0][[0, -1]]

#         # inclusive idxs
#         return imin, imax+1, jmin, jmax+1, kmin, kmax+1