In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
!pip install glob2

In [None]:
!pip install pydicom kornia opencv-python scikit-image nbdev

In [None]:
!conda install -c conda-forge gdcm -y

In [None]:
# Fetch the `fmi` helper repository; the notebook imports `fmi.fmi.*` modules from this checkout.
!git clone https://github.com/asvcode/fmi.git

In [1]:
#from fastai.basics import *
from fastai.callback.all import *
from fastai.vision.all import *
from fastai.medical.imaging import *

import re
import pydicom
import gdcm



ModuleNotFoundError: No module named 'gdcm'

In [None]:
from fmi.fmi.explore import *
from fmi.fmi.preprocessing import *
from fmi.fmi.pipeline import *
from fmi.fmi.retinanet import *

In [None]:
# Root folder of the competition data.
path = Path("../input/siim-covid19-detection")

# One row per image (boxes/labels) and one row per study (study-level classes).
image_level_csv = path / 'train_image_level.csv'
study_level_csv = path / 'train_study_level.csv'
df_img = pd.read_csv(image_level_csv)
df_study = pd.read_csv(study_level_csv)

In [None]:
# Preview the first two rows of the image-level table.
df_img.head(2)

In [None]:
# Preview the first two rows of the study-level table.
df_study.head(2)

In [None]:
# Rename the study-level `id` column to `StudyInstanceUID` and strip its "_study" suffix so it
# can be joined against the image-level table.
# Written without in-place mutation and with a literal (non-regex) replacement, so running the
# cell a second time is a no-op instead of failing on the already-renamed column.
df_study = df_study.rename(columns={'id': 'StudyInstanceUID'})
df_study['StudyInstanceUID'] = df_study['StudyInstanceUID'].str.replace('_study', '', regex=False)

In [None]:
# NOTE: both arrays hold StudyInstanceUIDs -- `study_ids` is taken from the image-level
# table and `img_ids` from the study-level table.
study_ids = df_img.StudyInstanceUID.unique()
img_ids = df_study.StudyInstanceUID.unique()

# (do both tables cover the same studies?, do both tables have the same number of rows?)
same_studies = set(study_ids) == set(img_ids)
same_row_count = len(df_img) == len(df_study)
same_studies, same_row_count

In [None]:
# Show (rows, columns) of the image-level and study-level tables side by side.
print(df_img.shape, df_study.shape)

In [None]:
# Count the image ids recorded for each study ...
id_group = (
    df_img.groupby(['StudyInstanceUID'])['id']
    .count()
    .rename('number of ids')
    .to_frame()
)
# ... keep the studies that own more than one image ...
has_multiple = id_group['number of ids'] > 1
UIDs_multiple_ids = id_group.loc[has_multiple].index
# ... and display their image rows, grouped by study.
df_img[df_img['StudyInstanceUID'].isin(UIDs_multiple_ids)].sort_values('StudyInstanceUID')

In [None]:
# Attach the study-level columns to every image row.
# `validate='many_to_one'` makes pandas raise if a StudyInstanceUID occurs more than once in
# `df_study`, instead of silently duplicating image rows in the merged table.
df = pd.merge(df_img, df_study, on='StudyInstanceUID', validate='many_to_one')

In [None]:
#img_counts = df.groupby(['StudyInstanceUID'])['id'].nunique().to_frame()
#img_counts.columns = ['No. images']
#
#img_counts.sort_values(by = 'No. images', ascending = False)

In [None]:
# Rows without annotations get a dummy 1x1 box string so every row has the same format.
# The result is assigned back to the column: calling `fillna(..., inplace=True)` on the
# `df.boxes` accessor is a chained in-place update, which pandas does not guarantee to
# propagate to `df` (and which has no effect under Copy-on-Write).
df['boxes'] = df['boxes'].fillna("[{'x': 0, 'y': 0, 'width': 1, 'height': 1}]")
# Split the (string) list of bounding boxes after each closing brace.
df['boxes_list'] = df['boxes'].str.split(r'},\s*')

In [None]:
# Everything in a box fragment that is not part of a number or a separating comma.
_BOX_NOISE = r"\[|{|}|\]|:|'|x|y|width|height|\s"

def _fragments_to_xyxy(fragments):
    "Convert the string fragments of one row into `[min_x, min_y, max_x, max_y]` float lists."
    corners = []
    for fragment in fragments:
        # Strip the formatting, split the remaining values and cast them from string to float.
        values = [float(v) for v in re.sub(_BOX_NOISE, '', fragment).split(',')]
        # The values arrive as min_x, min_y, width, height; the BBoxBlock expects
        # min_x, min_y, max_x, max_y.
        corners.append([values[0], values[1], values[0] + values[2], values[1] + values[3]])
    return corners

df['boxes_list'] = df['boxes_list'].apply(_fragments_to_xyxy)

In [None]:
# Split the label string on whitespace into its individual tokens.
df['labels_list'] = df['label'].str.split(r'\s')
# Keep every token that does not contain a digit.
# The pattern is a raw string: in a normal string literal `\d` is an invalid escape sequence
# and triggers a warning on current Python versions.
df['labels_list'] = df['labels_list'].apply(lambda x: [y for y in x if not re.match(r'.*\d+', y)])

In [None]:
# The raw string columns have been parsed into `boxes_list` / `labels_list`; remove them.
df.drop(columns=['boxes', 'label'], inplace=True)

In [None]:
## Sanity check: every row must have exactly one label per bounding box
length_mismatch = df.apply(lambda row: len(row['boxes_list']) != len(row['labels_list']), axis=1)
assert length_mismatch.sum() == 0

In [None]:
# Turn the image id ("<uid>_image") into the matching DICOM file name ("<uid>.dcm").
df['id'] = df['id'].str.replace('_image', '.dcm', regex=False)

In [None]:
# Preview the merged, parsed annotation table.
df.head()

In [None]:
def get_bbox(fn):
    "Return the `boxes_list` entry of the first row in `df` whose `id` equals the file name of `fn`."
    is_match = df['id'] == fn.name
    matching_boxes = df.loc[is_match, 'boxes_list']
    return matching_boxes.values[0]
    
def get_bbox_label(fn):
    "Return the `labels_list` entry of the first row in `df` whose `id` equals the file name of `fn`."
    is_match = df['id'] == fn.name
    matching_labels = df.loc[is_match, 'labels_list']
    return matching_labels.values[0]

In [None]:
class PILDicom(PILBase):
    "PIL image type that reads a DICOM file and rescales its pixel data to 8 bit."
    _open_args,_tensor_cls,_show_args = {},TensorDicom,TensorDicom._show_args
    @classmethod
    def create(cls, fn:(Path,str,bytes), mode='RGB')->None:
        """Open a `DICOM file` from path `fn` or bytes `fn` and load it as a `PIL Image`.

        The pixel array is divided by its maximum and multiplied by 255 before being cast to
        `uint8`, then converted to `mode` (no conversion when `mode` is falsy).
        The signature, including the `->None` annotation, is unchanged.

        Raises:
            TypeError: if `fn` is neither a path, a string nor bytes.
        """
        if isinstance(fn, bytes):
            # Keep the raw array here: wrapping it in a PIL image first would make the
            # arithmetic below fail, since PIL images do not support division.
            pixels = pydicom.dcmread(pydicom.filebase.DicomBytesIO(fn)).pixel_array
        elif isinstance(fn, (Path, str)):
            pixels = pydicom.dcmread(fn).pixel_array
        else:
            raise TypeError(f"PILDicom.create expects a path, str or bytes, got {type(fn).__name__}")
        peak = np.max(pixels)
        if peak != 0:
            scaled = (pixels / peak) * 255
        else:
            # An all-zero image would divide by zero; keep it black instead.
            scaled = np.zeros_like(pixels, dtype=np.float64)
        im = Image.fromarray(scaled.astype(np.uint8))
        im.load()
        im = im._new(im.im)
        return cls(im.convert(mode) if mode else im)

In [None]:
# Data pipeline: one DICOM image in, bounding boxes and their labels out.
db = DataBlock(
    blocks=(ImageBlock(cls=PILDicom), BBoxBlock, BBoxLblBlock),
    get_items=get_dicom_files,
    # Fixed seed so the train/validation split is the same on every run.
    splitter=RandomSplitter(seed=42),
    # Pass the getters directly; wrapping them in lambdas adds nothing and lambdas cannot be pickled.
    get_y=[get_bbox, get_bbox_label],
    # NOTE(review): `Resize` with its default method may cut away part of an image and its
    # boxes -- confirm this is intended for detection.
    item_tfms=Resize(224),
    n_inp=1,
)

In [None]:
# Build the dataloaders from the DICOM files under `train`, with batches of 32.
dls = db.dataloaders(path / 'train', bs=32, verbose=True)

In [None]:
# Display a batch of images with their bounding boxes as a visual check of the pipeline.
dls.show_batch()

---

In [None]:
# `b` was never defined in this notebook, so the cell failed on a fresh kernel.
# Draw one batch explicitly and display exactly that batch.
b = dls.one_batch()
dls.show_batch(b=b)

In [None]:
# Display the value `get_c` derives from the dataloaders; it is passed to `RetinaNet` below.
get_c(dls)

# Build Model

In [None]:
# Pass `pretrained` and `cut` by keyword: in fastai v2 the second positional parameter of
# `create_body` is `n_in`, so `create_body(models.resnet34, True, -2)` binds n_in=True and
# pretrained=-2 and leaves `cut` at its default.
# NOTE(review): newer fastai releases expect an instantiated model as the first argument --
# confirm against the installed version.
encoder = create_body(models.resnet34, pretrained=True, cut=-2)
arch = RetinaNet(encoder, get_c(dls), final_bias=-4)

In [None]:
# Anchor configuration handed to the focal loss: three aspect ratios and three scales.
aspect_ratios = [0.5, 1, 2]
scales = [2**0, 2**(1/3), 2**(2/3)]

crit = RetinaNetFocalLoss(
    scales=scales,
    ratios=aspect_ratios,
)

In [None]:
def _retinanet_split(m):
    "Split the parameters of `m` into two groups: the encoder and all remaining modules."
    head = nn.Sequential(m.c5top6, m.p6top7, m.merges, m.smoothers, m.classifier, m.box_regressor)
    return L(m.encoder, head).map(params)

In [None]:
# Combine data, model, loss and the parameter-group splitter into a Learner.
learn = Learner(dls, arch, loss_func=crit, splitter=_retinanet_split)

In [None]:
# Freeze the learner's parameter groups up to the last one (groups come from `_retinanet_split`).
learn.freeze()

In [None]:
# Run the learning-rate finder to help pick a learning rate for training.
learn.lr_find()

In [None]:
# (Leftover `%debug` post-mortem call removed: it opens an interactive debugger prompt and
# stalls a Restart & Run All. Run `%debug` by hand right after a failing cell when needed.)