# References

In [21]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

import sys
sys.path.append('../../fastai/old/') #fastai version 0.7
#sys.path.append('../../fastai/') #fastai version 1

from fastai.conv_learner import *
from fastai.dataset import *
import torchvision.models as pytorch_models

import pdb
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
import pandas as pd
import matplotlib.pyplot as plt
import pydicom
import numpy as np
from pathlib import Path
from glob import glob
from matplotlib.patches import Rectangle
import png
from tqdm import tqdm_notebook as tqdm
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
from cycler import cycler
import datetime

# Global Variables

In [22]:
# --- Device setup -----------------------------------------------------------
torch.cuda.set_device(0)
torch.backends.cudnn.benchmark = True

# --- Dataset locations, all relative to the competition root `dp` -----------
dp = Path('../../datasets/RSNA_PDC/')
DICOMS = dp / 'stage_2_train_images'   # source DICOM files, one per patient id
PNGS = dp / 'train2_png'               # PNG copies of the training images
SUBMISSIONS = dp / 'submissions'       # generated submission CSVs

# Folder names (not full paths) of the two test sets.
TEST_ONE, TEST_TWO = 'test1_png', 'test2_png'

# Create the output folders up front; existing folders are left untouched.
PNGS.mkdir(exist_ok=True)
SUBMISSIONS.mkdir(exist_ok=True)

# --- Model and training hyper-parameters ------------------------------------
f_model = resnet34
#f_model = pytorch_models.inception_v3(pretrained=True)

sz, bs = 512, 16                 # network input size and batch size
validation_percentage = 0.05     # share of the training images held out for validation
original_image_size = 1024       # side length predictions are scaled back to
num_colr = 12                    # number of distinct colours used when drawing boxes

# Classes

In [23]:
class ObjDetDataset(Dataset):
    """Pair every sample of a wrapped dataset with one extra label.

    Item ``i`` is ``(x, (y, y2[i]))`` where ``(x, y)`` is item ``i`` of ``ds``.
    """

    def __init__(self, ds, y2):
        self.ds, self.y2 = ds, y2

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        image, target = self.ds[i]
        extra = self.y2[i]
        return (image, (target, extra))


class ConcatLblDataset(Dataset):
    """Wrap a dataset so each target becomes ``(y, y2[i])``.

    The wrapped dataset's ``sz`` attribute is copied onto the wrapper so code
    that reads ``dataset.sz`` keeps working.
    """

    def __init__(self, ds, y2):
        self.ds = ds
        self.y2 = y2
        self.sz = ds.sz

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        image, target = self.ds[i]
        return (image, (target, self.y2[i]))

class ConcatLblDataset_TestData(Dataset):
    """Test-set wrapper that returns the same placeholder target for every item.

    Only the input is loaded (``ds.get_x``) and transformed (``ds.transform``);
    the target is always the object given as ``y_tuple_placeholder``.
    """

    def __init__(self, ds, y_tuple_placeholder):
        self.ds = ds
        self.y = y_tuple_placeholder
        self.sz = ds.sz

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        raw = self.ds.get_x(i)
        transformed = self.ds.transform(raw)
        return (transformed, self.y)
    
class StdConv(nn.Module):
    """3x3 convolution block: conv -> ReLU -> batch norm -> dropout.

    nin / nout: input and output channel counts.
    stride:     convolution stride (the default of 2 halves the spatial size).
    drop:       dropout probability applied after batch norm.
    """

    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, kernel_size=3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        activated = F.relu(self.conv(x))
        normalised = self.bn(activated)
        return self.drop(normalised)
        

class OutConv(nn.Module):
    """Output layer producing class and box activations for every anchor.

    k:    number of anchor shapes per grid cell.
    nin:  number of input channels.
    bias: value the classification bias is initialised to.

    `forward` returns ``[class_activations, box_activations]``, each passed
    through `flatten_conv` with ``k``. The class branch has
    ``len(id2cat) + 1`` outputs per anchor, the box branch 4.
    """

    def __init__(self, k, nin, bias):
        super().__init__()
        self.k = k
        n_outputs_per_anchor = len(id2cat) + 1
        self.oconv1 = nn.Conv2d(nin, n_outputs_per_anchor * k, 3, padding=1)
        self.oconv2 = nn.Conv2d(nin, 4 * k, 3, padding=1)
        # Start the classifier from a constant bias instead of the default init.
        self.oconv1.bias.data.zero_().add_(bias)

    def forward(self, x):
        clas_out = flatten_conv(self.oconv1(x), self.k)
        bbox_out = flatten_conv(self.oconv2(x), self.k)
        return [clas_out, bbox_out]


class SSD_MultiHead(nn.Module):
    """SSD detection head working on a 512-channel feature map.

    Three stride-2 `StdConv` blocks are applied in sequence. After each one
    the map is adaptively max-pooled to the matching entry of the global
    `anc_grids` and passed to its own `OutConv`. The class and box activations
    of the three scales are concatenated along dim 1.

    k:    number of anchor shapes per grid cell.
    bias: initial bias of every classification output conv.
    drop: dropout probability for the input and for each `StdConv`. It
          defaults to 0.1 (the `StdConv` default), so the head can also be
          built as ``SSD_MultiHead(k, bias)``.

    Returns ``[class_activations, box_activations]``.
    """

    def __init__(self, k, bias, drop=0.1):
        super().__init__()
        self.drop = nn.Dropout(drop)
        self.sconv1 = StdConv(512,256, drop=drop)
        self.sconv2 = StdConv(256,256, drop=drop)
        self.sconv3 = StdConv(256,256, drop=drop)
        # NOTE(review): out0 is never used in forward(). It is kept because it
        # still contributes parameters to the module's state dict, so removing
        # it would presumably break loading of weights saved with it.
        self.out0 = OutConv(k, 256, bias)
        self.out1 = OutConv(k, 256, bias)
        self.out2 = OutConv(k, 256, bias)
        self.out3 = OutConv(k, 256, bias)

    def forward(self, x):
        x = self.drop(F.relu(x))
        x = self.sconv1(x)
        x = F.adaptive_max_pool2d(x, anc_grids[0]) # adaptive maxpool for 1st size of anchors
        o1c,o1l = self.out1(x)
        x = self.sconv2(x)
        x = F.adaptive_max_pool2d(x, anc_grids[1]) # adaptive maxpool for 2nd size of anchors
        o2c,o2l = self.out2(x)
        x = self.sconv3(x)
        x = F.adaptive_max_pool2d(x, anc_grids[2]) # adaptive maxpool for 3rd size of anchors
        o3c,o3l = self.out3(x)
        return [torch.cat([o1c,o2c,o3c], dim=1),
                torch.cat([o1l,o2l,o3l], dim=1)]


class BCE_Loss(nn.Module):
    """Summed binary cross-entropy over one-hot class targets.

    Targets are one-hot encoded with ``num_classes + 1`` slots. The last slot
    is dropped from both target and prediction before the loss is computed.
    The summed loss is divided by ``num_classes``.

    Subclasses override `get_weight` to supply per-element weights.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def get_weight(self, x, t):
        """Per-element loss weights; the base class uses none."""
        return None

    def forward(self, pred, targ):
        one_hot = one_hot_embedding(targ, self.num_classes + 1)
        target = V(one_hot[:, :-1].contiguous())
        logits = pred[:, :-1]
        weight = self.get_weight(logits, target)
        summed = F.binary_cross_entropy_with_logits(logits, target, weight, reduction='sum')
        return summed / self.num_classes


class FocalLoss(BCE_Loss):
    """`BCE_Loss` with focal weighting (alpha = 0.25, gamma = 1)."""

    def get_weight(self, x, t):
        """Return ``alpha_t * (1 - pt) ** gamma`` for logits `x` and targets `t`.

        ``pt`` is the predicted probability of the target value. ``alpha_t``
        is ``alpha`` where the target is 1 and ``1 - alpha`` where it is 0.
        """
        alpha, gamma = 0.25, 1
        prob = x.sigmoid()
        pt = prob*t + (1-prob)*(1-t)
        alpha_t = alpha*t + (1-alpha)*(1-t)
        modulator = (1-pt).pow(gamma)
        return alpha_t * modulator

# Functions 

In [20]:
def show_img(im, figsize=None, ax=None):
    """Show `im` on `ax` (a new figure is created when none is given) and return the axes.

    Both axis rulers are left visible. A later `show_img` definition in this
    cell has the same name and replaces this one once the cell has run.
    """
    if not ax:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    for ruler in (ax.get_xaxis(), ax.get_yaxis()):
        ruler.set_visible(True)
    return ax

def from_dicom_to_png(dicom_path, png_path):
    """Convert one DICOM file to an 8-bit greyscale PNG.

    Pixel values are clipped at 0 and rescaled so the largest value maps to
    255. An image with no positive pixel is written as all black instead of
    dividing by zero.

    dicom_path: path of the DICOM file to read.
    png_path:   path of the PNG file to write (overwritten if present).

    A later definition named `from_dicom_to_png` in this cell takes a parsed
    dict instead and replaces this one once the cell has run.
    """
    # Work in float to avoid overflow or underflow while rescaling.
    pixels = pydicom.dcmread(dicom_path).pixel_array.astype(float)
    shape = pixels.shape
    peak = pixels.max()
    if peak > 0:
        # Rescale the grey values to 0-255.
        scaled = (np.maximum(pixels, 0) / peak) * 255.0
    else:
        scaled = np.zeros_like(pixels)
    scaled = np.uint8(scaled)
    with open(png_path, 'wb') as png_file:
        # png.Writer takes (width, height); the array shape is (rows, columns).
        writer = png.Writer(shape[1], shape[0], greyscale=True)
        writer.write(png_file, scaled)
        
        
def hw_bb(row):
    """Turn a label row ('x', 'y', 'width', 'height') into ``[y, x, y + height, x + width]``.

    Note that `bb_hw` adds 1 to each extent, so ``bb_hw(hw_bb(row))`` returns
    width + 1 and height + 1 rather than the original values.
    """
    top, left = row['y'], row['x']
    bottom = row['height'] + top
    right = row['width'] + left
    return np.array([top, left, bottom, right])

def bb_hw(a):
    """Convert ``[y1, x1, y2, x2]`` corners to ``[x_upper_left, y_upper_left, width, height]``.

    Width and height are computed inclusively: ``x2 - x1 + 1`` and ``y2 - y1 + 1``.
    """
    top, left, bottom, right = a[0], a[1], a[2], a[3]
    width = right - left + 1
    height = bottom - top + 1
    return np.array([left, top, width, height])

def parse_data(df):
    """
    Group the rows of the labels dataframe by patient.

    `df` needs the columns 'patientId', 'Target', 'x', 'y', 'width' and
    'height'. The result is a defaultdict keyed by patient id:

      parsed = {

        'patientId-00': {
            'dicom': path of the DICOM file under DICOMS,
            'png': path of the PNG file under PNGS,
            'label': the row's 'Target' value,
            'boxes': list of boxes, one `hw_bb(row)` per row of this patient
        },
        'patientId-01': { ... }, ...

      }

    Looking up an unknown patient id creates an empty entry (all fields None,
    no boxes).
    """
    def _blank_entry():
        return {'dicom': None,
                'png': None,
                'label': None,
                'boxes': []}

    parsed = collections.defaultdict(_blank_entry)
    for _, row in df.iterrows():
        pid = row['patientId']
        entry = parsed[pid]
        entry['dicom'] = str(DICOMS/f'{pid}.dcm')
        entry['png'] = str(PNGS/f'{pid}.png')
        entry['label'] = row['Target']
        entry['boxes'].append(hw_bb(row))

    return parsed

def get_lrg(b):
    """Return a one-element list holding the box with the largest area.

    b: non-empty list of ``[y1, x1, y2, x2]`` numpy arrays. Area is the
       product of the last two entries minus the first two. On ties the box
       that comes first in `b` wins.

    Raises ValueError (a subclass of the previously raised Exception) when
    `b` is empty.
    """
    if not b:
        raise ValueError('get_lrg() needs at least one bounding box')
    # np.prod replaces the alias np.product, which NumPy 2.0 removed.
    largest = max(b, key=lambda box: np.prod(box[-2:] - box[:2]))
    return [largest]

def show_img(im, figsize=None, ax=None):
    """Show `im` with a grey colour map and return the axes used.

    A new figure of size `figsize` is created when no `ax` is passed. The
    axis rulers are left as matplotlib draws them.
    """
    if not ax:
        ax = plt.subplots(figsize=figsize)[1]
    ax.imshow(im, cmap='gray')
    return ax

def draw_outline(o, lw):
    """Give the matplotlib artist `o` a black outline of line width `lw`."""
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])

def draw_rect(ax, b, col='white'):
    """Draw the unfilled, black-outlined rectangle `b` on `ax`.

    b: its first two entries are the rectangle origin, its last two are
       passed on as width and height.
    """
    origin, extent = b[:2], b[-2:]
    rect = patches.Rectangle(origin, *extent, fill=False, edgecolor=col, lw=2)
    draw_outline(ax.add_patch(rect), 4)
    
def draw_text(ax, xy, txt, sz=14, col='white'):
    """Write bold, black-outlined `txt` on `ax`, top-aligned at position `xy`."""
    style = dict(verticalalignment='top', color=col, fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **style)
    draw_outline(label, 1)
    
def draw_im(im, ann, ax=None):
    """Show `im` and overlay every box of the annotation `ann` with its class name.

    ann: dict with a 'label' key (looked up in the global `cats`) and a
         'boxes' list of corner-format boxes.
    """
    ax = show_img(im, figsize=(12,6), ax=ax)
    class_name = cats[ann['label']]
    for box in map(bb_hw, ann['boxes']):
        draw_rect(ax, box)
        draw_text(ax, box[:2], class_name, sz=16)
        
def draw_idx(im_a, ax=None):
    """Read the DICOM file of annotation `im_a` and draw it with its boxes.

    im_a: one entry of the parsed annotations; its 'dicom' key holds the path
          of the file to read.
    ax:   optional axes to draw on, passed through to `draw_im`.
    """
    # dcmread replaces the deprecated alias read_file and matches the other
    # DICOM reader in this notebook.
    im = pydicom.dcmread(im_a['dicom']).pixel_array
    draw_im(im, im_a, ax=ax)
    
def from_dicom_to_png(parsed):
    """Write the pixel data of every parsed DICOM file to its PNG path.

    parsed: mapping whose values hold a 'dicom' path to read and a 'png' path
            to write. The pixel array is written unchanged (no rescaling).

    This definition replaces the earlier
    ``from_dicom_to_png(dicom_path, png_path)`` defined in the same cell.
    """
    # imageio is used here but never imported at the top of the notebook.
    import imageio

    for entry in parsed.values():
        im = pydicom.dcmread(entry['dicom']).pixel_array
        imageio.imwrite(entry['png'], im)



def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
    """Show `im` and draw the boxes in `bbox`, labelled with index, class and score.

    bbox:   array reshaped to rows of 4 corner values.
    clas:   optional class id per box; an id equal to ``len(id2cat)`` is shown as 'bg'.
    prs:    optional score per box; boxes scoring at or below `thresh` are skipped.
    thresh: score threshold, only applied when `prs` is given.

    Boxes whose converted width is not positive are skipped as well.
    """
    boxes = [bb_hw(o) for o in bbox.reshape(-1,4)]
    n_boxes = len(boxes)
    if prs is None:
        prs = [None]*n_boxes
    if clas is None:
        clas = [None]*n_boxes
    ax = show_img(im, ax=ax)
    for i, (box, c, pr) in enumerate(zip(boxes, clas, prs)):
        if not (box[2] > 0):
            continue
        if pr is not None and not (pr > thresh):
            continue
        colour = colr_list[i%num_colr]
        draw_rect(ax, box, col=colour)
        txt = f'{i}: '
        if c is not None:
            txt += 'bg' if c==len(id2cat) else id2cat[c]
        if pr is not None:
            txt += f' {pr:.2f}'
        draw_text(ax, box[:2], txt, col=colour)


def get_cmap(N):
    """Return a function mapping a value in ``0 .. N-1`` to an RGBA colour of the 'Set3' colour map."""
    scale = mcolors.Normalize(vmin=0, vmax=N-1)
    mappable = cmx.ScalarMappable(norm=scale, cmap='Set3')
    return mappable.to_rgba
            
# Pre-compute the num_colr box colours that show_ground_truth cycles through.
cmap = get_cmap(num_colr)
colr_list = list(map(cmap, map(float, range(num_colr))))
            

def hw2corners(ctr, hw):
    """Convert centre/size boxes to corner boxes: ``[ctr - hw/2, ctr + hw/2]`` joined along dim 1."""
    half = hw/2
    return torch.cat([ctr - half, ctr + half], dim=1)

def flatten_conv(x,k):
    """Reshape a 4-D conv output ``(batch, features, g1, g2)`` to ``(batch, -1, features // k)``.

    The feature axis is moved last first, so each output row holds the
    ``features // k`` activations of one anchor at one grid position.
    """
    n_batch, n_feat, _, _ = x.size()
    channels_last = x.permute(0,2,3,1).contiguous()
    return channels_last.view(n_batch, -1, n_feat//k)

    
def one_hot_embedding(labels, num_classes):
    """One-hot encode integer `labels` by picking rows of a ``num_classes`` identity matrix.

    The labels are moved to the CPU first, so the result is a CPU tensor.
    """
    identity = torch.eye(num_classes)
    row_ids = labels.data.cpu()
    return identity[row_ids]


def intersect(box_a, box_b):
    """Pairwise intersection area of two sets of corner boxes.

    box_a is ``(A, 4)`` and box_b is ``(B, 4)``; the result is ``(A, B)``.
    Boxes that do not overlap give 0.
    """
    a, b = box_a[:, None], box_b[None, :]
    lower = torch.max(a[..., :2], b[..., :2])
    upper = torch.min(a[..., 2:], b[..., 2:])
    overlap = torch.clamp(upper - lower, min=0)
    return overlap[..., 0] * overlap[..., 1]

def box_sz(b):
    """Area of each corner box in the ``(N, 4)`` tensor `b`."""
    side0 = b[:, 2] - b[:, 0]
    side1 = b[:, 3] - b[:, 1]
    return side0 * side1

def jaccard(box_a, box_b):
    """Pairwise intersection-over-union of two sets of corner boxes, shaped ``(A, B)``."""
    inter = intersect(box_a, box_b)
    area_a = box_sz(box_a).unsqueeze(1)
    area_b = box_sz(box_b).unsqueeze(0)
    return inter / (area_a + area_b - inter)

def get_y(bbox,clas):
    """Scale target boxes by ``1/sz`` and drop the entries that hold no box.

    bbox: flat tensor of corner boxes, viewed as rows of 4.
    clas: class id per box.

    Rows whose third entry is not larger than their first are discarded,
    together with their class ids.
    """
    scaled = bbox.view(-1,4)/sz
    extent = scaled[:,2] - scaled[:,0]
    keep = (extent > 0).nonzero()[:,0]
    return scaled[keep], clas[keep]

def actn_to_bb(actn, anchors):
    """Turn raw box activations into corner boxes relative to the anchors.

    Activations are squashed with tanh. The first two shift each anchor
    centre by at most half a grid cell (global `grid_sizes`); the last two
    scale the anchor size by a factor between 0.5 and 1.5.
    """
    squashed = torch.tanh(actn)
    shift, scale = squashed[:,:2], squashed[:,2:]
    centers = (shift/2 * grid_sizes) + anchors[:,:2]
    sizes = (scale/2+1) * anchors[:,2:]
    return hw2corners(centers, sizes)

def map_to_ground_truth(overlaps, print_it=False):
    """Assign a ground-truth box to every anchor.

    overlaps: ``(n_gt, n_anchors)`` overlap matrix.

    Each anchor starts with the ground-truth box it overlaps most. The
    best-matching anchor of each ground-truth box is then forced onto that
    box, with its overlap set to 1.99 so it passes any later threshold. If
    several boxes share a best anchor, the last one wins.

    Returns ``(gt_overlap, gt_idx)``, one entry per anchor.
    """
    prior_overlap, prior_idx = overlaps.max(1)
    if print_it:
        print(prior_overlap)
    gt_overlap, gt_idx = overlaps.max(0)
    gt_overlap[prior_idx] = 1.99
    for gt_no in range(len(prior_idx)):
        gt_idx[prior_idx[gt_no]] = gt_no
    return gt_overlap, gt_idx

def ssd_1_loss(b_c, b_bb, bbox, clas, print_it=False):
    """Localisation and classification loss for a single image.

    b_c, b_bb:  class and box activations of this image.
    bbox, clas: target boxes and class ids (padding is removed by `get_y`).
    print_it:   forwarded to `map_to_ground_truth` for debugging output.

    Anchors overlapping their assigned ground-truth box by more than 0.4 are
    positives; all others get the background class id ``len(id2cat)``. The
    localisation loss is the mean absolute corner error over the positives.
    The classification loss comes from the global `loss_f`.

    Returns ``(loc_loss, clas_loss)``.
    """
    bbox,clas = get_y(bbox,clas)
    a_ic = actn_to_bb(b_bb, anchors)
    overlaps = jaccard(bbox.data, anchor_cnr.data)
    gt_overlap,gt_idx = map_to_ground_truth(overlaps,print_it)
    gt_clas = clas[gt_idx]
    pos = gt_overlap > 0.4
    pos_idx = torch.nonzero(pos)[:,0]
    # `pos == 0` selects the negatives for both byte and bool masks;
    # `1 - pos` is rejected once comparisons return bool tensors.
    gt_clas[pos == 0] = len(id2cat)
    gt_bbox = bbox[gt_idx]
    loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
    clas_loss  = loss_f(b_c, gt_clas)
    return loc_loss, clas_loss

def ssd_loss(pred,targ, print_it=False):
    """Sum of localisation and classification loss over a batch.

    pred: ``(class_activations, box_activations)`` as returned by the model.
    targ: ``(boxes, classes)`` targets of the batch.
    print_it: print the two loss components after summing.

    Each image is scored with `ssd_1_loss`; the per-image losses are added up.
    """
    lcs,lls = 0.,0.
    for b_c,b_bb,bbox,clas in zip(*pred,*targ):
        # ssd_1_loss takes (b_c, b_bb, bbox, clas, print_it); it reads the
        # classification criterion from a global, not from an argument.
        loc_loss,clas_loss = ssd_1_loss(b_c,b_bb,bbox,clas,print_it)
        lls += loc_loss
        lcs += clas_loss
    # float() handles both scalar tensors and the plain 0. of an empty batch.
    if print_it: print(f'loc: {float(lls)}, clas: {float(lcs)}')
    return lls+lcs

def torch_gt(ax, ima, bbox, clas, prs=None, thresh=0.4):
    """Tensor front end for `show_ground_truth`.

    Boxes are scaled by `sz` and truncated to integers; boxes, classes and
    (when given) scores are converted to numpy before drawing.
    """
    boxes_px = to_np((bbox*sz).long())
    clas_np = to_np(clas)
    prs_np = None if prs is None else to_np(prs)
    return show_ground_truth(ax, ima, boxes_px, clas_np, prs_np, thresh)

def plot_results(thresh):
    """Plot raw predictions (no NMS) for up to 12 images of one validation batch.

    thresh: fraction of the image's highest class score that a box must
            exceed to be drawn.

    Uses the globals `md_mbb_csv`, `learn` and `anchors`.
    """
    x,_ = next(iter(md_mbb_csv.val_dl))
    b_clas,b_bb = learn.model(V(x))

    n_images = x.shape[0]
    # Denormalise the whole batch once rather than once per subplot.
    imgs = md_mbb_csv.val_ds.ds.denorm(to_np(x))
    fig, axes = plt.subplots(3, 4, figsize=(16, 12))
    for idx,ax in enumerate(axes.flat):
        # A final batch can hold fewer images than the grid has axes.
        if idx >= n_images: break
        a_ic = actn_to_bb(b_bb[idx], anchors)
        clas_pr, clas_ids = b_clas[idx].max(1)
        clas_pr = clas_pr.sigmoid()
        # .item() reads the scalar; indexing a 0-dim tensor with [0] fails.
        torch_gt(ax, imgs[idx], a_ic, clas_ids, clas_pr, clas_pr.max().item()*thresh)
    plt.tight_layout()
    
###non max suppression
def nms(boxes, scores, overlap=0.5, top_k=100):
    """Greedy non-maximum suppression.

    boxes:   ``(N, 4)`` tensor of corner boxes.
    scores:  ``(N,)`` tensor of scores for those boxes.
    overlap: boxes whose IoU with an already kept box exceeds this are dropped.
    top_k:   only the `top_k` highest-scoring boxes are considered.

    Returns ``(keep, count)``. ``keep`` is a long tensor of length N whose
    first ``count`` entries are the indices of the kept boxes, best score
    first. Empty input returns ``(keep, 0)``, so callers can always unpack
    two values.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0: return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    _, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1: break
        idx = idx[:-1]  # remove kept element from view
        # Corners of the intersection between box i and every remaining box.
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])
        # Negative extents mean the boxes do not overlap.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

def show_nmf(idx, dataset, xBatch, yBatch, is_test, b_bb, b_clas):
    """Plot the predictions for image `idx` of a batch after non-maximum suppression.

    idx:             position of the image inside the batch.
    dataset:         object whose ``denorm`` undoes the input normalisation.
    xBatch:          batch of input images (array accepted by ``dataset.denorm``).
    yBatch, is_test: kept for interface compatibility; the targets are not drawn.
    b_bb, b_clas:    box and class activations of the whole batch.

    For every class except the last (background) one, anchors scoring above
    0.25 go through `nms` (IoU 0.4, top 50). The survivors are drawn with
    `torch_gt` using a score threshold of 0.1. Prints a note and returns
    without plotting when no box survives.
    """
    ima = dataset.denorm(xBatch)[idx]
    a_ic = actn_to_bb(b_bb[idx], anchors)
    conf_scores = b_clas[idx].sigmoid().t().data

    out1,out2,cc = [],[],[]
    for cl in range(0, len(conf_scores)-1):
        c_mask = conf_scores[cl] > 0.25
        if c_mask.sum() == 0: continue
        scores = conf_scores[cl][c_mask]
        l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
        boxes = a_ic[l_mask].view(-1, 4)
        ids, count = nms(boxes.data, scores, 0.4, 50)
        ids = ids[:count]
        out1.append(scores[ids])
        out2.append(boxes.data[ids])
        cc.append([cl]*count)
    if not cc:
        # Report the image index; the name `i` used here before was undefined.
        print(f"{idx}: empty array")
        return
    cc = T(np.concatenate(cc))
    out1 = torch.cat(out1)
    out2 = torch.cat(out2)

    fig, ax = plt.subplots(figsize=(8,8))
    torch_gt(ax, ima, out2, cc, out1, 0.1)
    

def nms_pred(f_clas, f_bb, anchors, thresh, input_sz, im_size):
    """Run non-maximum suppression on the raw activations of one image.

    f_clas:   raw class activations, one row per anchor box and one column
              per category plus a last column for background.
    f_bb:     raw box activations, one row of 4 values per anchor box.
    anchors:  anchor boxes passed on to `actn_to_bb`.
    thresh:   minimum sigmoid score a box needs to be considered.
    input_sz: size the images are scaled to before entering the network.
    im_size:  real, original size of the image.

    Returns ``(cc, out1, out2)``:
      cc:   category id of every kept box.
      out1: score of every kept box for that category.
      out2: corner coordinates of every kept box, scaled to the original
            image size.
    All three are empty lists when no box passes `thresh`, otherwise numpy
    arrays.
    """
    a_ic = actn_to_bb(f_bb, anchors)
    conf_scores = f_clas.sigmoid().t().data

    out1,out2,cc = [],[],[]
    # The last row of conf_scores is the background class and is skipped.
    for cl in range(0, len(conf_scores)-1):
        c_mask = conf_scores[cl] > thresh
        if c_mask.sum() == 0: continue
        scores = conf_scores[cl][c_mask]
        l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
        boxes = a_ic[l_mask].view(-1, 4)
        ids, count = nms(boxes.data, scores, 0.4, 50)
        ids = ids[:count]
        out1.append(scores[ids])
        out2.append(boxes.data[ids])
        cc.append([cl]*count)
    if not cc:
        return cc, out1, out2
    cc = to_np(T(np.concatenate(cc)))
    out1 = to_np(torch.cat(out1))
    out2 = to_np(torch.cat(out2))
    # Scale to network-input pixels, then to the original image size. The
    # ratio uses the `input_sz` argument rather than the global `sz`.
    out2 = out2 * input_sz * (im_size / input_sz)
    return cc, out1, out2

# Set up data

In [None]:
MBB_CSV = dp/'tmp/mbb.csv'
# to_csv does not create missing folders, so make sure tmp/ exists first.
MBB_CSV.parent.mkdir(parents=True, exist_ok=True)

labs2 = pd.read_csv(dp/'stage_2_train_labels.csv')

# Rows without a box get a placeholder box with x = y = 0 and
# width = height = 1023. fillna returns a new frame here: the chained
# `labs2.x.fillna(..., inplace=True)` form is not guaranteed to write back
# into the frame.
labs2 = labs2.fillna({'x': 0, 'y': 0, 'width': 1023, 'height': 1023})

parsed = parse_data(labs2)

# One row per image: file name, class label and all box corners as one string.
df_cat_bbxs = pd.DataFrame({
    'fn': [os.path.basename(entry['png']) for entry in parsed.values()],
    'cat': [entry['label'] for entry in parsed.values()],
    'bbox': [' '.join(str(int(coord)) for box in entry['boxes'] for coord in box)
             for entry in parsed.values()]})

df_bbxs = df_cat_bbxs[['fn', 'bbox']]

df_bbxs.to_csv(MBB_CSV, index=False)

cats = {0: 'normal', 1: 'pneumonia'}
# Class name of every box, image by image. Normal images carry exactly one
# label, for their placeholder box.
mc = []
for index, row in tqdm(df_cat_bbxs.iterrows(), total=df_cat_bbxs.shape[0]):
    if row['cat']==0:
        mc.append([cats[0]])
    else:
        n_o_bbxs = len(row['bbox'].split()) // 4   # four numbers per box
        mc.append([cats[1]] * n_o_bbxs)

id2cat = list(cats.values())
cat2id = {v:k for k,v in enumerate(id2cat)}

# Images have different numbers of boxes, so build an object array explicitly;
# np.array() on ragged input raises on NumPy >= 1.24.
mcs = np.empty(len(mc), dtype=object)
for pos, labels in enumerate(mc):
    mcs[pos] = np.array([cat2id[p] for p in labels])

val_idxs = get_cv_idxs(len(df_bbxs), val_pct=validation_percentage)
#val_idxs = []
((val_mcs,trn_mcs),) = split_by_idx(val_idxs, mcs)

aug_tfms = [RandomRotate(3, p=0.5, tfm_y=TfmType.COORD),
            RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
            RandomFlip(tfm_y=TfmType.COORD)]
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=aug_tfms)
md_mbb_csv = ImageClassifierData.from_csv(dp,
                                          os.path.basename(PNGS),
                                          MBB_CSV,
                                          tfms=tfms,
                                          bs=bs,
                                          continuous=True,
                                          num_workers=4,
                                          test_name=TEST_TWO,
                                          val_idxs=val_idxs)

# Attach the per-box class ids to the box targets of both data loaders.
trn_ds2 = ConcatLblDataset(md_mbb_csv.trn_ds, trn_mcs)
val_ds2 = ConcatLblDataset(md_mbb_csv.val_ds, val_mcs)
md_mbb_csv.trn_dl.dataset = trn_ds2
md_mbb_csv.val_dl.dataset = val_ds2

# Create Anchors

In [18]:
# Grid resolutions of the three detection scales (4x4, 2x2 and 1x1 cells).
anc_grids = [4,2,1]
# anc_grids = [2]
# Zoom factors and aspect ratios; every combination becomes one anchor shape.
anc_zooms = [0.7, 1., 1.3]
# anc_zooms = [1.]
anc_ratios = [(1.,1.), (1.,0.5), (0.5,1.)]
# anc_ratios = [(1.,1.)]
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
# k = number of anchor shapes per grid cell (3 zooms x 3 ratios = 9).
k = len(anchor_scales)
# Half a cell width per grid: the distance from the image border to the first cell centre.
anc_offsets = [1/(o*2) for o in anc_grids]

# Cell-centre coordinates of all grids, concatenated grid after grid.
anc_x = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
# Every centre is repeated k times, once per anchor shape.
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)

# Anchor sizes: each shape divided by the grid resolution, listed cell by cell.
anc_sizes  =   np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids])
# Cell side length (1/ag) of the grid each anchor belongs to, as a column; read by actn_to_bb.
grid_sizes = V(np.concatenate([np.array([ 1/ag       for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids]), requires_grad=False).unsqueeze(1)
# anchors: one row per anchor holding its centre followed by its size.
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
# The same anchors in corner form, used for the overlap computation in ssd_1_loss.
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])

# Train

In [None]:
# ssd_1_loss reads its classification criterion from the global `loss_f`,
# which no other cell assigns.
# NOTE(review): FocalLoss is assumed because the first checkpoint is saved as
# 'fl0'; use BCE_Loss(len(id2cat)) if plain BCE was intended.
loss_f = FocalLoss(len(id2cat))

head_reg4 = SSD_MultiHead(k = k, bias=-4., drop=0.4)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md_mbb_csv, models)
learn.opt_fn = optim.Adam
learn.crit = ssd_loss

In [None]:
# Base learning rate and three rates derived from it (lr/100, lr/10, lr).
lr = 1e-3
lrs = np.array([lr/100,lr/10,lr])

In [None]:
# Learning-rate search starting at lrs/1000 and ending at 1.0; the plot skips the last point.
learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=1)

In [None]:
# Rates used for training; this overrides the `lr`/`lrs` defined for the search above.
lr = 1e-3*1.2
lrs = np.array([lr/10,lr,lr])

In [None]:
# First training stage: one cycle of 20 epochs, then save the weights as 'fl0'.
#learn.fit(lrs, n_cycle = 2, cycle_len=10, use_clr=(20,10)) #drop4.1
learn.fit(lrs, n_cycle = 1, cycle_len=20, use_clr=(30,20))

learn.save('fl0')

In [None]:
# Plot the learning-rate schedule of the stage that just ran.
learn.sched.plot_lr()

In [None]:
# Second training stage: presumably unfreezes the last two layer groups (TODO confirm
# against fastai's freeze_to), then trains 15 epochs at a quarter of the rates.
learn.freeze_to(-2)
#learn.fit(lrs/4, n_cycle = 1, cycle_len=10, use_clr=(20,10)) #drop4.1
learn.fit(lrs/4, n_cycle = 1, cycle_len=15, use_clr=(40,15))

In [None]:
# Save the weights of the second training stage.
learn.save('drop4.2')

In [None]:
# Plot the learning-rate schedule of the second stage.
learn.sched.plot_lr()

# Prediction

## New dataset object without augmentations

In [None]:
# Same data as for training, but without augmentation and without
# transforming the targets.
aug_tfms_pred = []
tfms_pred = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.NO, aug_tfms=aug_tfms_pred)
md_mbb_csv_pred = ImageClassifierData.from_csv(dp,
                                          os.path.basename(PNGS),
                                          MBB_CSV,
                                          tfms=tfms_pred,
                                          bs=bs,
                                          continuous=True,
                                          num_workers=4,
                                          test_name=TEST_TWO,
                                          # Reuse the training split: trn_mcs / val_mcs
                                          # below were cut with these indices and would
                                          # not line up with a freshly drawn split.
                                          val_idxs=val_idxs)

trn_ds2_pred = ConcatLblDataset(md_mbb_csv_pred.trn_ds, trn_mcs)
val_ds2_pred = ConcatLblDataset(md_mbb_csv_pred.val_ds, val_mcs)
md_mbb_csv_pred.trn_dl.dataset = trn_ds2_pred
md_mbb_csv_pred.val_dl.dataset = val_ds2_pred

## Dummy test set targets

In [None]:
#test_paths = glob(os.path.join(dp/TEST_TWO, '*.png'))
#test_mcs = np.empty(len(test_paths), dtype=object)
#for i in range(len(test_paths)):
    #test_mcs[i] = np.zeros(1, dtype=int)

#test_mcs[0:10]

# The test set has no labels. Every test item gets the same dummy target
# (one all-zero box and one class id 0) so batches keep the (x, (bbox, clas))
# layout. The builtin `int` replaces the alias `np.int`, removed in NumPy 1.24.
test_ds_placeholder_target = (np.zeros(4, dtype=np.float32), np.zeros(1, dtype=int))

test_ds_pred = ConcatLblDataset_TestData(md_mbb_csv_pred.test_ds, test_ds_placeholder_target)
md_mbb_csv_pred.test_dl.dataset = test_ds_pred

## New learner object 

In [17]:
# Rebuild the head exactly as for training. `drop` has to be passed, and
# `k` comes from the "Create Anchors" cell, which must have run first.
head_reg4_pred = SSD_MultiHead(k, -4., drop=0.4)
models_pred = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4_pred)
learn_pred = ConvLearner(md_mbb_csv_pred, models_pred)
learn_pred.opt_fn = optim.Adam
learn_pred.crit = ssd_loss

# NOTE(review): the checkpoint name was an empty string. 'drop4.2' is the last
# checkpoint saved by the training cells - confirm it is the intended one.
learn_pred.load('drop4.2')

learn_pred.model.eval()

NameError: name 'k' is not defined

## Predict

###  Check if the order in dataset.fnames is the same as in the batches generated by the dataloader

In [None]:
#n=0
#for b in iter(md_mbb_csv_pred.test_dl):
#    print(n)
#    xT, yT = b
#    batchT = learn_pred.model(V(xT))
#    b_clasT,b_bbT = batchT
#    xT = to_np(xT)
#    for i in range(4):
#        ü = bs*n+i
#        print(f'{ü}:{i}')
#        show_img(open_image(dp/md_mbb_csv_pred.test_ds.ds.fnames[ü]))
#        show_nmf(idx=i, 
#                 dataset=md_mbb_csv_pred.test_ds.ds, 
#                 xBatch=xT, 
#                 yBatch=yT, 
#                 is_test=True, 
#                 b_bb=b_bbT, 
#                 b_clas=b_clasT)
#    n = n+1
#    if n >=2:
#        break

### Get a dictionary where key == filename and value is a tuple (class probabilities for each bb, coordinates for each bb); no NMS or similar, just the raw output activations from the net

In [None]:
# Map every test file name to its raw (class, box) activations.
# File names are matched by position, so the test loader must yield the
# images in the order of `fnames`.
dict_fname_to_activations = {}
test_fnames = md_mbb_csv_pred.test_ds.ds.fnames
n_seen = 0
# no_grad: inference only. Without it every stored activation would keep its
# autograd graph alive for the whole test set.
with torch.no_grad():
    for xT, yT in tqdm(md_mbb_csv_pred.test_dl):
        b_clasT,b_bbT = learn_pred.model(V(xT))
        for c, bb in zip(b_clasT,b_bbT):
            dict_fname_to_activations[test_fnames[n_seen]] = (c, bb)
            n_seen += 1

len(dict_fname_to_activations)

### Submission file

In [None]:
#dict with bbx coordinates after nms and in format [confidence x_upper_left y_upper_left width height]
dict_fname_to_preds_nms_hw = {}
for key, value in dict_fname_to_activations.items():
    c, b = dict_fname_to_activations[key]
    cc, out1, out2 = nms_pred(c, b, anchors, 0.25, sz, original_image_size)
    v=[]
    for n, cat in enumerate(cc):
        if cat == 0:
            continue
        else:
            #xyhw = ''.join(str(np.rint(bb_hw(out2[n])))).replace(".", "")
            xyhw = ''.join(str(np.rint(bb_hw(out2[n].clip(min=0))))).replace(".", "")
            conf = str(out1[n])
            concat = ' '.join((conf, xyhw)).replace("[", "").replace("]", "")
            v.append(concat)
    v = ' '.join(v)
    k = os.path.splitext(os.path.basename(key))[0]
    dict_fname_to_preds_nms_hw.update({k:v})  

In [None]:
# Sanity check: number of test images that received a prediction entry.
len(dict_fname_to_preds_nms_hw)

In [None]:
# One row per patient: id and the prediction string built above.
submission_rows = list(dict_fname_to_preds_nms_hw.items())
submission_df = pd.DataFrame(submission_rows, columns=['patientId', 'PredictionString'])

In [None]:
# Preview the first 20 rows of the submission.
submission_df.iloc[:20]

In [None]:
# Timestamp without spaces or colons, so the file name is safe to pass to
# shell commands and valid on every file system.
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
sub_name = f'stage2-drop4.1-NoNegatives-thresh0.25-{timestamp}.csv'
submission_df.to_csv(SUBMISSIONS/sub_name, index=False)

In [None]:
# {…} makes IPython insert the Python path; without the braces the literal
# text "SUBMISSIONS/sub_name" is passed. The quotes keep the path one argument.
!kaggle competitions submit -c rsna-pneumonia-detection-challenge -f "{SUBMISSIONS/sub_name}" -m ""