In [1]:
from fastai.vision.all import *

In [2]:
pd.options.display.max_columns = 100

In [3]:
# Root folder of the RSNA data; CNN embeddings live in a sub-folder of it.
datapath = Path("/home/code-base/scratch_space/rsna_data/")
embspath = datapath/"cnn_embs"
# Training labels table, read from train.csv under the data root.
train_df = pd.read_csv(datapath/'train.csv')

In [4]:
list(embspath.ls())

[Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD2'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_effb3_256_FOLD0'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_effb3_512_FOLD0'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD1'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_FOLD1'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_FOLD2'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_FOLD3'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD3'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/.ipynb_checkpoints'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_EFFNETB3_512_ALL_FROM_FOLD0'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_FOLD0'),
 Path('/home/code-base/scratch_space/rsna_

### Load Embeddings & Preds

In [5]:
# Fold whose embedding folder is loaded below.
fold = 0
folddir = embspath/f'full_512_ALL_FROM_FOLD{fold}'
# folddir = embspath/f'full_EFFNETB3_512_ALL_FROM_FOLD{fold}'
# List the files available for this fold.
list(folddir.ls())

[Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0/xresnet34_embeddings_part0.pth'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0/files.pth'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0/xresnet34_embeddings_finalpart.pth'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0/preds.pth'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0/xresnet34_embeddings_part2.pth'),
 Path('/home/code-base/scratch_space/rsna_data/cnn_embs/full_512_ALL_FROM_FOLD0/xresnet34_embeddings_part1.pth')]

In [6]:
# The embeddings are stored as four shards; concatenate them in the fixed order below.
# presumably this order matches the row order of files.pth / preds.pth — verify against the export script
embeddings = torch.cat([
    torch.load(folddir/f"xresnet34_embeddings_{part}.pth")
    for part in ("part0", "part1", "part2", "finalpart")
])
# embeddings = torch.cat([torch.load(folddir/f"effb3_embeddings_{o}.pth") for o in ["part0", "part1", "part2", "finalpart"]])

In [7]:
# Per-image CNN outputs stored next to the embeddings; column 1 is appended as an extra feature later on.
preds = torch.load(folddir/"preds.pth")

In [8]:
# File paths, one per embedding row; stems are later split as "<slice_no>_<SOPInstanceUID>"
# and the parent folder name is used as the per-sequence id.
files = torch.load(folddir/"files.pth")

In [9]:
embeddings.shape, preds.shape, len(files)

(torch.Size([1790594, 1024]), torch.Size([1790594, 2]), 1790594)

In [10]:
# Append one all-zero row; its index (the old row count) is the value used to pad input sequences,
# so padded positions look up a zero embedding.
# NOTE: not idempotent — re-running this cell appends another row and shifts input_pad_idx.
input_pad_idx = embeddings.shape[0]
embeddings = torch.cat((embeddings, torch.zeros_like(embeddings[:1])), dim=0)

In [11]:
# for effnet: drop up to two trailing singleton dimensions.
# squeeze(-1) leaves the tensor unchanged when the last dim is not 1, so this is a no-op
# for the (N, 1024) xresnet embeddings shown above.
embeddings = embeddings.squeeze(-1).squeeze(-1)

In [12]:
embeddings[input_pad_idx], embeddings.shape, input_pad_idx

(tensor([0., 0., 0.,  ..., 0., 0., 0.]), torch.Size([1790595, 1024]), 1790594)

In [13]:
preds[:,1]

tensor([-1.3467, -1.3633, -0.9565,  ..., -1.1660,  0.1888, -1.2041],
       dtype=torch.float16)

### Metadata Features

In [14]:
# Gather every metadata CSV under <datapath>/metadata into a single frame with a fresh 0..n-1 index.
metadata_path = datapath/'metadata'
metadata_files = get_files(metadata_path, extensions=".csv")
metadf = pd.concat([pd.read_csv(csv_path) for csv_path in metadata_files], ignore_index=True)

In [15]:
metadf.shape

(1790594, 68)

In [16]:
len(metadf['StudyInstanceUID'].unique())

7279

In [17]:
def minmax_scaler(o):
    """Rescale the values of `o` linearly to the [0, 1] range.

    `o` is an array-like supporting `.min()`, `.max()` and arithmetic (e.g. a pandas
    Series or numpy array). The result has the same shape/index as `o`.

    When every value is identical the range is zero; instead of dividing 0 by 0
    (which yields NaN) a float result of all zeros is returned.
    """
    lo, hi = o.min(), o.max()
    span = hi - lo
    # Constant input: avoid 0/0 -> NaN. Multiplying by 0.0 keeps the index and yields floats.
    if span == 0: return (o - lo) * 0.0
    return (o - lo) / span

In [18]:
# Min-max scale the z position (ImagePositionPatient2) within each study.
# `transform` always returns a Series aligned to metadf's own index, so `.values` is in
# metadf row order. `groupby.apply` may instead return the rows grouped by study
# (depending on the pandas version / group_keys), which would silently misassign positions.
scaled_pos = metadf.groupby('StudyInstanceUID')['ImagePositionPatient2'].transform(minmax_scaler)
metadf.loc[:,'scaled_position'] = scaled_pos.values

In [19]:
meta_feat_cols = ['scaled_position']

In [20]:
assert np.isnan(metadf[meta_feat_cols]).sum().sum() == 0

In [21]:
mean_std = metadf[meta_feat_cols].agg(['mean', 'std']).T

In [22]:
mean_std_dict = dict(zip(mean_std.index, mean_std.values.tolist())); mean_std_dict

{'scaled_position': [0.5078721739409284, 0.29139548181397823]}

In [23]:
# standard scaler for training: subtract the column mean and divide by the column std.
# NOTE: modifies metadf in place, so re-running this cell standardises a second time.
for col, (col_mean, col_std) in mean_std_dict.items():
    metadf[col] = (metadf[col] - col_mean) / col_std

In [24]:
meta_feats_dict = dict(zip(metadf['SOPInstanceUID'], metadf[meta_feat_cols].to_numpy()))

In [25]:
len(meta_feats_dict)

1790594

### Fold Metadata

In [26]:
# Row i holds the metadata features of files[i]; the SOPInstanceUID is the second
# "_"-separated token of the file stem.
meta_embeddings = np.vstack([meta_feats_dict[fpath.stem.split("_")[1]] for fpath in files])
meta_embeddings = tensor(meta_embeddings)

In [27]:
# Optionally append column 1 of the CNN outputs (cast to float32) as one more feature column.
use_preds = True
if use_preds:
    meta_embeddings = torch.cat((meta_embeddings, preds[:, 1].float().unsqueeze(1)), dim=1)

In [28]:
meta_embeddings.shape, type(meta_embeddings)

(torch.Size([1790594, 2]), torch.Tensor)

In [29]:
meta_embeddings = torch.cat([meta_embeddings, torch.zeros_like(meta_embeddings[:1])])

In [30]:
embeddings.shape, meta_embeddings.shape

(torch.Size([1790595, 1024]), torch.Size([1790595, 2]))

In [31]:
combined_embeddings = torch.cat([embeddings, meta_embeddings], 1)

In [32]:
combined_embeddings.shape

torch.Size([1790595, 1026])

In [33]:
combined_embeddings

tensor([[ 7.5664, 12.0469,  1.0723,  ...,  0.3210,  1.3457, -1.3467],
        [ 4.5781,  1.9736,  5.1953,  ...,  1.1133, -0.7914, -1.3633],
        [ 9.6797,  2.4531,  7.2617,  ...,  0.9951, -0.9785, -0.9565],
        ...,
        [12.5312,  1.9893, 10.1328,  ...,  1.5674, -0.7226,  0.1888],
        [ 8.1797,  2.8672, 18.1094,  ...,  1.2500, -1.0009, -1.2041],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

### Data

In [34]:
from fastai.text.all import *

In [35]:
# Per-image label column(s) taken from train_df.
image_targets = ['pe_present_on_image']
# Per-exam label columns. The order matters: targets_dict is built from
# train_df[image_targets+exam_targets], so element 0 is the image label and
# elements 1..9 follow the order of this list.
exam_targets = [
    'negative_exam_for_pe', # exam level
    'rv_lv_ratio_gte_1', # exam level
    'rv_lv_ratio_lt_1', # exam level
    'leftsided_pe', # exam level
    'chronic_pe', # exam level
    'rightsided_pe', # exam level
    'acute_and_chronic_pe', # exam level
    'central_pe', # exam level
    'indeterminate' # exam level
]

In [36]:
targets_df = train_df[['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID']+image_targets+exam_targets]

In [37]:
targets_dict = dict(zip(targets_df['SOPInstanceUID'].values, targets_df[image_targets+exam_targets].values))

In [38]:
len(targets_dict)

1790594

In [39]:
# Group the embedding rows by sequence id (the file's parent folder name).
# Each record keeps the slice number, the row index into the embedding matrix,
# and the image-level / exam-level labels looked up by SOPInstanceUID.
files_dict = defaultdict(list)
for emb_row, fpath in enumerate(files):
    slice_str, sop_uid = fpath.stem.split("_")
    labels = targets_dict[sop_uid]
    files_dict[fpath.parent.name].append({
        "slice_no": int(slice_str),
        "embs_idx": emb_row,
        "img_y": labels[0],
        "exam_y": labels[1:],
    })

In [40]:
all_pids = list(files_dict.keys())

In [41]:
len(all_pids)

7279

In [42]:
def get_x(pid, files_dict):
    "Embedding-row indices of every slice of `pid`, ordered by ascending `slice_no`."
    by_slice = sorted(files_dict[pid], key=lambda rec: rec['slice_no'])
    return tensor([rec['embs_idx'] for rec in by_slice])

def get_img_y(pid, files_dict):
    "Per-slice `img_y` labels of `pid` as a float tensor, ordered by ascending `slice_no`."
    by_slice = sorted(files_dict[pid], key=lambda rec: rec['slice_no'])
    return tensor([rec['img_y'] for rec in by_slice]).float()

def get_exam_y(pid, files_dict):
    "Exam-level label vector of `pid` as a float tensor, read from its first stored record."
    first_rec = files_dict[pid][0]
    return tensor(first_rec['exam_y']).float()
    
# before_batch: after collecting samples before collating
# Value used to pad the per-slice target sequences; the loss classes mask it out.
targ_pad_idx = 666
def SequenceBlock():
    """Input block: maps a pid to its ordered embedding-row indices (`get_x`).

    Batches come from a `SortedDL`; two `pad_input` steps run before collation,
    one with `input_pad_idx` and one with `targ_pad_idx` on field 1.
    """
    pad_x = partial(pad_input, pad_idx=input_pad_idx)
    pad_img_y = partial(pad_input, pad_idx=targ_pad_idx, pad_fields=1)
    return TransformBlock(type_tfms=[partial(get_x, files_dict=files_dict)],
                          dl_type=SortedDL,
                          dls_kwargs={'before_batch': [pad_x, pad_img_y]})
def SequenceTargetBlock():
    "Target block yielding the per-slice image labels of a pid (`get_img_y`)."
    img_y_tfm = partial(get_img_y, files_dict=files_dict)
    return TransformBlock(type_tfms=[img_y_tfm])
def TargetBlock():
    "Target block yielding the exam-level label vector of a pid (`get_exam_y`)."
    exam_y_tfm = partial(get_exam_y, files_dict=files_dict)
    return TransformBlock(type_tfms=[exam_y_tfm])

In [43]:
get_img_y(all_pids[0], files_dict)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.])

In [44]:
get_exam_y(all_pids[0], files_dict)

tensor([1., 0., 0., 0., 0., 0., 0., 0., 0.])

In [45]:
# normalized_embeddings = F.normalize(combined_embeddings, dim=0)
# normalized_embeddings.isnan().sum()
# normalized_embeddings

In [46]:
# assert combined_embeddings.isnan().sum().item() == 0

### Model

In [47]:
device = default_device(); device

device(type='cuda', index=0)

In [48]:
class AWD_LSTM(Module):
    """AWD-LSTM inspired by https://arxiv.org/abs/1708.02182

    Stack of single-layer LSTMs with weight dropout, input dropout and
    between-layer hidden dropout. Unlike a text model there is no embedding
    layer: by default `forward` receives index tensors and looks the feature
    vectors up in the module-level `combined_embeddings` tensor.
    """
    # Not referenced anywhere inside this class.
    initrange=0.1

    def __init__(self, emb_sz,n_hid, n_layers, hidden_p=0.2, input_p=0.6, weight_p=0.5, bidir=False):
        """Build the LSTM stack.

        emb_sz: feature size fed to the first layer.
        n_hid: output size of every layer (split across directions when `bidir`).
        n_layers: number of stacked LSTMs.
        hidden_p / input_p / weight_p: dropout probabilities for between-layer
            outputs, the input, and the LSTM weights respectively.
        bidir: use bidirectional LSTMs.
        """
        store_attr('emb_sz,n_hid,n_layers')
        # Batch size the stored hidden state is currently sized for.
        self.bs = 1
        self.n_dir = 2 if bidir else 1
        
        # First layer consumes emb_sz features, later layers consume n_hid;
        # each direction gets n_hid//n_dir units so the layer output is n_hid wide.
        self.rnns = nn.ModuleList([self._one_rnn(emb_sz if l == 0 else n_hid, (n_hid)//self.n_dir, bidir, weight_p, l) for l in range(n_layers)])

        self.input_dp = RNNDropout(input_p)
        self.hidden_dps = nn.ModuleList([RNNDropout(hidden_p) for l in range(n_layers)])
        self.reset()

    def forward(self, x, from_embeds=False):
        """Run the stack and return the last layer's output sequence.

        x: index tensor into the global `combined_embeddings` (default), or the
           feature tensor itself when `from_embeds` is True.
        """
        
        if from_embeds: inp = x
        # Look up on the tensor holding `combined_embeddings`, then move only the gathered rows to `device`.
        else: inp = combined_embeddings[x].to(device)
        bs,sl = inp.shape[:2]
        # Resize the stored hidden state when the batch size changes.
        if bs!=self.bs: self._change_hidden(bs)

        output = self.input_dp(inp)
        new_hidden = []
        for l, (rnn,hid_dp) in enumerate(zip(self.rnns, self.hidden_dps)):
            output, new_h = rnn(output, self.hidden[l])
            new_hidden.append(new_h)
            # Hidden dropout is applied between layers only, not after the last one.
            if l != self.n_layers - 1: output = hid_dp(output)
        # Keep the new state for the next call, detached and left on its current device (cpu=False).
        self.hidden = to_detach(new_hidden, cpu=False, gather=False)
        return output

    def _change_hidden(self, bs):
        "Resize every layer's stored hidden state to batch size `bs` and record the new size"
        self.hidden = [self._change_one_hidden(l, bs) for l in range(self.n_layers)]
        self.bs = bs

    def _one_rnn(self, n_in, n_out, bidir, weight_p, l):
        "Return one of the inner rnn: a bias-free, batch-first single-layer LSTM wrapped in `WeightDropout` (`l` is unused)"
        rnn = nn.LSTM(n_in, n_out, 1, batch_first=True, bidirectional=bidir, bias=False)
        return WeightDropout(rnn, weight_p)

    def _one_hidden(self, l):
        "Return one hidden state: a pair of zero tensors of shape (n_dir, bs, n_hid//n_dir) (`l` is unused)"
        nh = (self.n_hid) // self.n_dir
        return (one_param(self).new_zeros(self.n_dir, self.bs, nh), one_param(self).new_zeros(self.n_dir, self.bs, nh))

    def _change_one_hidden(self, l, bs):
        "Return layer `l`'s hidden state resized along the batch dim: zero-padded when growing, truncated when shrinking"
        if self.bs < bs:
            nh = (self.n_hid) // self.n_dir
            return tuple(torch.cat([h, h.new_zeros(self.n_dir, bs-self.bs, nh)], dim=1) for h in self.hidden[l])
        if self.bs > bs: return (self.hidden[l][0][:,:bs].contiguous(), self.hidden[l][1][:,:bs].contiguous())
        return self.hidden[l]

    def reset(self):
        "Reset the hidden states: call `reset` on inner rnns that define it, then zero the stored state"
        [r.reset() for r in self.rnns if hasattr(r, 'reset')]
        self.hidden = [self._one_hidden(l) for l in range(self.n_layers)]

In [49]:
# Output width of the LSTM encoder.
lstm_width = 512
# Layer sizes handed to PoolingLinearClassifier: 3*lstm_width in, lstm_width hidden, 9 outputs
# (there are 9 exam-level targets).
layers = [lstm_width * 3, lstm_width, 9]

class MultiHeadedSequenceClassifier(Module):
    """Sequence encoder with two heads: per-slice (image) logits and exam-level logits.

    bptt: chunk length passed to `SentenceEncoder` and the pooling classifier.
    input_pad_idx: index marking padded input positions.
    n_meta: number of extra feature columns appended to each embedding.
    dim: input sequence feature dim (before the `n_meta` extras).
    nlayers: number of LSTM layers.
    cls_ps: dropout probabilities of the exam head.
    awd_kwargs: forwarded to `AWD_LSTM`.
    """
    def __init__(self, bptt=72, input_pad_idx=input_pad_idx, n_meta=1, dim=1024, nlayers=2, cls_ps=[0.4, 0.1], **awd_kwargs):
        store_attr('input_pad_idx')
        # Bidirectional LSTM stack over (dim + n_meta)-wide feature vectors.
        self.awd_lstm = AWD_LSTM(dim+n_meta, lstm_width, nlayers, bidir=True, **awd_kwargs)
        self.encoder = SentenceEncoder(bptt=bptt, module=self.awd_lstm, pad_idx=input_pad_idx)
        # Image-level head: one logit per sequence position.
        self.seq_head = LinearDecoder(1, lstm_width, bias=True)
        # Exam-level head: pooled encoder output -> 9 logits.
        self.exam_head = PoolingLinearClassifier(layers, ps=cls_ps, bptt=bptt)

    def forward(self, x):
        "Return `(per-slice logits, exam logits)` for a batch of padded index sequences `x`."
        enc_out, pad_mask = self.encoder(x)
        # Both heads return tuples; only their first element (the logits) is used.
        img_logits = self.seq_head(enc_out)[0].squeeze(-1)
        exam_logits = self.exam_head((enc_out, pad_mask))[0]
        return (img_logits, exam_logits)

In [50]:
class MultiLoss(Module):
    """Weighted BCE over both heads, normalised by the total weight in the batch.

    Image-level term: for each sequence the summed BCE over its non-padded slices is
    weighted by `image_target_weight * qi`, where `qi` is the fraction of positive
    slices in that sequence. Exam-level term: per-label BCE times the fixed label weights.
    The weight constants are presumably those of the RSNA PE competition metric — TODO confirm.

    targ_pad_idx: value marking padded positions in the per-slice targets.
    """
    def __init__(self, targ_pad_idx=666):
        store_attr("targ_pad_idx")

    def forward(self, inp, yb0, yb1):
        """Return the scalar loss.

        inp: `(seq_cls_out, exam_out)` logits from the model.
        yb0: padded per-slice targets (padding value `targ_pad_idx`).
        yb1: exam-level targets, one weight per column of `exam_target_weights`.
        """
        image_target_weight = 0.07361963
        exam_target_weights = tensor([0.0736196319, 0.2346625767,  0.0782208589,
                                      0.06257668712, 0.1042944785, 0.06257668712,
                                      0.1042944785,  0.1877300613, 0.09202453988]).to(yb1.device)
        seq_cls_out, exam_out = inp

        # img loss: True where the target is a real slice, False where it is padding
        mask = yb0 != self.targ_pad_idx

        img_loss, qs = 0, 0
        for _m,_y,_p in zip(mask, yb0, seq_cls_out):
            qi = _y[_m].mean()
            # _m.sum() counts the real slices in one tensor op; builtin sum() would walk
            # the mask element by element.
            qs += image_target_weight*qi*_m.sum()
            img_loss += image_target_weight*qi*(F.binary_cross_entropy_with_logits(_p[_m], _y[_m], reduction='sum'))

        # exam loss
        exam_losses = F.binary_cross_entropy_with_logits(exam_out, yb1,reduction='none')
        tot_exam_loss = (exam_losses*(exam_target_weights.unsqueeze(0))).sum()
        # len(exam_losses) is the batch size: every exam contributes the full set of label weights.
        tot_exam_wgts = len(exam_losses)*exam_target_weights.sum()

        return (tot_exam_loss+img_loss)/(qs+tot_exam_wgts)

In [51]:
class ImageLoss(Module):
    """Image-level part of `MultiLoss`, reported on its own (used as a metric).

    For each sequence the summed BCE over its non-padded slices is weighted by
    `image_target_weight * qi`, with `qi` the fraction of positive slices; the total is
    divided by the total image weight in the batch.

    targ_pad_idx: value marking padded positions in the per-slice targets.
    """
    def __init__(self, targ_pad_idx=666):
        store_attr("targ_pad_idx")

    def forward(self, inp, yb0, yb1):
        """Return the weighted image-level loss for one batch.

        inp: `(seq_cls_out, exam_out)` logits; only `seq_cls_out` is used.
        yb0: padded per-slice targets. yb1: exam targets (unused here, kept for the
        shared call signature).
        """
        image_target_weight = 0.07361963
        seq_cls_out, _ = inp

        # img loss: True where the target is a real slice, False where it is padding
        mask = yb0 != self.targ_pad_idx

        img_loss, qs = 0, 0
        for _m,_y,_p in zip(mask, yb0, seq_cls_out):
            qi = _y[_m].mean()
            # _m.sum() counts the real slices in one tensor op; builtin sum() would walk
            # the mask element by element.
            qs += image_target_weight*qi*_m.sum()
            img_loss += image_target_weight*qi*(F.binary_cross_entropy_with_logits(_p[_m], _y[_m], reduction='sum'))

        # A batch with no positive slice has zero total weight; report 0 rather than 0/0 = NaN.
        if qs == 0: return yb0.new_zeros(())
        return (img_loss)/(qs)

In [52]:
class ExamLoss(Module):
    """Exam-level part of `MultiLoss`, reported on its own (used as a metric).

    targ_pad_idx is stored but not read by `forward`.
    """
    def __init__(self, targ_pad_idx=666):
        store_attr("targ_pad_idx")

    def forward(self, inp, yb0, yb1):
        "Weighted per-label BCE on the exam logits, divided by batch_size * sum(label weights)."
        label_weights = tensor([0.0736196319, 0.2346625767,  0.0782208589, 
                                0.06257668712, 0.1042944785, 0.06257668712,
                                0.1042944785,  0.1877300613, 0.09202453988]).to(yb1.device)
        # Only the exam-level logits are needed; yb0 is unused.
        _, exam_logits = inp

        per_label_bce = F.binary_cross_entropy_with_logits(exam_logits, yb1, reduction='none')
        weighted_total = (per_label_bce * label_weights.unsqueeze(0)).sum()
        weight_total = len(per_label_bce) * tensor(label_weights).sum()

        return weighted_total / weight_total

### Train

In [53]:
# Cross-validation switch and the fold whose held-out ids are loaded (same fold as the embeddings).
do_cv = True
FOLD = fold

# valid_pids is only defined when do_cv is True; the splitter built later reads it unconditionally.
# NOTE(review): read_pickle unpickles the file — only load files from a trusted source.
if do_cv: 
    valid_pids = pd.read_pickle(datapath/f'cv_pids/pids_fold{FOLD}.pkl')

In [54]:
len(valid_pids)

1456

In [55]:
combined_embeddings.shape

torch.Size([1790595, 1026])

In [56]:
# One input (index sequence) and two targets (per-slice labels, exam labels);
# ids listed in valid_pids form the validation split.
data = DataBlock(
    blocks=(SequenceBlock, SequenceTargetBlock, TargetBlock),
    n_inp=1,
    splitter=FuncSplitter(lambda pid: pid in valid_pids),
)
dls = data.dataloaders(all_pids, bs=128)

In [57]:
# `mult` scales every dropout probability at once.
mult = 1.
cls_ps = [0.4*mult,0.1*mult]
model = SequentialRNN(
    MultiHeadedSequenceClassifier(
        bptt=256, n_meta=2, dim=1024, nlayers=2, cls_ps=cls_ps,  # dim = 1536 for effnet 1024 for xresnet
        hidden_p=0.2*mult, input_p=0.6*mult, weight_p=0.5*mult,
    )
)
loss_func = MultiLoss()
# The image and exam parts of the loss are tracked separately as metrics.
learner = Learner(
    dls, model, loss_func=loss_func,
    metrics=[ImageLoss(), ExamLoss()],
    cbs=[
        ModelResetter(),
        TerminateOnNaNCallback(),
        SaveModelCallback(fname=f"nometa_sequence_with_preds_fulldata_fold{fold}"),
    ],
)

In [None]:
learner.fit_flat_cos(20, lr=0.005)

### Get OOF Preds

In [None]:
def get_pid(pid):
    "Identity transform: pass the sequence id through so each batch carries its ids."
    return pid

# get_x, get_img_y, get_exam_y, targ_pad_idx, SequenceBlock, SequenceTargetBlock and
# TargetBlock are defined once in the "Data" section and reused here unchanged.
# The verbatim re-definitions that used to live in this cell only shadowed them.
def PidBlock():
    "Extra block that yields the raw pid, so predictions can be matched back to sequences."
    return TransformBlock(type_tfms=[get_pid])

In [None]:
# Same pipeline as for training plus a PidBlock, so every batch also yields its sequence ids.
data = DataBlock(
    blocks=(SequenceBlock, SequenceTargetBlock, TargetBlock, PidBlock),
    n_inp=1,
    splitter=FuncSplitter(lambda pid: pid in valid_pids),
)
dls = data.dataloaders(all_pids, bs=128)
# Same architecture as the trained model so the saved weights can be loaded into it.
model = SequentialRNN(MultiHeadedSequenceClassifier(bptt=256, n_meta=2, dim=1024, nlayers=2)) # dim = 1536 for effnet 1024 for xresnet
loss_func = MultiLoss()
# No metrics or callbacks: this learner is only used to load weights and build the test DataLoader.
learner = Learner(dls, model, loss_func=loss_func, metrics=[], cbs=[])

In [None]:
learner.load(f"nometa_sequence_with_preds_fulldata_fold{fold}");

In [None]:
learner.model.eval().to(device);

In [None]:
len(all_pids)

In [None]:
test_dl = learner.dls.test_dl(all_pids, with_labels=True)

In [None]:
# Per-batch accumulators: ids, image-level preds/targets, exam-level preds/targets.
seq_pids, seq_img_preds, seq_img_targs = [], [], []
seq_exam_preds, seq_exam_targs = [], []
with torch.no_grad():
    for batch_x, batch_img_y, batch_exam_y, batch_pids in progress_bar(test_dl):
        # The model is called directly; its two outputs are detached before being stored.
        img_pred, exam_pred = to_detach(learner.model(batch_x))
        seq_img_preds.append(img_pred)
        seq_img_targs.append(batch_img_y)
        seq_exam_preds.append(exam_pred)
        seq_exam_targs.append(batch_exam_y)
        seq_pids.append(batch_pids)

In [None]:
# Output root for the stacking data. exist_ok avoids the check-then-create race and
# makes the cell safe to re-run; parents creates any missing intermediate folder.
stacking_datapath = datapath/'final_lstm_stacking'
stacking_datapath.mkdir(parents=True, exist_ok=True)

In [None]:
# Per-fold output folder. exist_ok avoids the check-then-create race and makes the
# cell safe to re-run; parents creates any missing intermediate folder.
subfolder = f"xresnet_FOLD{FOLD}"
stacking_folder = stacking_datapath/subfolder
stacking_folder.mkdir(parents=True, exist_ok=True)

In [None]:
len(seq_pids), len(seq_img_preds), len(seq_img_targs), len(seq_exam_preds), len(seq_exam_targs), len(valid_pids)

In [None]:
# Persist every collected list (and the validation ids) as <name>.pth in the fold's stacking folder.
for out_name, out_obj in [
    ('seq_pids', seq_pids),
    ('seq_img_preds', seq_img_preds),
    ('seq_img_targs', seq_img_targs),
    ('seq_exam_preds', seq_exam_preds),
    ('seq_exam_targs', seq_exam_targs),
    ('valid_pids', valid_pids),
]:
    torch.save(out_obj, stacking_folder/f'{out_name}.pth')