In [None]:
import pandas as pd
import numpy as np
import glob
import pydicom
import os, os.path as osp

from scipy.ndimage.interpolation import zoom
from tqdm import tqdm

import matplotlib
import matplotlib.pylab as plt
from PIL import Image
import PIL
import torch.optim as optim

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# `sample_df` is the submission template that the inference loop fills in;
# `df` lists one row per test image with its study / series identifiers.
sample_df = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/sample_submission.csv')
df = pd.read_csv('../input/rsna-str-pulmonary-embolism-detection/test.csv')

# One folder per (study, series) pair, de-duplicated in order of first appearance.
series_paths = '../input/rsna-str-pulmonary-embolism-detection/test/' + df.StudyInstanceUID + '/' + df.SeriesInstanceUID
dicom_folders = series_paths.unique().tolist()

In [None]:
# Number of distinct study/series folders that will be run through inference.
len(dicom_folders)

In [None]:
# test.csv was only needed to build `dicom_folders`; drop the frame to free memory.
del df

In [None]:
# Per-channel normalisation applied after ToTensor: (x - 0.5) / 0.5.
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

# Test-time preprocessing for a single slice, applied in list order.
_test_steps = [
    transforms.ToPILImage(),
    # transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # transforms.CenterCrop(224),
    normalize,
]
test_ds_trans = transforms.Compose(_test_steps)



In [None]:
def load_dicom_array(f):
    """Read every ``*.dcm`` file in folder ``f`` into one z-sorted volume.

    Args:
        f: Path of a folder containing the DICOM slices of one series.

    Returns:
        A ``(volume, files)`` tuple. ``volume`` stacks the pixel arrays of all
        slices, ordered by ascending z position and rescaled as
        ``pixel * RescaleSlope + RescaleIntercept``. ``files`` is a NumPy array
        of the file paths in the same order.

    Raises:
        FileNotFoundError: If the folder contains no ``*.dcm`` file.
    """
    dicom_files = glob.glob(osp.join(f, '*.dcm'))
    if not dicom_files:
        # Fail with an explicit message instead of an IndexError on dicoms[0].
        raise FileNotFoundError('No .dcm files found in {}'.format(f))
    dicoms = [pydicom.dcmread(d) for d in dicom_files]
    # Rescale parameters are read from the first slice and applied to all of
    # them (assumes they are constant within a series - TODO confirm).
    M = float(dicoms[0].RescaleSlope)
    B = float(dicoms[0].RescaleIntercept)
    # Assume all images are axial
    z_pos = [float(d.ImagePositionPatient[-1]) for d in dicoms]
    # Compute the slice order once so pixels and file names stay aligned.
    order = np.argsort(z_pos)
    volume = np.asarray([d.pixel_array for d in dicoms])[order]
    volume = volume * M
    volume = volume + B
    return volume, np.asarray(dicom_files)[order]

def window(img, WL=50, WW=350):
    """Apply an intensity window to ``img`` and rescale the result to uint8.

    Values are clipped to ``[WL - WW//2, WL + WW//2]``, shifted so the smallest
    clipped value becomes 0, then divided by the largest remaining value and
    scaled to 0..255. The scaling therefore depends on the range actually
    present in ``img``, not only on the window bounds.

    Args:
        img: Array of intensities; it is not modified.
        WL: Window level (centre of the window).
        WW: Window width.

    Returns:
        A ``uint8`` array with the same shape as ``img``. If every clipped
        value is identical the result is all zeros.
    """
    upper, lower = WL+WW//2, WL-WW//2
    # np.clip returns a new array, so the input is left untouched without an
    # extra explicit copy of the whole volume.
    X = np.clip(img, lower, upper)
    X = X - np.min(X)
    peak = np.max(X)
    if peak > 0:
        # Guard against 0/0 (NaN) when the clipped image is constant.
        X = X / peak
    X = (X*255.0).astype('uint8')
    return X

class Lungs(Dataset):
    """Dataset with one item per DICOM folder.

    Indexing returns whatever ``load_dicom_array`` produces for that folder,
    or ``None`` (after printing the exception) when loading fails.
    """

    def __init__(self, dicom_folders):
        self.dicom_folders = dicom_folders

    def __len__(self):
        return len(self.dicom_folders)

    def get(self, i):
        """Load folder ``i``; exceptions propagate to the caller."""
        folder = self.dicom_folders[i]
        return load_dicom_array(folder)

    def __getitem__(self, i):
        # Best effort: a broken study must not abort the whole run.
        try:
            study = self.get(i)
        except Exception as err:
            print(err)
            study = None
        return study


# SAVEDIR = '../../data/train-jpegs/'
# The inference loop divides this by the largest non-leading dimension of the
# stacked volume to obtain the zoom ratio.
MAX_LENGTH = float(256)

# One study per batch; the identity collate_fn hands the raw dataset item
# (or None for a failed load) through inside a one-element list.
dset = Lungs(dicom_folders)
loader = DataLoader(
    dataset=dset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    collate_fn=lambda batch: batch,
)

In [None]:
# resnet34 = models.resnet34()
# fc_feat_size = resnet34.fc.in_features
# resnet34.fc.in_features, resnet34.fc.out_features

In [None]:
class Identity(nn.Module):
    """Pass-through module: ``forward`` returns its input unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x

In [None]:
# resnet34.avgpool = Identity()
# resnet34.fc = Identity()

In [None]:
# 7.7 - Multi-Task Learning
def weights_init(m):
    """Re-initialise, in place, the weights of every supported layer inside ``m``.

    Conv2d and BatchNorm2d weights are drawn with ``uniform_`` (default range),
    Linear weights with ``xavier_normal_``. Biases and all other layer types
    are left as they are. Returns ``None``.
    """
    for layer in m.modules():
        if isinstance(layer, (nn.Conv2d, nn.BatchNorm2d)):
            torch.nn.init.uniform_(layer.weight)
            continue
        if isinstance(layer, nn.Linear):
            torch.nn.init.xavier_normal_(layer.weight)
        
class MultiTaskHead(nn.Module):
    """Single-label head: conv/BN stack, global average pool, then a small MLP.

    Args:
        fc_feat_size: Width of the first fully connected layer. Defaults to
            512, which is what ``resnet34.fc.in_features`` evaluates to in the
            commented-out setup cell of this notebook. Taking it as a parameter
            removes the dependency on a global that a fresh kernel never
            defines.

    The layer layout (and therefore the ``state_dict`` keys) is unchanged.
    """
    def __init__(self, fc_feat_size=512):
        super(MultiTaskHead, self).__init__()
        self.m = nn.Sequential(
                nn.Conv2d(512, 512, kernel_size=(1,1), stride=(1,1), bias=False), 
                nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
                nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False),
                nn.BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.AdaptiveAvgPool2d(output_size=(1,1)),
        )
        self.l = nn.Sequential(
                nn.Linear(2048, fc_feat_size, bias=True), 
            # 7.12 - Dropout
                nn.Dropout(),
                nn.ReLU(),
                nn.Linear(fc_feat_size, 128, bias=True), 
#                 nn.Dropout(),
#                 nn.Linear(256, 128, bias=True), 
#                 nn.Dropout(),
                nn.Linear(128, 1), 
#                 nn.Sigmoid()
        )
        
    def forward(self, x):
        """Map flat features of shape ``(batch, 512 * side * side)`` to ``(batch, 1)`` logits."""
        # Recover the square spatial grid from the flattened feature vector.
        side = int(np.sqrt(x.shape[1]/512))
        x = x.view(x.shape[0], 512, side, side)
        x = self.m(x)
        # Flatten everything except the batch axis. A bare squeeze() would also
        # drop the batch axis when batch == 1 and yield a 1-D output.
        x = self.l(x.view(x.shape[0], -1))
        return x

class WrapperModel(nn.Module):
    """Shared backbone followed by one ``MultiTaskHead`` per label.

    Every head is created and passed through ``weights_init`` in the order of
    ``_HEAD_NAMES``. ``forward`` evaluates only the heads in ``_OUTPUT_HEADS``
    (the image-level and negative-exam heads exist but are not part of the
    output), concatenates their results along dim 1 and moves them to the CPU.
    """

    # Creation order: the image-level head first, then the exam-level heads.
    _HEAD_NAMES = (
        'linear_pe_present_on_image',
        'linear_negative_exam_for_pe',
        'linear_indeterminate',
        'linear_chronic_pe',
        'linear_acute_and_chronic_pe',
        'linear_central_pe',
        'linear_leftsided_pe',
        'linear_rightsided_pe',
        'linear_rv_lv_ratio_gte_1',
        'linear_rv_lv_ratio_lt_1',
    )
    # Informational heads (qa_motion, qa_contrast, true_filling_defect_not_pe,
    # flow_artifact) are disabled in this notebook.

    # Column order of the tensor returned by forward().
    _OUTPUT_HEADS = (
        'linear_rv_lv_ratio_gte_1',
        'linear_rv_lv_ratio_lt_1',
        'linear_leftsided_pe',
        'linear_chronic_pe',
        'linear_rightsided_pe',
        'linear_acute_and_chronic_pe',
        'linear_central_pe',
        'linear_indeterminate',
    )

    def __init__(self, pretrained_model):
        super(WrapperModel, self).__init__()
        self.stage = 'train'
        self.backbone = pretrained_model
        for head_name in self._HEAD_NAMES:
            head = MultiTaskHead()
            weights_init(head)
            setattr(self, head_name, head)

    def forward(self, x):
        features = self.backbone(x)
        per_label = [getattr(self, head_name)(features) for head_name in self._OUTPUT_HEADS]
        return torch.cat(per_label, 1).cpu()

# model = WrapperModel(resnet34).to(device)

In [None]:
# model.load_state_dict(torch.load('../input/resnet342/resnet_1_6473685692657123_0_9219284031043191__0_5446572009448984_0_6214398491481427'))
# model.eval()

In [None]:
# The checkpoint is presumably a fully pickled model object (the result is
# called like a module in the inference loop), so the classes defined above
# must already exist in this kernel.
# SECURITY: torch.load unpickles arbitrary objects - only load trusted files.
# map_location keeps the load working on CPU-only machines and puts the weights
# on the same device the inputs are moved to.
model = torch.load('../input/model34pth/model.pth', map_location=device)
# eval() is required for inference: the heads contain Dropout and BatchNorm,
# which otherwise make predictions random and batch-dependent.
model = model.to(device).eval()

In [None]:
# Fill `sample_df['label']` in place, one study (DICOM folder) per iteration.
with torch.no_grad():
    for data in tqdm(loader, total=len(loader)):
        # batch_size=1 with an identity collate_fn: unwrap the single item.
        data = data[0]
        if data is None:
            # The study failed to load; its rows keep their existing labels.
            continue
        image, files = data
        # Windows from https://pubs.rsna.org/doi/pdf/10.1148/rg.245045008
        image_lung = np.expand_dims(window(image, WL=-600, WW=1500), axis=3)
        image_mediastinal = np.expand_dims(window(image, WL=40, WW=400), axis=3)
        image_pe_specific = np.expand_dims(window(image, WL=100, WW=700), axis=3)
        # Stack the three windows as the channels of each slice.
        image = np.concatenate([image_mediastinal, image_pe_specific, image_lung], axis=3)
        rat = MAX_LENGTH / np.max(image.shape[1:])
        image = zoom(image, [1.,rat,rat,1.], prefilter=False, order=1)

        images = [test_ds_trans(x_) for x_ in image]
        x = torch.stack(images).to(device)
        # One row per slice (z-sorted file order), one column per model output.
        y_pred = torch.sigmoid(model(x))

        study_instance = files[0].split('/')[-3]
        # Image id = file name without directory and extension. This replaces a
        # hard-coded character offset that only worked for one exact path length.
        image_names = [osp.splitext(osp.basename(f))[0] for f in files]

        # --- image-level predictions ---
        # Predictions follow the z-sorted file order while the submission rows
        # follow the CSV order, so look up each row's slice before assigning.
        image_rows = sample_df['id'].isin(image_names)
        slice_of = {name: k for k, name in enumerate(image_names)}
        indices = [slice_of[id_] for id_ in sample_df.loc[image_rows, 'id']]
        if indices:
            values = y_pred[indices].max(1)[0].tolist()
            sample_df.loc[image_rows, 'label'] = values

        # --- study-level predictions ---
        # Per-label maximum over all slices of the study.
        study_results = y_pred.max(0)[0].tolist()

        # NOTE: an earlier label-consistency pass (exactly one RV/LV ratio
        # label, exactly one of chronic / acute-and-chronic, at least one
        # location label) was disabled in this notebook and is not applied.
        negative_ex = 1 - max(study_results)
        study_rows = sample_df['id'].str.contains(study_instance)
        sample_df.loc[study_rows, 'label'] = [negative_ex] + study_results

In [None]:
# Inspect the filled-in submission frame.
sample_df

In [None]:
# Range sanity check: both flags are False when every label lies in [0, 1].
sample_df.label.lt(0).any(), sample_df.label.gt(1).any()

In [None]:
# True if any cell of the submission frame is missing.
sample_df.isna().to_numpy().any()

In [None]:
# Write the submission file without the DataFrame index column.
sample_df.to_csv('submission.csv', index = False)