In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, glob, pickle, gc, copy, sys, multiprocessing, time
from joblib import Parallel, delayed

import warnings
import cv2, pydicom
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 100) # maximum number of columns shown when a DataFrame is displayed

sys.path.append('../src')
from utils import ri, pickle_load, pickle_save

In [2]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.optim.lr_scheduler import _LRScheduler
from sklearn import metrics

import timm
import pretrainedmodels

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Enable cuDNN's benchmark mode (auto-tuning of convolution algorithms).
torch.backends.cudnn.benchmark = True

# Config

In [3]:
# config

# Run mode: when True the data-loading cell keeps only a leading subset of the
# test rows; leave it False to process the whole test set.
DEBUG = False

# Inference settings.
NUM_FOLD = 5
BATCH_SIZE = 64
num_cpu = multiprocessing.cpu_count()

# Key columns of the test CSV: one row per image, grouped by study.
col_groupby = 'StudyInstanceUID'
col_index = 'SOPInstanceUID'

# Input locations.
test_image_path = "../input/orig/test"
df_test_path = "../input/orig/test.csv"
df_sub_path = "../input/orig/sample_submission.csv"

# Width of the pooled feature vector passed from each image model to its 1D CNN.
num_features_b0 = 1280
num_features_b2 = 1408

# Weight directories, one per backbone (b0 / b2) and training stage (1 / 2).
weight_dir_b0_1, weight_dir_b0_2 = "../output/b0_stage1", "../output/b0_stage2"
weight_dir_b2_1, weight_dir_b2_2 = "../output/b2_stage1", "../output/b2_stage2"

In [4]:
# Prediction targets. Order matters: the prediction loop writes the first nine
# entries (`col_targets[:-1]`) once per study and the last entry
# (`col_targets[-1]`) once per image.
col_targets = [
    'negative_exam_for_pe',
    'indeterminate',
    'chronic_pe',
    'acute_and_chronic_pe',
    'central_pe',
    'leftsided_pe',
    'rightsided_pe',
    'rv_lv_ratio_gte_1',
    'rv_lv_ratio_lt_1',
    'pe_present_on_image',
]

# Data Loading

In [5]:
# Load the test manifest; df_test_full stays untouched, df_test is the working copy.
df_test_full = pd.read_csv(df_test_path)
df_test = df_test_full.copy(deep=True)
if DEBUG:
    # Debug runs only look at the first 2000 image rows.
    df_test = df_test.iloc[:2000]

# Sort so each study forms one contiguous run of rows, then renumber rows 0..n-1.
df_test = df_test.sort_values(by=[col_groupby, col_index])
df_test = df_test.reset_index(drop=True)
print(df_test.shape)
df_test.head()

(2000, 3)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID
0,1870d65d0f6a,65a0a30bbf20,00ecce044f48
1,1870d65d0f6a,65a0a30bbf20,013b5b8613e2
2,1870d65d0f6a,65a0a30bbf20,03dec99e9210
3,1870d65d0f6a,65a0a30bbf20,056994b4cdaa
4,1870d65d0f6a,65a0a30bbf20,05b77d2db591


In [6]:
# Build "<test_image_path>/<study>/<series>/<sop>.dcm" for every image row.
_study_ids = df_test[col_groupby].values
_series_ids = df_test['SeriesInstanceUID'].values
_sop_ids = df_test[col_index].values
df_test['dicom_path'] = test_image_path + "/" + _study_ids + "/" + _series_ids + "/" + _sop_ids + ".dcm"

# Show the first path as a quick sanity check.
print(df_test.loc[0, 'dicom_path'])
df_test.head()

/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65a0a30bbf20/00ecce044f48.dcm


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,dicom_path
0,1870d65d0f6a,65a0a30bbf20,00ecce044f48,/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65...
1,1870d65d0f6a,65a0a30bbf20,013b5b8613e2,/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65...
2,1870d65d0f6a,65a0a30bbf20,03dec99e9210,/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65...
3,1870d65d0f6a,65a0a30bbf20,056994b4cdaa,/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65...
4,1870d65d0f6a,65a0a30bbf20,05b77d2db591,/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65...


In [7]:
# make exam-level dataframe
# Keep the first row of every study. Because df_test is sorted by study and has
# a fresh 0..n-1 index, that row's index is the position where the study starts.
# The explicit .copy() makes the column assignment below operate on an
# independent frame instead of on a filtered slice of df_test.
df_test_exam = df_test.loc[~df_test[col_groupby].duplicated()].copy()
if not DEBUG:
    df_test_full_exam = df_test_exam
else:
    df_test_full_exam = df_test_full[~df_test_full[col_groupby].duplicated()]
df_test_exam['start_index'] = df_test_exam.index.values

# Number of image rows per study.
df_tmp = df_test.groupby(col_groupby)[col_index].size().reset_index()
df_tmp.columns = [col_groupby, 'num_images']
df_test_exam = pd.merge(df_test_exam, df_tmp, on=col_groupby)

# DicomDataset slices df_test with iloc[start_index : start_index + num_images],
# so every study must occupy one contiguous run of rows.
_starts = df_test_exam['start_index'].values
_counts = df_test_exam['num_images'].values
assert (_starts[:-1] + _counts[:-1] == _starts[1:]).all(), \
    "studies are not contiguous in df_test; sort by study and reset the index first"

print(df_test_exam.shape)
df_test_exam.head()

(15, 6)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,dicom_path,start_index,num_images
0,1870d65d0f6a,65a0a30bbf20,00ecce044f48,/mnt/disks/data5/rsna2020/test/1870d65d0f6a/65...,0,156
1,26135e3b3b30,57f406bae513,00465b2a5461,/mnt/disks/data5/rsna2020/test/26135e3b3b30/57...,156,154
2,462e805da1f1,6b7ff11f23a8,0185a62557e0,/mnt/disks/data5/rsna2020/test/462e805da1f1/6b...,310,150
3,62dfc5f411e8,ff8593a6a2f3,04eae590c2f5,/mnt/disks/data5/rsna2020/test/62dfc5f411e8/ff...,460,139
4,761f6f1a9f5b,afb21a2656cf,0064081df67d,/mnt/disks/data5/rsna2020/test/761f6f1a9f5b/af...,599,134


# Prediction

In [8]:
class DicomDataset(Dataset):
    """Dataset that yields one whole study (all of its DICOM slices) per item.

    Args:
        X_study: exam-level DataFrame. The columns read are the study id
            (``col_groupby``), ``start_index`` and ``num_images``.
        X_image: image-level DataFrame. Rows ``start_index`` to
            ``start_index + num_images`` (positional) are taken as the study's
            slices and must provide a ``dicom_path`` column.
        transform: stored on the instance; not applied by this class.
        meta: accepted for call compatibility; not used.
        verbose: when True, print shapes, z positions and rescale metadata.

    Each item is ``(images, df_study)``: ``images`` is a float16 array of shape
    ``(num_slices, 1, 512, 512)`` ordered by z position, and ``df_study`` is the
    matching slice table (same order) with ``z_pos`` and ``series_index`` added.
    """

    def __init__(self, X_study, X_image, transform=None, meta=False, verbose=False):
        self.X_study = X_study
        self.X_image = X_image
        self.transform = transform
        self.verbose = verbose

    @staticmethod
    def _read_slice(path):
        """Read one DICOM file and return (dataset, pixel array, z position)."""
        tmp_dcm = pydicom.dcmread(path)
        tmp_npy = np.asarray(tmp_dcm.pixel_array)
        tmp_z = tmp_dcm['ImagePositionPatient'].value[-1]
        return tmp_dcm, tmp_npy, tmp_z

    @staticmethod
    def _read_rescale_meta(dcm):
        """Return (RescaleSlope, RescaleIntercept, PatientPosition), or None if any is unreadable."""
        try:
            return (
                dcm['RescaleSlope'].value,
                dcm['RescaleIntercept'].value,
                dcm['PatientPosition'].value,
            )
        except Exception:
            # Best effort: the caller tries the next slice and finally falls back to defaults.
            return None

    def __getitem__(self, index):
        # get df_study
        study = self.X_study[col_groupby][index]
        start_index = self.X_study['start_index'][index]
        end_index = start_index + self.X_study['num_images'][index]
        df_study = self.X_image.iloc[start_index:end_index].reset_index(drop=True)

        # load dicoms
        images_study = []
        z_pos = []
        study_meta = None
        for i in range(len(df_study)):
            tmp_path = df_study['dicom_path'][i]
            try:
                # Read everything before appending anything, so a failure can
                # never leave the image list and the z list with different lengths.
                tmp_dcm, tmp_npy, tmp_z = self._read_slice(tmp_path)
            except Exception as err:  # not a bare except: Ctrl-C must still interrupt
                print("loading error!!!, study: {}, index: {}".format(study, i), repr(err))
                # Substitute a blank slice and a z position that sorts before real ones.
                tmp_dcm = None
                tmp_npy = np.zeros([512, 512], np.int16)
                tmp_z = -10000-i
            images_study.append(tmp_npy)
            z_pos.append(tmp_z)
            # Study-level metadata comes from the first slice that provides it.
            if study_meta is None and tmp_dcm is not None:
                study_meta = self._read_rescale_meta(tmp_dcm)

        if study_meta is None:
            # No slice provided usable metadata: fall back to fixed defaults.
            study_meta = (1, -1024, 'HFS')
        RescaleSlope, RescaleIntercept, PatientPosition = study_meta

        images_study = np.array(images_study)
        z_pos = np.array(z_pos)
        # One stable permutation is applied to both the pixel stack and the
        # table, so they stay aligned even if two slices share a z position.
        order = np.argsort(z_pos, kind='stable')
        images_study = images_study[order]
        df_study['z_pos'] = z_pos
        df_study = df_study.iloc[order].reset_index(drop=True)
        df_study['series_index'] = np.arange(len(df_study))
        if self.verbose: print(images_study.shape)
        if self.verbose: print(z_pos)
        if self.verbose: print(RescaleIntercept, RescaleSlope, PatientPosition)

        # process images: apply the linear rescale, then divide by 1000
        images_study_processed = (images_study.astype(np.float32) * RescaleSlope + RescaleIntercept)/1000
        if PatientPosition=='FFP':
            # Reverse both in-plane axes for this patient position.
            images_study_processed = images_study_processed[:, ::-1, ::-1]
        images_study_processed = images_study_processed.reshape([-1, 1, 512, 512]).astype(np.float16)

        return images_study_processed, df_study

    def __len__(self):
        return len(self.X_study)

In [9]:
def my_collate(batch):
    """Collate function for loaders that deliver a single sample per batch.

    Only ``batch[0]`` is used. Its first field is passed to ``torch.Tensor``
    and its second field is returned unchanged.
    """
    sample = batch[0]
    pixels = torch.Tensor(sample[0])
    return pixels, sample[1]

In [10]:
class nnWindow(nn.Module):
    """Expand a 1-channel image into 3 channels: 1x1 conv -> sigmoid -> instance norm.

    The convolution is initialised from three value pairs (presumably intensity
    window centre/width, given the name - confirm), each divided by 1000:
    channel ``k`` starts with weight ``1 / width_k`` and bias
    ``0.5 - center_k / width_k``.
    """

    def __init__(self):
        super().__init__()
        wso = np.array(((40, 80), (80, 200), (40, 400))) / 1000
        centers, widths = wso[:, 0], wso[:, 1]

        conv_ = nn.Conv2d(1, 3, kernel_size=(1, 1))
        # Overwrite the random initialisation with the window-derived values.
        conv_.weight.data.copy_(torch.from_numpy((1. / widths).reshape(3, 1, 1, 1)))
        conv_.bias.data.copy_(torch.from_numpy(0.5 - centers / widths))

        self.window = nn.Sequential(conv_, nn.Sigmoid(), nn.InstanceNorm2d(3))

    def forward(self, input1):
        """Apply the windowing stack to ``input1``."""
        return self.window(input1)
        
        
class MyEffNet_b0(nn.Module):
    """Image-level model: windowing layer -> timm backbone features -> pooled linear head.

    Args:
        num_classes: output size of the final linear layer.
        base_model: timm model name used for the backbone.
    """

    def __init__(self, num_classes=10, base_model='tf_efficientnet_b0_ns'):
        super().__init__()

        self.num_classes = num_classes
        self.mode = 'train'
        self.window = nnWindow()
        backbone = timm.create_model(base_model, pretrained=False, num_classes=10)
        self.base_model = backbone.to(device, non_blocking=True)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.last_linear = nn.Linear(self.base_model.num_features, num_classes)

    def forward(self, input1):
        """Return only the logits for a 4-D input batch."""
        logits, _ = self.feature(input1)
        return logits

    def feature(self, input1):
        """Return ``(logits, pooled_features)`` for a 4-D input batch.

        ``pooled_features`` is the globally average-pooled backbone feature
        map, flattened to one vector per input image.
        """
        bs, _, _, _ = input1.size()
        feature_map = self.base_model.forward_features(self.window(input1))
        feature = self.avgpool(feature_map).view(bs, -1)
        return self.last_linear(feature), feature

In [11]:
class SEModule(nn.Module):
    """Squeeze-and-excitation channel gate for 1-D feature maps.

    The input is average-pooled over its last axis, passed through a
    ``channels -> channels // reduction -> channels`` pair of 1x1 convolutions
    (ReLU in between), squashed with a sigmoid and multiplied back onto the input.
    """

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc1 = nn.Conv1d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv1d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate."""
        gate = self.avg_pool(x)
        for stage in (self.fc1, self.relu, self.fc2, self.sigmoid):
            gate = stage(gate)
        return x * gate
    
class CNN1D(nn.Module):
    """1-D CNN over a sequence of per-slice feature vectors.

    Input is ``(batch, input_ch, d)``. The channel axis is first max-pooled by
    a factor of 4, then four conv/BN/ReLU/SE stages run along the sequence
    axis, with the sequence length roughly halved between stages.

    Returns a pair ``(y, y2)``:
        y:  ``(batch, 9)`` from the globally pooled last stage.
        y2: ``(batch, 2, d)`` from a 1x1 convolution over the pooled input and
            all four stage outputs, each resized back to length ``d``.

    Args:
        num_classes: accepted for call compatibility; not used.
        input_ch: number of channels of the input sequence.
        verbose: stored on the instance; not used by ``forward``.
    """

    @staticmethod
    def _conv_se_block(in_ch, out_ch, kernel_size, padding):
        """Conv1d (no bias) -> BatchNorm1d -> ReLU -> SEModule."""
        return nn.Sequential(
            nn.Conv1d(in_ch, out_ch, kernel_size=kernel_size, stride=1, padding=padding, bias=False),
            nn.BatchNorm1d(out_ch),
            nn.ReLU(inplace=True),
            SEModule(out_ch, 16),
        )

    def __init__(self, num_classes=400, input_ch=1, verbose=False):
        super().__init__()
        pool = 4
        pooled_ch = input_ch // pool
        self.verbose = verbose

        self.layer1 = self._conv_se_block(pooled_ch, 64, kernel_size=7, padding=3)
        # fpool is applied to the channel axis (after a transpose), maxpool to the sequence axis.
        self.fpool = nn.MaxPool1d(kernel_size=pool, stride=pool, padding=0)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        self.layer2 = self._conv_se_block(64, 128, kernel_size=3, padding=1)
        self.layer3 = self._conv_se_block(128, 256, kernel_size=3, padding=1)
        self.layer4 = self._conv_se_block(256, 512, kernel_size=3, padding=1)
        self.avgpool = nn.AdaptiveAvgPool1d(1)

        # Per-position head: consumes the concatenation of x0..x4 built in forward().
        self.fc2 = nn.Conv1d(pooled_ch + 64 + 128 + 256 + 512, 2, kernel_size=1)
        # Sequence-level head.
        self.fc = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 9),
        )

    def forward(self, x_input):
        b, ch, d = x_input.size()

        # Max-pool the channel axis by transposing it into the pooled position.
        x0 = self.fpool(x_input.transpose(1, 2)).transpose(1, 2)

        x1 = self.maxpool(self.layer1(x0))
        x2 = self.maxpool(self.layer2(x1))
        x3 = self.maxpool(self.layer3(x2))
        x4 = self.layer4(x3)

        # Bring every stage back to length d and stack along channels.
        multi_scale = [x0] + [F.adaptive_avg_pool1d(stage, d) for stage in (x1, x2, x3, x4)]
        y2 = self.fc2(torch.cat(multi_scale, dim=1))

        y = self.fc(self.avgpool(x4).view(b, -1))
        return y, y2

In [12]:
# One dataset item is one whole study, so the loader uses batch_size=1 and
# my_collate to unwrap that single item.
dataset_test = DicomDataset(X_study=df_test_exam, X_image=df_test)
test_loader = DataLoader(
    dataset=dataset_test,
    batch_size=1,
    shuffle=False,
    collate_fn=my_collate,
    num_workers=num_cpu,
    pin_memory=True,
)

In [13]:
# Sigmoid applied by the prediction loop to the 1D-CNN outputs.
lastfunc = nn.Sigmoid().to(device, non_blocking=True)

In [14]:
def batch_padding(batch):
    """Zero-pad the last axis of a 3-D tensor up to the next multiple of 64.

    Returns a new float32 tensor on the module-level ``device`` whose leading
    ``d`` positions along the last axis hold ``batch`` and whose remainder is zero.
    """
    bs, ch, d = batch.shape
    d_new = int(np.ceil(d/64)*64)
    padded = torch.zeros([bs, ch, d_new], dtype=torch.float32, device=device)
    padded[:, :, :d] = batch
    return padded

In [15]:
def _load_eval_model(model, weight_path):
    """Move ``model`` to ``device``, load its weights from ``weight_path`` and set eval mode."""
    model = model.to(device, non_blocking=True)
    # map_location keeps checkpoints that were saved from a GPU loadable when
    # `device` is the CPU fallback.
    model.load_state_dict(torch.load(weight_path, map_location=device))
    model.eval()
    return model


def _stage2_predict(model_1dcnn, feature_chunks, num_features, num_images):
    """Run one 1D CNN on the per-slice features of a single study.

    ``feature_chunks`` is the list of per-batch feature tensors (slices along
    axis 0). Returns ``(exam_probs, image_probs)`` after the sigmoid:
    ``exam_probs`` is the first model output, ``image_probs`` is the last
    channel of the second output cut back to ``num_images`` positions.
    """
    features = torch.cat(feature_chunks, dim=0)  # (num_images, num_features)
    features = torch.transpose(features, 0, 1).reshape([1, num_features, -1])
    features = batch_padding(features)
    with torch.no_grad():
        output1, output2 = model_1dcnn(features)
        output1 = lastfunc(output1)
        output2 = lastfunc(output2[:, -1:])[:, :, :num_images]
    return output1, output2


df_pred_image = []
df_pred_study = []
starttime = time.time()
verbose = False

# Load every fold's four models once, up front.
model_b0s = []
model_b2s = []
model_1dcnns = []
model_1dcnn_b2s = []
for fold in range(NUM_FOLD):
    model_b0s.append(_load_eval_model(
        MyEffNet_b0(),
        "{}/weight_epoch_16_fold{}.pth".format(weight_dir_b0_1, fold+1)))
    model_b2s.append(_load_eval_model(
        MyEffNet_b0(base_model='tf_efficientnet_b2_ns'),
        "{}/weight_epoch_16_fold{}.pth".format(weight_dir_b2_1, fold+1)))
    model_1dcnns.append(_load_eval_model(
        CNN1D(input_ch=num_features_b0),
        "{}/1dcnn_weight_best_fold{}.pth".format(weight_dir_b0_2, fold+1)))
    model_1dcnn_b2s.append(_load_eval_model(
        CNN1D(input_ch=num_features_b2),
        "{}/1dcnn_weight_best_fold{}.pth".format(weight_dir_b2_2, fold+1)))

for study_index, (images, df_study) in enumerate(test_loader):
    if verbose: print("load index {} done".format(study_index), time.time()-starttime)
    if (study_index+1)%10==0:
        print("{}/{}, sec: {:.1f}".format(study_index+1, len(df_test_exam), time.time()-starttime))
    num_batches = int(np.ceil(images.shape[0]/BATCH_SIZE))
    num_images = len(df_study)
    # Explicit copies: prediction columns are added to these frames below.
    df_study_image = df_study[[col_groupby, col_index, 'SeriesInstanceUID']].copy()
    df_study_study = df_study[[col_groupby]].iloc[:1].copy()

    # Stage 1: per-slice features. Each image batch is sent to the device once
    # and shared by all folds.
    features_b0 = [[] for _ in range(NUM_FOLD)]
    features_b2 = [[] for _ in range(NUM_FOLD)]
    with torch.no_grad():
        for batch_index in range(num_batches):
            batch = images[batch_index*BATCH_SIZE:(batch_index+1)*BATCH_SIZE].to(device, non_blocking=True)
            with torch.cuda.amp.autocast():
                for fold in range(NUM_FOLD):
                    _, feature = model_b0s[fold].feature(batch)
                    _, feature_b2 = model_b2s[fold].feature(batch)
                    features_b0[fold].append(feature)
                    features_b2[fold].append(feature_b2)

    # Stage 2: study-level and image-level probabilities, one column per fold and backbone.
    for fold in range(NUM_FOLD):
        output1, output2 = _stage2_predict(
            model_1dcnns[fold], features_b0[fold], num_features_b0, num_images)
        output1_b2, output2_b2 = _stage2_predict(
            model_1dcnn_b2s[fold], features_b2[fold], num_features_b2, num_images)
        for i, col in enumerate(col_targets[:-1]):
            df_study_study["{}_pred_fold{}".format(col, fold+1)] = output1[0, i].detach().cpu().numpy()
            df_study_study["{}_pred_fold{}_b2".format(col, fold+1)] = output1_b2[0, i].detach().cpu().numpy()
        df_study_image["{}_pred_fold{}".format(col_targets[-1], fold+1)] = output2[0, 0].detach().cpu().numpy()
        df_study_image["{}_pred_fold{}_b2".format(col_targets[-1], fold+1)] = output2_b2[0, 0].detach().cpu().numpy()

    df_pred_study.append(df_study_study)
    df_pred_image.append(df_study_image)

10/15, sec: 82.3


# Postprocessing

In [16]:
# Stack the per-study frames collected by the prediction loop into two flat
# tables with a fresh 0..n-1 index. The list names are rebound to DataFrames,
# so this cell can only be re-run after re-running the prediction cell.
df_pred_image = pd.concat(df_pred_image, ignore_index=True)
df_pred_study = pd.concat(df_pred_study, ignore_index=True)
print(df_pred_image.shape, df_pred_study.shape)
df_pred_image.head()

(2000, 13) (15, 91)


Unnamed: 0,StudyInstanceUID,SOPInstanceUID,SeriesInstanceUID,pe_present_on_image_pred_fold1,pe_present_on_image_pred_fold1_b2,pe_present_on_image_pred_fold2,pe_present_on_image_pred_fold2_b2,pe_present_on_image_pred_fold3,pe_present_on_image_pred_fold3_b2,pe_present_on_image_pred_fold4,pe_present_on_image_pred_fold4_b2,pe_present_on_image_pred_fold5,pe_present_on_image_pred_fold5_b2
0,1870d65d0f6a,9e6515ade70b,65a0a30bbf20,0.000452,0.000778,0.001244,0.000297,0.000789,0.000537,0.000876,0.000917,0.00027,0.000123
1,1870d65d0f6a,8611c32ef6a9,65a0a30bbf20,0.000429,0.000838,0.001271,0.000274,0.000798,0.000505,0.000927,0.000866,0.000265,0.00011
2,1870d65d0f6a,d01c2dacd815,65a0a30bbf20,0.0004,0.000912,0.001384,0.00024,0.000912,0.000406,0.000621,0.000849,0.00012,9.6e-05
3,1870d65d0f6a,1ab908ca66ca,65a0a30bbf20,0.000377,0.000845,0.001325,0.000251,0.001059,0.000439,0.000655,0.000852,0.000125,8.2e-05
4,1870d65d0f6a,51b7740ce07c,65a0a30bbf20,0.000566,0.001074,0.001373,0.000296,0.000933,0.000545,0.000829,0.001151,0.000123,0.000119


In [17]:
# Show the raw per-fold study-level predictions (at most 30 studies).
df_pred_study.head(30)

Unnamed: 0,StudyInstanceUID,negative_exam_for_pe_pred_fold1,negative_exam_for_pe_pred_fold1_b2,indeterminate_pred_fold1,indeterminate_pred_fold1_b2,chronic_pe_pred_fold1,chronic_pe_pred_fold1_b2,acute_and_chronic_pe_pred_fold1,acute_and_chronic_pe_pred_fold1_b2,central_pe_pred_fold1,central_pe_pred_fold1_b2,leftsided_pe_pred_fold1,leftsided_pe_pred_fold1_b2,rightsided_pe_pred_fold1,rightsided_pe_pred_fold1_b2,rv_lv_ratio_gte_1_pred_fold1,rv_lv_ratio_gte_1_pred_fold1_b2,rv_lv_ratio_lt_1_pred_fold1,rv_lv_ratio_lt_1_pred_fold1_b2,negative_exam_for_pe_pred_fold2,negative_exam_for_pe_pred_fold2_b2,indeterminate_pred_fold2,indeterminate_pred_fold2_b2,chronic_pe_pred_fold2,chronic_pe_pred_fold2_b2,acute_and_chronic_pe_pred_fold2,acute_and_chronic_pe_pred_fold2_b2,central_pe_pred_fold2,central_pe_pred_fold2_b2,leftsided_pe_pred_fold2,leftsided_pe_pred_fold2_b2,rightsided_pe_pred_fold2,rightsided_pe_pred_fold2_b2,rv_lv_ratio_gte_1_pred_fold2,rv_lv_ratio_gte_1_pred_fold2_b2,rv_lv_ratio_lt_1_pred_fold2,rv_lv_ratio_lt_1_pred_fold2_b2,negative_exam_for_pe_pred_fold3,negative_exam_for_pe_pred_fold3_b2,indeterminate_pred_fold3,indeterminate_pred_fold3_b2,chronic_pe_pred_fold3,chronic_pe_pred_fold3_b2,acute_and_chronic_pe_pred_fold3,acute_and_chronic_pe_pred_fold3_b2,central_pe_pred_fold3,central_pe_pred_fold3_b2,leftsided_pe_pred_fold3,leftsided_pe_pred_fold3_b2,rightsided_pe_pred_fold3,rightsided_pe_pred_fold3_b2,rv_lv_ratio_gte_1_pred_fold3,rv_lv_ratio_gte_1_pred_fold3_b2,rv_lv_ratio_lt_1_pred_fold3,rv_lv_ratio_lt_1_pred_fold3_b2,negative_exam_for_pe_pred_fold4,negative_exam_for_pe_pred_fold4_b2,indeterminate_pred_fold4,indeterminate_pred_fold4_b2,chronic_pe_pred_fold4,chronic_pe_pred_fold4_b2,acute_and_chronic_pe_pred_fold4,acute_and_chronic_pe_pred_fold4_b2,central_pe_pred_fold4,central_pe_pred_fold4_b2,leftsided_pe_pred_fold4,leftsided_pe_pred_fold4_b2,rightsided_pe_pred_fold4,rightsided_pe_pred_fold4_b2,rv_lv_ratio_gte_1_pred_fold4,rv_lv_ratio_gte_1_pred_fold4_b2,rv_lv_ratio
_lt_1_pred_fold4,rv_lv_ratio_lt_1_pred_fold4_b2,negative_exam_for_pe_pred_fold5,negative_exam_for_pe_pred_fold5_b2,indeterminate_pred_fold5,indeterminate_pred_fold5_b2,chronic_pe_pred_fold5,chronic_pe_pred_fold5_b2,acute_and_chronic_pe_pred_fold5,acute_and_chronic_pe_pred_fold5_b2,central_pe_pred_fold5,central_pe_pred_fold5_b2,leftsided_pe_pred_fold5,leftsided_pe_pred_fold5_b2,rightsided_pe_pred_fold5,rightsided_pe_pred_fold5_b2,rv_lv_ratio_gte_1_pred_fold5,rv_lv_ratio_gte_1_pred_fold5_b2,rv_lv_ratio_lt_1_pred_fold5,rv_lv_ratio_lt_1_pred_fold5_b2
0,1870d65d0f6a,0.75327,0.889616,0.004008,0.009249,0.015489,0.016519,0.001641,0.001372,0.000802,0.001216,0.074046,0.043957,0.163997,0.068754,0.020246,0.014432,0.171188,0.070179,0.835424,0.911305,0.01148,0.005037,0.029961,0.012258,0.003924,0.000683,0.002786,0.000548,0.078965,0.034444,0.116345,0.055383,0.01442,0.007226,0.148555,0.069008,0.6416,0.89108,0.014794,0.0082,0.046972,0.023106,0.010432,0.0017,0.00627,0.001274,0.162827,0.051067,0.264346,0.069561,0.04865,0.0186,0.302667,0.083354,0.850735,0.866092,0.029522,0.016818,0.017485,0.020034,0.002637,0.002157,0.000979,0.001135,0.064449,0.052262,0.085316,0.08087,0.010344,0.017095,0.124067,0.090861,0.914994,0.935114,0.009068,0.002772,0.013517,0.007736,0.000899,0.000436,0.000514,0.000312,0.036011,0.021774,0.050403,0.039575,0.007108,0.008172,0.066029,0.042539
1,26135e3b3b30,0.883958,0.816293,0.012922,0.02338,0.016293,0.031864,0.001145,0.004979,0.000706,0.00558,0.042905,0.083089,0.066929,0.113137,0.013245,0.0374,0.063833,0.107995,0.870548,0.843616,0.024912,0.013059,0.024172,0.020133,0.003291,0.002441,0.00541,0.002445,0.060684,0.066239,0.081886,0.094859,0.022872,0.018121,0.081215,0.112292,0.830242,0.844448,0.014248,0.014561,0.029914,0.029863,0.003733,0.003885,0.003704,0.003613,0.090116,0.075947,0.117775,0.10014,0.030832,0.033334,0.125343,0.103269,0.805606,0.72096,0.026617,0.02952,0.034129,0.045472,0.008065,0.01131,0.003884,0.0118,0.110208,0.147072,0.145199,0.202647,0.035459,0.085201,0.163409,0.161757,0.866923,0.85077,0.017736,0.00972,0.027441,0.022138,0.003328,0.003232,0.002826,0.002753,0.066853,0.064825,0.089318,0.107422,0.022894,0.034479,0.095548,0.1014
2,462e805da1f1,0.885173,0.896135,0.012009,0.01021,0.016044,0.014903,0.001068,0.001155,0.000615,0.00115,0.041772,0.039064,0.067485,0.058803,0.012735,0.012557,0.064976,0.059095,0.865334,0.903881,0.012454,0.007951,0.026148,0.013078,0.003022,0.0009,0.002743,0.000808,0.065107,0.037252,0.092205,0.056401,0.017353,0.008196,0.103151,0.069265,0.881886,0.915687,0.008919,0.006837,0.020019,0.017563,0.001509,0.001052,0.001327,0.000907,0.058495,0.038745,0.080085,0.049087,0.015989,0.013925,0.091513,0.058726,0.879685,0.875895,0.022525,0.015287,0.016021,0.018088,0.002409,0.001767,0.001154,0.000998,0.058496,0.047497,0.075528,0.073573,0.014688,0.015696,0.093307,0.079244,0.862916,0.918545,0.016253,0.003406,0.027789,0.009976,0.002975,0.000733,0.001998,0.000547,0.067031,0.029487,0.090144,0.053441,0.019695,0.012014,0.104293,0.055961
3,62dfc5f411e8,0.695241,0.764446,0.016314,0.047226,0.042402,0.044752,0.007993,0.009308,0.005441,0.011214,0.13149,0.113228,0.226202,0.142395,0.061608,0.046728,0.208471,0.15196,0.784831,0.834073,0.038575,0.030648,0.050195,0.023013,0.010834,0.003524,0.012199,0.003138,0.119455,0.066057,0.152074,0.087291,0.039851,0.016358,0.163113,0.114171,0.874001,0.837433,0.013241,0.032595,0.024313,0.030655,0.002043,0.004038,0.001811,0.002971,0.064687,0.069982,0.086096,0.091677,0.01794,0.023972,0.098838,0.112027,0.748774,0.883094,0.042285,0.020208,0.048092,0.019432,0.012121,0.001933,0.004428,0.001294,0.135078,0.046456,0.175924,0.067838,0.035012,0.015868,0.223655,0.070005,0.816258,0.893763,0.043767,0.010815,0.037757,0.015971,0.00588,0.001658,0.004682,0.001432,0.094452,0.04153,0.115136,0.064157,0.02017,0.018565,0.149769,0.065934
4,761f6f1a9f5b,0.816338,0.800406,0.024974,0.023166,0.034096,0.037904,0.004408,0.006517,0.003614,0.006901,0.083621,0.096281,0.119274,0.1293,0.039998,0.049159,0.097025,0.117185,0.826287,0.851656,0.01931,0.01962,0.039527,0.023144,0.007007,0.002884,0.009387,0.003441,0.097363,0.067228,0.12696,0.087792,0.051926,0.022844,0.107359,0.091996,0.750473,0.812182,0.018493,0.017,0.053824,0.043459,0.011097,0.007089,0.009438,0.006746,0.145084,0.100919,0.186156,0.13295,0.072334,0.055517,0.173934,0.122893,0.824188,0.846047,0.035386,0.030219,0.025805,0.02955,0.005468,0.004129,0.003395,0.003446,0.088446,0.069633,0.112136,0.09613,0.026219,0.029576,0.126419,0.090067,0.879044,0.84985,0.024547,0.013834,0.022209,0.021939,0.002495,0.003455,0.002095,0.003222,0.056341,0.064395,0.072051,0.100095,0.013576,0.034021,0.086378,0.093684
5,79981becb2f8,0.839872,0.804016,0.031023,0.031748,0.036784,0.037798,0.004085,0.005942,0.002561,0.006351,0.072532,0.087287,0.106145,0.116369,0.030253,0.04096,0.100138,0.111673,0.9156,0.858403,0.021672,0.040874,0.015264,0.019598,0.001197,0.002373,0.001982,0.001929,0.03193,0.050402,0.046133,0.058773,0.010046,0.014125,0.04957,0.07315,0.821709,0.876272,0.013856,0.010369,0.047908,0.027699,0.005551,0.002439,0.003116,0.001929,0.100756,0.058741,0.130731,0.078293,0.043192,0.026351,0.138612,0.082952,0.889672,0.88142,0.018935,0.029236,0.013847,0.025173,0.001745,0.002489,0.00066,0.001614,0.051079,0.048509,0.06724,0.066789,0.011044,0.019091,0.088017,0.066408,0.904752,0.90196,0.015054,0.017461,0.019624,0.011245,0.001562,0.000925,0.000853,0.00087,0.042584,0.029914,0.056231,0.042475,0.011214,0.011805,0.067893,0.045548
6,7f6fb39566ed,0.016782,0.016586,0.000381,0.00105,0.019165,0.048089,0.034445,0.074441,0.067913,0.214382,0.694953,0.832454,0.908499,0.919075,0.222084,0.417768,0.764232,0.603416,0.012628,0.014882,0.001046,0.001062,0.027861,0.035694,0.076686,0.070278,0.136231,0.169175,0.736625,0.805369,0.903797,0.914334,0.179801,0.347295,0.814533,0.681355,0.032859,0.02422,0.006104,0.0024,0.052388,0.038684,0.092762,0.071453,0.117514,0.115235,0.703939,0.738546,0.852882,0.88491,0.304341,0.301854,0.684232,0.687095,0.04404,0.016997,0.003471,0.000466,0.074595,0.033479,0.114457,0.056325,0.137691,0.153943,0.706284,0.816965,0.833842,0.918169,0.292939,0.337538,0.681351,0.668745,0.011064,0.008957,0.000755,0.000927,0.028632,0.034955,0.054672,0.103931,0.096725,0.237267,0.767418,0.851381,0.921687,0.946109,0.247515,0.421828,0.769215,0.62225
7,acd8f049599c,0.3407,0.766304,0.00542,0.028297,0.044008,0.045457,0.016537,0.008745,0.012644,0.00822,0.27283,0.112382,0.524032,0.157355,0.129348,0.057179,0.455576,0.151978,0.746356,0.863875,0.038029,0.038831,0.067801,0.014484,0.016076,0.001846,0.01415,0.001618,0.145576,0.044429,0.188252,0.053253,0.051454,0.008847,0.205213,0.076364,0.886878,0.887548,0.017555,0.037926,0.01929,0.012786,0.001279,0.000936,0.001163,0.000705,0.051542,0.032918,0.069509,0.039387,0.009449,0.007445,0.094125,0.05699,0.830056,0.757169,0.025034,0.037886,0.02376,0.047214,0.003918,0.010391,0.001177,0.00786,0.080874,0.1224,0.109103,0.168897,0.014782,0.065268,0.151902,0.152901,0.811244,0.850845,0.017756,0.012928,0.039625,0.023625,0.0048,0.003191,0.00244,0.002491,0.093926,0.062054,0.127794,0.100364,0.024879,0.031837,0.164069,0.099541
8,b1c8e2c38aae,0.814226,0.808457,0.031495,0.028715,0.039344,0.03655,0.005373,0.005968,0.004318,0.006717,0.087704,0.089031,0.122928,0.116238,0.043427,0.041493,0.102179,0.111033,0.895673,0.862112,0.016258,0.013851,0.021922,0.024972,0.002393,0.002578,0.003617,0.002737,0.049146,0.064035,0.06874,0.090329,0.020745,0.022696,0.065133,0.092648,0.83355,0.853825,0.011367,0.018076,0.033505,0.033769,0.004033,0.004099,0.003381,0.003578,0.091466,0.072409,0.116678,0.0921,0.039135,0.033486,0.113308,0.097089,0.888238,0.854217,0.023225,0.02864,0.012981,0.028091,0.001796,0.003547,0.000931,0.002894,0.050552,0.064271,0.064871,0.088896,0.011518,0.027894,0.082232,0.081406,0.894169,0.864968,0.021506,0.008802,0.019873,0.020317,0.001967,0.002609,0.001495,0.002165,0.048078,0.057141,0.061697,0.09522,0.0117,0.029653,0.073775,0.090328
9,b44cbf5371f2,0.425665,0.206477,0.026294,0.014207,0.127133,0.105973,0.056159,0.07562,0.091581,0.117059,0.368754,0.535975,0.505356,0.659733,0.358421,0.360139,0.286002,0.457471,0.153214,0.477531,0.010531,0.045861,0.404913,0.149803,0.137501,0.062915,0.17861,0.105329,0.616056,0.351167,0.673931,0.419954,0.766665,0.327077,0.10402,0.254785,0.124982,0.405216,0.01108,0.024231,0.100855,0.101439,0.098937,0.05554,0.1078,0.052963,0.567078,0.336812,0.722658,0.475648,0.359328,0.229952,0.517815,0.391248,0.139698,0.2001,0.002953,0.010558,0.295042,0.083356,0.103824,0.065631,0.238467,0.090408,0.60128,0.510643,0.712724,0.650985,0.751912,0.311897,0.110441,0.494535,0.452986,0.294374,0.026777,0.011129,0.169402,0.076664,0.063861,0.045037,0.053266,0.04858,0.350878,0.389063,0.446622,0.569597,0.31261,0.248169,0.306454,0.434446


In [18]:
# Ensemble: each study-level target becomes the plain mean of its
# 2 * NUM_FOLD per-model columns (both backbones, every fold).
for col in col_targets[:-1]:
    cols_tmp = [
        template.format(col, fold+1)
        for fold in range(NUM_FOLD)
        for template in ("{}_pred_fold{}", "{}_pred_fold{}_b2")
    ]
    df_pred_study[col] = df_pred_study[cols_tmp].values.mean(axis=1)
df_pred_study.head(30)

Unnamed: 0,StudyInstanceUID,negative_exam_for_pe_pred_fold1,negative_exam_for_pe_pred_fold1_b2,indeterminate_pred_fold1,indeterminate_pred_fold1_b2,chronic_pe_pred_fold1,chronic_pe_pred_fold1_b2,acute_and_chronic_pe_pred_fold1,acute_and_chronic_pe_pred_fold1_b2,central_pe_pred_fold1,central_pe_pred_fold1_b2,leftsided_pe_pred_fold1,leftsided_pe_pred_fold1_b2,rightsided_pe_pred_fold1,rightsided_pe_pred_fold1_b2,rv_lv_ratio_gte_1_pred_fold1,rv_lv_ratio_gte_1_pred_fold1_b2,rv_lv_ratio_lt_1_pred_fold1,rv_lv_ratio_lt_1_pred_fold1_b2,negative_exam_for_pe_pred_fold2,negative_exam_for_pe_pred_fold2_b2,indeterminate_pred_fold2,indeterminate_pred_fold2_b2,chronic_pe_pred_fold2,chronic_pe_pred_fold2_b2,acute_and_chronic_pe_pred_fold2,acute_and_chronic_pe_pred_fold2_b2,central_pe_pred_fold2,central_pe_pred_fold2_b2,leftsided_pe_pred_fold2,leftsided_pe_pred_fold2_b2,rightsided_pe_pred_fold2,rightsided_pe_pred_fold2_b2,rv_lv_ratio_gte_1_pred_fold2,rv_lv_ratio_gte_1_pred_fold2_b2,rv_lv_ratio_lt_1_pred_fold2,rv_lv_ratio_lt_1_pred_fold2_b2,negative_exam_for_pe_pred_fold3,negative_exam_for_pe_pred_fold3_b2,indeterminate_pred_fold3,indeterminate_pred_fold3_b2,chronic_pe_pred_fold3,chronic_pe_pred_fold3_b2,acute_and_chronic_pe_pred_fold3,acute_and_chronic_pe_pred_fold3_b2,central_pe_pred_fold3,central_pe_pred_fold3_b2,leftsided_pe_pred_fold3,leftsided_pe_pred_fold3_b2,rightsided_pe_pred_fold3,rightsided_pe_pred_fold3_b2,rv_lv_ratio_gte_1_pred_fold3,rv_lv_ratio_gte_1_pred_fold3_b2,rv_lv_ratio_lt_1_pred_fold3,rv_lv_ratio_lt_1_pred_fold3_b2,negative_exam_for_pe_pred_fold4,negative_exam_for_pe_pred_fold4_b2,indeterminate_pred_fold4,indeterminate_pred_fold4_b2,chronic_pe_pred_fold4,chronic_pe_pred_fold4_b2,acute_and_chronic_pe_pred_fold4,acute_and_chronic_pe_pred_fold4_b2,central_pe_pred_fold4,central_pe_pred_fold4_b2,leftsided_pe_pred_fold4,leftsided_pe_pred_fold4_b2,rightsided_pe_pred_fold4,rightsided_pe_pred_fold4_b2,rv_lv_ratio_gte_1_pred_fold4,rv_lv_ratio_gte_1_pred_fold4_b2,rv_lv_ratio
_lt_1_pred_fold4,rv_lv_ratio_lt_1_pred_fold4_b2,negative_exam_for_pe_pred_fold5,negative_exam_for_pe_pred_fold5_b2,indeterminate_pred_fold5,indeterminate_pred_fold5_b2,chronic_pe_pred_fold5,chronic_pe_pred_fold5_b2,acute_and_chronic_pe_pred_fold5,acute_and_chronic_pe_pred_fold5_b2,central_pe_pred_fold5,central_pe_pred_fold5_b2,leftsided_pe_pred_fold5,leftsided_pe_pred_fold5_b2,rightsided_pe_pred_fold5,rightsided_pe_pred_fold5_b2,rv_lv_ratio_gte_1_pred_fold5,rv_lv_ratio_gte_1_pred_fold5_b2,rv_lv_ratio_lt_1_pred_fold5,rv_lv_ratio_lt_1_pred_fold5_b2,negative_exam_for_pe,indeterminate,chronic_pe,acute_and_chronic_pe,central_pe,leftsided_pe,rightsided_pe,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1
0,1870d65d0f6a,0.75327,0.889616,0.004008,0.009249,0.015489,0.016519,0.001641,0.001372,0.000802,0.001216,0.074046,0.043957,0.163997,0.068754,0.020246,0.014432,0.171188,0.070179,0.835424,0.911305,0.01148,0.005037,0.029961,0.012258,0.003924,0.000683,0.002786,0.000548,0.078965,0.034444,0.116345,0.055383,0.01442,0.007226,0.148555,0.069008,0.6416,0.89108,0.014794,0.0082,0.046972,0.023106,0.010432,0.0017,0.00627,0.001274,0.162827,0.051067,0.264346,0.069561,0.04865,0.0186,0.302667,0.083354,0.850735,0.866092,0.029522,0.016818,0.017485,0.020034,0.002637,0.002157,0.000979,0.001135,0.064449,0.052262,0.085316,0.08087,0.010344,0.017095,0.124067,0.090861,0.914994,0.935114,0.009068,0.002772,0.013517,0.007736,0.000899,0.000436,0.000514,0.000312,0.036011,0.021774,0.050403,0.039575,0.007108,0.008172,0.066029,0.042539,0.848923,0.011095,0.020308,0.002588,0.001583,0.06198,0.099455,0.016629,0.116845
1,26135e3b3b30,0.883958,0.816293,0.012922,0.02338,0.016293,0.031864,0.001145,0.004979,0.000706,0.00558,0.042905,0.083089,0.066929,0.113137,0.013245,0.0374,0.063833,0.107995,0.870548,0.843616,0.024912,0.013059,0.024172,0.020133,0.003291,0.002441,0.00541,0.002445,0.060684,0.066239,0.081886,0.094859,0.022872,0.018121,0.081215,0.112292,0.830242,0.844448,0.014248,0.014561,0.029914,0.029863,0.003733,0.003885,0.003704,0.003613,0.090116,0.075947,0.117775,0.10014,0.030832,0.033334,0.125343,0.103269,0.805606,0.72096,0.026617,0.02952,0.034129,0.045472,0.008065,0.01131,0.003884,0.0118,0.110208,0.147072,0.145199,0.202647,0.035459,0.085201,0.163409,0.161757,0.866923,0.85077,0.017736,0.00972,0.027441,0.022138,0.003328,0.003232,0.002826,0.002753,0.066853,0.064825,0.089318,0.107422,0.022894,0.034479,0.095548,0.1014,0.833336,0.018668,0.028142,0.004541,0.004272,0.080794,0.111931,0.033384,0.111606
2,462e805da1f1,0.885173,0.896135,0.012009,0.01021,0.016044,0.014903,0.001068,0.001155,0.000615,0.00115,0.041772,0.039064,0.067485,0.058803,0.012735,0.012557,0.064976,0.059095,0.865334,0.903881,0.012454,0.007951,0.026148,0.013078,0.003022,0.0009,0.002743,0.000808,0.065107,0.037252,0.092205,0.056401,0.017353,0.008196,0.103151,0.069265,0.881886,0.915687,0.008919,0.006837,0.020019,0.017563,0.001509,0.001052,0.001327,0.000907,0.058495,0.038745,0.080085,0.049087,0.015989,0.013925,0.091513,0.058726,0.879685,0.875895,0.022525,0.015287,0.016021,0.018088,0.002409,0.001767,0.001154,0.000998,0.058496,0.047497,0.075528,0.073573,0.014688,0.015696,0.093307,0.079244,0.862916,0.918545,0.016253,0.003406,0.027789,0.009976,0.002975,0.000733,0.001998,0.000547,0.067031,0.029487,0.090144,0.053441,0.019695,0.012014,0.104293,0.055961,0.888514,0.011585,0.017963,0.001659,0.001225,0.048295,0.069675,0.014285,0.077953
3,62dfc5f411e8,0.695241,0.764446,0.016314,0.047226,0.042402,0.044752,0.007993,0.009308,0.005441,0.011214,0.13149,0.113228,0.226202,0.142395,0.061608,0.046728,0.208471,0.15196,0.784831,0.834073,0.038575,0.030648,0.050195,0.023013,0.010834,0.003524,0.012199,0.003138,0.119455,0.066057,0.152074,0.087291,0.039851,0.016358,0.163113,0.114171,0.874001,0.837433,0.013241,0.032595,0.024313,0.030655,0.002043,0.004038,0.001811,0.002971,0.064687,0.069982,0.086096,0.091677,0.01794,0.023972,0.098838,0.112027,0.748774,0.883094,0.042285,0.020208,0.048092,0.019432,0.012121,0.001933,0.004428,0.001294,0.135078,0.046456,0.175924,0.067838,0.035012,0.015868,0.223655,0.070005,0.816258,0.893763,0.043767,0.010815,0.037757,0.015971,0.00588,0.001658,0.004682,0.001432,0.094452,0.04153,0.115136,0.064157,0.02017,0.018565,0.149769,0.065934,0.813192,0.029567,0.033658,0.005933,0.004861,0.088242,0.120879,0.029607,0.135794
4,761f6f1a9f5b,0.816338,0.800406,0.024974,0.023166,0.034096,0.037904,0.004408,0.006517,0.003614,0.006901,0.083621,0.096281,0.119274,0.1293,0.039998,0.049159,0.097025,0.117185,0.826287,0.851656,0.01931,0.01962,0.039527,0.023144,0.007007,0.002884,0.009387,0.003441,0.097363,0.067228,0.12696,0.087792,0.051926,0.022844,0.107359,0.091996,0.750473,0.812182,0.018493,0.017,0.053824,0.043459,0.011097,0.007089,0.009438,0.006746,0.145084,0.100919,0.186156,0.13295,0.072334,0.055517,0.173934,0.122893,0.824188,0.846047,0.035386,0.030219,0.025805,0.02955,0.005468,0.004129,0.003395,0.003446,0.088446,0.069633,0.112136,0.09613,0.026219,0.029576,0.126419,0.090067,0.879044,0.84985,0.024547,0.013834,0.022209,0.021939,0.002495,0.003455,0.002095,0.003222,0.056341,0.064395,0.072051,0.100095,0.013576,0.034021,0.086378,0.093684,0.825647,0.022655,0.033146,0.005455,0.005168,0.086931,0.116284,0.039517,0.110694
5,79981becb2f8,0.839872,0.804016,0.031023,0.031748,0.036784,0.037798,0.004085,0.005942,0.002561,0.006351,0.072532,0.087287,0.106145,0.116369,0.030253,0.04096,0.100138,0.111673,0.9156,0.858403,0.021672,0.040874,0.015264,0.019598,0.001197,0.002373,0.001982,0.001929,0.03193,0.050402,0.046133,0.058773,0.010046,0.014125,0.04957,0.07315,0.821709,0.876272,0.013856,0.010369,0.047908,0.027699,0.005551,0.002439,0.003116,0.001929,0.100756,0.058741,0.130731,0.078293,0.043192,0.026351,0.138612,0.082952,0.889672,0.88142,0.018935,0.029236,0.013847,0.025173,0.001745,0.002489,0.00066,0.001614,0.051079,0.048509,0.06724,0.066789,0.011044,0.019091,0.088017,0.066408,0.904752,0.90196,0.015054,0.017461,0.019624,0.011245,0.001562,0.000925,0.000853,0.00087,0.042584,0.029914,0.056231,0.042475,0.011214,0.011805,0.067893,0.045548,0.869368,0.023023,0.025494,0.002831,0.002186,0.057373,0.076918,0.021808,0.082396
6,7f6fb39566ed,0.016782,0.016586,0.000381,0.00105,0.019165,0.048089,0.034445,0.074441,0.067913,0.214382,0.694953,0.832454,0.908499,0.919075,0.222084,0.417768,0.764232,0.603416,0.012628,0.014882,0.001046,0.001062,0.027861,0.035694,0.076686,0.070278,0.136231,0.169175,0.736625,0.805369,0.903797,0.914334,0.179801,0.347295,0.814533,0.681355,0.032859,0.02422,0.006104,0.0024,0.052388,0.038684,0.092762,0.071453,0.117514,0.115235,0.703939,0.738546,0.852882,0.88491,0.304341,0.301854,0.684232,0.687095,0.04404,0.016997,0.003471,0.000466,0.074595,0.033479,0.114457,0.056325,0.137691,0.153943,0.706284,0.816965,0.833842,0.918169,0.292939,0.337538,0.681351,0.668745,0.011064,0.008957,0.000755,0.000927,0.028632,0.034955,0.054672,0.103931,0.096725,0.237267,0.767418,0.851381,0.921687,0.946109,0.247515,0.421828,0.769215,0.62225,0.019902,0.001766,0.039354,0.074945,0.144608,0.765393,0.90033,0.307296,0.697642
7,acd8f049599c,0.3407,0.766304,0.00542,0.028297,0.044008,0.045457,0.016537,0.008745,0.012644,0.00822,0.27283,0.112382,0.524032,0.157355,0.129348,0.057179,0.455576,0.151978,0.746356,0.863875,0.038029,0.038831,0.067801,0.014484,0.016076,0.001846,0.01415,0.001618,0.145576,0.044429,0.188252,0.053253,0.051454,0.008847,0.205213,0.076364,0.886878,0.887548,0.017555,0.037926,0.01929,0.012786,0.001279,0.000936,0.001163,0.000705,0.051542,0.032918,0.069509,0.039387,0.009449,0.007445,0.094125,0.05699,0.830056,0.757169,0.025034,0.037886,0.02376,0.047214,0.003918,0.010391,0.001177,0.00786,0.080874,0.1224,0.109103,0.168897,0.014782,0.065268,0.151902,0.152901,0.811244,0.850845,0.017756,0.012928,0.039625,0.023625,0.0048,0.003191,0.00244,0.002491,0.093926,0.062054,0.127794,0.100364,0.024879,0.031837,0.164069,0.099541,0.774098,0.025966,0.033805,0.006772,0.005247,0.101893,0.153794,0.040049,0.160866
8,b1c8e2c38aae,0.814226,0.808457,0.031495,0.028715,0.039344,0.03655,0.005373,0.005968,0.004318,0.006717,0.087704,0.089031,0.122928,0.116238,0.043427,0.041493,0.102179,0.111033,0.895673,0.862112,0.016258,0.013851,0.021922,0.024972,0.002393,0.002578,0.003617,0.002737,0.049146,0.064035,0.06874,0.090329,0.020745,0.022696,0.065133,0.092648,0.83355,0.853825,0.011367,0.018076,0.033505,0.033769,0.004033,0.004099,0.003381,0.003578,0.091466,0.072409,0.116678,0.0921,0.039135,0.033486,0.113308,0.097089,0.888238,0.854217,0.023225,0.02864,0.012981,0.028091,0.001796,0.003547,0.000931,0.002894,0.050552,0.064271,0.064871,0.088896,0.011518,0.027894,0.082232,0.081406,0.894169,0.864968,0.021506,0.008802,0.019873,0.020317,0.001967,0.002609,0.001495,0.002165,0.048078,0.057141,0.061697,0.09522,0.0117,0.029653,0.073775,0.090328,0.856943,0.020194,0.027132,0.003436,0.003183,0.067383,0.09177,0.028175,0.090913
9,b44cbf5371f2,0.425665,0.206477,0.026294,0.014207,0.127133,0.105973,0.056159,0.07562,0.091581,0.117059,0.368754,0.535975,0.505356,0.659733,0.358421,0.360139,0.286002,0.457471,0.153214,0.477531,0.010531,0.045861,0.404913,0.149803,0.137501,0.062915,0.17861,0.105329,0.616056,0.351167,0.673931,0.419954,0.766665,0.327077,0.10402,0.254785,0.124982,0.405216,0.01108,0.024231,0.100855,0.101439,0.098937,0.05554,0.1078,0.052963,0.567078,0.336812,0.722658,0.475648,0.359328,0.229952,0.517815,0.391248,0.139698,0.2001,0.002953,0.010558,0.295042,0.083356,0.103824,0.065631,0.238467,0.090408,0.60128,0.510643,0.712724,0.650985,0.751912,0.311897,0.110441,0.494535,0.452986,0.294374,0.026777,0.011129,0.169402,0.076664,0.063861,0.045037,0.053266,0.04858,0.350878,0.389063,0.446622,0.569597,0.31261,0.248169,0.306454,0.434446,0.288024,0.018362,0.161458,0.076502,0.108406,0.46277,0.583721,0.402617,0.335722


In [19]:
# Ensemble the image-level target: plain average over every fold of both
# backbones (the un-suffixed columns and the "_b2" columns).
cols_tmp = []
for fold in range(NUM_FOLD):
    cols_tmp += [
        "{}_pred_fold{}".format(col_targets[-1], fold+1),
        "{}_pred_fold{}_b2".format(col_targets[-1], fold+1),
    ]
df_pred_image[col_targets[-1]] = np.mean(df_pred_image[cols_tmp].values, axis=1)
df_pred_image.head()

Unnamed: 0,StudyInstanceUID,SOPInstanceUID,SeriesInstanceUID,pe_present_on_image_pred_fold1,pe_present_on_image_pred_fold1_b2,pe_present_on_image_pred_fold2,pe_present_on_image_pred_fold2_b2,pe_present_on_image_pred_fold3,pe_present_on_image_pred_fold3_b2,pe_present_on_image_pred_fold4,pe_present_on_image_pred_fold4_b2,pe_present_on_image_pred_fold5,pe_present_on_image_pred_fold5_b2,pe_present_on_image
0,1870d65d0f6a,9e6515ade70b,65a0a30bbf20,0.000452,0.000778,0.001244,0.000297,0.000789,0.000537,0.000876,0.000917,0.00027,0.000123,0.000628
1,1870d65d0f6a,8611c32ef6a9,65a0a30bbf20,0.000429,0.000838,0.001271,0.000274,0.000798,0.000505,0.000927,0.000866,0.000265,0.00011,0.000628
2,1870d65d0f6a,d01c2dacd815,65a0a30bbf20,0.0004,0.000912,0.001384,0.00024,0.000912,0.000406,0.000621,0.000849,0.00012,9.6e-05,0.000594
3,1870d65d0f6a,1ab908ca66ca,65a0a30bbf20,0.000377,0.000845,0.001325,0.000251,0.001059,0.000439,0.000655,0.000852,0.000125,8.2e-05,0.000601
4,1870d65d0f6a,51b7740ce07c,65a0a30bbf20,0.000566,0.001074,0.001373,0.000296,0.000933,0.000545,0.000829,0.001151,0.000123,0.000119,0.000701


In [20]:
# process conflict

def solve_conflict(df_pred_s, df_pred, TH_NEGATIVE=0.5, TH_INDETERMINATE = 0.5, verbose=True):
    """Clamp study- and image-level predictions so they satisfy the label-consistency rules.

    Every study is assigned to exactly one class (indeterminate, negative or
    positive). Any probability that contradicts the assigned class is then moved
    to the nearest value on the correct side of 0.5: capped to ``0.5`` when it
    must not exceed 0.5, raised to ``0.5001`` when it must exceed 0.5.

    Both input frames are modified IN PLACE and the very same objects are
    returned. The study key column is taken from the module-level ``col_groupby``.

    Args:
        df_pred_s: study-level predictions, one row per study. Needs the
            ``col_groupby`` column and the nine exam-level target columns.
            A 0/1 ``positive`` column is added.
        df_pred: image-level predictions. Needs ``col_groupby`` and
            ``pe_present_on_image``. Any index is accepted.
        TH_NEGATIVE: a non-indeterminate study is negative when
            ``negative_exam_for_pe`` is above this value.
        TH_INDETERMINATE: a study is indeterminate when ``indeterminate`` is
            above this value.
        verbose: when True, print how many rows each rule touched.

    Returns:
        tuple: ``(df_pred_s, df_pred)``, the mutated input frames.
    """

    # NOTE: all writes go through .loc. Chained indexing (df[col][mask] = v)
    # is not guaranteed to write through and is a no-op under Copy-on-Write.
    def _cap(mask, col):
        """Set `col` to 0.5 on rows of `mask` where it is > 0.5; return the rows hit."""
        hit = mask & (df_pred_s[col] > 0.5)
        df_pred_s.loc[hit, col] = 0.5
        return hit

    def _lift(mask, col):
        """Set `col` to 0.5001 on rows of `mask` where it is <= 0.5; return the rows hit."""
        hit = mask & (df_pred_s[col] <= 0.5)
        df_pred_s.loc[hit, col] = 0.5001
        return hit

    # ---- split studies into three mutually exclusive classes -----------------
    index_indeterminate = df_pred_s['indeterminate'] > TH_INDETERMINATE
    index_negative = ~index_indeterminate & (df_pred_s['negative_exam_for_pe'] > TH_NEGATIVE)
    index_positive = ~index_indeterminate & ~index_negative
    index_not_positive = ~index_positive

    # ---- negative / indeterminate must agree with the assigned class ---------
    # The two "lift" rules only fire when a threshold below 0.5 was passed in.
    negative_lifted = _lift(index_negative, 'negative_exam_for_pe')
    indeterminate_lifted = _lift(index_indeterminate, 'indeterminate')
    indeterminate_negative_capped = _cap(index_indeterminate, 'negative_exam_for_pe')
    negative_indeterminate_capped = _cap(index_negative, 'indeterminate')
    positive_negative_capped = _cap(index_positive, 'negative_exam_for_pe')
    positive_indeterminate_capped = _cap(index_positive, 'indeterminate')

    # ---- non-positive studies: no PE attribute may exceed 0.5 ----------------
    # central_pe is capped here too; the earlier version computed its mask but
    # never applied it.
    not_positive_cols = [
        'rv_lv_ratio_lt_1',
        'rv_lv_ratio_gte_1',
        'central_pe',
        'rightsided_pe',
        'leftsided_pe',
        'chronic_pe',
        'acute_and_chronic_pe',
    ]
    not_positive_capped = {col: _cap(index_not_positive, col) for col in not_positive_cols}

    # ---- positive studies: exactly one RV/LV label above 0.5 -----------------
    # Both side masks are taken before any value is changed.
    index_positive_and_rv_gte_lv = index_positive & (df_pred_s['rv_lv_ratio_lt_1'] <= df_pred_s['rv_lv_ratio_gte_1'])
    index_positive_and_rv_lt_lv = index_positive & (df_pred_s['rv_lv_ratio_lt_1'] > df_pred_s['rv_lv_ratio_gte_1'])

    rv_gte_lv_gte_lifted = _lift(index_positive_and_rv_gte_lv, 'rv_lv_ratio_gte_1')
    rv_gte_lv_lt_capped = _cap(index_positive_and_rv_gte_lv, 'rv_lv_ratio_lt_1')
    rv_lt_lv_lt_lifted = _lift(index_positive_and_rv_lt_lv, 'rv_lv_ratio_lt_1')
    rv_lt_lv_gte_capped = _cap(index_positive_and_rv_lt_lv, 'rv_lv_ratio_gte_1')

    # ---- positive studies: at least one location label above 0.5 -------------
    # Ties are resolved central > right > left.
    index_positive_and_central_is_greatest = (
        index_positive
        & (df_pred_s['central_pe'] >= df_pred_s['rightsided_pe'])
        & (df_pred_s['central_pe'] >= df_pred_s['leftsided_pe'])
    )
    index_positive_and_right_is_greatest = (
        index_positive
        & ~index_positive_and_central_is_greatest
        & (df_pred_s['rightsided_pe'] >= df_pred_s['leftsided_pe'])
    )
    index_positive_and_left_is_greatest = (
        index_positive
        & ~index_positive_and_central_is_greatest
        & ~index_positive_and_right_is_greatest
    )

    central_lifted = _lift(index_positive_and_central_is_greatest, 'central_pe')
    right_lifted = _lift(index_positive_and_right_is_greatest, 'rightsided_pe')
    left_lifted = _lift(index_positive_and_left_is_greatest, 'leftsided_pe')

    # acute_and_chronic_pe and chronic_pe: only one of them can have p > 0.5; neither having p > 0.5 is allowed.
    index_double_positive = index_positive & (df_pred_s['chronic_pe'] > 0.5) & (df_pred_s['acute_and_chronic_pe'] > 0.5)

    index_double_positive_and_chronic_lte_acute_and_chronic = (
        index_double_positive & (df_pred_s['chronic_pe'] <= df_pred_s['acute_and_chronic_pe'])
    )
    df_pred_s.loc[index_double_positive_and_chronic_lte_acute_and_chronic, 'chronic_pe'] = 0.5

    # Evaluated after the write above, exactly as before; rows already handled
    # now have chronic_pe == 0.5 < acute_and_chronic_pe and are not selected.
    index_double_positive_and_chronic_gt_acute_and_chronic = (
        index_double_positive & (df_pred_s['chronic_pe'] > df_pred_s['acute_and_chronic_pe'])
    )
    df_pred_s.loc[index_double_positive_and_chronic_gt_acute_and_chronic, 'acute_and_chronic_pe'] = 0.5

    # ---- image level ----------------------------------------------------------
    df_pred_s['positive'] = index_positive.astype('int64')

    # Masks are built directly on df_pred (no merge), so they line up with
    # df_pred's rows whatever its index is. Images whose study is missing from
    # df_pred_s count as not positive.
    index_positive_i = df_pred[col_groupby].isin(df_pred_s.loc[index_positive, col_groupby])
    image_prob = df_pred['pe_present_on_image']
    study_max = df_pred.groupby(col_groupby)['pe_present_on_image'].transform('max')
    is_peak = image_prob == study_max

    index_negative_and_pe_present_on_image_gt_05_i = ~index_positive_i & (image_prob > 0.5)
    index_positive_and_peak_and_pe_present_on_image_lte_05_i = index_positive_i & is_peak & (image_prob <= 0.5)
    num_peak = is_peak.sum()

    # Positional (numpy) masks: safe even when df_pred's index has duplicates.
    df_pred.loc[index_negative_and_pe_present_on_image_gt_05_i.to_numpy(), 'pe_present_on_image'] = 0.5
    # A positive study needs at least one image above 0.5: lift its peak image(s).
    df_pred.loc[index_positive_and_peak_and_pe_present_on_image_lte_05_i.to_numpy(), 'pe_present_on_image'] = 0.5001

    if verbose:
        print("num study", len(df_pred_s))
        print("num image", len(df_pred))
        print("split to 3 classes")
        print(" num predicted_as_negative:", index_negative.sum())
        print(" num predicted_as_indeterminate:", index_indeterminate.sum())
        print(" num predicted_as_positive:", index_positive.sum())
        print("process 3 class conflict")
        print(" num predicted_as_negative and negative<=0.5:", negative_lifted.sum())
        print(" num predicted_as_indeterminate and indeterminate<=0.5:", indeterminate_lifted.sum())

        print(" num predicted_as_indeterminate and negative_exam_for_pe>0.5:", indeterminate_negative_capped.sum())
        print(" num predicted_as_negative and indeterminate>0.5:", negative_indeterminate_capped.sum())
        print(" num predicted_as_positive and negative_exam_for_pe>0.5:", positive_negative_capped.sum())
        print(" num predicted_as_positive and indeterminate>0.5:", positive_indeterminate_capped.sum())

        print("process negative case")
        for col in not_positive_cols:
            # Label is derived from the column, so it can no longer be mislabelled.
            print(" num predicted_as_not_positive and {}>0.5:".format(col), not_positive_capped[col].sum())

        print("process positive case")
        print(" num predicted_as_positive and rv_lv_ratio_lt_1<=rv_lv_ratio_gte_1:", index_positive_and_rv_gte_lv.sum())
        print(" num predicted_as_positive and rv_lv_ratio_lt_1>rv_lv_ratio_gte_1:", index_positive_and_rv_lt_lv.sum())
        print(" num predicted_as_positive and (rv_lv_ratio_lt_1<=rv_lv_ratio_gte_1) and (rv_lv_ratio_gte_1<=0.5): ",
               rv_gte_lv_gte_lifted.sum())
        print(" num predicted_as_positive and (rv_lv_ratio_lt_1<=rv_lv_ratio_gte_1) and rv_lv_ratio_lt_1>0.5: ",
               rv_gte_lv_lt_capped.sum())
        print(" num predicted_as_positive and (rv_lv_ratio_lt_1>rv_lv_ratio_gte_1) and rv_lv_ratio_lt_1<=0.5: ",
               rv_lt_lv_lt_lifted.sum())
        print(" num predicted_as_positive and (rv_lv_ratio_lt_1>rv_lv_ratio_gte_1) and rv_lv_ratio_gte_1>0.5: ",
               rv_lt_lv_gte_capped.sum())
        print(" num predicted_as_positive and central is greatest:", index_positive_and_central_is_greatest.sum())
        print(" num predicted_as_positive and right is greatest:", index_positive_and_right_is_greatest.sum())
        print(" num predicted_as_positive and left is greatest:", index_positive_and_left_is_greatest.sum())
        print(" num predicted_as_positive and central is greatest and central_pe<=0.5:", central_lifted.sum())
        print(" num predicted_as_positive and right is greatest and rightsided_pe<=0.5:", right_lifted.sum())
        print(" num predicted_as_positive and left is greatest and leftsided_pe<=0.5:", left_lifted.sum())
        print(" num both chronic_pe and acute_and_chronic_pe is positive:", index_double_positive.sum())
        print(" num both chronic_pe and acute_and_chronic_pe is positive and chronic<=acute_and_chronic:", index_double_positive_and_chronic_lte_acute_and_chronic.sum())
        print(" num both chronic_pe and acute_and_chronic_pe is positive and chronic>acute_and_chronic:", index_double_positive_and_chronic_gt_acute_and_chronic.sum())

        print("process image level")
        print(" num img of predicted_as_positive:", index_positive_i.sum())
        print(" num img of predicted_as_negative:", (~index_positive_i).sum())
        print(" num img of peak:", num_peak)
        print(" num img of predicted_as_negative and pe_present_on_image>0.5:", index_negative_and_pe_present_on_image_gt_05_i.sum())
        print(" num img of predicted_as_positive and peak and pe_present_on_image<=0.5:", index_positive_and_peak_and_pe_present_on_image_lte_05_i.sum())

    return df_pred_s, df_pred

In [21]:
# Apply the consistency rules to the ensembled predictions.
# solve_conflict writes into the frames it is given and returns those same
# objects, so df_pred_study / df_pred_image are modified as well; the *_const
# names are aliases, not copies.
df_pred_study_const, df_pred_image_const = solve_conflict(df_pred_study, df_pred_image)
df_pred_study_const.head()

num study 15
num image 2000
split to 3 classes
 num predicted_as_negative: 13
 num predicted_as_indeterminate: 0
 num predicted_as_positive: 2
process 3 class conflict
 num predicted_as_negative and negative<=0.5: 0
 num predicted_as_indeterminate and indeterminate<=0.5: 0
 num predicted_as_indeterminate and negative_exam_for_pe>0.5: 0
 num predicted_as_negative and indeterminate>0.5: 0
 num predicted_as_positive and negative_exam_for_pe>0.5: 0
 num predicted_as_positive and indeterminate>0.5: 0
process negative case
 num predicted_as_not_positive and rv_lv_ratio_lt_1>0.5: 0
 num predicted_as_not_positive and rv_lv_ratio_gte_1>0.5: 0
 num predicted_as_not_positive and central_pe>0.5: 0
 num predicted_as_not_positive and central_pe>0.5: 0
 num predicted_as_not_positive and leftsided_pe>0.5: 0
 num predicted_as_not_positive and chronic_pe>0.5: 0
 num predicted_as_not_positive and acute_and_chronic_pe>0.5: 0
process positive case
 num predicted_as_positive and rv_lv_ratio_lt_1<=rv_lv_rati

Unnamed: 0,StudyInstanceUID,negative_exam_for_pe_pred_fold1,negative_exam_for_pe_pred_fold1_b2,indeterminate_pred_fold1,indeterminate_pred_fold1_b2,chronic_pe_pred_fold1,chronic_pe_pred_fold1_b2,acute_and_chronic_pe_pred_fold1,acute_and_chronic_pe_pred_fold1_b2,central_pe_pred_fold1,central_pe_pred_fold1_b2,leftsided_pe_pred_fold1,leftsided_pe_pred_fold1_b2,rightsided_pe_pred_fold1,rightsided_pe_pred_fold1_b2,rv_lv_ratio_gte_1_pred_fold1,rv_lv_ratio_gte_1_pred_fold1_b2,rv_lv_ratio_lt_1_pred_fold1,rv_lv_ratio_lt_1_pred_fold1_b2,negative_exam_for_pe_pred_fold2,negative_exam_for_pe_pred_fold2_b2,indeterminate_pred_fold2,indeterminate_pred_fold2_b2,chronic_pe_pred_fold2,chronic_pe_pred_fold2_b2,acute_and_chronic_pe_pred_fold2,acute_and_chronic_pe_pred_fold2_b2,central_pe_pred_fold2,central_pe_pred_fold2_b2,leftsided_pe_pred_fold2,leftsided_pe_pred_fold2_b2,rightsided_pe_pred_fold2,rightsided_pe_pred_fold2_b2,rv_lv_ratio_gte_1_pred_fold2,rv_lv_ratio_gte_1_pred_fold2_b2,rv_lv_ratio_lt_1_pred_fold2,rv_lv_ratio_lt_1_pred_fold2_b2,negative_exam_for_pe_pred_fold3,negative_exam_for_pe_pred_fold3_b2,indeterminate_pred_fold3,indeterminate_pred_fold3_b2,chronic_pe_pred_fold3,chronic_pe_pred_fold3_b2,acute_and_chronic_pe_pred_fold3,acute_and_chronic_pe_pred_fold3_b2,central_pe_pred_fold3,central_pe_pred_fold3_b2,leftsided_pe_pred_fold3,leftsided_pe_pred_fold3_b2,rightsided_pe_pred_fold3,...,rv_lv_ratio_gte_1_pred_fold3,rv_lv_ratio_gte_1_pred_fold3_b2,rv_lv_ratio_lt_1_pred_fold3,rv_lv_ratio_lt_1_pred_fold3_b2,negative_exam_for_pe_pred_fold4,negative_exam_for_pe_pred_fold4_b2,indeterminate_pred_fold4,indeterminate_pred_fold4_b2,chronic_pe_pred_fold4,chronic_pe_pred_fold4_b2,acute_and_chronic_pe_pred_fold4,acute_and_chronic_pe_pred_fold4_b2,central_pe_pred_fold4,central_pe_pred_fold4_b2,leftsided_pe_pred_fold4,leftsided_pe_pred_fold4_b2,rightsided_pe_pred_fold4,rightsided_pe_pred_fold4_b2,rv_lv_ratio_gte_1_pred_fold4,rv_lv_ratio_gte_1_pred_fold4_b2,rv_lv_ratio_lt_1_pred_fold4,rv_lv_r
atio_lt_1_pred_fold4_b2,negative_exam_for_pe_pred_fold5,negative_exam_for_pe_pred_fold5_b2,indeterminate_pred_fold5,indeterminate_pred_fold5_b2,chronic_pe_pred_fold5,chronic_pe_pred_fold5_b2,acute_and_chronic_pe_pred_fold5,acute_and_chronic_pe_pred_fold5_b2,central_pe_pred_fold5,central_pe_pred_fold5_b2,leftsided_pe_pred_fold5,leftsided_pe_pred_fold5_b2,rightsided_pe_pred_fold5,rightsided_pe_pred_fold5_b2,rv_lv_ratio_gte_1_pred_fold5,rv_lv_ratio_gte_1_pred_fold5_b2,rv_lv_ratio_lt_1_pred_fold5,rv_lv_ratio_lt_1_pred_fold5_b2,negative_exam_for_pe,indeterminate,chronic_pe,acute_and_chronic_pe,central_pe,leftsided_pe,rightsided_pe,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,positive
0,1870d65d0f6a,0.75327,0.889616,0.004008,0.009249,0.015489,0.016519,0.001641,0.001372,0.000802,0.001216,0.074046,0.043957,0.163997,0.068754,0.020246,0.014432,0.171188,0.070179,0.835424,0.911305,0.01148,0.005037,0.029961,0.012258,0.003924,0.000683,0.002786,0.000548,0.078965,0.034444,0.116345,0.055383,0.01442,0.007226,0.148555,0.069008,0.6416,0.89108,0.014794,0.0082,0.046972,0.023106,0.010432,0.0017,0.00627,0.001274,0.162827,0.051067,0.264346,...,0.04865,0.0186,0.302667,0.083354,0.850735,0.866092,0.029522,0.016818,0.017485,0.020034,0.002637,0.002157,0.000979,0.001135,0.064449,0.052262,0.085316,0.08087,0.010344,0.017095,0.124067,0.090861,0.914994,0.935114,0.009068,0.002772,0.013517,0.007736,0.000899,0.000436,0.000514,0.000312,0.036011,0.021774,0.050403,0.039575,0.007108,0.008172,0.066029,0.042539,0.848923,0.011095,0.020308,0.002588,0.001583,0.06198,0.099455,0.016629,0.116845,0
1,26135e3b3b30,0.883958,0.816293,0.012922,0.02338,0.016293,0.031864,0.001145,0.004979,0.000706,0.00558,0.042905,0.083089,0.066929,0.113137,0.013245,0.0374,0.063833,0.107995,0.870548,0.843616,0.024912,0.013059,0.024172,0.020133,0.003291,0.002441,0.00541,0.002445,0.060684,0.066239,0.081886,0.094859,0.022872,0.018121,0.081215,0.112292,0.830242,0.844448,0.014248,0.014561,0.029914,0.029863,0.003733,0.003885,0.003704,0.003613,0.090116,0.075947,0.117775,...,0.030832,0.033334,0.125343,0.103269,0.805606,0.72096,0.026617,0.02952,0.034129,0.045472,0.008065,0.01131,0.003884,0.0118,0.110208,0.147072,0.145199,0.202647,0.035459,0.085201,0.163409,0.161757,0.866923,0.85077,0.017736,0.00972,0.027441,0.022138,0.003328,0.003232,0.002826,0.002753,0.066853,0.064825,0.089318,0.107422,0.022894,0.034479,0.095548,0.1014,0.833336,0.018668,0.028142,0.004541,0.004272,0.080794,0.111931,0.033384,0.111606,0
2,462e805da1f1,0.885173,0.896135,0.012009,0.01021,0.016044,0.014903,0.001068,0.001155,0.000615,0.00115,0.041772,0.039064,0.067485,0.058803,0.012735,0.012557,0.064976,0.059095,0.865334,0.903881,0.012454,0.007951,0.026148,0.013078,0.003022,0.0009,0.002743,0.000808,0.065107,0.037252,0.092205,0.056401,0.017353,0.008196,0.103151,0.069265,0.881886,0.915687,0.008919,0.006837,0.020019,0.017563,0.001509,0.001052,0.001327,0.000907,0.058495,0.038745,0.080085,...,0.015989,0.013925,0.091513,0.058726,0.879685,0.875895,0.022525,0.015287,0.016021,0.018088,0.002409,0.001767,0.001154,0.000998,0.058496,0.047497,0.075528,0.073573,0.014688,0.015696,0.093307,0.079244,0.862916,0.918545,0.016253,0.003406,0.027789,0.009976,0.002975,0.000733,0.001998,0.000547,0.067031,0.029487,0.090144,0.053441,0.019695,0.012014,0.104293,0.055961,0.888514,0.011585,0.017963,0.001659,0.001225,0.048295,0.069675,0.014285,0.077953,0
3,62dfc5f411e8,0.695241,0.764446,0.016314,0.047226,0.042402,0.044752,0.007993,0.009308,0.005441,0.011214,0.13149,0.113228,0.226202,0.142395,0.061608,0.046728,0.208471,0.15196,0.784831,0.834073,0.038575,0.030648,0.050195,0.023013,0.010834,0.003524,0.012199,0.003138,0.119455,0.066057,0.152074,0.087291,0.039851,0.016358,0.163113,0.114171,0.874001,0.837433,0.013241,0.032595,0.024313,0.030655,0.002043,0.004038,0.001811,0.002971,0.064687,0.069982,0.086096,...,0.01794,0.023972,0.098838,0.112027,0.748774,0.883094,0.042285,0.020208,0.048092,0.019432,0.012121,0.001933,0.004428,0.001294,0.135078,0.046456,0.175924,0.067838,0.035012,0.015868,0.223655,0.070005,0.816258,0.893763,0.043767,0.010815,0.037757,0.015971,0.00588,0.001658,0.004682,0.001432,0.094452,0.04153,0.115136,0.064157,0.02017,0.018565,0.149769,0.065934,0.813192,0.029567,0.033658,0.005933,0.004861,0.088242,0.120879,0.029607,0.135794,0
4,761f6f1a9f5b,0.816338,0.800406,0.024974,0.023166,0.034096,0.037904,0.004408,0.006517,0.003614,0.006901,0.083621,0.096281,0.119274,0.1293,0.039998,0.049159,0.097025,0.117185,0.826287,0.851656,0.01931,0.01962,0.039527,0.023144,0.007007,0.002884,0.009387,0.003441,0.097363,0.067228,0.12696,0.087792,0.051926,0.022844,0.107359,0.091996,0.750473,0.812182,0.018493,0.017,0.053824,0.043459,0.011097,0.007089,0.009438,0.006746,0.145084,0.100919,0.186156,...,0.072334,0.055517,0.173934,0.122893,0.824188,0.846047,0.035386,0.030219,0.025805,0.02955,0.005468,0.004129,0.003395,0.003446,0.088446,0.069633,0.112136,0.09613,0.026219,0.029576,0.126419,0.090067,0.879044,0.84985,0.024547,0.013834,0.022209,0.021939,0.002495,0.003455,0.002095,0.003222,0.056341,0.064395,0.072051,0.100095,0.013576,0.034021,0.086378,0.093684,0.825647,0.022655,0.033146,0.005455,0.005168,0.086931,0.116284,0.039517,0.110694,0


# Make submission

In [22]:
# Long-format predictions in the submission layout:
#   - one row per image:            id = <SOPInstanceUID>,            label = pe_present_on_image
#   - one row per (study, target):  id = <StudyInstanceUID>_<target>, label = that target
# The parts are collected in a list and concatenated once, instead of growing
# the frame inside the loop and assigning columns on a slice.
sub_parts = [
    df_pred_image_const[[col_index, col_targets[-1]]]
    .rename(columns={col_index: 'id', col_targets[-1]: 'label'})
]
for col in col_targets[:-1]:
    sub_parts.append(pd.DataFrame({
        'id': df_pred_study_const[col_groupby] + '_{}'.format(col),
        'label': df_pred_study_const[col],
    }))
df_sub_pred = pd.concat(sub_parts, ignore_index=True)
print(df_sub_pred.shape)
df_sub_pred.head()

(2135, 2)


Unnamed: 0,id,label
0,9e6515ade70b,0.000628
1,8611c32ef6a9,0.000628
2,d01c2dacd815,0.000594
3,1ab908ca66ca,0.000601
4,51b7740ce07c,0.000701


In [23]:
# Load the sample submission. The following cell keeps only its `id` column
# and left-joins the predicted labels onto it.
df_sub = pd.read_csv(df_sub_path)
print(df_sub.shape)
df_sub.head()

(152703, 2)


Unnamed: 0,id,label
0,df06fad17bc3_negative_exam_for_pe,0.5
1,c8039e7f9e63_negative_exam_for_pe,0.5
2,761f6f1a9f5b_negative_exam_for_pe,0.5
3,c8db5b1f6b56_negative_exam_for_pe,0.5
4,462e805da1f1_negative_exam_for_pe,0.5


In [24]:
# Attach predictions to the submission ids. Ids with no prediction keep a
# NaN label here; they are filled in a later cell.
df_sub = df_sub[['id']].merge(df_sub_pred, how='left', on='id')
print(df_sub.shape)
df_sub.head()

(152703, 2)


Unnamed: 0,id,label
0,df06fad17bc3_negative_exam_for_pe,0.629891
1,c8039e7f9e63_negative_exam_for_pe,0.795092
2,761f6f1a9f5b_negative_exam_for_pe,0.825647
3,c8db5b1f6b56_negative_exam_for_pe,0.765423
4,462e805da1f1_negative_exam_for_pe,0.888514


In [25]:
# fill na
# Fallback label for each target, in the same order as col_targets.
# NOTE(review): presumably the mean of each target on the training set;
# provenance is not shown in this notebook, confirm before changing.
mean_targets = [
    0.674681,
    0.021569,
    0.040115,
    0.019920,
    0.055090,
    0.212117,
    0.257590,
    0.129139,
    0.174612,
    0.289885,
]
# Targets and fallback values are paired by position, so the lengths must match.
assert len(mean_targets) == len(col_targets), "mean_targets must have one value per col_targets entry"

# Same layout as the prediction frame: image rows first, then one block of
# "<study>_<target>" rows per exam-level target. Parts are concatenated once.
mean_parts = [pd.DataFrame({'id': df_test_full[col_index], 'label': mean_targets[-1]})]
for col, mean_value in zip(col_targets[:-1], mean_targets[:-1]):
    mean_parts.append(pd.DataFrame({
        'id': df_test_full_exam[col_groupby] + '_{}'.format(col),
        'label': mean_value,
    }))
df_sub_mean = pd.concat(mean_parts, ignore_index=True)
print(df_sub_mean.shape)

(152703, 2)


In [26]:
# Fill labels that have no prediction with the per-target fallback value.
# Written with .loc: the chained form df_sub['label'][mask] = ... is not
# guaranteed to write through (and does nothing under Copy-on-Write).
label_missing = df_sub['label'].isna()
fallback_label = pd.merge(df_sub[['id']], df_sub_mean, on='id', how='left')['label']
df_sub.loc[label_missing, 'label'] = fallback_label[label_missing]
df_sub.head(20)

Unnamed: 0,id,label
0,df06fad17bc3_negative_exam_for_pe,0.629891
1,c8039e7f9e63_negative_exam_for_pe,0.795092
2,761f6f1a9f5b_negative_exam_for_pe,0.825647
3,c8db5b1f6b56_negative_exam_for_pe,0.765423
4,462e805da1f1_negative_exam_for_pe,0.888514
5,7f6fb39566ed_negative_exam_for_pe,0.019902
6,b44cbf5371f2_negative_exam_for_pe,0.288024
7,62dfc5f411e8_negative_exam_for_pe,0.813192
8,1870d65d0f6a_negative_exam_for_pe,0.848923
9,26135e3b3b30_negative_exam_for_pe,0.833336


In [27]:
def check_consistency2(df_exam, df_image, test=None):

    '''
    Checks exam-level and image-level predictions against the label
    consistency rules and returns the rows that break them.

    Every value is compared against 0.5. An exam is treated as positive when
    the maximum `pe_present_on_image` over its images is > 0.5, and as
    negative when that maximum is <= 0.5.

    A row is reported when:
    1a (positive exam): rv_lv_ratio_lt_1 and rv_lv_ratio_gte_1 are both > 0.5
                        or both <= 0.5
    1b (positive exam): central_pe, rightsided_pe and leftsided_pe are all <= 0.5
    1c (positive exam): acute_and_chronic_pe and chronic_pe are both > 0.5
    1d (positive exam): indeterminate or negative_exam_for_pe is > 0.5
    2a (negative exam): indeterminate and negative_exam_for_pe are both > 0.5
                        or both <= 0.5
    2b (negative exam): any of the remaining exam-level labels is > 0.5

    Args:
    df_exam  = exam-level predictions (pandas), keyed by StudyInstanceUID and
               holding the exam-level label columns named above
    df_image = image-level predictions (pandas) with StudyInstanceUID and
               pe_present_on_image; after merging with df_exam the columns
               SeriesInstanceUID and SOPInstanceUID must be present
    test     = unused, accepted only so existing three-argument calls keep working

    Returns:
    DataFrame of the merged (exam x image) rows that break a rule, with a
    'broken_rule' column. A row breaking several rules appears once per rule;
    the index restarts at 0 for each rule. Also prints the number of rows found.
    '''

    threshold = 0.5
    ids = ['StudyInstanceUID', 'SeriesInstanceUID', 'SOPInstanceUID']
    exam_labels = ['negative_exam_for_pe', 'indeterminate', 'chronic_pe',
                   'acute_and_chronic_pe', 'central_pe', 'leftsided_pe',
                   'rightsided_pe', 'rv_lv_ratio_gte_1', 'rv_lv_ratio_lt_1']

    # MERGER: one row per image, carrying the predictions of its exam
    df = df_exam.merge(df_image, how = 'left', on = 'StudyInstanceUID')
    labels = [c for c in df.columns if c not in ids]
    # explicit copy so the column added below is not written onto a slice
    df = df[ids + labels].copy()

    # SPLIT NEGATIVE AND POSITIVE EXAMS
    # NOTE: exams whose maximum is NaN (e.g. no image rows matched) satisfy
    # neither comparison and are therefore not checked by any rule.
    exam_max = df.groupby('StudyInstanceUID')['pe_present_on_image'].max()
    df['positive_images_in_exam'] = df['StudyInstanceUID'].map(exam_max)
    df_pos = df.loc[df['positive_images_in_exam'] >  threshold]
    df_neg = df.loc[df['positive_images_in_exam'] <= threshold]

    # Thresholded views. "high" and "low" are kept separately because a NaN
    # prediction is neither, so one cannot be derived by negating the other.
    pos_hi, pos_lo = df_pos[exam_labels] > threshold, df_pos[exam_labels] <= threshold
    neg_hi, neg_lo = df_neg[exam_labels] > threshold, df_neg[exam_labels] <= threshold

    # (rule id, frame the rule applies to, mask of rows breaking the rule)
    rules = [
        ('1a', df_pos, (pos_hi['rv_lv_ratio_lt_1'] & pos_hi['rv_lv_ratio_gte_1']) |
                       (pos_lo['rv_lv_ratio_lt_1'] & pos_lo['rv_lv_ratio_gte_1'])),
        ('1b', df_pos, pos_lo[['central_pe', 'rightsided_pe', 'leftsided_pe']].all(axis = 1)),
        ('1c', df_pos, pos_hi['acute_and_chronic_pe'] & pos_hi['chronic_pe']),
        ('1d', df_pos, pos_hi['indeterminate'] | pos_hi['negative_exam_for_pe']),
        ('2a', df_neg, (neg_hi['indeterminate'] & neg_hi['negative_exam_for_pe']) |
                       (neg_lo['indeterminate'] & neg_lo['negative_exam_for_pe'])),
        ('2b', df_neg, neg_hi[['rv_lv_ratio_lt_1', 'rv_lv_ratio_gte_1', 'central_pe',
                               'rightsided_pe', 'leftsided_pe',
                               'acute_and_chronic_pe', 'chronic_pe']].any(axis = 1)),
    ]

    # MERGING INCONSISTENT PREDICTIONS
    broken = []
    for rule_id, frame, mask in rules:
        violations = frame.loc[mask].reset_index(drop = True)
        violations['broken_rule'] = rule_id
        broken.append(violations)
    errors = pd.concat(broken, axis = 0)

    # OUTPUT
    print('Found', len(errors), 'inconsistent predictions')
    return errors

In [28]:
# Run the label-consistency check on the exam-level and image-level predictions.
error = check_consistency2(
    df_exam=df_pred_study_const,
    df_image=df_pred_image_const,
    test=df_test,
)

Found 0 inconsistent predictions


In [29]:
# Write the submission file only when the consistency check found nothing;
# otherwise report the problem and leave any existing file untouched.
if len(error) > 0:
    print("error!")
else:
    df_sub.to_csv('../output/submission.csv', index=None)

In [30]:
# Final look at the first rows of the submission frame.
df_sub.head(n=5)

Unnamed: 0,id,label
0,df06fad17bc3_negative_exam_for_pe,0.629891
1,c8039e7f9e63_negative_exam_for_pe,0.795092
2,761f6f1a9f5b_negative_exam_for_pe,0.825647
3,c8db5b1f6b56_negative_exam_for_pe,0.765423
4,462e805da1f1_negative_exam_for_pe,0.888514
