In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, glob, pickle, time, gc, copy, sys
import warnings
import cv2
import multiprocessing
from concurrent.futures import ProcessPoolExecutor
import pydicom

# Silence every warning for the rest of the session.
warnings.simplefilter('ignore')
# Allow up to 100 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 100)
# Extend the import search path with ../src (presumably where the project-local
# `utils` module lives - confirm against the repository layout).
sys.path.append("../src")
from utils import ri, pickle_load, pickle_save

# Config

In [2]:
# --- configuration ---
# DEBUG: leave False to run the preprocessing on everything.
DEBUG = False

# Input data location (a repo-relative alternative is "../input/orig").
input_dir = "/mnt/disks/data5/rsna2020"
df_train_path = input_dir + "/train.csv"
train_dir_path = input_dir + "/train"

# Preprocessing output location (a repo-relative alternative is "../input/preprocess").
preprocess_dir = "/mnt/disks/data6/rsna2020/preprocessed"
train_npy_dir_path = preprocess_dir + "/train_law_npy"

# Make sure both output directories exist before anything is written to them.
for _out_dir in (preprocess_dir, train_npy_dir_path):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:
# Names of the key columns used by the cells of this notebook.
col_index = "SOPInstanceUID"      # one value per image row
col_groupby = "StudyInstanceUID"  # key used to group image rows into exams

# Target columns, in the order the rest of the pipeline expects them.
col_targets = [
    "negative_exam_for_pe", "indeterminate",
    "chronic_pe", "acute_and_chronic_pe",
    "central_pe", "leftsided_pe", "rightsided_pe",
    "rv_lv_ratio_gte_1", "rv_lv_ratio_lt_1",
    "pe_present_on_image",
]

# Number of targets is derived from the list so the two can never disagree.
NUM_CLASS = len(col_targets)
print('NUM_CLASS: {}'.format(NUM_CLASS))

NUM_CLASS: 10


# Data loading

In [4]:
# Dataset root; the two directories below are sub-folders of it.
DATA_DIR = "/mnt/disks/data4/rsna2020"
INPUT_DIR = "{}/orig".format(DATA_DIR)
PREPROCESSED_DIR = "{}/preprocessed".format(DATA_DIR)

# Read the pickled training table through the project's pickle_load helper.
df_train_pkl_path = PREPROCESSED_DIR + "/df_train2.pkl"
df_train = pickle_load(df_train_pkl_path)

# Report the table size, then preview the first rows.
print(df_train.shape)
df_train.head()

(1790594, 21)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,path,name,series_index,num_images
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0123_c0f3cb036d06.jpg,123.0,124
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0114_f57ffd3883b6.jpg,114.0,124
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0024_41220fda34a3.jpg,24.0,124
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0023_13b685b4b14f.jpg,23.0,124
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0022_be0b7524ffb4.jpg,22.0,124


In [5]:
# Build the exam-level table: keep only the first row encountered for each
# value of col_groupby.
is_first_row_of_exam = ~df_train[col_groupby].duplicated()
# ri is a project helper from utils (presumably resets the index - confirm).
df_train_exam = ri(df_train[is_first_row_of_exam])
print(df_train_exam.shape)
df_train_exam.head()

(7279, 21)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,path,name,series_index,num_images
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0123_c0f3cb036d06.jpg,123.0,124
1,013358b540bb,2805267980e7,c4cce735427f,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0144_c4cce735427f.jpg,144.0,145
2,0cee26703028,bac7becd2970,648654d7154c,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0143_648654d7154c.jpg,143.0,144
3,c28f3d01b14f,7d17c72fd0ce,a3cba934ec3c,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0104_a3cba934ec3c.jpg,104.0,105
4,c8fbf1e08ac5,275497911f02,0b9418e3c60a,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0080_0b9418e3c60a.jpg,80.0,81


In [6]:
# Copy two existing columns under the names used by the saved table.
# NOTE(review): dicom_path is copied from `path`, whose displayed values point
# under .../preprocessed/train-j... - confirm this is the intended DICOM location.
for new_col, src_col in (("num_series", "num_images"), ("dicom_path", "path")):
    df_train[new_col] = df_train[src_col]

# Per-image .npy location: <train_npy_dir_path>/<SOPInstanceUID>.npy
npy_file_names = df_train["SOPInstanceUID"] + ".npy"
df_train["npy_path"] = train_npy_dir_path + '/' + npy_file_names

# Show one complete path as a sanity check, then preview the table.
print(df_train.loc[0, "npy_path"])
df_train.head()

/mnt/disks/data6/rsna2020/preprocessed/train_law_npy/c0f3cb036d06.npy


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,path,name,series_index,num_images,num_series,dicom_path,npy_path
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0123_c0f3cb036d06.jpg,123.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0114_f57ffd3883b6.jpg,114.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0024_41220fda34a3.jpg,24.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0023_13b685b4b14f.jpg,23.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0022_be0b7524ffb4.jpg,22.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...


In [7]:
# Number the exams 0..N-1 following the row order of df_train_exam ...
df_train_exam["exam_index"] = np.arange(df_train_exam.shape[0])

# ... and attach that number to every image row of the matching exam.
exam_lookup = df_train_exam[[col_groupby, "exam_index"]]
df_train = df_train.merge(exam_lookup, how="left", on=col_groupby)
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,path,name,series_index,num_images,num_series,dicom_path,npy_path,exam_index
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0123_c0f3cb036d06.jpg,123.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0114_f57ffd3883b6.jpg,114.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0024_41220fda34a3.jpg,24.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0023_13b685b4b14f.jpg,23.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0022_be0b7524ffb4.jpg,22.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0


In [8]:
# Reorder the image rows: sort them by series_index first, then left-merge them
# onto the single-column exam list. No `on` is given, so the join uses the only
# shared column (col_groupby). The preview below shows each exam's rows in
# ascending series_index.
images_sorted = df_train.sort_values("series_index")
exam_order = df_train_exam[[col_groupby]]
df_train = exam_order.merge(images_sorted, how="left")
print(df_train.shape)
df_train.head()

(1790594, 25)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,path,name,series_index,num_images,num_series,dicom_path,npy_path,exam_index
0,6897fa9de148,2bfbb7fd2e8b,baedb900c69c,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0000_baedb900c69c.jpg,0.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
1,6897fa9de148,2bfbb7fd2e8b,52b6b0b793bb,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0001_52b6b0b793bb.jpg,1.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
2,6897fa9de148,2bfbb7fd2e8b,1997c99c9d59,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0002_1997c99c9d59.jpg,2.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
3,6897fa9de148,2bfbb7fd2e8b,c6f29ac6659b,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0003_c6f29ac6659b.jpg,3.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
4,6897fa9de148,2bfbb7fd2e8b,487d9ab5531f,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0004_487d9ab5531f.jpg,4.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0


In [20]:
# m_i: sum of pe_present_on_image over all rows sharing the same col_groupby value.
df_tmp = (
    df_train
    .groupby(col_groupby)["pe_present_on_image"]
    .sum()
    .reset_index()
    .rename(columns={"pe_present_on_image": "m_i"})
)

# Attach m_i to every image row of the exam.
df_train = df_train.merge(df_tmp[[col_groupby, "m_i"]], how="left", on=col_groupby)

# q_i: m_i divided by the row's num_images.
df_train["q_i"] = df_train["m_i"].div(df_train["num_images"])
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,path,name,series_index,num_images,num_series,dicom_path,npy_path,exam_index,m_i,q_i
0,6897fa9de148,2bfbb7fd2e8b,baedb900c69c,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0000_baedb900c69c.jpg,0.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0,42.0,0.33871
1,6897fa9de148,2bfbb7fd2e8b,52b6b0b793bb,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0001_52b6b0b793bb.jpg,1.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0,42.0,0.33871
2,6897fa9de148,2bfbb7fd2e8b,1997c99c9d59,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0002_1997c99c9d59.jpg,2.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0,42.0,0.33871
3,6897fa9de148,2bfbb7fd2e8b,c6f29ac6659b,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0003_c6f29ac6659b.jpg,3.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0,42.0,0.33871
4,6897fa9de148,2bfbb7fd2e8b,487d9ab5531f,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,0004_487d9ab5531f.jpg,4.0,124,124,/mnt/disks/data4/rsna2020/preprocessed/train-j...,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0,42.0,0.33871


In [23]:
# Load the DICOM metadata table and pull three fields into df_train, keyed by col_groupby.
df_tmp = pd.read_csv("/mnt/disks/data4/rsna2020/preprocessed/df_train_meta.csv")
meta_cols = [col_groupby, "RescaleIntercept", "RescaleSlope", "PatientPosition"]
# NOTE(review): this assumes df_train_meta.csv holds one row per StudyInstanceUID;
# with repeated keys the left merge would multiply the image rows - confirm.
df_train = df_train.merge(df_tmp[meta_cols], how="left", on=col_groupby)
df_tmp.head()

Unnamed: 0,SpecificCharacterSet,SOPClassUID,SOPInstanceUID,Modality,SliceThickness,KVP,GantryDetectorTilt,TableHeight,RotationDirection,XRayTubeCurrent,Exposure,ConvolutionKernel,PatientPosition,StudyInstanceUID,SeriesInstanceUID,SeriesNumber,InstanceNumber,FrameOfReferenceUID,SamplesPerPixel,PhotometricInterpretation,Rows,Columns,BitsAllocated,BitsStored,HighBit,PixelRepresentation,RescaleIntercept,RescaleSlope,FileMetaInformationGroupLength,FileMetaInformationVersion,MediaStorageSOPClassUID,MediaStorageSOPInstanceUID,TransferSyntaxUID,ImplementationClassUID,ImplementationVersionName,ImageType_0,ImageType_1,ImageType_2,ImageType_3,ImageType_4,ImageType_5,ImageType_6,ImagePositionPatient_0,ImagePositionPatient_1,ImagePositionPatient_2,ImageOrientationPatient_0,ImageOrientationPatient_1,ImageOrientationPatient_2,ImageOrientationPatient_3,ImageOrientationPatient_4,ImageOrientationPatient_5,PixelSpacing_0,PixelSpacing_1,WindowCenter_0,WindowCenter_1,WindowWidth_0,WindowWidth_1,target,num_images
0,ISO_IR 100,80e30f500244,c0f3cb036d06,CT,2.0,120.0,0.0,139.0,CW,842,108,B30f,HFS,6897fa9de148,2bfbb7fd2e8b,7,1,5d7c26defb9b,1,MONOCHROME2,512,512,16,12,11,0,-1024.0,1.0,172,b'\x00\x01',1.2.840.10008.5.1.4.1.1.2,1.2.826.0.1.3680043.10.474.405991.10049,1.2.840.10008.1.2.1,1.2.40.0.13.1.1.1,dcm4che-1.4.38,ORIGINAL,PRIMARY,AXIAL,CT_SOM5 SPI DUAL,STD,SNRG,DET_AB,-124.738281,-255.738281,-234.5,1.0,0.0,0.0,0.0,1.0,0.0,0.523438,0.523438,40.0,-600.0,400.0,1200.0,124,124
1,ISO_IR 100,80e30f500244,c4cce735427f,CT,2.0,100.0,0.0,190.5,CW,335,95,B30f,HFS,013358b540bb,2805267980e7,6,1,3fbfd9d32342,1,MONOCHROME2,512,512,16,12,11,0,-1024.0,1.0,172,b'\x00\x01',1.2.840.10008.5.1.4.1.1.2,1.2.826.0.1.3680043.10.474.405991.10049,1.2.840.10008.1.2.1,1.2.40.0.13.1.1.1,dcm4che-1.4.38,ORIGINAL,PRIMARY,AXIAL,CT_SOM5 SPI,,,,-144.209961,-316.709961,63.5,1.0,0.0,0.0,0.0,1.0,0.0,0.580078,0.580078,50.0,-600.0,350.0,1200.0,145,145
2,ISO_IR 100,80e30f500244,648654d7154c,CT,2.0,100.0,0.0,175.5,CW,638,280,B30f,HFS,0cee26703028,bac7becd2970,17,1,8b2a03f63f07,1,MONOCHROME2,512,512,16,12,11,0,-1024.0,1.0,172,b'\x00\x01',1.2.840.10008.5.1.4.1.1.2,1.2.826.0.1.3680043.10.474.405991.10049,1.2.840.10008.1.2.1,1.2.40.0.13.1.1.1,dcm4che-1.4.38,ORIGINAL,PRIMARY,AXIAL,CT_SOM5 SPI,,,,-190.192383,-322.692383,-868.8,1.0,0.0,0.0,0.0,1.0,0.0,0.615234,0.615234,40.0,-600.0,400.0,1200.0,144,144
3,ISO_IR 100,80e30f500244,a3cba934ec3c,CT,3.0,120.0,0.0,151.5,CW,1600,160,B30f,HFS,c28f3d01b14f,7d17c72fd0ce,5,1,cd186c7ba22e,1,MONOCHROME2,512,512,16,12,11,0,-1024.0,1.0,172,b'\x00\x01',1.2.840.10008.5.1.4.1.1.2,1.2.826.0.1.3680043.10.474.405991.10049,1.2.840.10008.1.2.1,1.2.40.0.13.1.1.1,dcm4che-1.4.38,ORIGINAL,PRIMARY,AXIAL,CT_SOM5 SPI DUAL,STD,SNRG,DET_AB,-165.675781,-317.175781,-350.0,1.0,0.0,0.0,0.0,1.0,0.0,0.648438,0.648438,40.0,-600.0,400.0,1200.0,105,105
4,ISO_IR 100,80e30f500244,0b9418e3c60a,CT,3.0,140.0,0.0,145.5,CW,1402,160,B30f,HFS,c8fbf1e08ac5,275497911f02,8,1,df0f5bf5896b,1,MONOCHROME2,512,512,16,12,11,0,-1024.0,1.0,172,b'\x00\x01',1.2.840.10008.5.1.4.1.1.2,1.2.826.0.1.3680043.10.474.405991.10049,1.2.840.10008.1.2.1,1.2.40.0.13.1.1.1,dcm4che-1.4.38,ORIGINAL,PRIMARY,AXIAL,CT_SOM5 SPI DUAL,STD,SNRG,DET_AB,-165.675781,-311.175781,-384.7,1.0,0.0,0.0,0.0,1.0,0.0,0.648438,0.648438,40.0,-600.0,400.0,1200.0,81,81


In [25]:
# Restrict df_train to the columns below, in exactly this order.
# (A 'z_pos' column was commented out of the original selection and stays excluded.)
id_columns = ["StudyInstanceUID", "SeriesInstanceUID", "SOPInstanceUID"]
label_columns = [
    "pe_present_on_image", "negative_exam_for_pe",
    "qa_motion", "qa_contrast", "flow_artifact",
    "rv_lv_ratio_gte_1", "rv_lv_ratio_lt_1",
    "leftsided_pe", "chronic_pe", "true_filling_defect_not_pe",
    "rightsided_pe", "acute_and_chronic_pe", "central_pe", "indeterminate",
]
extra_columns = [
    "dicom_path",
    "RescaleSlope", "RescaleIntercept", "PatientPosition",
    "series_index", "num_series",
    "m_i", "q_i",
    "npy_path", "exam_index",
]
df_train = df_train[id_columns + label_columns + extra_columns]

In [27]:
# Save the assembled table as df_train.csv inside preprocess_dir.
# index=False (the documented boolean form) omits the row index from the file;
# the previous index=None only worked because None is falsy.
df_train_csv_path = "{}/df_train.csv".format(preprocess_dir)
df_train.to_csv(df_train_csv_path, index=False)
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,RescaleSlope,RescaleIntercept,PatientPosition,series_index,num_series,m_i,q_i,npy_path,exam_index
0,6897fa9de148,2bfbb7fd2e8b,baedb900c69c,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,1.0,-1024.0,HFS,0.0,124,42.0,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
1,6897fa9de148,2bfbb7fd2e8b,52b6b0b793bb,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,1.0,-1024.0,HFS,1.0,124,42.0,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
2,6897fa9de148,2bfbb7fd2e8b,1997c99c9d59,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,1.0,-1024.0,HFS,2.0,124,42.0,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
3,6897fa9de148,2bfbb7fd2e8b,c6f29ac6659b,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,1.0,-1024.0,HFS,3.0,124,42.0,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
4,6897fa9de148,2bfbb7fd2e8b,487d9ab5531f,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,/mnt/disks/data4/rsna2020/preprocessed/train-j...,1.0,-1024.0,HFS,4.0,124,42.0,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0


In [19]:
# NOTE(review): this cell carries execution count 19, lower than the cells placed
# before it, so it was run at an earlier point of the session than its position
# suggests. It drops the last four columns by position, so which columns are
# removed depends on the state of df_train when the cell runs - confirm whether
# this cell is still needed before relying on a top-to-bottom run.
df_train = df_train.iloc[:, :-4]

In [None]:
# postprocess config
# Two path profiles are kept in this cell:
#   local  - absolute locations under /mnt/disks
#   github - locations relative to the repository
# Previously both profiles were assigned one after the other, so the github
# values won while train_dir_path / train_npy_dir_path stayed derived from the
# local directories. The profile is now chosen explicitly and the derived paths
# are computed afterwards, so every path belongs to the same profile.
USE_LOCAL_PATHS = False  # False keeps the github profile, which is what the cell ended up with before

if USE_LOCAL_PATHS:
    # local
    input_dir = "/mnt/disks/data5/rsna2020"
    preprocess_dir = "/mnt/disks/data6/rsna2020/preprocessed"
    df_train_path = "{}/df_train2.csv".format(preprocess_dir)
    load_dir_b0 = "/mnt/disks/data4/rsna2020/output/201025_3_2ndNNs_features_pool_flip"
    pred_name_b0 = "cnn_preds"
    load_dir_b2 = "/mnt/disks/data4/rsna2020/output/201026_3_2ndNNs_features_pool_flip_b0b2"
    pred_name_b2 = "cnn_preds"
else:
    # github
    input_dir = "../input/orig"
    preprocess_dir = "../input/preprocess"
    df_train_path = "{}/df_train.csv".format(preprocess_dir)
    load_dir_b0 = "../output"
    pred_name_b0 = "cnn_preds_b0"
    load_dir_b2 = "../output"
    pred_name_b2 = "cnn_preds_b2"

# Paths derived from the active profile's directories.
train_dir_path = "{}/train".format(input_dir)
train_npy_dir_path = "{}/train_law_npy".format(preprocess_dir)

# Same value in both profiles.
NUM_FOLD = 5

In [None]:
# preprocess config
# DEBUG: switch to False to preprocess everything.
DEBUG = True

# Repository-relative locations. The absolute alternatives are
# "/mnt/disks/data5/rsna2020" (input_dir) and
# "/mnt/disks/data6/rsna2020/preprocessed" (preprocess_dir).
input_dir = "../input/orig"
preprocess_dir = "../input/preprocess"

# Paths derived from the two directories above.
df_train_path = input_dir + "/train.csv"
train_dir_path = input_dir + "/train"
train_npy_dir_path = preprocess_dir + "/train_law_npy"

# Create the output directories if they are missing.
for _out_dir in (preprocess_dir, train_npy_dir_path):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
# train_stage1 config
# DEBUG: switch to False to run the whole process.
DEBUG = True

# Absolute locations; the repository-relative alternatives are
# "../input/preprocess" and "../output" (the latter was noted under the name output_path).
preprocess_dir = "/mnt/disks/data6/rsna2020/preprocessed"
output_dir = "/mnt/disks/data6/rsna2020/output"
df_train_path = preprocess_dir + "/df_train.csv"

# The npy directory is set explicitly (on the data4 disk) instead of being
# derived from preprocess_dir as "<preprocess_dir>/train_law_npy".
train_npy_dir_path = "/mnt/disks/data4/rsna2020/preprocessed/train_law_npy"

# Make sure the output directory exists.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# train_stage2 config
# DEBUG: switch to False to run the whole process.
DEBUG = True

# Absolute locations; the repository-relative alternatives are
# "../input/preprocess" and "../output" (the latter was also noted under the name output_path).
preprocess_dir = "/mnt/disks/data6/rsna2020/preprocessed"
output_dir = "/mnt/disks/data6/rsna2020/output"
df_train_path = preprocess_dir + "/df_train.csv"

# The npy directory is set explicitly (on the data4 disk) instead of being
# derived from preprocess_dir as "<preprocess_dir>/train_law_npy".
train_npy_dir_path = "/mnt/disks/data4/rsna2020/preprocessed/train_law_npy"

# Directory of a previous run's output, used as the feature source
# (presumably the stage-1 CNN run - confirm).
feature_dir = "/mnt/disks/data4/rsna2020/output/201022_6_CNN_b0_1loss_512_fp16_bs80_lr1e3"

# Make sure the output directory exists.
os.makedirs(output_dir, exist_ok=True)