In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, glob, pickle, time, gc, copy, sys
import warnings
import cv2
import multiprocessing
from concurrent.futures import ProcessPoolExecutor
import pydicom

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 100)
sys.path.append('../src')
from utils import ri, pickle_load, pickle_save

# Config

In [2]:
# config
DEBUG = False # set False to do all preprocess
input_dir = "../input/orig"
preprocess_dir = "../input/preprocess"
df_train_path = "{}/train.csv".format(input_dir)
train_dir_path = "{}/train".format(input_dir)
train_npy_dir_path = "{}/train_law_npy".format(preprocess_dir)
os.makedirs(preprocess_dir, exist_ok=True)
os.makedirs(train_npy_dir_path, exist_ok=True)

In [3]:
col_index = 'SOPInstanceUID'
col_groupby = 'StudyInstanceUID'
col_targets = [
    'negative_exam_for_pe',
    'indeterminate',
    'chronic_pe',
    'acute_and_chronic_pe',
    'central_pe',
    'leftsided_pe',
    'rightsided_pe',
    'rv_lv_ratio_gte_1',
    'rv_lv_ratio_lt_1',
    'pe_present_on_image',
]
NUM_CLASS = len(col_targets)
print('NUM_CLASS: {}'.format(NUM_CLASS))

NUM_CLASS: 10


# Data loading

In [4]:
# load train data
df_train = pd.read_csv(df_train_path)
if DEBUG:
    df_train = ri(df_train.iloc[:1000])
print(df_train.shape)
df_train.head()

(1000, 17)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0,0,0,0,0,0,1,1,0,0,1,0,0,0
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0,0,0,0,0,0,1,1,0,0,1,0,0,0
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0,0,0,0,0,0,1,1,0,0,1,0,0,0
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0,0,0,0,0,0,1,1,0,0,1,0,0,0
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0,0,0,0,0,0,1,1,0,0,1,0,0,0


# Dicom Data Extraction

In [5]:
# make exam-level train data
df_train_exam = ri(df_train[df_train[col_groupby].duplicated()==False])
print(df_train_exam.shape)
df_train_exam.head()

(9, 17)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0,0,0,0,0,0,1,1,0,0,1,0,0,0
1,013358b540bb,2805267980e7,c4cce735427f,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,0cee26703028,bac7becd2970,648654d7154c,0,0,0,0,0,0,1,0,1,0,1,0,0,0
3,c28f3d01b14f,7d17c72fd0ce,a3cba934ec3c,0,0,0,0,0,0,1,1,0,0,0,0,0,0
4,c8fbf1e08ac5,275497911f02,0b9418e3c60a,0,1,0,0,0,0,0,0,0,0,0,0,0,0


In [6]:
# add dicom path column
df_train['dicom_path'] = train_dir_path + "/" + \
                         df_train[col_groupby].values + "/" + \
                         df_train['SeriesInstanceUID'].values + "/" + \
                         df_train[col_index].values + ".dcm"
print(df_train['dicom_path'][0])
df_train.head()

/mnt/disks/data5/rsna2020/train/6897fa9de148/2bfbb7fd2e8b/c0f3cb036d06.dcm


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...


In [7]:
# load dicom and save meta data and images
starttime = time.time()

def task(i):
    if (i+1)%100000==0:
        print("{}/{} {:.1f}".format(i+1, len(df_train), time.time()-starttime))
        
    # load dicom
    path = df_train['dicom_path'][i]
    tmp_dcm = pydicom.dcmread(path)
    
    # extract meta data
    image_id = df_train[col_index][i]
    z_pos = tmp_dcm.ImagePositionPatient[-1]
    RescaleSlope = tmp_dcm['RescaleSlope'].value
    RescaleIntercept = tmp_dcm['RescaleIntercept'].value
    PatientPosition = tmp_dcm['PatientPosition'].value
    
    # save npy
    new_path = "{}/{}.npy".format(train_npy_dir_path, image_id)
    tmp_npy = np.asarray(tmp_dcm.pixel_array)
    np.save(new_path, tmp_npy)
    
    return [image_id, z_pos, RescaleSlope, RescaleIntercept, PatientPosition]

executor = ProcessPoolExecutor(max_workers=multiprocessing.cpu_count())
# executor = ProcessPoolExecutor(max_workers=2)
futures = [executor.submit(task, i) for i in range(len(df_train.iloc[:]))]

In [8]:
z_pos= []
RescaleSlope = []
RescaleIntercept = []
PatientPosition = []
for i in range(len(futures)):
    z_pos.append(futures[i].result()[0])
    RescaleSlope.append(futures[i].result()[1])
    RescaleIntercept.append(futures[i].result()[2])
    PatientPosition.append(futures[i].result()[3])
df_meta = df_train[[col_index]]
df_meta['z_pos'] = z_pos
df_meta['RescaleSlope'] = RescaleSlope
df_meta['RescaleIntercept'] = RescaleIntercept
df_meta['PatientPosition'] = PatientPosition
df_meta.head()

Unnamed: 0,SOPInstanceUID,z_pos,RescaleSlope,RescaleIntercept,PatientPosition
0,c0f3cb036d06,c0f3cb036d06,-234.5,1.0,-1024.0
1,f57ffd3883b6,f57ffd3883b6,-252.5,1.0,-1024.0
2,41220fda34a3,41220fda34a3,-432.5,1.0,-1024.0
3,13b685b4b14f,13b685b4b14f,-434.5,1.0,-1024.0
4,be0b7524ffb4,be0b7524ffb4,-436.5,1.0,-1024.0


# Preprocessing

In [9]:
df_train = pd.merge(df_train, df_meta, on=col_index, how='left')
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,c0f3cb036d06,-234.5,1.0,-1024.0
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,f57ffd3883b6,-252.5,1.0,-1024.0
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,41220fda34a3,-432.5,1.0,-1024.0
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,13b685b4b14f,-434.5,1.0,-1024.0
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,be0b7524ffb4,-436.5,1.0,-1024.0


In [10]:
# get series index of each exam
df_tmps = []
for i in range(len(df_train_exam)):
    exam = df_train_exam[col_groupby][i]
    df_tmp = ri(df_train[df_train[col_groupby]==exam])
    df_tmp = ri(df_tmp.sort_values('z_pos'))
    df_tmp['series_index'] = np.arange(len(df_tmp))
    df_tmps.append(df_tmp)
df_tmps = pd.concat(df_tmps)
df_train = pd.merge(df_train, df_tmps[[col_index, 'series_index']], on=col_index, how='left')
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition,series_index
0,6897fa9de148,2bfbb7fd2e8b,c0f3cb036d06,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,c0f3cb036d06,-234.5,1.0,-1024.0,86
1,6897fa9de148,2bfbb7fd2e8b,f57ffd3883b6,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,f57ffd3883b6,-252.5,1.0,-1024.0,119
2,6897fa9de148,2bfbb7fd2e8b,41220fda34a3,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,41220fda34a3,-432.5,1.0,-1024.0,27
3,6897fa9de148,2bfbb7fd2e8b,13b685b4b14f,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,13b685b4b14f,-434.5,1.0,-1024.0,6
4,6897fa9de148,2bfbb7fd2e8b,be0b7524ffb4,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,be0b7524ffb4,-436.5,1.0,-1024.0,84


In [11]:
# sort data by series index
df_train = pd.merge(df_train_exam[[col_groupby]], df_train.sort_values('series_index'), how='left')
print(df_train.shape)
df_train.head()

(1000, 23)


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition,series_index
0,6897fa9de148,2bfbb7fd2e8b,031618cba689,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,031618cba689,-402.5,1.0,-1024.0,0
1,6897fa9de148,2bfbb7fd2e8b,04cb74d5008c,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,04cb74d5008c,-274.5,1.0,-1024.0,1
2,6897fa9de148,2bfbb7fd2e8b,09886998dc28,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09886998dc28,-444.5,1.0,-1024.0,2
3,6897fa9de148,2bfbb7fd2e8b,09bfdbc349e5,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09bfdbc349e5,-308.5,1.0,-1024.0,3
4,6897fa9de148,2bfbb7fd2e8b,0de5072ee990,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,0de5072ee990,-414.5,1.0,-1024.0,4


In [12]:
# calc the number of series
df_tmp = df_train.groupby(col_groupby)[col_index].agg(len).reset_index()
df_tmp.columns = [col_groupby, 'num_series']
df_train = pd.merge(df_train, df_tmp, on=col_groupby)
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition,series_index,num_series
0,6897fa9de148,2bfbb7fd2e8b,031618cba689,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,031618cba689,-402.5,1.0,-1024.0,0,124
1,6897fa9de148,2bfbb7fd2e8b,04cb74d5008c,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,04cb74d5008c,-274.5,1.0,-1024.0,1,124
2,6897fa9de148,2bfbb7fd2e8b,09886998dc28,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09886998dc28,-444.5,1.0,-1024.0,2,124
3,6897fa9de148,2bfbb7fd2e8b,09bfdbc349e5,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09bfdbc349e5,-308.5,1.0,-1024.0,3,124
4,6897fa9de148,2bfbb7fd2e8b,0de5072ee990,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,0de5072ee990,-414.5,1.0,-1024.0,4,124


In [13]:
# calc q_i
df_tmp = df_train.groupby(col_groupby)['pe_present_on_image'].agg('sum').reset_index()
df_tmp.columns = [col_groupby, 'm_i']
df_train = pd.merge(df_train, df_tmp, on=col_groupby, how='left')
df_train['q_i'] = df_train['m_i'] / df_train['num_series'] 
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition,series_index,num_series,m_i,q_i
0,6897fa9de148,2bfbb7fd2e8b,031618cba689,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,031618cba689,-402.5,1.0,-1024.0,0,124,42,0.33871
1,6897fa9de148,2bfbb7fd2e8b,04cb74d5008c,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,04cb74d5008c,-274.5,1.0,-1024.0,1,124,42,0.33871
2,6897fa9de148,2bfbb7fd2e8b,09886998dc28,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09886998dc28,-444.5,1.0,-1024.0,2,124,42,0.33871
3,6897fa9de148,2bfbb7fd2e8b,09bfdbc349e5,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09bfdbc349e5,-308.5,1.0,-1024.0,3,124,42,0.33871
4,6897fa9de148,2bfbb7fd2e8b,0de5072ee990,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,0de5072ee990,-414.5,1.0,-1024.0,4,124,42,0.33871


In [14]:
# get npy path
df_train['npy_path'] = (
      train_npy_dir_path + '/'
    + df_train['SOPInstanceUID'] + ".npy"
)
print(df_train['npy_path'][0])
df_train.head()

/mnt/disks/data6/rsna2020/preprocessed/train_law_npy/031618cba689.npy


Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition,series_index,num_series,m_i,q_i,npy_path
0,6897fa9de148,2bfbb7fd2e8b,031618cba689,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,031618cba689,-402.5,1.0,-1024.0,0,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...
1,6897fa9de148,2bfbb7fd2e8b,04cb74d5008c,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,04cb74d5008c,-274.5,1.0,-1024.0,1,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...
2,6897fa9de148,2bfbb7fd2e8b,09886998dc28,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09886998dc28,-444.5,1.0,-1024.0,2,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...
3,6897fa9de148,2bfbb7fd2e8b,09bfdbc349e5,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09bfdbc349e5,-308.5,1.0,-1024.0,3,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...
4,6897fa9de148,2bfbb7fd2e8b,0de5072ee990,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,0de5072ee990,-414.5,1.0,-1024.0,4,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...


In [15]:
# get exam index
df_train_exam['exam_index'] = np.arange(len(df_train_exam))
df_train = pd.merge(df_train, df_train_exam[[col_groupby, 'exam_index']], on=col_groupby, how='left')
df_train.head()

Unnamed: 0,StudyInstanceUID,SeriesInstanceUID,SOPInstanceUID,pe_present_on_image,negative_exam_for_pe,qa_motion,qa_contrast,flow_artifact,rv_lv_ratio_gte_1,rv_lv_ratio_lt_1,leftsided_pe,chronic_pe,true_filling_defect_not_pe,rightsided_pe,acute_and_chronic_pe,central_pe,indeterminate,dicom_path,z_pos,RescaleSlope,RescaleIntercept,PatientPosition,series_index,num_series,m_i,q_i,npy_path,exam_index
0,6897fa9de148,2bfbb7fd2e8b,031618cba689,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,031618cba689,-402.5,1.0,-1024.0,0,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
1,6897fa9de148,2bfbb7fd2e8b,04cb74d5008c,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,04cb74d5008c,-274.5,1.0,-1024.0,1,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
2,6897fa9de148,2bfbb7fd2e8b,09886998dc28,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09886998dc28,-444.5,1.0,-1024.0,2,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
3,6897fa9de148,2bfbb7fd2e8b,09bfdbc349e5,0,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,09bfdbc349e5,-308.5,1.0,-1024.0,3,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0
4,6897fa9de148,2bfbb7fd2e8b,0de5072ee990,1,0,0,0,0,0,1,1,0,0,1,0,0,0,/mnt/disks/data5/rsna2020/train/6897fa9de148/2...,0de5072ee990,-414.5,1.0,-1024.0,4,124,42,0.33871,/mnt/disks/data6/rsna2020/preprocessed/train_l...,0


In [16]:
# save
df_train.to_csv("{}/df_train.csv".format(preprocess_dir), index=None)

In [17]:
for col in df_train.columns:
    print("'{}',".format(col))

'StudyInstanceUID',
'SeriesInstanceUID',
'SOPInstanceUID',
'pe_present_on_image',
'negative_exam_for_pe',
'qa_motion',
'qa_contrast',
'flow_artifact',
'rv_lv_ratio_gte_1',
'rv_lv_ratio_lt_1',
'leftsided_pe',
'chronic_pe',
'true_filling_defect_not_pe',
'rightsided_pe',
'acute_and_chronic_pe',
'central_pe',
'indeterminate',
'dicom_path',
'z_pos',
'RescaleSlope',
'RescaleIntercept',
'PatientPosition',
'series_index',
'num_series',
'm_i',
'q_i',
'npy_path',
'exam_index',
