This notebook will assemble all DICOM slices that belong to a CT scan, and write out their image IDs along with the patient ID to a CSV file.

In [2]:
import pydicom as dcm
import os
from matplotlib import pyplot as plt
from joblib import Parallel, delayed
from tqdm import tqdm
import csv

In [3]:
# Training split: input DICOM directory and the two CSV files derived from it.
in_path_train = '/home/jupyter/rsna-intracranial-hemorrhage-detection/stage_2_train/'
out_file_train_scans = '/home/jupyter/rsna-intracranial-hemorrhage-detection/train_ct_scans.csv'
out_file_train_coords = '/home/jupyter/rsna-intracranial-hemorrhage-detection/train_ct_coords.csv'

# Test split: same layout as the training split.
in_path_test = '/home/jupyter/rsna-intracranial-hemorrhage-detection/stage_2_test/'
out_file_test_scans = '/home/jupyter/rsna-intracranial-hemorrhage-detection/test_ct_scans.csv'
out_file_test_coords = '/home/jupyter/rsna-intracranial-hemorrhage-detection/test_ct_coords.csv'

In [5]:
# Shared accumulators filled in by add_scans_and_process_coord:
#   scans      maps patient_id -> list of image IDs
#   img_coords holds one [img_id, *ImagePositionPatient] row per slice
scans = {}
img_coords = []

# Sanity check: count the distinct image IDs stored in a previously written
# train scans CSV (each row is `patient_id, img_id, img_id, ...`).
total = set()
if os.path.exists(out_file_train_scans):
    # csv.reader removes the line terminator, so the last ID of each row is
    # not stored with trailing newline characters as a plain split(',') would.
    with open(out_file_train_scans, 'r', newline='') as f:
        for row in csv.reader(f):
            total.update(row[1:])
else:
    # The file only exists after write_scans_and_coords has run for the
    # training split; do not abort a fresh top-to-bottom run because of it.
    print(f'{out_file_train_scans} not found; run write_scans_and_coords first')

print(len(total))

752521


In [None]:
def add_scans_and_process_coord(img_dir, img_name):
    """Read one DICOM slice and record it in the shared accumulators.

    Appends ``[img_id, *ImagePositionPatient]`` to the module-level
    ``img_coords`` list and appends ``img_id`` to ``scans[PatientID]``,
    creating that entry the first time a patient is seen.

    Args:
        img_dir: Directory that contains the DICOM file.
        img_name: File name of the slice; the text before the first ``.``
            is used as the image ID.

    Returns:
        None. Results are stored in ``scans`` and ``img_coords``.
    """
    # Only header elements are used here, so stop parsing before the Pixel
    # Data element instead of loading every image into memory.
    d = dcm.dcmread(os.path.join(img_dir, img_name), stop_before_pixels=True)
    img_id = img_name.split('.')[0]
    patient_id = d.PatientID

    img_coords.append([img_id] + list(d.ImagePositionPatient))

    # This function is executed by several threads sharing `scans`. A separate
    # "if key in dict ... else assign" lets two threads both create the list
    # for a new patient, silently dropping one image ID. A single setdefault
    # call avoids that window.
    # NOTE(review): relies on CPython running dict.setdefault / list.append as
    # single steps under the GIL; use an explicit lock on other runtimes.
    scans.setdefault(patient_id, []).append(img_id)

def write_scans_and_coords(img_dir, scans_path, coords_path):
    """Index every DICOM file in ``img_dir`` and write two CSV files.

    Args:
        img_dir: Directory whose entries are all read as DICOM slices.
        scans_path: Output CSV; one row per patient,
            ``patient_id, img_id, img_id, ...``.
        coords_path: Output CSV; one row per slice,
            ``img_id`` followed by its ImagePositionPatient values.

    Returns:
        None. Both files are overwritten if they already exist.

    Note:
        Slices are processed by multiple threads, so row order and the order
        of image IDs within a row are not deterministic.
    """
    # The accumulators are module-level. Empty them in place so results from
    # an earlier call (e.g. the training split) do not leak into the files
    # written by this call.
    scans.clear()
    img_coords.clear()

    file_names = os.listdir(img_dir)
    Parallel(n_jobs=-1, backend='threading', batch_size=5, require='sharedmem')(
        delayed(add_scans_and_process_coord)(img_dir, img_file_name)
        for img_file_name in tqdm(file_names)
    )

    # write scans
    # newline='' lets the csv module control line endings itself.
    with open(scans_path, 'w', newline='') as output:
        writer = csv.writer(output)
        print(f'Writing scans to {scans_path}')
        for patient_id in tqdm(scans):
            writer.writerow([patient_id] + scans[patient_id])

    # write coords
    with open(coords_path, 'w', newline='') as output:
        writer = csv.writer(output)
        print(f'Writing coords to {coords_path}')
        for row in tqdm(img_coords):
            writer.writerow(row)

In [None]:
# Index the training split and write its scan and coordinate CSVs.
write_scans_and_coords(
    img_dir=in_path_train,
    scans_path=out_file_train_scans,
    coords_path=out_file_train_coords,
)

In [None]:
# Index the test split and write its scan and coordinate CSVs.
write_scans_and_coords(
    img_dir=in_path_test,
    scans_path=out_file_test_scans,
    coords_path=out_file_test_coords,
)