In [2]:
import os
import pandas as pd
import numpy as np
import glob
import SimpleITK as sitk

try:
    import platipy
except:
    %pip install git+https://github.com/pyplati/platipy.git
    import platipy

from pathlib import Path
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct
from platipy.imaging.tests.data import get_lung_dicom

In [4]:
# Locate the raw DICOM export (proj_dir) and the conversion output root (result_dir).
proj_dir = '/Volumes/BWH-KANNLAB/Ben/MRL_Radiomics/MRL_Radiomics_Lung/MRL_Lung_BWH_data'
result_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data'
# list of Studies folders: e.g. 2019-07__Studies
# glob is called directly here because the list_dir helper is defined in a
# later cell; relying on it would raise NameError on a fresh top-to-bottom run.
# The '*' pattern skips dot-files such as .DS_Store.
studies_list = glob.glob(os.path.join(proj_dir, '*')) # len = 29
# list of scan folders found inside the Studies folders, named like
# <LAST^ FIRST>_<patient id>_MR_<date>_<time>_<site>_Fraction.1.Setup.Scan.Reopt_n144__00000
pat_folders = []
for folder in studies_list:
    pat_folders.extend(glob.glob(os.path.join(folder, '*'))) # len = 933

In [3]:
def list_dir(path):
    ''' list the visible entries directly inside a folder

    Args:
    path: folder to list

    Returns a list of full paths. The '*' glob pattern does not match names
    starting with a dot, so entries such as .DS_Store are left out.
    '''
    wildcard = os.path.join(path, '*')
    entries = glob.glob(wildcard)
    return entries

In [6]:
def get_pat_folder(proj_dir):
    ''' collect the scan folder paths found one level below the Studies folders

    Args:
    proj_dir: directory to all data (Studies folders, e.g. 2019-07__Studies)

    Returns a flat list with the entries of every Studies folder, in the order
    list_dir reports them. Entries are named like
    <LAST^ FIRST>_<patient id>_MR_<date>_<time>_<site>_Fraction.1.Setup...
    Nothing is created on disk by this function.
    '''
    return [
        scan_folder
        for study_folder in list_dir(proj_dir)      # 29 Studies folders when last run
        for scan_folder in list_dir(study_folder)   # 933 scan folders when last run
    ]

In [11]:
def create_pat_folder(pat_folders):
    ''' create new patient folders & RTstruct folders

    Reads the module-level result_dir as the output root.

    Args:
    pat_folders: list of raw scan folder paths; the last path component is
        expected to look like
        <name>_<patient id>_<MR|RTst>_<date>_<time>_<site>_<scan description>_...

    Returns:
    path_list: result_dir/<patient id> for every input folder (one per input,
        so patient folders repeat)
    frac_list: result_dir/<patient id>/<date>_<time>_<scan label> for every MR
        input folder
    '''
    path_list = []
    frac_list = []
    for pat in pat_folders:
        temp = pat.split('/')[-1].split('_')
        ID = temp[1]
        date = temp[3]
        sixdig = temp[4]
        # check if patient folder exists
        path = result_dir + '/' + ID
        path_list.append(path)
        if not os.path.exists(path):
            # exist_ok guards against the folder appearing between the check
            # above and the creation
            os.makedirs(path, exist_ok=True)
            print('New patient folder is created for', ID)
        # create RTstruct folders according to original MR image folder paths
        if temp[2] == 'MR':
            frac = temp[6]
            if 'Fraction.' in frac:
                # take the whole number after 'Fraction.'; indexing one
                # character (frac[9]) mapped Fraction.10 onto Fraction_1
                frac_number = frac.split('Fraction.')[1].split('.')[0]
                frac_path = path + '/' + date + '_' + sixdig + '_' + 'Fraction' + '_' + frac_number
            else:
                frac_path = path + '/' + date + '_' + sixdig + '_' + frac
            frac_list.append(frac_path)
            if not os.path.exists(frac_path):
                os.makedirs(frac_path, exist_ok=True)
                print('New scan id patient folder is created for', ID)
    return path_list, frac_list

In [16]:
# Build the output folder tree, then report how many paths of each kind came back.
path_list, frac_list = create_pat_folder(pat_folders)
print(*map(len, (path_list, frac_list))) # 933, 253

933 253


In [5]:
def convert_dicom_image(dicom_path):
    '''read and convert DICOM MR image series,
    record MR image series paths,
    export converted MR scan nii.gz file paths

    Reads the module-level result_dir as the output root. Each MR series is
    written to result_dir/<patient id>/MR/<date>_<time>_<scan label>.nii.gz;
    a series whose output file already exists is neither read nor rewritten.

    Args:
    dicom_path: list of raw scan folder paths (pat_folders); entries whose
        third '_'-separated field is not 'MR' are skipped

    Returns:
    mr_paths: the MR folders found in dicom_path
    mr_save_paths: the nii.gz path belonging to each entry of mr_paths
    '''
    mr_paths = [] # save mr paths in pat_folders
    mr_save_paths = [] # save new mr paths
    for pat in dicom_path:
        temp = pat.split('/')[-1].split('_')
        ID = temp[1] # patient id
        date = temp[3] # scan date
        sixdig = temp[4]
        frac = temp[6] # fraction number
        if temp[2] != 'MR':
            continue
        mr_paths.append(pat)
        if 'Fraction.' in frac:
            # take the whole number after 'Fraction.'; indexing one character
            # (frac[9]) mapped Fraction.10 onto Fraction_1
            scan_label = 'Fraction' + '_' + frac.split('Fraction.')[1].split('.')[0]
        else:
            scan_label = frac
        mr_dir = result_dir + '/' + ID + '/' + 'MR'
        save_path = mr_dir + '/' + date + '_' + sixdig + '_' + scan_label + '.nii.gz'
        mr_save_paths.append(save_path)
        # check if file already exists
        if not os.path.exists(save_path):
            # No other cell creates the MR sub-folder, so create it here.
            # NOTE(review): assumes sitk.WriteImage does not create missing
            # parent folders - confirm.
            os.makedirs(mr_dir, exist_ok=True)
            # read the series only when a conversion is actually needed
            image = sitk.ReadImage(sitk.ImageSeriesReader().GetGDCMSeriesFileNames(pat))
            sitk.WriteImage(image, save_path)
            print('nii.gz generated for', ID)
    return mr_paths, mr_save_paths

In [45]:
# force convert dicom image: one MR series from the MissingData folder,
# written straight to its nii.gz location under Data
forced_series_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/MissingData/10144345807/2.16.840.1.114493.1.4.227.3.20210219120037687/2.16.840.1.114493.1.4.227.3.20210315074621570_2_MR'
forced_nifti_path = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data/10144345807/MR/2021-02-19_120037_Fraction_2.nii.gz'
forced_series_files = sitk.ImageSeriesReader().GetGDCMSeriesFileNames(forced_series_dir)
image = sitk.ReadImage(forced_series_files)
sitk.WriteImage(image, forced_nifti_path)

In [44]:
# force convert rt struct
# force match MR images & RT structs: the MR series and the RT struct file are
# paired by hand here instead of going through the matching step
forced_mr_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/MissingData/10144345807/2.16.840.1.114493.1.4.227.3.20210219120037687/2.16.840.1.114493.1.4.227.3.20210315074621570_2_MR'
forced_rtstruct_file = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/MissingData/10144345807/2.16.840.1.114493.1.4.227.3.20210219120037687/2.16.840.1.114493.1.4.227.6.20210311100318133_2_RTst/RTSTRUCT2.16.840.1.114493.1.4.227.6.20210311100318137.dcm'
forced_struct_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data/10144345807/2021-02-19_120037_Fraction_2'
platipy.dicom.io.rtstruct_to_nifti.convert_rtstruct(
    forced_mr_dir,
    dcm_rt_file=forced_rtstruct_file,
    output_dir=forced_struct_dir,
)

In [83]:
# check if all MR images been converted to nii.gz
def check_MR_convert_(pat_folders):
    ''' report MR scans whose converted nii.gz file is missing

    Reads the module-level result_dir as the output root and rebuilds, for
    every MR folder, the path result_dir/<patient id>/MR/<date>_<time>_<scan
    label>.nii.gz. A message is printed for each path that does not exist;
    nothing is written.

    Args:
    pat_folders: list of raw scan folder paths; entries whose third
        '_'-separated field is not 'MR' are skipped

    Returns:
    IDs: patient id of every MR folder
    mr_paths: the MR folders found in pat_folders
    mr_save_paths: the expected nii.gz path for each entry of mr_paths
    '''
    mr_paths = [] # save mr paths in pat_folders
    mr_save_paths = [] # save new mr paths
    IDs = []
    for pat in pat_folders:
        temp = pat.split('/')[-1].split('_')
        ID = temp[1] # patient id
        date = temp[3] # scan date
        sixdig = temp[4]
        frac = temp[6] # fraction number
        if temp[2] != 'MR':
            continue
        mr_paths.append(pat)
        IDs.append(ID)
        if 'Fraction.' in frac:
            # take the whole number after 'Fraction.'; indexing one character
            # (frac[9]) mapped Fraction.10 onto Fraction_1
            scan_label = 'Fraction' + '_' + frac.split('Fraction.')[1].split('.')[0]
        else:
            scan_label = frac
        save_path = result_dir + '/' + ID + '/' + 'MR' + '/' + date + '_' + sixdig + '_' + scan_label + '.nii.gz'
        mr_save_paths.append(save_path)
        # check if file already exists
        if not os.path.exists(save_path):
            print('MR image nii.gz not generated for', ID)
    return IDs, mr_paths, mr_save_paths # 253, 253

In [74]:
# Check every MR scan for its converted nii.gz file and report the list sizes
# (the three lists have one entry per MR folder).
IDs, mr_paths, mr_save_paths = check_MR_convert_(pat_folders)
print(*map(len, (IDs, mr_paths, mr_save_paths)))

253 253 253


In [76]:
# Save the patient id / converted MR path pairs next to the notebook.
mr_save_table = pd.DataFrame({'IDs': IDs, 'mr_stored_path':mr_save_paths})
mr_save_table.to_csv('mr_save_path.csv')

In [24]:
# test: rebuild the expected nii.gz path of every MR folder without reading or
# writing any file, then compare the two list lengths
mr_save_paths = []
mr_paths = []
for pat in pat_folders:
    temp = pat.split('/')[-1].split('_')
    ID = temp[1]
    date = temp[3]
    sixdig = temp[4]
    frac = temp[6]
    if temp[2] != 'MR':
        continue
    mr_paths.append(pat)
    # fraction scans are labelled from the first 8 characters plus the 10th
    # character of the scan description; other scans keep it unchanged
    if 'Fraction.' in frac:
        stem = frac[:8] + '_' + frac[9]
    else:
        stem = frac
    save_path = result_dir + '/' + ID + '/' + 'MR' + '/'+ date + '_' + sixdig + '_' + stem + '.nii.gz'
    mr_save_paths.append(save_path)
print(len(mr_paths), len(mr_save_paths))

253 253


In [71]:
# Inline version of the MR <-> RT struct matching: walk the raw export, bucket
# the scan folders by the markers in their names, then pair MR folders with RT
# struct folders. Every bucket entry is a two-item list [path, matched_flag].
path_to_raw_data = proj_dir
#output_path = "/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Test"

fraction_paths = [] # image paths, matches_status = 0 (no), 1 (yes)
dcm_paths = []
phantome_paths = []

# image path: pat_folders

for study_path in os.listdir(path_to_raw_data):
    # os.listdir also returns dot-files, hence the explicit .DS_Store filter
    if ".DS_Store" not in study_path:
        for scan_path in os.listdir(path_to_raw_data+"/"+study_path):
            image_path = path_to_raw_data+"/"+study_path+"/"+scan_path
            # the three tests below are independent `if`s, so a folder whose
            # name carries several markers is added to several buckets
            if "_Fraction" in scan_path:
                fraction_paths.append([image_path,0]) # 211
            if "_MR" in scan_path and "_Fraction" not in scan_path:
                phantome_paths.append([image_path,0]) # 42
            if "_RTst" in scan_path:
                dcm_paths.append([image_path,0]) # 252
                
matches_lst = [] # MR path, dcm path, new_path-to create

# Pass 1: fraction scans. Already-matched RT struct folders are not skipped
# here, so one RT struct folder can be paired with more than one fraction scan.
for fraction_path in fraction_paths:
    # number that follows "_Fraction." minus one
    get_id = int(fraction_path[0].split("_Fraction.")[1].split(".")[0])-1
    # folder name up to (not including) "_MR"
    get_name = fraction_path[0].split("/")[-1].split("_MR")[0]
    #print(get_id,get_name)
    for dcm_path in dcm_paths:
        # A pair needs: the name prefix inside the RT struct path, the 5-digit
        # zero-padded get_id anywhere inside that path (plain substring test on
        # the full path), and equal first and second "_"-separated fields after
        # the "_MR_" / "_RTst_" marker.
        # NOTE(review): going by the example folder names those two fields look
        # like scan date and time - confirm.
        if get_name in dcm_path[0] and str(get_id).zfill(5) in dcm_path[0] and \
        fraction_path[0].split("_MR_")[1].split("_")[1]==dcm_path[0].split("_RTst_")[1].split("_")[1] and \
        fraction_path[0].split("_MR_")[1].split("_")[0]==dcm_path[0].split("_RTst_")[1].split("_")[0]:
            # the RT struct file is whatever os.listdir returns first for the folder
            # NOTE(review): os.listdir includes dot-files, so this could pick
            # up .DS_Store - confirm the folders hold a single file.
            matches_lst.append([fraction_path[0],dcm_path[0]+"/"+os.listdir(dcm_path[0])[0],get_name])
            fraction_path[1] = 1
            dcm_path[1] = 1
            break

print(len(matches_lst))
# Pass 2: the remaining MR folders (no "_Fraction" in the name) against RT
# struct folders that pass 1 left unmatched.
for phantome_path in phantome_paths:
    if phantome_path[1] != 1:
        get_name_ph = phantome_path[0].split("/")[-1].split("_MR")[0]
        for dcm_path in dcm_paths:
            if dcm_path[1] != 1:
                get_name_dc = dcm_path[0].split("/")[-1].split("_RTs")[0]
                #print(phantome_path[0].split("_MR_")[1].split("_")[0])
                # only the name prefix and the second field after the marker
                # are compared here; the first field is not
                if get_name_ph == get_name_dc and phantome_path[0].split("_MR_")[1].split("_")[1]==dcm_path[0].split("_RTst_")[1].split("_")[1]:
                    #and \
                    #print(phantome_path[0].split("_MR_")[1].split("_")[1]==dcm_path[0].split("_RTst_")[1].split("_")[1])# and \
                    #print(phantome_path[0].split("_MR_")[1].split("_")[1]==dcm_path[0].split("_RTst_")[1].split("_")[1])
                    #print(phantome_path[0].split("_MR_")[1].split("_")[1])
                    matches_lst.append([phantome_path[0],dcm_path[0]+"/"+os.listdir(dcm_path[0])[0],get_name_ph])
                    phantome_path[1] = 1
                    dcm_path[1] = 1
                    break

# i is not used anywhere else in this cell
i = 0
print(len(matches_lst))

205
238


In [17]:
def match_MR_RTstruct(pat_folders):
    ''' match MR scan with RT struct

    Folders are bucketed by substring tests on the whole path ('_RTst',
    '_Fraction', '_MR'); every bucket entry is a two-item list
    [path, matched_flag] whose flag is set to 1 once the entry is paired.

    Args:
    pat_folders: list of raw scan folder paths (MR and RT struct folders)

    Returns:
    rtst_paths: [path, flag] for every RT struct folder
    fraction_paths: [path, flag] for every folder containing '_Fraction'
    phantome_paths: [path, flag] for every '_MR' folder without '_Fraction'
    match_list: [patient id, MR folder, RT struct folder] for every pair
    '''

    fraction_paths = []
    rtst_paths = []
    phantome_paths = []
    match_list = []

    # bucket the folders; the tests are independent on purpose, as before
    for pat in pat_folders:
        if '_RTst' in pat: # 252
            rtst_paths.append([pat,0])
        if "_Fraction" in pat: # 211
            fraction_paths.append([pat,0])
        if "_MR" in pat and "_Fraction" not in pat: # 42
            phantome_paths.append([pat,0])

    # match list
    # case 1: MR fraction scan. match if: pat id, frac id, 6 digit, date same
    for fraction_path in fraction_paths:
        temp = fraction_path[0].split('/')[-1].split('_')
        ID = temp[1]
        date = temp[3]
        sixdig = temp[4]
        # whole number after 'Fraction.'; indexing one character (temp[6][9])
        # compared only the first digit, so fraction 10 behaved like fraction 1
        frac = temp[6].split('Fraction.')[1].split('.')[0]
        for rtst_path in rtst_paths:
            rtst_temp = rtst_path[0].split('/')[-1].split('_')
            # The trailing field of the RT struct folder (e.g. '00000') is read
            # as a whole zero-based index instead of only its fifth character.
            # The int() conversions stay inside the condition so they only run
            # once the patient id matches.
            if ID == rtst_temp[1] and int(frac) == (int(rtst_temp[-1])+1) and sixdig == rtst_temp[4] and date == rtst_temp[3]:
                fraction_path[1] = 1
                rtst_path[1] = 1
                match_list.append([ID, fraction_path[0], rtst_path[0]]) # 205
                break

    # case 2: MR sim? scan. match if: pat id, 6 digit same
    for phantome_path in phantome_paths:
        # check if matched in case 1
        if phantome_path[1] != 1:
            ID = phantome_path[0].split('/')[-1].split('_')[1]
            sixdig = phantome_path[0].split('/')[-1].split('_')[4]
            for rtst_path in rtst_paths:
                # only RT structs not used in case 1 are candidates
                if rtst_path[1] != 1:
                    rtst_temp = rtst_path[0].split('/')[-1].split('_')
                    if ID == rtst_temp[1] and sixdig == rtst_temp[4]:
                        phantome_path[1] = 1
                        rtst_path[1] = 1
                        match_list.append([ID, phantome_path[0], rtst_path[0]]) # 238
                        break

    return rtst_paths, fraction_paths, phantome_paths, match_list # len(match_list) = 238

In [18]:
# Pair the MR folders with their RT struct folders and print the size of each
# bucket plus the number of pairs (252 211 42 238 when last run).
rtst_paths, fraction_paths, phantome_paths, match_list = match_MR_RTstruct(pat_folders)
print(*map(len, (rtst_paths, fraction_paths, phantome_paths, match_list)))

252 211 42 238


In [73]:
# Save the matched pairs: one row per [patient id, MR folder, RT struct folder].
match_table = pd.DataFrame(match_list)
match_table.to_csv('match_list.csv', header=['patient_id', 'mr_image_path', 'rt_struct_path'])

In [41]:
def unmatch_MR_RTstruct(rtst_paths, phantome_paths):
    ''' list the RT struct folders and MR folders that were left unmatched

    Args:
    rtst_paths: [path, flag] entries for the RT struct folders
    phantome_paths: [path, flag] entries for the MR folders to report

    Returns two lists of [patient id, path], one per argument, holding the
    entries whose flag is not 1. The patient id is the second '_'-separated
    field of the folder name.
    '''
    def _pending(entries):
        # keep the input order; an entry counts as unmatched unless flag == 1
        return [
            [entry[0].split('/')[-1].split('_')[1], entry[0]]
            for entry in entries
            if entry[1] != 1
        ]

    return _pending(rtst_paths), _pending(phantome_paths)

In [42]:
# Collect whatever the matching step left unpaired; the csv exports stay disabled.
unmatched_rtst, unmatched_mris = unmatch_MR_RTstruct(rtst_paths=rtst_paths,
                                                     phantome_paths=phantome_paths)
#pd.DataFrame(unmatched_rtst).to_csv('unmatched_rtst.csv', header=['patient_id','rtst_path'])
#pd.DataFrame(unmatched_mris).to_csv('unmatched_mris.csv', header=['patient_id','mr_path'])

In [80]:
def convert_rtstruct(pat_id, dcm_img, dcm_rt_file, prefix='Struct_', 
                     output_dir='.', output_img=None, spacing=None):
    ''' convert the RT struct that belongs to one MR scan into nii.gz masks

    Wrapper around platipy.dicom.io.rtstruct_to_nifti.convert_rtstruct. Note
    that this definition has the same name as the function imported from
    platipy at the top of the notebook and replaces it in the notebook
    namespace. Reads the module-level result_dir as the output root.

    Args:
    pat_id: kept for the callers; the id actually used is parsed from dcm_img
    dcm_img: raw MR scan folder, named like
        <name>_<patient id>_MR_<date>_<time>_<site>_<scan description>_...
    dcm_rt_file: FOLDER holding the RT struct file; the first entry reported
        by list_dir is used
    prefix: forwarded to platipy
    output_dir: not used; the output folder is always
        result_dir/<patient id>/<date>_<time>_<scan label>
    output_img: forwarded to platipy
    spacing: forwarded to platipy

    Returns a one-element list with the output folder. Conversion problems,
    including an RT struct folder with no entries, are printed and not raised,
    so a batch loop keeps going.
    '''
    temp = dcm_img.split('/')[-1].split('_')
    frac = temp[6]
    pat_id = temp[1]
    date = temp[3]
    sixdig = temp[4]

    # same test as create_pat_folder, so both build the same folder name
    if 'Fraction.' in frac:
        # take the whole number after 'Fraction.'; indexing one character
        # (frac[9]) mapped Fraction.10 onto Fraction_1
        scan_label = 'Fraction' + '_' + frac.split('Fraction.')[1].split('.')[0]
    else:
        scan_label = frac
    output_folder = result_dir + '/' + pat_id + '/' + date + '_' + sixdig + '_' + scan_label
    rtstruct_stored_paths = [output_folder]
    try:
        # looked up inside the try so an empty folder is reported like any
        # other conversion problem
        rt_files = list_dir(dcm_rt_file)
        if not rt_files:
            raise FileNotFoundError('no RT struct file found in ' + dcm_rt_file)
        platipy.dicom.io.rtstruct_to_nifti.convert_rtstruct(dcm_img, rt_files[0], prefix=prefix, 
                        output_dir=output_folder, output_img=output_img, spacing=spacing)
        print('Patient', pat_id, 'rtstruct converted to nii.gz.')
    except Exception as e:
        print('patient_id:{} error:{}'.format(pat_id, e))
    return rtstruct_stored_paths

In [81]:
# Re-run the conversion for the single match at index 169 to look at its error.
failing_match = match_list[169]
convert_rtstruct(pat_id = failing_match[0],
                 dcm_img = failing_match[1],
                 dcm_rt_file = failing_match[2],
                 output_dir = result_dir)



patient_id:10122928624 error:ufunc 'logical_or' did not contain a loop with signature matching types (None, <class 'numpy.dtype[str_]'>) -> None


['/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data/10122928624/2021-12-07_121612_Fraction_5']

In [None]:
# Convert the RT struct of every matched pair and remember where each result
# was stored. One pass is enough: an extra loop that only discards the return
# values would run every conversion twice.
l = []
for match in match_list:
    l.extend(convert_rtstruct(pat_id = match[0], 
                     dcm_img = match[1], 
                     dcm_rt_file = match[2], 
                     output_dir= result_dir))
# export data
l_df = pd.DataFrame({'rt_struct_stored_path':l})
l_df.to_csv('l.csv')

In [18]:
# Second look at the match at index 169, which fails inside the conversion.
failing_match = match_list[169]
convert_rtstruct(pat_id = failing_match[0],
                 dcm_img = failing_match[1],
                 dcm_rt_file = failing_match[2],
                 output_dir = result_dir)

patient_id:10122928624 error:ufunc 'logical_or' did not contain a loop with signature matching types (None, <class 'numpy.dtype[str_]'>) -> None


In [70]:
# force match MR images & RT structs
# 10088707426 F3: MR folder and RT struct file paired by hand
frac3_mr_dir = '/Volumes/BWH-KANNLAB/Ben/MRL_Radiomics/MRL_Radiomics_Lung/MRL_Lung_BWH_data/2020-09__Studies/PRITZKY^ JOSEPH_10088707426_MR_2020-09-21_132259_._Fraction.3.Setup.Scan.Reopt_n144__00000'
frac3_rtstruct_file = '/Volumes/BWH-KANNLAB/Ben/MRL_Radiomics/MRL_Radiomics_Lung/MRL_Lung_BWH_data/2020-09__Studies/PRITZKY^ JOSEPH_10088707426_RTst_2020-09-21_132259_._._n1__00000/2.16.840.1.114493.1.4.227.3.20201203175839603.dcm'
frac3_struct_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data/10088707426/2020-09-21_132259_Fraction_3'
platipy.dicom.io.rtstruct_to_nifti.convert_rtstruct(
    dcm_img=frac3_mr_dir,
    dcm_rt_file=frac3_rtstruct_file,
    output_dir=frac3_struct_dir,
)

In [None]:
# sim: MR folder and RT struct file paired by hand (they sit in different
# Studies folders)
sim_mr_dir = '/Volumes/BWH-KANNLAB/Ben/MRL_Radiomics/MRL_Radiomics_Lung/MRL_Lung_BWH_data/2020-01__Studies/TETREAULT^ DAVID_10061253158_MR_2020-01-16_122811_._54x47x43.25sec.LIBH_n144__00000'
sim_rtstruct_file = '/Volumes/BWH-KANNLAB/Ben/MRL_Radiomics/MRL_Radiomics_Lung/MRL_Lung_BWH_data/2020-05__Studies/TETREAULT^ DAVID_10061253158_RTst_2020-05-07_145548_._._n1__00000/2.16.840.1.114493.1.4.227.5.20200507145547843.dcm'
sim_struct_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data/10061253158/2020-01-16_122811_54x47x43.25sec.LIBH'
platipy.dicom.io.rtstruct_to_nifti.convert_rtstruct(
    dcm_img=sim_mr_dir,
    dcm_rt_file=sim_rtstruct_file,
    output_dir=sim_struct_dir,
)

In [None]:
if __name__ == '__main__':
    # Script-style entry point: set the output root and the raw data root,
    # list the scan folders, create the output folders, then convert the MR
    # series to nii.gz.
    result_dir = '/Volumes/BWH-KANNLAB/Nancy/Lung_Cancer_Radiomics/Data'
    proj_dir = '/Volumes/BWH-KANNLAB/Ben/MRL_Radiomics/MRL_Radiomics_Lung/MRL_Lung_BWH_data'
    pat_folders = get_pat_folder(proj_dir)
    create_pat_folder(pat_folders)
    convert_dicom_image(pat_folders)