In [1]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pydicom
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm.auto import tqdm
import glob
import pandas as pd
from platipy.imaging import ImageVisualiser
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct, read_dicom_image

In [107]:
# Locations of the raw DICOM export, the converted NIfTI volumes and the
# visual snapshots, all relative to the notebook's working directory.
data_directory = '../../data/UTSW_HNC/dicom'
nifti_dir = '../../data/UTSW_HNC/Nii'
vis_dir = '../../data/UTSW_HNC/vis_snapshots'

# pathlib views of the same three locations.
data_path, nii_path, vis_path = (
    Path(location) for location in (data_directory, nifti_dir, vis_dir)
)

# Every entry directly under the DICOM root, as plain strings.
patient_dirs = glob.glob(data_directory + "/*")

# Create the two output folders if they are missing; the DICOM root itself is
# not created here.
for out_dir in (nii_path, vis_path):
    out_dir.mkdir(parents=True, exist_ok=True)

In [97]:
# Regex fragments that mark a CT study as head-and-neck when found in its
# StudyDescription. The conversion loop joins them with '|' and matches
# case-insensitively, so the '.' in 'h.n' acts as a single-character wildcard.
study_descriptions = [
    'h/n',
    'h.n',
    'neck',
    'h&n',
]

# Patient IDs are the names of the sub-directories of the DICOM root.
# Only directories are kept, so stray files in the root (e.g. .DS_Store or a
# spreadsheet) are not mistaken for patients, and the list is sorted so the
# processing order is reproducible across runs and file systems.
patients = sorted(entry.name for entry in data_path.glob('*') if entry.is_dir())

# Sheets read from each patient's metadata workbook (patient.<id>.xlsx).
sheet_names = ['CT', 'RTSTRUCT']

In [70]:
# Single-patient scratch check: build the CT folder and RTSTRUCT file paths for
# one hard-coded patient/study from that patient's metadata workbook.
patient = "71915981"
study_id = "1.2.840.113704.1.111.4620.1383670451.6"

patient_root = data_path.joinpath(f"{patient}")
meta_df = pd.read_excel(patient_root.joinpath(f"patient.{patient}.xlsx"), sheet_name=sheet_names)

# Index both sheets by study so a study UID can be looked up directly.
for sheet in ('CT', 'RTSTRUCT'):
    meta_df[sheet] = meta_df[sheet].set_index('StudyInstanceUID')

ct_series_uid = meta_df['CT'].loc[study_id]['SeriesInstanceUID']
rtstruct_sop_uid = meta_df['RTSTRUCT'].loc[study_id]['SOPInstanceUID']

# Here the RTSTRUCT file is looked up directly in the study folder. The other
# layout seen in this dataset nests it one level deeper, as
# <study>/RTSTRUCT.<SeriesInstanceUID>/<SOPInstanceUID>.dcm.
ct_folder = f"{study_id}/CT.{ct_series_uid}"
rtstruct_file = f"{study_id}/{rtstruct_sop_uid}.dcm"

ct_path = data_path / patient / ct_folder
rtstruct_path = data_path / patient / rtstruct_file

In [155]:
# Convert each patient's initial head-and-neck CT and its RTSTRUCT to NIfTI.
#
# For every patient ID in `patients` this cell:
#   1. reads the CT / RTSTRUCT sheets of patient.<id>.xlsx,
#   2. keeps the CT studies whose StudyDescription matches `study_descriptions`,
#   3. picks the earliest of those studies,
#   4. picks that study's RTSTRUCT (the latest StructureSetTime if several),
#   5. strips non-alphanumeric characters from ROI names containing one of
#      `bad_specials`, and
#   6. calls convert_rtstruct to write image.nii.gz and the structure masks
#      into <nii_path>/<patient>.
# Patients without a matching CT study or without an RTSTRUCT are skipped.

# Characters in an ROI name that trigger the clean-up in step 5.
bad_specials = ['?', '\\', '>', '<', '+', '=', '|']
# One alternation over all head-and-neck description fragments.
hn_pattern = '|'.join(study_descriptions)

for pat in patients:
    print(pat)
    patient_data_path = data_path.joinpath(pat)
    patient_nii_path = nii_path.joinpath(pat)
    patient_nii_path.mkdir(exist_ok=True, parents=True)
    patient_image_path = patient_nii_path.joinpath('image.nii.gz')

    meta_df = pd.read_excel(patient_data_path.joinpath(f"patient.{pat}.xlsx"), sheet_name=sheet_names)

    ct_df = meta_df['CT'].set_index('StudyInstanceUID')
    struct_df = meta_df['RTSTRUCT'].set_index('StudyInstanceUID')

    # 1-D boolean mask that is True for studies whose description matches a
    # H/N tag; missing descriptions count as "no match".
    hn_flag = ct_df['StudyDescription'].str.contains(
        hn_pattern, case=False, regex=True, na=False
    ).to_numpy()
    ct_df = ct_df[hn_flag]
    if ct_df.empty:
        print("    no head-and-neck CT study found, skipping")
        continue

    # get earliest study (to get the initial scan)
    study_id = ct_df[ct_df.StudyDate == ct_df.StudyDate.min()].index[0]
    print(study_id)
    if study_id not in struct_df.index:
        print("    no RTSTRUCT listed for this study, skipping")
        continue

    # NOTE(review): if the study has several CT rows this lookup returns a
    # Series, the folder name below is not a real path, and the conversion
    # ends up relying on the plain "CT" folder fallback further down.
    ct_folder = f"{study_id}/CT.{ct_df.loc[study_id]['SeriesInstanceUID']}"
    ct_path = patient_data_path.joinpath(ct_folder)

    # Always select the study's rows as a DataFrame so one and many
    # structure sets are handled the same way.
    struct_rows = struct_df.loc[[study_id]]
    if struct_rows.shape[0] > 1:
        # Several structure sets: use the most recently saved one.
        chosen_set = struct_rows['StructureSetTime'] == struct_rows['StructureSetTime'].max()
        struct_row = struct_rows[chosen_set].iloc[0]
        series_id = struct_row['SeriesInstanceUID']
        file_id = struct_row['SOPInstanceUID']
        print(series_id, file_id)
    else:
        struct_row = struct_rows.iloc[0]
        series_id = struct_row['SeriesInstanceUID']
        file_id = struct_row['SOPInstanceUID']

    rtstruct_file = f"{study_id}/RTSTRUCT.{series_id}/{file_id}.dcm"
    rtstruct_path = patient_data_path.joinpath(rtstruct_file)
    print(ct_path.as_posix())
    print(f"ct good: {os.path.exists(ct_path)}")
    print(rtstruct_path.as_posix())
    print(f"rtstruct good: {os.path.exists(rtstruct_path)}")

    try:
        rtstruct = pydicom.dcmread(rtstruct_path)
    except (OSError, pydicom.errors.InvalidDicomError):
        # Second layout: the RTSTRUCT file sits directly in the study folder.
        rtstruct_file = f"{study_id}/{file_id}.dcm"
        rtstruct_path = patient_data_path.joinpath(rtstruct_file)
        rtstruct = pydicom.dcmread(rtstruct_path)

    # Reduce ROI names containing a problematic character to their
    # alphanumeric characters only. Two different names can collapse to the
    # same cleaned name.
    renamed = False
    for seq in rtstruct.StructureSetROISequence:
        if any(spec in seq.ROIName for spec in bad_specials):
            seq.ROIName = ''.join(filter(str.isalnum, seq.ROIName))
            renamed = True

    # Never overwrite the raw DICOM export: when a name had to be changed the
    # cleaned structure set is written next to the NIfTI output and that copy
    # is converted instead.
    rtstruct_to_convert = rtstruct_path
    if renamed:
        rtstruct_to_convert = patient_nii_path.joinpath('rtstruct_sanitized.dcm')
        rtstruct.save_as(rtstruct_to_convert)

    try:
        convert_rtstruct(ct_path, rtstruct_to_convert, output_dir=patient_nii_path, output_img=patient_image_path)
    except Exception as err:
        # Second layout: the CT slices are in a folder simply called "CT".
        fallback_ct_path = patient_data_path.joinpath(f"{study_id}/CT")
        if not fallback_ct_path.exists():
            # Nothing else to try; surface the original failure.
            raise
        print(f"    conversion failed ({err!r}); retrying with {fallback_ct_path.as_posix()}")
        ct_path = fallback_ct_path
        convert_rtstruct(ct_path, rtstruct_to_convert, output_dir=patient_nii_path, output_img=patient_image_path)

70539409
1.2.840.113704.1.111.5972.1385569577.7
../../data/UTSW_HNC/dicom/70539409/1.2.840.113704.1.111.5972.1385569577.7/CT.1.2.840.113704.1.111.6944.1385579949.8
ct good: True
../../data/UTSW_HNC/dicom/70539409/1.2.840.113704.1.111.5972.1385569577.7/RTSTRUCT.2.16.840.1.113669.2.931128.175282973.20170714133834.140717/2.16.840.1.113669.2.931128.175282973.20170714133834.424809.dcm
rtstruct good: True
70718318
1.2.840.113704.1.111.4620.1383670451.6
../../data/UTSW_HNC/dicom/70718318/1.2.840.113704.1.111.4620.1383670451.6/CT.1.2.840.113704.1.111.5892.1383673054.8
ct good: True
../../data/UTSW_HNC/dicom/70718318/1.2.840.113704.1.111.4620.1383670451.6/RTSTRUCT.2.16.840.1.113669.2.931128.175282973.20170711085400.382872/2.16.840.1.113669.2.931128.175282973.20170711085400.665694.dcm
rtstruct good: True
71165560
1.2.840.113704.1.111.6916.1335376464.1
../../data/UTSW_HNC/dicom/71165560/1.2.840.113704.1.111.6916.1335376464.1/CT.1.2.840.113704.1.111.6916.1335378852.9
ct good: True
../../data/UTSW_

In [169]:
# Keep only the target-volume masks of each patient and delete every other
# converted structure.
#
# A mask is kept when its file name contains "gtv" (case-insensitive) or, for
# the patients listed in `use_ctv`, "ctv".
# WARNING: every other Struct*.nii.gz file is permanently removed from disk;
# the conversion has to be re-run to get those masks back.
use_ctv = ['90051745', '93601319']
for pat in patients:
    print(pat)
    gtvs = []
    patient_nii_path = nii_path.joinpath(pat)
    # List (and sort) the files up front so nothing is deleted while the
    # directory is still being scanned and the printed order is stable.
    for struct in sorted(patient_nii_path.glob('Struct*.nii.gz')):
        # Match on the file name only, so a "gtv"/"ctv" substring in a parent
        # directory cannot make every structure look like a target volume.
        struct_name = struct.name.lower()
        if 'gtv' in struct_name or (pat in use_ctv and 'ctv' in struct_name):
            gtvs.append(struct)
            print(f"    {struct.name}")
        else:
            os.remove(struct)
    if not gtvs:
        print("no gtv masks available")

70539409
    Struct_GTV_LN.nii.gz
    Struct_GTV_Primary.nii.gz
70718318
    Struct_GTV-neck.nii.gz
    Struct_GTV-R_BOT.nii.gz
71165560
    Struct_GTV-L_neck.nii.gz
    Struct_GTV-L_tonsil.nii.gz
71915981
    Struct_GTV.nii.gz
    Struct_GTVn.nii.gz
    Struct_GTVp.nii.gz
72596908
    Struct_GTV-R_neck.nii.gz
72774407
    Struct_GTV.nii.gz
    Struct_GTV60.nii.gz
    Struct_UnionGTVSUV6.nii.gz
72827973
    Struct_1GTV1-50_composite-body.nii.gz
    Struct_1GTVcomposite-velocity.nii.gz
90051745
    Struct_CTV_56.1.nii.gz
    Struct_CTV_59.4.nii.gz
    Struct_CTV_69.3.nii.gz
90461190
    Struct_GTV-R_neck.nii.gz
    Struct_GTV-R_tonsil.nii.gz
90475802
    Struct_GTV_LN_70Gy.nii.gz
    Struct_GTV_L_III_LN.nii.gz
    Struct_GTV_primary.nii.gz
90648713
    Struct_GTV-pre-op-L_BOT.nii.gz
    Struct_GTV-pre-op-L_neck.nii.gz
90653327
    Struct_GTV-_LN.nii.gz
    Struct_GTV-_primary.nii.gz
90688001
    Struct_GTV_ethsinus_(p2.nii.gz
    Struct_GTV_ethsinus_(p3.nii.gz
    Struct_GTV_RP_node_(AL

In [186]:
# Merge the three nsGTV masks of patient 93384910 into one binary mask and
# save it as Struct_GTVns.nii.gz in that patient's NIfTI folder.
pat = '93384910'
patient_nii_path = nii_path.joinpath(pat)
gtvn1_path = patient_nii_path.joinpath('Struct_DJS_nsGTV.nii.gz')
gtvn2_path = patient_nii_path.joinpath('Struct_nsGTV66.5.nii.gz')
gtvn3_path = patient_nii_path.joinpath('Struct_nsGTV66.5-AIR.nii.gz')
out_path = patient_nii_path.joinpath('Struct_GTVns.nii.gz')

# Paths are passed as strings so this also works with SimpleITK releases that
# do not accept pathlib objects.
gtvn1 = sitk.ReadImage(str(gtvn1_path))
gtvn2 = sitk.ReadImage(str(gtvn2_path))
gtvn3 = sitk.ReadImage(str(gtvn3_path))

# Voxel-wise union of the three masks. Plain addition would produce values of
# 2 or 3 wherever the contours overlap, so the result would no longer be a
# binary mask; OR-ing the foreground of each input keeps it at 0/1.
gtvns = (gtvn1 > 0) | (gtvn2 > 0) | (gtvn3 > 0)
# Keep the pixel type of the input masks.
gtvns = sitk.Cast(gtvns, gtvn1.GetPixelID())

sitk.WriteImage(gtvns, str(out_path))

In [181]:
# Visual check of the merged mask. `gtvns` comes from the merge cell, which
# must have been run first; sitk.Show hands the image to SimpleITK's
# configured external viewer, so this needs a desktop session.
sitk.Show(gtvns)