In [2]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pydicom
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm.auto import tqdm
import glob
import pandas as pd
from platipy.imaging import ImageVisualiser
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct, read_dicom_image

In [3]:
# Locations of the raw HNSCC data and of the derived outputs written by this notebook.
data_directory = '../../data/HNSCC'
nifti_dir = '../../data/HNSCC/HNSCC_Nii_below216'
vis_dir = '../../data/HNSCC/HNSCC_vis_snapshots'

# Every entry found directly under the raw data directory.
patient_dirs = glob.glob(f"{data_directory}/*")

data_path = Path(data_directory)
nii_path = Path(nifti_dir)
vis_path = Path(vis_dir)

# Create both output folders up front; nothing happens if they already exist.
for out_dir in (nii_path, vis_path):
    out_dir.mkdir(parents=True, exist_ok=True)

# metadata.csv is read from the root of the raw data directory.
meta_df = pd.read_csv(data_path / 'metadata.csv')

In [36]:
# Resolve the RTSTRUCT file and the CT series directory of a single patient from the
# "Subject ID", "Modality" and "File Location" columns of meta_df.
patient = "HNSCC-01-0225"
patient_rows = meta_df[meta_df["Subject ID"] == patient]

# First RTSTRUCT row listed for this patient (.iloc[0] raises IndexError if there is none).
rtstruct_row = patient_rows[patient_rows["Modality"] == "RTSTRUCT"].iloc[0]
rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"])
# Take the first file found in the RTSTRUCT directory.
rtstruct_file = list(rtstruct_dir.glob("*"))[0]
# dcmread is the supported reader; read_file is a deprecated alias that newer
# pydicom releases no longer provide.
rtstruct = pydicom.dcmread(rtstruct_file)

# First CT row listed for this patient.
ct_row = patient_rows[patient_rows["Modality"] == "CT"].iloc[0]
ct_dir = data_path.joinpath(ct_row["File Location"])

In [49]:
# Re-run the DICOM RTSTRUCT conversion for a patient whose earlier conversion failed.
patient_nii_path = nii_path / patient
patient_nii_path.mkdir(parents=True, exist_ok=True)
patient_image_path = patient_nii_path / 'image.nii.gz'
convert_rtstruct(
    ct_dir,
    rtstruct_file,
    output_dir=patient_nii_path,
    output_img=patient_image_path,
)

In [3]:
# Build meta_df_clean: for every patient keep the metadata row of one RTSTRUCT
# (the one with the latest InstanceCreationDate) and the row of the CT series
# that this RTSTRUCT references.
selected_rows = []
for pat, df_group in meta_df.groupby("Subject ID"):

    # Select the structure set with the later date
    latest_date = None
    latest_file = None
    linked_ct_uid = None
    for idx, rtstruct_row in df_group[df_group.Modality == "RTSTRUCT"].iterrows():
        rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"])
        rtstruct_file = list(rtstruct_dir.glob("*"))[0]
        # dcmread is the supported reader; read_file is a deprecated alias.
        rtstruct = pydicom.dcmread(rtstruct_file)
        try:
            rtstruct_date = int(rtstruct.InstanceCreationDate)
        except (AttributeError, TypeError, ValueError):
            # Missing or empty creation date: fall back to 1 so that any structure
            # set carrying a real date wins the comparison below. (The fallback was
            # previously assigned to a misspelled name, so the date of the
            # previously read file was silently reused instead.)
            rtstruct_date = 1

        if latest_date is None or rtstruct_date > latest_date:
            latest_date = rtstruct_date
            latest_file = idx
            # Series UID of the image series this structure set was drawn on.
            linked_ct_uid = (
                rtstruct.ReferencedFrameOfReferenceSequence[0]
                .RTReferencedStudySequence[0]
                .RTReferencedSeriesSequence[0]
                .SeriesInstanceUID
            )

    # Select the RTSTRUCT for this patient
    if latest_file is None:
        print(f"{pat} has no RTStruct")
        continue
    selected_rows.append(latest_file)

    # Also select the CT image linked to the RTSTRUCT
    linked_ct_rows = meta_df[meta_df["Series UID"] == linked_ct_uid]
    if linked_ct_rows.empty:
        print(f"{pat} does not have linked CT or RTStruct")
    else:
        selected_rows.append(linked_ct_rows.index[0])

# Rows are addressed by index label, in the order they were collected above.
meta_df_clean = meta_df.loc[selected_rows]

HNSCC-01-0024 has no RTStruct
HNSCC-01-0025 has no RTStruct
HNSCC-01-0037 has no RTStruct
HNSCC-01-0044 has no RTStruct
HNSCC-01-0052 has no RTStruct
HNSCC-01-0080 has no RTStruct
HNSCC-01-0114 has no RTStruct
HNSCC-01-0116 has no RTStruct
HNSCC-01-0143 has no RTStruct
HNSCC-01-0156 has no RTStruct
HNSCC-01-0157 has no RTStruct
HNSCC-01-0169 has no RTStruct
HNSCC-01-0212 has no RTStruct
HNSCC-01-0217 has no RTStruct
HNSCC-01-0311 has no RTStruct
HNSCC-01-0355 has no RTStruct
HNSCC-01-0361 has no RTStruct
HNSCC-01-0454 has no RTStruct
HNSCC-01-0456 has no RTStruct
HNSCC-01-0461 has no RTStruct
HNSCC-01-0476 has no RTStruct
HNSCC-01-0499 has no RTStruct
HNSCC-01-0550 has no RTStruct


In [None]:
# Convert the selected CT and RTSTRUCT of each patient to NIfTI and save one
# overview figure per patient showing the structures matched by `checks`.
checks = [
          'gtv',
          'nod',
          'ln',
         ]
for patient, pat_df in tqdm(meta_df_clean.groupby("Subject ID")):

    # Only patients whose numeric ID suffix is below 216 are handled by this run.
    if int(patient.split('-')[-1]) >= 216: continue

    # A patient can be present with an RTSTRUCT row but no CT row; skip it instead
    # of failing on .iloc[0] and aborting the whole loop.
    ct_rows = pat_df[pat_df["Modality"] == "CT"]
    if ct_rows.empty:
        print(f"{patient} has no CT row, skipping")
        continue

    patient_nii_path = nii_path.joinpath(patient)
    patient_nii_path.mkdir(exist_ok=True, parents=True)
    patient_vis_path = vis_path.joinpath(patient)
    patient_vis_path.mkdir(exist_ok=True, parents=True)

    # Convert the CT Image
    ct_row = ct_rows.iloc[0]
    ct_directory = data_path.joinpath(ct_row["File Location"])
    ct_image = read_dicom_image(ct_directory)
    output_file = patient_nii_path.joinpath("image.nii.gz")
    sitk.WriteImage(ct_image, str(output_file))

    # Convert the Structures
    rtstruct_row = pat_df[pat_df["Modality"] == "RTSTRUCT"].iloc[0]
    rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"])
    rtstruct_file = list(rtstruct_dir.glob("*"))[0]
    try:
        convert_rtstruct(ct_directory, rtstruct_file, output_dir=patient_nii_path)
    except Exception:
        # Best effort: report and move on. `Exception` (not a bare except) keeps
        # a kernel interrupt able to stop the loop.
        print(f"failed: {patient}")
        continue

    # Prepare and save the visualisation
    vis = ImageVisualiser(ct_image)
    contours = {}
    for s in patient_nii_path.glob("Struct_*.nii.gz"):
        # Strip only the known prefix and suffix so structure names that contain
        # dots stay intact and cannot collide.
        struct_name = s.name.replace("Struct_", "").replace(".nii.gz", "")
        # Match on the structure name only, not on the directories in the path.
        if any(c in struct_name.lower() for c in checks):
            contours[struct_name] = sitk.ReadImage(str(s))
    vis.add_contour(contours)
    try:
        fig = vis.show()
    except Exception:
        print(f"failed to visualize: {patient}")
        # No figure was produced for this patient; without this `continue` the
        # previous patient's figure would be saved under this patient's name.
        continue
    output_file_path = patient_vis_path.joinpath(f"{patient}_vis.png")
    fig.savefig(output_file_path, dpi=fig.dpi)
    # Close this specific figure so figures do not accumulate across the loop.
    plt.close(fig)

In [22]:
# Load the snapshot list and key it by its 'patient' column.
vis_df = pd.read_csv('snapshot_list.csv').set_index('patient')

In [25]:
# Save one snapshot per hand-picked cut position listed in vis_df['slice'],
# overlaying the structures whose name matches one of the `checks` keywords.
checks = [
          'gtv',
          'nod',
          'ln',
         ]
for pat in vis_df.index:
    # 'slice' holds comma-separated groups; each group is a '-'-separated list of indices.
    dim_slices = [group.split('-') for group in vis_df['slice'].loc[pat].split(',')]
    # Every combination of one index per group. Each tuple lists the third group
    # first and the first group last. The output file name labels the tuple entries
    # a/c/s - presumably axial/coronal/sagittal; confirm against ImageVisualiser's
    # `cut` argument.
    slices = [
        (int(x), int(y), int(z))
        for z in dim_slices[0]
        for y in dim_slices[1]
        for x in dim_slices[2]
    ]
    patient_nii_path = nii_path.joinpath(pat)
    patient_vis_path = vis_path.joinpath(pat)
    patient_vis_path.mkdir(exist_ok=True, parents=True)

    # Paths are passed as str, matching the other sitk read/write calls in this notebook.
    ct_image = sitk.ReadImage(str(patient_nii_path.joinpath('image.nii.gz')))
    rt = {}
    for struct in patient_nii_path.glob("*Struct*"):
        struct_name = struct.name.replace('Struct_', '').replace('.nii.gz', '')
        if any(c in struct_name.lower() for c in checks):
            rt[struct_name] = sitk.ReadImage(str(struct))

    for s in slices:
        vis = ImageVisualiser(ct_image, cut=s)
        vis.add_contour(rt)
        fig = vis.show()
        out_path = patient_vis_path.joinpath(f"a{s[0]}_c{s[1]}_s{s[2]}.jpg")
        fig.savefig(out_path, dpi=fig.dpi)
        # Close each figure once saved so they do not accumulate in memory.
        plt.close(fig)


In [18]:
# Print, for every entry of vis_df, the list of cut tuples derived from its 'slice' value.
for pat in vis_df.index:
    print(pat)
    # Comma-separated groups, each a '-'-separated list of indices.
    dim_slices = [group.split('-') for group in vis_df['slice'].loc[pat].split(',')]
    # All combinations; each tuple lists the third group first and the first group last.
    slices = [
        (int(x), int(y), int(z))
        for z in dim_slices[0]
        for y in dim_slices[1]
        for x in dim_slices[2]
    ]
    print(slices)

0
[(78, 196, 217)]
1
[(58, 213, 287)]
2
[(64, 186, 286), (64, 202, 286), (64, 186, 307), (64, 202, 307)]
3
[(84, 187, 275), (84, 187, 300)]
4
[(71, 166, 196), (71, 166, 229)]
5
[(38, 179, 190), (43, 179, 190), (57, 179, 190), (67, 179, 190), (38, 202, 190), (43, 202, 190), (57, 202, 190), (67, 202, 190), (38, 231, 190), (43, 231, 190), (57, 231, 190), (67, 231, 190), (38, 179, 198), (43, 179, 198), (57, 179, 198), (67, 179, 198), (38, 202, 198), (43, 202, 198), (57, 202, 198), (67, 202, 198), (38, 231, 198), (43, 231, 198), (57, 231, 198), (67, 231, 198), (38, 179, 223), (43, 179, 223), (57, 179, 223), (67, 179, 223), (38, 202, 223), (43, 202, 223), (57, 202, 223), (67, 202, 223), (38, 231, 223), (43, 231, 223), (57, 231, 223), (67, 231, 223)]
6
[(50, 208, 300)]
7
[(53, 177, 250), (77, 177, 250), (53, 196, 250), (77, 196, 250), (53, 177, 289), (77, 177, 289), (53, 196, 289), (77, 196, 289), (53, 177, 304), (77, 177, 304), (53, 196, 304), (77, 196, 304)]
8
[(61, 181, 269), (61, 202, 269