In [None]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import pydicom
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm.auto import tqdm
import glob
import pandas as pd
from platipy.imaging import ImageVisualiser
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct, read_dicom_image

In [None]:
# --- Configuration: input data location and derived output folders ---
data_directory = '../../data/HNSCC'
nifti_dir = '../../data/HNSCC/HNSCC_Nii_below216'
vis_dir = '../../data/HNSCC/HNSCC_vis_snapshots'

# pathlib views of the three locations above
data_path = Path(data_directory)
nii_path = Path(nifti_dir)
vis_path = Path(vis_dir)

# Entries matched by "<data_directory>/*". This is collected BEFORE the output
# folders are created; both output folders live under data_directory.
patient_dirs = glob.glob(f"{data_directory}/*")

# Create the output folders (no error if they already exist)
nii_path.mkdir(parents=True, exist_ok=True)
vis_path.mkdir(parents=True, exist_ok=True)

# Metadata table; later cells read its "Subject ID", "Modality",
# "File Location" and "Series UID" columns.
meta_df = pd.read_csv(data_path / 'metadata.csv')

In [None]:
# Look up the RTSTRUCT file and the CT series directory of a single patient
# through the metadata table.
patient = "HNSCC-01-0225"

# First metadata row of this patient with Modality == "RTSTRUCT"
# (.iloc[0] raises IndexError if the patient has none).
rtstruct_row = meta_df[(meta_df["Subject ID"] == patient) & (meta_df["Modality"] == "RTSTRUCT")].iloc[0]
rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"])
# Takes the first entry returned by the glob.
# NOTE(review): presumably the series folder holds exactly one RTSTRUCT file — confirm.
rtstruct_file = list(rtstruct_dir.glob("*"))[0]
# dcmread is the supported reader; read_file was a deprecated alias that
# was removed in pydicom 3.0.
rtstruct = pydicom.dcmread(rtstruct_file)

# First metadata row of this patient with Modality == "CT"
ct_row = meta_df[(meta_df["Subject ID"] == patient) & (meta_df["Modality"] == "CT")].iloc[0]
ct_dir = data_path.joinpath(ct_row["File Location"])

In [None]:
# Retry the DICOM RTSTRUCT -> NIfTI conversion for the patient selected above,
# whose conversion failed in an earlier run.
patient_nii_path = nii_path / patient
patient_nii_path.mkdir(parents=True, exist_ok=True)
patient_image_path = patient_nii_path / 'image.nii.gz'

# NOTE(review): output_img is passed as a full path that already contains
# output_dir — confirm how convert_rtstruct combines the two arguments.
convert_rtstruct(
    ct_dir,
    rtstruct_file,
    output_dir=patient_nii_path,
    output_img=patient_image_path,
)

In [None]:
# Build `meta_df_clean`: for every patient keep the metadata row of the most
# recently created RTSTRUCT plus the row of the CT series it references.
selected_rows = []
for pat, df_group in meta_df.groupby("Subject ID"):

    # Select the structure set with the later date
    latest_date = None
    latest_file = None
    linked_ct_uid = None
    for idx, rtstruct_row in df_group[df_group.Modality == "RTSTRUCT"].iterrows():
        rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"])
        rtstruct_file = list(rtstruct_dir.glob("*"))[0]
        # dcmread replaces the deprecated read_file alias (removed in pydicom 3.0)
        rtstruct = pydicom.dcmread(rtstruct_file)
        try:
            rtstruct_date = int(rtstruct.InstanceCreationDate)
        except (AttributeError, TypeError, ValueError):
            # Date is missing or empty: use a value that sorts below any real
            # date. (The original assigned to a misspelled name here, so the
            # comparison below saw an undefined or stale date.)
            rtstruct_date = 1

        if latest_date is None or rtstruct_date > latest_date:
            latest_date = rtstruct_date
            latest_file = idx
            # Series UID of the image series this structure set references
            linked_ct_uid = rtstruct.ReferencedFrameOfReferenceSequence[0].RTReferencedStudySequence[0].RTReferencedSeriesSequence[0].SeriesInstanceUID

    # Select the RTSTRUCT for this patient
    if latest_file is None:
        print(f"{pat} has no RTStruct")
        continue
    selected_rows.append(latest_file)

    # Also select the CT image linked to the RTSTRUCT (first matching row)
    ct_matches = meta_df.index[meta_df["Series UID"] == linked_ct_uid]
    if len(ct_matches) == 0:
        # The RTSTRUCT row stays selected, as before; only the CT row is absent.
        print(f"{pat} does not have linked CT or RTStruct")
        continue
    selected_rows.append(ct_matches[0])

meta_df_clean = meta_df.loc[selected_rows]

In [None]:
# Convert each selected patient's CT and RTSTRUCT to NIfTI and save one
# overview snapshot showing the structures whose file name matches `checks`.

# Lower-case substrings a structure file name must contain to be drawn
checks = [
          'gtv',
          'nod',
          'ln',
         ]
for patient, pat_df in tqdm(meta_df_clean.groupby("Subject ID")):

    # Only process patients whose numeric ID suffix is below 216
    # (the NIfTI output folder is named "..._below216").
    if int(patient.split('-')[-1]) >= 216:
        continue

    # The selection step can leave a patient with an RTSTRUCT row but no CT
    # row; skip such patients instead of crashing on .iloc[0].
    ct_rows = pat_df[pat_df["Modality"] == "CT"]
    rtstruct_rows = pat_df[pat_df["Modality"] == "RTSTRUCT"]
    if ct_rows.empty or rtstruct_rows.empty:
        print(f"skipped (missing CT or RTSTRUCT row): {patient}")
        continue

    patient_nii_path = nii_path.joinpath(patient)
    patient_nii_path.mkdir(exist_ok=True, parents=True)
    patient_vis_path = vis_path.joinpath(patient)
    patient_vis_path.mkdir(exist_ok=True, parents=True)

    # Convert the CT Image
    ct_row = ct_rows.iloc[0]
    ct_directory = data_path.joinpath(ct_row["File Location"])
    ct_image = read_dicom_image(ct_directory)
    output_file = patient_nii_path.joinpath("image.nii.gz")
    sitk.WriteImage(ct_image, str(output_file))

    # Convert the Structures
    rtstruct_row = rtstruct_rows.iloc[0]
    rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"])
    rtstruct_file = list(rtstruct_dir.glob("*"))[0]
    try:
        convert_rtstruct(ct_directory, rtstruct_file, output_dir=patient_nii_path)
    except Exception as exc:
        # Best effort: report the failure and move on to the next patient
        print(f"failed: {patient} ({exc})")
        continue

    # Prepare and save the visualisation
    vis = ImageVisualiser(ct_image)
    # Key: file name up to the first "." with the "Struct_" prefix removed.
    # The keyword filter looks at the file name only, not the whole path.
    contours = {
        s.name.split(".")[0].replace("Struct_", ""): sitk.ReadImage(str(s))
        for s in patient_nii_path.glob("Struct_*.nii.gz")
        if any(n in s.name.lower() for n in checks)
    }
    vis.add_contour(contours)
    try:
        fig = vis.show()
    except Exception as exc:
        # Without a new figure there is nothing to save; falling through would
        # use an undefined `fig` or re-save the previous patient's figure.
        print(f"failed to visualize: {patient} ({exc})")
        continue
    output_file_path = patient_vis_path.joinpath(f"{patient}_vis.png")
    fig.savefig(output_file_path, dpi=fig.dpi)
    # Close this specific figure so figures do not accumulate over the loop
    plt.close(fig)

In [None]:
# Load the snapshot list and index it by its 'patient' column.
vis_df = pd.read_csv('snapshot_list.csv').set_index('patient')

In [None]:
# For every patient in the snapshot list, render one image per requested cut
# position, overlaying the structures whose name matches `checks`.

# Lower-case substrings a structure name must contain to be drawn
checks = [
          'gtv',
          'nod',
          'ln',
         ]
for pat in vis_df.index:
    # The 'slice' entry is a comma-separated list of three groups, each a
    # '-'-separated list of integers. Every combination of one value per group
    # becomes a cut; tuples are built as (third group, second group, first group).
    # NOTE(review): which anatomical axis each group refers to is not visible
    # here — confirm against ImageVisualiser's `cut` convention.
    dim_slices = [group.split('-') for group in vis_df['slice'].loc[pat].split(',')]
    slices = [
        (int(x), int(y), int(z))
        for z in dim_slices[0]
        for y in dim_slices[1]
        for x in dim_slices[2]
    ]
    patient_nii_path = nii_path.joinpath(pat)
    patient_vis_path = vis_path.joinpath(pat)
    patient_vis_path.mkdir(exist_ok=True, parents=True)

    # Paths are passed to SimpleITK as str, matching the other cells; older
    # SimpleITK releases may not accept pathlib.Path objects.
    ct_image = sitk.ReadImage(str(patient_nii_path.joinpath('image.nii.gz')))
    rtstructs = list(patient_nii_path.glob("*Struct*"))
    rt = {}
    for struct in rtstructs:
        # Structure name = file name without the "Struct_" prefix and ".nii.gz" suffix
        struct_name = struct.name.replace('Struct_', '').replace('.nii.gz', '')
        if any(c in struct_name.lower() for c in checks):
            rt[struct_name] = sitk.ReadImage(str(struct))

    for s in slices:
        vis = ImageVisualiser(ct_image, cut=s)
        vis.add_contour(rt)
        fig = vis.show()
        out_path = patient_vis_path.joinpath(f"a{s[0]}_c{s[1]}_s{s[2]}.jpg")
        fig.savefig(out_path, dpi=fig.dpi)
        # Close each figure after saving so they do not accumulate
        plt.close(fig)


In [None]:
# Debug aid: print each patient's ID followed by the list of cut tuples parsed
# from its 'slice' entry.
for pat in vis_df.index:
    print(pat)
    # Split the entry into comma-separated groups of '-'-separated integers
    dim_slices = [group.split('-') for group in vis_df['slice'].loc[pat].split(',')]
    # One tuple per combination, first group varying slowest and third fastest;
    # each tuple is ordered (third group, second group, first group).
    slices = [
        (int(third), int(second), int(first))
        for first in dim_slices[0]
        for second in dim_slices[1]
        for third in dim_slices[2]
    ]
    print(slices)