# Setup

In [None]:
# !apt-get install lz4

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
lz4 is already the newest version (1.9.3-2build2).
0 upgraded, 0 newly installed, 0 to remove and 56 not upgraded.


In [None]:
# %%capture
# !pip install --upgrade duckdb papermill s5cmd dcm2niix pyplastimatch pyradiomics \
# highdicom pydicom seg-metrics idc-index itk SimpleITK pyaml

In [None]:
# %%capture
# from pyplastimatch.utils.install import install_precompiled_binaries
# install_precompiled_binaries()

In [None]:
# dcmqi_release_url = "https://github.com/QIICR/dcmqi/releases/download/v1.3.4/dcmqi-1.3.4-linux.tar.gz"
# dcmqi_download_path = "dcmqi-1.3.4-linux.tar.gz"
# dcmqi_path = "dcmqi-1.3.4-linux"
# !wget -O $dcmqi_download_path $dcmqi_release_url\
# && tar -xvf $dcmqi_download_path\
# && mv $dcmqi_path/bin/* /bin\
# && rm -r $dcmqi_download_path $dcmqi_path

--2024-09-05 18:55:23--  https://github.com/QIICR/dcmqi/releases/download/v1.3.4/dcmqi-1.3.4-linux.tar.gz
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/50675718/100afc23-a279-4a81-9202-a77bf1e574b9?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20240905%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240905T185523Z&X-Amz-Expires=300&X-Amz-Signature=a2994b53bd3cd6888f3c6ab051ab217f976a64ee907d4230d147e614a8270ad4&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=50675718&response-content-disposition=attachment%3B%20filename%3Ddcmqi-1.3.4-linux.tar.gz&response-content-type=application%2Foctet-stream [following]
--2024-09-05 18:55:23--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/50675718/100afc23-a279-4a81-9202-a77bf1e57

In [None]:
# Imports

import subprocess
import os
import time
import json
import glob

import duckdb
import yaml
import numpy as np

import shutil
from urllib.request import urlopen

import pyplastimatch as pypla

import math
import pandas as pd
import SimpleITK as sitk
import seg_metrics.seg_metrics as sg

import pydicom
# Packages for the structured report

from pathlib import Path

import highdicom as hd

from pydicom.uid import generate_uid
from pydicom.filereader import dcmread
from pydicom.sr.codedict import codes


from idc_index import index
# Shared IDC client; the processing loop calls client.download_from_selection(...) on it
# to fetch DICOM series by SeriesInstanceUID.
client = index.IDCClient()

# Global Variables


In [None]:
# Root folder under which every input/output directory of this notebook is built
# (absolute path expected). Empty here; example value: "OUTPUT_EVAL".
# NOTE(review): presumably meant to be filled in (or injected) before running — confirm.
OUTPUT_PATH = ""#"OUTPUT_EVAL"
# SeriesInstanceUIDs of the IDC DICOM SEG series processed by the loop further down,
# e.g. ["1.2.276.0.7230010.3.1.3.0.40454.1698534917.949757"].
# NOTE(review): the previous comment here described combining segments into a whole
# prostate gland segment; no such combination logic is visible in this section — confirm.
idcSegSeriesInstanceUIDs = []#list(["1.2.276.0.7230010.3.1.3.0.40454.1698534917.949757"])
# Reference-image format selector used in the processing loop: "nii" or "nrrd";
# any other value (including the empty string) falls back to "nii".
res_scheme_format = ""#nii,nrrd

In [None]:
#input and output path definition
# Working tree for the regenerated prostate segmentations.
base_folder_idc_combination_prostate = os.path.join(OUTPUT_PATH, "seg_prostate_gen")
# NIfTI sub-folder (only created by setup_folders in this section).
base_idc_output_nii_folder = os.path.join(base_folder_idc_combination_prostate, "output_nii_combined")
# Destination of the DICOM SEG files re-encoded by the processing loop (note the trailing slash).
base_idc_output_dicom_folder = os.path.join(base_folder_idc_combination_prostate,"output_nii_combined_resampled_dicom/")

# CSV path for evaluation results (not written anywhere in this section).
path_eval_csv = os.path.join(OUTPUT_PATH, "output_eval.csv")

#IDC EVALUATION DATA
## Image
# Downloaded reference image series (DICOM), one sub-folder per SeriesInstanceUID.
IDC_IMAGE = os.path.join(OUTPUT_PATH,"idc_image_data/dicom")
# Reference images converted to NIfTI.
IDC_IMAGE_NII = os.path.join(OUTPUT_PATH,"idc_image_data/nii")
# Reference images converted to NRRD.
IDC_IMAGE_NRRD = os.path.join(OUTPUT_PATH,"idc_image_data/nrrd")
# Created by setup_folders; not otherwise used in this section.
IDC_IMAGE_RESAMPLED = os.path.join(OUTPUT_PATH,"idc_image_data/nii_res")
# SEG
# Downloaded DICOM SEG series, one sub-folder per SEG SeriesInstanceUID.
IDC_SEG = os.path.join(OUTPUT_PATH,"idc_seg_data/dicom")
# NIfTI output of segimage2itkimage, keyed by the referenced image SeriesInstanceUID.
IDC_SEG_NII = os.path.join(OUTPUT_PATH,"idc_seg_data/nii")
# Segments resampled onto the reference image grid, plus a copy of meta.json.
IDC_SEG_RESAMPLED = os.path.join(OUTPUT_PATH,"idc_seg_data/nii_res")

In [None]:
def setup_folders():
	"""
	Create every data and result folder used by this notebook.

	Existing folders and their contents are left untouched (nothing is deleted);
	missing parent directories are created as needed. Empty paths are skipped, so
	an unset OUTPUT_PATH ("") no longer triggers a failing `mkdir` call.
	"""
	folders_to_create = [
		OUTPUT_PATH, IDC_IMAGE, IDC_IMAGE_NII, IDC_IMAGE_NRRD,
		base_idc_output_dicom_folder, base_idc_output_nii_folder,
		IDC_IMAGE_RESAMPLED, IDC_SEG, IDC_SEG_NII, IDC_SEG_RESAMPLED,
	]
	for folder_setup in folders_to_create:
		# os.makedirs("") raises, and an empty path means "current directory" anyway
		if not folder_setup:
			continue
		# pure-Python equivalent of `mkdir -p`; also safe for paths containing spaces
		os.makedirs(folder_setup, exist_ok=True)

In [None]:
# Create the folder layout up front so the download/conversion cells below can write into it.
setup_folders()

# Utility functions

## Conversion DICOM <---> NII

In [None]:
def convert_image_dcm_to_nii(input_path, output_path_root, prefix="", format="nii"):
	"""
	Conversion of DICOM MR data to gzipped NIFTI using the dcm2niix command-line tool.

	input_path : str, folder containing DICOM instances .dcm
	output_path_root : str, output folder (created if it does not exist)
	prefix : str, text placed after the patient ID in the output file name
		(the file name pattern passed to dcm2niix is "%i_<prefix>")
	format : str, currently ignored -- the output is always gzipped NIFTI; the
		parameter is kept so existing callers keep working

	The tool's log is printed; a non-zero exit code is reported but not raised.
	Raises FileNotFoundError if the dcm2niix executable is not on PATH.
	"""
	os.makedirs(output_path_root, exist_ok=True)
	# Argument list (no shell) so paths with spaces or shell metacharacters are passed verbatim.
	# -z y : gzip the output, -m y : merge 2D slices of the same series, -f : output file name pattern
	command = ["dcm2niix", "-z", "y", "-m", "y",
		"-f", f"%i_{prefix}",
		"-o", output_path_root,
		input_path]
	completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
	# Echo the tool's output into the notebook.
	print(completed.stdout)
	if completed.returncode != 0:
		print(f"dcm2niix exited with code {completed.returncode}")

## Resampling

In [None]:
def resample_seg_with_sitk(input_seg_nifti_path, input_dicom_path, output_seg_path):
  """
  Resample a segmentation mask onto the voxel grid of a DICOM image series.

  input_seg_nifti_path : str, segmentation file to resample
  input_dicom_path : str, folder holding the DICOM series that defines the target grid
  output_seg_path : str, file the resampled segmentation is written to
  """
  print("Reading Dicom directory:", input_dicom_path)
  # load the reference series; its geometry is the resampling target
  series_reader = sitk.ImageSeriesReader()
  series_reader.SetFileNames(series_reader.GetGDCMSeriesFileNames(input_dicom_path))
  reference_volume = series_reader.Execute()
  mask_volume = sitk.ReadImage(input_seg_nifti_path)
  # identity transform: the mask is not moved, only re-gridded onto the reference
  # (e.g. to get the same z extent); nearest neighbour keeps label values intact,
  # voxels outside the mask are filled with 0
  resampled_mask = sitk.Resample(
    mask_volume,
    reference_volume,
    sitk.Transform(3, sitk.sitkIdentity),
    sitk.sitkNearestNeighbor,
    0,
  )
  sitk.WriteImage(resampled_mask, output_seg_path)

In [None]:
def resample_preds(input_path_nnunet_preds="", input_path_t2_idc="", output_path=""):
	"""
	Resample every nnU-Net prediction in a folder with plastimatch (nearest neighbour).

	input_path_nnunet_preds : str, folder searched for "*.nii.gz" prediction files
	input_path_t2_idc : str, reference MR T2 volume, handed to plastimatch as the "fixed" image
	output_path : str, folder receiving the resampled files (same file names as the inputs)
	"""
	pred_pattern = os.path.join(input_path_nnunet_preds, "*.nii.gz")
	pred_paths = glob.glob(pred_pattern)
	pred_paths.sort()
	for pred_path in pred_paths:
		print(f"pred path : {pred_path}")
		# file name without its 7-character ".nii.gz" tail
		file_stem = pred_path.split('/')[-1][:-7]
		resampled_path = os.path.join(output_path, f"{file_stem}.nii.gz")
		pypla.resample(
			verbose = False,
			input = pred_path,
			output = resampled_path,
			fixed = input_path_t2_idc,
			interpolation = "nn",
		)
		print()

# Download and resample IDC SEGS

In [None]:
for idc_expert_serieUID in idcSegSeriesInstanceUIDs:
	try:
		#download expert SEG idc data
		out_image_dicom = os.path.join(IDC_SEG, idc_expert_serieUID)
		!mkdir -p $out_image_dicom
		client.download_from_selection(seriesInstanceUID=list([idc_expert_serieUID]),
																								downloadDir=out_image_dicom,)
		idc_seg_dicom_path = glob.glob(os.path.join(out_image_dicom, "**", "*.dcm"), recursive=True)[0]
		serieUID_image = str(pydicom.dcmread(idc_seg_dicom_path).ReferencedSeriesSequence[0].SeriesInstanceUID)
		print(f"serieUID image referenced : {serieUID_image}")
		idc_output_nii_folder = os.path.join(IDC_SEG_NII, serieUID_image)
		!mkdir -p $idc_output_nii_folder
		assert os.path.exists(idc_seg_dicom_path)
		assert os.path.exists(idc_output_nii_folder)
		!segimage2itkimage --inputDICOM $idc_seg_dicom_path \
		--outputDirectory $idc_output_nii_folder --outputType nii
		#download IDC reference image for downstream resampling
		out_image_dicom = os.path.join(IDC_IMAGE, serieUID_image)
		!rm -rf $out_image_dicom
		!mkdir -p $out_image_dicom
		client.download_from_selection(seriesInstanceUID=list([serieUID_image]),
																								downloadDir=out_image_dicom,)
		##
		#convert idc reference image to NIFTI/NRRD
		out_image_nii = os.path.join(IDC_IMAGE_NII, serieUID_image)
		out_image_nrrd = os.path.join(IDC_IMAGE_NRRD, serieUID_image)
		##nii
		!rm -rf $out_image_nii
		!mkdir -p $out_image_nii
		##nrrd
		!rm -rf $out_image_nrrd
		!mkdir -p $out_image_nrrd
		#convert image dicom to nii
		!dcm2niix -z y -m y -f %i_{serieUID_image} -o $out_image_nii $out_image_dicom
		if len(glob.glob(os.path.join(out_image_nii, "*.nii.gz"))) > 1:
			!rm -rf $out_image_nii
			!mkdir -p $out_image_nii
			convert_nii_args = {"input":"/".join(glob.glob(os.path.join(out_image_dicom, "**", "*.dcm"), recursive=True)[0].split("/")[:-1]),
												"output-img": os.path.join(out_image_nii, f"{serieUID_image}.nii.gz")}
			pypla.convert(verbose=True, **convert_nii_args)
		#convert image to nrrd
		out_image_dicom_for_nrrd = "/".join(glob.glob(os.path.join(out_image_dicom, "**", "*.dcm"), recursive=True)[0].split("/")[:-1])
		out_img_nrrd_file_path = os.path.join(out_image_nrrd, f"{serieUID_image}.nrrd")
		convert_nrrd_args = {"input":out_image_dicom_for_nrrd,
												"output-img":out_img_nrrd_file_path}
		pypla.convert(verbose=True, **convert_nrrd_args)
		ref_image_nii_path = glob.glob(os.path.join(out_image_nii, '*.nii.gz'))[0]
		ref_image_nrrd_path = glob.glob(os.path.join(out_image_nrrd, '*.nrrd'))[0]
		#resample AI NIFTI SEG object to the reference image space
		output_serie_resampled = os.path.join(IDC_SEG_RESAMPLED, serieUID_image)
		!rm -rf $output_serie_resampled
		!mkdir -p $output_serie_resampled
		#define resampling scheme
		if res_scheme_format == "nrrd":
			ref_image_path = ref_image_nrrd_path
		elif res_scheme_format == "nii":
			ref_image_path = ref_image_nii_path
		else:
			ref_image_path = ref_image_nii_path
		for ai_segment_path in glob.glob(os.path.join(idc_output_nii_folder, "*.nii.gz")):
			print(ai_segment_path)
			out_segment_path = os.path.join(output_serie_resampled, ai_segment_path.split('/')[-1])
			resample_seg_with_sitk(input_seg_nifti_path=ai_segment_path,
				input_dicom_path=out_image_dicom_for_nrrd, 
				output_seg_path=out_segment_path)
		shutil.copy(os.path.join(IDC_SEG_NII, serieUID_image, "meta.json"),
								os.path.join(output_serie_resampled, "meta.json"))
		output_dicom_folder = os.path.join(base_idc_output_dicom_folder, serieUID_image)
		!mkdir -p $output_dicom_folder
		dcmqi_ready_metadata_file = glob.glob(os.path.join(output_serie_resampled, "**", "*.json"), recursive=True)[0]
		formatted_inputImageList = sorted(glob.glob(os.path.join(output_serie_resampled, "*.nii.gz")))
		formatted_inputImageStr = "','".join(formatted_inputImageList)
		output_dcmqi_dicom_file = os.path.join(output_dicom_folder, idc_seg_dicom_path.split("/")[-1])
		!itkimage2segimage --inputImageList $formatted_inputImageStr \
			--inputDICOMDirectory $out_image_dicom \
			--outputDICOM $output_dcmqi_dicom_file \
			--inputMetadata $dcmqi_ready_metadata_file
	except:
		print(f"IDC COMBINATION DID NOT WORK FOR PATH : {idc_expert_serieUID}")

In [None]:
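# NOTE: stale example — this archives `idc_prostate_gen` under /content/OUTPUT_EVAL, whereas this
# notebook writes its results to `seg_prostate_gen` under OUTPUT_PATH; adjust both before re-enabling.
# !tar -C /content/OUTPUT_EVAL -cvf - idc_prostate_gen | lz4 > radiomics_idc_output.tar.lz4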

idc_prostate_gen/
idc_prostate_gen/output_nii_combined/
idc_prostate_gen/output_nii_combined/1.3.6.1.4.1.14519.5.2.1.232435338427484273565021170934926902671/
idc_prostate_gen/output_nii_combined/1.3.6.1.4.1.14519.5.2.1.232435338427484273565021170934926902671/meta.json
idc_prostate_gen/output_nii_combined/1.3.6.1.4.1.14519.5.2.1.232435338427484273565021170934926902671/1.nii.gz
idc_prostate_gen/output_nii_combined_resampled_dicom/
idc_prostate_gen/output_nii_combined_resampled_dicom/1.3.6.1.4.1.14519.5.2.1.232435338427484273565021170934926902671/
idc_prostate_gen/output_nii_combined_resampled_dicom/1.3.6.1.4.1.14519.5.2.1.232435338427484273565021170934926902671/3f23933e-9f1a-4341-af96-199160be823d.dcm
