<a href="https://colab.research.google.com/github/vkt1414/Cloud-Resources-Workflows/blob/main/Notebooks/Totalsegmentator/structuredReports.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Environment Setup**

In [None]:
# %%capture

!pip install pyradiomics

Import packages for both use cases. 

In [5]:
import copy
import glob
import json
import os
import shutil
import subprocess
import sys
import time

import numpy as np
import pandas as pd
import tqdm
import yaml
from IPython.display import clear_output

# useful information
# `name = !cmd` runs the shell command and captures its output as a list of
# lines, which is why the values are indexed with [-1] (last line) below.
curr_dir = !pwd
curr_droid = !hostname
curr_pilot = !whoami

# timestamp of this run
print(time.asctime(time.localtime()))

print("\nCurrent directory :", curr_dir[-1])
print("Hostname          :", curr_droid[-1])
print("Username          :", curr_pilot[-1])

# only the first line of sys.version is printed
print("Python version    :", sys.version.split('\n')[0])

Fri May  5 19:10:13 2023

Current directory : /content
Hostname          : 2cdbd990a57a
Username          : root
Python version    : 3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0]


Import the other packages that nnUNet and BPR depend on. 

Let's install and import the packages needed to create Structured Reports (SR). 

In [None]:
# Packages for the structured report 

!pip uninstall highdicom
!git clone https://github.com/herrmannlab/highdicom.git
#!cd highdicom && python setup.py install
!cd highdicom && pip install .

import highdicom

from pathlib import Path

import highdicom as hd

from pydicom.uid import generate_uid
from pydicom.filereader import dcmread
from pydicom.sr.codedict import codes

from highdicom.sr.content import (
    FindingSite,
    ImageRegion,
    ImageRegion3D,
    SourceImageForRegion,
    SourceImageForMeasurement,
    SourceImageForMeasurementGroup
)
from highdicom.sr.enum import GraphicTypeValues3D
from highdicom.sr.enum import GraphicTypeValues
from highdicom.sr.sop import Comprehensive3DSR, ComprehensiveSR
from highdicom.sr.templates import (
    DeviceObserverIdentifyingAttributes,
    Measurement,
    MeasurementProperties,
    MeasurementReport,
    MeasurementsAndQualitativeEvaluations,
    ObservationContext,
    ObserverContext,
    PersonObserverIdentifyingAttributes,
    PlanarROIMeasurementsAndQualitativeEvaluations,
    RelationshipTypeValues,
    TrackingIdentifier,
    QualitativeEvaluation,
    ImageLibrary,
    ImageLibraryEntryDescriptors
)
from highdicom.sr.value_types import (
    CodedConcept,
    CodeContentItem,
)

import logging
# Set the "highdicom.sr.sop" logger to INFO level
logger = logging.getLogger("highdicom.sr.sop")
logger.setLevel(logging.INFO)

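Download the DICOM SEG JSON metadata file (generated using [...]) from the repo, together with the CSV files that map the segment names and the shape features to their codes.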

In [24]:
# Earlier variant that copied the metadata json from a bucket (kept for reference):
# bucket_data_base_uri = os.path.join(bucket_base_uri, "nnunet/data")
# dicomseg_json_uri = "s3://idc-medima-paper/nnunet/data/dicomseg_metadata.json"
# dicomseg_json_path = "/content/data/dicomseg_metadata.json"

# !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $dicomseg_json_uri $dicomseg_json_path

# DICOM SEG metadata json: downloaded into /content/data (wget -P), which is
# where dicomseg_json_path points
dicomseg_json_path = "/content/data/dicomseg_metadata.json"
!wget -q -N -P '/content/data' https://raw.githubusercontent.com/ImagingDataCommons/ai_medima_misc/main/nnunet/data/dicomseg_metadata.json
# Segment name -> code mapping: downloaded into the current directory and loaded
!wget -q -N https://raw.githubusercontent.com/ImagingDataCommons/ai_medima_misc/main/nnunet/data/nnunet_segments_code_mapping.csv
nnunet_segments_code_mapping_df = pd.read_csv("nnunet_segments_code_mapping.csv")

# Shape feature -> quantity/units code mapping: downloaded into the current directory and loaded
!wget -q -N https://raw.githubusercontent.com/ImagingDataCommons/ai_medima_misc/main/nnunet/data/nnunet_shape_features_code_mapping.csv
nnunet_shape_features_code_mapping_df = pd.read_csv("nnunet_shape_features_code_mapping.csv")
# Last expression of the cell: displays the shape feature mapping table
nnunet_shape_features_code_mapping_df

Unnamed: 0,shape_feature,quantity_CodingSchemeDesignator,quantity_CodeValue,quantity_CodeMeaning,units_CodingSchemeDesignator,units_CodeValue,units_CodeMeaning
0,Elongation,IBSI,Q3CK,Elongation,UCUM,mm,millimeter
1,Flatness,IBSI,N17B,Flatness,UCUM,mm,millimeter
2,LeastAxisLength,IBSI,7J51,Least Axis in 3D Length,UCUM,mm,millimeter
3,MajorAxisLength,IBSI,TDIC,Major Axis in 3D Length,UCUM,mm,millimeter
4,Maximum3DDiameter,IBSI,L0JK,Maximum 3D Diameter of a Mesh,UCUM,mm,millimeter
5,MeshVolume,IBSI,RNU0,Volume of Mesh,UCUM,mm3,cubic millimeter
6,MinorAxisLength,IBSI,P9VJ,Minor Axis in 3D Length,UCUM,mm,millimeter
7,Sphericity,IBSI,QCFX,Sphericity,UCUM,1,no units
8,SurfaceArea,IBSI,C0JK,Surface Area of Mesh,UCUM,mm2,square millimeter
9,SurfaceVolumeRatio,IBSI,2PR5,Surface to Volume Ratio,UCUM,/mm,per millimeter


Extract list of first order features mentioned in TotalSegmentator

---

# **Function Definition**

In [None]:
def modify_dicomseg_json_file(dicomseg_json_path, dicomseg_json_path_modified, SegmentAlgorithmName):

  """
  This function writes out a new metadata json file for the DICOM Segmentation object. 
  It sets the SegmentAlgorithmName of every segment to the one provided as input. 
  The original json file is left untouched. 

  Arguments:
    dicomseg_json_path          : path of the original dicomseg json file 
    dicomseg_json_path_modified : the new json file to write to disk 
    SegmentAlgorithmName        : the value to store in the SegmentAlgorithmName 
                                  field of each segment 
    
  Returns:
    Nothing. The json file is written out to dicomseg_json_path_modified 

  """
  # Context managers make sure both file handles are closed, even on errors
  with open(dicomseg_json_path) as f:
    meta_json = json.load(f)

  # 'segmentAttributes' is a list of lists of segment dictionaries. Walk over 
  # both levels so that no segment keeps its previous algorithm name. 
  for segment_group in meta_json['segmentAttributes']:
    for segment in segment_group:
      segment['SegmentAlgorithmName'] = SegmentAlgorithmName

  with open(dicomseg_json_path_modified, 'w') as f: 
    json.dump(meta_json, f)

  return 

  # dicomseg_json_uri = "s3://idc-medima-paper/nnunet/data/dicomseg_metadata.json"
  # dicomseg_json_path = "/content/data/dicomseg_metadata.json"



## nnUNet 3D shape features SR creation

In [None]:
def get_label_and_names_from_metadata_json(dicomseg_json):

  """Returns two lists containing the label values and the corresponding
     CodeMeaning values

  Only the first entry of 'segmentAttributes' is read; its elements are the 
  per-segment dictionaries. 

  Inputs: 
    dicomseg_json : path to the metajson file

  Outputs:
    label_values  : the 'labelID' of each segment in the metajson file 
    label_names   : the corresponding CodeMeaning values, taken from 
                    'SegmentedPropertyTypeCodeSequence' 

  """

  # The context manager closes the file as soon as it has been parsed
  with open(dicomseg_json) as f:
    meta_json = json.load(f)

  segments = meta_json['segmentAttributes'][0]
  print ('num_regions: ' + str(len(segments)))

  label_values = [segment['labelID'] for segment in segments]
  label_names = [segment['SegmentedPropertyTypeCodeSequence']['CodeMeaning'] 
                 for segment in segments]

  return label_values, label_names 

In [None]:
def compute_pyradiomics_3D_features(ct_nifti_path, 
                                    label_values, 
                                    label_names, 
                                    split_pred_nifti_path, 
                                    nnunet_shape_features_code_mapping_df):

  """Function to compute pyradiomics 3D shape features for each label that has 
     a mask file '<label_name>.nii.gz' in split_pred_nifti_path. 

  Inputs: 
    ct_nifti_path            : the CT nifti file 
    label_values             : the label value for each of the segments from the json file 
    label_names              : the corresponding label name for each of the segments 
    split_pred_nifti_path    : directory holding the individual nii segments 
                               needed for pyradiomics 
    nnunet_shape_features_code_mapping_df : the df where we will obtain the 
                                            list of the shape features to 
                                            compute

  Outputs:
    df : dataframe with one row per label for which a mask file was found. The 
         columns are 'ReferencedSegment' (label value), 'label_name' and one 
         column per shape feature (the 'original_shape_' prefix is removed). 

  Raises:
    ValueError if no mask file is found for any of the labels. 
    
  """

  # Get the names of the features from the nnunet_shape_features_code_mapping_df
  shape_features = list(nnunet_shape_features_code_mapping_df['shape_feature'].values)

  # Instantiate the extractor and modify the settings to keep the 3D shape features
  extractor = featureextractor.RadiomicsFeatureExtractor()
  extractor.settings['minimumROIDimensions'] = 3 
  extractor.disableAllFeatures()
  extractor.enableFeaturesByName(shape=shape_features) 

  # Calculate features for each label and create one single-row dataframe per label. 
  # All labels might not be present: walk over the (value, name) pairs and 
  # skip the ones without a mask file, instead of assuming that the first N 
  # label names match the N files found in the directory. 
  df_list = [] 
  for label_value, label_name in zip(label_values, label_names):
    mask_path = os.path.join(split_pred_nifti_path, label_name + '.nii.gz')
    if not os.path.isfile(mask_path):
      print('no mask file for label ' + str(label_name) + ' - skipping')
      continue
    # Run the extractor 
    result = extractor.execute(ct_nifti_path, mask_path) # dictionary
    dict_keep = {'ReferencedSegment': label_value, 
                 'label_name': label_name}
    # Just keep the shape features, and remove original_shape_ from their names
    for key, value in result.items():
      if 'original_shape' in key:
        dict_keep[key.replace('original_shape_', '')] = value
    df_list.append(pd.DataFrame([dict_keep]))

  if not df_list:
    raise ValueError('No mask files found in ' + str(split_pred_nifti_path) + 
                     ' for any of the given label names')

  # concat all label features 
  df = pd.concat(df_list)

  return df

In [None]:
def order_dicom_files_image_position(dcm_directory):
  """
  Sorts the dicom files of a directory by their position along the direction 
  perpendicular to the two ImageOrientationPatient vectors. 

  Arguments:
    dcm_directory : input directory of dcm files to put in order 

  Outputs:
    files_sorted   : full paths of the dcm files in sorted order 
    sop_all_sorted : the SOPInstanceUIDs in sorted order 
    pos_all_sorted : the computed position values in sorted (ascending) order 

  """
  files = [os.path.join(dcm_directory, name) for name in os.listdir(dcm_directory)]

  positions = []
  sop_uids = []

  for dcm_path in files:
    ds = dcmread(dcm_path)

    # ImageOrientationPatient (0020, 0037) and ImagePositionPatient (0020, 0032)
    orientation = ds.ImageOrientationPatient
    position = ds.ImagePositionPatient

    # The cross product of the two orientation vectors gives the direction 
    # along which the files are ordered 
    normal = np.cross(orientation[0:3], orientation[3:])

    # Position of this file along that direction
    positions.append(np.dot(normal, position))

    # SOPInstanceUID (0008, 0018)
    sop_uids.append(ds.SOPInstanceUID)

  # ----- order the SOPInstanceUIDs/files by the computed position ----- #
  order = np.argsort(positions).astype(int)

  files_sorted = np.array(files)[order]
  sop_all_sorted = np.array(sop_uids)[order]
  pos_all_sorted = np.array(positions)[order]

  return files_sorted, sop_all_sorted, pos_all_sorted 

In [None]:
def _code_triplet_from_mapping_row(mapping_row, prefix):
  """Builds a CodeValue / CodingSchemeDesignator / CodeMeaning dictionary from 
  the '<prefix>_CodeValue', '<prefix>_CodingSchemeDesignator' and 
  '<prefix>_CodeMeaning' columns of the first row of mapping_row."""
  return {
    "CodeValue": str(mapping_row[prefix + "_CodeValue"].values[0]),
    "CodingSchemeDesignator": str(mapping_row[prefix + "_CodingSchemeDesignator"].values[0]),
    "CodeMeaning": str(mapping_row[prefix + "_CodeMeaning"].values[0])
  }


def create_structured_report_metajson_for_shape_features(SeriesInstanceUID, 
                                                         SOPInstanceUID_seg,
                                                         seg_file, 
                                                         dcm_directory, 
                                                         segments_code_mapping_df,
                                                         shape_features_code_mapping_df,
                                                         df_features, 
                                                         ):
  
  """Function that creates the metajson necessary for the creation of a
  structured report from a pandas dataframe of label names and features for 
  each. 

  Inputs: 
    SeriesInstanceUID               : SeriesInstanceUID of the corresponding CT 
                                      file 
    SOPInstanceUID_seg              : SOPInstanceUID of the corresponding SEG file 
    seg_file                        : filename of SEG DCM file, stored as the 
                                      only entry of "compositeContext" 
    dcm_directory                   : ct directory that will be sorted in 
                                      terms of axial ordering according to the 
                                      ImagePositionPatient and ImageOrientation 
                                      fields
    segments_code_mapping_df        : dataframe that holds the names of the 
                                      segments (column "segment") and the 
                                      associated FindingSite_* code values 
    shape_features_code_mapping_df  : dataframe that holds the names of the 
                                      features (column "shape_feature") and the 
                                      associated quantity_* / units_* code values 
    df_features                     : a pandas dataframe holding the segments and a 
                                      set of 3D shape features for each. The 
                                      first two columns are skipped when 
                                      collecting the features; 'ReferencedSegment' 
                                      and 'label_name' are read by name. 

  Outputs:
    Returns the metajson (a dictionary) for the structured report that will 
    then be used by dcmqi tid1500writer to create a structured report 

  Raises:
    IndexError if a segment name or a feature name has no row in the 
    corresponding code mapping dataframe. 
  """ 

  # --- Get the version number for the pyradiomics package --- #

  pyradiomics_version_number = str(radiomics.__version__)
  
  # --- Sort the dcm files first according to --- # 
  # --- ImagePositionPatient and ImageOrientation --- #

  files_sorted, _, _ = order_dicom_files_image_position(dcm_directory)
  files_sorted = [os.path.basename(f) for f in files_sorted]

  # --- Create the header for the json --- # 
  
  inputMetadata = {
    "@schema": "https://raw.githubusercontent.com/qiicr/dcmqi/master/doc/schemas/sr-tid1500-schema.json#",
    "SeriesDescription": "Measurements",
    "SeriesNumber": "1001",
    "InstanceNumber": "1",
    "compositeContext": [seg_file], # not full path
    "imageLibrary": files_sorted, # not full path 
    "observerContext": {
      "ObserverType": "DEVICE",
      "DeviceObserverName": "pyradiomics",
      "DeviceObserverModelName": pyradiomics_version_number
    },
    "VerificationFlag": "UNVERIFIED",
    "CompletionFlag": "COMPLETE",
    "activitySession": "1",
    "timePoint": "1"
  }

  # ------------------------------------------------------------------------- # 
  # --- Create one measurement group per segment - holds all its features --- # 

  # feature columns: everything after the first two columns of df_features 
  feature_list = df_features.columns[2:] 

  # number of rows in the df_features 
  num_segments = df_features.shape[0]

  # Array of dictionaries - one dictionary for each segment 
  measurement_across_segments_combined = [] 

  for segment_id in range(0,num_segments):

    ReferencedSegment = df_features['ReferencedSegment'].values[segment_id]
    finding_site_name = df_features['label_name'].values[segment_id]

    print('segment_id: ' + str(segment_id))
    print('ReferencedSegment: ' + str(ReferencedSegment))
    print('FindingSite: ' + str(finding_site_name))

    # --- Look up the codes of this segment --- # 
    segment_row = segments_code_mapping_df[segments_code_mapping_df["segment"] == finding_site_name]
    if segment_row.empty:
      raise IndexError("No code mapping found for segment '" + str(finding_site_name) + "'")

    # --- Create the measurement items, one per feature --- # 
    measurement_items = []
    for feature_name in feature_list:
      feature_row = shape_features_code_mapping_df.loc[shape_features_code_mapping_df["shape_feature"] == feature_name]
      if feature_row.empty:
        raise IndexError("No code mapping found for shape feature '" + str(feature_name) + "'")
      # Read the value from this segment's own row (by position, like 
      # ReferencedSegment above) so that rows sharing a label name do not all 
      # end up with the values of the first one. 
      value = df_features[feature_name].values[segment_id]
      measurement_items.append({
        "value": str(np.round(value, 3)),
        "quantity": _code_triplet_from_mapping_row(feature_row, "quantity"),
        "units": _code_triplet_from_mapping_row(feature_row, "units"),
        "measurementAlgorithmIdentification": {
          "AlgorithmName": "pyradiomics",
          "AlgorithmVersion": pyradiomics_version_number
        }
      })

    # --- Create the dict for the Measurements group --- # 
    measurement_across_segments_combined.append({
      "TrackingIdentifier": "Measurements group " + str(ReferencedSegment),
      "ReferencedSegment": int(ReferencedSegment),
      "SourceSeriesForImageSegmentation": str(SeriesInstanceUID),
      "segmentationSOPInstanceUID": str(SOPInstanceUID_seg),
      "Finding": {
        "CodeValue": "113343008",
        "CodingSchemeDesignator": "SCT",
        "CodeMeaning": "Organ"
      }, 
      "FindingSite": _code_triplet_from_mapping_row(segment_row, "FindingSite"),
      "measurementItems": measurement_items # array of dictionaries 
    })

  # --- Add the measurement data --- # 

  inputMetadata["Measurements"] = measurement_across_segments_combined

  return inputMetadata

In [None]:
def save_structured_report_for_shape_features(SeriesInstanceUID, 
                                              SOPInstanceUID_seg, 
                                              pred_dicomseg_path, 
                                              dicomseg_json_path, 
                                              dcm_directory, 
                                              pred_nifti_path, 
                                              split_pred_nii_path, 
                                              ct_nifti_path, 
                                              segments_code_mapping_df,
                                              shape_features_code_mapping_df,
                                              sr_json_path,
                                              sr_path
                                              ):
  
  """ This function creates the SR necessary for the nnUNet shape features 

  Inputs: 
  SeriesInstanceUID               : SeriesInstanceUID of the corresponding CT 
                                    file 
  SOPInstanceUID_seg              : SOPInstanceUID of the corresponding SEG file 
  pred_dicomseg_path              : path of the DICOM SEG file. Its file name 
                                    is listed as composite context in the 
                                    metajson and its directory is passed to 
                                    tid1500writer. 
  dicomseg_json_path              : json file for DICOM SEG file 
  dcm_directory                   : directory of ct files that will be sorted in 
                                    terms of axial ordering according to the 
                                    ImagePositionPatient and ImageOrientation 
                                    fields
  pred_nifti_path                 : predictions in nifti format 
  split_pred_nii_path             : base path under which the "split_nii" 
                                    folder holding the split nifti files is 
                                    created (the notebook passes nnunet_base_path)
  ct_nifti_path                   : filename for CT nifti file
  segments_code_mapping_df        : dataframe that holds the names of the 
                                    segments and the associated code values etc.
  shape_features_code_mapping_df  : dataframe that holds the names of the 
                                    features and the associated code values etc. 
  sr_json_path                    : the path that the metajson for the SR for 
                                    the 3D shape features will be saved 
  sr_path                         : the path that the SR for the 3D shape 
                                    features will be saved 

  Outputs:
    Returns the metajson for the structured report, which is also written to 
    sr_json_path and used by dcmqi tid1500writer to create the structured 
    report at sr_path 

  """

  # --- get label values and names from metajson file --- #
  label_values, label_names = get_label_and_names_from_metadata_json(dicomseg_json_path)

  # --- split the multilabel nifti into individual files --- #
  # Use the argument as the base path instead of reaching for a notebook global
  split_dir = os.path.join(split_pred_nii_path, "split_nii")
  os.makedirs(split_dir, exist_ok=True)
  split_nii(pred_nifti_path, split_dir, label_names)

  # --- compute features for each region --- #
  # NOTE: features_csv_path_nnunet is a notebook-level variable, not an 
  # argument. The folder is created as before; nothing is written to it here. 
  os.makedirs(features_csv_path_nnunet, exist_ok=True)
  # Use the mapping dataframes passed in as arguments (not the globals)
  df_features = compute_pyradiomics_3D_features(ct_nifti_path, 
                                                label_values, 
                                                label_names,
                                                split_dir, 
                                                shape_features_code_mapping_df)
  print ('created df_features')
  
  # remove nii files after computing the pyradiomics results
  for split_file in glob.glob(os.path.join(split_dir, '*')):
    if os.path.isfile(split_file):
      os.remove(split_file)

  # --- Create the final metadata for the SR --- #
  # tid1500writer expects the SEG file *name* in the metajson and the folder 
  # that contains it as --inputCompositeContextDirectory 
  seg_file_name = os.path.basename(pred_dicomseg_path)
  inputMetadata = create_structured_report_metajson_for_shape_features(SeriesInstanceUID, 
                                                                       SOPInstanceUID_seg,
                                                                       seg_file_name, 
                                                                       dcm_directory, 
                                                                       segments_code_mapping_df, 
                                                                       shape_features_code_mapping_df,
                                                                       df_features)

  print ('created SR json for shape features')

  # --- Write out json --- #

  with open(sr_json_path, 'w') as f:
    json.dump(inputMetadata, f, indent=2)
  print ('wrote out json for shape features')

  # --- Save the SR for nnUNet shape features --- # 
  inputImageLibraryDirectory = dcm_directory
  outputDICOM = sr_path
  # the folder where the seg file is located ('.' when only a file name was given)
  inputCompositeContextDirectory = os.path.dirname(pred_dicomseg_path) or '.'
  inputMetadata_json = sr_json_path 

  print ('inputImageLibraryDirectory: ' + str(inputImageLibraryDirectory))
  print ('outputDICOM: ' + str(outputDICOM))
  print ('inputCompositeContextDirectory: ' + str(inputCompositeContextDirectory))
  print ('inputMetadata_json: ' + str(inputMetadata_json)) 

  # An argument list (no shell) keeps paths containing spaces or shell 
  # metacharacters intact 
  command = ['tid1500writer', 
             '--inputImageLibraryDirectory', str(inputImageLibraryDirectory),
             '--outputDICOM', str(outputDICOM),
             '--inputCompositeContextDirectory', str(inputCompositeContextDirectory),
             '--inputMetadata', str(inputMetadata_json)]
  try:
    completed = subprocess.run(command, capture_output=True, text=True)
  except FileNotFoundError:
    # Same best-effort behavior as the shell escape: report and carry on
    print ('tid1500writer was not found - SR for shape features was not written')
    return inputMetadata

  # Show the tool output in the notebook, as the shell escape did
  if completed.stdout:
    print (completed.stdout)
  if completed.stderr:
    print (completed.stderr)

  if completed.returncode == 0:
    print ('wrote out SR for shape features')
  else:
    print ('tid1500writer failed with exit code ' + str(completed.returncode))

  return inputMetadata

# Putting everything together

## Running the Per-series Analysis

Now we will run the analysis over each SeriesInstanceUID. 

In [None]:
# Per-series pipeline. For every SeriesInstanceUID in series_to_process_id_list:
#   1. download and sort the DICOM data, convert the CT to NIfTI (dcm2niix)
#   2. run the nnU-Net inference and upload the predictions
#   3. post-process, and write/upload the modified DICOM SEG metadata json
#   4. for 3d models, compute the shape features and save/upload the SR
# The loop relies on many names defined outside this cell (paths, bucket URIs,
# cohort_df, nnunet_model, the preprocessing/processing/postprocessing helpers, ...).
for idx, series_id in enumerate(series_to_process_id_list):


  #  -----------------
  # init

  # NOTE(review): the start_*/elapsed_* timing values and the *_time_dict_*
  # dictionaries below are assigned but never read inside this cell.
  start_total_nnunet = time.time()

  # init every single time, as the most recent logs are loaded from the bucket
  inference_time_dict_nnunet = dict()
  total_time_dict_nnunet = dict()
  inference_time_dict_bpr = dict()
  total_time_dict_bpr = dict()

  # set up processing flags? - discuss with Dennis 

  clear_output(wait = True)

  print("(%g/%g) Processing series %s"%(idx + 1, len(series_to_process_id_list), series_id))

  # Get the dataframe of the series being analyzed 
  series_df = cohort_df[cohort_df["SeriesInstanceUID"] == series_id]
  num_instances = series_df['num_instances'].to_list()[0]

  # Get the corresponding PatientId 
  patient_id = np.unique(series_df[series_df['Modality'] == "CT"]["PatientID"].values)
  assert len(patient_id) == 1 # sanity check - each PatientID should be unique 
  patient_id = patient_id[0] 
  print('patient_id: ' + str(patient_id))

  # Get the corresponding StudyInstanceUID
  study_id = np.unique(series_df[series_df['Modality'] == "CT"]["StudyInstanceUID"].values)
  assert len(study_id) == 1 # sanity check - each StudyInstanceUID should be unique
  study_id = study_id[0] 
  print('study_id: ' + str(study_id))

  # File names and local paths for this series, all derived from series_id
  dicomseg_fn = series_id + "_SEG.dcm"

  input_nifti_fn = series_id + "_0000.nii.gz"
  input_nifti_path = os.path.join(model_input_folder_nnunet, input_nifti_fn)

  pred_nifti_fn = series_id + ".nii.gz"
  pred_nifti_path = os.path.join(model_output_folder_nnunet, pred_nifti_fn)

  pred_softmax_folder_name = "pred_softmax"
  pred_softmax_folder_path = os.path.join(processed_nrrd_path_nnunet, series_id, pred_softmax_folder_name)

  pred_features_csv_fn = series_id + ".csv"
  pred_features_csv_path = os.path.join(features_csv_path_nnunet, pred_features_csv_fn)

  sr_fn = series_id + '_SR.dcm'
  sr_path = os.path.join(sr_path_nnunet, sr_fn)

  sr_json_fn = series_id + '_SR.json'
  sr_json_path = os.path.join(sr_path_nnunet, sr_json_fn)

  # -----------------
  # GS URI definition

  # gs URI at which the *nii.gz object is or will be stored in the bucket
  gs_uri_nifti_file = os.path.join(bucket_nifti_folder_uri_nnunet, pred_nifti_fn)

  # gs URI at which the folder storing the *.nrrd softmax probabilities is or will be stored in the bucket
  gs_uri_softmax_pred_folder = os.path.join(bucket_softmax_pred_folder_uri_nnunet, series_id)

  # gs URI at which the DICOM SEG object is or will be stored in the bucket
  gs_uri_dicomseg_file = os.path.join(bucket_dicomseg_folder_uri_nnunet, dicomseg_fn)

  # DK added - gs URI at which the CT to nii file is or will be stored in the bucket 
  # NOTE(review): this URI is built from the DICOM SEG bucket folder and the
  # *prediction* nifti file name (pred_nifti_fn) - confirm this is the intended
  # destination for the CT nifti.
  gs_uri_ct_nifti_file = os.path.join(bucket_dicomseg_folder_uri_nnunet, pred_nifti_fn)

  # DK added - gs URI at which the features csv is saved if a 3d model is run 
  # gs_uri_features_csv_file = os.path.join(bucket_features_csv_folder_uri_nnunet, pred_features_csv_fn)
  # gs URI at which the DICOM SR ojbect for the shape features is or will be stored in the bucket 
  gs_uri_sr_file = os.path.join(bucket_sr_folder_uri_nnunet, sr_fn)

  # -----------------
  # preprocessing

  # Download the DICOM data 
  # (skipped when the sorted folder of this series already exists)
  download_path = os.path.join(sorted_base_path, series_id) # should be deleted after bpr 
  if not os.path.exists(download_path):
    start_time_download_series_data = time.time()
    download_series_data_s5cmd(raw_base_path = raw_base_path, # --> ADD THIS TO GIT REPO. 
                               sorted_base_path = sorted_base_path,
                               series_df = series_df,
                               remove_raw = True)
    elapsed_time_download_series_data = time.time()-start_time_download_series_data

  # # DICOM CT to NIfTI - required for the processing
  # start_time_ct_to_nii = time.time()
  # preprocessing.pypla_dicom_ct_to_nifti(sorted_base_path = sorted_base_path,
  #                                       processed_nifti_path = processed_nifti_path,
  #                                       pat_id = series_id, 
  #                                       verbose = True)
  # elapsed_time_ct_to_nii = time.time()-start_time_ct_to_nii 

  # DICOM CT to NifTI - required for processing 
  start_time_ct_to_nii = time.time()
  success = dcm2niix_dicom_ct_to_nifti(sorted_base_path = sorted_base_path,
                                       processed_nifti_path = processed_nifti_path,
                                       pat_id = series_id)
  elapsed_time_ct_to_nii = time.time()-start_time_ct_to_nii 
  # A return value of -1 is treated as a failed conversion: a log file is
  # written and uploaded, and the rest of the pipeline is skipped for this series
  if success == -1:
    print("Cannot convert DICOM to NifTI using dcm2niix, either created no nii volumes or multiple volumes - stopping processing. ")
    # create file in the log directory 
    dcm2nii_log_path = os.path.join(processed_base_path, 'dcm2nii_log.txt')
    gs_uri_dcm2nii_log = os.path.join(bucket_log_folder_uri_nnunet, series_id + '_dcm2nii_log.txt')
    with open(dcm2nii_log_path, 'w') as f:
      f.write("Cannot convert DICOM to NifTI using dcm2niix, either created no nii volumes or multiple volumes - stopping processing. ")
    # NOTE(review): redundant - the `with` block above has already closed the file
    f.close()
    # !$s5cmd_path cp $dcm2nii_log_path $gs_uri_dcm2nii_log
    !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $dcm2nii_log_path $gs_uri_dcm2nii_log
    continue 

  # upload nifti file to bucket 
  ct_nifti_path = os.path.join(processed_nifti_path,series_id,series_id+"_CT.nii.gz")
  !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $ct_nifti_path $gs_uri_ct_nifti_file


  # prepare the `model_input` folder for the inference phase
  preprocessing.prep_input_data(processed_nifti_path = processed_nifti_path,
                                model_input_folder = model_input_folder_nnunet,
                                pat_id = series_id)
  
  start_inference_nnunet = time.time()
  # run the DL-based prediction
  processing.process_patient_nnunet(model_input_folder = model_input_folder_nnunet,
                                    model_output_folder = model_output_folder_nnunet, 
                                    nnunet_model = nnunet_model, 
                                    use_tta = use_tta,
                                    export_prob_maps = export_prob_maps)
  elapsed_inference_nnunet = time.time() - start_inference_nnunet

  if export_prob_maps:
    # convert the softmax predictions to NRRD files
    postprocessing.numpy_to_nrrd(model_output_folder = model_output_folder_nnunet,
                                processed_nrrd_path = processed_nrrd_path_nnunet,
                                pat_id = series_id,
                                output_folder_name = pred_softmax_folder_name)

    # copy the nnU-Net *.npz softmax probabilities in the chosen bucket
    !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $pred_softmax_folder_path/ $gs_uri_softmax_pred_folder/

  # copy the nnU-Net *.nii.gz binary masks in the chosen bucket --> Do we need this? 
  # !gsutil -m cp $pred_nifti_path $gs_uri_nifti_file
  !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $pred_nifti_path $gs_uri_nifti_file

  # -----------------
  # post-processing

  # FIXME: consider removing this? (if only NIfTIs will be used to produce the DICOM SEGs)
  # mandatory post-processing to convert the NIfTI file from the pipeline
  # to a NRRD file (same content)
  if not os.path.isdir(os.path.join(processed_nrrd_path_nnunet,series_id)):
    os.mkdir(os.path.join(processed_nrrd_path_nnunet,series_id))
  postprocessing.pypla_postprocess(processed_nrrd_path = processed_nrrd_path_nnunet,
                                  model_output_folder = model_output_folder_nnunet,
                                  pat_id = series_id)

  # Modify the dicomseg_json file so that the SegmentAlgorithmName is representative of the model and other parameters 
  # Writes out the json file 
  SegmentAlgorithmName = experiment_folder_name 
  dicomseg_json_path_modified = "/content/data/dicomseg_metadata_" + SegmentAlgorithmName + '.json'
  modify_dicomseg_json_file(dicomseg_json_path, dicomseg_json_path_modified, SegmentAlgorithmName)
  # upload the json file 
  gs_uri_dicomseg_json_file = os.path.join(bucket_experiment_folder_uri_nnunet, 'dicomseg_metadata_' + SegmentAlgorithmName + '.json')
  !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $dicomseg_json_path_modified $gs_uri_dicomseg_json_file

  # -----------------
  # extract features if nnunet_model is 3d and save structured report 
  if ('3d' in nnunet_model):
    # NOTE(review): pred_dicomseg_path is not assigned anywhere in this cell
    # (only dicomseg_fn is); it has to be defined elsewhere - confirm.
    # `pydicom` itself also needs to be imported before this line runs.
    seg_dcm = pydicom.dcmread(pred_dicomseg_path)
    # (0002,0003) is read from the file meta header of the SEG file
    SOPInstanceUID_seg = seg_dcm.file_meta['0x0002', '0x0003'].value
    dcm_directory = os.path.join(sorted_base_path, series_id, 'CT')
    # NOTE(review): the original dicomseg_json_path is passed here, not
    # dicomseg_json_path_modified; nnunet_base_path is passed in the
    # split_pred_nii_path position.
    nnunet_features_metajson = save_structured_report_for_shape_features(series_id, 
                                                                         SOPInstanceUID_seg, 
                                                                         pred_dicomseg_path,  
                                                                         dicomseg_json_path, 
                                                                         dcm_directory,
                                                                         pred_nifti_path, 
                                                                         nnunet_base_path, 
                                                                         ct_nifti_path, 
                                                                         nnunet_segments_code_mapping_df,
                                                                         nnunet_shape_features_code_mapping_df,
                                                                         sr_json_path,
                                                                         sr_path
                                                                         )
    # Copy SR to bucket 
    !$s5cmd_path --endpoint-url https://storage.googleapis.com cp $sr_path $gs_uri_sr_file