In [1]:
import os
import csv
import numpy as np
from scipy import ndimage
import nibabel as nib
import argparse
import json


def get_training_dict_hnn(datadir):
    # datadir = /projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256
    data_split_json = os.path.join(datadir, "data_split.json")
    # Load JSON data from a file
    with open(data_split_json, 'r') as file:
        data_paths = json.load(file)
    
    training = []
    for batch in data_paths["training"]:
        image = batch["image"]
        label = batch["seg"]
        contrast = batch["contrast"]
        if "empty" in label:
            # Consider only non empty segmentations
            dict_entry = {
                    "image" : os.path.join(datadir, image),
                    "label" : os.path.join(datadir, label),
                    "contrast": contrast
                }
            training.append(dict_entry)
        else:
            pass
    training_dict = {"training" : training}
    return training_dict


def create_csv(CSV_PATH, DATADIR):
    """ 
    Creation of the complete CSV dataset with size smaller than or equal to 96 in all directions
    """
    header = ['id', 'scan_ct', 'label', 'center_x', 'center_y', 'center_z', 
            'x_extreme_min', 'x_extreme_max', 'y_extreme_min', 'y_extreme_max', 'z_extreme_min', 'z_extreme_max', 'x_size', 'y_size', 'z_size', 'contrast']
    # Getting all files in a folder DATADIR
    ### Create a dictionary with scans and labels paths
    training_dict = get_training_dict_hnn(DATADIR)
    training = training_dict['training']


    with open(CSV_PATH, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for mask_path in training:
            # Load mask data
            mask = nib.load(mask_path['label'])
            mask_data = np.asarray(mask.get_fdata())
            # Binary mask
            mask_data = np.where(mask_data > 0.5, 1, 0)
            mask_data = mask_data > 0 
           
            id = mask_path['label'].split("/")[-1].split("_")[-2].split(".nii.gz")[0] # For Head and neck dataset
            label_path = mask_path['label']
            scan_path_ct = mask_path['image']
            
            print(f"Doing case ID: {id}")

            ## These functions give the center of mass and extremes of x, y, z ##
            ### With this is possible to know the exact position of the center of the tumour and bounding box
            center_x, center_y, center_z = None, None, None
            min_x, max_x = None, None
            min_y, max_y = None, None
            min_z, max_z = None, None
            x_size, y_size, z_size = None, None, None
             
            contrast = mask_path['contrast']
            row = [id, scan_path_ct, label_path, center_x, center_y, center_z, min_x, max_x, min_y, max_y, min_z, max_z, x_size, y_size, z_size, contrast]
            writer.writerow(row)
        
        print(f"Done. Saved in {CSV_PATH}")
    return training


    


  from scipy import ndimage


In [2]:
create_csv(CSV_PATH="/projects/brats2023_a_f/Aachen/aritifcial-head-and-neck-cts/WDM3D/wdm-3d/utils/hnn_DA.csv", DATADIR="/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256")

Doing case ID: 4bf416923743bd04502f3b689d1eeeb6
Doing case ID: af4bd27f50e3895831acc6cc6fd66825
Doing case ID: c5706366d1e17f8c0005374b6966631b
Doing case ID: d69eff9b9f4e7d776b1abac4e45b58f7
Doing case ID: e6e28eefa8c48c5962c7bd2de4a373eb
Doing case ID: d066ce0e7f336a3552ef35112f1f5c6f
Doing case ID: b94184c6f07f42a0360cdf71e89643a3
Doing case ID: 161911c8375e1b92f25cbb846bbe440b
Doing case ID: 008d29aa308064130345709f9d200ddd
Doing case ID: fef18b8ee51b2740c88bb539c1a050fc
Doing case ID: e1255432291e0f274506501c04587749
Doing case ID: 0c2b060d8d115c00bb624daa6623f1e5
Doing case ID: 064e1c36ea64f1ae88ab4be6e9993cbe
Doing case ID: 6008c9599cacaa57a59e3917d97c91d4
Doing case ID: 7a9af7c13a938eb4b6290b4bb0cdd21c
Doing case ID: ca1a583fd3f75c19f9a49aa749dc85be
Doing case ID: d4ee7c427f789617cb9400d3dd873b58
Doing case ID: 4b6e21ddce600969718b0cd9985592e0
Doing case ID: e05c9f03858c96f14bc040183b844db1
Doing case ID: 609990baacdcc601c721a4d4becfd87f
Doing case ID: d32e4cc8dcf213e1a9bf2ddbc

[{'image': '/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256/data/quebec_00f4922cb8daa2eb734cece106191dcd.nii.gz',
  'label': '/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256/seg/quebec_4bf416923743bd04502f3b689d1eeeb6_empty.nii.gz',
  'contrast': 0},
 {'image': '/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256/data/cptac_af4bd27f50e3895831acc6cc6fd66825.nii.gz',
  'label': '/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256/seg/cptac_af4bd27f50e3895831acc6cc6fd66825_empty.nii.gz',
  'contrast': 0},
 {'image': '/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256/data/anderson_c5706366d1e17f8c0005374b6966631b.nii.gz',
  'label': '/projects/brats2023_a_f/Aachen/HnN_cancer_data/HnN_cancer_data_1_1_1_256_256_256/seg/anderson_c5706366d1e17f8c0005374b6966631b_empty.nii.gz',
  'contrast': 0},
 {'image': '/projects/brats2023_a_f/Aachen/HnN_