In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import os
import gzip
import shutil

import SimpleITK as sitk
# Viewer object used to hand SimpleITK images to an external application.
image_viewer = sitk.ImageViewer()
# Alternative external application (3D Slicer), currently disabled:
# image_viewer.SetApplication('/Applications/Slicer.app/Contents/MacOS/Slicer')
# Use the Fiji/ImageJ binary as the viewer application.
# NOTE(review): this is an absolute, macOS-specific install path — adjust it on other machines.
image_viewer.SetApplication('/Applications/Fiji.app/Contents/MacOS/ImageJ-macosx')

%matplotlib inline
import matplotlib.pyplot as plt

#### Read data

In [3]:
# The dataset is expected in a 'dataset' folder under the current working directory.
dataset_folder = Path.cwd() / 'dataset'

# Both sheets live in the same workbook, so open and parse the file only once:
# passing a list of sheet names makes read_excel return a {sheet_name: DataFrame} dict.
clinical_workbook = dataset_folder / 'Pretreat-MetsToBrain-Masks_clin_20230918.xlsx'
clinical_sheets = pd.read_excel(clinical_workbook, sheet_name=['Data', 'Description'])

data = clinical_sheets['Data']                # contents of the 'Data' sheet
description = clinical_sheets['Description']  # contents of the 'Description' sheet

#### Mappings

In [10]:
# Lookup tables translating coded values (keys are strings) into readable labels.
# NOTE(review): presumably these codes mirror the coded columns of the clinical
# spreadsheet — confirm against its 'Description' sheet.
Sex = {
    "0": "Male",
    "1": "Female",
}

Ethnicity = {
    "0": "White",
    "1": "White Hispanic",
    "2": "White/Black",
    "3": "Black",
    "4": "Asian/Pacific Islander",
}

Primary = {
    "1": "Breast cancer",
    "2": "Gastrointestinal cancers",
    "3": "Small cell lung cancer",
    "4": "Melanoma",
    "5": "Non small cell lung cancer",
    "6": "Renal cell carcinoma",
    "7": "Other",
}

# The three binary flags share the same No/Yes coding; each name gets its own
# independent dict so that modifying one never affects the others.
Extranodal, Death, Infratentorial = ({"0": "No", "1": "Yes"} for _ in range(3))

#### Data Selection

In [36]:
def transform_to_single_roi(image_path):
    """
    Collapse a multi-label segmentation into a single binary ROI, in place.

    The image at image_path is loaded, every non-zero voxel is relabelled to 1
    (zeros stay 0), and the result is written back to the same image_path as
    uint16 with the spacing, origin and direction of the image that was read.
    The unique values and the array shape are printed before and after the
    relabelling.
    """
    source_image = sitk.ReadImage(image_path)
    labels = sitk.GetArrayFromImage(source_image)
    print(f"Before: {np.unique(labels)}, {labels.shape}")

    # Anything that is not exactly 0 becomes part of the single ROI.
    binary_labels = (labels != 0).astype("uint16")
    print(f"After: {np.unique(binary_labels)}, {binary_labels.shape}")

    # Build the output image and carry the source geometry over to it.
    merged_image = sitk.GetImageFromArray(binary_labels)
    merged_image.SetDirection(source_image.GetDirection())
    merged_image.SetOrigin(source_image.GetOrigin())
    merged_image.SetSpacing(source_image.GetSpacing())

    # Overwrite the original file with the merged mask.
    sitk.WriteImage(merged_image, image_path)

In [38]:
# rootdir = dataset_folder/Path("test")
rootdir = dataset_folder/Path("Pretreat-MetsToBrain-Masks") # directory with the complete dataset
newdir = dataset_folder/Path("dataset_to_process") # directory to store data we are interested in
newdir.mkdir(parents=True, exist_ok=True) # create the directory (and parents) if it doesn't exist

for subdir, _dirs, files in os.walk(rootdir):
    for file in files:
        # The three characters before the last seven are the scan type,
        # e.g. '...-t2w.nii.gz' -> 't2w'. Keep only gzipped t2w / seg files.
        scan_type = file[-10:-7]
        if not (file.endswith('.gz') and scan_type in ('t2w', 'seg')):
            continue

        print(subdir, file)

        # One output folder per sample, named after the sample's source folder.
        # Path.name is separator-agnostic, unlike splitting on '/'.
        new_subdir = newdir / Path(subdir).name
        new_subdir.mkdir(exist_ok=True)

        # Decompress straight from the original archive into the new folder
        # (no intermediate copy of the .gz that would have to be deleted again).
        output_filepath = str(new_subdir / file[:-3]) # filename without .gz extension
        print(output_filepath)
        with gzip.open(Path(subdir) / file, 'rb') as f_in, open(output_filepath, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

        # Merge ROIs: segmentation masks are collapsed to a single label
        if scan_type == 'seg':
            transform_to_single_roi(output_filepath)


/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/Pretreat-MetsToBrain-Masks/BraTS-MET-00086-000 BraTS-MET-00086-000-t2w.nii.gz
/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/dataset_to_process/BraTS-MET-00086-000/BraTS-MET-00086-000-t2w.nii
/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/Pretreat-MetsToBrain-Masks/BraTS-MET-00086-000 BraTS-MET-00086-000-seg.nii.gz
/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/dataset_to_process/BraTS-MET-00086-000/BraTS-MET-00086-000-seg.nii
Before: [0. 2. 3.], (155, 240, 240)
After: [0 1], (155, 240, 240)
/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/Pretreat-MetsToBrain-Masks/BraTS-MET-00284-000 BraTS-MET-00284-000-seg.nii.gz
/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/dataset_to_process/BraTS-MET-00284-000/BraTS-MET-00284-000-seg.nii
Before: [0. 1. 2. 3.], (155, 240, 240)
After: [0 1], (155, 240, 240)
/Users/stelios/workspace/stelios/pyradiomics/v2/dataset/Pretreat-MetsToBrain-Masks/BraTS-MET-0