In [1]:
import os
import nrrd
import numpy as np
from scipy.ndimage import binary_dilation
from helpers import *

In [2]:
#helper to move and rename nrrd files
import shutil

def rename_and_move_nrrd_files(root_dir):
    for subdir, _, files in os.walk(root_dir):
        if os.path.basename(subdir) == 'original_labels':
            raw_dir = os.path.join(os.path.dirname(subdir), 'raw')
            os.makedirs(raw_dir, exist_ok=True)
            
            for file in files:
                old_path = os.path.join(subdir, file)
                
                if file.endswith('_data.nrrd'):
                    new_filename = file.replace('_data.nrrd', '_raw.nrrd')
                    new_path = os.path.join(raw_dir, new_filename)
                elif file.endswith('_raw.nrrd'):
                    new_path = os.path.join(raw_dir, file)
                else:
                    continue  # Skip files that do not match the criteria
                
                shutil.move(old_path, new_path)
                print(f"Moved and renamed: {old_path} -> {new_path}")

In [3]:
if False:
    current_directory = os.getcwd()
    root_directory = f'{current_directory}/data/Vesuvius/'
    rename_and_move_nrrd_files(root_directory)

In [4]:
import os
import nrrd
import numpy as np
from scipy.ndimage import binary_dilation, binary_closing

from concurrent.futures import ProcessPoolExecutor

def process_nrrd_file(nrrd_path, label_dir):
    # print(f"Processing: {nrrd_path}")
    data, header = nrrd.read(nrrd_path)
    unique_values = np.unique(data[data > 0])  # Ignore background & masking
    # print(len(unique_values))

    # Create an empty array for the result
    result = np.zeros_like(data, dtype=np.uint8)

    for value in unique_values:
        structure_mask = data == value
        
        # Fill small holes in the structure (if needed)
        closed_structure = structure_mask  # Assuming this operation is needed
        
        # Dilate the closed structure
        dilated_mask = binary_dilation(closed_structure, iterations=3)
        
        # Assign the border class and foreground class
        border_class = dilated_mask & ~closed_structure
        result[dilated_mask] = 2  # Foreground class
        result[border_class] = 1  # Border class

    # Save the processed array back to an .nrrd file
    output_path = os.path.join(label_dir, f"tri_class_{os.path.basename(nrrd_path)}")
    nrrd.write(output_path, result, header)
    print(f"Processed and saved: {output_path}")

def process_nrrd_files(root_dir):
    for subdir, _, files in os.walk(root_dir):
        if os.path.basename(subdir) == 'original_labels':
            # Determine the corresponding 'label' directory
            label_dir = os.path.join(os.path.dirname(subdir), 'label')
            os.makedirs(label_dir, exist_ok=True)

            # Prepare the arguments for parallel processing
            nrrd_paths = [os.path.join(subdir, file) for file in files if file.endswith('.nrrd')]
            args = [(nrrd_path, label_dir) for nrrd_path in nrrd_paths]
            # for a in args:
            #     print(a)

             # Use ProcessPoolExecutor for parallel processing
            with ProcessPoolExecutor() as executor:
                futures = [executor.submit(process_nrrd_file, *arg) for arg in args]
                for future in futures:
                    future.result()  # This will re-raise any exceptions caught during processing


# def process_nrrd_files(root_dir):
#     for subdir, _, files in os.walk(root_dir):
#         if os.path.basename(subdir) == 'original_labels':
#             # Determine the corresponding 'label' directory
#             label_dir = os.path.join(os.path.dirname(subdir), 'label')
#             os.makedirs(label_dir, exist_ok=True)

#             for file in files:
#                 if file.endswith('.nrrd'):
#                     nrrd_path = os.path.join(subdir, file)
#                     data, header = nrrd.read(nrrd_path)
                    
#                     unique_values = np.unique(data)
#                     unique_values = unique_values[unique_values > 0]  # Ignore background & masking
                    
#                     # Create an empty array for the result
#                     result = np.zeros_like(data)
                    
#                     for value in unique_values:
#                         structure_mask = data == value
                        
#                         # Fill small holes in the structure
#                         closed_structure = structure_mask  # Keeping this as is
                        
#                         # Dilate the closed structure
#                         dilated_mask = binary_dilation(closed_structure, iterations=3)
                        
#                         # Assign the border class and foreground class
#                         border_class = dilated_mask & ~closed_structure
#                         result[~border_class & dilated_mask] = 2  # Foreground class
#                         result[border_class] = 1  # Border class
                    
#                     # Save the processed array back to an .nrrd file
#                     output_path = os.path.join(label_dir, f"tri_class_{file}")
#                     nrrd.write(output_path, result, header)
#                     print(f"Processed and saved: {output_path}")

In [5]:
import os

# Get the current working directory
current_directory = os.getcwd()
root_directory = f'{current_directory}/data/Vesuvius/'
process_nrrd_files(root_directory)

Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_3606_4000_8450_xyz_256_res1_s4_label.nrrd
Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_3606_4000_8706_xyz_256_res1_s4_label.nrrd
Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_3606_4256_8706_xyz_256_res1_s4_label.nrrd
Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_3350_4000_8706_xyz_256_res1_s4_label.nrrd
Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_3606_4256_8450_xyz_256_res1_s4_label.nrrd
Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_manual_2_iters_160_rot_0_densified_label.nrrd
Processed and saved: /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/label/tri_class_manual_2_iters_40_rot_2_densified_label.nrrd
Proces

In [8]:
from ipywidgets import interact, IntSlider
import matplotlib.pyplot as plt
current_directory = os.getcwd()
data_split = 'train'
data_type = 'label'
filename = 'tri_class_manual_1_label.nrrd'
filename = 'tri_class_manual_1_iters_120_rot_0_densified_label.nrrd'
pred = nrrd.read(f'{current_directory}/data/Vesuvius/{data_split}/{data_type}/{filename}')[0]
# pred = nrrd.read(f'{os.getcwd()}/data/Vesuvius/val/raw/manual_1_raw.nrrd')[0]
#show final clipped instance segmentation
def plot_slice(slice_index, axis=0):
    plt.figure(figsize=(8, 6))
    if axis == 1:
        plt.imshow(pred[:,slice_index,:])
    elif axis == 2:
        plt.imshow(pred[:,:,slice_index])
    else:
        plt.imshow(pred[slice_index,:,:])
    plt.colorbar()
    plt.title(f'Slice {slice_index}')
    plt.show()

interact(plot_slice, slice_index=IntSlider(min=0, max=pred.shape[0]-1, step=1, value=0), axis=IntSlider(min=0, max=2, step=1, value=0))

interactive(children=(IntSlider(value=0, description='slice_index', max=255), IntSlider(value=0, description='…

<function __main__.plot_slice(slice_index, axis=0)>

In [9]:
import os
import nrrd
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider
import scipy.ndimage

# Ensure we are in the correct directory
current_directory = os.getcwd()

data_split = 'train'
data_type = 'label'
filename = 'tri_class_manual_2_label.nrrd'
filename = 'tri_class_manual_1_iters_120_rot_0_densified_label.nrrd'
pred = nrrd.read(f'{current_directory}/data/Vesuvius/{data_split}/{data_type}/{filename}')[0]
foreground = pred == 2

# Label the connected components
fg, num_features = scipy.ndimage.label(foreground)

# Define minimum size threshold
min_size = 100000  # Adjust this value based on your specific needs

# Calculate the size of each structure
structure_sizes = np.array(scipy.ndimage.sum(foreground, fg, range(num_features + 1)))

# Create a mask to remove small structures
remove_mask = structure_sizes < min_size
remove_mask[0] = 0  # Ensure the background is not removed

# Remove small structures
fg[remove_mask[fg]] = 0

# Relabel the structures after removal
fg, num_features = scipy.ndimage.label(fg > 0)
max_value = fg.max()

max_value = fg.max()
if fg.max() > 30:
    max_value = None

print(pred.shape, pred.min(), pred.max())
print("Max value in fg:", max_value)

# Define the function to plot a slice
def plot_slice(slice_index, axis=0):
    plt.figure(figsize=(8, 6))
    if axis == 1:
        plt.imshow(fg[:, slice_index, :], vmin=0, vmax=max_value)
    elif axis == 2:
        plt.imshow(fg[:, :, slice_index], vmin=0, vmax=max_value)
    else:
        plt.imshow(fg[slice_index, :, :], vmin=0, vmax=max_value)
    plt.colorbar()
    plt.title(f'Slice {slice_index}')
    plt.show()

# Create interactive widget
interact(plot_slice, 
         slice_index=IntSlider(min=0, max=pred.shape[0]-1, step=1, value=0),
         axis=IntSlider(min=0, max=2, step=1, value=0))


(256, 256, 256) 0 2
Max value in fg: 12


interactive(children=(IntSlider(value=0, description='slice_index', max=255), IntSlider(value=0, description='…

<function __main__.plot_slice(slice_index, axis=0)>

In [10]:
import scipy.ndimage
def label_foreground_structures(input_array, min_size=100000): #use aggressize min_size to remove small structures, then decent overlap stride to get ones that are erronously cut off on the edges
    # Find connected components in the foreground (value 2)
    foreground = (input_array == 2)
    
    # Label connected components
    labeled_array, num_features = scipy.ndimage.label(foreground)
    
    # Measure the size of each connected component
    component_sizes = np.bincount(labeled_array.ravel())
    
    # Create a mask for components larger than the minimum size
    large_components = component_sizes >= min_size
    
    # Ensure background is not considered a component
    large_components[0] = False
    
    # Create a filtered array to hold only large components
    filtered_array = labeled_array.copy()
    
    # Set small components to 0 (background)
    filtered_array[~large_components[labeled_array]] = 0
    
    print(f"Number of connected foreground structures before filtering: {num_features}")
    print(f"Number of connected foreground structures after filtering: {np.max(filtered_array)}")
    
    return filtered_array


In [13]:
labeled_arr = label_foreground_structures(pred)
img_path = f'{current_directory}/data/Vesuvius/train/raw/manual_1_iters_120_rot_0_densified_raw.nrrd'
img, _ = nrrd.read(img_path)
gt_label = nrrd.read(f'{current_directory}/data/Vesuvius/train/original_labels/manual_1_iters_120_rot_0_densified_label.nrrd')[0]
print(np.unique(gt_label))

Number of connected foreground structures before filtering: 219
Number of connected foreground structures after filtering: 14
[-1  0  1  2  4  5  6  7  8  9 10 11 12 13 14]


In [14]:
def plot_slice(slice_index, axis=0):
    plt.figure(figsize=(8, 6))
    if axis == 1:
        plt.imshow(mark_boundaries_color(img[:,slice_index,:], labeled_arr[:,slice_index,:]))
    elif axis == 2:
        plt.imshow(mark_boundaries_color(img[:,:,slice_index], labeled_arr[:,:,slice_index]))
    else:
        plt.imshow(mark_boundaries_color(img[slice_index,:,:], labeled_arr[slice_index,:,:]))
    plt.colorbar()
    plt.title(f'Slice {slice_index}')
    plt.show()

interact(plot_slice, slice_index=IntSlider(min=0, max=pred.shape[0]-1, step=1, value=0), axis=IntSlider(min=0, max=2, step=1, value=0))

interactive(children=(IntSlider(value=0, description='slice_index', max=255), IntSlider(value=0, description='…

<function __main__.plot_slice(slice_index, axis=0)>

In [15]:
from ipywidgets import interact, IntSlider
import matplotlib.pyplot as plt
pred2 = nrrd.read(f'{current_directory}/data/Vesuvius/train/original_labels/manual_1_iters_120_rot_0_densified_label.nrrd')[0]
#show final clipped instance segmentation
def plot_slice(slice_index, axis=0):
    plt.figure(figsize=(8, 6))
    if axis == 1:
        plt.imshow(pred2[:,slice_index,:])
    elif axis == 2:
        plt.imshow(pred2[:,:,slice_index])
    else:
        plt.imshow(pred2[slice_index,:,:])
    plt.colorbar()
    plt.title(f'Slice {slice_index}')
    plt.show()

interact(plot_slice, slice_index=IntSlider(min=0, max=pred2.shape[0]-1, step=1, value=0), axis=IntSlider(min=0, max=2, step=1, value=0))

interactive(children=(IntSlider(value=0, description='slice_index', max=255), IntSlider(value=0, description='…

<function __main__.plot_slice(slice_index, axis=0)>

In [16]:
import os
import h5py
import nrrd
import numpy as np

def create_hdf5_files(raw_dir, label_dir, output_dir, weight_dir=None):
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    for raw_filename in os.listdir(raw_dir):
        if raw_filename.endswith('.nrrd'):
            # Construct full file paths for raw and label
            raw_path = os.path.join(raw_dir, raw_filename)
            base_name = '_'.join(raw_filename.split('_')[:-1])
            label_filename = "tri_class_"+base_name+'_label.nrrd'
            label_path = os.path.join(label_dir, label_filename)

            print(f"Processing: {raw_filename} and {label_filename}")
            
            if not os.path.exists(label_path):
                print(f"Label file not found for {label_filename}")
                label_filename = f"tri_class_{raw_filename}"
                print(f"trying {label_filename}")
                label_path = os.path.join(label_dir, label_filename)
            if not os.path.exists(label_path):
                print(f"Label file not found for {raw_filename}, skipping.")
                continue
            
            # Read raw and label data
            raw_data, raw_header = nrrd.read(raw_path)
            label_data, label_header = nrrd.read(label_path)
            
            # Optionally read weight data
            weight_data = None
            if weight_dir:
                weight_path = os.path.join(weight_dir, label_filename)
                if os.path.exists(weight_path):
                    weight_data, weight_header = nrrd.read(weight_path)
            
            # Convert to numpy arrays
            raw_data = np.asarray(raw_data, dtype=np.float32)
            label_data = np.asarray(label_data, dtype=np.uint8)
            if weight_data is not None:
                weight_data = np.asarray(weight_data, dtype=np.float32)
            
            # Construct output file path
            output_filename = base_name + '.h5'
            output_path = os.path.join(output_dir, output_filename)
            
            # Create HDF5 file
            with h5py.File(output_path, 'w') as hdf5_file:
                hdf5_file.create_dataset('raw', data=raw_data, dtype='float32')
                hdf5_file.create_dataset('label', data=label_data, dtype='uint8')
                if weight_data is not None:
                    hdf5_file.create_dataset('weight', data=weight_data, dtype='float32')
            
            print(f"Processed and saved {raw_filename} and {label_filename} to {output_path}")

In [17]:
current_directory = os.getcwd()
sub_dir = 'test'
raw_directory = f'{current_directory}/data/Vesuvius/{sub_dir}/raw'
label_directory = f'{current_directory}/data/Vesuvius/{sub_dir}/label'
output_hdf5_path = f'{current_directory}/data/Vesuvius/{sub_dir}/dataset'
create_hdf5_files(raw_directory, label_directory, output_hdf5_path)

Processing: manual_2_raw.nrrd and tri_class_manual_2_label.nrrd
Processed and saved manual_2_raw.nrrd and tri_class_manual_2_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/test/dataset/manual_2.h5
Processing: manual_1_raw.nrrd and tri_class_manual_1_label.nrrd
Processed and saved manual_1_raw.nrrd and tri_class_manual_1_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/test/dataset/manual_1.h5


In [20]:
sub_dir = 'val'
raw_directory = f'{current_directory}/data/Vesuvius/{sub_dir}/raw'
label_directory = f'{current_directory}/data/Vesuvius/{sub_dir}/label'
output_hdf5_path = f'{current_directory}/data/Vesuvius/{sub_dir}/dataset'
create_hdf5_files(raw_directory, label_directory, output_hdf5_path)

Processing: manual_2_raw.nrrd and tri_class_manual_2_label.nrrd
Processed and saved manual_2_raw.nrrd and tri_class_manual_2_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/val/dataset/manual_2.h5
Processing: manual_1_raw.nrrd and tri_class_manual_1_label.nrrd
Processed and saved manual_1_raw.nrrd and tri_class_manual_1_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/val/dataset/manual_1.h5


In [19]:
sub_dir = 'train'
raw_directory = f'{current_directory}/data/Vesuvius/{sub_dir}/raw'
label_directory = f'{current_directory}/data/Vesuvius/{sub_dir}/label'
output_hdf5_path = f'{current_directory}/data/Vesuvius/{sub_dir}/dataset'
create_hdf5_files(raw_directory, label_directory, output_hdf5_path)

Processing: manual_2_iters_100_rot_2_densified_raw.nrrd and tri_class_manual_2_iters_100_rot_2_densified_label.nrrd
Processed and saved manual_2_iters_100_rot_2_densified_raw.nrrd and tri_class_manual_2_iters_100_rot_2_densified_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/dataset/manual_2_iters_100_rot_2_densified.h5
Processing: 3350_4256_8706_xyz_256_res1_s4_raw.nrrd and tri_class_3350_4256_8706_xyz_256_res1_s4_label.nrrd
Processed and saved 3350_4256_8706_xyz_256_res1_s4_raw.nrrd and tri_class_3350_4256_8706_xyz_256_res1_s4_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/dataset/3350_4256_8706_xyz_256_res1_s4.h5
Processing: manual_1_iters_120_rot_0_densified_raw.nrrd and tri_class_manual_1_iters_120_rot_0_densified_label.nrrd
Processed and saved manual_1_iters_120_rot_0_densified_raw.nrrd and tri_class_manual_1_iters_120_rot_0_densified_label.nrrd to /home/james/Documents/VS/3dUnet-tri-class/data/Vesuvius/train/dataset/manua