# Getting Started with Exploring Segmentations 

## Test on my one dataset

In [14]:
import shapeworks as sw

## 1. Defining and exploring your dataset

### Defining dataset location


In [15]:
#import relevant libraries
import numpy as np
import glob 
from pathlib import Path 


In [16]:
data_path="./Label-Maps/"
datasetName="FULGUR"
shapeExtention = '.nii.gz'

### What is available in the dataset?

First let's see how many shapes we have in the dataset.

**File formats:** For binary segmentations, all [itk-supported image formats](https://insightsoftwareconsortium.github.io/itk-js/docs/image_formats.html) can be used.

In [17]:

# let's get a list of files for available segmentations in this dataset
# * here is a wild character used to retrieve all filenames 
# in the shape directory with the file extensnion
shapeFilenames = sorted(glob.glob(data_path + '*' + shapeExtention)) 

print ('Number of shapes: ' + str(len(shapeFilenames)))
print('Shape files found:')

for shapeFilename in shapeFilenames:
    shapeFilename = Path(shapeFilename)
    print(shapeFilename)

Number of shapes: 0
Shape files found:


## 2. Exploring your dataset

We would like to better understand the given dataset to decide the appropriate grooming (preprocessing) pipeline/step to prepare it for shape modeling.

### Loading your dataset

First step is to load the dataset. 

**Note:** If your dataset is large (large volumes and/or large number of segmentations), you could select a subset for this exploration step.

In [18]:
# list of shape segmentations
shapeSegList = []

# list of shape names (shape files prefixes) to be used 
# for saving outputs and visualizations
shapeNames   = [] 

# loop over all shape files and load individual segmentations
for shapeFilename in shapeFilenames:
    print('Loading: ' + shapeFilename)
    
    # current shape name
    segFilename = shapeFilename.split('/')[-1] 
    shapeName   = segFilename[:-len(shapeExtention)]
    shapeNames.append(shapeName)
    
    # load segmentation
    shapeSeg = sw.Image(shapeFilename)
    
    # append to the shape list
    shapeSegList.append(shapeSeg)

num_samples = len(shapeSegList)
print('\n' + str(num_samples) + 
      ' segmentations are loaded for the ' + datasetName + ' dataset ...')


0 segmentations are loaded for the FULGUR dataset ...


In [19]:
shapeSegList

[]

In [20]:

def convert_to_one_hot(image, num_classes):
    data = image.toArray()  # Get the image data as a numpy array
    shape = data.shape
    one_hot = np.zeros((num_classes, *shape), dtype=np.uint8)  # Create the one-hot array
    
    for i in range(num_classes):
        one_hot[i] = (data == i).astype(np.uint8)  # Set pixels for each class
        
    return one_hot

#Prepare your lists
shapeSegList_all = []
shape_nameList_final = []

#Number of segmentation classes (including background)
num_classes = 19

#Loop over each segmentation image and process it
for idx, shapeSeg in enumerate(shapeSegList):
    one_hot = convert_to_one_hot(shapeSeg, num_classes)
    print(np.shape(one_hot))
    one_hot_transposed = np.transpose(one_hot, (0, 3, 2, 1))
    print("Transposed shape:", np.shape(one_hot_transposed))
    # Ensure the array is C-contiguous
    one_hot_contiguous = np.ascontiguousarray(one_hot_transposed)    
    
    # Exclude background and store each class as a separate shape
    for class_idx in range(1, num_classes):
    #for class_idx in range(1, 2):
        class_data = one_hot_contiguous[class_idx,:, :, :]

        # Create a ShapeWorks image object
        class_image = sw.Image(class_data.astype(np.float32))
        shape_name = shapeNames[idx]
        shape_name_final = f'{shape_name}_class_{class_idx}.nii.gz'
        shapeSegList_all.append(class_image)
        shape_nameList_final.append(shape_name_final)
        

        class_data = one_hot[class_idx]
        """
        # Create a ShapeWorks image object
        class_image = sw.Image(class_data.astype(np.float32))
        shape_name = shapeNames[idx]
        shape_name_final=os.path.join(output_dir, f'{shape_name}_class_{class_idx}.nii.gz')
        shape_name_temp=f'{shape_name}_class_{class_idx}.nii.gz'
        shape_nameList_final.append(shape_name_temp)
        shapeSegList_all.append(shape_name_final)
        """
        
        print(f'Prepared: {shape_name_final}')

#Prepare your lists
shapeSegList_all = []
shape_nameList_final = []

#Number of segmentation classes (including background)
num_classes = 19

#Loop over each segmentation image and process it
for idx, shapeSeg in enumerate(shapeSegList):
    # Exclude background and store each class as a separate shape
    for class_idx in range(1, num_classes):
    #for class_idx in range(1, 2):
        # Create a ShapeWorks image object
        class_image = sw.Image(class_data.astype(np.float32))
        shape_name = shapeNames[idx]
        shape_name_final=os.path.join(output_dir, f'{shape_name}_class_{class_idx}.nii.gz')
        shape_name_temp=f'{shape_name}_class_{class_idx}.nii.gz'
        shape_nameList_final.append(shape_name_temp)
        shapeSegList_all.append(shape_name_final)
        print(f'Prepared: {shape_name_final}')

# Once converted into separated nifti files for each muscle for each subject

In [21]:
import os
from pathlib import Path
from scipy.ndimage import zoom
import matplotlib.pyplot as plt

In [22]:
# Function to downsample the volume if it's too large
def downsample_volume(volume, factor):
    data = volume.toArray()
    zoom_factors = (1 / factor, 1 / factor, 1 / factor)
    downsampled_data = zoom(data, zoom_factors, order=1)  # Using linear interpolation
    return sw.Image(downsampled_data.astype(np.float32))


In [23]:
data_path_onehot='./Label-Maps/'
shapeFilenames_onehot = sorted(glob.glob(data_path_onehot + '*' + shapeExtention)) 
#print (shapeFilenames_onehot)

print ('Number of shapes: ' + str(len(shapeFilenames_onehot)))
print('Shape files found:')

for shapeFilename in shapeFilenames_onehot:
    shapeFilename = Path(shapeFilename)
    print(shapeFilename)



Number of shapes: 0
Shape files found:




#list of shape segmentations
small_shapes = []

#list of shape names (shape files prefixes) to be used 
#for saving outputs and visualizations
small_shapeNames   = [] 

#loop over all shape files and load individual segmentations
for shapeFilename in shapeFilenames_onehot:
    print('Loading: ' + shapeFilename)
    
    # current shape name
    segFilename = shapeFilename.split('/')[-1] 
    shapeName   = segFilename[:-len(shapeExtention)]
    small_shapeNames.append(shapeName)
    
    # load segmentation
    shapeSeg = sw.Image(shapeFilename)
    
    # append to the shape list
    small_shapes.append(shapeSeg)

num_samples = len(small_shapes)
print('\n' + str(num_samples) + 
      ' segmentations are loaded for the ' + datasetName + ' dataset ...')

In [24]:
# Adjust the downsample factor based on your hardware capabilities
downsample_factor = 2

# List of shape segmentations
small_shapes = []

# List of shape names (shape files prefixes) to be used for saving outputs and visualizations
small_shapeNames = [] 

# Loop over all shape files and load individual segmentations
for shapeFilename in shapeFilenames_onehot:
    print('Loading: ' + str(shapeFilename))
    
    # Convert to Path object to access .name attribute
    shapeFilename = Path(shapeFilename)
    
    # Current shape name
    segFilename = shapeFilename.name
    shapeName = segFilename[:-len(shapeExtention)]
    small_shapeNames.append(shapeName)
    
    # Load segmentation
    shapeSeg = sw.Image(str(shapeFilename))
    
    # Downsample the volume if needed
    downsampled_shapeSeg = downsample_volume(shapeSeg, downsample_factor)
    
    # Append to the shape list
    small_shapes.append(downsampled_shapeSeg)

num_samples = len(small_shapes)
print('\n' + str(num_samples) + ' segmentations are loaded for the dataset...')



0 segmentations are loaded for the dataset...


### Visualizing your dataset

Now let's visualize all samples in a grid using `pyvista`. You may need to call `pv.close_all()` every once in a while to clean up the unclosed plotters.
We will use `sw.plot_volumes` function from the Shapeworks python module. This function will take in a list of shapeworks images as input and initiate a pyvista plotter to render multiple windows, each with a single segmentation, add segmentations to the plotter, and start rendering.

In [25]:
#define parameters that controls the plotter
use_same_window = False # plot using multiple rendering windows if false
notebook        = False # True will enable the plots to lie inline
show_borders    = True  # show borders for each rendering window
shade_volumes   = True  # use shading when performing volume rendering
color_map       = "viridis" # color map for volume rendering, e.g., 'bone', 'coolwarm', 'cool', 'viridis', 'magma'
show_axes       = True  # show a vtk axes widget for each rendering window
show_bounds     = True  # show volume bounding box
show_all_edges  = True  # add an unlabeled and unticked box at the boundaries of plot. 
font_size       = 10    # text font size for windows
link_views      = True  # link all rendering windows so that they share same camera and axes boundaries

#plot all segmentations in the shape list

#Set the options for the plot
use_same_window = True
notebook = True
show_borders = True
shade_volumes = True
color_map = 'viridis'
show_axes = True
show_bounds = True
show_all_edges = True
font_size = 12
link_views = True

small_shapes=shapeSegList_all[0:3]
print(small_shapes)
small_shapeNames=shape_nameList_final[0:3]
print(small_shapeNames)

In [26]:

sw.plot_volumes(small_shapes,    
             volumeNames     = small_shapeNames, 
             use_same_window = use_same_window,
             notebook        = notebook,
             show_borders    = show_borders,  
             shade_volumes   = shade_volumes, 
             color_map       = color_map,
             show_axes       = show_axes,  
             show_bounds     = show_bounds,
             show_all_edges  = show_all_edges, 
             font_size       = font_size,   
             link_views      = link_views
             )

ValueError: max() arg is an empty sequence

#Custom function to plot volumes and add a title
def plot_volumes_with_title(volumes, volumeNames, title, use_same_window=True):
    plotter = pv.Plotter(shape=(1, len(volumes)), title=title)
    
    for i, volume in enumerate(volumes):
        plotter.subplot(0, i)
        plotter.add_volume(volume.toArray(), cmap='viridis', shade=True)
        plotter.add_text(volumeNames[i], font_size=10)
    
    plotter.link_views()
    plotter.show_axes()
    plotter.show_bounds()
    plotter.show()

#Plot the volumes with a single title
plot_volumes_with_title(small_shapes, small_shapeNames, 'Volume Segmentation Visualization')

#If you still want to use matplotlib, you can try to integrate it like this
plt.figure(figsize=(10, 8))
plt.suptitle('Volume Segmentation Visualization', fontsize=20)
plt.show()

## 3. Deciding the grooming pipeline needed for your dataset

Does this dataset need grooming? What are grooming steps needed? Let's inspect the segmentations. What do we observe?

### Voxel spacing

Voxel spacing are not isotropic, i.e., voxel size in each of the three dimensions are not equal.This can be identified if you zoom-in in the visualization below and observe the step size in the x,y,z direction. Anisotropic spacing could adversely impact particles optimization since shapeworks assumes equal voxel spacing. Some datasets might also have different voxel spacings for each segmentation. 

*Hence, it is necessary to bring all segmentations to the same voxel spacing that is equal in all dimensions.* 

Another observation is voxel spacing is relatively large. This can be observed by the pixelated volume rendering and the jagged isosurface. 

*We can improve the segmentation resolution by decreasing voxel spacing.*

In [15]:
import pyvista as pv
# to better appreciate the pixelated nature of these segmentations, we need to only visualize 
# the binary segmentation, notice the thick slices

shapeIdx = 1
shapeSeg = small_shapes[shapeIdx]
 

shapeSeg_vtk = sw.sw2vtkImage(shapeSeg, verbose = True)

sw.plot_volumes(shapeSeg_vtk)

shapeworks image header information: 
{
	dims: [230, 223, 772],
	origin: [0, 0, 0],
	size: [230, 223, 772],
	spacing: [1, 1, 1]
}

vtk image header information: 
ImageData (0x1d7b8bfd8e0)
  N Cells:      39196098
  N Points:     39595880
  X Bounds:     0.000e+00, 2.290e+02
  Y Bounds:     0.000e+00, 2.220e+02
  Z Bounds:     0.000e+00, 7.710e+02
  Dimensions:   230, 223, 772
  Spacing:      1.000e+00, 1.000e+00, 1.000e+00
  N Arrays:     1


### Segmentations and image boundaries 

Some segmentations are very close to the image boundary, not leaving enough room for particles (correspondences) to move and spread over these surface regions. In particular, particles could overshoot outside the image boundary during optimization. 

Furthermore, if a segmentation touches the image boundary, this will result in an artificially (i.e., not real) open surface. 

*Hence, these segmentations needs to be padded with background voxels (zero-valued) to create more room along each dimension.*

In [16]:
# let's inspect a segmentation that touches the image boundaries
shapeIdx = 0
shapeSeg = small_shapes[shapeIdx]

shapeSeg_vtk = sw.sw2vtkImage(shapeSeg, verbose = False)

sw.plot_volumes(shapeSeg_vtk)

<p><img src="./Tuto_img/tuto_visualize0.png"></p>


This segmentation touches the image boundary and hence will result in an artificially open surface. To inspect this behavior, we need extract a surface mesh (isosurface) from each segmentation. An isosurface is a three-dimensional surface that represents points of a constant value (aka isovalue) within the given volume of space.

In [17]:
# let's see if there's a function that extracts an isosurface from an image
# use dot-tap to get a list of functions/apis available for shapeSeg

# found it - toMesh, let's see its help
help(shapeSeg.toMesh)

Help on method toMesh in module shapeworks_py:

toMesh(...) method of shapeworks_py.Image instance
    toMesh(self: shapeworks_py.Image, isovalue: float) -> shapeworks::Mesh
    
    converts image to mesh at specified isovalue



The `toMesh` function needs an isovalue, which is the constant value the represents the surface of interest. Since a shape segmentation is a binary image, the foreground is expected to have the value of 1 (white) and the background should have a zero value (black), so an appropriate isovalue to extract the foregound-background interface a value in between, e.g., 0.5

In [18]:
import numpy as np
# let's make sure that our assumptions about the voxel values are correct
# is the given volume a binary segmentation?

# first convert to numpy array
shapeSeg_array = shapeSeg.toArray()

# make sure that it is a binary segmentation
voxelValues = np.unique(shapeSeg_array)
print('\nVoxel values:' + str(voxelValues))

if len(voxelValues) > 2:
    print('WARNING: ' + shapeName + ' is not a bindary segmentation. Voxels have more than two distinct values')
    print('PLEASE make sure to use binary segmentations')
else:
    print('Shape ' + shapeName + ' is a binary segmentation')


Voxel values:[0.         0.09131903 0.09252457 ... 0.89141685 0.8925598  1.        ]
PLEASE make sure to use binary segmentations


In [19]:
# now define the isovalue, in case a binary segmentation has a foreground label that is not 1
# we need to obtain a value inbetween

# get min and max values
minVal = shapeSeg_array.min()
maxVal = shapeSeg_array.max()

print('\nMinimum voxel value: ' + str(minVal))
print('Maximum voxel value: ' + str(maxVal))

isoValue = (maxVal - minVal)/2.0
print('\nisoValue = ' + str(isoValue))


Minimum voxel value: 0.0
Maximum voxel value: 1.0

isoValue = 0.5


In [20]:
# let's extract the segmentation isosurface and visualize it

# extract isosurface
shapeMesh = shapeSeg.toMesh(isovalue = isoValue)
 
# sw to vtk
shapeMesh_vtk = sw.sw2vtkMesh(shapeMesh)

sw.plot_meshes([shapeMesh_vtk])

<p><img src="./Tuto_img/tuto_visualize.png"></p>

In [21]:

output_dir = './vtk_output/'
# Convert VTK mesh to PyVista object
pv_mesh = pv.wrap(shapeMesh_vtk)

# Define output filename
output_filename = os.path.join(output_dir, 'segmentation_mesh.vtk')

# Save the VTK mesh
pv_mesh.save(output_filename)

print(f'Saved VTK mesh to: {output_filename}')

Saved VTK mesh to: ./vtk_output/segmentation_mesh.vtk


So, we have been able to extract a segmentation's isosurface and visualize it as a surface mesh. It is worth noting that the jagged surface is due to the anisotropic voxel space (with spacing in z-dimension is double that of x- and y-dimensions) and large voxel size.

### Shape alignment

One can observe from the segmentation visualization that they are not roughly aligned, i.e., they do not share the same coordinate frame where each individual shape is located differently compared to other shapes. 

*Aligning shapes is a critical preprocessing step to avoid the shape model to encode variabilities pertaining to global transformations such as rotation and translation.* 

In [22]:
# let's inspect some segmentations where we can observe misalignment
shapeIdxs = [0,1]

shapeSegSubset   = [small_shapes[shapeIdx] for shapeIdx in shapeIdxs ]
shapeNamesSubset = [small_shapeNames[shapeIdx]   for shapeIdx in shapeIdxs ]

To inspect how mutliple segmentation are spatially aligned with respect to each other, we will visualize their surfaces in the same rendering window. 

In [23]:
shapeSegIsosurfaces     = []
shapeSegIsosurfaces_vtk = []

for shapeSeg in shapeSegSubset:
    
    # extract isosurface
    shapeIsosurface = shapeSeg.toMesh(isovalue = isoValue)
    
    shapeSegIsosurfaces.append(shapeIsosurface)
 
    # sw to vtk
    shapeSegIsosurfaces_vtk.append(sw.sw2vtkMesh(shapeIsosurface, verbose = False))


In [24]:
sw.plot_meshes(shapeSegIsosurfaces,       
            use_same_window = True, 
            notebook        = False,  
            show_borders    = True,  
            meshes_color    = ['tan', 'blue','red'], 
            mesh_style      = "surface", 
            show_mesh_edges = False, 
            show_axes       = True,  
            show_bounds     = True,  
            show_all_edges  = True,  
            font_size       = 10,    
            link_views      = True   
           )

<p><img src="./Tuto_img/tuto_visualize2.png"></p>

### Too much background

Image boundaries are not tight around shapes, leaving irrelevant background voxels that might increase the memory footprint when optimizing the shape model. 

*We can crop segmentations to remove unnecessary background.*

In [25]:
shapeIdx = 1
shapeSeg = small_shapes[shapeIdx]

shapeSeg_vtk = sw.sw2vtkImage(shapeSeg, verbose = False)

sw.plot_volumes(shapeSeg_vtk)

<p><img src="./Tuto_img/tuto_visualize3.png"></p>

### Binary segmentations

In general, this binary representation is not useful for finite numerical calculation of surface geometry and features that are required in shape modeling, which assumes the image is a sampling of a smooth function. 

Hence, ShapeWorks makes use of the signed distance transform of the binary segmentation that does satisfy this criterion. 

*For the correspondence optimization step, shapes can be represented as the zero level set of a smooth signed distance transform.*

### Tentative grooming

Hence, a tentative grooming pipeline entails the following steps:   
1. Resampling segmentations to have smaller and isotropic voxel spacing   
2. Rigidly aligning shapes   
3. Cropping and padding segmentations   
4. Converting segmentations to smooth signed distance transforms   


Let the fun begins!!! Please visit [Getting Started with Grooming Segmentations](getting-started-with-grooming-segmentations.ipynb) to learn how to groom your dataset.

# Save Meshes as vtk

In [26]:
import shapeworks as sw
import pyvista as pv
import glob
import os
from pathlib import Path

# Define the data path and shape extension
data_path_onehot = './Label-Maps/'
shapeExtention = '.nii.gz'
output_dir = './vtk_output/'
os.makedirs(output_dir, exist_ok=True)

# Get list of shape filenames
shapeFilenames_onehot = sorted(glob.glob(data_path_onehot + '*' + shapeExtention))

print('Number of shapes: ' + str(len(shapeFilenames_onehot)))
print('Shape files found:')

for shapeFilename in shapeFilenames_onehot:
    shapeFilename = Path(shapeFilename)
    print(shapeFilename)

# Load each segmentation, convert to VTK, and save
for shapeFilename in shapeFilenames_onehot:
    shapeFilename = Path(shapeFilename)
    print(f'Loading: {shapeFilename}')

    # Load segmentation
    shapeSeg = sw.Image(str(shapeFilename))
    
    # Convert to VTK format
    shapeSeg_vtk = sw.sw2vtkImage(shapeSeg, verbose=True)
    
    # Generate output filename
    segFilename = shapeFilename.name
    vtk_output_filename = os.path.join(output_dir, segFilename.replace(shapeExtention, '.vtk'))
    
    # Save the VTK mesh
    pv.wrap(shapeSeg_vtk).save(vtk_output_filename)
    
    print(f'Saved: {vtk_output_filename}')


Number of shapes: 2
Shape files found:
Label-Maps\F006_label_4_RF.nii.gz
Label-Maps\F099_label_4_RF.nii.gz
Loading: Label-Maps\F006_label_4_RF.nii.gz
shapeworks image header information: 
{
	dims: [466, 451, 1518],
	origin: [2.61327, -102.748, -439.474],
	size: [256.299, 248.049, 834.898],
	spacing: [0.549999, 0.549999, 0.549999]
}

vtk image header information: 
ImageData (0x1d7ffd810a0)
  N Cells:      317432250
  N Points:     319031988
  X Bounds:     2.613e+00, 2.584e+02
  Y Bounds:     -1.027e+02, 1.448e+02
  Z Bounds:     -4.395e+02, 3.949e+02
  Dimensions:   466, 451, 1518
  Spacing:      5.500e-01, 5.500e-01, 5.500e-01
  N Arrays:     1
Saved: ./vtk_output/F006_label_4_RF.vtk
Loading: Label-Maps\F099_label_4_RF.nii.gz
shapeworks image header information: 
{
	dims: [461, 446, 1544],
	origin: [-1.35085, -79.2534, -437.211],
	size: [253.552, 245.302, 849.207],
	spacing: [0.550005, 0.550005, 0.550005]
}

vtk image header information: 
ImageData (0x1d7b8ef8a00)
  N Cells:      3158