In [None]:
import h5py
import numpy as np
import os
from glob import glob
import tifffile

# Root folder holding the `train/<dataset>/{images,labels}` TIFF stacks.
# Set the VASCULATURE_DATA_DIR environment variable to use another location;
# the original author's local path is kept as the fallback so existing runs
# behave exactly as before.
data_dir = os.environ.get(
    "VASCULATURE_DATA_DIR",
    r"C:\Users\hamis\OneDrive\Documents\GitHub\Vasculature-3DUnet\sample_data",
)

def create_hdf5_dataset(data_dir, dataset_name):
    """Pack a folder of per-slice TIFFs into a single HDF5 volume.

    Reads ``<data_dir>/train/<dataset_name>/images/*.tif`` and
    ``<data_dir>/train/<dataset_name>/labels/*.tif`` (each list sorted by
    path), and writes them slice by slice into the ``raw`` and ``label``
    datasets of ``<cwd>/<dataset_name>.hdf5``. Both datasets are created with
    shape ``(number of images, rows, cols)`` and dtype ``uint16``; rows/cols
    are taken from the first image.

    Args:
        data_dir: Root directory that contains the ``train`` folder.
        dataset_name: Sub-folder of ``train`` to convert; also used as the
            stem of the output file name.

    Raises:
        FileNotFoundError: If the image folder contains no ``*.tif`` files.
        ValueError: If the number of label files differs from the number of
            image files (pairing is positional, so a mismatch would silently
            misalign or drop slices).
    """
    # Paths to images and labels
    image_dir = os.path.join(data_dir, 'train', dataset_name, 'images')
    label_dir = os.path.join(data_dir, 'train', dataset_name, 'labels')

    # Sorted so that image i and label i refer to the same slice.
    image_paths = sorted(glob(os.path.join(image_dir, '*.tif')))
    label_paths = sorted(glob(os.path.join(label_dir, '*.tif')))

    # Fail early with a clear message instead of an IndexError below.
    if not image_paths:
        raise FileNotFoundError(f'No .tif images found in {image_dir}')
    # zip() would otherwise truncate to the shorter list and leave the
    # remaining HDF5 slices zero-filled without any warning.
    if len(image_paths) != len(label_paths):
        raise ValueError(
            f'Found {len(image_paths)} images in {image_dir} but '
            f'{len(label_paths)} labels in {label_dir}'
        )

    # Read the first image to get the in-plane dimensions
    sample_image = tifffile.imread(image_paths[0])
    Z, Y, X = len(image_paths), sample_image.shape[0], sample_image.shape[1]

    # Report the source dtype and the volume shape that will be written
    print(sample_image.dtype)

    print(Z, Y, X)

    # The output file is written to the current working directory
    hdf5_path = os.path.join(os.getcwd(), f'{dataset_name}.hdf5')
    with h5py.File(hdf5_path, 'w') as hdf5_file:
        # Keep the dataset handles so the loop does not look them up by name
        # on every iteration.
        raw_ds = hdf5_file.create_dataset('raw', (Z, Y, X), np.uint16)
        label_ds = hdf5_file.create_dataset('label', (Z, Y, X), np.uint16)

        # Load images and labels into the HDF5 file one slice at a time
        for i, (img_path, label_path) in enumerate(zip(image_paths, label_paths)):
            img = tifffile.imread(img_path)
            label = tifffile.imread(label_path)
            print(f"Image {img_path} - Min: {img.min()}, Max: {img.max()}")

            raw_ds[i, ...] = img
            label_ds[i, ...] = label

    print(f'HDF5 file created for {dataset_name}: {hdf5_path}')

# Build one HDF5 volume for every dataset folder listed here.
datasets = ['kidney_1_dense']
for dataset in datasets:
    create_hdf5_dataset(data_dir=data_dir, dataset_name=dataset)


In [None]:
# Sanity-check the packed volume: inspect the slice at index 500 of each dataset.
from PIL import Image

file = 'kidney_1_dense.hdf5'

with h5py.File(file, 'r') as f:
    print(f.keys())

    # Read each slice from disk once and reuse it, instead of re-reading the
    # same slice for every printed statistic.
    print(f['raw'].shape)
    raw_slice = f['raw'][500, ...]
    print(raw_slice.shape)
    print(raw_slice.dtype)
    print(raw_slice.min())
    print(raw_slice.max())

    print(f['label'].shape)
    label_slice = f['label'][500, ...]
    print(label_slice.shape)
    print(label_slice.dtype)
    print(label_slice.min())
    print(label_slice.max())

    # NOTE(review): the statistics above are for index 500 but the image shown
    # is index 501 - confirm which slice was intended.
    img = Image.fromarray(f['label'][501, ...])
    img.show()

In [None]:
import h5py
import numpy as np
import os

original_hdf5_path = './kidney_1_dense.hdf5'  # full volume that gets split
train_hdf5_path = './kidney_1_dense_train.hdf5'  # slices whose index is not a multiple of 10
val_hdf5_path = './kidney_1_dense_val.hdf5'  # slices whose index is a multiple of 10

with h5py.File(original_hdf5_path, 'r') as original_file:
    # Handles to the source volumes; slices are read lazily inside the loop.
    raw_dataset = original_file['raw']
    label_dataset = original_file['label']

    # The leading axis indexes slices.
    num_slices = raw_dataset.shape[0]

    print(raw_dataset.shape[0], raw_dataset.shape[1], raw_dataset.shape[2])

    # Indices 0, 10, 20, ... go to validation, i.e. ceil(num_slices / 10) slices.
    num_val_slices = len(range(0, num_slices, 10))

    with h5py.File(train_hdf5_path, 'w') as train_file, h5py.File(val_hdf5_path, 'w') as val_file:
        # Allocate 'raw' and 'label' in each output file with the right slice count.
        for out_file, depth in ((train_file, num_slices - num_val_slices), (val_file, num_val_slices)):
            out_file.create_dataset('raw', (depth, raw_dataset.shape[1], raw_dataset.shape[2]), np.uint16)
            out_file.create_dataset('label', (depth, label_dataset.shape[1], label_dataset.shape[2]), np.uint16)

        # Next free slot in each output file.
        train_idx, val_idx = 0, 0

        for i in range(num_slices):
            to_val = (i % 10 == 0)
            if to_val:
                out_file, out_idx = val_file, val_idx
            else:
                out_file, out_idx = train_file, train_idx

            out_file['raw'][out_idx, ...] = raw_dataset[i, ...]
            out_file['label'][out_idx, ...] = label_dataset[i, ...]

            # Advance only the counter of the file that was just written.
            if to_val:
                val_idx += 1
            else:
                train_idx += 1

print("Training and validation HDF5 files have been created.")


In [None]:


# Shapes of the reference sample file, for comparison with the kidney split.
with h5py.File('sample_ovule_val.h5', 'r') as f:
    for key in ('raw', 'label'):
        print(f[key].shape)

# # use PIL to show the first image from sample_ovule_val.h5
# with h5py.File('sample_ovule_val.h5', 'r') as f:
#     img = Image.fromarray(f['raw'][100, ...])
#     img.show()

# Shapes, dtypes and one displayed slice of the kidney training split.
with h5py.File('kidney_1_dense_train.hdf5', 'r') as f:
    for key in ('raw', 'label'):
        print(f[key].shape)

    for key in ('raw', 'label'):
        print(f[key].dtype)

    # Displays the raw slice at index 1000 (the previous comment said "label").
    # `Image` is PIL's Image module, imported in an earlier cell.
    img = Image.fromarray(f['raw'][1000, ...])
    img.show()


In [None]:
# Print the shape of both datasets stored in the kidney validation file.
with h5py.File('kidney_1_dense_val.hdf5', 'r') as f:
    for key in ('raw', 'label'):
        print(f[key].shape)

In [None]:
# Number of slices (length of the leading axis) in the validation file.
with h5py.File('kidney_1_dense_val.hdf5', 'r') as f:
    val_slice_count = f['raw'].shape[0]
print(val_slice_count)
    

In [None]:
# Number of slices (length of the leading axis) in the training file.
with h5py.File('kidney_1_dense_train.hdf5', 'r') as f:
    train_slice_count = f['raw'].shape[0]
print(train_slice_count)

In [None]:
# show a grid of 8 images from the val dataset
import matplotlib.pyplot as plt
import h5py

with h5py.File('kidney_1_dense_val.hdf5', 'r') as f:
    # 2 x 4 panel: the first eight validation slices.
    fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(8, 4))

    for i, ax in enumerate(axes.ravel()):
        raw_slice = f['raw'][i, ...]
        # The title is the largest label value in the slice.
        label_max = f['label'][i, ...].max()

        ax.imshow(raw_slice, cmap='gray')
        ax.set_title(label_max)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Inline dataset configuration with two top-level sections, `slice_builder`
# and `transformer`. NOTE: despite the `_xml` suffix in the variable name, the
# text is YAML and is parsed with `yaml.safe_load` below.
# The text is whitespace-sensitive, so it is left exactly as written.
test_config_xml = """
    # SliceBuilder configuration
    slice_builder:
      name: FilterSliceBuilder
      # train patch size given to the network (adapt to fit in your GPU mem, generally the bigger patch the better)
      patch_shape: [60, 200, 200]
      # train stride between patches
      stride_shape: [30, 100, 100]
      # minimum volume of the labels in the patch
      threshold: 0.01
      # probability of accepting patches which do not fulfil the threshold criterion
      slack_acceptance: 0.01
     # data augmentation
    transformer:
      raw:
        - name: Normalize
          # parameters for the normalization
          norm01: true
        - name: ToTensor
          expand_dims: true
      label:
        - name: Normalize
          # parameters for the normalization
          norm01: true
        - name: ToTensor
          expand_dims: true
    """

import yaml
# Parse the YAML text into nested Python dicts/lists; `safe_load` only builds
# plain data types.
test_config = yaml.safe_load(test_config_xml)

# Display the parsed `slice_builder` section as the cell output to check the parse.
test_config['slice_builder']

In [None]:

import matplotlib.pyplot as plt
# Import FolderDataset from the pytorch3dunet package located in the parent directory
import sys
import os

# Assuming the 'pytorch3dunet' folder is in the parent directory of the
# notebook's working directory.
parent_dir = os.path.dirname(os.getcwd())
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)


# This import only resolves after the sys.path change above.
from pytorch3dunet.datasets.folder import FolderDataset


# Folders with the image and label slices, relative to the working directory.
image_path = 'kidney_1_dense/images'
label_path = 'kidney_1_dense/labels'

# Build the dataset from the parsed `test_config` sections.
dataset = FolderDataset(
    image_path,
    label_path,
    phase='train',
    slice_builder_config=test_config['slice_builder'],
    transformer_config=test_config['transformer'],
)

def show_sample(image, label, index):
    """Show one 2-D plane of an image patch next to the same plane of its label.

    Both inputs are indexed as ``[0, 0, :, :]``; the original comments describe
    this as "first channel, first slice", i.e. a (channel, z, y, x) layout is
    assumed - TODO confirm against the dataset output.

    Args:
        image: 4-D array-like image patch.
        label: 4-D array-like label patch.
        index: Value shown in both panel titles.
    """
    fig, (image_ax, label_ax) = plt.subplots(1, 2, figsize=(12, 6))

    panels = (
        (image_ax, image, f'Image - Slice {index}'),
        (label_ax, label, f'Label - Slice {index}'),
    )
    for ax, volume, caption in panels:
        ax.imshow(volume[0, 0, :, :], cmap='gray')
        ax.set_title(caption)
        ax.axis('off')

    plt.show()

# Display ten consecutive dataset items (indices 500-509) with their shapes.
for i in range(500, 510):
    image, label = dataset[i]
    print(f'Image shape: {image.shape}, Label shape: {label.shape}')
    show_sample(image=image, label=label, index=i)

In [None]:
# Check the datatype and shape of the first image in a dataset folder.
# NOTE(review): despite its name, `kidney_2` points at the kidney_1_dense folder.
kidney_2 = r'C:\Users\hamis\OneDrive\Documents\GitHub\human-vasculature-ml-comp\blood-vessel-segmentation\train\kidney_1_dense'

import os
from glob import glob
import tifffile

image_dir = os.path.join(kidney_2, 'images')
label_dir = os.path.join(kidney_2, 'labels')  # defined but not used in this cell

# Sort the image paths in place so index 0 is the first file by path.
image_paths = glob(os.path.join(image_dir, '*.tif'))
image_paths.sort()

# Only the first image is read.
sample_image = tifffile.imread(image_paths[0])

print(sample_image.dtype, sample_image.shape, sep='\n')


In [None]:

val_dataset = r'C:\Users\hamis\OneDrive\Documents\GitHub\human-vasculature-ml-comp\blood-vessel-segmentation\train\kidney_2'

## Move every 4th image and its label into a new folder called 'kidney_2_val' ##
# The files are moved (os.rename), not copied, so they no longer remain in the
# source folders afterwards.

import os
from glob import glob

image_dir = os.path.join(val_dataset, 'images')
label_dir = os.path.join(val_dataset, 'labels')

# Get sorted lists of image and label paths; pairing is by position.
image_paths = sorted(glob(os.path.join(image_dir, '*.tif')))
label_paths = sorted(glob(os.path.join(label_dir, '*.tif')))

# Check before touching the filesystem: with fewer labels than images the loop
# would fail part-way through, after some images had already been moved.
if len(image_paths) != len(label_paths):
    raise ValueError(
        f'Found {len(image_paths)} images in {image_dir} but '
        f'{len(label_paths)} labels in {label_dir}'
    )

# Destination folders for the selected slices.
new_image_dir = os.path.join(val_dataset, 'kidney_2_val', 'images')
new_label_dir = os.path.join(val_dataset, 'kidney_2_val', 'labels')

# os.mkdir raises FileExistsError if the folder already exists. That stops an
# accidental second run, which would move every 4th of the *remaining* files.
os.mkdir(os.path.join(val_dataset, 'kidney_2_val'))
os.mkdir(new_image_dir)
os.mkdir(new_label_dir)

# Move every 4th image together with the label at the same position.
for src_image, src_label in zip(image_paths[::4], label_paths[::4]):
    os.rename(src_image, os.path.join(new_image_dir, os.path.basename(src_image)))
    os.rename(src_label, os.path.join(new_label_dir, os.path.basename(src_label)))


In [1]:
import surface_distance
import os
from glob import glob
import tifffile

# Load two neighbouring label slices (1000 and 1001) of kidney_1_dense to use as a test pair.
label = tifffile.imread(r'C:\Users\hamis\OneDrive\Documents\GitHub\human-vasculature-ml-comp\blood-vessel-segmentation\train\kidney_1_dense\labels\1000.tif')
label_2 = tifffile.imread(r'C:\Users\hamis\OneDrive\Documents\GitHub\human-vasculature-ml-comp\blood-vessel-segmentation\train\kidney_1_dense\labels\1001.tif')

# Binarise the masks: every non-zero pixel is foreground.
# This replaces "divide by max(), then astype(bool)". The result is the same
# whenever the slice has a non-zero maximum, but on an all-zero slice the
# division gave 0/0 = NaN, and NaN casts to True, so the whole mask became
# foreground.
label = label != 0
label_2 = label_2 != 0

# Surface distances between the two masks, with a spacing of 1 along both axes.
surface_dist = surface_distance.compute_surface_distances(label, label_2, [1, 1])

# Surface dice at a tolerance of 0.
surface_dice = surface_distance.compute_surface_dice_at_tolerance(surface_dist, 0)

# print the surface dice
print(surface_dice)

0.5505622659658369


In [9]:
from pytorch3dunet.unet3d.metrics import SurfaceDice
# import tensor conversion
import torch

# Instantiate the project's SurfaceDice metric with tolerance 0 and a spacing of 1 per axis.
surface_dice = SurfaceDice(tolerance=0, spacing=[1,1])

# Convert both boolean masks to tensors, then evaluate the metric on the pair;
# the result is shown as the cell output.
label_tensor = torch.from_numpy(label)
label_2_tensor = torch.from_numpy(label_2)
surface_dice(label_tensor, label_2_tensor)

0.5505622659658369