In [1]:
import h5py
import nibabel as nib
import numpy as np
import os
import math
import PIL
from PIL import Image

In [10]:
# Input directories: CT volumes and their segmentation masks.
volumeDir = "Dataset/Volumes/"
segmentationDir = "Dataset/Segmentations/"

# Name of the HDF5 file the dataset is written to.
fileName = "TestDataset.hdf5"

# Number of regular files (sub-directories are not counted) in the volume directory.
numFiles = sum(
    1
    for entry in os.listdir(volumeDir)
    if os.path.isfile(os.path.join(volumeDir, entry))
)

# Share of each scan's slices to keep; the kept window is centred on the middle slice.
keepRate = 0.3

# Every slice/segmentation is resized to imageDim x imageDim.
imageDim = 256

In [11]:
def _visible_files(directory):
    """Return the sorted names of the non-hidden regular files in `directory`."""
    return sorted(
        entry
        for entry in os.listdir(directory)
        if not entry.startswith('.') and os.path.isfile(os.path.join(directory, entry))
    )


# os.listdir() returns entries in arbitrary order, so both listings are filtered and
# sorted identically before a volume is paired with a segmentation by position.
# Assumes the volume and segmentation file names sort into the same scan order
# (e.g. they share a numeric suffix) -- TODO confirm against the dataset naming.
volumeNames = _visible_files(volumeDir)
segmentationNames = _visible_files(segmentationDir)
if len(volumeNames) != len(segmentationNames):
    raise ValueError(
        f"Found {len(volumeNames)} volumes but {len(segmentationNames)} segmentations"
    )

livers = 0
total = 0

# The context manager closes the HDF5 file even if a scan fails to load or resize.
with h5py.File(fileName, "w") as file:
    for i, (name, segmentationName) in enumerate(zip(volumeNames, segmentationNames)):
        #Creates group for current scan
        currGroup = file.create_group("Scan" + str(i))

        #Loads segmentation and volume data from .nii file
        ctScan = nib.load(os.path.join(volumeDir, name))
        volumeData = ctScan.get_fdata()

        segmentation = nib.load(os.path.join(segmentationDir, segmentationName))
        segmentData = segmentation.get_fdata()

        #Window of usable slices: middle index -/+ (middle index * keepRate)
        middle = (volumeData.shape[2] - 1) / 2
        firstPlane = math.ceil(middle - middle * keepRate)
        stopPlane = math.floor(middle + middle * keepRate)

        #Loops through all usable slices and adds data to h5 file
        # NOTE(review): range() excludes stopPlane, so the window holds one slice fewer
        # above the middle than below it. Left unchanged so existing slice counts stay
        # the same -- confirm whether the upper bound was meant to be inclusive.
        for plane in range(firstPlane, stopPlane):
            volumeImage = Image.fromarray(volumeData[:, :, plane].astype(np.int16))
            volumeSlice = np.array(volumeImage.resize((imageDim, imageDim), Image.BILINEAR))
            segmentSlice = segmentData[:, :, plane].astype(np.int16)

            #Gets max value of current segmentation, limits it to 1 (1 if contains liver, 0 if not)
            # Cast to a plain Python int so the running counter stays a Python int
            # rather than turning into a numpy int16 scalar.
            label = int(min(np.amax(segmentSlice), 1))
            #Nearest-neighbour resampling keeps the mask values discrete
            segmentSlice = np.array(Image.fromarray(segmentSlice).resize((imageDim, imageDim), Image.NEAREST))

            livers += label
            total += 1

            #Creates subgroup for current slice in current scan, adds slice/segmentation/label data
            #Slice groups are numbered from 0 relative to the first kept plane
            currSlice = currGroup.create_group("Slice" + str(plane - firstPlane))
            currSlice.create_dataset("Slice", data=volumeSlice)
            currSlice.create_dataset("Segmentation", data=segmentSlice)
            currSlice.attrs.create("ImageLabel", label, (1,), "int")

print(f"Liver Present: {livers} Total: {total}")

Liver Present: 126 Total: 803


In [4]:
sumLiver = 0
total = 0

# The context manager closes the file even if one of the reads below raises.
with h5py.File(fileName, 'r') as dataFile:
    scanNames = list(dataFile.keys())
    print(scanNames)

    #Prints each piece of data in each slice in each scan
    for scan in scanNames:
        scanData = dataFile[scan]
        print(list(scanData.keys()))

        # Named sliceName (not slice) so the builtin slice() is not shadowed for
        # the rest of the kernel session.
        for sliceName in scanData:
            sliceGroup = scanData[sliceName]
            imageLabel = sliceGroup.attrs.get("ImageLabel")

            print(sliceGroup["Slice"])
            print(sliceGroup["Segmentation"])
            print(imageLabel)

            #Calculates the rate of liver in the dataset
            # ImageLabel is read back as a one-element array (it prints as e.g. [0]),
            # so sumLiver becomes a one-element array as well.
            sumLiver += imageLabel
            total += 1

print(f"{sumLiver} {total}")

['Scan0', 'Scan1', 'Scan2', 'Scan3', 'Scan4', 'Scan5']
['Slice0', 'Slice1', 'Slice10', 'Slice11', 'Slice12', 'Slice13', 'Slice14', 'Slice15', 'Slice16', 'Slice17', 'Slice2', 'Slice3', 'Slice4', 'Slice5', 'Slice6', 'Slice7', 'Slice8', 'Slice9']
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<HDF5 dataset "Segmentation": shape (256, 256), type "<i4">
[0]
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<HDF5 dataset "Segmentation": shape (256, 256), type "<i4">
[0]
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<HDF5 dataset "Segmentation": shape (256, 256), type "<i4">
[0]
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<HDF5 dataset "Segmentation": shape (256, 256), type "<i4">
[0]
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<HDF5 dataset "Segmentation": shape (256, 256), type "<i4">
[0]
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<HDF5 dataset "Segmentation": shape (256, 256), type "<i4">
[0]
<HDF5 dataset "Slice": shape (256, 256), type "<i4">
<