# Make predictions from INPUT_FOLDER

Note that we assume your input data is stored as **.h5 files**, which contain both raw data (image) and its mask (label needed for metrics computations). The input data is in INPUT_FOLDER.

**Remark**: if your data is stored in your Google Drive, you need to mount it first. All the code needed to do so is provided below.

We return new h5 files, with "raw", "label" and "pred" as keys. The output data will be in a new folder OUTPUT_FOLDER.

Enable GPU acceleration (in Colab: "Runtime" > "Change runtime type", or "Edit" > "Notebook settings") for faster predictions.

In [None]:
# --- User settings -----------------------------------------------------------
# Folder scanned for input samples; each file is read through its 'raw' and 'label' datasets.
INPUT_FOLDER = '/content/drive/MyDrive/6_aneurysm_segmentation/challenge_dataset/' # folder holding the data to predict
# Folder the exported prediction files are written to (created if it does not exist).
OUTPUT_FOLDER = '/content/predictions/'
# Saved Keras model reloaded for inference.
PRETRAINED_MODEL_PATH = '/content/drive/MyDrive/6_aneurysm_segmentation/3D_model_resnet18_Noneweights_100epochs_jaccard.best.hdf5'
# When True, IoU / precision / recall / F-score are printed once the predictions are computed.
COMPUTE_METRICS = True

# Don't touch these - should be removed for final version
# NOTE(review): both flags only affect how the samples are loaded for prediction; nothing is trained in this notebook.
CROP = 64 # None or 64: None keeps the samples unchanged, 64 crops them (to (64,64,64) assuming the first axis is already 64 long - TODO confirm)
CENTER_CUBE_ONLY = True # True: keep only the central CROP x CROP window along the last two axes (less data); False: split every volume into non-overlapping cubes and predict on all of them

## Imports and Drive mount

In [None]:
!pip install segmentation-models-3D --quiet

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting segmentation-models-3D
  Downloading segmentation_models_3D-1.0.4-py3-none-any.whl (33 kB)
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 KB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Collecting classification-models-3D>=1.0.6
  Downloading classification_models_3D-1.0.6-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.9/62.9 KB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: classification-models-3D, keras-applications, segmentation-models-3D
Successfully installed classification-models-3D-1.0.6 keras-applications-1.0.8 segmentation-models-3D-1.0.4


In [None]:
# Connect your Google Drive to the session: its content becomes reachable under /content/drive,
# which is where INPUT_FOLDER and PRETRAINED_MODEL_PATH point to by default.
# NOTE(review): google.colab is only importable inside a Colab runtime - skip this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# import libraries
import numpy as np
import tensorflow as tf
import h5py
import os
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
import segmentation_models_3D as sm
from tqdm import tqdm

Segmentation Models: using `tf.keras` framework.


In [None]:
def _split_into_cubes(volume, size):
    """
    Cut a 3D array into non-overlapping (size, size, size) cubes.

    Pure-numpy stand-in for the non-overlapping `patchify` call followed by a
    reshape to (-1, size, size, size). Cubes are ordered with the first axis
    varying slowest; voxels beyond the last full cube on an axis are dropped.

    Raises
    ------
    ValueError
        If `volume` is not 3D or is smaller than `size` along any axis.
    """
    if volume.ndim != 3:
        raise ValueError(f'Expected a 3D volume, got shape {volume.shape}')

    n0, n1, n2 = (dim // size for dim in volume.shape)
    if min(n0, n1, n2) == 0:
        raise ValueError(f'Volume of shape {volume.shape} is smaller than the cube size {size}')

    trimmed = volume[:n0 * size, :n1 * size, :n2 * size]
    blocks = trimmed.reshape(n0, size, n1, size, n2, size)
    # Bring the three cube indices first, then flatten them into a single axis.
    return blocks.transpose(0, 2, 4, 1, 3, 5).reshape(-1, size, size, size)


def load_data_from(path_folder):
    """
    Load every HDF5 sample (.h5 / .hdf5) found in `path_folder`.

    Each file must expose a 'raw' dataset (image) and a 'label' dataset (mask).
    The module-level CROP and CENTER_CUBE_ONLY settings select what is kept:

    * CROP is None: volumes are kept exactly as stored.
    * CROP set and CENTER_CUBE_ONLY: only the [:, CROP:2*CROP, CROP:2*CROP] window is kept.
    * CROP set and not CENTER_CUBE_ONLY: each volume is split into non-overlapping
      (CROP, CROP, CROP) cubes and all of them are kept.

    Returns
    -------
    raw_data : np.ndarray
        Stacked images, one entry per sample (or per cube).
    labels : np.ndarray
        Stacked masks, aligned with `raw_data`.
    names : list of str
        One file name per entry of `raw_data`. When volumes are split into cubes,
        each cube gets its own name ('<stem>_cube<k><ext>') so that entries stay
        aligned with the data and can be exported without overwriting each other.
    """
    # Keep HDF5 files only (skips e.g. notebook checkpoint folders) and sort them
    # so that the sample order does not depend on the file system.
    file_names = sorted(
        entry for entry in os.listdir(path_folder)
        if entry.lower().endswith(('.h5', '.hdf5'))
    )
    N = len(file_names)
    print(f'{N} samples in dataset.')
    print(file_names)

    raw_data = []
    labels = []
    names = []

    for file_name in tqdm(file_names):
        # The context manager closes the file even if a dataset is missing.
        with h5py.File(os.path.join(path_folder, file_name), 'r') as f:
            X, Y = np.array(f['raw']), np.array(f['label'])

        if CROP is None:
            raw_data.append(X)
            labels.append(Y)
            names.append(file_name)

        elif CENTER_CUBE_ONLY:  # only keep the center window
            raw_data.append(X[:, CROP:2*CROP, CROP:2*CROP])
            labels.append(Y[:, CROP:2*CROP, CROP:2*CROP])
            names.append(file_name)

        else:  # keep all cubes = more data
            X_cubes = _split_into_cubes(X, CROP)
            Y_cubes = _split_into_cubes(Y, CROP)
            stem, ext = os.path.splitext(file_name)

            raw_data.extend(X_cubes)
            labels.extend(Y_cubes)
            names.extend(f'{stem}_cube{k}{ext}' for k in range(len(X_cubes)))

    return np.array(raw_data), np.array(labels), names


def analytics(y_test, y_pred01, threshold=None):
    """
    Print segmentation metrics for binarised predictions.

    Reports the IoU score from segmentation_models_3D and the precision, recall
    and F-score of the positive class (label 1) computed over all voxels.

    Parameters
    ----------
    y_test : np.ndarray
        Ground-truth masks.
    y_pred01 : np.ndarray
        Thresholded predictions, same shape as `y_test`.
    threshold : float, optional
        Threshold used to binarise the predictions; only shown in the report
        header. Defaults to the notebook-level THRESHOLD.
    """
    if threshold is None:
        # Backward compatible with callers that rely on the global set in the prediction cell.
        threshold = THRESHOLD

    print(f'------ AFTER THRESHOLDING AT {threshold} ------')
    print('> sm.metrics.IOUScore :', sm.metrics.IOUScore()(y_test, y_pred01))

    # precision_recall_fscore_support report.
    # labels=[0, 1] guarantees an entry for the positive class even when it is absent
    # from both masks (otherwise index 1 does not exist); zero_division=0 reports 0
    # instead of warning when a metric is undefined.
    precision, recall, fscore, _ = precision_recall_fscore_support(
        y_test.flatten(),
        y_pred01.flatten(),
        labels=[0, 1],
        zero_division=0,
    )
    print('> Precision :', precision[1])
    print('> Recall :', recall[1])
    print('> Fscore :', fscore[1])

In [None]:
# Load the pretrained model from PRETRAINED_MODEL_PATH.
# compile=False: the model is loaded without being compiled, which is enough here
# because it is only used through model.predict in the next cell.
print(f"Reload from : {PRETRAINED_MODEL_PATH}")
model = tf.keras.models.load_model(PRETRAINED_MODEL_PATH, compile=False)

Reload from : /content/drive/MyDrive/6_aneurysm_segmentation/3D_model_resnet18_Noneweights_100epochs_jaccard.best.hdf5


The next cell runs the entire data loading, prediction and export process, using the model loaded above. Computing metrics is not mandatory, but it is useful to evaluate the performance of the model.

In [None]:
# load data from INPUT_FOLDER
raw_data, labels, names = load_data_from(INPUT_FOLDER)

# create output folder
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

print(f'\nCreated {OUTPUT_FOLDER}')

# add a trailing axis to every sample (presumably the channel axis the model expects - TODO confirm)
X_to_predict = np.expand_dims(raw_data, axis=-1)

y_pred = model.predict(X_to_predict, batch_size=2)
# convert to binary
THRESHOLD = 0.5
# Drop the trailing axis only: a bare .squeeze() would also remove the batch axis when
# a single sample is predicted, and the per-sample indexing below would then be wrong.
# Assumes the model outputs a single channel (np.squeeze raises ValueError otherwise).
y_pred01 = np.squeeze(y_pred > THRESHOLD, axis=-1)

print('Predictions complete.')

if COMPUTE_METRICS:
    # can take a few seconds, feel free to remove if you don't care about metrics
    analytics(np.array(labels, dtype='float32'), y_pred01)

# convert to uint8 to match the initial format of the masks
y_pred01 = y_pred01.astype(np.uint8)

# every prediction must have its own file name, otherwise samples would be mispaired on export
if len(names) != len(y_pred01):
    raise ValueError(f'{len(names)} file names for {len(y_pred01)} predictions: cannot export.')

# save into OUTPUT_FOLDER
for filename, raw, label, pred in zip(names, raw_data, labels, y_pred01):
    # os.path.join works whether or not OUTPUT_FOLDER ends with a separator;
    # the context manager closes the file even if a write fails.
    with h5py.File(os.path.join(OUTPUT_FOLDER, filename), 'w') as h5f:
        h5f.create_dataset('raw', data=raw)
        h5f.create_dataset('label', data=label)
        h5f.create_dataset('pred', data=pred)

print('\nExport complete.')

103 samples in dataset.
['scan_1.h5', 'scan_3.h5', 'scan_2.h5', 'scan_4.h5', 'scan_5.h5', 'scan_6.h5', 'scan_7.h5', 'scan_8.h5', 'scan_9.h5', 'scan_11.h5', 'scan_10.h5', 'scan_13.h5', 'scan_14.h5', 'scan_12.h5', 'scan_16.h5', 'scan_18.h5', 'scan_17.h5', 'scan_15.h5', 'scan_19.h5', 'scan_20.h5', 'scan_21.h5', 'scan_23.h5', 'scan_22.h5', 'scan_24.h5', 'scan_27.h5', 'scan_26.h5', 'scan_28.h5', 'scan_25.h5', 'scan_31.h5', 'scan_29.h5', 'scan_30.h5', 'scan_34.h5', 'scan_35.h5', 'scan_33.h5', 'scan_32.h5', 'scan_38.h5', 'scan_37.h5', 'scan_36.h5', 'scan_40.h5', 'scan_41.h5', 'scan_39.h5', 'scan_43.h5', 'scan_42.h5', 'scan_44.h5', 'scan_48.h5', 'scan_46.h5', 'scan_45.h5', 'scan_47.h5', 'scan_51.h5', 'scan_49.h5', 'scan_50.h5', 'scan_55.h5', 'scan_54.h5', 'scan_53.h5', 'scan_52.h5', 'scan_57.h5', 'scan_58.h5', 'scan_56.h5', 'scan_60.h5', 'scan_61.h5', 'scan_59.h5', 'scan_64.h5', 'scan_62.h5', 'scan_63.h5', 'scan_65.h5', 'scan_66.h5', 'scan_67.h5', 'scan_70.h5', 'scan_69.h5', 'scan_68.h5', 'sca

100%|██████████| 103/103 [00:11<00:00,  8.69it/s]



Created /content/predictions/
Predictions complete.
------ AFTER THRESHOLDING AT 0.5 ------
> sm.metrics.IOUScore : tf.Tensor(0.53295004, shape=(), dtype=float32)
> Precision : 0.6465867888760054
> Recall : 0.7520121585752205
> Fscore : 0.6953260299458679

Export complete.
