In [2]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import tensorflow as tf

from scipy import ndimage
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

## Directory structure:
* **Dataset Directory**: $HOME/Datasets/ImageCLEF/
* extracted .nii.gz files are in a Dataset subfolder in Dataset Directory
* metadata file is in the Dataset Directory

In [3]:
home = os.path.expanduser('~')
base = os.path.join(home, 'Datasets', 'ImageCLEF')

dataset_dir = os.path.join(base, 'Dataset')

label_path = os.path.join(base, '4231cdb3-af46-4674-be08-95b904a62093_TrainSet_metaData.csv')
df = pd.read_csv(label_path)

df.head(10)

Unnamed: 0,FileName,TypeOfTB
0,TRN_0001.nii.gz,1
1,TRN_0002.nii.gz,1
2,TRN_0003.nii.gz,1
3,TRN_0004.nii.gz,1
4,TRN_0005.nii.gz,1
5,TRN_0006.nii.gz,1
6,TRN_0007.nii.gz,4
7,TRN_0008.nii.gz,1
8,TRN_0009.nii.gz,1
9,TRN_0010.nii.gz,1


In [4]:
filenames = df['FileName'].tolist()
num_samples = len(filenames)

labels = df['TypeOfTB'].to_numpy() - 1
num_classes = labels.max() + 1

labels = tf.one_hot(labels, depth=num_classes)

idxs = [i for i in range(num_samples)]

train_idxs, val_idxs = train_test_split(idxs, test_size=0.2)

del num_classes, idxs

In [5]:
def read_nifti_file(filepath):
    """Read and load volume"""
    # Read file
    scan = nib.load(filepath)
    # Get raw data
    scan = scan.get_fdata()
    return scan

def normalize(volume):
    """Normalize the volume"""
    min = -1000
    max = 400
    volume[volume < min] = min
    volume[volume > max] = max
    volume = (volume - min) / (max - min)
    volume = volume.astype("float32")
    return volume

def resize_volume(img):
    """Resize across z-axis"""
    # Set the desired depth
    
#     print(f"Shape: {img.shape}")
    desired_depth = 64
    desired_width = 512
    desired_height = 512
    # Get current depth
    current_depth = img.shape[-1]
    current_width = img.shape[0]
    current_height = img.shape[1]
    # Compute depth factor
    depth = current_depth / desired_depth
    width = current_width / desired_width
    height = current_height / desired_height
    depth_factor = 1 / depth
    width_factor = 1 / width
    height_factor = 1 / height
    # Rotate
    
#     img = ndimage.rotate(img, 90, reshape=False)
    # Resize across z-axis
    
    img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)
    return img


def process_scan(path):
    """Read and resize volume"""
    # Read scan
    volume = read_nifti_file(path)
    # Normalize
#     volume = normalize(volume)
    # Resize width, height and depth
    volume = resize_volume(volume)
    return volume

In [58]:
def read_fn(file_names, labels, file_idxs):
    for i, idx in enumerate(file_idxs):
        img_path = os.path.join(dataset_dir, file_names[idx])
        processed = process_scan(img_path)
        
        image = tf.convert_to_tensor(processed)
        image = image[np.newaxis, ...]
        y = labels[idx]
        
        yield image, y[np.newaxis, ...]

In [59]:
def train_f():
    fn = read_fn(filenames, labels, train_idxs)
    ex = next(fn)
    yield ex
    
def val_f():
    fn = read_fn(filenames, labels, val_idxs)
    ex = next(fn)
    yield ex

In [60]:
train_batch_size = 32

train_dataset = tf.data.Dataset.from_generator(train_f, output_types=tf.float64)
train_dataset = train_dataset.repeat(None)
train_dataset = train_dataset.batch(train_batch_size)
train_dataset = train_dataset.prefetch(2)


val_batch_size = 128

val_dataset = tf.data.Dataset.from_generator(val_f, output_types=tf.float64)
val_dataset = val_dataset.repeat(None)
val_dataset = val_dataset.batch(val_batch_size)
val_dataset = val_dataset.prefetch(2)

train_steps = int(len(train_idxs) / train_batch_size)
val_steps = int(len(val_idxs) / val_batch_size)

In [61]:
X, y = None, None
for i, j in read_fn(filenames, labels, train_idxs):
    X, y = i, j
    break

In [63]:
print(X.shape)
print(y.shape)

(1, 512, 512, 64)
(1, 5)


In [52]:
def get_model(width=512, height=512, depth=64):
    """Build a 3D convolutional neural network model."""

    inputs = keras.Input((width, height, depth, 1))

    x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(inputs)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=64, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=128, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv3D(filters=256, kernel_size=3, activation="relu")(x)
    x = layers.MaxPool3D(pool_size=2)(x)
    x = layers.BatchNormalization()(x)

    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(units=512, activation="relu")(x)
    x = layers.Dropout(0.3)(x)

    outputs = layers.Dense(units=5, activation="softmax")(x)

    # Define the model.
    model = keras.Model(inputs, outputs, name="3dcnn")
    return model


# Build model.
model = get_model(width=512, height=512, depth=64)
model.summary()

Model: "3dcnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 512, 512, 64, 1)] 0         
_________________________________________________________________
conv3d_20 (Conv3D)           (None, 510, 510, 62, 64)  1792      
_________________________________________________________________
max_pooling3d_20 (MaxPooling (None, 255, 255, 31, 64)  0         
_________________________________________________________________
batch_normalization_20 (Batc (None, 255, 255, 31, 64)  256       
_________________________________________________________________
conv3d_21 (Conv3D)           (None, 253, 253, 29, 64)  110656    
_________________________________________________________________
max_pooling3d_21 (MaxPooling (None, 126, 126, 14, 64)  0         
_________________________________________________________________
batch_normalization_21 (Batc (None, 126, 126, 14, 64)  256   

In [None]:
initial_learning_rate = 0.0001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
    metrics=["acc"],
)

# Define callbacks.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "3d_image_classification.h5", save_best_only=True
)

early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)

# Train the model, doing validation at the end of each epoch
epochs = 100
model.fit(
    X,
    y,
#     validation_data=val_dataset,
    epochs=epochs,
    shuffle=True,
    verbose=2,
    steps_per_epoch=train_steps,
#     validation_steps=val_steps,
#     callbacks=[checkpoint_cb, early_stopping_cb]
)

Epoch 1/100
22/22 - 152s - loss: 0.4959 - acc: 0.9545
Epoch 2/100
