In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import nibabel as nib
import tensorflow as tf

from scipy import ndimage
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split

In [2]:
# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Every visible GPU is configured; on a CPU-only machine the list is empty and
# the loop is a no-op rather than an IndexError on gpu_devices[0].
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

# tf.config.gpu.set_per_process_memory_growth(True)

In [4]:
# Base name of the saved model; '<model_name>.h5' is the file loaded further down.
model_name = 'UNet_3D_v1'
# 3d_image_classification_normalized_history.
# Seed passed to train_test_split so the train/validation split is reproducible.
random_state=1

## Directory structure:
* **Dataset Directory**: `$HOME/Datasets/ImageCLEF/`
* The extracted `.nii.gz` files are in the `Dataset` subfolder of the Dataset Directory.
* The metadata CSV file is in the Dataset Directory itself.

In [5]:
# Resolve every path relative to the current user's home directory.
home = os.path.expanduser('~')
base = os.path.join(home, 'Datasets', 'ImageCLEF')

# Folder that holds the extracted .nii.gz volumes.
dataset_dir = os.path.join(base, 'Dataset')

# Training-set metadata CSV; the columns used below are FileName and TypeOfTB.
label_path = os.path.join(base, '4231cdb3-af46-4674-be08-95b904a62093_TrainSet_metaData.csv')
df = pd.read_csv(label_path)

# Preview the first ten rows.
df.head(10)

Unnamed: 0,FileName,TypeOfTB
0,TRN_0001.nii.gz,1
1,TRN_0002.nii.gz,1
2,TRN_0003.nii.gz,1
3,TRN_0004.nii.gz,1
4,TRN_0005.nii.gz,1
5,TRN_0006.nii.gz,1
6,TRN_0007.nii.gz,4
7,TRN_0008.nii.gz,1
8,TRN_0009.nii.gz,1
9,TRN_0010.nii.gz,1


In [8]:
filenames = df['FileName'].tolist()
num_samples = len(filenames)

# TypeOfTB is 1-based in the CSV; subtracting 1 gives 0-based class ids.
stratify = df['TypeOfTB'].to_numpy() - 1
labels = df['TypeOfTB'].to_numpy() - 1
num_classes = labels.max() + 1

# From here on `labels` holds one one-hot row per sample.
labels = tf.one_hot(labels, depth=num_classes)

idxs = list(range(num_samples))

# 80/20 split over sample indices, stratified on the class id.
train_idxs, val_idxs = train_test_split(
    idxs,
    test_size=0.2,
    random_state=random_state,
    stratify=stratify,
)

# Drop the temporaries so they do not linger in the kernel namespace.
del num_classes, idxs, stratify

In [7]:
# Target size of the last (depth) axis after resizing; also used in the dataset shapes.
img_depth = 84

def read_nifti_file(filepath):
    """Load the file at ``filepath`` with nibabel and return its ``get_fdata()`` array."""
    return nib.load(filepath).get_fdata()

def normalize(volume, lower=-1000, upper=-300):
    """Clip a volume to an intensity window and rescale it to [0, 1].

    Args:
        volume: Numeric array of voxel intensities. It is left unmodified.
        lower: Values below this bound are raised to it (default -1000).
        upper: Values above this bound are lowered to it (default -300).

    Returns:
        A new float32 array with the same shape as ``volume`` in which
        ``lower`` maps to 0.0 and ``upper`` maps to 1.0.
    """
    # np.clip returns a fresh array, so the caller's data is not overwritten
    # (the previous masked assignments edited ``volume`` in place).
    clipped = np.clip(volume, lower, upper)
    scaled = (clipped - lower) / (upper - lower)
    return scaled.astype("float32")

def resize_volume(img, desired_width=512, desired_height=512, desired_depth=None):
    """Resample a 3-D volume to a fixed size with linear interpolation.

    Axis 0 is treated as width, axis 1 as height and the last axis as depth.

    Args:
        img: 3-D array to resize.
        desired_width: Target size of axis 0 (default 512).
        desired_height: Target size of axis 1 (default 512).
        desired_depth: Target size of the last axis. When ``None`` the
            module-level ``img_depth`` is used.

    Returns:
        The array produced by ``scipy.ndimage.zoom`` with ``order=1``.
    """
    if desired_depth is None:
        desired_depth = img_depth

    current_width, current_height = img.shape[0], img.shape[1]
    current_depth = img.shape[-1]

    # Per-axis zoom factor: target size relative to current size.
    width_factor = 1 / (current_width / desired_width)
    height_factor = 1 / (current_height / desired_height)
    depth_factor = 1 / (current_depth / desired_depth)

    return ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)


def process_scan(path):
    """Load the scan at ``path``, normalize its intensities, then resize it."""
    raw = read_nifti_file(path)
    resized = resize_volume(normalize(raw))
    # Log the path once the scan has been fully processed.
    print(path)
    return resized

In [7]:
def _scan_label_pairs(file_idxs):
    """Yield ``(image, one_hot_label)`` for each sample index in ``file_idxs``.

    Each scan is read and preprocessed with ``process_scan`` and given a
    trailing channel axis. The label is the matching row of ``labels``.
    """
    for idx in file_idxs:
        img_path = os.path.join(dataset_dir, filenames[idx])
        processed = process_scan(img_path)

        # NOTE(review): the tensor is created as float16 while the datasets
        # built from these generators declare tf.float32 — confirm the
        # reduced precision is intended.
        image = tf.convert_to_tensor(processed, dtype=tf.float16)
        image = image[..., np.newaxis]

        yield image, labels[idx]


def train_f():
    """Generator over the training split (``train_idxs``)."""
    yield from _scan_label_pairs(train_idxs)


def val_f():
    """Generator over the validation split (``val_idxs``)."""
    yield from _scan_label_pairs(val_idxs)

In [8]:
def _build_dataset(generator, batch_size):
    """Wrap a scan generator in a repeating, batched, prefetching tf.data pipeline.

    Args:
        generator: Zero-argument callable that yields ``(image, label)`` pairs.
        batch_size: Number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` that repeats indefinitely, so callers must pass
        an explicit ``steps`` count when consuming it.
    """
    dataset = tf.data.Dataset.from_generator(
        generator,
        (tf.float32, tf.float32),
        # The label width is taken from the one-hot `labels` tensor rather
        # than a hard-coded class count.
        (tf.TensorShape([512, 512, img_depth, 1]), tf.TensorShape([labels.shape[-1]])),
    )
    return dataset.repeat().batch(batch_size).prefetch(2)


train_batch_size = 1
train_dataset = _build_dataset(train_f, train_batch_size)

val_batch_size = 1
val_dataset = _build_dataset(val_f, val_batch_size)

# Step counts: one half of the training batches and one third of the validation batches.
train_steps = len(train_idxs) // (train_batch_size * 2)
val_steps = len(val_idxs) // (val_batch_size * 3)
# val_steps = 64
# val_steps = 64

In [9]:
# Load Model

# Saved models live under ~/ImageCLEF2021/Models/3D_CNNs/<model_name>.h5
model_dir = os.path.join(home, 'ImageCLEF2021', 'Models', '3D_CNNs')
model_path = os.path.join(model_dir, f'{model_name}.h5')
model = tf.keras.models.load_model(model_path)
# model.trainable = False

# Optional re-compile, kept for reference (currently disabled):
# initial_learning_rate = 0.0001
# lr_schedule = keras.optimizers.schedules.ExponentialDecay(
#     initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
# )
# model.compile(
#     loss="binary_crossentropy",
#     optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
#     metrics=["acc"],
# )

model.summary()

Model: "3d_image_classification_normalized"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 512, 512, 84, 1)] 0         
_________________________________________________________________
3D_64_1 (Conv3D)             (None, 510, 510, 82, 64)  1792      
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 255, 255, 41, 64)  0         
_________________________________________________________________
batch_normalization (BatchNo (None, 255, 255, 41, 64)  256       
_________________________________________________________________
3D_64_2 (Conv3D)             (None, 253, 253, 39, 64)  110656    
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 126, 126, 19, 64)  0         
_________________________________________________________________
batch_normalization_1 (Batch (No

In [19]:
# val_dataset repeats indefinitely, so `steps` bounds the run; with
# val_batch_size = 1 this is one step per validation scan.
# NOTE(review): the accuracy stored below (0.80) disagrees with the sklearn
# accuracy computed at the end of the notebook (0.2446). Presumably the
# compiled metric is a per-element binary accuracy (the commented-out compile
# call uses binary_crossentropy) — confirm before quoting this number.
model.evaluate(val_dataset, steps=len(val_idxs))



[0.6995394825935364, 0.8000015]

In [20]:
# Predicted class id per validation sample (index of the largest score per row).
# NOTE(review): `validation_preds` is assigned by the model.predict cell that
# appears further down, so this cell fails on a fresh Restart & Run All.
valid_preds = np.argmax(validation_preds, axis=1)

In [10]:
# One prediction row per validation scan: val_dataset repeats indefinitely, so
# `steps` (with val_batch_size = 1) stops after a single pass over val_idxs.
validation_preds = model.predict(val_dataset, steps=len(val_idxs))

/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0162.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0852.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0146.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0235.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0612.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0902.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0020.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0195.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0002.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0037.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0641.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0473.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0387.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0111.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0165.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/

/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0607.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0082.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0478.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0239.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0775.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0031.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0032.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0100.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0900.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0386.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0370.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0327.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0359.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0063.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0062.nii.gz
/home/ayushman.singh/Datasets/ImageCLEF/

In [21]:
# Ground-truth class ids for the validation split, in val_idxs order. This is
# the order in which val_f feeds scans to model.predict (batch size 1, one step
# per scan), so row i of the predictions lines up with v_targets[i].
v_targets = np.argmax(labels.numpy()[val_idxs], axis=1)

# Both metrics take (y_true, y_pred).
val_kappa = cohen_kappa_score(v_targets, valid_preds)
val_acc = accuracy_score(v_targets, valid_preds)

In [11]:
# Collect the one-hot targets of one full pass over the validation split.
# val_dataset repeats indefinitely, so the pass is bounded by the split size
# rather than a hard-coded sample count.
targets = []

for _, target in val_dataset.take(len(val_idxs)):
    targets.append(target)
    print(target)

/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0162.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0852.nii.gz
tf.Tensor([[0. 0. 0. 1. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0146.nii.gz
tf.Tensor([[0. 1. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0235.nii.gz
tf.Tensor([[0. 0. 0. 1. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0612.nii.gz
tf.Tensor([[0. 1. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0902.nii.gz
tf.Tensor([[0. 0. 0. 0. 1.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0020.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0195.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.s

/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0206.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0862.nii.gz
tf.Tensor([[0. 0. 0. 0. 1.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0291.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0799.nii.gz
tf.Tensor([[0. 0. 0. 1. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0600.nii.gz
tf.Tensor([[0. 1. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0592.nii.gz
tf.Tensor([[0. 1. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0897.nii.gz
tf.Tensor([[0. 0. 0. 0. 1.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0822.nii.gz
tf.Tensor([[0. 0. 0. 1. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.s

/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0031.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0032.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0100.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0900.nii.gz
tf.Tensor([[0. 0. 0. 0. 1.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0386.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0370.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0327.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.singh/Datasets/ImageCLEF/Dataset/TRN_0359.nii.gz
tf.Tensor([[1. 0. 0. 0. 0.]], shape=(1, 5), dtype=float32)
/home/ayushman.s

In [12]:
# Stack the collected per-batch targets into a single array and compare its
# shape with the shape of the prediction matrix.
targets = np.array(targets)
print(targets.shape, validation_preds.shape, sep="\n")

(184, 1, 5)
(184, 5)


In [13]:
# Flatten the (num_batches, batch_size, num_classes) targets into one row per
# sample. Unlike a bare np.squeeze this keeps the sample axis when there is only
# one sample, and it still yields 2-D output if the batch size is not 1.
nntargets = np.asarray(targets)
nntargets = nntargets.reshape(-1, nntargets.shape[-1])
print(nntargets.shape)

(184, 5)


In [15]:
# Dump the raw per-class scores returned by model.predict (one row per scan).
# NOTE(review): this prints the whole matrix; a slice would keep the output short.
print(validation_preds)

[[0.09383484 0.2973625  0.24450141 0.07458068 0.28972065]
 [0.09459009 0.29723155 0.24425493 0.07455722 0.2893662 ]
 [0.09220521 0.3008202  0.2442399  0.07592657 0.28680816]
 [0.0932093  0.29857403 0.24374254 0.07483567 0.28963846]
 [0.09597096 0.30763125 0.24304302 0.07126224 0.28209248]
 [0.09372228 0.3026055  0.24548467 0.07446311 0.28372446]
 [0.09465257 0.30673414 0.23967448 0.07344808 0.28549072]
 [0.09571104 0.31005877 0.24098706 0.07341459 0.27982855]
 [0.09398782 0.30547643 0.24251258 0.07517307 0.28285006]
 [0.0936588  0.30007592 0.24238634 0.0748946  0.28898436]
 [0.09208001 0.30341545 0.24580176 0.07719088 0.28151196]
 [0.09129915 0.3004295  0.24721839 0.07741944 0.28363356]
 [0.09676892 0.30352548 0.2430516  0.07748672 0.27916732]
 [0.09373535 0.30183992 0.24658675 0.07496567 0.28287232]
 [0.09604157 0.30357948 0.2412813  0.07319196 0.2859057 ]
 [0.09499448 0.3052154  0.24285166 0.07359576 0.28334266]
 [0.09630956 0.3031522  0.24126828 0.07306378 0.2862062 ]
 [0.09246875 0

In [17]:
# Index of the highest-scoring class in each prediction row.
v_pr = validation_preds.argmax(axis=1)
print(v_pr)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [14]:
# Show the one-hot ground-truth rows collected from val_dataset.
print(nntargets)

[[1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0.

In [19]:
# Turn the one-hot target rows back into integer class ids.
nnt = nntargets.argmax(axis=1)

In [20]:
from sklearn.metrics import accuracy_score

In [22]:
# sklearn metrics take (y_true, y_pred): ground truth `nnt` first, predictions
# `v_pr` second. Accuracy is symmetric, so the value is unchanged, but this
# order is the safe one to copy when switching to another metric.
accuracy_score(nnt, v_pr)

0.24456521739130435