In [2]:
'''
The code here transforms the data from Resampling.ipynb to a data format suitable for the 3D-DSN:
https://www.sciencedirect.com/science/article/pii/S1361841517300725?via%3Dihub
'''

import SimpleITK as sitk
import os, sys
#sys.path.insert(1, './Models/Resnet-3D')
from resnet3d import Resnet3DBuilder
import numpy as np
import random
import keras
from sklearn.model_selection import train_test_split
import math

Using TensorFlow backend.


In [3]:
%load_ext autoreload
%autoreload 2
#if this is not here, the loaded notebook will not detect ensuing changes in imported numpy scripts 

In [4]:
#generator for (ct,pet)->centre prediction models

def zip_ct_pet(ct, pet):
    ct = np.array(ct)[:,:,:,:,np.newaxis]
    pet = np.array(pet)[:,:,:,:,np.newaxis]
    return np.concatenate((ct, pet), axis=-1)

def centre_predictor_generator(pet_files, ct_files, centres, shuffle=True, batch_size=4):
    
    while True:
        nth_batch = 0
        
        if shuffle:
            z = list(zip(pet_files, ct_files, centres))
            random.shuffle(z)
            pet_files, ct_files, centres = zip(*z)
            
        pet_batch = []
        ct_batch = []
        centre_batch = []
        
        for i in range(len(pet_files)):
                
            ct_batch.append(np.load(ct_files[i]))
            pet_batch.append(np.load(pet_files[i]))
            centre_batch.append(centres[i])
            
            if len(pet_batch) == batch_size:
                yield zip_ct_pet(ct_batch, pet_batch), np.array(centre_batch)
                
                pet_batch.clear()
                ct_batch.clear()
                centre_batch.clear()
 
        if len(ct_batch) > 0:
            yield zip_ct_pet(ct_batch, pet_batch), np.array(centre_batch)

In [5]:
#load training data from disk
pet_files = []
ct_files = []
centre_files = []
mask_files = []

for root, dirs, files in os.walk('/home/jzhe0882/numpydata/PET'):
    for name in files:
        file_path = os.path.join(root, name)
        pet_files.append(file_path)

for root, dirs, files in os.walk('/home/jzhe0882/numpydata/CT'):
    for name in files:
        file_path = os.path.join(root, name)
        ct_files.append(file_path)

for root, dirs, files in os.walk('/home/jzhe0882/numpydata/MaskCentres'):
    for name in files:
        file_path = os.path.join(root, name)
        centre_files.append(file_path)
        
for root, dirs, files in os.walk('/home/jzhe0882/numpydata/Mask'):
    for name in files:
        file_path = os.path.join(root, name)
        mask_files.append(file_path)

pet_files = sorted(pet_files)
ct_files = sorted(ct_files)
centre_files = sorted(centre_files)
centres = [np.load(c) for c in centre_files] #can load all of these into memory (other volumes are too large)
mask_files = sorted(mask_files)

#Inputs are PET/CT data, outputs are centres or masks
X_train, X_test, y_train, y_test = train_test_split(list(zip(pet_files, ct_files)), 
                                                    list(zip(centres, mask_files)), 
                                                    test_size=0.33, shuffle=True, random_state=9)

pet_train, ct_train = zip(*X_train)
pet_test, ct_test = zip(*X_test)
centre_train, mask_train = zip(*y_train)
centre_test, mask_test = zip(*y_test)

print(pet_train[0], ct_train[0], centre_train[0], mask_train[0])
print(pet_test[0], ct_test[0], centre_test[0], mask_test[0])

/home/jzhe0882/numpydata/PET/HN-HGJ-052.npy /home/jzhe0882/numpydata/CT/HN-HGJ-052.npy [61 40 50] /home/jzhe0882/numpydata/Mask/HN-HGJ-052.npy
/home/jzhe0882/numpydata/PET/HN-CHUM-028.npy /home/jzhe0882/numpydata/CT/HN-CHUM-028.npy [65 48 54] /home/jzhe0882/numpydata/Mask/HN-CHUM-028.npy


In [72]:
#test to see if generator works

batch_size = 4
test_generator = centre_predictor_generator(pet_test, ct_test, centre_test, batch_size=batch_size, shuffle=False)
print('{} batches of {} samples taken over {} total samples'.format(math.ceil(len(ct_test)/batch_size), batch_size, len(ct_test)))

for i in range(math.ceil(len(ct_test)/batch_size)):
    ctpet, centre = next(test_generator)
    print(i, ctpet.shape, centre.shape)

17 batches of 4 samples taken over 65 total samples
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16


In [8]:
batch_size = 4
cycles_per_epoch = 10 # how many times the entire training set should be cycled over for each epoch
total_cycles = 100 # how many times the entire training set should be cycled in total
input_shape = (128, 128, 96, 2)
train_generator = centre_predictor_generator(pet_train, ct_train, centre_train, batch_size=batch_size)
validation_generator = centre_predictor_generator(pet_test, ct_test, centre_test, batch_size=batch_size, shuffle=False)

detection_model = Resnet3DBuilder.build_resnet_18(input_shape, 3)
detection_model.compile(optimizer='adam',
              loss='mean_squared_error')
detection_model.fit_generator(train_generator, validation_data=validation_generator, validation_steps=math.ceil(len(ct_test)/batch_size),
                   steps_per_epoch=cycles_per_epoch*math.ceil(len(ct_train)/batch_size), epochs=math.ceil(total_cycles/cycles_per_epoch))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7efb6c610ba8>

In [10]:
detection_model.save('Models/keras models/detection_model_resnet18.h5')

In [None]:
from keras.models import load_model

detection_model = load_model('Models/keras models/detection_model_resnet18.h5')

Instructions for updating:
Colocations handled automatically by placer.


In [6]:
#generates a bounding box around the centre
def get_bounding_box(source_volume, centre, maximal_extents):
    maxima = centre + maximal_extents
    minima = centre - maximal_extents
    
    #keep bounding box dimensions within the mask dimensions
    maxima = np.minimum(maxima, np.array(source_volume.shape)).astype(int)
    minima = np.maximum(minima, [0,0,0]).astype(int) 
        
    bounding_box_values = source_volume[minima[0]:maxima[0],
                              minima[1]:maxima[1],
                              minima[2]:maxima[2]]
    
    relative_centre = maximal_extents
    relative_maxima = (relative_centre + maxima - centre).astype(int)
    relative_minima = (relative_centre + minima - centre).astype(int)
    
    bounding_box = np.zeros(2 * maximal_extents)
    
    bounding_box[relative_minima[0]:relative_maxima[0],
                relative_minima[1]:relative_maxima[1],
                relative_minima[2]:relative_maxima[2]] = bounding_box_values
    
    #print(relative_minima, relative_maxima)
    
    return bounding_box

#translates the bounding box so that its values have a new reference centre
def align_bounding_box(bounding_box, box_centre, target_centre):
    displacement = (target_centre - box_centre).astype(int)
                      
    for i in range(len(displacement)): #if true, the centres are too far apart for original bounding box values to be seen
        if displacement[i] >= bounding_box.shape[i]:
            return np.zeros(bounding_box.shape[i])
    
    new_box = np.roll(bounding_box, displacement, axis=(0,1,2))
                      
    #boxes shifted backward have a trail of zeroes at the end of the array
    #boxes shifted forward have a trail of zeroes at the beginning of the array
    if displacement[0] < 0:
        new_box[displacement[0]:, :, :] = 0
    else:
        new_box[:displacement[0], :, :] = 0
                      
    if displacement[1] < 0:
        new_box[:, displacement[1]:, :] = 0
    else:
        new_box[:, :displacement[1], :] = 0
    
    if displacement[2] < 0:
        new_box[:, :, displacement[2]:] = 0
    else:
        new_box[:, :, :displacement[2]] = 0
        
    return new_box

#values derived from NumpyAnalysis.ipynb
maximal_extents = np.array([22, 13, 28], dtype=int)

#generator for (ct bounding box, pet bounding_box)-> mask bounding box prediction models
def mask_bounding_box_predictor_generator(pet_files, ct_files, mask_files, centres, shuffle=True, batch_size=4):
    
    while True:
        if shuffle:
            z = list(zip(pet_files, ct_files, centres, mask_files))
            random.shuffle(z)
            pet_files, ct_files, centres, mask_files = zip(*z)
            
        pet_batch = []
        ct_batch = []
        mask_batch = []
        
        for i in range(len(pet_files)):
             
            ct_batch.append(get_bounding_box(np.load(ct_files[i]), centres[i], maximal_extents))
            pet_batch.append(get_bounding_box(np.load(pet_files[i]), centres[i], maximal_extents))
            mask_batch.append(get_bounding_box(np.load(mask_files[i]), centres[i], maximal_extents))
            
            if len(pet_batch) == batch_size:
                yield zip_ct_pet(ct_batch, pet_batch), np.array(mask_batch)
                
                pet_batch.clear()
                ct_batch.clear()
                mask_batch.clear()
                
        yield zip_ct_pet(ct_batch, pet_batch), np.array(mask_batch)

In [7]:
test = np.arange(1,31).reshape(2,3,5)
print(test)
centre = [0,0,0]
print(get_bounding_box(test, np.array(centre), np.array([1,1,1])))
print(test[centre[0], centre[1], centre[2]])
test = align_bounding_box(test, np.array([0,0,0]), np.array([-2,1,1]))
print(test)


[[[ 1  2  3  4  5]
  [ 6  7  8  9 10]
  [11 12 13 14 15]]

 [[16 17 18 19 20]
  [21 22 23 24 25]
  [26 27 28 29 30]]]
[[[0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 1.]]]
1
[[[0 0 0 0 0]
  [0 0 0 0 0]
  [0 0 0 0 0]]

 [[0 0 0 0 0]
  [0 0 0 0 0]
  [0 0 0 0 0]]]


In [12]:
from scipy.spatial import distance

def dice_eval(pred_mask, true_mask):
    pred_mask = pred_mask.flatten()
    true_mask = true_mask.flatten()
    ret = distance.dice(pred_mask, true_mask)

    return ret

In [121]:
#Evaluate model

from sklearn.metrics import mean_squared_error
    
test_generator = centre_predictor_generator(pet_test, ct_test, centre_test, batch_size=batch_size, shuffle=False)    
detected_centres = detection_model.predict_generator(test_generator, steps=math.ceil(len(ct_test)/batch_size))
    
dice_sum = 0
for i in range(len(detected_centres)):
    mask = np.load(mask_test[i])
    
    pred_mask = get_bounding_box(mask, np.rint(detected_centres[i]).astype(int), maximal_extents)
    pred_mask = align_bounding_box(pred_mask, detected_centres[i], centre_test[i])
    
    true_mask = get_bounding_box(mask, centre_test[i].astype(int), maximal_extents)
    
    dice_index = dice_eval(pred_mask, true_mask)
    
    dice_sum += dice_index
    
print('dice accuracy', 1 - dice_sum / len(detected_centres))
    
print(detection_model.evaluate_generator(validation_generator, steps=math.ceil(len(ct_test)/batch_size)))
#print(detected_centres.shape)
print(mean_squared_error(detected_centres, centre_test, multioutput='raw_values'))

dice accuracy 0.09947735814980829
26.426832785973183
[23.34948369 11.0483397  36.41964497]


In [208]:
generator2 = mask_bounding_box_predictor_generator(ct_train, pet_train, mask_train, centre_train, batch_size=100)

for i in range(4):
    ctpet, mask = next(generator2)
    print(ctpet.shape,mask.shape)

(100, 44, 26, 56, 2) (100, 44, 26, 56)
(29, 44, 26, 56, 2) (29, 44, 26, 56)
(100, 44, 26, 56, 2) (100, 44, 26, 56)
(29, 44, 26, 56, 2) (29, 44, 26, 56)


In [148]:
detection_model.fit_generator(train_generator, validation_data=validation_generator, validation_steps=math.ceil(len(ct_test)/batch_size),
                   steps_per_epoch=math.ceil(len(ct_train)/batch_size), epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x7fc7e626f358>

In [100]:
def blyat(arr):
    return np.roll(arr, [2,1], [0,1])
    
test = np.arange(20).reshape(4,5)
print(test)
test=blyat(test)
print(test)
print(test[-0:, 1:])

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]
[[14 10 11 12 13]
 [19 15 16 17 18]
 [ 4  0  1  2  3]
 [ 9  5  6  7  8]]
[[10 11 12 13]
 [15 16 17 18]
 [ 0  1  2  3]
 [ 5  6  7  8]]
