In [60]:
# define logging and working directory
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
from src.utils.Notebook_imports import *

search for root_dir and set working directory
Working directory set to: /mnt/ssd/git/dynamic-cmr-models


# Test the main concepts of a DL generator

A Generator yields a tuple of (x,y) and is used to feed data into a deep learning model

Each x/y tuple has the following shape: 

$batchsize * exampleSize * exampleShape$ 

--> The $exampleShape$ could be:
- for 1D vector-data: $len(vector_{data})$
- for 2D images: $height * width$: 
- for 3D volumes: $depth * height * width$)


# Create fake data interactively

Please define:

- The total number of $examples$ (the size of the dataset), each $example$ represents one patient/entity
- the $exampleSize$ (number of objects per example), this could represent different timesteps of a multi input generator
- the $batchsize$, meaning how many examples should be yielded in one step

Usually one example would reflect the path to a file. Which, when loaded will reult in a given $exampleShape$.

In this example we will create random numbers define the input and output shape with $inputObjShape$ and $outputObjShape$.

Each random number represents a 1D/2D or 3D object

In [61]:
# Define the size of our fake data
upper_example_limit = 1000
upper_example_size_limit = 8
upper_batchsize_limit = 20

@interact_manual
def create_fake_data(examples_=(1,upper_example_limit), 
                     input_objects_=(1,upper_example_size_limit),
                     output_objects_=(1,upper_example_size_limit),
                     batchsize_=(1,upper_batchsize_limit), 
                     input_obj_shape='(10,10,10)', 
                     ouptut_obj_shape='(10,10,10)'):
    
    global examples, input_objects, output_objects , batchsize, indexes, X, Y, x_dict, y_dict, batches
    examples = examples_
    batchsize = batchsize_
    input_objects = input_objects_
    output_objects = output_objects_
    
    # make sure the dimensions have the correct formating
    x_dim = tuple(map(int, input_obj_shape.replace(')', '').replace('(','').split(',')))
    y_dim = tuple(map(int, ouptut_obj_shape.replace(')', '').replace('(','').split(',')))
    
    # create some fake data
    x_dict = {}
    y_dict = {}  
    X = []
    for example in range(examples_):
        x_dict[example] = np.stack([np.round(np.random.sample(x_dim),2)+example for i in range(input_objects)])
        y_dict[example] = np.stack([np.round(np.random.sample(y_dim),2)+(10*example) for i in range(output_objects)])
        # testing purpose if lists are faster than dicts
        X.append(np.stack([np.round(np.random.sample(x_dim),2)+example for i in range(input_objects)]))
    # index our data, we can use the indicies to select one example in a list or dictionary
    # By this we dont need to shuffle the data itself
    indexes = list(range(len(x_dict)))
    batches = int(np.floor(examples/batchsize))-1
    print('Shape of one patient X: {}, Y: {}'.format(x_dict[0].shape, y_dict[0].shape))

interactive(children=(IntSlider(value=500, description='examples_', max=1000, min=1), IntSlider(value=4, descr…

In [66]:
# Select one batch and yield the corresponding values (batchsize x example_size)
@interact
def select_batch(selected_batch = (0,batches), shuffle_indexes=False, debug=False):
    global indexes
    
    if shuffle_indexes:
        random.shuffle(indexes)
    # make sure indexes are correctly initialised
    assert len(indexes) == examples, print('len indexes: {}, number of examples: {}'.format(len(indexes), examples))
    
    # define the lower/upper index slicing borders of the current batch
    start_idx = selected_batch*batchsize
    end_idx = (selected_batch+1)*batchsize
    
    # we slice the indexes of the current batch from the index list
    batch_indexes = indexes[start_idx: end_idx]
    
    # print the restrictions of the current batch
    print('selected batch: {} of {} with a batchsize of {} and total {} examples'.format(selected_batch, batches, batchsize, examples))
    print('start idx: {}, end idx: {}'.format(start_idx, end_idx))
    print('Indexes of the currrent batch: {}'.format(batch_indexes))
    print('-'*40)
    
    # stack the entities of the current batch
    batch_x = np.stack([x_dict[k] for k in batch_indexes])
    batch_y = np.stack([y_dict[k] for k in batch_indexes])
    if debug:
        [print('index: {}: value: {}'.format(k, x_dict[k])) for k in batch_indexes]
        [print('index: {}: value: {}'.format(k, y_dict[k])) for k in batch_indexes]
    
    return([batch_x.shape, batch_y.shape])

interactive(children=(IntSlider(value=249, description='selected_batch', max=499), Checkbox(value=False, descr…

# Performance measurements of different indexing mehtods

In [4]:
from random import randint
# We have a list --> X and a dictionary --> x_dict, 
# len(X) == len(x_dict)
# and X[i] == x_dict[i] for all i in range(len(X))
# We create n random indicies within the range of len(X)
samples = [randint(0, examples-1) for _ in range(10000000)]

In [5]:
%%timeit
# access the list n times, append the value to a new list
temp = None
for i in samples:
    temp = X[i]
    

248 ms ± 2.87 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%%timeit
# access the dict n times, append the value to a new list
temp = None
for i in samples:
    temp = x_dict[i]

398 ms ± 2.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Regression target
Create a 1D array with interpolated values at the given indicies

In [25]:
timesteps = 12
vol_shape = (20,20,20)
volume = np.random.rand(timesteps, *vol_shape)
print(volume.shape)
indicies = [2,4,6,8,10]
# create a vector with 1 at index 2
target_vect = np.empty((timesteps))
target_vect[:] = np.nan
print(target_vect.shape)

for label,idx in enumerate(indicies):
    label = label+1
    target_vect[idx] = label
print(target_vect)



(12, 20, 20, 20)
(12,)
[nan nan  1. nan  2. nan  3. nan  4. nan  5. nan]


In [26]:
def nan_helper(y):
    """Helper to handle indices and logical indices of NaNs.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices= index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """

    return np.isnan(y), lambda z: z.nonzero()[0]

nans, x= nan_helper(target_vect)
target_vect[nans]= np.interp(x(nans), x(~nans), target_vect[~nans])
print(target_vect)

[1.  1.  1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5. ]


# From Index to one-hot-encoding

In [38]:
# from index to one-hot
# each row is one phase and points to the index in time
indicies = np.array([2,4,6,8,10])
print(indicies)
onehot = np.zeros((indicies.size, indicies.max()+1))
onehot[np.arange(indicies.size),indicies] = 1
print(onehot)
print(onehot.shape)

[ 2  4  6  8 10]
[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
(5, 11)


In [72]:
# from index to one-hot
# each row is one timestep and points to the corresponding phase
indicies = [2,4,6,8,10]
# create a vector with 1 at index 2
target_vect = np.zeros((timesteps)).astype(int)
#target_vect[:] = np.nan
print(target_vect.shape)

for label,idx in enumerate(indicies):
    label = label+1
    target_vect[idx] = label
print(target_vect)

onehot = np.zeros((target_vect.size, target_vect.max()+1))
onehot[np.arange(target_vect.size),target_vect] = 1
print(onehot)
print(onehot.shape)


(12,)
[0 0 1 0 2 0 3 0 4 0 5 0]
[[1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0.]]
(12, 6)


In [113]:
# from index to one-hot, drop the zero-class
# each row is one timestep and points to the corresponding phase
indicies = [2,4,6,8,10]
# create a vector with 1 at index 2
target_vect = np.zeros((timesteps)).astype(int)
#target_vect[:] = np.nan
print(target_vect.shape)

for label,idx in enumerate(indicies):
    label = label+1
    target_vect[idx] = label
print(target_vect)

onehot = np.zeros((target_vect.size, target_vect.max()+1))
onehot[np.arange(target_vect.size),target_vect] = 1
# drop/slice the zero class
onehot = onehot[:,1:]
print(onehot)
print(onehot.shape)


(12,)
[0 0 1 0 2 0 3 0 4 0 5 0]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]]
(12, 5)


In [87]:
# inverse the steps
from_onehot_to_index = [np.where(r==1)[0] for r in onehot]
from_onehot_to_index

[array([], dtype=int64),
 array([], dtype=int64),
 array([0]),
 array([], dtype=int64),
 array([1]),
 array([], dtype=int64),
 array([2]),
 array([], dtype=int64),
 array([3]),
 array([], dtype=int64),
 array([4]),
 array([], dtype=int64)]

# Convolve the one-hot vector along the target class

In [111]:
# smooth the one-hot vectors along the indexes
# each phase has a small range of valid indexes with a convolved smoothing
import sys
temp = np.copy(onehot)
kernelsize = 6
lower = 0.2
upper = 0.5
# create a kernel with linearly increasing, than one, than decreasing smoothing
kernel = np.concatenate([np.linspace(lower,upper,kernelsize//2), [1], np.linspace(upper,lower,kernelsize//2)])
print('Kernel: {}'.format(kernel))
for idx in range(temp.shape[1]):
    print(temp[:,idx])
    smoothed = np.convolve(temp[:,idx],kernel, mode='same')
    print('convolved: {}'.format(smoothed))
    #smoothed = smoothed/(sum(smoothed) + sys.float_info.epsilon)
    print('normalised: {}'.format(smoothed))
    temp[:,idx] = smoothed
print(temp)

Kernel: [0.2  0.35 0.5  1.   0.5  0.35 0.2 ]
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
convolved: [0.35 0.5  1.   0.5  0.35 0.2  0.   0.   0.   0.   0.   0.  ]
normalised: [0.35 0.5  1.   0.5  0.35 0.2  0.   0.   0.   0.   0.   0.  ]
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
convolved: [0.   0.2  0.35 0.5  1.   0.5  0.35 0.2  0.   0.   0.   0.  ]
normalised: [0.   0.2  0.35 0.5  1.   0.5  0.35 0.2  0.   0.   0.   0.  ]
[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
convolved: [0.   0.   0.   0.2  0.35 0.5  1.   0.5  0.35 0.2  0.   0.  ]
normalised: [0.   0.   0.   0.2  0.35 0.5  1.   0.5  0.35 0.2  0.   0.  ]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
convolved: [0.   0.   0.   0.   0.   0.2  0.35 0.5  1.   0.5  0.35 0.2 ]
normalised: [0.   0.   0.   0.   0.   0.2  0.35 0.5  1.   0.5  0.35 0.2 ]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
convolved: [0.   0.   0.   0.   0.   0.   0.   0.2  0.35 0.5  1.   0.5 ]
normalised: [0.   0.   0.   0.   0.   0.   0.   0.2  0.35 0.5  1.   0.5 ]
[[0.35 0.   0.   0.   0.  ]
 [

In [110]:
# invert the predicted phase per temporal index to get the temporal position in the whole temporal stack
np.argmax(temp, axis=0)

array([ 2,  4,  6,  8, 10])

# Smoothed 1D array

In [47]:
def smooth(x, window_len=5, window='hanning'):
    """
    smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal 
    (with the window size) in both ends so that transient parts are minimized
    in the begining and end part of the output signal.

    input:
        x: the input signal 
        window_len: the dimension of the smoothing window
        window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            flat window will produce a moving average smoothing.

    output:
        the smoothed signal

    example:

    import numpy as np    
    t = np.linspace(-2,2,0.1)
    x = np.sin(t)+np.random.randn(len(t))*0.1
    y = smooth(x)

    see also: 

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter

    TODO: the window parameter could be the window itself if an array instead of a string   
    """
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    s=np.r_[2*x[0]-x[window_len:1:-1], x, 2*x[-1]-x[-1:-window_len:-1]]
    #print(len(s))

    if window == 'flat': #moving average
        w = np.ones(window_len,'d')
    else:
        w = getattr(np, window)(window_len)
    y = np.convolve(w/w.sum(), s, mode='same')
    return y[window_len-1:-window_len+1]

In [49]:
timesteps = 12
vol_shape = (20,20,20)
volume = np.random.rand(timesteps, *vol_shape)
print(volume.shape)
indicies = [2,4,6,8,10]
# create a vector with 1 at index 2
target_vect = np.empty((timesteps))
target_vect[:] = np.nan
print(target_vect.shape)

for label,idx in enumerate(indicies):
    label = label+1
    target_vect[idx] = label
print(target_vect)
smooth_target_vect = smooth(target_vect)
print(smooth_target_vect)

(12, 20, 20, 20)
(12,)
[nan nan  1. nan  2. nan  3. nan  4. nan  5. nan]
[nan nan nan nan nan nan nan nan nan nan nan nan]


# Test with np.interp, does not work and the ring functionality is not given by this representation

In [50]:
timesteps = 12
vol_shape = (20,20,20)
volume = np.random.rand(timesteps, *vol_shape)
print(volume.shape)
indicies = [2,4,6,8,10]
# create a vector with 1 at index 2
target_vect = np.empty((timesteps))
target_vect[:] = np.nan
print(target_vect.shape)

for label,idx in enumerate(indicies):
    label = label+1
    target_vect[idx] = label
print(target_vect)
np.interp(indicies, target_vect)

(12, 20, 20, 20)
(12,)
[nan nan  1. nan  2. nan  3. nan  4. nan  5. nan]


TypeError: _interp_dispatcher() missing 1 required positional argument: 'fp'

# One-Hot per timestep