In [1]:
# define logging and working directory
# NOTE(review): the working directory is switched before the src import below;
# presumably src is only importable from the project root - confirm.
from ProjectRoot import change_wd_to_project_root
change_wd_to_project_root()
# star import: presumably supplies the unqualified names used in later cells
# (np, random, interact, interact_manual) - verify in src/utils/Notebook_imports
from src.utils.Notebook_imports import *

search for root_dir and set working directory
Working directory set to: /mnt/ssd/git/dynamic-cmr-models


# Test the main concepts of a DL generator

A Generator yields a tuple of (x,y) and is used to feed data into a deep learning model

Each x/y tuple has the following shape: 

$batchsize * exampleSize * exampleShape$ 

--> The $exampleShape$ could be:
- for 1D vector-data: $len(vector_{data})$
- for 2D images: $height * width$
- for 3D volumes: $depth * height * width$


# Interactive fake data generation

Please define:

- The total number of $examples$ (the size of the dataset), each $example$ represents one patient/entity
- the $exampleSize$ (number of objects per example), this could represent different timesteps of a multi input generator
- the $batchsize$, meaning how many examples should be yielded in one step

Usually one example would reflect the path to a file which, when loaded, will result in a given $exampleShape$.

In this example we will create random numbers with the shape of 1 to make the printing easier.

Each random number represents a 1D/2D or 3D object

In [2]:
# Define the size of our fake data (upper bounds of the interactive sliders)
upper_example_limit = 1000
upper_example_size_limit = 8
upper_batchsize_limit = 20

@interact_manual
def create_fake_data(examples_=(1,upper_example_limit), 
                     input_objects_=(1,upper_example_size_limit),
                     output_objects_=(1,upper_example_size_limit),
                     batchsize_=(1,upper_batchsize_limit), 
                     input_obj_shape='(10,10,10)', 
                     ouptut_obj_shape='(10,10,10)'):
    """Create a random fake dataset and publish it as notebook globals.

    Parameters
    ----------
    examples_ : int
        Number of examples (dataset size).
    input_objects_ : int
        Number of objects stacked per input example.
    output_objects_ : int
        Number of objects stacked per output example.
    batchsize_ : int
        Number of examples per batch.
    input_obj_shape : str
        Shape of one input object, written as a tuple, e.g. '(10,10,10)'.
    ouptut_obj_shape : str
        Shape of one output object, same format. (The parameter name keeps
        its original spelling because it is also the widget label.)

    Globals written
    ---------------
    examples, input_objects, output_objects, batchsize : the chosen sizes
    x_dict, y_dict : dict mapping example index -> stacked np.ndarray
    X, Y           : lists holding the very same arrays, so X[i] is x_dict[i]
    indexes        : list of all example indexes
    batches        : index of the last complete batch (never below 0)
    """
    global examples, input_objects, output_objects , batchsize, indexes, X, Y, x_dict, y_dict, batches
    examples = examples_
    batchsize = batchsize_
    input_objects = input_objects_
    output_objects = output_objects_

    def _parse_shape(text):
        # turn a string such as '(10, 10,10)' or '(10,)' into a tuple of ints;
        # empty tokens (trailing comma) are skipped instead of crashing int('')
        tokens = text.replace('(', '').replace(')', '').split(',')
        return tuple(int(token) for token in tokens if token.strip())

    # make sure the dimensions have the correct formatting
    x_dim = _parse_shape(input_obj_shape)
    y_dim = _parse_shape(ouptut_obj_shape)

    # create some fake data; the example index is added as an offset
    # (x: +index, y: +10*index) so each value can be traced back to its example
    x_dict = {}
    y_dict = {}
    for example in range(examples):
        x_dict[example] = np.stack([np.round(np.random.sample(x_dim), 2) + example
                                    for _ in range(input_objects)])
        y_dict[example] = np.stack([np.round(np.random.sample(y_dim), 2) + (10 * example)
                                    for _ in range(output_objects)])

    # list counterparts of the dictionaries (used to compare list vs. dict access);
    # they reference the same arrays, so X[i] is x_dict[i] and Y[i] is y_dict[i]
    X = [x_dict[example] for example in range(examples)]
    Y = [y_dict[example] for example in range(examples)]

    # index our data, we can use the indices to select one example in a list or dictionary
    indexes = list(range(examples))
    # index of the last complete batch; clamp at 0 so a batchsize larger than
    # the dataset does not produce a negative slider limit
    batches = max(examples // batchsize - 1, 0)
    print('Shape of one patient X: {}, Y: {}'.format(x_dict[0].shape, y_dict[0].shape))

interactive(children=(IntSlider(value=500, description='examples_', max=1000, min=1), IntSlider(value=4, descr…

In [3]:
# Select one batch and yield the corresponding values (batchsize x example_size)
@interact
def select_batch(selected_batch = (0,batches), shuffle_indexes=False, debug=False):
    """Slice one batch out of the fake dataset and return the stacked shapes.

    Parameters
    ----------
    selected_batch : int
        Zero-based batch number to select.
    shuffle_indexes : bool
        If True, the global ``indexes`` list is shuffled in place first.
    debug : bool
        If True, print every x and y value of the selected batch.

    Returns
    -------
    list
        [batch_x.shape, batch_y.shape]

    Raises
    ------
    AssertionError
        If the number of indexes does not match the number of examples.
    """
    global indexes

    if shuffle_indexes:
        # in-place shuffle: the new order stays active for later calls
        random.shuffle(indexes)
    # make sure indexes are correctly initialised; the message must be a string,
    # passing print(...) here would leave the AssertionError without any text
    assert len(indexes) == examples, 'len indexes: {}, number of examples: {}'.format(len(indexes), examples)

    # define the lower/upper index slicing borders
    start_idx = selected_batch*batchsize
    end_idx = (selected_batch+1)*batchsize

    # slice the indexes of the current batch
    batch_indexes = indexes[start_idx: end_idx]

    # print the restrictions of the current batch
    print('selected batch: {} of {} with a batchsize of {} and total {} examples'.format(selected_batch, batches, batchsize, examples))
    print('start idx: {}, end idx: {}'.format(start_idx, end_idx))
    print('Indexes of the currrent batch: {}'.format(batch_indexes))
    print('-'*40)

    # stack the entities of the current batch
    batch_x = np.stack([x_dict[k] for k in batch_indexes])
    batch_y = np.stack([y_dict[k] for k in batch_indexes])
    if debug:
        # plain loops: the prints are side effects, no result list is needed
        for k in batch_indexes:
            print('index: {}: value: {}'.format(k, x_dict[k]))
        for k in batch_indexes:
            print('index: {}: value: {}'.format(k, y_dict[k]))

    return([batch_x.shape, batch_y.shape])

interactive(children=(IntSlider(value=49, description='selected_batch', max=99), Checkbox(value=False, descrip…

In [4]:
from random import randint
# X (a list) and x_dict (a dictionary) are filled by the same loop, so
# len(X) == len(x_dict) and every list position is also a valid dictionary key.
# Draw n random positions from that shared range; the two timing cells below
# use them to compare list access with dictionary access.
samples = [randint(0, examples - 1) for _ in range(10 ** 7)]

In [5]:
%%timeit
# access the list n times; each value is only bound to temp, nothing is collected,
# so the measurement covers the loop plus the list lookup
temp = None
for i in samples:
    temp = X[i]
    

248 ms ± 2.87 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%%timeit
# access the dict n times; each value is only bound to temp, nothing is collected,
# so the measurement covers the loop plus the dictionary lookup
temp = None
for i in samples:
    temp = x_dict[i]

398 ms ± 2.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Some hardcoded examples of how we construct the fake batches

In [25]:
## we assume x and y to be lists of lists (model input and output, variable length)
# build the fake data: every example holds the same ascending sequence,
# the y values are the x values shifted by 10
examples = 10
example_size = 5
x = [list(range(example_size)) for _ in range(examples)]
y = [[value + 10 for value in range(example_size)] for _ in range(examples)]
print(x)
print(y)

[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]
[[10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14], [10, 11, 12, 13, 14]]


In [26]:
# build one unique index per example; this list can then be shuffled
indexes = [position for position, _ in enumerate(x)]
indexes

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [27]:
import random
# shuffle the index list in place; no seed is set in this cell,
# so the resulting order differs between runs
random.shuffle(indexes)
indexes

[4, 2, 1, 6, 5, 0, 8, 3, 9, 7]

In [28]:
# the first batch for a hard-coded batchsize of two: the first two shuffled indexes
indexes[:2]

[4, 2]

In [29]:
# same slice, but with the batch size and the batch position held in variables
batchsize = 2
index = 0
batch_indexes = indexes[batchsize * index: batchsize * (index + 1)]
print(batch_indexes)

[4, 2]


In [None]:
# zip two lists and walk over them pairwise.
# The inputs come from X, the targets from y_dict (ordered by example index),
# so the "output" column shows the target shape rather than the input shape again.
# A plain loop is used because print is a side effect; wrapping it in
# list(map(...)) would only add a [None, ...] cell output.
targets = [y_dict[key] for key in sorted(y_dict)][:10]
for x_example, y_example in zip(X[:10], targets):
    print('input: {} output: {}'.format(x_example.shape, y_example.shape))