In [None]:
import sys
import os
# Make the local NiftyNet checkout importable. The path is machine-specific
# and has to be pointed at your own checkout before running the notebook.
niftynet_path = '/home/tom/phd/NiftyNet-Generator-PR/NiftyNet'
sys.path.append(niftynet_path)
# Set to an empty device list before tensorflow is imported further down;
# presumably this hides all GPUs so the notebook runs on CPU -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from niftynet.io.image_reader import ImageReader
from niftynet.io.image_sets_partitioner import ImageSetsPartitioner
from collections import namedtuple

from niftynet.contrib.preprocessors.preprocessing import Preprocessing
from niftynet.contrib.csv_reader.sampler_csv_rows import ImageWindowDatasetCSV
from niftynet.contrib.csv_reader.sampler_resize_v2_csv import ResizeSamplerCSV as ResizeSampler
from niftynet.contrib.csv_reader.csv_reader import CSVReader

In [None]:
#### Some setup
# Lightweight stand-ins for the three parameter objects that are handed to
# ``Preprocessing(net_param, action_param, task_param)`` in the cells below.
NetParam = namedtuple(
    'NetParam',
    [
        'normalise_foreground_only',
        'foreground_type',
        'multimod_foreground_type',
        'histogram_ref_file',
        'norm_type',
        'cutoff',
        'normalisation',
        'whitening',
    ])
ActionParam = namedtuple(
    'ActionParam',
    [
        'random_flipping_axes',
        'scaling_percentage',
        'rotation_angle',
        'rotation_angle_x',
        'rotation_angle_y',
        'rotation_angle_z',
        'do_elastic_deformation',
        'num_ctrl_points',
        'deformation_sigma',
        'proportion_to_deform',
    ])


class TaskParam:
    """Minimal task-parameter holder: stores ``classes`` as ``self.image``."""

    def __init__(self, classes):
        self.image = classes


# Only whitening is switched on; histogram normalisation is off.
net_param = NetParam(
    normalise_foreground_only=False,
    foreground_type='threshold_plus',
    multimod_foreground_type='and',
    histogram_ref_file='mapping.txt',
    norm_type='percentile',
    cutoff=(0.05, 0.95),
    normalisation=False,
    whitening=True,
)
# No flipping, scaling, rotation or elastic deformation is requested.
action_param = ActionParam(
    random_flipping_axes=[],
    scaling_percentage=[],
    rotation_angle=None,
    rotation_angle_x=None,
    rotation_angle_y=None,
    rotation_angle_z=None,
    do_elastic_deformation=False,
    num_ctrl_points=6,
    deformation_sigma=50,
    proportion_to_deform=0.9,
)

### 1) Create a csv of labels and show how it can be returned by the CSV Reader


In [None]:
from niftynet.utilities.download import download
download('mr_ct_regression_model_zoo_data')
labels_location = 'ct.csv'
# Resolve the CT folder relative to the current user's home directory, i.e.
# the same '~/niftynet/...' location the image reader is configured with,
# instead of an absolute path that only exists on one machine.
ct_dir = os.path.expanduser('~/niftynet/data/mr_ct_regression/CT_zero_mean')
nifti_suffix = '.nii.gz'
files = [file for file in os.listdir(ct_dir) if file.endswith(nifti_suffix)]
# The subject id is the file name without its suffix. Only the trailing
# suffix is stripped so that an id containing '.nii.gz' elsewhere is kept.
subject_ids = [file[:-len(nifti_suffix)] for file in files]
# For this demo every subject simply gets its own id as its label.
pd.DataFrame(
    data=[(subject_id, subject_id) for subject_id in subject_ids]
).to_csv('label.csv', index=False, header=['subject_id', 'label'])
# Read the file back so the notebook displays what was written.
pd.read_csv('label.csv')

In [None]:
#### Testing the CSV Reader on labels
# TODO: make sure we accept 'Label', 'label', 'LABEL'
task_param = TaskParam(['image'])
# Image section 'CT': folder to search and file-name filter for the volumes.
image_data_param = {'CT': {'path_to_search': '~/niftynet/data/mr_ct_regression/CT_zero_mean', 'filename_contains': 'nii'}}
# TODO: change csv_file -> csv_path_file; csv_data_file is a csv with data.
# 'label.csv' is the file written by the previous cell.
# NOTE(review): 'to_ohe' presumably requests one-hot encoding -- confirm.
csv_data_param = {'label': {'csv_data_file': 'label.csv', 'to_ohe': True}}
grouping_param = {'image': (['CT'])}

# Build the image reader and attach the normalisation + augmentation layers
# prepared from the parameter objects defined in the setup cell.
image_sets_partitioner = ImageSetsPartitioner().initialise(image_data_param)
image_reader = ImageReader().initialise(image_data_param, grouping_param, file_list=image_sets_partitioner.all_files)
preprocessing = Preprocessing(net_param, action_param, task_param)
normalisation_layers = preprocessing.prepare_normalisation_layers()
augmentation_layers = preprocessing.prepare_augmentation_layers()
image_reader.add_preprocessing_layers(normalisation_layers + augmentation_layers)
# The csv reader is initialised on the same file list as the image reader.
csv_reader = CSVReader().initialise(csv_data_param, 'label', file_list=image_sets_partitioner.all_files)
# Element [1] of the reader output is a dict keyed by task name; idx=13 is
# just an arbitrary entry to look at.
print('One sample from the csv_reader:', np.squeeze(csv_reader(idx=13)[1]['label']))
window_sizes = {'image': (100, 100, 1), 'label': (1, 1, 1)}
sampler = ResizeSampler(reader=image_reader,
                        csv_reader=csv_reader,
                        window_sizes=window_sizes,
                        num_threads=2,
                        smaller_final_batch_mode='drop',
                        batch_size=2,
                        queue_length=2)
# Pull a single output dictionary from the sampler and inspect its shapes.
sample = next(sampler())
print(sample['image'].shape)
print(sample['label'].shape)

### 2) Create a csv of features and show how it can be returned by the CSV Reader


In [None]:
from niftynet.utilities.download import download
download('mr_ct_regression_model_zoo_data')
labels_location = 'ct.csv'
# Resolve the CT folder relative to the current user's home directory, i.e.
# the same '~/niftynet/...' location the image reader is configured with,
# instead of an absolute path that only exists on one machine.
ct_dir = os.path.expanduser('~/niftynet/data/mr_ct_regression/CT_zero_mean')
nifti_suffix = '.nii.gz'
# Subject ids: file names with only the trailing '.nii.gz' suffix stripped.
files = [file[:-len(nifti_suffix)] for file in os.listdir(ct_dir) if file.endswith(nifti_suffix)]

# Each subject gets a row of standard-normal dummy features. A locally seeded
# generator keeps features.csv identical across re-runs without touching
# numpy's global random state.
n_features = 10
feature_rng = np.random.RandomState(0)
pd.DataFrame(
    data=[tuple([file] + list(feature_rng.randn(n_features))) for file in files]
).to_csv('features.csv', index=False,
         header=['subject_id'] + [str(x) for x in range(n_features)])
# Read the file back so the notebook displays what was written.
pd.read_csv('features.csv')

In [None]:
# Same pipeline as the label demo above, but the csv section is 'features'
# and points at the features.csv written by the previous cell.
task_param = TaskParam(['image'])
# Image section 'CT': folder to search and file-name filter for the volumes.
image_data_param = {'CT': {'path_to_search': '~/niftynet/data/mr_ct_regression/CT_zero_mean', 'filename_contains': 'nii'}}
# NOTE(review): 'to_ohe': False presumably keeps the raw numeric columns
# instead of one-hot encoding them -- confirm.
csv_data_param = {'features': {'csv_data_file': 'features.csv', 'to_ohe': False}}
grouping_param = {'image': (['CT'])}
# Build the image reader and attach the normalisation + augmentation layers
# prepared from the parameter objects defined in the setup cell.
image_sets_partitioner = ImageSetsPartitioner().initialise(image_data_param)
image_reader = ImageReader().initialise(image_data_param, grouping_param, file_list=image_sets_partitioner.all_files)
preprocessing = Preprocessing(net_param, action_param, task_param)
normalisation_layers = preprocessing.prepare_normalisation_layers()
augmentation_layers = preprocessing.prepare_augmentation_layers()
image_reader.add_preprocessing_layers(normalisation_layers + augmentation_layers)
# The csv reader is initialised on the same file list as the image reader.
csv_reader = CSVReader().initialise(csv_data_param, 'features', file_list=image_sets_partitioner.all_files)
# Element [1] of the reader output is a dict keyed by task name; idx=13 is
# just an arbitrary entry to look at.
print('One sample from the csv_reader:', np.squeeze(csv_reader(idx=13)[1]['features']))
window_sizes = {'image': (100, 100, 1), 'features': (1, 1, 1)}
sampler = ResizeSampler(reader=image_reader,
                        csv_reader=csv_reader,
                        window_sizes=window_sizes,
                        num_threads=2,
                        smaller_final_batch_mode='drop',
                        batch_size=2,
                        queue_length=2)
# Pull a single output dictionary from the sampler and inspect its contents.
sample = next(sampler())
print(sample['image'].shape)
print(sample['features'].shape)
print(sample.keys())

In [None]:
### Create random patches with labels ###
# NOTE(review): unfinished cell -- the loop only derives a subject id from
# each file name; no patches or labels are created yet.
# NOTE(review): the directory is an absolute, machine-specific path.
for file in os.listdir('/home/tom/niftynet/data/mr_ct_regression/CT_zero_mean'):
    # Subject id = file name with '.nii.gz' removed.
    subject_id = file.replace('.nii.gz', '')
    

In [None]:
# -*- coding: utf-8 -*-
"""
Resize input image as output window.
"""
from __future__ import absolute_import, print_function, division

import numpy as np
import scipy.ndimage
import tensorflow as tf

from niftynet.contrib.csv_reader.sampler_csv_rows import ImageWindowDatasetCSV
from niftynet.engine.image_window import LOCATION_FORMAT


class PatchBasedSamplerCSV(ImageWindowDatasetCSV):
    """
    This class generates samples by resizing every image returned by the
    reader to the window shape and, when a csv reader is given, attaching
    the csv entry of the same subject under the ``'label'`` key.

    NOTE(review): the class name suggests sampling at coordinates listed in
    a csv file, but this implementation does not read any coordinates; the
    locations it outputs come from ``self.dummy_coordinates``.

    Assuming the reader's output is 5d:
    ``Height x Width x Depth x time x Modality``
    """

    def __init__(self,
                 reader,
                 csv_reader=None,
                 window_sizes=None,
                 batch_size=1,
                 spatial_window_size=None,
                 windows_per_image=1,
                 shuffle=True,
                 queue_length=10,
                 num_threads=4,
                 smaller_final_batch_mode='pad',
                 name='resize_sampler_v2'):
        """
        All arguments except ``spatial_window_size`` are forwarded unchanged
        to ``ImageWindowDatasetCSV.__init__``.

        :param reader: image reader, later called as ``reader(idx=...)``
        :param csv_reader: optional csv reader, later called as
            ``csv_reader(idx=...)``; with ``None`` no label is attached
        :param window_sizes: window sizes passed to the base class
        :param batch_size: batch size passed to the base class
        :param spatial_window_size: if given (truthy), overrides the spatial
            shape of all windows via ``self.window.set_spatial_shape``
        :param windows_per_image: passed to the base class
        :param shuffle: passed to the base class; it also selects the number
            of epochs handed over (``-1`` when shuffling, ``1`` otherwise)
        :param queue_length: passed to the base class
        :param num_threads: passed to the base class
        :param smaller_final_batch_mode: passed to the base class
        :param name: passed to the base class
        """
        tf.logging.info('reading size of preprocessed images')
        self.csv_reader = csv_reader
        ImageWindowDatasetCSV.__init__(
            self,
            reader=reader,
            csv_reader=csv_reader,
            window_sizes=window_sizes,
            batch_size=batch_size,
            windows_per_image=windows_per_image,
            queue_length=queue_length,
            num_threads=num_threads,
            shuffle=shuffle,
            epoch=-1 if shuffle else 1,
            smaller_final_batch_mode=smaller_final_batch_mode,
            name=name)
        if spatial_window_size:
            # override all spatial window defined in input
            # modalities sections
            # this is useful when do inference with a spatial window
            # which is different from the training specifications
            self.window.set_spatial_shape(spatial_window_size)

    def layer_op(self, idx=None):
        """
        This function generates sampling windows to the input buffer
        image data are from ``self.reader()``.

        It first completes window shapes based on image data,
        then resizes each image to its window shape and yields
        a dictionary (required by input buffer). The generator never
        terminates on its own; every iteration reads from the reader again.

        :param idx: forwarded to ``self.reader(idx=idx)``
        :return: output data dictionary ``{'image_modality': data_array}``
            plus one location entry per modality and, if a csv reader is
            set, ``'label'`` and ``'label_location'``
        """
        while True:
            image_id, data, interp_orders = self.reader(idx=idx)
            image_shapes = \
                dict((name, data[name].shape) for name in self.window.names)
            # window shapes can be dynamic, here they
            # are converted to static ones
            # as now we know the image shapes
            static_window_shapes = self.window.match_image_shapes(image_shapes)

            # for resize sampler the coordinates are not used
            # simply use the spatial dims of the input image
            output_dict = {}
            for name in list(data):
                # prepare output dictionary keys
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name

                output_dict[coordinates_key] = self.dummy_coordinates(
                    image_id, static_window_shapes[name], self.window.n_samples)

                # prepare image data: the resized window does not depend on
                # the sample index, so it is computed once per modality and
                # reused for all ``n_samples`` entries instead of being
                # re-zoomed for every sample
                image_shape = image_shapes[name]
                window_shape = static_window_shapes[name]
                interp_order = interp_orders[name][0]
                if image_shape == window_shape or interp_order < 0:
                    # already in the same shape (or resizing is disabled
                    # by a negative interpolation order)
                    image_window = data[name]
                else:
                    zoom_ratio = [float(p) / float(d) for p, d in
                                  zip(window_shape, image_shape)]
                    image_window = zoom_3d(image=data[name],
                                           ratio=zoom_ratio,
                                           interp_order=interp_order)
                image_array = [image_window[np.newaxis, ...]
                               for _ in range(self.window.n_samples)]
                if len(image_array) > 1:
                    output_dict[image_data_key] = \
                        np.concatenate(image_array, axis=0)
                else:
                    output_dict[image_data_key] = image_array[0]
            # the output image shape should be
            # [enqueue_batch_size, x, y, z, time, modality]
            # here enqueue_batch_size = 1 as we only have one sample
            # per image
            if self.csv_reader is not None:
                _, label_dict, _ = self.csv_reader(idx=image_id)
                # NOTE(review): the 'label' / 'image' keys are hard-coded; a
                # csv reader initialised under another task name (e.g.
                # 'features') would presumably not provide 'label' -- confirm.
                output_dict['label'] = label_dict['label']
                output_dict['label_location'] = output_dict['image_location']
            yield output_dict


def zoom_3d(image, ratio, interp_order):
    """
    Resize a 5D image by zooming every 3D volume on its own.

    Each ``image[..., t, m]`` volume is passed to ``scipy.ndimage.zoom``
    with the first three entries of ``ratio`` and spline order
    ``interp_order``; the results are reassembled along the time and
    modality axes in their original order.

    :param image: 5D array (three spatial axes, then time, then modality)
    :param ratio: zoom factors; only ``ratio[:3]`` is used
    :param interp_order: interpolation order handed to ``scipy.ndimage.zoom``
    :return: 5D array with zoomed spatial axes and unchanged time and
        modality axes
    """
    assert image.ndim == 5, "input images should be 5D array"
    n_time_points, n_modalities = image.shape[3], image.shape[4]
    per_time_point = []
    for t_index in range(n_time_points):
        # zoom every modality of this time point; the two trailing singleton
        # axes are the time/modality slots the volume is merged back into
        volumes = [
            scipy.ndimage.zoom(
                image[..., t_index, m_index], ratio[:3], order=interp_order
            )[..., np.newaxis, np.newaxis]
            for m_index in range(n_modalities)
        ]
        per_time_point.append(np.concatenate(volumes, axis=-1))
    return np.concatenate(per_time_point, axis=-2)


In [None]:
import time

# Time `batches` batches of `batch_size` windows for several thread counts.
# NOTE(review): this cell reuses `image_reader` and `csv_reader` from whichever
# earlier cell ran last but asks for a 'label' window; if the features cell ran
# last, value['label'] is presumably missing and every batch only prints the
# error -- confirm which reader is intended here.
num_parallel_calls = [2, 4, 8, 16]
print(num_parallel_calls)
total_times_dict = {}
batches = 10
batch_size = 100
for num_parallel_call in num_parallel_calls:
    window_sizes = {'image': (100, 100, 100), 'label': (1, 1, 1)}
    sampler = ResizeSampler(reader=image_reader,
                            csv_reader=csv_reader,
                            window_sizes=window_sizes,
                            num_threads=num_parallel_call,
                            smaller_final_batch_mode='drop',
                            batch_size=batch_size,
                            queue_length=num_parallel_call)
    try:
        next_window = sampler.pop_batch_op()
        with tf.Session() as sess:
            print('Num Parallel Calls: {}'.format(num_parallel_call))
            t0 = time.time()
            batch_times = []
            sess.run(sampler.iterator.make_initializer(sampler.dataset))
            for i in range(batches):
                try:
                    value = sess.run(next_window)
                    print(value['image'].shape, value['label'].shape)
                except Exception as e:
                    # best effort: report the failure and keep timing; the
                    # elapsed time of a failed batch is still recorded below
                    print(e)
                batch_time = time.time() - t0
                batch_times.append(batch_time)
                print('Batch {} / {}'.format(i+1, batches))
                print('Time per batch: {}'.format(batch_time))
                t0 = time.time()
            total_times_dict[num_parallel_call] = batch_times
            # The first batch is left out of the mean (presumably because it
            # includes start-up cost); fall back to all batches when there is
            # only one so the mean cannot divide by zero.
            timed_batches = batch_times[1:] or batch_times
            if timed_batches:
                print('Mean batch time: {}'.format(
                    sum(timed_batches)/len(timed_batches)))
    finally:
        # Always stop the sampler's enqueuer, even if building or running the
        # session failed, so it does not outlive this iteration.
        if sampler._enqueuer is not None:
            sampler._enqueuer.stop()


In [None]:
plt.figure()
to_plot = [2, 4, 8, 16]
# Observed mean batch time per thread count; the first batch is skipped, as
# in the benchmark cell.
means = [np.mean(total_times_dict[num][1:]) for num in to_plot]
# Ideal (linear) scaling is measured against the smallest thread count:
# going from `baseline_threads` to `num` threads should divide the baseline
# time by num / baseline_threads. The previous version scaled each thread
# count's own observed mean, which is not a reference curve.
baseline_threads = to_plot[0]
baseline_mean = means[0]
ideal = [baseline_mean * baseline_threads / num for num in to_plot]
plt.plot(to_plot, means, label='observed')
plt.plot(to_plot, ideal, label='ideal')
plt.title('Mean time per image as threads increases for 80 thread machine')
plt.xlabel('Threads')
plt.ylabel('mean time')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Small dummy frame: ten identical rows of five integer columns.
df = pd.DataFrame(data=[(1, 0, 1, 0, 0)] * 10)

In [None]:
# Display every column of `df` except the first one (positional slice).
df.iloc[:, 1:]