In [1]:
from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
import tensorflow_hub as tfh

from sklearn.metrics import roc_auc_score
from pathlib import Path
from uuid import uuid4

W0429 00:46:31.843410 140414395987712 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14


In [3]:
# Class names; the label lookup further down matches them against the name of
# the folder that directly contains each image.
POSITIVE_LABEL, NEGATIVE_LABEL = 'cancer', 'normal'

# NOTE(review): absolute, user-specific location -- anyone else running this
# notebook has to edit ROOT_DIR; everything below is derived from it.
ROOT_DIR = Path("/home/matejg/Project/crc_ml_model")
DATASET_DIR = ROOT_DIR.joinpath('data', 'processed')
TRAIN_DIR = DATASET_DIR.joinpath('train_slides')
TEST_DIR = DATASET_DIR.joinpath('test_slides')
VALID_DIR = DATASET_DIR.joinpath('valid_slides')
RESULT_DIR = ROOT_DIR.joinpath('reports', 'evaluation')

In [4]:
# Passed below as `num_parallel_calls` and as the prefetch `buffer_size`, so
# tf.data picks those values itself instead of using a hand-tuned number.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# NOTE(review): not referenced in the visible cells; presumably the probability
# of drawing a cancer tile when sampling training data -- confirm or remove.
CANCER_SAMPLE_PROBA = 0.5

In [2]:
# NOTE(review): this cell was executed second (In [2]) but is positioned after
# In [3] and In [4]. On a fresh "Run All" keep it ahead of any cell that builds
# tensors or datasets.
tf.enable_eager_execution()  # switch TF 1.x from graph mode to eager mode

In [5]:
def _load_and_preprocess_image(path, seed, apply_augmentation=True):
    """Read the file at `path` and turn it into a model-ready image tensor.

    Args:
        path: location of the PNG file to read (string or string tensor).
        seed: seed forwarded to the augmentation ops.
        apply_augmentation: when True, the random augmentations are applied.

    Returns:
        Whatever `_preprocess_image` produces for the file's raw bytes.
    """
    raw_bytes = tf.io.read_file(path)
    processed = _preprocess_image(raw_bytes, apply_augmentation, seed=seed)
    return processed

def _preprocess_image(image, apply_augmentation, seed):
    """Decode PNG bytes and rescale the pixels for the network.

    Steps: decode as a 3-channel image, resize to 299x299, divide by 255,
    optionally apply flip/rotate and colour augmentation, then map the result
    linearly with 2*x - 1.

    Args:
        image: encoded PNG contents.
        apply_augmentation: whether to run the random augmentations.
        seed: seed forwarded to the augmentation helpers.

    Returns:
        The processed image tensor.
    """
    decoded = tf.image.decode_png(image, channels=3)
    resized = tf.image.resize(decoded, [299, 299])
    unit_range = resized / 255.0  # normalize to [0,1] range

    if apply_augmentation:
        # Geometry first, colour second -- same order as before.
        flipped = _flip_rotate_augmentation(unit_range, seed)
        unit_range = _color_augmentation(flipped, seed)

    # rescale to [-1,1]
    return 2 * unit_range - 1

def _color_augmentation(x, seed):
    """Randomly perturb hue, saturation, brightness and contrast of an image.

    Hue and brightness are additive perturbations bounded by a max delta.
    Saturation and contrast are *multiplicative* factors drawn from
    [lower, upper], where a factor of 1.0 leaves the image unchanged. The
    ranges are therefore centred on 1.0; the previous bounds (0..0.25 and
    0..0.75) always removed at least 75 % of the saturation and 25 % of the
    contrast instead of jittering around the original image.

    Args:
        x: image tensor; assumed to be float RGB in [0, 1] -- the result is
            clipped to that range.
        seed: Python integer (or None) forwarded to every random op.

    Returns:
        The perturbed image, clipped to [0, 1].
    """
    max_hue_delta = 0.04
    max_saturation_delta = 0.25
    max_brightness_delta = 64.0 / 255.0
    max_contrast_delta = 0.75

    x = tf.image.random_hue(x, max_hue_delta, seed)
    x = tf.image.random_saturation(
        x, 1.0 - max_saturation_delta, 1.0 + max_saturation_delta, seed)
    x = tf.image.random_brightness(x, max_brightness_delta, seed)
    x = tf.image.random_contrast(
        x, 1.0 - max_contrast_delta, 1.0 + max_contrast_delta, seed)
    # The perturbations can push values outside the valid range.
    x = tf.clip_by_value(x, 0, 1)
    return x

def _flip_rotate_augmentation(x, seed):
    """Randomly mirror the image and rotate it by a multiple of 90 degrees.

    Args:
        x: image tensor.
        seed: Python integer (or None) forwarded to the random ops.

    Returns:
        The flipped and rotated image.
    """
    x = tf.image.random_flip_left_right(x, seed)
    # The number of quarter turns must be drawn by a TensorFlow op. A NumPy
    # draw is plain Python: when this function is used inside
    # `tf.data.Dataset.map` it would be evaluated once while the function is
    # traced, giving every image the same rotation, and it would ignore `seed`.
    rotation = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32, seed=seed)
    x = tf.image.rot90(x, k=rotation)
    return x

def _get_labels(all_image_paths):
    """Map every image path to its integer class label.

    The class is taken from the name of the directory that directly contains
    the image: POSITIVE_LABEL -> 1, NEGATIVE_LABEL -> 0. Any other directory
    name raises KeyError.

    Args:
        all_image_paths: iterable of paths (strings or Path objects).

    Returns:
        List of integer labels, in the same order as the input.
    """
    index_of = {POSITIVE_LABEL: 1, NEGATIVE_LABEL: 0}
    labels = []
    for image_path in all_image_paths:
        class_dir = Path(image_path).parent.name
        labels.append(index_of[class_dir])
    return labels

In [56]:
def get_val_dataset(batch_size, seed=None):
    """Build the evaluation dataset for all PNG tiles under VALID_DIR.

    Tiles are collected from two directory levels below VALID_DIR and their
    count is printed.

    Args:
        batch_size: number of images per batch.
        seed: optional seed forwarded to `_prepare_eval_dataset`.

    Returns:
        The tuple returned by `_prepare_eval_dataset`: (paths, dataset, steps).
    """
    validation_paths = [png_path for png_path in VALID_DIR.glob('*/*/*.png')]
    found = len(validation_paths)
    print('Found: {} validation images.'.format(found))
    return _prepare_eval_dataset(validation_paths, batch_size, seed)

def get_test_dataset(batch_size, seed=None):
    """Yield one evaluation dataset per slide directory under TEST_DIR.

    Args:
        batch_size: number of images per batch.
        seed: optional seed forwarded to `_prepare_eval_dataset`.

    Yields:
        Tuples of (slide directory name, iterator over the slide's image
        paths, dataset, number of batches). Entries of TEST_DIR that contain
        no matching PNG tiles are skipped.
    """
    # Sorted so that slides are visited in a stable order across runs.
    for test_slide in sorted(TEST_DIR.iterdir()):
        # `glob` returns a lazy generator; it has to be materialised because
        # the preparation step shuffles the collection and needs its length.
        all_test_image_paths = list(test_slide.glob('*/*.png'))
        if not all_test_image_paths:
            # Stray files or empty slide folders have nothing to evaluate.
            continue
        val_paths, val_ds, val_ds_steps = _prepare_eval_dataset(all_test_image_paths, batch_size, seed)
        yield (test_slide.name, iter(val_paths), val_ds, val_ds_steps)

def _prepare_eval_dataset(all_evaluation_image_paths, batch_size, seed):
    """Build a batched (image, label) dataset plus the matching list of paths.

    The paths are shuffled once, in Python, and the dataset is built in exactly
    that order, so the i-th dataset element corresponds to the i-th returned
    path. (An additional dataset-level shuffle used to reorder the elements
    independently of the returned list, which made the two impossible to pair
    up.)

    Args:
        all_evaluation_image_paths: iterable of image paths (strings or Path
            objects); it is not modified.
        batch_size: number of images per batch.
        seed: when not None, makes the shuffle order reproducible; it is also
            forwarded to the image loader.

    Returns:
        Tuple of (list of path strings in dataset order, batched and
        prefetched dataset of (image, int64 label) pairs, number of batches).
    """
    # Sort first so a given seed yields the same order regardless of the order
    # in which the filesystem enumerated the files.
    image_paths = sorted(str(path) for path in all_evaluation_image_paths)
    if seed is None:
        np.random.shuffle(image_paths)
    else:
        np.random.RandomState(seed).shuffle(image_paths)
    image_count = len(image_paths)

    # Explicit string dtype keeps an empty path list from being inferred as a
    # float tensor.
    val_path_ds = tf.data.Dataset.from_tensor_slices(
        tf.constant(image_paths, dtype=tf.string))
    val_image_ds = val_path_ds.map(
        lambda x: _load_and_preprocess_image(x, seed=seed, apply_augmentation=False),
        num_parallel_calls=AUTOTUNE)

    val_labels_ds = tf.data.Dataset.from_tensor_slices(
        tf.constant(_get_labels(image_paths), dtype=tf.int64))
    val_ds = tf.data.Dataset.zip((val_image_ds, val_labels_ds))

    val_ds = val_ds.batch(batch_size)
    val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
    return image_paths, val_ds, int(np.ceil(image_count / batch_size))

In [61]:
# Scratch check: build a file name template under RESULT_DIR and fill in its
# `{epoch}` placeholder.
# NOTE(review): the '.hdfs' extension may be a typo for '.hdf5' -- confirm.
str(RESULT_DIR / 'xxx-{epoch}.hdfs').format(epoch=2)

'/home/matejg/Project/crc_ml_model/reports/evaluation/xxx-2.hdfs'

In [24]:
# Count the PNG files located two directory levels below TEST_DIR.
len(list(TEST_DIR.glob('*/*/*.png')))

16466