In [1]:
from breastcancer.inference import check_subsetting, predict_mitoses
from time import gmtime, strftime
import shutil

Using TensorFlow backend.


## Predict the number of mitoses in each ROI
1. Generate the ROIs (regions of interest) for each slide.
2. Generate the tiles for each ROI.
3. Load the model and predict the number of mitoses in each ROI.
4. Parallelize the workflow with PySpark and run it on GPUs.

In [2]:
# Make the project code importable on the Spark workers.
# `addPyFile` needs an archive whose top-level entry is the `breastcancer`
# directory itself (with everything beneath it), i.e. the same layout that
# `zip -r breastcancer.zip breastcancer` would produce. The archive is rebuilt
# on every run so the workers always receive the current sources.
dirname = "breastcancer"
zipname = dirname + ".zip"
shutil.make_archive(
    base_name=dirname,
    format='zip',
    root_dir=dirname + "/..",
    base_dir=dirname,
)

# Ship the package archive first, then the stand-alone helper modules.
for py_dependency in (zipname, "train_mitoses.py", "preprocess_mitoses.py", "resnet50.py"):
    sc.addPyFile(py_dependency)

In [None]:
# --- Inputs -----------------------------------------------------------------
# NOTE: these are machine-specific absolute paths; adjust them for your setup.
# The slide directory is named `input_dir` rather than `dir` so that the
# Python builtin `dir()` is not shadowed for the rest of the kernel session.
input_dir = "/home/fei/deep-histopath/deep-histopath/data/training_image_data/"
model_file = '/home/fei/deep-histopath/deep-histopath/model/0.95114_acc_0.58515_loss_530_epoch_model.hdf5'
model_name = 'vgg'
suffix = '*-49*.svs'  # glob pattern selecting which slide files to process

# --- Cluster layout -----------------------------------------------------------
# One Spark partition per available GPU.
node_num = 1
gpu_per_node = 4
partition_num = gpu_per_node * node_num

# --- ROI extraction -----------------------------------------------------------
ROI_size = 6000
ROI_overlap = 0
ROI_channel = 3
skipROI = False

# --- Tile extraction ----------------------------------------------------------
tile_size = 64
tile_overlap = 0
tile_channel = 3

# --- Inference ----------------------------------------------------------------
batch_size = 128
threshold = 0.5
isGPU = True
isDebug = True

# --- Outputs ------------------------------------------------------------------
save_mitosis_locations = True
save_mask = True

# Build the prediction RDD. Spark evaluates lazily, so the RDD is only
# materialized when an action (e.g. `collect()`) is invoked on it; `cache()`
# marks it for reuse so repeated actions do not recompute the predictions.
predict_result_rdd = predict_mitoses(
    sc,
    model_path=model_file,
    model_name=model_name,
    input_dir=input_dir,
    file_suffix=suffix,
    partition_num=partition_num,
    ROI_size=ROI_size,
    ROI_overlap=ROI_overlap,
    ROI_channel=ROI_channel,
    skipROI=skipROI,
    tile_size=tile_size,
    tile_overlap=tile_overlap,
    tile_channel=tile_channel,
    threshold=threshold,
    isGPU=isGPU,
    save_mitosis_locations=save_mitosis_locations,
    save_mask=save_mask,
    batch_size=batch_size,
    isDebug=isDebug,
)
predict_result_rdd.cache()

Counter({'rr-ram3.softlayer.com': 0})
[(0, 'rr-ram3.softlayer.com'), (1, 'rr-ram3.softlayer.com'), (2, 'rr-ram3.softlayer.com'), (3, 'rr-ram3.softlayer.com')]
{0: 3, 1: 2, 2: 1, 3: 0}


PythonRDD[2] at RDD at PythonRDD.scala:48

In [None]:
def _utc_timestamp():
    """Return the current UTC time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return strftime("%Y-%m-%d %H:%M:%S", gmtime())


# Print a UTC timestamp before and after `collect()` so the wall-clock
# duration of the prediction job can be read off the cell output.
start_time = _utc_timestamp()
print(start_time)

result = predict_result_rdd.collect()
print(result)

end_time = _utc_timestamp()
print(end_time)

2017-10-06 22:25:15


## Experiment Test
1. Generate an ROI.
2. Generate the tiles from the ROI.
3. Check that the tiles match the ROI.
4. Predict the number of mitoses for each tile and sum the results.
5. This can also be used to test the performance of predicting a single ROI.

In [13]:
import openslide
import numpy as np
import keras
from keras.models import load_model
from breastcancer.preprocessing import create_tile_generator, get_20x_zoom_level
from skimage.util.shape import view_as_windows

# --- Settings for the single-ROI experiment ----------------------------------
# NOTE: these rebind several names set in the Spark prediction cell earlier in
# the notebook (`model_file`, `ROI_size`, `ROI_overlap`, `tile_size`,
# `tile_overlap`, `threshold`). In particular `tile_overlap` is 10 here.
model_file = 'model/0.95114_acc_0.58515_loss_530_epoch_model.hdf5'
filepath = 'data/training_image_data/TUPAC-TR-500.svs'
ROI_size, ROI_overlap = 6000, 0
tile_size, tile_overlap = 64, 10
threshold = 0.5

# --- Model --------------------------------------------------------------------
# Append a sigmoid activation to the saved network's output so that `model`
# emits values that can be compared against `threshold`.
# (presumably the saved model outputs raw logits — TODO confirm)
base_model = load_model(model_file)
probs = keras.layers.Activation('sigmoid')(base_model.output)
model = keras.models.Model(inputs=base_model.input, outputs=probs)

# --- Slide and ROI grid -------------------------------------------------------
slide = openslide.open_slide(str(filepath))
generator = create_tile_generator(slide, ROI_size, ROI_overlap)
zoom_level = get_20x_zoom_level(slide, generator)
cols, rows = generator.level_tiles[zoom_level]

# Enumerate every (level, col, row) address at the chosen zoom level,
# column by column.
ROI_indices = [
    (zoom_level, col, row)
    for col in range(cols)
    for row in range(rows)
]

# Use one fixed ROI (position 15 in the enumeration) for the experiment.
ROI_index = ROI_indices[15]
zl, col, row = ROI_index
ROI = np.asarray(generator.get_tile(zl, (col, row)))



In [16]:
# Slide a (tile_size x tile_size x 3) window across the ROI, advancing by
# `tile_size - tile_overlap` pixels per step, then flatten the grid of windows
# into a single batch of tiles.
window_shape = (tile_size, tile_size, 3)
stride = tile_size - tile_overlap
windows = view_as_windows(ROI, window_shape, step=stride)
tiles = windows.reshape(-1, tile_size, tile_size, 3)
print(f"The shape of tiles {tiles.shape};\nThe shape of ROI {ROI.shape}")

The shape of tiles (12100, 64, 64, 3);
The shape of ROI (6000, 6000, 3)


In [18]:
# Sanity check: ask `check_subsetting` whether the extracted tiles are
# consistent with the ROI they were cut from, then report the verdict.
isSame = check_subsetting(ROI, ROI_size, tiles, tile_size, tile_overlap)
subsetting_message = f"Is the ROI subsetting right? {isSame}"
print(subsetting_message)

Is the ROI subsetting right? True


In [29]:
prediction = model.predict(tiles, batch_size=128, verbose=True) > threshold
result = np.sum(prediction, dtype=np.int32)
print(f"The number of mitoses is {result}")

The number of mitoses is 8
