In [1]:
import os.path as osp
import openslide
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline 
# 이부분 python 에서는 뺴주기

from skimage.filters import threshold_otsu
from openslide.deepzoom import DeepZoomGenerator
import cv2
from keras.utils.np_utils import to_categorical

# network
from keras.models import Sequential
from keras.layers import Lambda, Dropout
from keras.layers.convolutional import Convolution2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.models import load_model

# Unet
import numpy as np 
import os

import skimage.transform as trans
#import numpy as np
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import backend as keras


# train

from sklearn.model_selection import StratifiedShuffleSplit
from datetime import datetime

# evaluate
import matplotlib.gridspec as gridspec
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

import math
from PIL import Image
from xml.etree.ElementTree import ElementTree, Element, SubElement
from io import BytesIO
import skimage.io as io

from tensorflow.python.client import device_lib
#print(device_lib.list_local_devices())
import keras.backend.tensorflow_backend as K
from sklearn import metrics

from keras.preprocessing.image import *

Using TensorFlow backend.


In [9]:
PATCH_SIZE = 512
IS_TRAIN = True
def find_patches_from_slide(slide_path, truth_path, patch_size=PATCH_SIZE,filter_non_tissue=True,filter_only_all_tumor=True):
    
    slide_contains_tumor = 'pos' in slide_path
    
    ############### read_region을 위한 start, level, size를 구함 #######################
    BOUNDS_OFFSET_PROPS = (openslide.PROPERTY_NAME_BOUNDS_X, openslide.PROPERTY_NAME_BOUNDS_Y)
    BOUNDS_SIZE_PROPS = (openslide.PROPERTY_NAME_BOUNDS_WIDTH, openslide.PROPERTY_NAME_BOUNDS_HEIGHT)


    if slide_contains_tumor:
        with openslide.open_slide(slide_path) as slide:
            start = (int(slide.properties.get('openslide.bounds-x',0)),int(slide.properties.get('openslide.bounds-y',0)))
            level = np.log2(patch_size) 
            level = int(level)
            
            size_scale = tuple(int(slide.properties.get(prop, l0_lim)) / l0_lim
                            for prop, l0_lim in zip(BOUNDS_SIZE_PROPS,
                            slide.dimensions))
            _l_dimensions = tuple(tuple(int(math.ceil(l_lim * scale))
                            for l_lim, scale in zip(l_size, size_scale))
                            for l_size in slide.level_dimensions)
            size = _l_dimensions[level]
            
            
            with openslide.open_slide(truth_path) as truth:
                print('truth dimensions: ',truth.dimensions)
                z_dimensions=[]
                z_size = truth.dimensions
                z_dimensions.append(z_size)
                while z_size[0] > 1 or z_size[1] > 1:
                    
                    z_size = tuple(max(1, int(math.ceil(z / 2))) for z in z_size)
                    z_dimensions.append(z_size)
                print('truth_4_dimension_size:',z_dimensions[4]) # level-4
            size = z_dimensions[level-4]
            slide4 = slide.read_region(start,level,size)
            print('slide4_size',slide4.size)
    else :
        with openslide.open_slide(slide_path) as slide:
            start = (0,0)
            level = np.log2(patch_size) 
            level = int(level)
            
            size_scale = (1,1)
            _l_dimensions = tuple(tuple(int(math.ceil(l_lim * scale))
                            for l_lim, scale in zip(l_size, size_scale))
                            for l_size in slide.level_dimensions)
            size = _l_dimensions[level]
            
            slide4 = slide.read_region(start,level,size) 
    ####################################################################################
    
    
    # is_tissue 부분 
    slide4_grey = np.array(slide4.convert('L'))
    binary = slide4_grey > 0  # black이면 0임
    
    # 검은색 제외하고 흰색영역(배경이라고 여겨지는)에 대해서도 작업해주어야함.
    slide4_not_black = slide4_grey[slide4_grey>0]
    thresh = threshold_otsu(slide4_not_black)
    
    I, J = slide4_grey.shape
    for i in range(I):
        for j in range(J):
            if slide4_grey[i,j] > thresh :
                binary[i,j] = False
    patches = pd.DataFrame(pd.DataFrame(binary).stack())
    patches['is_tissue'] = patches[0]
    patches.drop(0, axis=1,inplace =True)
    patches.loc[:,'slide_path'] = slide_path
    

    if slide_contains_tumor:
        with openslide.open_slide(truth_path) as truth:
            thumbnail_truth = truth.get_thumbnail(size) 
        
        patches_y = pd.DataFrame(pd.DataFrame(np.array(thumbnail_truth.convert("L"))).stack())
        # !!
        patches_y['is_tumor'] = patches_y[0] > 0
        
        # mask된 영역이 애매할 수도 있으므로
        # !!
        patches_y['is_all_tumor'] = patches_y[0] == 255
        patches_y.drop(0, axis=1, inplace=True)
        samples = pd.concat([patches, patches_y], axis=1) #len(samples)
    else:
        samples = patches
        #dfmi.loc[:,('one','second')] = value
        samples.loc[:,'is_tumor'] = False
        samples.loc[:,'is_all_tumor'] = False
    
    if filter_non_tissue:
        samples = samples[samples.is_tissue == True] # remove patches with no tissue #samples = samples[samples.is_tissue == True]
    
    if filter_only_all_tumor :
        samples['tile_loc'] = list(samples.index)
        all_tissue_samples1 = samples[samples.is_tumor==False]
        all_tissue_samples1 = all_tissue_samples1.append(samples[samples.is_all_tumor==True])
        
        all_tissue_samples1.reset_index(inplace=True, drop=True)
    else :
        return samples
    
    return all_tissue_samples1


NUM_CLASSES = 2 # not_tumor, tumor

file_handles=[]
def gen_imgs(all_image_path, all_mask_path, samples, batch_size, patch_size = PATCH_SIZE, shuffle=True):
   
    num_samples = len(samples)
    # 특정 몇개의 slide만 open 해서 쓰기
    # 4개씩 묶었으니까 
  
    slide_path0 = all_image_path[0]
    slide_path1 = all_image_path[1]
    slide_path2 = all_image_path[2]
    slide_path3 = all_image_path[3]
    
    
    # slide 0~3 까지 미리 열어두기
    slide0 = openslide.open_slide(slide_path0)
    slide1 = openslide.open_slide(slide_path1)
    slide2 = openslide.open_slide(slide_path2)
    slide3 = openslide.open_slide(slide_path3)
    file_handles.append(slide0)
    file_handles.append(slide1)
    file_handles.append(slide2)
    file_handles.append(slide3)
    
    # with openslide.open_slide(slide_path) as slide
    tiles0 = DeepZoomGenerator(slide0,tile_size=patch_size, overlap=0, limit_bounds=False) 
    tiles1 = DeepZoomGenerator(slide1,tile_size=patch_size, overlap=0, limit_bounds=False)
    tiles2 = DeepZoomGenerator(slide2,tile_size=patch_size, overlap=0, limit_bounds=False)
    tiles3 = DeepZoomGenerator(slide3,tile_size=patch_size, overlap=0, limit_bounds=False)
    
    
    if 'pos' in slide_path0:
        start_x0 = int(slide0.properties.get('openslide.bounds-x',0))
        start_y0 = int(slide0.properties.get('openslide.bounds-y',0))
        start_x0 = start_x0 / patch_size
        start_y0 = start_y0 / patch_size
        
        truth0 = openslide.open_slide(all_mask_path[0])
        truth_tiles0 = DeepZoomGenerator(truth0, tile_size=32, overlap=0, limit_bounds=False) 
        
    else : 
        start_x0 = 0
        start_y0 = 0
    
    if 'pos' in slide_path1:
        start_x1 = int(slide1.properties.get('openslide.bounds-x',0))
        start_y1 = int(slide1.properties.get('openslide.bounds-y',0))
        start_x1 = start_x1 / patch_size
        start_y1 = start_y1 / patch_size
        
        truth1 = openslide.open_slide(all_mask_path[1])
        truth_tiles1 = DeepZoomGenerator(truth1, tile_size=32, overlap=0, limit_bounds=False) 
        
    else : 
        start_x1 = 0
        start_y1 = 0
    
    if 'pos' in slide_path2:
        start_x2 = int(slide2.properties.get('openslide.bounds-x',0))
        start_y2 = int(slide2.properties.get('openslide.bounds-y',0))
        start_x2 = start_x2 / patch_size
        start_y2 = start_y2 / patch_size
        
        truth2 = openslide.open_slide(all_mask_path[2])
        truth_tiles2 = DeepZoomGenerator(truth2, tile_size=32, overlap=0, limit_bounds=False) 
        
    else : 
        start_x2 = 0
        start_y2 = 0
        
    if 'pos' in slide_path3:
        start_x3 = int(slide3.properties.get('openslide.bounds-x',0))
        start_y3 = int(slide3.properties.get('openslide.bounds-y',0))
        start_x3 = start_x3 / patch_size
        start_y3 = start_y3 / patch_size
        
        truth3 = openslide.open_slide(all_mask_path[3])
        truth_tiles3 = DeepZoomGenerator(truth3, tile_size=32, overlap=0, limit_bounds=False) 
        
    else : 
        start_x3 = 0
        start_y3 = 0
    

    
    for epo in range(5): # Loop forever so the generator never terminates
        if shuffle:
            samples = samples.sample(frac=1) # shuffle samples

        for offset in range(0, num_samples, batch_size):
            batch_samples = samples.iloc[offset:offset+batch_size]
            images = []
            masks = []
            for _, batch_sample in batch_samples.iterrows(): # 배치마다 deep zoom 하네 약간 비효율적
                
                # 여기서 하나씩 4개 체크해서 해당되는 부분으로 가야지. for 4번 돌리면서 가야한다.
                mask_size_up = np.zeros((patch_size,patch_size))
                a,b=mask_size_up.shape
                
                if batch_sample.slide_path == slide_path0:
                    x, y = batch_sample.tile_loc[::-1]
                    x += start_x0
                    y += start_y0
                    img = tiles0.get_tile(tiles0.level_count-1, (x,y))
                    if 'pos' in slide_path0:
                        mask = truth_tiles0.get_tile(truth_tiles0.level_count-1, batch_sample.tile_loc[::-1])
                        mask = (cv2.cvtColor(np.array(mask), cv2.COLOR_RGB2GRAY) > 0).astype(int)
                            # mask_size_up , 32 to ,512
                        for i in range(a):
                            for j in range(b) :
                                k = i//16
                                l = j//16
                                mask_size_up[i,j] = mask[k,l]
                    
                elif batch_sample.slide_path == slide_path1:
                    x, y = batch_sample.tile_loc[::-1]
                    x += start_x1
                    y += start_y1
                    img = tiles1.get_tile(tiles1.level_count-1, (x,y))
                    if 'pos' in slide_path1:
                        mask = truth_tiles1.get_tile(truth_tiles1.level_count-1, batch_sample.tile_loc[::-1])
                        mask = (cv2.cvtColor(np.array(mask), cv2.COLOR_RGB2GRAY) > 0).astype(int)
                            # mask_size_up , 16 to 512
                        for i in range(a):
                            for j in range(b) :
                                k = i//16
                                l = j//16
                                mask_size_up[i,j] = mask[k,l]
                
                elif batch_sample.slide_path == slide_path2:
                    x, y = batch_sample.tile_loc[::-1]
                    x += start_x2
                    y += start_y2
                    img = tiles2.get_tile(tiles2.level_count-1, (x,y))
                    if 'pos' in slide_path2:
                        mask = truth_tiles2.get_tile(truth_tiles2.level_count-1, batch_sample.tile_loc[::-1])
                        mask = (cv2.cvtColor(np.array(mask), cv2.COLOR_RGB2GRAY) > 0).astype(int)
                            # mask_size_up , 16 to 256
                        for i in range(a):
                            for j in range(b) :
                                k = i//16
                                l = j//16
                                mask_size_up[i,j] = mask[k,l]
                
                else:
                    x, y = batch_sample.tile_loc[::-1]
                    x += start_x3
                    y += start_y3
                    img = tiles3.get_tile(tiles3.level_count-1, (x,y))
                    if 'pos' in slide_path3:
                        mask = truth_tiles3.get_tile(truth_tiles3.level_count-1, batch_sample.tile_loc[::-1])
                        mask = (cv2.cvtColor(np.array(mask), cv2.COLOR_RGB2GRAY) > 0).astype(int)
                            # mask_size_up , 16 to 256
                        for i in range(a):
                            for j in range(b) :
                                k = i//16
                                l = j//16
                                mask_size_up[i,j] = mask[k,l]
                
                    

                images.append(np.array(img))
                masks.append(mask_size_up)

            X_train = np.array(images)
            y_train = np.array(masks)
            #print('x_train_shape :', X_train.shape)
            
            y_train = to_categorical(y_train, num_classes=2).reshape(y_train.shape[0], patch_size, patch_size, 2) 
            #print('y_train_shape : ',y_train.shape)
            
            #X_train, y_train = datagen().flow(X_train,y = y_train,batch_size = batch_size)
            X_train, y_train = next(ImageDataGenerator(
                rotation_range=45,
                horizontal_flip=True,
                vertical_flip=True,
                brightness_range =(0.4,1.)).flow(X_train,y=y_train,batch_size=batch_size))
            #print(X_train.shape)
            #print(y_train.shape)
            yield X_train, y_train
            
def predict_batch_from_model(patches, model):
    """Predict which pixels are tumor.
    
    input: patch: `batch_size`x256x256x3, rgb image
    input: model: keras model
    output: prediction: 256x256x1, per-pixel tumor probability
    """
    predictions = model.predict(patches)
    predictions = predictions[:, :, :, 1]
    return predictions
def predict_from_model(patch, model):
    """Predict which pixels are tumor.
    
    input: patch: 256x256x3, rgb image
    input: model: keras model
    output: prediction: 256x256x1, per-pixel tumor probability
    """
    
    prediction = model.predict(patch.reshape(1, PATCH_SIZE, PATCH_SIZE, 3))
    prediction = prediction[:, :, :, 1].reshape(PATCH_SIZE, PATCH_SIZE)
    return prediction

def predict_from_model_n(patch, model):
    """Predict which pixels are tumor.
    
    input: patch: 256x256x3, rgb image
    input: model: keras model
    output: prediction: 256x256x1, per-pixel tumor probability
    """
    
    prediction = model.predict(patch.reshape(1, PATCH_SIZE, PATCH_SIZE, 3))
    prediction = prediction[:, :, :, 0].reshape(PATCH_SIZE, PATCH_SIZE)
    return prediction

def simple_model(pretrained_weights = None):
    model = Sequential()
    model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape=(PATCH_SIZE, PATCH_SIZE, 3)))
    model.add(Convolution2D(100, (3, 3), strides=(2, 2), activation='elu', padding='same'))
    model.add(MaxPooling2D())
    model.add(Convolution2D(200, (3, 3), strides=(2, 2), activation='elu', padding='same'))
    model.add(MaxPooling2D())
    model.add(Convolution2D(300, (3, 3), activation='elu', padding='same'))
    model.add(Convolution2D(300, (3, 3), activation='elu',  padding='same'))
    model.add(Dropout(0.2))
    model.add(Convolution2D(2, (1, 1))) # this is called upscore layer for some reason?
    model.add(Conv2DTranspose(2, (31, 31), strides=(16, 16), activation='softmax', padding='same'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    if(pretrained_weights):
        model.load_weights(pretrained_weights)
        
    return model

In [3]:
def read_data_path():
    image_paths = []
    with open('train.txt','r') as f:
        for line in f:
            line = line.rstrip('\n')
            image_paths.append(line)
    #print('image_path # : ',len(image_paths))

    tumor_mask_paths = []

    with open('train_mask.txt','r') as f:
        for line in f:
            line = line.rstrip('\n')
            tumor_mask_paths.append(line)
    #print('mask_patch # : ',len(tumor_mask_paths))
    
    return image_paths, tumor_mask_paths

def read_test_data_path():
    image_paths = []
    with open('test.txt','r') as f:
        for line in f:
            line = line.rstrip('\n')
            image_paths.append(line)
    #print('image_path # : ',len(image_paths))
    
    return image_paths

test_image_paths = read_test_data_path()
image_paths, tumor_mask_paths = read_data_path()
image_paths = []
with open('train.txt','r') as f:
    for line in f:
        line = line.rstrip('\n')
        image_paths.append(line)
print('image_path # : ',len(image_paths))

tumor_mask_paths = []
with open('train_mask.txt','r') as f:
    for line in f:
        line = line.rstrip('\n')
        tumor_mask_paths.append(line)
print('mask_patch # : ',len(tumor_mask_paths))

slide_4_list_1 = [[102,104,29,44],[144,55,30,18],[125,56,35,40],[54,65,21,36],[139,82,1,49],[73,108,7,23],[107,117,24,52],[106,103,27,13]
               ,[105,151,15,2],[75,100,41,9],[156,113,32,37],[150,88,39,10],[84,122,5,50],[93,118,53,47],[87,78,45,34],[116,98,48,46],
                [72,131,22,42]]
slide_4_list_2 = [[109,58,14,28],[101,69,11,43],[94,74,3,20],[64,140,17,16],[92,154,8,26],[99,60,0,33],[86,146,25,19],[68,112,38,51],
                 [71,136,31,4],[59,91,12,6]]
slide_4_list_3 = [[143,132,124,85],[95,120,81,77],[97,96,110,83],[152,128,149,155],[153,111,57,138],[134,135,114,76],
                  [123,90,121,61],[147,148,119,142],[66,137,63,80],[70,79,115,133],[129,141,127,145]]
slide_4_test = [[55,55,0,0],[55,55,0,0]]

all_image_path = []
all_mask_path = []
for j in range(4):
    image_path = image_paths[slide_4_test[0][j]][1:] # 이 부분은 data 읽을때 고치자 ( [1:] 빼야함)
    mask_path = tumor_mask_paths[slide_4_test[0][j]][1:] # 이 부분은 data 읽을때 고치자
    all_image_path.append(image_path)
    all_mask_path.append(mask_path)

image_path # :  157
mask_patch # :  157


In [4]:
model = simple_model(pretrained_weights='s_512.h5')

In [10]:
columns = ['is_tissue','slide_path','is_tumor','is_all_tumor','tile_loc']

BATCH_SIZE = 32
N_EPOCHS = 5

for i in range(len(slide_4_test)):
    
    # [1] dataset , 2 pos, 2 neg, mean ratio = 3:1
    four_samples = pd.DataFrame(columns = columns)
    four_image_path = list()
    four_mask_path = list()    
    for j in range(4):
        image_path = image_paths[slide_4_test[i][j]][1:] # 이 부분은 data 읽을때 고치자 ( [1:] 빼야함)
        mask_path = tumor_mask_paths[slide_4_test[i][j]][1:] # 이 부분은 data 읽을때 고치자
        samples = find_patches_from_slide(image_path, mask_path)
        
        four_samples = four_samples.append(samples)   
        four_image_path.append(image_path)
        four_mask_path.append(mask_path)
    NUM_SAMPLES = len(four_samples)
    if NUM_SAMPLES > 10000:
        NUM_SAMPLES = 10000
    
    samples = four_samples.sample(NUM_SAMPLES, random_state=42)
    samples.reset_index(drop=True, inplace=True)
    
    tumor_samples = four_samples[four_samples.is_tumor == True]
    print(len(tumor_samples))
    non_tumor_samples = four_samples[four_samples.is_tumor == False]
    print(len(non_tumor_samples))
    non_tumor_samples_3_ratio = non_tumor_samples.sample(len(tumor_samples) * 3, random_state = 42,replace=True)
    
    all_sample = tumor_samples.append(non_tumor_samples_3_ratio)

    split = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
    for train_index, test_index in split.split(samples, samples["is_tumor"]):
            train_samples = samples.loc[train_index]
            validation_samples = samples.loc[test_index]
    
    train_generator = gen_imgs(four_image_path,four_mask_path,train_samples, BATCH_SIZE)
    validation_generator = gen_imgs(four_image_path,four_mask_path,validation_samples, BATCH_SIZE)
    
    train_start_time = datetime.now()
    history = model.fit_generator(train_generator, np.ceil(len(train_samples) / BATCH_SIZE),
        validation_data=validation_generator,
        validation_steps=np.ceil(len(validation_samples) / BATCH_SIZE),
        epochs=N_EPOCHS)
    if file_handles != []:
        for fh in file_handles:
            fh.close()
    file_handles=[]
    #del train_generator
    #del validation_generator
    train_end_time = datetime.now()
    print("Model training time: %.1f minutes" % ((train_end_time - train_start_time).seconds / 60,))
    model.save('s_512.h5')
    # split
    # data gen : all_image_path, all_mask_path

truth dimensions:  (5316, 10007)
truth_4_dimension_size: (333, 626)
slide4_size (167, 313)
truth dimensions:  (5316, 10007)
truth_4_dimension_size: (333, 626)
slide4_size (167, 313)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


14538
39456
Epoch 1/5
  2/282 [..............................] - ETA: 49:14 - loss: 2.1324 - acc: 0.62 - ETA: 45:46 - loss: 1.7225 - acc: 0.6405

  % delta_t_median)


  4/282 [..............................] - ETA: 44:10 - loss: 1.3583 - acc: 0.67 - ETA: 44:03 - loss: 1.1564 - acc: 0.7030

  % delta_t_median)


  5/282 [..............................] - ETA: 43:51 - loss: 1.0286 - acc: 0.7368

  % delta_t_median)


  6/282 [..............................] - ETA: 43:57 - loss: 0.9546 - acc: 0.7481

  % delta_t_median)


  9/282 [..............................] - ETA: 43:21 - loss: 0.9083 - acc: 0.74 - ETA: 43:18 - loss: 0.8687 - acc: 0.74 - ETA: 43:14 - loss: 0.8414 - acc: 0.7449

  % delta_t_median)


 12/282 [>.............................] - ETA: 43:02 - loss: 0.8171 - acc: 0.74 - ETA: 42:52 - loss: 0.7970 - acc: 0.74 - ETA: 41:30 - loss: 0.7845 - acc: 0.7382

  % delta_t_median)


 14/282 [>.............................] - ETA: 40:24 - loss: 0.7682 - acc: 0.74 - ETA: 39:21 - loss: 0.7567 - acc: 0.7398

  % delta_t_median)




Epoch 2/5
 14/282 [>.............................] - ETA: 55s - loss: 0.3787 - acc: 0.81 - ETA: 1:00 - loss: 0.3412 - acc: 0.851 - ETA: 1:02 - loss: 0.3172 - acc: 0.869 - ETA: 1:03 - loss: 0.3192 - acc: 0.873 - ETA: 1:05 - loss: 0.3250 - acc: 0.869 - ETA: 1:19 - loss: 0.3174 - acc: 0.874 - ETA: 1:16 - loss: 0.3141 - acc: 0.874 - ETA: 1:15 - loss: 0.3144 - acc: 0.875 - ETA: 1:24 - loss: 0.3190 - acc: 0.873 - ETA: 1:27 - loss: 0.3096 - acc: 0.877 - ETA: 1:29 - loss: 0.3127 - acc: 0.876 - ETA: 3:29 - loss: 0.3204 - acc: 0.874 - ETA: 6:23 - loss: 0.3168 - acc: 0.875 - ETA: 8:42 - loss: 0.3158 - acc: 0.8751

  % delta_t_median)


 16/282 [>.............................] - ETA: 10:57 - loss: 0.3102 - acc: 0.87 - ETA: 12:43 - loss: 0.3112 - acc: 0.8772

  % delta_t_median)


 17/282 [>.............................] - ETA: 14:13 - loss: 0.3164 - acc: 0.8745

  % delta_t_median)


 19/282 [=>............................] - ETA: 15:27 - loss: 0.3151 - acc: 0.87 - ETA: 16:40 - loss: 0.3188 - acc: 0.8719

  % delta_t_median)


 20/282 [=>............................] - ETA: 17:43 - loss: 0.3179 - acc: 0.8718

  % delta_t_median)


 24/282 [=>............................] - ETA: 18:37 - loss: 0.3200 - acc: 0.87 - ETA: 19:15 - loss: 0.3175 - acc: 0.87 - ETA: 19:28 - loss: 0.3171 - acc: 0.87 - ETA: 19:38 - loss: 0.3184 - acc: 0.8697

  % delta_t_median)


 25/282 [=>............................] - ETA: 19:47 - loss: 0.3227 - acc: 0.8676

  % delta_t_median)




Epoch 3/5
  1/282 [..............................] - ETA: 55s - loss: 0.2684 - acc: 0.8958

  % delta_t_median)


  2/282 [..............................] - ETA: 1:36 - loss: 0.2501 - acc: 0.8975

  % delta_t_median)


  4/282 [..............................] - ETA: 1:43 - loss: 0.2427 - acc: 0.905 - ETA: 1:54 - loss: 0.2508 - acc: 0.9005

  % delta_t_median)


  7/282 [..............................] - ETA: 2:06 - loss: 0.2448 - acc: 0.902 - ETA: 1:54 - loss: 0.2736 - acc: 0.898 - ETA: 1:48 - loss: 0.2585 - acc: 0.9043

  % delta_t_median)




Epoch 4/5
  1/282 [..............................] - ETA: 54s - loss: 0.1535 - acc: 0.9443

  % delta_t_median)


  2/282 [..............................] - ETA: 1:47 - loss: 0.1893 - acc: 0.9303

  % delta_t_median)


  3/282 [..............................] - ETA: 2:04 - loss: 0.2248 - acc: 0.9138

  % delta_t_median)


  4/282 [..............................] - ETA: 2:19 - loss: 0.2704 - acc: 0.8897

  % delta_t_median)


  5/282 [..............................] - ETA: 2:29 - loss: 0.2753 - acc: 0.8858

  % delta_t_median)


  8/282 [..............................] - ETA: 2:36 - loss: 0.2882 - acc: 0.880 - ETA: 2:34 - loss: 0.2981 - acc: 0.874 - ETA: 2:29 - loss: 0.2914 - acc: 0.8767

  % delta_t_median)


  9/282 [..............................] - ETA: 2:19 - loss: 0.2849 - acc: 0.8798

  % delta_t_median)


 10/282 [>.............................] - ETA: 2:17 - loss: 0.2714 - acc: 0.8862

  % delta_t_median)


 13/282 [>.............................] - ETA: 2:16 - loss: 0.2705 - acc: 0.887 - ETA: 3:31 - loss: 0.2681 - acc: 0.889 - ETA: 6:24 - loss: 0.2647 - acc: 0.8914

  % delta_t_median)


 14/282 [>.............................] - ETA: 8:45 - loss: 0.2667 - acc: 0.8906

  % delta_t_median)


 17/282 [>.............................] - ETA: 10:50 - loss: 0.2652 - acc: 0.89 - ETA: 12:40 - loss: 0.2638 - acc: 0.89 - ETA: 14:20 - loss: 0.2607 - acc: 0.8942

  % delta_t_median)


 20/282 [=>............................] - ETA: 15:41 - loss: 0.2636 - acc: 0.89 - ETA: 16:54 - loss: 0.2638 - acc: 0.89 - ETA: 18:00 - loss: 0.2627 - acc: 0.8921

  % delta_t_median)


 22/282 [=>............................] - ETA: 18:59 - loss: 0.2594 - acc: 0.89 - ETA: 19:47 - loss: 0.2555 - acc: 0.8960

  % delta_t_median)


 25/282 [=>............................] - ETA: 19:57 - loss: 0.2556 - acc: 0.89 - ETA: 20:08 - loss: 0.2551 - acc: 0.89 - ETA: 20:18 - loss: 0.2502 - acc: 0.8979

  % delta_t_median)




Epoch 5/5
  2/282 [..............................] - ETA: 59s - loss: 0.1521 - acc: 0.94 - ETA: 1:17 - loss: 0.1977 - acc: 0.9215

  % delta_t_median)


  4/282 [..............................] - ETA: 1:36 - loss: 0.1914 - acc: 0.921 - ETA: 1:27 - loss: 0.1993 - acc: 0.9189

  % delta_t_median)


  5/282 [..............................] - ETA: 1:34 - loss: 0.1951 - acc: 0.9202

  % delta_t_median)


  6/282 [..............................] - ETA: 1:50 - loss: 0.2063 - acc: 0.9166

  % delta_t_median)


  7/282 [..............................] - ETA: 2:05 - loss: 0.2157 - acc: 0.9131

  % delta_t_median)


  8/282 [..............................] - ETA: 2:04 - loss: 0.2184 - acc: 0.9124

  % delta_t_median)


 14/282 [>.............................] - ETA: 1:57 - loss: 0.2243 - acc: 0.910 - ETA: 1:52 - loss: 0.2232 - acc: 0.909 - ETA: 1:53 - loss: 0.2259 - acc: 0.909 - ETA: 3:34 - loss: 0.2203 - acc: 0.912 - ETA: 6:28 - loss: 0.2197 - acc: 0.913 - ETA: 8:49 - loss: 0.2209 - acc: 0.9120

  % delta_t_median)


 16/282 [>.............................] - ETA: 10:57 - loss: 0.2175 - acc: 0.91 - ETA: 12:46 - loss: 0.2182 - acc: 0.9124

  % delta_t_median)


 17/282 [>.............................] - ETA: 14:19 - loss: 0.2196 - acc: 0.9119

  % delta_t_median)


 18/282 [>.............................] - ETA: 15:40 - loss: 0.2178 - acc: 0.9127

  % delta_t_median)


 20/282 [=>............................] - ETA: 16:56 - loss: 0.2180 - acc: 0.91 - ETA: 18:01 - loss: 0.2205 - acc: 0.9101

  % delta_t_median)


 22/282 [=>............................] - ETA: 19:00 - loss: 0.2234 - acc: 0.90 - ETA: 19:50 - loss: 0.2230 - acc: 0.9092

  % delta_t_median)




Model training time: 151.6 minutes
truth dimensions:  (5316, 10007)
truth_4_dimension_size: (333, 626)
slide4_size (167, 313)
truth dimensions:  (5316, 10007)
truth_4_dimension_size: (333, 626)
slide4_size (167, 313)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


14538
39456
Epoch 1/5
  1/282 [..............................] - ETA: 47:34 - loss: 0.2003 - acc: 0.9235

  % delta_t_median)


  2/282 [..............................] - ETA: 43:56 - loss: 0.2667 - acc: 0.8887

  % delta_t_median)


  4/282 [..............................] - ETA: 43:24 - loss: 0.2304 - acc: 0.90 - ETA: 42:49 - loss: 0.2188 - acc: 0.9089

  % delta_t_median)


  6/282 [..............................] - ETA: 42:28 - loss: 0.2045 - acc: 0.91 - ETA: 41:45 - loss: 0.2121 - acc: 0.9160

  % delta_t_median)


 11/282 [>.............................] - ETA: 41:16 - loss: 0.2342 - acc: 0.90 - ETA: 40:56 - loss: 0.2285 - acc: 0.90 - ETA: 40:52 - loss: 0.2283 - acc: 0.90 - ETA: 40:43 - loss: 0.2353 - acc: 0.90 - ETA: 40:25 - loss: 0.2313 - acc: 0.9060

  % delta_t_median)


 12/282 [>.............................] - ETA: 39:08 - loss: 0.2327 - acc: 0.9047

  % delta_t_median)


 13/282 [>.............................] - ETA: 38:03 - loss: 0.2288 - acc: 0.9067

  % delta_t_median)




Epoch 2/5
  1/282 [..............................] - ETA: 57s - loss: 0.2011 - acc: 0.9105

  % delta_t_median)


  2/282 [..............................] - ETA: 1:58 - loss: 0.2720 - acc: 0.8881

  % delta_t_median)


  3/282 [..............................] - ETA: 2:30 - loss: 0.2567 - acc: 0.8950

  % delta_t_median)


  4/282 [..............................] - ETA: 2:31 - loss: 0.2383 - acc: 0.9034

  % delta_t_median)


  5/282 [..............................] - ETA: 2:36 - loss: 0.2169 - acc: 0.9135

  % delta_t_median)


  6/282 [..............................] - ETA: 2:41 - loss: 0.2285 - acc: 0.9082

  % delta_t_median)


  8/282 [..............................] - ETA: 2:43 - loss: 0.2375 - acc: 0.903 - ETA: 2:31 - loss: 0.2459 - acc: 0.9014

  % delta_t_median)


 10/282 [>.............................] - ETA: 2:21 - loss: 0.2357 - acc: 0.906 - ETA: 2:13 - loss: 0.2334 - acc: 0.9070

  % delta_t_median)


 11/282 [>.............................] - ETA: 2:09 - loss: 0.2323 - acc: 0.9068

  % delta_t_median)


 12/282 [>.............................] - ETA: 3:25 - loss: 0.2265 - acc: 0.9091

  % delta_t_median)




Epoch 3/5
  1/282 [..............................] - ETA: 56s - loss: 0.2430 - acc: 0.8960

  % delta_t_median)


  2/282 [..............................] - ETA: 1:40 - loss: 0.2265 - acc: 0.9034

  % delta_t_median)


  3/282 [..............................] - ETA: 2:15 - loss: 0.2010 - acc: 0.9157

  % delta_t_median)


  4/282 [..............................] - ETA: 2:20 - loss: 0.2423 - acc: 0.8983

  % delta_t_median)


  5/282 [..............................] - ETA: 2:05 - loss: 0.2399 - acc: 0.9001

  % delta_t_median)


  6/282 [..............................] - ETA: 2:10 - loss: 0.2304 - acc: 0.9059

  % delta_t_median)


  8/282 [..............................] - ETA: 2:16 - loss: 0.2167 - acc: 0.912 - ETA: 2:20 - loss: 0.2098 - acc: 0.9165

  % delta_t_median)


  9/282 [..............................] - ETA: 2:28 - loss: 0.2163 - acc: 0.9163

  % delta_t_median)


 12/282 [>.............................] - ETA: 2:33 - loss: 0.2173 - acc: 0.915 - ETA: 2:35 - loss: 0.2092 - acc: 0.918 - ETA: 3:39 - loss: 0.2190 - acc: 0.9130

  % delta_t_median)


 13/282 [>.............................] - ETA: 6:29 - loss: 0.2123 - acc: 0.9159

  % delta_t_median)


 14/282 [>.............................] - ETA: 8:52 - loss: 0.2133 - acc: 0.9145

  % delta_t_median)


 15/282 [>.............................] - ETA: 10:54 - loss: 0.2110 - acc: 0.9157

  % delta_t_median)


 17/282 [>.............................] - ETA: 12:48 - loss: 0.2105 - acc: 0.91 - ETA: 14:15 - loss: 0.2068 - acc: 0.9176

  % delta_t_median)


 19/282 [=>............................] - ETA: 15:39 - loss: 0.2064 - acc: 0.91 - ETA: 16:49 - loss: 0.2051 - acc: 0.9174

  % delta_t_median)


 20/282 [=>............................] - ETA: 17:52 - loss: 0.1992 - acc: 0.9198

  % delta_t_median)


 24/282 [=>............................] - ETA: 18:46 - loss: 0.2020 - acc: 0.91 - ETA: 19:35 - loss: 0.2048 - acc: 0.91 - ETA: 19:51 - loss: 0.2035 - acc: 0.91 - ETA: 20:02 - loss: 0.2068 - acc: 0.9169

  % delta_t_median)




Epoch 4/5
  1/282 [..............................] - ETA: 56s - loss: 0.1821 - acc: 0.9301

  % delta_t_median)


  2/282 [..............................] - ETA: 1:38 - loss: 0.1664 - acc: 0.9321

  % delta_t_median)


  8/282 [..............................] - ETA: 2:17 - loss: 0.1902 - acc: 0.923 - ETA: 1:57 - loss: 0.2157 - acc: 0.915 - ETA: 1:47 - loss: 0.2114 - acc: 0.916 - ETA: 1:40 - loss: 0.1956 - acc: 0.922 - ETA: 1:51 - loss: 0.2034 - acc: 0.919 - ETA: 2:00 - loss: 0.2071 - acc: 0.9191

  % delta_t_median)


  9/282 [..............................] - ETA: 2:08 - loss: 0.2055 - acc: 0.9198

  % delta_t_median)


 10/282 [>.............................] - ETA: 2:12 - loss: 0.2154 - acc: 0.9150

  % delta_t_median)


 13/282 [>.............................] - ETA: 2:14 - loss: 0.2125 - acc: 0.915 - ETA: 3:32 - loss: 0.2072 - acc: 0.918 - ETA: 6:19 - loss: 0.2129 - acc: 0.9155

  % delta_t_median)


 14/282 [>.............................] - ETA: 8:50 - loss: 0.2126 - acc: 0.9158

  % delta_t_median)


 16/282 [>.............................] - ETA: 10:54 - loss: 0.2059 - acc: 0.91 - ETA: 12:43 - loss: 0.2032 - acc: 0.9192

  % delta_t_median)


 17/282 [>.............................] - ETA: 14:14 - loss: 0.2029 - acc: 0.9188

  % delta_t_median)


 19/282 [=>............................] - ETA: 15:33 - loss: 0.1992 - acc: 0.92 - ETA: 16:44 - loss: 0.1976 - acc: 0.9208

  % delta_t_median)




Epoch 5/5
  1/282 [..............................] - ETA: 57s - loss: 0.3196 - acc: 0.8679

  % delta_t_median)


  2/282 [..............................] - ETA: 1:55 - loss: 0.2612 - acc: 0.8971

  % delta_t_median)


  3/282 [..............................] - ETA: 2:31 - loss: 0.2853 - acc: 0.8875

  % delta_t_median)


  4/282 [..............................] - ETA: 2:42 - loss: 0.2554 - acc: 0.9037

  % delta_t_median)


  6/282 [..............................] - ETA: 2:45 - loss: 0.2539 - acc: 0.903 - ETA: 2:45 - loss: 0.2524 - acc: 0.9025

  % delta_t_median)


  9/282 [..............................] - ETA: 2:51 - loss: 0.2509 - acc: 0.902 - ETA: 2:37 - loss: 0.2450 - acc: 0.905 - ETA: 2:26 - loss: 0.2495 - acc: 0.9026

  % delta_t_median)


 10/282 [>.............................] - ETA: 2:24 - loss: 0.2363 - acc: 0.9091

  % delta_t_median)


 11/282 [>.............................] - ETA: 2:16 - loss: 0.2328 - acc: 0.9115

  % delta_t_median)


 17/282 [>.............................] - ETA: 3:37 - loss: 0.2388 - acc: 0.907 - ETA: 6:31 - loss: 0.2307 - acc: 0.910 - ETA: 8:55 - loss: 0.2288 - acc: 0.911 - ETA: 11:06 - loss: 0.2277 - acc: 0.91 - ETA: 12:52 - loss: 0.2248 - acc: 0.91 - ETA: 14:26 - loss: 0.2257 - acc: 0.9117

  % delta_t_median)


 19/282 [=>............................] - ETA: 15:46 - loss: 0.2256 - acc: 0.91 - ETA: 16:54 - loss: 0.2237 - acc: 0.9114

  % delta_t_median)


 20/282 [=>............................] - ETA: 18:01 - loss: 0.2210 - acc: 0.9126

  % delta_t_median)




Model training time: 151.6 minutes


# Test

In [11]:
ipath = all_image_path[0]
tpath = all_mask_path[0]

all_tissue_samples = find_patches_from_slide(ipath,tpath)
print('Total patches in slide: %d' % len(all_tissue_samples)) 
all_tissue_samples.iloc[:10]
all_tissue_samples.is_tumor.value_counts() 
test_start_time = datetime.now()
sample_gen = gen_imgs(all_image_path,all_mask_path,all_tissue_samples, 5000, shuffle=True)
%time example_X, example_y  = next(sample_gen)
test_end_time = datetime.now()
print("5000 gen time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))

start_x = PATCH_SIZE//4
start_y = PATCH_SIZE//4
pred_size = PATCH_SIZE//2

test_start_time = datetime.now()
preds = []
labels = []
for i in range(5000):
    prediction = predict_from_model(example_X[i],model)
    pred_X = np.zeros((pred_size,pred_size))
    y = example_y[i].argmax
    for x in range(start_x,start_x+pred_size):
        for y in range(start_y, start_y+pred_size):
            pred_X[x-start_x][y-start_y] = prediction[x][y]
            
    pred_s = pd.Series(pred_X.flatten())
    max_p = np.max(pred_s)
    
    y = np.max(example_y[i].argmax(axis=2))
    preds.append(max_p)
    labels.append(y)
test_end_time = datetime.now()
print("Model test time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))    
fpr, tpr, thresholds = metrics.roc_curve(labels,preds,pos_label=1)
print(metrics.auc(fpr,tpr))

truth dimensions:  (5316, 10007)
truth_4_dimension_size: (333, 626)
slide4_size (167, 313)
Total patches in slide: 25627


MemoryError: 

5000 gen time: 17.8 minutes


NameError: name 'example_X' is not defined

In [None]:
ipath = all_image_path[0]
tpath = all_mask_path[0]

all_tissue_samples = find_patches_from_slide(ipath,tpath)
print('Total patches in slide: %d' % len(all_tissue_samples)) 
all_tissue_samples.iloc[:10]
all_tissue_samples.is_tumor.value_counts() 
test_start_time = datetime.now()
sample_gen = gen_imgs(all_image_path,all_mask_path,all_tissue_samples, 5000, shuffle=True)
%time example_X, example_y  = next(sample_gen)
test_end_time = datetime.now()
print("5000 gen time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))

start_x = PATCH_SIZE//4
start_y = PATCH_SIZE//4
pred_size = PATCH_SIZE//2

test_start_time = datetime.now()
preds = []
labels = []
for i in range(5000):
    prediction = predict_from_model(example_X[i],model)
    pred_X = np.zeros((pred_size,pred_size))
    y = example_y[i].argmax
    for x in range(start_x,start_x+pred_size):
        for y in range(start_y, start_y+pred_size):
            pred_X[x-start_x][y-start_y] = prediction[x][y]
            
    pred_s = pd.Series(pred_X.flatten())
    max_p = np.max(pred_s)
    
    y = np.max(example_y[i].argmax(axis=2))
    preds.append(max_p)
    labels.append(y)
test_end_time = datetime.now()
print("Model test time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))    
fpr, tpr, thresholds = metrics.roc_curve(labels,preds,pos_label=1)
print(metrics.auc(fpr,tpr))

In [None]:
ipath = all_image_path[0]
tpath = all_mask_path[0]

all_tissue_samples = find_patches_from_slide(ipath,tpath)
print('Total patches in slide: %d' % len(all_tissue_samples)) 
all_tissue_samples.iloc[:10]
all_tissue_samples.is_tumor.value_counts() 
test_start_time = datetime.now()
sample_gen = gen_imgs(all_image_path,all_mask_path,all_tissue_samples, 5000, shuffle=True)
%time example_X, example_y  = next(sample_gen)
test_end_time = datetime.now()
print("5000 gen time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))

start_x = PATCH_SIZE//4
start_y = PATCH_SIZE//4
pred_size = PATCH_SIZE//2

test_start_time = datetime.now()
preds = []
labels = []
for i in range(5000):
    prediction = predict_from_model(example_X[i],model)
    pred_X = np.zeros((pred_size,pred_size))
    y = example_y[i].argmax
    pred_s = pd.Series(prediction.flatten())
    max_p = np.max(pred_s)
    
    y = np.max(example_y[i].argmax(axis=2))
    preds.append(max_p)
    labels.append(y)
test_end_time = datetime.now()
print("Model test time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))    
fpr, tpr, thresholds = metrics.roc_curve(labels,preds,pos_label=1)
print(metrics.auc(fpr,tpr))

In [None]:
ipath = all_image_path[0]
tpath = all_mask_path[0]

all_tissue_samples = find_patches_from_slide(ipath,tpath)
print('Total patches in slide: %d' % len(all_tissue_samples)) 
all_tissue_samples.iloc[:10]
all_tissue_samples.is_tumor.value_counts() 
test_start_time = datetime.now()
sample_gen = gen_imgs(all_image_path,all_mask_path,all_tissue_samples, 5000, shuffle=True)
%time example_X, example_y  = next(sample_gen)
test_end_time = datetime.now()
print("5000 gen time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))

start_x = 32
start_y = 32
pred_size = 192

test_start_time = datetime.now()
preds = []
labels = []
for i in range(5000):
    prediction = predict_from_model(example_X[i],model)
    pred_X = np.zeros((pred_size,pred_size))
    y = example_y[i].argmax
    for x in range(start_x,start_x+pred_size):
        for y in range(start_y, start_y+pred_size):
            pred_X[x-start_x][y-start_y] = prediction[x][y]
            
    pred_s = pd.Series(pred_X.flatten())
    max_p = np.max(pred_s)
    
    y = np.max(example_y[i].argmax(axis=2))
    preds.append(max_p)
    labels.append(y)
test_end_time = datetime.now()
print("Model test time: %.1f minutes" % ((test_end_time - test_start_time).seconds / 60,))    
fpr, tpr, thresholds = metrics.roc_curve(labels,preds,pos_label=1)
print(metrics.auc(fpr,tpr))

In [17]:
max_pred_x = np.max(preds)
max_pred_x

min_pred_x = np.min(preds)
min_pred_x

0.0004315432452131063

In [23]:
test_start_time = datetime.now()
slide = openslide.open_slide(ipath)
tiles = DeepZoomGenerator(slide,tile_size=256,overlap=0, limit_bounds=False) 
test_end_time = datetime.now()
print("Model test time: %.1f minutes" % ((test_end_time - test_start_time).seconds))

Model test time: 0.0 minutes
