In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from openslide import open_slide, __library_version__ as openslide_version
import os
from PIL import Image
from skimage.color import rgb2gray
import random
import pickle

Get fig

In [2]:
def get_fig(dataset):
    """Open the slide image and its tumor mask for every id in ``dataset``.

    For an id ``fig`` (a string) the two files opened are
    ``../images/tumor_<fig>.tif`` and ``../images/tumor_<fig>_mask.tif``.

    Returns:
        A list of ``(slide, tumor_mask)`` tuples, one per id, in the same
        order as ``dataset``.  The opened handles are returned as-is; this
        function does not close them.
    """
    def _open_pair(fig):
        # Both files share the same path prefix; the slide is opened first.
        prefix = '../images/tumor_' + fig
        return open_slide(prefix + '.tif'), open_slide(prefix + '_mask.tif')

    return [_open_pair(fig) for fig in dataset]
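# Note: for slide = open_slide(slide_path), slide.level_dimensions[i] is the (width, height) of level i (so level_dimensions[0][0] / [0][1] are the level-0 width / height) and slide.level_downsamples[i] is the downsample factor of level i.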

read slide

In [3]:
# Read a region from the slide
# Return a numpy RGB array
def read_slide(slide, x, y, level, width, height, as_float=False):
    """Read one rectangular region from ``slide`` and return it as an RGB array.

    ``(x, y)``, ``level`` and ``(width, height)`` are forwarded unchanged to
    ``slide.read_region``; the returned image is converted to RGB, which
    drops the alpha channel.

    Args:
        slide: object exposing ``read_region(location, level, size)``.
        x, y: location passed to ``read_region``.
        level: pyramid level to read from.
        width, height: size of the region to read.
        as_float: when true the array is created with dtype ``float32``;
            otherwise ``np.asarray`` picks the dtype from the image.

    Returns:
        Array of shape ``(height, width, 3)`` (enforced by an assertion).
    """
    rgb = slide.read_region((x, y), level, (width, height)).convert('RGB')
    pixels = np.asarray(rgb, dtype=np.float32) if as_float else np.asarray(rgb)
    assert pixels.shape == (height, width, 3)
    return pixels

sliding window

In [4]:
def get_center(slide):
    """Enumerate the sliding-window center coordinates for ``slide``.

    On each axis the centers start at ``(299 // 2) * 8`` and advance in steps
    of 128 while staying below ``(level-6 dimension - 299 // 2) * 8``, where
    the level-6 width/height come from ``slide.level_dimensions[6]``.

    Returns:
        A list of ``(x, y)`` tuples, ordered with ``x`` as the outer
        (slowest-varying) coordinate.  The list is empty when the level-6
        image is too small for the range to contain any value.
    """
    half = 299 // 2
    level6_w = slide.level_dimensions[6][0]
    level6_h = slide.level_dimensions[6][1]
    xs = range(half * 8, (level6_w - half) * 8, 128)
    ys = range(half * 8, (level6_h - half) * 8, 128)
    return [(cx, cy) for cx in xs for cy in ys]
# Each entry of coor is an (x, y) pair: coor[i][0] is x and coor[i][1] is y.

In [5]:
#coor_list: center node from level 3
def detect_tumor(slide,tumor_mask,coor_list):
    """Label candidate centers as tumor/normal and return a balanced, shuffled sample.

    For every ``(x, y)`` in ``coor_list`` a 299x299 window is read from
    ``slide`` at level 3 (the center is shifted by half a window and scaled
    by 8 before being passed to ``read_slide``).  Centers whose window
    contains no pixel with gray value <= 0.8 are treated as background and
    skipped.  The remaining centers are labelled from a 128x128 level-3
    window of ``tumor_mask``: tumor if any pixel of its first channel equals
    1, normal otherwise.

    Both classes are shuffled with ``random.shuffle`` and truncated to the
    size of the smaller class.  The combined list is then shuffled once more
    using ``np.random``, with the labels reordered identically.  The
    per-class sample count is printed.

    Args:
        slide: slide handle accepted by ``read_slide``.
        tumor_mask: mask handle accepted by ``read_slide``.
        coor_list: iterable of ``(x, y)`` center coordinates.

    Returns:
        ``(final_coor, label)``: a list of the selected centers and a float
        array of the same length holding 1 for tumor and 0 for normal.
    """
    tumor_coor, normal_coor = [], []
    for coor in coor_list:
        # Use the 299x299 window to decide whether the patch contains tissue.
        area = read_slide(slide, x=(coor[0] - 299 // 2) * 8, y=(coor[1] - 299 // 2) * 8,
                          level=3, width=299, height=299)
        im_gray = rgb2gray(area)
        # np.where() returns a tuple of index arrays, which is truthy even
        # when nothing matched; test the boolean mask itself instead.
        if not np.any(im_gray <= 0.8):
            continue
        # Use the central 128x128 window of the mask to assign the label.
        mask = read_slide(tumor_mask, x=(coor[0] - 128 // 2) * 8, y=(coor[1] - 128 // 2) * 8,
                          level=3, width=128, height=128)[:, :, 0]
        if np.any(mask == 1):
            tumor_coor.append(coor)
        else:
            normal_coor.append(coor)

    # Balance the classes by keeping as many samples as the smaller one has.
    length = min(len(tumor_coor), len(normal_coor))
    print(length)
    random.shuffle(tumor_coor)
    random.shuffle(normal_coor)
    final_coor = tumor_coor[:length] + normal_coor[:length]
    label = np.concatenate([np.ones(length), np.zeros(length)])

    # One shared permutation keeps coordinates and labels aligned without
    # relying on two separate shuffles consuming the RNG identically.
    order = np.random.permutation(2 * length)
    final_coor = [final_coor[i] for i in order]
    label = label[order]
    return final_coor, label
# detect_tumor returns the class-balanced, shuffled level-3 center coordinates together with their labels.

extract patches

In [6]:
#extract patches for exact coors
def extractpatches(image,x,y,level_num):
    """Read the 299x299 patch of ``image`` at ``level_num`` for center ``(x, y)``.

    ``x`` and ``y`` are floor-divided by ``2 ** (level_num - 3)``, shifted by
    half a patch (``299 // 2``), and multiplied by ``2 ** level_num`` to form
    the location handed to ``read_slide``.  This treats the incoming center
    as a level-3 coordinate.

    Note: for ``level_num`` below 3 the first factor is a float, so the
    computed location is a float as well.

    Returns:
        Whatever ``read_slide`` returns for that 299x299 region.
    """
    half_patch = 299 // 2
    to_level = 2 ** (level_num - 3)
    to_level0 = 2 ** level_num
    left = (x // to_level - half_patch) * to_level0
    top = (y // to_level - half_patch) * to_level0
    return read_slide(image, left, top, level=level_num, width=299, height=299)

In [7]:
def preparetraindata(image,coor_list,level_list):
    """Extract one patch per (center, level) combination and stack them.

    For every center in ``coor_list`` (outer loop) and every level in
    ``level_list`` (inner loop), ``extractpatches`` is called with the
    center's first two components and the level.

    Returns:
        ``np.array`` built from the nested list of patches, i.e. indexed as
        ``[center, level, ...patch dims]``.  With 299x299 RGB patches this
        is ``(len(coor_list), len(level_list), 299, 299, 3)``.
    """
    patches = [
        [extractpatches(image, center[0], center[1], level) for level in level_list]
        for center in coor_list
    ]
    return np.array(patches)

get train data

In [8]:
def get_train(dataset,level_list):
    """Build and pickle the balanced training patches for every slide id.

    For each id in ``dataset`` the slide and its mask are opened, candidate
    centers are generated with ``get_center``, labelled and balanced with
    ``detect_tumor``, and the patches for all ``level_list`` levels are
    extracted with ``preparetraindata``.  The pair ``[region, label]`` is
    pickled to ``../data/dp_train/patches<id>.txt`` and the patch array
    shape is printed.

    Args:
        dataset: iterable of slide id strings.
        level_list: pyramid levels forwarded to ``preparetraindata``.

    Returns:
        ``[region, label]`` of the last processed slide.

    Raises:
        ValueError: if ``dataset`` is empty, since there is nothing to return.
    """
    last_data = None
    for fig in dataset:
        # Open one slide/mask pair at a time and close it as soon as its
        # patches are in memory, so file handles do not accumulate.
        slide, tumor_mask = get_fig([fig])[0]
        with slide, tumor_mask:
            coor_list = get_center(slide)
            final_coor, label = detect_tumor(slide, tumor_mask, coor_list)
            region = preparetraindata(slide, final_coor, level_list)
        last_data = [region, label]
        print(region.shape)
        with open('../data/dp_train/patches'+fig+'.txt', 'wb') as f:
            pickle.dump(last_data, f)
    if last_data is None:
        raise ValueError("dataset is empty; no patches were extracted")
    return last_data

In [12]:
def get_test(dataset,level_list):
    """Build and pickle the balanced test patches for every slide id.

    For each id in ``dataset`` the slide and its mask are opened, candidate
    centers are generated with ``get_center``, labelled and balanced with
    ``detect_tumor``, and the patches for all ``level_list`` levels are
    extracted with ``preparetraindata``.  The pair ``[region, label]`` is
    pickled to ``../data/patches<id>.txt`` and the patch array shape is
    printed.

    Args:
        dataset: iterable of slide id strings.
        level_list: pyramid levels forwarded to ``preparetraindata``.

    Returns:
        ``[region, label]`` of the last processed slide.

    Raises:
        ValueError: if ``dataset`` is empty, since there is nothing to return.
    """
    last_data = None
    for fig in dataset:
        # Open one slide/mask pair at a time and close it as soon as its
        # patches are in memory, so file handles do not accumulate.
        slide, tumor_mask = get_fig([fig])[0]
        with slide, tumor_mask:
            coor_list = get_center(slide)
            final_coor, label = detect_tumor(slide, tumor_mask, coor_list)
            region = preparetraindata(slide, final_coor, level_list)
        last_data = [region, label]
        print(region.shape)
        with open('../data/patches'+fig+'.txt', 'wb') as f:
            pickle.dump(last_data, f)
    if last_data is None:
        raise ValueError("dataset is empty; no patches were extracted")
    return last_data

In [24]:
# Training run, kept for reference but currently disabled:
# train_dataset = ['001','005','016','031','064','075','091','094','096','110']
# train_dataset = ['110']
# region, label = get_train(train_dataset,[3,4,5,6])

# Extract and pickle the patches of test slide '096' at levels 3 to 6;
# `region` and `label` hold the result for the cells below.
test_dataset = ['096']
region, label = get_test(test_dataset, level_list=[3, 4, 5, 6])

97
(194, 4, 299, 299, 3)


In [None]:
# Preview one extracted patch: sample index 5, entry 3 along the level axis
# (the last of the four levels requested above).
plt.imshow(region[5,3,:,:,:])

In [None]:
#(001,52),(078,2762),(012,6),(110,2452)
#()