# Imports

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
#%matplotlib notebook
import sys
###
#!pip install openslide-python
#https://github.com/deroneriksson/python-wsi-preprocessing/
###
sys.path.append('../python-wsi-preprocessing/')
from deephistopath.wsi import slide, filter, tiles, util

import fastai
from fastai.vision import *
from fastai.core import parallel

import pandas as pd
import numpy as np
import os
from pathlib import Path
from functools import partial, update_wrapper
from tqdm import tqdm_notebook as tqdm
from enum import Enum
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import cv2
import openslide
from openslide.lowlevel import *
# Raise Pillow's pixel-count limit to 10^10 so that very large slide images can be opened.
Image.MAX_IMAGE_PIXELS = 10_000_000_000

# --- ROI dataset: root folder, label font and per-type folders ---------------
PATH = Path('/home/Deep_Learner/private/network/datasets/Hypophysenadenome/')
FONT_PATH = PATH / '1984-Happines-Regular.ttf'
ROIS_CORTICOTROP = PATH / 'rois_corticotrop'
ROIS_CORTICOTROP_FILTERED = PATH / 'rois_corticotrop_filtered'
ROIS_GONADOTROP = PATH / 'rois_gonadotrop'
ROIS_GONADOTROP_FILTERED = PATH / 'rois_gonadotrop_filtered'
TILES_CORTICOTROP = PATH / 'tiles_corticotrop'
TILES_GONADOTROP = PATH / 'tiles_gonadotrop'

# Scratch folders of the ROI dataset.
ROIS_EXPERIMENTING = PATH / 'rois_experimenting'
ROIS_EXPERIMENTING_FILTERED = PATH / 'rois_experimenting_filtered'
TILES_EXPERIMENTING = PATH / 'tiles_experimenting'

# --- Relapse dataset: scratch folders ----------------------------------------
PATH_RELAPSE = Path('/home/Deep_Learner/private/network/datasets/Hypophysenadenome-Rezidive/')
RELAPSE_WSIS_EXPERIMENTING = PATH_RELAPSE / 'wsis_experimenting'
RELAPSE_IMAGES_EXPERIMENTING = PATH_RELAPSE / 'imgs_experimenting'
RELAPSE_IMAGES_FILTERED_EXPERIMENTING = PATH_RELAPSE / 'imgs_filtered_experimenting'
RELAPSE_TILES_EXPERIMENTING = PATH_RELAPSE / 'tiles_experimenting'

# --- Relapse dataset: non-relapse cases --------------------------------------
NONE_RELAPSE_WSIS = PATH_RELAPSE / 'wsis_non_relapse'
NONE_RELAPSE_IMAGES = PATH_RELAPSE / 'imgs_non_relapse'
NONE_RELAPSE_IMAGES_FILTERED = PATH_RELAPSE / 'imgs_filtered_non_relapse'
NONE_RELAPSE_TILES = PATH_RELAPSE / 'tiles_non_relapse'

# --- Relapse dataset: relapse cases ------------------------------------------
RELAPSE_WSIS = PATH_RELAPSE / 'wsis_relapse'
RELAPSE_IMAGES = PATH_RELAPSE / 'imgs_relapse'
RELAPSE_IMAGES_FILTERED = PATH_RELAPSE / 'imgs_filtered_relapse'
RELAPSE_TILES = PATH_RELAPSE / 'tiles_relapse'


def show(np):
    """Convert a NumPy image array to a PIL image via ``util.np_to_pil``.

    Note: inside this function the parameter name ``np`` hides the module-level
    ``numpy`` alias.
    """
    pil_image = util.np_to_pil(np)
    return pil_image

# Directory listing helper: every entry except Jupyter's '.ipynb_checkpoints', in sorted
# order. iterdir() yields entries in arbitrary order, but get_image_path() addresses files
# by their position in this listing, so the order has to be deterministic and identical
# for folders that contain the same file names.
Path.ls = lambda x: sorted(p for p in x.iterdir() if '.ipynb_checkpoints' not in p.name)

def show_multiple_images(path, rows=3, figsize=(128, 64)):
    """Open every entry of ``path.ls()`` with ``open_image`` and show them together.

    Args:
        path: folder whose entries are displayed.
        rows: forwarded to ``show_all`` as ``r``.
        figsize: forwarded to ``show_all`` unchanged.
    """
    loaded_images = [open_image(image_path) for image_path in path.ls()]
    show_all(imgs=loaded_images, r=rows, figsize=figsize)
    
def show_multiple_images_big(path:pathlib.Path):
    """Display each entry of ``path.ls()`` in its own matplotlib figure."""
    for image_file in path.ls():
        pixels = mpimg.imread(str(image_file))
        plt.imshow(pixels)
        plt.show()
        
class AdenomaType(Enum):
    """Dataset selector; decides which folder constants the notebook works on."""
    experimenting = 0            # ROIS_EXPERIMENTING* / TILES_EXPERIMENTING
    corticotrop = 1              # ROIS_CORTICOTROP* / TILES_CORTICOTROP
    gonadotrop = 2               # ROIS_GONADOTROP* / TILES_GONADOTROP
    relapse_experimenting = 3    # RELAPSE_*_EXPERIMENTING
    relapse = 4                  # RELAPSE_WSIS / RELAPSE_IMAGES* / RELAPSE_TILES
    non_relapse = 5              # NONE_RELAPSE_*

In [None]:
# Dataset the rest of the notebook operates on.
adenomaType = AdenomaType.relapse

# Folder set per dataset type: (wsi_path, rois_path, rois_filtered_path, tiles_path).
# No WSI folder is configured for gonadotrop/corticotrop. Their wsi_path is set to None
# explicitly so that the name is always defined and a value left over from an earlier run
# of this cell with another type cannot be picked up by accident.
_DATASET_FOLDERS = {
    AdenomaType.gonadotrop: (
        None, ROIS_GONADOTROP, ROIS_GONADOTROP_FILTERED, TILES_GONADOTROP),
    AdenomaType.corticotrop: (
        None, ROIS_CORTICOTROP, ROIS_CORTICOTROP_FILTERED, TILES_CORTICOTROP),
    AdenomaType.experimenting: (
        ROIS_EXPERIMENTING, ROIS_EXPERIMENTING, ROIS_EXPERIMENTING_FILTERED, TILES_EXPERIMENTING),
    AdenomaType.relapse_experimenting: (
        RELAPSE_WSIS_EXPERIMENTING, RELAPSE_IMAGES_EXPERIMENTING,
        RELAPSE_IMAGES_FILTERED_EXPERIMENTING, RELAPSE_TILES_EXPERIMENTING),
    AdenomaType.relapse: (
        RELAPSE_WSIS, RELAPSE_IMAGES, RELAPSE_IMAGES_FILTERED, RELAPSE_TILES),
    AdenomaType.non_relapse: (
        NONE_RELAPSE_WSIS, NONE_RELAPSE_IMAGES, NONE_RELAPSE_IMAGES_FILTERED, NONE_RELAPSE_TILES),
}

wsi_path, rois_path, rois_filtered_path, tiles_path = _DATASET_FOLDERS[adenomaType]

In [None]:
# Create the output folders of the selected dataset if they are missing
# (their parent folder must already exist).
for output_dir in (rois_path, rois_filtered_path, tiles_path):
    output_dir.mkdir(exist_ok=True)

# Overwrite parts of wsi lib

In [None]:
# Downscale factor between a whole-slide image and its converted PNG; read by
# slide_to_scaled_pil_image, tile_to_pil_tile and training_slide_to_image in this notebook.
slide.SCALE_FACTOR = 2


# Point the library at this dataset's folder and slide file extension.
slide.BASE_DIR = PATH/'data'
slide.SRC_TRAIN_EXT = "ndpi"
# Use the font file stored in the dataset folder for both font settings of the tiles module.
tiles.SUMMARY_TITLE_FONT_PATH = str(FONT_PATH)
tiles.FONT_PATH = str(FONT_PATH)
# Tile-summary rendering switches and tissue threshold.
# NOTE(review): exact meaning is defined by deephistopath.wsi.tiles - confirm there.
tiles.DISPLAY_TILE_SUMMARY_LABELS = True
tiles.LABEL_ALL_TILES_IN_TOP_TILE_SUMMARY = True
tiles.BORDER_ALL_TILES_IN_TOP_TILE_SUMMARY = True
tiles.TISSUE_LOW_THRESH = 20

# Tile edge lengths; check_tile() below compares tile width/height against these values.
tiles.ROW_TILE_SIZE = 1024
tiles.COL_TILE_SIZE = 1024

##################################################################################################################

# Route the library's open_slide to its plain image opener.
# NOTE(review): presumably because the tiling code here works on already converted PNGs - confirm.
slide.open_slide = slide.open_image

##################################################################################################################

def get_image_path(folder_path, slide_num):
    """Return the entry at position ``slide_num`` in the listing ``folder_path.ls()``."""
    entries = folder_path.ls()
    return entries[slide_num]

# Replace the library's path lookups with index-based lookups into this notebook's folders.
# partial() binds the folder that is selected right now, so this cell has to be re-run
# after adenomaType (and with it rois_path / rois_filtered_path / wsi_path) changes.
slide.get_filter_image_result = partial(get_image_path, rois_filtered_path)
slide.get_training_image_path = partial(get_image_path, rois_path)
slide.get_training_slide_path = partial(get_image_path, wsi_path)

##################################################################################################################

def parse_dimensions_from_image_filename(img_path):
    """Return ``(width, height, width, height)`` of the image stored at ``img_path``.

    The sizes are taken from the array returned by ``slide.open_image_np`` instead of being
    parsed from the file name; the same width/height pair is returned twice.
    """
    dims = slide.open_image_np(img_path).shape
    width = dims[1]
    height = dims[0]
    return width, height, width, height

# Install the replacement in the slide module.
slide.parse_dimensions_from_image_filename = parse_dimensions_from_image_filename

##################################################################################################################

def tile_to_pil_tile(tile):
    """
    Cut the region described by a tile out of its training image and return it as a PIL image.

    The tile's o_r_s/o_r_e/o_c_s/o_c_e coordinates are divided by slide.SCALE_FACTOR before
    they are used to index the image loaded from slide.get_training_image_path.

    Args:
      tile: Tile object.

    Return:
      Tile as a PIL image.
    """
    image_np = slide.open_image_np(slide.get_training_image_path(tile.slide_num))
    factor = slide.SCALE_FACTOR
    row_start = int(tile.o_r_s / factor)
    row_end = int(tile.o_r_e / factor)
    col_start = int(tile.o_c_s / factor)
    col_end = int(tile.o_c_e / factor)
    region = image_np[row_start:row_end, col_start:col_end, :]
    return util.np_to_pil(region)

# Install the replacement in the tiles module.
tiles.tile_to_pil_tile = tile_to_pil_tile

##################################################################################################################
def get_tile_image_path(tile):
    """
    Build the output path of a tile image.

    The file name is the stem of the tile's training image followed by the tile's row,
    column, x/y pixel position and pixel width/height; the file goes into tiles_path.

    Args:
      tile: Tile object.

    Returns:
      Path to the tile image, as a string.
    """
    roi_name = slide.get_training_image_path(tile.slide_num).stem
    width = tile.o_c_e - tile.o_c_s
    height = tile.o_r_e - tile.o_r_s
    file_name = "%s-tile-r%d-c%d-x%d-y%d-w%d-h%d.png" % (
        roi_name, tile.r, tile.c, tile.o_c_s, tile.o_r_s, width, height)
    return os.path.join(PATH, tiles_path, file_name)

# Install the replacement in the slide module.
slide.get_tile_image_path = get_tile_image_path

##################################################################################################################

def top_tiles(self):
    """
    Select the tiles that pass check_tile, in the order given by tiles_by_score().

    Prints how many tiles were kept out of the total.

    Returns:
       List of the selected tiles.
    """
    ranked = self.tiles_by_score()
    selected = [candidate for candidate in ranked if check_tile(candidate)]
    print(f'Number of top tiles/all tiles: {len(selected)}/{len(ranked)}')
    return selected

def check_tile(tile, min_score=0.55, min_size_fraction=0.7):
    """
    Decide whether a tile is good enough to be kept as a top tile.

    Args:
        tile: Tile object providing score and the o_c_s/o_c_e/o_r_s/o_r_e coordinates.
        min_score: the tile's score must be strictly greater than this value.
        min_size_fraction: the tile's width/height must be at least this fraction of
            tiles.COL_TILE_SIZE / tiles.ROW_TILE_SIZE.

    Returns:
        Truthy if the tile passes the score and size checks, falsy otherwise.
    """
    width = tile.o_c_e - tile.o_c_s
    height = tile.o_r_e - tile.o_r_s
    return (tile.score > min_score
            and width >= min_size_fraction * tiles.COL_TILE_SIZE
            and height >= min_size_fraction * tiles.ROW_TILE_SIZE)


# Replace TileSummary.top_tiles with the check_tile-based selection defined in this cell.
tiles.TileSummary.top_tiles = top_tiles

##################################################################################################################
from deephistopath.wsi.tiles import (hsv_saturation_and_value_factor, 
                                     hsv_purple_pink_factor, 
                                     tissue_quantity_factor, 
                                     tissue_quantity)

def score_tile(np_tile, tissue_percent, slide_num, row, col):
    """
    Score a tile from its tissue percentage, color factor and saturation/value factor.

    The tissue quantity factor is computed and returned but does not enter the score.
    slide_num, row and col are accepted but not used.

    Args:
      np_tile: Tile as NumPy array.
      tissue_percent: The percentage of the tile judged to be tissue.
      slide_num: Slide number.
      row: Tile row.
      col: Tile column.

    Returns tuple consisting of score, color factor, saturation/value factor, and tissue quantity factor.
    """
    color_factor = hsv_purple_pink_factor(np_tile)
    s_and_v_factor = hsv_saturation_and_value_factor(np_tile)
    quantity_factor = tissue_quantity_factor(tissue_quantity(tissue_percent))
    raw_score = tissue_percent * (color_factor * s_and_v_factor) / 1000.0
    # map the raw score onto a value between 0 and 1
    scaled_score = 1.0 - (10.0 / (10.0 + raw_score))
    return scaled_score, color_factor, s_and_v_factor, quantity_factor

# Install the replacement in the tiles module.
tiles.score_tile = score_tile
#tiles.score_tile = tiles.score_tile
#############################################################################################################

def _load_image(buf, size):
        '''Build an RGBA PIL image of the given size from a raw pixel buffer.

        buf must be a buffer; size is a (width, height) tuple. Images with at most
        MAX_PIXELS_PER_LOAD pixels are converted in one call, larger ones are converted
        in horizontal strips and pasted into a new image. Raises ValueError when the
        width alone exceeds PIXELS_PER_LOAD.
        '''

        # Load entire buffer at once if possible
        MAX_PIXELS_PER_LOAD = (1 << 29) - 1
        # Otherwise, use chunks smaller than the maximum to reduce memory
        # requirements
        PIXELS_PER_LOAD = 1 << 26

        def do_load(buf, size):
            '''buf can be a string, but should be a ctypes buffer to avoid an
            extra copy in the caller.'''
            # First reorder the bytes in a pixel from native-endian aRGB to
            # big-endian RGBa to work around limitations in RGBa loader
            rawmode = (sys.byteorder == 'little') and 'BGRA' or 'ARGB'
            buf = PIL.Image.frombuffer('RGBA', size, buf, 'raw', rawmode, 0, 1)
            # Image.tobytes() is named tostring() in Pillow 1.x and PIL
            buf = (getattr(buf, 'tobytes', None) or buf.tostring)()
            # Now load the image as RGBA, undoing premultiplication
            return PIL.Image.frombuffer('RGBA', size, buf, 'raw', 'RGBa', 0, 1)

        # Fast path for small buffers
        w, h = size
        if w * h <= MAX_PIXELS_PER_LOAD:
            return do_load(buf, size)

        # Load in chunks to avoid OverflowError in PIL.Image.frombuffer()
        # https://github.com/python-pillow/Pillow/issues/1475
        if w > PIXELS_PER_LOAD:
            # We could support this, but it seems like overkill
            raise ValueError('Width %d is too large (maximum %d)' %
                    (w, PIXELS_PER_LOAD))
        # Number of complete image rows that fit into one chunk
        rows_per_load = PIXELS_PER_LOAD // w
        img = PIL.Image.new('RGBA', (w, h))
        for y in range(0, h, rows_per_load):
            # The last strip may be shorter than rows_per_load
            rows = min(h - y, rows_per_load)
            if sys.version[0] == '2':
                # Python 2 only: offsets are given in bytes (4 per pixel)
                chunk = buffer(buf, 4 * y * w, 4 * rows * w)
            else:
                # PIL.Image.frombuffer() won't take a memoryview or
                # bytearray, so we can't avoid copying
                # NOTE(review): the slice is in buffer items, not bytes - assumes each
                # item of buf is one 4-byte pixel; confirm against the caller
                chunk = memoryview(buf)[y * w:(y + rows) * w].tobytes()
            img.paste(do_load(chunk, (w, rows)), (0, y))
        return img

# Install this loader in openslide's low-level module.
openslide.lowlevel._load_image = _load_image


def slide_to_scaled_pil_image(slide_filepath):
    """
    Convert a WSI training slide to a scaled-down PIL image.

    The slide is read at the pyramid level that best matches slide.SCALE_FACTOR, converted
    to RGB and resized to the original size divided by slide.SCALE_FACTOR (rounded down).

    Args:
      slide_filepath: Path of the whole-slide image file.

    Returns:
      Tuple consisting of scaled-down PIL image, original width, original height, new width, and new height.
    """
    # Local import so the function does not depend on `math` leaking in through a star import.
    import math

    sl = openslide.open_slide(str(slide_filepath))
    try:
        large_w, large_h = sl.dimensions
        new_w = math.floor(large_w / slide.SCALE_FACTOR)
        new_h = math.floor(large_h / slide.SCALE_FACTOR)
        level = sl.get_best_level_for_downsample(slide.SCALE_FACTOR)
        whole_slide_image = sl.read_region((0, 0), level, sl.level_dimensions[level])
    finally:
        # Release the slide handle even when reading fails; the region image has
        # already been read at this point.
        sl.close()
    whole_slide_image = whole_slide_image.convert("RGB")
    img = whole_slide_image.resize((new_w, new_h), PIL.Image.BILINEAR)
    return img, large_w, large_h, new_w, new_h

# Install the replacement in the slide module.
slide.slide_to_scaled_pil_image = slide_to_scaled_pil_image


def training_slide_to_image(slide_filepath):
    """
    Convert a WSI training slide to a scaled-down PNG saved in rois_path.

    The file is named '<slide stem>-scale_factor_<slide.SCALE_FACTOR>.png'.

    Args:
      slide_filepath: Path of the whole-slide image file.
    """
    scaled_image = slide_to_scaled_pil_image(slide_filepath)[0]
    target = f'{rois_path}/{slide_filepath.stem}-scale_factor_{slide.SCALE_FACTOR}.png'
    print("Saving image to: " + target)
    scaled_image.save(target)

# Install the replacement in the slide module.
slide.training_slide_to_image = training_slide_to_image

# Convert WSIs

In [None]:
#def convert_wsi_to_png(path:pathlib.Path, index:int):
#    #try:
#        slide.training_slide_to_image(path)
#        print(f'Saved {path.stem}.png')
#    #except:
#        #print(path)
#        
#fastai.core.parallel(convert_wsi_to_png, wsi_path.ls(), max_workers=20)

In [None]:
# Convert every WSI that has no image in rois_path yet.
# training_slide_to_image() names its output '<stem>-scale_factor_<N>.png', so comparing the
# bare WSI stem with the image stem never matches; the expected output stem is checked too.
existing_stems = {b.stem for b in rois_path.ls()}
for p in tqdm(wsi_path.ls()):
    converted_stem = f'{p.stem}-scale_factor_{slide.SCALE_FACTOR}'
    if p.stem in existing_stems or converted_stem in existing_stems:
        continue
    slide.training_slide_to_image(p)

# Filter images

In [None]:
##
# for 'normal' img formats like .png
##

def filter_roi(img_path:pathlib.Path, index:int):
    """
    Mask one PNG image with an Otsu threshold and save it to rois_filtered_path.

    Steps: RGB -> grayscale -> complement -> Otsu threshold -> boolean mask applied to the
    RGB image. The result keeps the file name of the input. Saving is tried with PIL first;
    if PIL raises ValueError the image is written with cv2 instead.

    Args:
        img_path: image to filter; files whose suffix is not '.png' are skipped.
        index: not used; presumably there so the function matches the (item, index)
            call shape of fastai's ``parallel`` - confirm.

    Returns:
        True if the filtered image was written, False if processing failed (the path and
        the error are printed), None if the file was skipped.
    """
    if img_path.suffix != '.png':
        return None
    try:
        img_pil = slide.open_image(img_path)
        img_np = util.pil_to_np_rgb(img_pil)
        grayscale_np = filter.filter_rgb_to_grayscale(img_np)
        complement_np = filter.filter_complement(grayscale_np)
        # builtin bool: the np.bool alias does not exist in NumPy 1.24-1.26
        otsu_np = filter.filter_otsu_threshold(complement_np).astype(bool)
        filtered_img_np = util.mask_rgb(img_np, otsu_np)
        filtered_path = rois_filtered_path/f'{img_path.stem}{img_path.suffix}'
        try:
            # The PIL conversion has to happen inside this try block, otherwise its
            # ValueError would bypass the cv2 fallback below.
            util.np_to_pil(filtered_img_np).save(filtered_path)
        except ValueError:
            #cv2 expects the array to have bgr as channel order
            bgr = filtered_img_np[...,[2,1,0]]
            # cv2.imwrite signals failure through its return value
            if not cv2.imwrite(str(filtered_path), bgr):
                raise IOError(f'cv2.imwrite could not write {filtered_path}')
    except Exception as e:
        # Best effort: report the failing image and carry on with the next one.
        print(f'{img_path}: {e!r}')
        return False
    return True
###
# use this line instead of the following for loop to do the filtering in parallel with multiple threads
###
#parallel(filter_roi, rois_path.ls(), max_workers=25)

# filter_roi reports failures through its return value, so collect them from there.
failed = []
for p in tqdm(rois_path.ls()):
    if filter_roi(p, 0) is False:
        failed.append(p)

print(failed)

## Fix for PIL Value Error
PIL raises a ValueError when Image.fromarray() is used with very large NumPy arrays.
In that case, save the image with cv2 instead. Note that cv2 expects BGR channel order, so the RGB array has to be converted to BGR before saving.

In [None]:
# Single converted image from the relapse image folder, used by the following cells.
pa = Path('/home/Deep_Learner/private/network/datasets/Hypophysenadenome-Rezidive/imgs_relapse/495-09-III-HE-scale_factor_2.png')

In [None]:
# Same filter steps as in filter_roi, applied to the single image `pa`.
img_pil = slide.open_image(pa)
img_np = util.pil_to_np_rgb(img_pil)
grayscale_np = filter.filter_rgb_to_grayscale(img_np)
complement_np = filter.filter_complement(grayscale_np)
# builtin bool: the np.bool alias does not exist in NumPy 1.24-1.26
otsu_np = filter.filter_otsu_threshold(complement_np).astype(bool)
filtered_img_np = util.mask_rgb(img_np, otsu_np)

In [None]:
# Preview the masked image.
plt.imshow(filtered_img_np)

In [None]:
# Output location: same file name as the input, inside rois_filtered_path.
filtered_path = rois_filtered_path/f'{pa.stem}{pa.suffix}'

In [None]:
# Save with PIL; if PIL raises ValueError, write the file with cv2 instead.
try:
    filtered_img_pil = util.np_to_pil(filtered_img_np)
    filtered_img_pil.save(filtered_path)
except ValueError:
    # cv2 expects BGR channel order, so reverse the channel axis first.
    bgr = filtered_img_np[...,[2,1,0]]
    # cv2.imwrite signals failure through its return value instead of raising.
    if not cv2.imwrite(str(filtered_path), bgr):
        raise IOError(f'cv2.imwrite could not write {filtered_path}')

In [None]:
# Crop of the masked image (rows 10000-11999, columns 3500-4999) for the PIL/cv2 comparison below.
ü = filtered_img_np[10000:12000,3500:5000,:]

In [None]:
# Preview the crop.
plt.imshow(ü)

In [None]:
# Write the crop with PIL. A later cell opens path_pil, so the file has to be saved here.
path_pil = rois_filtered_path/f'{pa.stem}-pil-{pa.suffix}'
filtered_img_pil = util.np_to_pil(ü)
filtered_img_pil.save(path_pil)

In [None]:
# Write the same crop with cv2; the channel axis is reversed (RGB -> BGR) before writing.
path_cv = rois_filtered_path/f'{pa.stem}-cv2-{pa.suffix}'
bgr = ü[...,[2,1,0]]
cv2.imwrite(str(path_cv), bgr)

In [None]:
# Display the file written with cv2.
open_image(path_cv)

In [None]:
# Display the file at path_pil for comparison.
open_image(path_pil)

# Create tiles

## multi process

In [None]:
# Tile every filtered image (addressed by its index in rois_filtered_path.ls());
# only the top tiles are saved.
image_numbers = list(range(len(rois_filtered_path.ls())))
tiles.multiprocess_filtered_images_to_tiles(
    display=False,
    save_summary=False,
    save_data=False,
    save_top_tiles=True,
    html=False,
    image_num_list=image_numbers,
)

In [None]:
# Number of entries currently in the tiles folder.
len(tiles_path.ls())

In [None]:
# Display every tile in its own figure.
show_multiple_images_big(tiles_path)

In [None]:
#!rm -r {rois_filtered_path/'*'}
#!rm -r {tiles_path/'*'}

## single process

In [None]:
#!rm -r {tiles_path/'*'}

In [None]:
# Only the first filtered image is processed here; remove the [:1] slice to tile all of them.
# The progress-bar total is taken from the list that is actually iterated.
filtered_images = rois_filtered_path.ls()[:1]
failed = []
for n, p in tqdm(enumerate(filtered_images), total=len(filtered_images)):
    if p.suffix != '.png':
        continue
    print(p)
    try:
        tiles.summary_and_tiles(n, display=False, save_summary=False, save_data=False, save_top_tiles=True)
    except Exception as e:
        # Keep going, but record which image failed and why.
        print(f'{p}: {e!r}')
        failed.append(p)

print(failed)

# exp

In [None]:
import PIL
p = '/home/Deep_Learner/work/network/datasets/Hypophysenadenome/rois_corticotrop/1000-13-III-HE-ROI_1-ACTH.png'

# Same filter steps as in filter_roi, applied to a single ROI image.
img_pil = slide.open_image(p)
img_np = util.pil_to_np_rgb(img_pil)
grayscale_np = filter.filter_rgb_to_grayscale(img_np)
complement_np = filter.filter_complement(grayscale_np)
# builtin bool: the np.bool alias does not exist in NumPy 1.24-1.26
otsu_np = filter.filter_otsu_threshold(complement_np).astype(bool)
filtered_img_np = util.mask_rgb(img_np, otsu_np)

plt.imshow(filtered_img_np)

# Cut the masked image into non-overlapping sz x sz tiles; incomplete tiles at the right and
# bottom edge are dropped. The list is called exp_tiles (not `tiles`) so that the imported
# deephistopath `tiles` module is not overwritten.
sz = 512
n_rows = filtered_img_np.shape[0] // sz
n_cols = filtered_img_np.shape[1] // sz
exp_tiles = [filtered_img_np[i*sz:(i+1)*sz, j*sz:(j+1)*sz]
             for i in range(n_rows)
             for j in range(n_cols)]

for t in exp_tiles:
    print(t.shape)
    # Non-zero channel values divided by 3 approximate the number of unmasked pixels.
    nz = np.count_nonzero(t) // 3
    print(f'tissue percentage:{(nz/(t.shape[0]*t.shape[1]))*100}')
    plt.imshow(t)
    plt.show()