In [22]:
!sudo apt-get install openslide-tools -y
!sudo apt-get install python-openslide -y
!pip install openslide-python
!pip install opencv-python

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openslide-tools is already the newest version (3.4.1+dfsg-5).
0 upgraded, 0 newly installed, 0 to remove and 19 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package python-openslide


In [44]:
import openslide
import numpy as np
from PIL import Image

import PIL

import openslide
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
import cv2 as cv
import h5py
import os

PIL.Image.MAX_IMAGE_PIXELS = 164945368800 


In [45]:
SLIDE_PATH = "/home/jupyter/cadaver slide/76308.svs"

destination_PATH = "/home/jupyter/Patch_extraction/0/76308/"
os.mkdir(destination_PATH)

FileExistsError: [Errno 17] File exists: '/home/jupyter/Patch_extraction/2/76320/'

In [48]:
PERCENT_WHITE_PIXELS_THRESHOLD = 0.95
SAT_THRESHOLD = 0.05
MAX_PIXEL_THRESHOLD = 500

TILE_SIZE = (512,512)
TILE_NAME_PATTERN = "76308" # I find it most useful to use the slide name
FORMAT = "jpg"

In [49]:
def extract_tile(x_pos: int, y_pos: int, tile_size: tuple, opened_slide):
    '''
    extracts a tile from an OpenSlide object given a specific position
    :param slide_path: path to whole slide image
    :param x_pos: x coordinate of the top left corner of the tile
    :param y_pos: y coordinate of the top left corner of the tile
    :param opened_slide: OpenSlide object
    :return: numpy array of the tile at the given position
    '''

    tile = opened_slide.read_region(location=(x_pos, y_pos), level=0, size=tile_size)

    return tile

In [50]:
def open_slide(slide_path: str):
    '''
    opens a slide with OpenSlide
    :param slide_path: path to whole slide image
    :return: opened slide object
    '''
    opened_slide = openslide.open_slide(slide_path)
    return opened_slide

In [51]:
def is_patch_nonwhite(patch, white_thresh, sat_thresh):
    hsv_patch = matplotlib.colors.rgb_to_hsv(patch)
    saturation = hsv_patch[:,:,1]
    percent = np.mean(saturation < SAT_THRESHOLD)
    return percent <= PERCENT_WHITE_PIXELS_THRESHOLD


In [52]:

def is_grad_nonzero(patch, max_pixel_threshold):
    gray = cv.cvtColor(np.array(patch), cv.COLOR_RGB2GRAY)
    sobelx = cv.Sobel(gray, cv.CV_64F, 1, 0, ksize=5)
    sobely = cv.Sobel(gray, cv.CV_64F, 0, 1, ksize=5)
    mag = np.abs(sobelx) + np.abs(sobely)
    return np.sum(mag == 0) <= max_pixel_threshold


In [53]:

def tile_entire_slide(slide_path: str, tile_size: tuple, tile_name_pattern: str, format: str):
    '''
    determines how many tiles a slide is made up from and tiles each valid position
    :param slide_path: path to a whole slide image
    :param tile_size: size of the tile (e.g. (512, 512))
    :param tile_name_pattern: each tile will be named {tile_name_pattern}-{x_pos}-{y_pos}.{format}
    :param format: file extension (must be valid PIL format) (e.g. jpeg, png, etc)
    :return: None
    '''

    opened_slide = open_slide(slide_path)

    # determine how many tiles the slide is made up of in the x direction
    num_valid_x_pos = int(opened_slide.dimensions[0] / tile_size[0])
    # determine how many tiles the slide is made up of in the y direction
    num_valid_y_pos = int(opened_slide.dimensions[1] / tile_size[1])

    # tile all valid tile positions within the slide
    for x in range(num_valid_x_pos):
        for y in range(num_valid_y_pos):
            tile = extract_tile(x_pos=x*tile_size[0], y_pos=y*tile_size[1],tile_size=tile_size, opened_slide=opened_slide).convert('RGB')
            is_nonwhite = is_patch_nonwhite(tile, white_thresh=PERCENT_WHITE_PIXELS_THRESHOLD, sat_thresh=SAT_THRESHOLD)\
                    and is_grad_nonzero(tile, max_pixel_threshold=MAX_PIXEL_THRESHOLD)

            if is_nonwhite:
                tile = np.array(tile)
                tile = Image.fromarray(tile)
                # tile.save("/content/drive/MyDrive/cadaver project/17721+str(x).png")
                tile.save(destination_PATH + f'{tile_name_pattern}-{int(x*tile_size[0])}-{int(y*tile_size[1])}_class2.png')

In [None]:
def main():
    '''
    tiles the whole slide image
    '''
    tile_entire_slide(
        slide_path=SLIDE_PATH,
        tile_size=TILE_SIZE,
        tile_name_pattern=TILE_NAME_PATTERN,
        format=FORMAT
    )

if __name__ == '__main__':
    main()