In [1]:
import os, copy, openslide, pypinyin, random, h5py, cv2, shutil
from openslide.deepzoom import DeepZoomGenerator
import numpy as np
import pandas as pd
from tqdm import trange,tqdm
import matplotlib.pyplot as plt
from PIL import Image,ImageOps
# Disable Pillow's decompression-bomb pixel limit so very large images can be opened.
Image.MAX_IMAGE_PIXELS = None
# Directory containing the whole-slide '.svs' files opened with openslide.
SVS_path = r'Y:/admin/Pan_cancar/TCGA_OV_106WSIs'
# Directory containing the '<slide>-labels.png' annotation masks.
Label_path = r'Y:/admin/Pan_cancar/TCGA_OV_tumor_qupath/export'
# Root output directory; one sub-folder per label file is created beneath it.
Output_path = r'Y:/admin/Pan_cancar/TCGA_OV_106WSIs_tumor_tiles'
# Tile edge length in pixels (tiles are im_size x im_size).
im_size = 512

ModuleNotFoundError: No module named 'openslide'

In [11]:
def gamma_adjust(src_image, gamma_val = 0.5):
    """Gamma-correct an image through a 256-entry lookup table.

    Every level v in 0..255 is mapped to round(255 * (v / 255) ** gamma_val),
    and the table is applied with cv2.LUT.

    Args:
        src_image: image array accepted by cv2.LUT.
        gamma_val: exponent applied to the normalised intensity.

    Returns:
        The remapped image as returned by cv2.LUT.
    """
    normalised_levels = np.arange(256) / 255.0
    lookup = np.round(np.power(normalised_levels, gamma_val) * 255.0).astype(np.uint8)
    return cv2.LUT(src_image, lookup)

def mask(image_rgb, kernel = 9, sigma = 5000,threshold = 220):
    """Score how bright (blank) an RGB tile is, as a fraction in [0, 1].

    The tile is converted to grayscale, gamma-adjusted with gamma_adjust's
    defaults, Gaussian-blurred and thresholded. Pixels above the threshold are
    pushed to pure white in every channel, and the mean channel value of the
    result (divided by 255) is returned. A value of 1.0 means the tile is
    entirely white after this treatment.

    Args:
        image_rgb: RGB image (PIL image or H x W x 3 array).
        kernel: side length of the square Gaussian kernel.
        sigma: Gaussian sigma passed to cv2.GaussianBlur.
        threshold: gray level above which a blurred pixel counts as bright.

    Returns:
        Mean brightness of the whitened tile, normalised to [0, 1].
    """
    rgb = np.asarray(image_rgb)
    # Convert directly from RGB. The earlier version first swapped the array
    # to BGR and then applied COLOR_RGB2GRAY, which exchanged the red and blue
    # luma weights.
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    gray = gamma_adjust(gray)
    blurred = cv2.GaussianBlur(gray, (kernel, kernel), sigma)
    _, bright_mask = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)

    # Work in int16 so that adding the 0/255 mask cannot wrap around uint8.
    whitened = rgb.astype(np.int16) + bright_mask[:, :, np.newaxis]
    whitened = np.clip(whitened, 0, 255)

    # Normalise by the tile's own size rather than the global im_size, so the
    # score stays in [0, 1] for any tile dimensions.
    return np.sum(whitened) / (whitened.size * 255)

In [12]:
def resize_image(label_image, m, n):
    """Binarise a label image and resize it to m columns by n rows.

    The image is converted to grayscale and divided by its maximum value; the
    cast to uint8 truncates, so only pixels equal to the maximum become 1 and
    all others become 0. An all-zero label short-circuits to a zero array of
    the requested size.

    Args:
        label_image: PIL image holding the label mask.
        m: target width in pixels.
        n: target height in pixels.

    Returns:
        uint8 array of shape (n, m).
    """
    gray = np.array(ImageOps.grayscale(label_image))
    if np.sum(gray) == 0:
        # Nothing labelled: skip normalisation (it would divide by zero).
        return np.zeros((n, m), dtype=np.uint8)
    binary = (gray / np.max(gray)).astype(np.uint8)
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(binary, (m, n), interpolation=cv2.INTER_CUBIC)

In [13]:
def gray_image(label_image):
    """Convert a label image to a binarised uint8 grayscale array.

    The grayscale image is divided by its maximum and truncated to uint8, so
    pixels equal to the maximum become 1 and all others become 0.

    Args:
        label_image: PIL image holding the label mask.

    Returns:
        uint8 array with the same height and width as the input. An all-black
        input yields an all-zero array.
    """
    gray = np.array(ImageOps.grayscale(label_image))
    peak = np.max(gray)
    if peak == 0:
        # Guard against 0/0: without it the division yields NaN and the uint8
        # cast produces undefined values. Mirrors the check in resize_image.
        return np.zeros_like(gray, dtype=np.uint8)
    return (gray / peak).astype(np.uint8)

In [14]:
def count_tumor_inputs(tumor_label_filename, im_size, level = 0, target_tiles = 4000):
    """Count candidate tiles for one slide and print the sampling probability.

    The slide is scanned on a grid with a stride of half a tile (expressed in
    level-0 pixels). A grid position is a candidate when the sum of the
    binarised label inside the tile is below 75% of the tile area.
    NOTE(review): candidates are the tiles whose label sum is *below* the
    threshold; confirm this matches the polarity of the exported masks.

    Side effects: creates the per-slide output directory if it is missing and
    prints ``image_path`` together with ``target_tiles / candidates`` (``None``
    when there are no candidates).

    Args:
        tumor_label_filename: file name of the form '<slide>-labels.png'
            inside Label_path; the slide is '<slide>.svs' inside SVS_path.
        im_size: tile edge length in pixels at the chosen level.
        level: openslide pyramid level used for tiling.
        target_tiles: desired number of tiles per slide; numerator of the
            printed probability.
    """
    image_path = os.path.join(Output_path, tumor_label_filename.replace('-labels.png',''))
    filename = tumor_label_filename.replace('-labels.png','.svs')
    os.makedirs(image_path, exist_ok=True)

    # Only the dimensions are needed, so release the slide handle right away.
    slide = openslide.open_slide(os.path.join(SVS_path, filename))
    try:
        m, n = slide.dimensions
        m_level, n_level = slide.level_dimensions[level]
    finally:
        slide.close()
    coef = int(m/m_level)

    # The context manager closes the label file on every path, including the
    # empty-label one that previously leaked it.
    tumor_label_gray = None
    with Image.open(os.path.join(Label_path, tumor_label_filename)) as tumor_label:
        if np.sum(tumor_label) > 0:
            tumor_label_gray = resize_image(tumor_label, m_level, n_level)

    idx = 0
    if tumor_label_gray is not None:
        threshold = im_size*im_size*0.75
        span = im_size*coef  # tile edge expressed in level-0 pixels
        xs = [int(v) for v in np.arange(0, m-span, span/2)]
        ys = [int(v) for v in np.arange(0, n-span, span/2)]
        for x in xs:
            x_level = x // coef
            for y in ys:
                y_level = y // coef
                crop_tumor = tumor_label_gray[y_level:y_level+im_size, x_level:x_level+im_size]
                if np.sum(crop_tumor) < threshold:
                    idx += 1

    # Avoid ZeroDivisionError when the label is empty or no tile qualifies.
    prob = target_tiles/idx if idx else None
    print(image_path,prob)

In [15]:
def read_tumor_inputs(tumor_label_filename, im_size, level = 0, target_tiles = 4000):
    """Extract and save a random sample of tiles from one annotated slide.

    The slide is scanned on a grid with a stride of half a tile (expressed in
    level-0 pixels). A grid position is a candidate when the sum of the
    binarised label inside the tile is below 75% of the tile area.
    NOTE(review): candidates are the tiles whose label sum is *below* the
    threshold; confirm this matches the polarity of the exported masks.

    Each candidate is read from the slide; tiles whose ``mask`` brightness
    score is below 0.9 are saved as JPEG with probability
    ``target_tiles / number_of_candidates``.

    Side effects: creates the per-slide output directory, prints the number
    of candidates, and writes '<slide>_tumor<idx>_<level>.jpg' files. Nothing
    is printed or written when the label image is entirely zero.

    Args:
        tumor_label_filename: file name of the form '<slide>-labels.png'
            inside Label_path; the slide is '<slide>.svs' inside SVS_path.
        im_size: tile edge length in pixels at the chosen level.
        level: openslide pyramid level to read tiles from.
        target_tiles: approximate upper bound on sampled tiles per slide.
    """
    image_path = os.path.join(Output_path, tumor_label_filename.replace('-labels.png',''))
    filename = tumor_label_filename.replace('-labels.png','.svs')
    os.makedirs(image_path, exist_ok=True)

    slide = openslide.open_slide(os.path.join(SVS_path, filename))
    try:
        m, n = slide.dimensions
        m_level, n_level = slide.level_dimensions[level]
        coef = int(m/m_level)

        # The context manager closes the label file on every path.
        with Image.open(os.path.join(Label_path, tumor_label_filename)) as tumor_label:
            if not np.sum(tumor_label) > 0:
                return
            tumor_label_gray = resize_image(tumor_label, m_level, n_level)

        threshold = im_size*im_size*0.75
        span = im_size*coef  # tile edge expressed in level-0 pixels
        xs = [int(v) for v in np.arange(0, m-span, span/2)]
        ys = [int(v) for v in np.arange(0, n-span, span/2)]

        # Single pass over the grid: remember the candidate positions so the
        # label sums are not recomputed when the tiles are extracted.
        candidates = []
        for x in xs:
            x_level = x // coef
            for y in ys:
                y_level = y // coef
                crop_tumor = tumor_label_gray[y_level:y_level+im_size, x_level:x_level+im_size]
                if np.sum(crop_tumor) < threshold:
                    candidates.append((x, y))

        count = len(candidates)
        print(count)
        if count == 0:
            # Nothing to sample; also avoids dividing by zero below.
            return
        prob = target_tiles/count

        idx = 0
        for x, y in candidates:
            # read_region takes the location in level-0 coordinates.
            image = slide.read_region((x,y),level, (im_size,im_size))
            image_rgb = image.convert("RGB")
            intensity = mask(image_rgb)
            # Skip tiles that are almost entirely bright background.
            if intensity<0.9:
                if random.random()<prob:
                    image_rgb.save(os.path.join(image_path,filename.replace('.svs','_tumor'+str(idx)+'_'+str(level)+'.jpg')))
                    idx += 1
    finally:
        # Release the slide handle even if reading or saving fails.
        slide.close()

In [16]:
# Collect the annotation masks to process. Only '*-labels.png' files are kept
# because the tiling functions derive the slide name from that suffix; sorting
# makes the processing order independent of the file system.
svs_dir = sorted(name for name in os.listdir(Label_path) if name.endswith('-labels.png'))
print(len(svs_dir))

print(svs_dir)

7
['K21-15652-2-chenchaohua-labels.png', 'K21-15652-2-chenchaohua1-labels.png', 'K22-00953-1-lizhengguo-labels.png', 'K22-00953-1-lizhengguo1-labels.png', 'K22-01308-1-fuyunfu-labels.png', 'K22-04037-1-dingzhengfan-labels.png', 'K22-04037-1-dingzhengfan1-labels.png']


In [17]:
# List the slide files by extension (a substring test would also match names
# such as 'a.svs.bak') and show the first one, if any, as a sanity check.
svs_paths = [x for x in os.listdir(SVS_path) if x.endswith('.svs')]
path = svs_paths[0] if svs_paths else None
print(len(svs_paths))
print(path)

31
21-09908-1-chenchaoyuan.svs


In [18]:
# Extract tiles for every label file at pyramid level 0, with a progress bar.
for path in tqdm(svs_dir):
    read_tumor_inputs(path, im_size, 0)

  0%|          | 0/7 [00:00<?, ?it/s]

990


 14%|█▍        | 1/7 [00:27<02:47, 27.90s/it]

4422


 29%|██▊       | 2/7 [02:36<07:15, 87.19s/it]

1228


 43%|████▎     | 3/7 [03:59<05:41, 85.31s/it]

463


 57%|█████▋    | 4/7 [04:22<03:02, 60.73s/it]

295


 71%|███████▏  | 5/7 [05:08<01:50, 55.32s/it]

367


 86%|████████▌ | 6/7 [05:23<00:41, 41.48s/it]

1443


100%|██████████| 7/7 [06:41<00:00, 57.39s/it]
