In [1]:
import geojson
import os
import sys
import numpy as np
from PIL import Image, ImageDraw
import tifffile
from matplotlib import pyplot as plt
from skimage.measure import label, regionprops
import imagecodecs
import shutil
import pathlib,glob,random


In [2]:
def create_folder(folder):
    if not os.path.exists(folder):
        os.makedirs(folder)
        
def save_to_file(data, pathname,name):    
    x = datetime.datetime.now()
    fname='model/accuracy/{}_{}_{}_{}'.format(x.strftime("%Y"),x.strftime("%m"),x.strftime("%d"),x.strftime("%X")).replace(':', '_')
    create_folder(os.path.dirname(fname))
    with open(fname, 'wb') as f:
        pickle.dump(data, f)
        
def load_file(fname):
    with open(fname, 'rb') as f:
        return pickle.load( f)      

def split_wsi (wsi_root='inputs',img_ext = '.tif',json_ext = '.geojson',save_path='wsi_input' ):
    img_list = glob.glob(f"{wsi_root}/*{img_ext}")
    index_map = list(range(len(img_list)))
    random.shuffle(index_map)
    split_frac = 0.8
    split_idx = int(len(index_map)*split_frac)
    train_list, remaining_list = index_map[:split_idx], index_map[split_idx:]
    
    remaining_idx = int(len(remaining_list)*0.5)
    val_list, test_list = remaining_list[:remaining_idx], remaining_list[remaining_idx:]
    
    train_list = [img_list[index] for index in train_list]
    val_list = [img_list[index] for index in val_list]
    test_list = [img_list[index] for index in test_list]
    
    save_path_wsi=f'{save_path}/train'
    create_folder(save_path_wsi)
    for item in train_list:
        shutil.copy(item, save_path_wsi)
        shutil.copy(pathlib.Path(item).with_suffix(json_ext),save_path_wsi)
        
    save_path_wsi=f'{save_path}/test'
    create_folder(save_path_wsi)
    for item in test_list:
        shutil.copy(item, save_path_wsi)
        shutil.copy(pathlib.Path(item).with_suffix(json_ext),save_path_wsi)

    save_path_wsi=f'{save_path}/val'
    create_folder(save_path_wsi)
    for item in val_list:
        shutil.copy(item, save_path_wsi)
        shutil.copy(pathlib.Path(item).with_suffix(json_ext),save_path_wsi)    
        
    return(train_list,val_list, test_list)


def is_purple(crop: np.ndarray, purple_threshold: int,
              purple_scale_size: int) -> bool:
    """
    Determines if a given portion of an image is purple.

    Args:
        crop: Portion of the image to check for being purple.
        purple_threshold: Number of purple points for region to be considered purple.
        purple_scale_size: Scalar to use for reducing image to check for purple.

    Returns:
        A boolean representing whether the image is purple or not.
    """
    block_size = (crop.shape[0] // purple_scale_size,
                  crop.shape[1] // purple_scale_size, 1)
    pooled = block_reduce(image=crop, block_size=block_size, func=np.average)

    # Calculate boolean arrays for determining if portion is purple.
    r, g, b = pooled[..., 0], pooled[..., 1], pooled[..., 2]
    cond1 = r > g - 10
    cond2 = b > g - 10
    cond3 = ((r + b) / 2) > g + 20

    # Find the indexes of pooled satisfying all 3 conditions.
    pooled = pooled[cond1 & cond2 & cond3]
    num_purple = pooled.shape[0]

    return num_purple > purple_threshold



In [3]:
def create_patches(img_root = 'tif',json_root = 'geojson',savepath='patches',patche_type='train',tile_size=128,inverse_overlap_factor=0.5): 
    #all_json = [os.path.join(json_root, file) for file in os.listdir(json_root)]
    #all_img = [os.path.join(img_root, file) for file in os.listdir(img_root)]
    all_json=glob.glob(json_root)
    all_img=glob.glob(img_root)
    
    
    #tile_size = 128
    step_size = int(tile_size * inverse_overlap_factor)
    
    gang_dst_path = f'{savepath}/{patche_type}/ganglia'
    non_gang_dst_path = f'{savepath}/{patche_type}/non-ganglia'
    create_folder(gang_dst_path)
    create_folder(non_gang_dst_path)

    for idx in range( len(all_json)):
        json_path, img_path = all_json[idx], all_img[idx]
        saveroot = os.path.basename(json_path.split('.')[0])
        file = geojson.load(open(json_path))
        img = tifffile.imread(img_path)
        
        coords = []
        for polygon in file['features']:
            coord = polygon['geometry']['coordinates'][0]
            try: 
                coords.append(tuple([(x,y) for x,y in coord]))
            except:
                pass
        img2 = Image.fromarray(img)
        draw = ImageDraw.Draw(img2)
        for coord in coords: 
            draw.polygon(coord, fill=(0,255,255))
        img2 = Image.blend(Image.fromarray(img), img2, 1.0)
        
        del draw
        
        img2 = np.array(img2)
        img3bw = (img2[:,:,0]==0) & (img2[:,:,1]==255) & (img2[:,:,2]==255)
        
        del img2
        
        labels = label(img3bw)
        stats = regionprops(labels)
        
        del labels
        
        sub_imgs = []
        for sub_idx in range(len(stats)):
            r1,c1,r2,c2 = stats[sub_idx].bbox
            sub_img = img[r1:r2,c1:c2,:]
            dims = sub_img.shape
            if dims[0] < tile_size and dims[1] >= tile_size:
                diff = tile_size - dims[0]
                r1 = r1 - diff // 2
                r2 = r2 + diff - diff // 2
                sub_img = img[r1:r2,c1:c2,:]
            elif dims[0] >= tile_size and dims[1] < tile_size:
                diff = tile_size - dims[1]
                c1 = c1 - diff // 2
                c2 = c2 + diff - diff // 2
                sub_img = img[r1:r2,c1:c2,:]
            elif dims[0] < tile_size and dims[1] < tile_size:
                diffr = tile_size - dims[0]
                diffc = tile_size - dims[1]
                r1 = r1 - diffr // 2
                r2 = r2 + diffr - diffr // 2
                c1 = c1 - diffc // 2
                c2 = c2 + diffc - diffc // 2
                sub_img = img[r1:r2,c1:c2,:]

            for r in range(0,sub_img.shape[0],  step_size):
                for c in range(0, sub_img.shape[1],  step_size):
                    tile = sub_img[r:r+tile_size, c:c+tile_size]
                    if tile.shape == (128,128,3):
                        sub_imgs.append(tile)
                        
        for count, sub_img in enumerate(sub_imgs):
            sub_im = Image.fromarray(sub_img)
            savepath = os.path.join(gang_dst_path, saveroot + '-' + str(count) + '.png')
            sub_im.save(savepath)
            
        img[(img3bw==1)&(img3bw==1)&(img3bw==1)] = [255,255,255]
        
        del img3bw
        
        count = 0
        attempts = 0
        tissue_imgs = []
        previous = []
        while count < len(sub_imgs):
            r = np.random.choice([num for num in range(img.shape[0])])
            c = np.random.choice([num for num in range(img.shape[1])])
            if [r,c] not in previous:
                previous.append([r,c])
                tissue_img = img[r:r+tile_size, c:c+tile_size,:]
                back = np.sum((tissue_img[:,:,0]>=180)&(tissue_img[:,:,1]>=180)&(tissue_img[:,:,2]>=180))
                tissue = np.sum((tissue_img[:,:,0]<180)&(tissue_img[:,:,1]<180)&(tissue_img[:,:,2]<180))
                ratio = tissue / (back + tissue)
                if ratio >= 0.25:
                    tissue_imgs.append(tissue_img)
                    count += 1
                attempts += 1
            else:
                pass
            if attempts > 10000:
                count = 100000000000
                
        for count, tissue_img in enumerate(tissue_imgs):
            tissue_im = Image.fromarray(tissue_img)
            savepath = os.path.join(non_gang_dst_path, saveroot + '-' + str(count) + '.png')
            tissue_im.save(savepath)
            
        del img
        del stats
        
        sys.stdout.write('Completed: {}-{}\r'.format(idx/len(all_json), idx))
        sys.stdout.flush()
            

        


In [None]:
create_patches(img_root = 'wsi_input/train/*.tif',json_root = 'wsi_input/train/*.geojson',savepath='patches',patche_type='train')
create_patches(img_root = 'wsi_input/val/*.tif',json_root = 'wsi_input/val/*.geojson',savepath='patches',patche_type='val')
create_patches(img_root = 'wsi_input/test/*.tif',json_root = 'wsi_input/test/*.geojson',savepath='patches',patche_type='test')

Completed: 0.9166666666666666-665

In [5]:
split_wsi (wsi_root='inputs',img_ext = '.tif',json_ext = '.geojson',save_path='wsi_input' )

(['inputs/2_A1_0_orig.tif',
  'inputs/0_B1_0_orig.tif',
  'inputs/0_B2_0_orig.tif',
  'inputs/10_A2_0_orig.tif',
  'inputs/118_B1_0_orig.tif',
  'inputs/7_A3_0_orig.tif',
  'inputs/124_A1_0_orig.tif',
  'inputs/9_A2_0_orig.tif',
  'inputs/126_A2_0_orig.tif',
  'inputs/122_A1_0_orig.tif',
  'inputs/18_G1_0_orig.tif',
  'inputs/6_A1_0_orig.tif',
  'inputs/7_A6_0_orig.tif',
  'inputs/8_A1_0_orig.tif',
  'inputs/22_A1_0_orig.tif',
  'inputs/116_A5_0_orig.tif',
  'inputs/13_A2_0_orig.tif',
  'inputs/3_A1_0_orig.tif',
  'inputs/116_A20_0_orig.tif',
  'inputs/116_A21_0_orig.tif',
  'inputs/14_A2_0_orig.tif',
  'inputs/12_A2_0_orig.tif',
  'inputs/124_A2_0_orig.tif',
  'inputs/26_A1_0_orig.tif',
  'inputs/12_A1_0_orig.tif',
  'inputs/16_B3_0_orig.tif',
  'inputs/26_A3_0_orig.tif',
  'inputs/21_A1_0_orig.tif',
  'inputs/9_A1_0_orig.tif',
  'inputs/19_C1_0_orig.tif',
  'inputs/25_A2_0_orig.tif',
  'inputs/26_A4_0_orig.tif',
  'inputs/1_E2_0_orig.tif',
  'inputs/16_B2_0_orig.tif',
  'inputs/2_A2_

In [4]:
json_root = 'geojson'
img_root = 'tif'

all_json = [os.path.join(json_root, file) for file in os.listdir(json_root)]
all_img = [os.path.join(img_root, file) for file in os.listdir(img_root)]

In [5]:
idx

89

In [6]:
all_json

['geojson/127_A2_0_orig.geojson',
 'geojson/18_G1_0_orig.geojson',
 'geojson/21_A1_0_orig.geojson',
 'geojson/16_B1_0_orig.geojson',
 'geojson/0_B2_0_orig.geojson',
 'geojson/124_A1_0_orig.geojson',
 'geojson/129_A3_0_orig.geojson',
 'geojson/2_A2_0_orig.geojson',
 'geojson/26_A3_0_orig.geojson',
 'geojson/7_A6_0_orig.geojson',
 'geojson/16_B3_0_orig.geojson',
 'geojson/20_A1_0_orig.geojson',
 'geojson/26_A2_0_orig.geojson',
 'geojson/123_A1_0_orig.geojson',
 'geojson/25_A1_0_orig.geojson',
 'geojson/0_B1_0_orig.geojson',
 'geojson/5_A2_0_orig.geojson',
 'geojson/9_A1_0_orig.geojson',
 'geojson/2_A1_0_orig.geojson',
 'geojson/127_B1_0_orig.geojson',
 'geojson/127_A1_0_orig.geojson',
 'geojson/11_C2_0_orig.geojson',
 'geojson/3_A1_0_orig.geojson',
 'geojson/19_C1_0_orig.geojson',
 'geojson/122_A1_0_orig.geojson',
 'geojson/20_A10_0_orig.geojson',
 'geojson/21_A2_0_orig.geojson',
 'geojson/7_A4_0_orig.geojson',
 'geojson/13_A1_0_orig.geojson',
 'geojson/23_A2_0_orig.geojson',
 'geojson/1