In [1]:
import os, gc, subprocess, time, sys, shutil

import scipy
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
import cv2
#from tqdm import tqdm
from tqdm.notebook import tqdm
import skimage.io
from skimage.transform import resize, rescale
from math import sqrt
import statistics

sys.path.append("..")

In [2]:
import PIL.Image
#PIL.Image.MAX_IMAGE_PIXELS = 400000000
PIL.Image.MAX_IMAGE_PIXELS = None

In [3]:
%matplotlib inline

In [4]:
from panda_bvv_config import *

## 1. Inputs ##

In [5]:
Zoom = 2
cfv = 1
new_proc_folder = 'ts2_16x320_inv'
input_folder = train_size2
kw_for_tile_func = {'sz': 80, 'N':16}
#kw = {'gauss_blur':20}

In [6]:
# last_train_label = 9000
# last_valid_label = 10617

In [7]:
# wrapping inputs:
new_proc_folder = os.path.join(base_path, new_proc_folder)
if not os.path.exists(new_proc_folder):
    print("[INFO] 'creating {}' directory".format(new_proc_folder))
    os.makedirs(new_proc_folder)
input_mask_folder = mask_size2 if Zoom == 2 else mask_size1

## 2. Standard functions ##

In [8]:
def display_mask(img_num, chan = 2):
    i=3
    f, ax = plt.subplots(1, 3, figsize=(8,10))
    image_path = os.path.join(mask_size2, img_num + "_mask.png")
    mask_data = skimage.io.imread(image_path)        
    cmap = matplotlib.colors.ListedColormap(['black', 'lightgray', 'darkgreen', 'yellow', 'orange', 'red'])
    ax[i%3].imshow(np.asarray(mask_data)[:,:,chan], cmap=cmap, interpolation='nearest', vmin=0, vmax=5)      
    ax[i%3].axis('off')
    ax[i%3].axis('off')
            
    plt.show()

In [9]:
#wrapper for image processing function
def process_all_images(proc_func):
    
    def wrapper(zoom = Zoom,
                input_biopsy_folder = input_folder,
                output_data_path = new_proc_folder,
                df_name = train_labels,
                tif_file = False,
                with_mask = False,
                input_mask_folder = input_mask_folder,
                **kw):
        print(input_biopsy_folder)
        print(new_proc_folder)
        time.sleep(15)
        for  i, row in enumerate(tqdm(df_name.iterrows())):
            img_num = row[1]['image_id']
            provider = row[1]['data_provider']                         
            mask = None
            if tif_file:
                try:
                    biopsy = skimage.io.MultiImage(os.path.join(input_biopsy_folder,                                                        img_num + '.tiff'))[zoom]               
                except:
                    print('Failed to read tiff:', img_num)                        
            else:
                try:
                    biopsy = skimage.io.imread(os.path.join(input_biopsy_folder, img_num + '.png'))
                    shape = biopsy.shape
                    if shape[0]<shape[1]: 
                        biopsy = np.rot90(biopsy)
                        shape = biopsy.shape 
                except:
                    print(f'can not proceed with {img_num}')   
            if with_mask:
                try:
                    mask = skimage.io.imread(os.path.join(input_mask_folder, img_num + '_mask.png'))
                    shape = mask.shape
                    if shape[0]<shape[1]: 
                        mask = np.rot90(mask)
                        shape = mask.shape 
                except:
                    print('Failed to process mask:', img_num)
            try:
                data_new = proc_func(biopsy, mask, provider, **kw)
                cv2.imwrite(os.path.join(output_data_path, img_num + '.png'),                                         data_new, [int(cv2.IMWRITE_PNG_COMPRESSION), 9])
            except Exception as ee:
                print('Processing mistake:\n', ee, '\n', img_num)            
            try:
                del biopsy, mask, data_new, mask_new
            except:
                pass
        gc.collect()
        return
    
    return wrapper

In [10]:
def change_karolinska_mask_for_isup_grade_direct(mask_data, chan):
    """
    This function returns pre-processed radbound mask for image multiplication
    """

    try:
        new_mask = np.zeros(mask_data.shape)#.astype('uint8')
        cancerous_tissue = False
        try:             
            mask_area = np.where(mask_data[:,:,chan] == 2)
            new_mask[mask_area[0], mask_area[1], :] = [1, 1, 1] 
            del mask_area, mask_data
            if new_mask.any() >0: 
                cancerous_tissue = True    
        except Exception as ee:
            print("Mask {%s} processing mistake: " %mask_image, ee)
    except Exception as ee:
        print("Mask {%s} read mistake: "  %mask_image, str(ee))

    gc.collect()                   
    
    return cancerous_tissue, new_mask

In [11]:
def change_karolinska_mask_for_isup_grade_direct(mask_data, chan):
    """
    This function returns pre-processed radbound mask for image multiplication
    """

    try:
        new_mask = np.zeros(mask_data.shape)#.astype('uint8')
        cancerous_tissue = False
        try:             
            mask_area = np.where(mask_data[:,:,chan] == 2)
            new_mask[mask_area[0], mask_area[1], :] = [1, 1, 1] 
            del mask_area, mask_data
            if new_mask.any() >0: 
                cancerous_tissue = True    
        except Exception as ee:
            print("Mask {%s} processing mistake: " %mask_image, ee)
    except Exception as ee:
        print("Mask {%s} read mistake: "  %mask_image, str(ee))

    gc.collect()                   
    
    return cancerous_tissue, new_mask

In [12]:
def change_radboud_mask_for_isup_grade_direct(mask_data, chan,
                                      gauss_bl = 10):
    """
    This function returns pre-processed radbound mask for image multiplication
    """

    try:
        new_mask = np.zeros(mask_data.shape)#.astype('uint8')
        cancerous_tissue = False
        try: 
            for area_value in range(3,5):
                mask_area = np.where(mask_data[:,:,chan] == area_value)
                new_mask[mask_area[0], mask_area[1], :] = [1, 1, 1]
                del mask_area
            if new_mask.any() >0: 
                cancerous_tissue = True
            del mask_data,    
        except Exception as ee:
            print("Mask {%s} processing mistake: " %mask_image, ee)
    except Exception as ee:
        print("Mask {%s} read mistake: "  %mask_image, str(ee))
        
    #new_mask[:,:,:] = cv2.blur(new_mask[:,:, :],(gauss_bl,gauss_bl))
    new_mask[:,:,:] = cv2.bilateralFilter(np.float32(new_mask[:,:,:]),gauss_bl,25,25)
    #medianBlur(new_mask[:,:, 2],5)
    #new_mask[:,:,:] = cv2.GaussianBlur(new_mask[:,:,:],(gauss_bl,gauss_bl), 0)
    #new_mask[:,:,:] =  np.rint(new_mask[:,:,:]) #.astype(int) 
    new_mask[:,:,:] =  np.ceil(new_mask[:,:,:]) #.astype(int) 
    #gc.collect()                   
    
    return cancerous_tissue, new_mask

In [13]:
def get_cancer_area_from_mask(cancer_mask, biopsy):
    """
    Returns multiplication of mask and image
    """
    try:
        only_cancer = np.multiply(cancer_mask,biopsy).astype(int)
        #cv2.imwrite(save_path, only_cancer, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
        #del cancer_mask, biopsy #, only_cancer
    except Exception as ee:
        print(str(ee))
    
    return only_cancer  

## 3. Processing functions ##

In [14]:
@process_all_images
def tiff_to_png_size2(img, mask, provider, **kw):
    return img

In [15]:
@process_all_images
def get_cancer_area_for_isup_grade(biopsy, mask, provider, **kw):
    """
    Extracts cancer area only (if cancerous), from image and mask; or unchanged (if non-cancerous).
    Do NOT split cancer areas on Gleason.
    This function applicable for ISUP-grade based training
    Do NOT tile image
    """
    
    gauss_bl = kw.get('gauss_bl') if kw.get('gauss_bl') else 20    
    channel = 0 if len(np.unique(mask[:,:,0])) >1 else 2    
              
    if provider == 'karolinska':
        cancerous, new_mask  = change_karolinska_mask_for_isup_grade_direct(mask, chan = channel)
    elif provider == 'radboud':
        cancerous, new_mask  = change_radboud_mask_for_isup_grade_direct(mask, chan = channel,
                                      gauss_bl = gauss_bl)
    if cancerous:
        temp_im = get_cancer_area_from_mask(new_mask, biopsy)
        temp_im = temp_im.astype('uint8') * 255
        inv = cv2.bitwise_not(temp_im )
        #inv = biopsy
    else:
        inv = biopsy
    
    data_new = inv
    #data_new = cv2.bitwise_not(inv)
    
    return data_new
    
    #eliminate_white_direct(biopsy = inv,
    #                       save_file_name = save_path,
    #                       zoom =2,                          
    #                     fault_name = 'faults_size2_large_mask.txt', 
    #                      tile_square_yside = tile_square_yside,
    #                        tile_square_xside = tile_square_xside, 
    #                      tile_fill = tile_fill,
    #                      threshold = threshold,
    #                          transform_ratios = transform_ratios,
    #                    file_list = None,                         
    #                      compression = compression)
        
    #cv2.imwrite(save_path, inv, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
    #else:
        #execute_cp_command(biopsy_file_path, save_path)

In [16]:
@process_all_images
def tile16_simple(img, mask, provider, **kw):        
    sz=kw.get('sz') if kw.get('sz') else 32    
    N = kw.get('N') if kw.get('N') else 16
    final_dim = kw.get('final_dim') if kw.get('final_dim') else N**0.5 
    result = []
    shape = img.shape
    pad0,pad1 = (sz - shape[0]%sz)%sz, (sz - shape[1]%sz)%sz
    img = np.pad(img,[[pad0//2,pad0-pad0//2],[pad1//2,pad1-pad1//2],[0,0]],
                constant_values=255)
    img = img.reshape(img.shape[0]//sz,sz,img.shape[1]//sz,sz,3)
    img = img.transpose(0,2,1,3,4).reshape(-1,sz,sz,3)
    if len(img) < N:
        img = np.pad(img,[[0,N-len(img)],[0,0],[0,0],[0,0]],constant_values=255)
    idxs = np.argsort(img.reshape(img.shape[0],-1).sum(-1))[:N]
    img = img[idxs]
    a, b, c, d = img.shape
    data_new = (img.reshape(int(final_dim), int(final_dim),\
                                                    b, c, d).swapaxes(1,2).reshape(b*int(final_dim),\
                                                    c*int(final_dim), d))
    data_new = cv2.bitwise_not(data_new)
    
    return data_new

## 4. Process input files for training purpose ##

In [17]:
# tile16_simple(df_name = train_labels,
#               **kw_for_tile_func)

In [18]:
#mask_labels.loc[mask_labels.image_id == '05f2ae45ead3e5737d102656366752df']

In [19]:
# tiff_to_png_size2(tif_file = True,
#                   df_name = train_labels[1872:])

In [20]:
# get_cancer_area_for_isup_grade(with_mask = True,
#                                df_name = train_labels[:10],
#                                **kw)

## 5. Distribute processed files among training and validation folders ##

In [21]:
try:
    shutil.rmtree(train_cnn)
    shutil.rmtree(valid_cnn)
except OSError as ee:
    print ("Error: %s: " % ee)
os.mkdir(train_cnn)
os.mkdir(valid_cnn)
for grade in isup_classes:
    os.mkdir(os.path.join(train_cnn, f'isup{grade}'))
    os.mkdir(os.path.join(valid_cnn, f'isup{grade}'))
# labels_perm = train_labels.sample(frac = 1, random_state=npseed)
# train_cnn_labels = labels_perm[:last_train_label]
# valid_cnn_labels = labels_perm[last_train_label:last_valid_label ]

In [29]:
#cross-fold validation:
if cfv == 1:
    train_cnn_labels = train_labels[:-2123]
    valid_cnn_labels = train_labels[-2123:]
else:
    train_cnn_labels = pd.concat([train_labels[:-2123*cfv], train_labels[-2123*(cfv-1):]], sort=False)
    valid_cnn_labels = train_labels[-2123*cfv:-2123*(cfv-1)]

In [23]:
%%time
# #split train files among ISUP categories
for j, row in tqdm(train_cnn_labels.iterrows()):
    filename = row['image_id'] + '.png'       
    path_to_train_class = os.path.join(train_cnn, 'isup' + str(row['isup_grade']), filename)
    path_to_copy_from = os.path.join(new_proc_folder, filename)
    bashCommand = "cp " + path_to_copy_from + ' ' + path_to_train_class
    process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
    output, error = process.communicate()
    if error: print(str(error))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


CPU times: user 8.7 s, sys: 15.9 s, total: 24.6 s
Wall time: 1min 54s


In [24]:
%%time
# #split validation files among ISUP categories
for j, row in tqdm(valid_cnn_labels.iterrows()):
    filename = row['image_id'] + '.png'
    path_to_valid_class = os.path.join(valid_cnn, 'isup' + str(row['isup_grade']), filename)    
    path_to_copy_from = os.path.join(new_proc_folder, filename)
    bashCommand = "cp " + path_to_copy_from + ' ' + path_to_valid_class    
    process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
    output, error = process.communicate()
    if error: print(str(error))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


CPU times: user 2.16 s, sys: 3.97 s, total: 6.12 s
Wall time: 28.8 s


## ################################################################## ##