# Whole image processing

In [1]:
# General
from glob import glob
from tqdm.notebook import tqdm
import os
import json

# Data Handling
import numpy as np
import pandas as pd

# Plotting
import seaborn as sns
import matplotlib.pyplot as plt

# Image processing
import rasterio
from rasterio.windows import Window
from skimage.draw import polygon2mask
from scipy import ndimage as ndi
from skimage.morphology import remove_small_objects

# Deep Learning
import tensorflow as tf

In [2]:
import resource

In [3]:
# Pick the data root from the working directory: inside /kaggle/working the
# inputs are read from ../input/, anywhere else from the current directory.
cwd = os.getcwd()
base = "../input/" if cwd == '/kaggle/working' else "./"

In [4]:
def get_memory():
    """Sum the MemFree, Buffers and Cached entries of /proc/meminfo.

    Returns:
        int: the sum of the three values exactly as they appear in the file
        (the caller multiplies the result by 1024 to obtain bytes).
    """
    wanted_keys = ('MemFree:', 'Buffers:', 'Cached:')
    with open('/proc/meminfo', 'r') as meminfo:
        # Each line is "<Key>: <value> [unit]"; only the numeric field is used.
        return sum(int(fields[1])
                   for fields in map(str.split, meminfo)
                   if fields[0] in wanted_keys)

In [5]:
if base == "./":
    # Local runs only: cap the process address space at 3/4 of the memory
    # reported by get_memory() (value * 1024 converts it to bytes).
    # Integer arithmetic is used on purpose: setrlimit() only accepts integers,
    # and true division would hand it a float.
    hard = resource.getrlimit(resource.RLIMIT_AS)[1]
    memory_limit = get_memory() * 1024 // 4 * 3

    # The soft limit may never exceed a finite hard limit.
    if hard != resource.RLIM_INFINITY:
        memory_limit = min(memory_limit, hard)

    resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))

## Load Model

In [6]:
# Load the saved generator for inference only (compile=False skips the
# training configuration).
generator = tf.keras.models.load_model(
    f'{base}hubmap-generator/generator/',
    compile=False,
)

In [7]:
# Per-image metadata table; the helpers below look up width_pixels and
# height_pixels by image_file.
info_df = pd.read_csv(
    f'{base}hubmap-kidney-segmentation/HuBMAP-20-dataset_information.csv'
)

## Test image prediction

###  Helper functions

In [8]:
def dice_coef(a, b):
    """Dice similarity coefficient between two boolean masks.

    Parameters:
        a, b: boolean arrays of the same shape (they are combined with ``&``).

    Returns:
        ``2 * |a & b| / (|a| + |b|)``. When both masks are empty the
        coefficient is defined as 1.0 (perfect agreement) instead of
        evaluating 0 / 0.
    """
    total = a.sum() + b.sum()

    # Two empty masks agree perfectly; avoid the 0/0 -> nan division.
    if total == 0:
        return 1.0

    return (a & b).sum() * 2 / total

In [9]:
def get_mask(image_id, window=None, out_shape=None):
    """Decode the run-length-encoded ground truth of ``image_id`` into a mask.

    Parameters:
        image_id: image identifier; ``image_id + '.tiff'`` is looked up in
            ``info_df`` for the image size and ``image_id`` in ``train_df``
            for the (start, run) pairs.
        window: optional ``(min_y, max_y, min_x, max_x)`` crop applied to the
            full-size mask.
        out_shape: optional ``(height, width)``; when given the (cropped) mask
            is resampled with ``array_resize``.

    Returns:
        2-D boolean array indexed as ``[y, x]``.
    """
    w, h = info_df[info_df.image_file == image_id +
                   '.tiff'][['width_pixels', 'height_pixels']].values.flatten()

    mask = np.zeros((w*h,), dtype=bool)

    runs = train_df[train_df.id == image_id]
    for start, run in zip(runs['start'], runs['run']):
        # Starts are 1-based and a run covers exactly `run` pixels -- the same
        # convention rl_encoder_generator emits -- so shift to 0-based and
        # fill the whole run with one slice assignment.
        mask[start - 1:start - 1 + run] = True

    # The flat encoding walks the image column by column, hence (w, h) + transpose.
    mask = mask.reshape(w, h).transpose()

    if window:
        min_y, max_y, min_x, max_x = window
        mask = mask[min_y:max_y, min_x:max_x]

    if out_shape:
        mask = array_resize(mask, out_shape)

    return mask

In [10]:
def array_resize(array, out_shape):
    """Resize the first two axes of ``array`` by picking evenly spaced rows/columns.

    Parameters:
        array: array with at least two dimensions.
        out_shape: ``(height, width)`` of the result.

    Returns:
        A new array whose rows and columns are the source rows/columns at the
        rounded, evenly spaced positions between the first and last index.
    """
    out_rows, out_cols = out_shape

    picked_rows = np.linspace(0, array.shape[0] - 1, out_rows).round().astype(int)
    picked_cols = np.linspace(0, array.shape[1] - 1, out_cols).round().astype(int)

    # Select rows first, then columns from the row-reduced intermediate.
    row_subset = array[picked_rows]
    return row_subset[:, picked_cols]

In [11]:
if base == "./":
    # Ground-truth encodings are only loaded for local runs.
    raw_train_df = pd.read_csv(f'{base}hubmap-kidney-segmentation/train.csv')

    # Unroll the encoding column: one output row per (start, run) pair.
    # Sections are collected in a list and concatenated once; growing a frame
    # with DataFrame.append inside the loop is quadratic and the method no
    # longer exists in pandas >= 2.0.
    sections = []
    for image_id, encoding in raw_train_df.itertuples(index=False, name=None):
        values = np.array(encoding.split()).astype(int)
        # "start run start run ..." -> (k, 2); a dangling unpaired value is ignored.
        pairs = values[:len(values) // 2 * 2].reshape(-1, 2)

        sections.append(pd.DataFrame({'id': image_id,
                                      'start': pairs[:, 0],
                                      'run': pairs[:, 1]}))

    if sections:
        train_df = pd.concat(sections, ignore_index=True)
    else:
        # Empty CSV: keep the expected schema so later lookups still work.
        train_df = pd.DataFrame(columns=['id', 'start', 'run'])

    del raw_train_df, sections

In [12]:
# Cache the annotation JSON of every image listed in info_df:
#   'anat' -> anatomical-structure file (looked up in any sub-folder)
#   'glom' -> per-image JSON from the train folder, only when it exists
image_data_json = {}

for image_file in info_df.image_file:
    image_id = image_file.split('.')[0]

    anat_path = glob(f'{base}hubmap-kidney-segmentation/*/{image_id}-anatomical-structure.json')[0]
    with open(anat_path, 'r') as json_file:
        entry = {'anat': json.load(json_file)}

    glom_paths = glob(f'{base}hubmap-kidney-segmentation/train/{image_id}.json')
    if glom_paths:
        with open(glom_paths[0], 'r') as json_file:
            entry['glom'] = json.load(json_file)

    image_data_json[image_id] = entry


In [13]:
def get_cortex_mask(image_id, size=None, x_anchor=0, y_anchor=0):
    """Rasterise the 'Cortex' annotation of ``image_id`` into a 256x256 mask.

    Parameters:
        image_id: key into ``image_data_json``; ``image_id + '.tiff'`` is
            looked up in ``info_df`` for the image size.
        size: optional edge length of a square region; when given it replaces
            the image width and height as the extent mapped onto the mask.
        x_anchor, y_anchor: offset subtracted from the polygon coordinates
            before scaling (origin of the region of interest).

    Returns:
        Boolean array of shape (256, 256), True inside the cortex.

    Raises:
        IndexError: if the annotation has no feature classified as 'Cortex'.
    """
    anat_data = image_data_json[image_id]['anat']

    w, h = info_df[info_df.image_file == image_id + '.tiff'][
        ['width_pixels', 'height_pixels']].values[0]

    if size:
        w = h = size

    # Keep only the cortex information.
    # NOTE(review): only the first 'Cortex' feature is used -- confirm that
    # annotations never contain more than one.
    cortex_data = [tissue for tissue in anat_data
                   if tissue['properties']['classification']['name'] == 'Cortex'][0]

    # Normalise the geometry to a list of polygons, each a list of rings
    # (first ring = outline, further rings = holes). A 'Polygon' is a single
    # such list; any other type is treated as a list of them (MultiPolygon).
    geometry = cortex_data['geometry']
    if geometry['type'] == 'Polygon':
        polygons = [geometry['coordinates']]
    else:
        polygons = geometry['coordinates']

    cortex_mask_shape = (256, 256)
    cortex_mask = np.zeros(cortex_mask_shape, dtype=bool)

    for rings in polygons:
        polygon_mask = np.zeros(cortex_mask_shape, dtype=bool)

        for ring_idx, ring in enumerate(rings):
            # Float copy: scaling in place on an integer array would silently
            # truncate the vertices.
            vertices = np.asarray(ring, dtype=float)[:, :2]
            vertices[:, 0] = (vertices[:, 0] - x_anchor) / w * 256
            vertices[:, 1] = (vertices[:, 1] - y_anchor) / h * 256

            # polygon2mask expects (row, col), i.e. (y, x), vertices.
            ring_mask = polygon2mask(cortex_mask_shape, vertices[:, ::-1])

            if ring_idx == 0:
                polygon_mask |= ring_mask
            else:
                polygon_mask &= ~ring_mask

        cortex_mask |= polygon_mask

    return cortex_mask

In [14]:
def get_object_sizes(arr):
    """Count the pixels of every connected component in ``arr``.

    Components are labelled with full connectivity (neighbours may touch
    diagonally).

    Returns:
        1-D integer array where entry ``k`` is the number of pixels carrying
        label ``k``; entry 0 is therefore the background count.
    """
    structure = ndi.generate_binary_structure(arr.ndim, 2)
    labels, _ = ndi.label(arr, structure=structure, output=np.int32)
    return np.bincount(labels.ravel())

In [15]:
def rl_encoder_generator(arr):
    """Yield the run-length encoding of a 2-D mask as ``(start, run)`` pairs.

    The mask is scanned row by row as one flat sequence, so a run may continue
    across a row boundary. ``start`` is the 1-based position of the first
    truthy element of a run and ``run`` is the number of elements in it.

    Parameters:
        arr: iterable of rows, each an iterable of truthy/falsy values.

    Yields:
        tuple(int, int): ``(start, run)`` for every run, including a run that
        extends to the very last element.
    """
    start = None
    run = 0
    position = 0

    for row in arr:
        for value in row:
            position += 1
            if value:
                if start is None:
                    start, run = position, 1
                else:
                    run += 1
            elif start is not None:
                yield start, run
                start = None
                run = 0

    # A run still open at the end of the data must be emitted too.
    if start is not None:
        yield start, run

### Process Image

In [16]:
# Number of tiles along the shorter image side: the processing loop derives
# the square tile edge as min(h, w) // n.
n = 20

In [17]:
# TIFF files of the train and test folders.
train_file_list = glob(f"{base}hubmap-kidney-segmentation/train/*.tiff")
test_file_list = glob(f"{base}hubmap-kidney-segmentation/test/*.tiff")

# Image id = file name (text after the last '/') up to its first dot.
train_image_id_list = [tiff_path.rsplit('/', 1)[-1].split('.', 1)[0]
                       for tiff_path in train_file_list]
test_image_id_list = [tiff_path.rsplit('/', 1)[-1].split('.', 1)[0]
                      for tiff_path in test_file_list]

In [18]:
# Images the processing loop runs on. Ids that also appear in
# train_image_id_list take the diagnostic path there (plots and Dice score)
# instead of being written to the submission file.
image_id_list = test_image_id_list

In [19]:
# Tiled inference over every image in image_id_list, streamed into
# submission.csv. The context manager closes the file even when an image
# fails part-way through.
with open('submission.csv', 'w') as output_file:
    output_file.write('id,predicted\n')

    for image_id in image_id_list:

        # Training images take the diagnostic path (thumbnail, ground-truth
        # comparison, Dice) and are not written to the submission file.
        is_train = image_id in train_image_id_list

        path = glob(f"{base}hubmap-kidney-segmentation/*/{image_id}.tiff")[0]
        with rasterio.open(path) as src:

            w = src.width
            h = src.height

            if is_train:
                # Low-resolution preview, 500 rows high, aspect ratio preserved.
                thumbnail = src.read(out_shape=(500, int(w/h*500)),
                                     resampling=rasterio.enums.Resampling.cubic)

                thumbnail = np.moveaxis(thumbnail, 0, -1)
                plt.imshow(thumbnail)
                plt.show()
                del thumbnail

            # Square tiles: n of them fit along the shorter image side.
            size = min(h, w)//n

            # Regular grid of tile origins plus a final tile flush with the
            # right/bottom edge so the whole image is covered.
            x_anchor_list = list(range(0, w-size-1, size)) + [w-size]
            y_anchor_list = list(range(0, h-size-1, size)) + [h-size]

            output_mask = np.zeros((h, w), dtype=bool)

            for y_anchor in tqdm(y_anchor_list):
                for x_anchor in x_anchor_list:

                    # Read the tile directly resampled to the 256x256 size
                    # that is passed to the generator.
                    sample_image = src.read(out_shape=(256, 256),
                                            window=Window.from_slices((y_anchor, y_anchor+size),
                                                                      (x_anchor, x_anchor+size)),
                                            resampling=rasterio.enums.Resampling.cubic)

                    # Bands-first -> bands-last, scaled by 1/255.
                    sample_image = np.moveaxis(sample_image, 0, -1)/255

                    pred = generator.predict(sample_image[np.newaxis, ...]).reshape(256, 256)

                    # Making pred a boolean mask
                    pred = pred > 0.5

                    # Back to the tile's native size before pasting it in place.
                    pred = array_resize(pred, (size, size))

                    output_mask[y_anchor:y_anchor+size, x_anchor:x_anchor+size] = pred

        del pred

        out_shape = (h//10, w//10)
        # Work on a 10x reduced mask; the (currently disabled) small-object
        # removal below was meant to run at this size.
        output_mask = array_resize(output_mask, out_shape)

        if is_train:
            mask = get_mask(image_id, out_shape=out_shape)

        # Get cortex mask
        print("Removing off-cortext detections")
        cortex_mask = get_cortex_mask(image_id)
        cortex_mask = ~cortex_mask
        cortex_mask = array_resize(cortex_mask, out_shape)

        # Delete activations from outside the cortex
        output_mask[cortex_mask] = 0
        del cortex_mask

        # Remove small objects
    #     print('Removing small objects')
    #     output_objects = get_object_sizes(output_mask)
    #     min_size = np.percentile(output_objects, 25)
    #     output_mask = remove_small_objects(output_mask, min_size=min_size, connectivity=2)

        if is_train:
            # Plot prediction next to ground truth and report the overlap.
            fig = plt.figure(dpi=150)

            plt.subplot(1, 2, 1)
            plt.imshow(output_mask)

            plt.subplot(1, 2, 2)
            plt.imshow(mask)

            plt.show()

            dice = dice_coef(output_mask, mask)

            print(f"Dice Coefficient {dice:.3f}")

            del mask
        else:
            print('RL econding')
            # Return output mask to its original size
            output_mask = array_resize(output_mask, (h, w))
            output_file.write(f"{image_id},")

            # The encoder scans row by row; transposing first makes it walk
            # the image column by column.
            output_mask = output_mask.transpose()

            prediction = " ".join([f"{s} {r}" for s, r in rl_encoder_generator(output_mask)])
            output_file.write(prediction)
            output_file.write("\n")

            # Only exists on this branch; deleting it unconditionally would
            # raise NameError for training images.
            del prediction

        del output_mask

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


  0%|          | 0/20 [00:00<?, ?it/s]

Removing off-cortext detections
RL econding


  0%|          | 0/21 [00:00<?, ?it/s]

Removing off-cortext detections
RL econding


  0%|          | 0/20 [00:00<?, ?it/s]

Removing off-cortext detections
RL econding


  0%|          | 0/21 [00:00<?, ?it/s]

Removing off-cortext detections
RL econding


  0%|          | 0/20 [00:00<?, ?it/s]

Removing off-cortext detections
RL econding
