In [1]:
# This script reads a CSV file containing run-length-encoded segmentation labels,
# decodes them into label masks and saves each image/mask pair as a compressed .npz file.

# Mount Google Drive into the Colab VM so that the dataset paths below resolve.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Path to the CSV file containing the run-length-encoded labels
csv_path ='/content/drive/MyDrive/Deep_learning/project/data/train.csv'

# Directory holding the training images (read later as "<id>.png")
train_images_path = '/content/drive/MyDrive/Deep_learning/project/data/train/'

# Output directories for the generated .npz files (test split and train split)
test_path ='/content/drive/MyDrive/Deep_learning/project/data/test_multi_npz/'
train_path ='/content/drive/MyDrive/Deep_learning/project/data/train_multi_npz/'

Mounted at /content/drive


In [2]:
# Import modules
import numpy as np
import matplotlib.image as mpimg
from matplotlib import cm
import pandas as pd
import os
import sys
import cv2

class Utils:
    """Image-loading helpers used by the dataset preparation cells."""

    @staticmethod
    def get_preprocessed_img(img_path, label=False):
        """Load an image as grayscale and min-max normalise it to [0, 1].

        Args:
            img_path: Path of the image file to read.
            label: When True, the normalised image is inverted (``1 - img``).

        Returns:
            ``np.float32`` array with values in [0, 1]. A constant image
            (max == min) has no dynamic range; it is returned as all zeros
            (all ones when ``label`` is True) instead of NaNs.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``img_path``.
        """
        img = cv2.imread(img_path, 0)  # flag 0 -> load as single-channel grayscale
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # rather than raising, so fail loudly with the offending path.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        img = img.astype(np.float32)

        # Normalize to range [0 1], guarding against division by zero.
        lowest = np.min(img)
        span = np.max(img) - lowest
        if span > 0:
            img = (img - lowest) / span
        else:
            img = np.zeros_like(img)

        if label:
            img = 1 - img
        return img


In [3]:
def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length-encoded mask into a numpy array.

    mask_rle: run-length string formatted as "start length start length ...",
              where starts are 1-based indices into the flattened (row-major) image
    shape: (height, width) or (height, width, channels) of the array to return
    color: value written into masked pixels; a scalar, or an array with one
           entry per channel when a channel axis is requested
    Returns float32 numpy array of the given shape, color - mask, 0 - background

    '''
    tokens = mask_rle.split()
    # Even positions hold the starts, odd positions the run lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # convert to 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    # Work on a flattened pixel axis; keep the channel axis only if one was
    # requested so that a plain (height, width) shape is supported as well.
    flat_shape = (shape[0] * shape[1],) + tuple(shape[2:])
    img = np.zeros(flat_shape, dtype=np.float32)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = color
    return img.reshape(shape)


def build_masks(rles, label, input_shape, colors=True):
    '''
    Combine several run-length-encoded annotations into one mask.

    rles: iterable of run-length strings, each decoded with rle_decode
    label: cell type name; 'shsy5y', 'astro' or 'cort'
    input_shape: (height, width) of the mask
    colors: if truthy, a 3-channel mask where every annotation is drawn with a
            random color; otherwise a single-channel mask
    Returns the mask, clipped to [0, 1] and multiplied by the numeric label
    value (shsy5y - 1, astro - 2, cort - 3). Raises Exception for any other label.

    '''
    height, width = input_shape

    n_channels = 3 if colors else 1
    canvas = np.zeros((height, width, n_channels))
    for encoded in rles:
        if colors:
            canvas += rle_decode(encoded, shape=(height, width , 3), color=np.random.rand(3))
        else:
            canvas += rle_decode(encoded, shape=(height, width, 1))

    # Overlapping annotations add up, so cap the accumulated values at 1.
    canvas = np.clip(canvas, 0, 1)

    label_val = None
    for known_label, value in (('shsy5y', 1), ('astro', 2), ('cort', 3)):
        if label == known_label:
            label_val = value
            break
    if label_val is None:
        raise Exception(f"Unknown label - {label}")

    return canvas*label_val

In [4]:
# Load the annotation table from the CSV file configured above.
train_csv = pd.read_csv(csv_path)

# Distinct values of the "id" column, in order of first appearance.
sample_ids = pd.unique(train_csv["id"])

In [5]:
def save_images(sample_ids, n_train=501, mask_shape=(520, 704)):
    '''
    Function to create and save images

    For every id the annotations are decoded into a single-channel label mask,
    the matching "<id>.png" image is loaded and normalised, and both are
    written to one compressed .npz file with the keys "image" and "label".

    sample_ids - list of ids
    n_train - number of leading ids written to train_path; the rest go to
              test_path (default 501, i.e. indices 0..500)
    mask_shape - (height, width) used to decode the masks

    Raises Exception if the rows of one id carry more than one cell type.

    '''

    # NOTE(review): kept so that cells which inspect the last processed sample
    # through these module-level names keep working; drop once nothing reads them.
    global sample_mask, sample_labels

    # np.savez_compressed does not create missing directories.
    os.makedirs(train_path, exist_ok=True)
    os.makedirs(test_path, exist_ok=True)

    for idx, sample_id in enumerate(sample_ids):

        # Filter the table once and reuse the rows for both columns.
        sample_rows = train_csv.loc[train_csv['id'] == sample_id]
        sample_rles = sample_rows['annotation'].values
        sample_labels = sample_rows['cell_type'].values

        # Validate before doing any work so that an inconsistent sample is
        # never written to disk.
        if len(np.unique(sample_labels))>1:
            raise Exception('Error - few labels in one image')

        sample_mask = build_masks(sample_rles, sample_labels[0], mask_shape, colors=False)

        # Drop the singleton channel axis: (height, width, 1) -> (height, width)
        sample_mask = np.squeeze(sample_mask, axis=2)

        img = Utils.get_preprocessed_img(
            os.path.join(train_images_path, "{}.png".format(sample_id))
        )

        out_dir = train_path if idx < n_train else test_path
        np.savez_compressed(os.path.join(out_dir, str(sample_id)), image=img, label=sample_mask)




In [6]:
# Decode and write an .npz file for every id found in the CSV.
save_images(sample_ids)