In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook as tqdm
from datetime import datetime # https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/discussion/97119
from scipy import stats

In [42]:
def mask2rle(img, width=1024, height=1024, max_color=1):
    """Encode a mask as a *relative* run-length string.

    Pixels are visited as ``img[x][y]`` with ``x`` in ``range(width)`` (outer
    loop) and ``y`` in ``range(height)`` (inner loop).  Every run of
    ``max_color`` pixels is written as a ``start length`` pair, where ``start``
    counts pixels from the end of the previous run (from the first pixel for
    the first run).

    :param img: 2-D indexable mask (``img[x][y]``); background is expected to be 0
    :param width: number of outer-loop steps
    :param height: number of inner-loop steps
    :param max_color: pixel value that marks the foreground
    :return: space-separated ``start length start length ...`` string,
             ``""`` when the mask has no foreground pixel

    NOTE(review): ``mask2rle`` is defined a second time further down in this
    cell, so this version is shadowed once the whole cell has run.
    """
    rle = []
    last_color = 0
    current_pixel = 0   # pixels seen since the previous run ended
    run_start = -1      # -1 means "no run is currently open"
    run_length = 0

    for x in range(width):
        for y in range(height):
            current_color = img[x][y]
            if current_color != last_color:
                if current_color == max_color:
                    run_start = current_pixel
                    run_length = 1
                elif run_start > -1:
                    # Only close a run that is actually open; a transition
                    # between two non-foreground values must not emit "-1 0".
                    rle.append(str(run_start))
                    rle.append(str(run_length))
                    run_start = -1
                    run_length = 0
                    # Restart the offset counter: starts are relative.
                    current_pixel = 0
            elif run_start > -1:
                run_length += 1
            last_color = current_color
            current_pixel += 1

    # A run that touches the very last pixel never sees a closing transition,
    # so it has to be flushed here or it would be lost.
    if run_start > -1:
        rle.append(str(run_start))
        rle.append(str(run_length))

    return " ".join(rle)

def rle2mask(rle, width=1024, height=1024, max_color=255):
    """Decode a *relative* run-length string into a ``(width, height)`` mask.

    ``rle`` is a whitespace-separated list of ``start length`` pairs.  Each
    ``start`` is an offset from the end of the previous run (from index 0 for
    the first run).  The string ``'-1'`` stands for an empty mask.

    The original comment flagged this decoder as specific to the SIIM dataset
    and mentioned a ``.T`` transformation; no transpose is performed in this
    body -- the flat buffer is simply reshaped row-major.

    :param rle: run-length string, or ``'-1'`` for "no mask"
    :param width: first dimension of the returned array
    :param height: second dimension of the returned array
    :param max_color: value written into foreground pixels
    :return: float array of shape ``(width, height)`` (``np.zeros`` default dtype)
    :raises ValueError: if ``rle`` does not contain an even number of integers

    NOTE(review): ``rle2mask`` is defined a second time further down in this
    cell, so this version is shadowed once the whole cell has run.
    """
    mask = np.zeros(width * height)
    # Tolerate surrounding whitespace around the "empty" sentinel.
    if rle.strip() == '-1':
        return mask.reshape(width, height)

    values = np.asarray([int(token) for token in rle.split()], dtype=int)
    starts = values[0::2]
    lengths = values[1::2]
    if starts.size != lengths.size:
        raise ValueError(
            "RLE string must contain (start, length) pairs, got {} values".format(values.size))

    position = 0
    for start, length in zip(starts, lengths):
        position += start            # skip the gap since the previous run
        mask[position:position + length] = max_color
        position += length           # next start is relative to this run's end
    return mask.reshape(width, height)


def rle2mask(rle, width=1024, height=1024, max_color=255):
    '''
    Decode a run-length string into a 2-D mask.

    :param rle: whitespace-separated "start length" pairs; starts are 1-based
                positions in the flattened, column-major (Fortran-order) image
    :param width: first dimension of the returned array
    :param height: second dimension of the returned array
    :param max_color: value written into the decoded runs
    Returns a uint8 numpy array of shape (width, height): max_color - mask, 0 - background
    '''
    tokens = np.asarray(rle.split(), dtype=int)
    # Even positions are starts (shifted to 0-based), odd positions are lengths.
    run_begin = tokens[0::2] - 1
    run_end = run_begin + tokens[1::2]

    flat = np.zeros(width * height, dtype=np.uint8)
    for begin, end in zip(run_begin, run_end):
        flat[begin:end] = max_color
    # Runs are laid out column by column, hence the Fortran-order reshape.
    return flat.reshape(width, height, order='F')

def mask2rle(img, width=None, height=None, max_color=None):
    '''
    Encode a mask as a run-length string.

    img: numpy array, non-zero runs are the mask, 0 - background
    width, height, max_color: accepted for call compatibility; not used here
    Returns "start length start length ..." with 1-based starts counted over
    the column-major (transposed, then flattened) pixel order
    '''
    column_major = img.flatten(order='F')
    # Zero sentinels on both ends so runs touching either border still
    # produce an opening and a closing transition.
    padded = np.concatenate([[0], column_major, [0]])
    changes = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd slots hold run ends; subtracting the matching start gives lengths.
    changes[1::2] -= changes[::2]
    return ' '.join(map(str, changes))

In [11]:
def calculate_mode(ndarray, axis=0):
    """Return the most frequent value (and its count) along ``axis``.

    :param ndarray: array-like input; converted with ``np.asarray``
    :param axis: axis along which the mode is taken (negative values allowed)
    :return: ``(modes, counts)``.  For 1-D input both are scalars; otherwise
             both are arrays with ``axis`` removed.  On ties the smallest
             value wins, because the values are sorted and ``np.argmax``
             returns the first maximum.  A single-element input short-circuits
             to ``(ndarray[0], 1)``.
    :raises ValueError: if the array is empty or ``axis`` does not fit the array
    """
    # Check inputs
    ndarray = np.asarray(ndarray)
    ndim = ndarray.ndim
    if ndarray.size == 1:
        return (ndarray[0], 1)
    elif ndarray.size == 0:
        raise ValueError('Cannot compute mode on empty array')
    try:
        # Normalises negative axes and rejects out-of-range / non-integer ones.
        axis = range(ndim)[axis]
    except (IndexError, TypeError):
        raise ValueError('Axis "{}" incompatible with the {}-dimension array'.format(axis, ndim))

    # 1-D fast path: np.unique(return_counts=True) exists from numpy 1.9 on.
    # Compare (major, minor) as a tuple so that e.g. 2.0 is recognised as newer.
    numpy_version = tuple(int(part) for part in np.__version__.split('.')[:2])
    if ndim == 1 and numpy_version >= (1, 9):
        modals, counts = np.unique(ndarray, return_counts=True)
        index = np.argmax(counts)
        return modals[index], counts[index]

    # Sort array so equal values form contiguous runs along `axis`
    sort = np.sort(ndarray, axis=axis)
    # Axis permutation that moves `axis` last (so runs are contiguous after ravel)
    transpose = np.roll(np.arange(ndim)[::-1], axis)
    shape = list(sort.shape)
    shape[axis] = 1
    # Boolean array marking "same as previous element", padded with False on
    # both ends of `axis`, then flattened with `axis` varying fastest
    strides = np.concatenate([np.zeros(shape=shape, dtype='bool'),
                              np.diff(sort, axis=axis) == 0,
                              np.zeros(shape=shape, dtype='bool')],
                             axis=axis).transpose(transpose).ravel()
    # Count the stride lengths: at every False position, the number of True
    # values since the previous False position
    counts = np.cumsum(strides)
    counts[~strides] = np.concatenate([[0], np.diff(counts[~strides])])
    counts[strides] = 0
    # Get shape of padded counts and slice to return to the original shape
    shape = np.array(sort.shape)
    shape[axis] += 1
    shape = shape[transpose]
    slices = [slice(None)] * ndim
    slices[axis] = slice(1, None)
    # Reshape and compute final counts.  Multidimensional indexing needs a
    # tuple; indexing with a list of slices is rejected by current numpy.
    counts = counts.reshape(shape).transpose(transpose)[tuple(slices)] + 1

    # Find maximum counts and return modals/counts
    slices = [slice(None, i) for i in sort.shape]
    del slices[axis]
    # np.ogrid may hand back a tuple, so copy into a list before inserting.
    index = list(np.ogrid[tuple(slices)])
    index.insert(axis, np.argmax(counts, axis=axis))
    index = tuple(index)
    return sort[index], counts[index]

In [3]:
def draw(image, vmin=0, vmax=1):
    """Show ``image`` in a new figure with the 'plasma' colormap and no grid.

    Singleton dimensions are squeezed out before plotting; ``vmin``/``vmax``
    are passed to ``imshow`` as the colour scale limits.
    """
    squeezed = np.squeeze(image)
    plt.figure()
    plt.imshow(squeezed, cmap='plasma', vmin=vmin, vmax=vmax)
    plt.grid(False)
def draw_gray(image, vmin=0, vmax=255):
    """Show ``image`` in a new figure with the 'Greys' colormap and no grid.

    Singleton dimensions are squeezed out before plotting; ``vmin``/``vmax``
    are passed to ``imshow`` as the colour scale limits.
    """
    squeezed = np.squeeze(image)
    plt.figure()
    plt.imshow(squeezed, cmap='Greys', vmin=vmin, vmax=vmax)
    plt.grid(False)
def get_metadata_by_id(path):
    """Read the DICOM file at ``path`` and return six of its header fields.

    Returns the tuple ``(PatientAge, PatientSex, PixelSpacing,
    ReferringPhysicianName, SeriesDescription, ViewPosition)``.

    NOTE(review): ``pydicom`` is not among the imports in the first cell of
    this notebook -- confirm it is imported before this function is called.
    """
    dataset = pydicom.dcmread(path)
    wanted = ("PatientAge", "PatientSex", "PixelSpacing",
              "ReferringPhysicianName", "SeriesDescription", "ViewPosition")
    return tuple(getattr(dataset, field) for field in wanted)
def get_load_image_by_id(path):
    """Read the DICOM file at ``path`` and return its pixel data as a numpy array.

    Uses ``pydicom.dcmread`` -- the same reader ``get_metadata_by_id`` uses --
    instead of the deprecated ``pydicom.read_file`` alias.

    NOTE(review): ``pydicom`` is not among the imports in the first cell of
    this notebook -- confirm it is imported before this function is called.
    """
    ds = pydicom.dcmread(path)
    return np.array(ds.pixel_array)
def get_time(ts):
    """Format a POSIX timestamp as a UTC ``'YYYY-MM-DD HH:MM:SS'`` string.

    Example: ``1517875163.537053`` -> ``'2018-02-05 23:59:23'``.

    :param ts: seconds since the Unix epoch (int or float)
    :return: the formatted UTC date-time string
    """
    # datetime.utcfromtimestamp() is deprecated (Python 3.12+); an aware
    # conversion to UTC formats to exactly the same text.
    from datetime import timezone
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

In [4]:
# Load one submission file and preview its first rows ordered by label.
# NOTE(review): the path is an absolute location on the author's machine.
csv = pd.read_csv(
    "/home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/resnet_keras_with_some_new_ideas_v13_0.653.csv"
)
csv.sort_values(by='Image_Label').head()

Unnamed: 0,Image_Label,EncodedPixels
0,002f507.jpg_Fish,
1,002f507.jpg_Flower,
2,002f507.jpg_Gravel,15 314 362 323 701 347 1051 67538 68590 348 68...
3,002f507.jpg_Sugar,90837 3 91184 7 91533 8 91883 8 92233 9 92583 ...
4,0035ae9.jpg_Fish,


# Voting Ensemble with Overlapping Mask (Use This)

In [49]:
# Weighted pixel-wise vote over several submission files.  Every Image_Label is
# decoded from each submission, the masks are summed with their weights, and a
# pixel is kept when more than half of the total weight voted for it.
# NOTE(review): FOLDER is an absolute location on the author's machine.
FOLDER = "/home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/"
CSV_PATH = ["cloud_convexhull_polygon_postprocessing_0.655.csv",
           "indeptionresnetv2_for_cloud_classifier_v8_0.657.csv",
           "keras_efficientnetb2_for_classifying_cloud_v13_0.655.csv",
           "keras_efficientnetb4_v3_0.655.csv",
           "resnet_keras_with_some_new_ideas_v13_0.653.csv",]
CSV_PATH = [FOLDER + i for i in CSV_PATH]
CSV = [pd.read_csv(i).sort_values(by=['Image_Label']) for i in CSV_PATH]
WEIGHT = np.array([1, 1, 1, 1, 1])  # one voting weight per entry of CSV
WIDTH = 350
HEIGHT = 525
assert len(WEIGHT) == len(CSV), "WEIGHT needs exactly one entry per submission in CSV"

# One Image_Label -> EncodedPixels lookup per submission, built once, so the
# loop below does not rescan every frame for every label.
# verify_integrity makes duplicate labels fail loudly instead of being ambiguous.
encoded_by_label = [
    frame.set_index('Image_Label', verify_integrity=True)['EncodedPixels']
    for frame in CSV
]

prob_path = "{}voting-top.csv".format(FOLDER)
print("Creating Path: {}".format(prob_path))
if os.path.exists(prob_path):
    os.remove(prob_path)
    print("WARNING: delete file '{}'".format(prob_path))
with open(prob_path, 'w') as prob_file:
    prob_file.write('{},{}\n'.format("Image_Label", "EncodedPixels"))

    pbar = tqdm(CSV[0]['Image_Label'].tolist())
    for image_label in pbar:
        votes = np.zeros((WIDTH, HEIGHT))
        for weight, encoded in zip(WEIGHT, encoded_by_label):
            rle = encoded.loc[image_label]
            # A missing prediction is read back as NaN; decode it as an empty mask.
            rle = "" if pd.isna(rle) else str(rle)
            decoded = rle2mask(rle, width=WIDTH, height=HEIGHT, max_color=1)
            # Apply this submission's weight (the divisor below is WEIGHT.sum()).
            votes += weight * decoded.astype(np.float64)
        majority = (votes / WEIGHT.sum() > 0.5).astype(np.byte)
        encoded_majority = mask2rle(majority, width=WIDTH, height=HEIGHT, max_color=1)
        prob_file.write('{},{}\n'.format(image_label, encoded_majority))

print("Output file '{}'".format(prob_path))

Creating Path: /home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/voting-top.csv


HBox(children=(IntProgress(value=0, max=14792), HTML(value='')))




Output file '/home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/voting-top.csv'


# Voting Ensemble with No Overlapping Mask (Don't Use This)

In [50]:
# Per-pixel "mode" vote that forces the four classes to be mutually exclusive.
# Each submission is turned into one coded image (Fish=1, Flower=2, Gravel=3,
# Sugar=4, summed), the most frequent code per pixel across submissions is
# taken, and the result is split back into one mask per class.
# NOTE(review): codes are added, so a pixel predicted as both Fish (1) and
# Flower (2) sums to 3 and is then read back as Gravel; sums above 4 match no
# class and are dropped.
# NOTE(review): this writes to the same "voting-top.csv" path as the
# overlapping-mask voting cell and will replace that file.
# NOTE(review): FOLDER is an absolute location on the author's machine.
FOLDER = "/home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/"
# Add further submission file names to this list to vote across several models.
CSV_PATH = ["cloud_convexhull_polygon_postprocessing_0.655.csv",]
CSV_PATH = [FOLDER + i for i in CSV_PATH]
CSV = [pd.read_csv(i).sort_values(by=['Image_Label']) for i in CSV_PATH]
WEIGHT = np.array([1, 1, 1, 1, 1])  # not used by the mode vote below
WIDTH = 350
HEIGHT = 525
# (label suffix, integer code) for every class, in output order.
CLASS_CODES = (("Fish", 1), ("Flower", 2), ("Gravel", 3), ("Sugar", 4))

# One Image_Label -> EncodedPixels lookup per submission, built once, so the
# loop below does not rescan every frame for every label.
encoded_by_label = [
    frame.set_index('Image_Label', verify_integrity=True)['EncodedPixels']
    for frame in CSV
]

prob_path = "{}voting-top.csv".format(FOLDER)
print("Creating Path: {}".format(prob_path))
if os.path.exists(prob_path):
    os.remove(prob_path)
    print("WARNING: delete file '{}'".format(prob_path))

with open(prob_path, 'w') as prob_file:
    prob_file.write('{},{}\n'.format("Image_Label", "EncodedPixels"))

    # Sorted so the output row order is the same on every run (plain set
    # iteration order is not stable between interpreter sessions).
    image_ids = sorted(set(label.split("_")[0] for label in CSV[0]['Image_Label'].tolist()))
    pbar = tqdm(image_ids)
    empty = 0  # images for which no class survived the vote
    for image_id in pbar:
        coded = np.zeros((len(CSV), WIDTH, HEIGHT))
        for i, encoded in enumerate(encoded_by_label):
            for class_name, code in CLASS_CODES:
                rle = encoded.loc[image_id + "_" + class_name]
                # A missing prediction is read back as NaN; decode it as an empty mask.
                rle = "" if pd.isna(rle) else str(rle)
                coded[i] += code * rle2mask(rle, width=WIDTH, height=HEIGHT, max_color=1)

        # Most frequent code per pixel across submissions.
        modes, counts = calculate_mode(coded, axis=0)

        class_masks = [(class_name, (modes == code).astype(np.uint8))
                       for class_name, code in CLASS_CODES]
        if not any(class_mask.any() for _, class_mask in class_masks):
            empty = empty + 1

        pbar.set_description("{}".format(empty))
        for class_name, class_mask in class_masks:
            encoded_mask = mask2rle(class_mask, width=WIDTH, height=HEIGHT, max_color=1)
            prob_file.write('{},{}\n'.format(image_id + "_" + class_name, encoded_mask))

print("Output file '{}'".format(prob_path))

Creating Path: /home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/voting-top.csv


HBox(children=(IntProgress(value=0, max=3698), HTML(value='')))




Output file '/home/koke_cacao/Documents/Koke_Cacao/Python/WorkSpace/RedstoneTorch/scripts/cloud_project/voting-top.csv'
