In [7]:
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import Model
import sys
sys.path.append('../model_zoo/')
from resnet34_unet import ResNet34_UNet
from inception_resnet_unet import InceptionResNetV2_UNet
#from inception_resnet_unet_hypercolumns_multigpus import InceptionResNetV2_UNet

In [8]:
import os
# Expose only GPU index 3 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# The sample submission lists the test ids in the order the submission expects.
df_test = pd.read_csv('../input/sample_submission.csv')
ids_test = df_test['id']

# --- Inference settings -----------------------------------------------------
input_size = 128        # padded network input side (see the commented-out constructors below)
batch_size = 48         # NOTE: overwritten with 32 by the prediction cell further down
orig_width = 101
orig_height = 101
num_blocks=6            # not read by any live code in this cell
threshold = 0.03        # cut-off applied to the raw model output when building masks
output_stride=8         # not read by any live code in this cell
upsample_type = 'unet'  # only referenced by the commented-out constructors below
weight_name = '../weights/best_weight_resnet34_unet_lovasz_hinge_loss_9_17_10.834750.hdf5'

# --- Model ------------------------------------------------------------------
#model = ResNet50(input_shape=(input_size,input_size,3),output_stride=8,num_blocks=6,multigrid=[1,2,1],use_se=True,upsample_type=upsample_type)
#model = PSPNet50(input_shape=(input_size,input_size,3),output_stride=8,num_blocks=6,multigrid=[1,2,1],use_se=True,upsample_type=upsample_type)
# NOTE(review): use_activation=False presumably means the network emits raw
# scores rather than sigmoid probabilities -- confirm in resnet34_unet.py.
inputs,outputs = ResNet34_UNet(use_activation=False)
model = Model(inputs,outputs)
model.load_weights(weight_name)

# Test ids as strings. A comprehension is used so the loop variable does not
# shadow the builtin `id` in the notebook's global namespace.
names = ['{}'.format(sample_id) for sample_id in ids_test]

(None, 128, 128, 1)


In [9]:
def run_length_encode(img, order='F', format=True):
    """
    Run-length encode a binary mask.

    img is a binary mask image, shape (r, c); every non-zero pixel counts as
    foreground.
    order is the flattening order passed to ``reshape``; the default 'F'
    (Fortran) walks the mask down-then-right.
    format selects the return type: True returns the submission string
    ``"start length start length ..."``, False returns a list of
    ``(start, length)`` tuples.

    Start positions are 1-based. A mask without foreground yields ``''``
    (or ``[]`` when format is False).

    The runs are located with vectorised NumPy operations rather than a
    per-pixel Python loop; the output is identical.
    """
    flat = img.reshape(img.shape[0] * img.shape[1], order=order) != 0

    # Pad with background on both sides so every run has an explicit start and
    # end transition, including runs that touch the first or last pixel.
    padded = np.concatenate([[False], flat, [False]])
    # Index i is listed when padded[i] != padded[i + 1]. Transitions alternate
    # start, end, start, end, ...; a start index i is 0-based pixel i.
    edges = np.flatnonzero(padded[1:] != padded[:-1])
    starts = (edges[0::2] + 1).tolist()              # +1: positions are 1-based
    lengths = (edges[1::2] - edges[0::2]).tolist()
    runs = list(zip(starts, lengths))

    if format:
        return ' '.join('{} {}'.format(start, length) for start, length in runs)
    return runs
    
def load_image(path, mask = False):
    """
    Load an image with OpenCV and reflect-pad it so that each side is
    divisible by 32 (network requirement).

    The padding is split between the two sides of each axis, with the smaller
    half placed on the top/left.

    path: location of the image file (converted with ``str`` before reading).
    mask: if True, treat the file as a mask and return only the first channel,
          shape (H, W, 1), floor-divided by 255.0 (for 8-bit data this gives
          1.0 where the pixel is 255 and 0.0 elsewhere);
          if False, return all channels scaled by 1/255.0, in the channel
          order delivered by ``cv2.imread`` (no BGR->RGB conversion is done).

    returns the padded image as a numpy.array.
    raises IOError if OpenCV cannot read the file.
    """
    img = cv2.imread(str(path))
    if img is None:
        # cv2.imread reports failure by returning None instead of raising, which
        # would otherwise surface as an opaque error on `.shape` below.
        raise IOError('Could not read image: {}'.format(path))
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    height, width, _ = img.shape

    # Padding is needed for UNet models because they need the image size to be
    # divisible by 32. (-n) % 32 is the distance to the next multiple of 32
    # (0 when n already is one).
    y_pad = (-height) % 32
    y_min_pad = y_pad // 2
    y_max_pad = y_pad - y_min_pad

    x_pad = (-width) % 32
    x_min_pad = x_pad // 2
    x_max_pad = x_pad - x_min_pad

    img = cv2.copyMakeBorder(img, y_min_pad, y_max_pad, x_min_pad, x_max_pad, cv2.BORDER_REFLECT_101)
    if mask:
        # Convert mask to 0 and 1 format
        return img[:, :, 0:1] // 255.0
    return img[:, :, :] / 255.0

def get_prob(prob,height = 101, width =101):
    """
    Crop a padded prediction map back to the original image size.

    Reverses the padding scheme used by load_image: each axis is padded up to
    the next multiple of 32, with the smaller half of the padding on the
    top/left side.

    prob: 2-D array (rows, cols) predicted on the padded image.
    height, width: size of the original, unpadded image.

    returns the (height, width) window of `prob` covering the original image.

    The window is taken as offset:offset+size rather than relative to a fixed
    padded size of 128, so it is also correct for sizes that pad to something
    other than 128 (e.g. 200 -> 224).
    """
    # Leading (top/left) padding: half of the distance to the next multiple of 32.
    y_min_pad = ((-height) % 32) // 2
    x_min_pad = ((-width) % 32) // 2
    return prob[y_min_pad:y_min_pad + height, x_min_pad:x_min_pad + width]

In [10]:
rles = []
batch_size = 32
print('Predicting on {} samples with batch_size = {}...'.format(len(ids_test), batch_size))
preds_raw = []
for start in tqdm(range(0, len(ids_test), batch_size)):
    end = min(start + batch_size, len(ids_test))
    ids_test_batch = ids_test[start:end]

    # Load and pad every image of the batch. The loop variable is deliberately
    # not called `id`, so the builtin is not shadowed.
    x_batch = np.array(
        [load_image('../input/test/images/{}.png'.format(image_id), mask=False)
         for image_id in ids_test_batch.values],
        np.float32)

    # Test-time augmentation: only the horizontal flip is used. Predict on the
    # mirrored batch, then mirror the prediction back before averaging.
    x_batch_horizontal = x_batch[:,:,::-1,:]

    preds = model.predict_on_batch(x_batch)
    preds = np.squeeze(preds, axis=3)

    preds_horizontal = model.predict_on_batch(x_batch_horizontal)[:,:,::-1,:]
    preds_horizontal = np.squeeze(preds_horizontal, axis=3)

    preds = (preds+preds_horizontal)/2.0

    for pred in preds:
        prob = get_prob(pred)       # crop the padded map back to the original size
        mask = prob > threshold
        rle = run_length_encode(mask)
        rles.append(rle)
        preds_raw.append(pred)      # keep the uncropped averaged output for later ensembling


print("Generating submission file...")
# Both output files are named after the weight file that produced them.
weight_basename = weight_name.split('/')[-1]
df = pd.DataFrame({'id': names, 'rle_mask': rles})
df.to_csv('../submit/submission_{}.csv.gz'.format(weight_basename), index=False, compression='gzip')
pd.to_pickle(preds_raw,'../sub_prob/prob_{}.pkl'.format(weight_basename))

  0%|          | 0/563 [00:00<?, ?it/s]

Predicting on 18000 samples with batch_size = 32...


100%|██████████| 563/563 [14:41<00:00,  1.57s/it]


Generating submission file...


In [11]:
# Fraction of test images whose predicted mask is empty (empty RLE string).
(df['rle_mask'] == '').mean()

0.42977777777777776

In [6]:
import pandas as pd
from tqdm import tqdm
# Re-read the test ids in submission order.
df_test = pd.read_csv('../input/sample_submission.csv')
ids_test = df_test['id']
# Ids as strings. A comprehension is used so the loop variable does not shadow
# the builtin `id` in the notebook's global namespace.
names = ['{}'.format(sample_id) for sample_id in ids_test]

In [7]:
#preds_raw_lavsaz_hyper = pd.read_pickle('../sub_prob/prob_best_weight_inception_resnet_unet_hypercolumns_lovasz_hinge_loss_9_10_multigpu_0_0.852250.hdf5.pkl')
# Per-fold raw predictions, one pickle per fold (file names follow the
# '../sub_prob/prob_<weight file>.pkl' pattern written by the prediction cell).
# NOTE(review): the fold-0 file name ('..._9_60.852750...') does not follow the
# '..._9_6_<fold><score>' pattern of the other four -- confirm it is the intended file.
_fold_prob_paths = (
    '../sub_prob/prob_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_60.852750.hdf5.pkl',
    '../sub_prob/prob_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_6_10.836625.hdf5.pkl',
    '../sub_prob/prob_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_6_20.835000.hdf5.pkl',
    '../sub_prob/prob_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_6_30.841375.hdf5.pkl',
    '../sub_prob/prob_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_6_40.850500.hdf5.pkl',
)
(preds_raw_lavsaz_fold0,
 preds_raw_lavsaz_fold1,
 preds_raw_lavsaz_fold2,
 preds_raw_lavsaz_fold3,
 preds_raw_lavsaz_fold4) = [pd.read_pickle(fold_path) for fold_path in _fold_prob_paths]

In [8]:
# Bagging: per-image mean of the five folds' predictions.
# sum(rest, first) adds in the same left-to-right order as
# item0+item1+item2+item3+item4.
preds = [
    sum(rest, first) / 5.0
    for first, *rest in zip(
        preds_raw_lavsaz_fold0,
        preds_raw_lavsaz_fold1,
        preds_raw_lavsaz_fold2,
        preds_raw_lavsaz_fold3,
        preds_raw_lavsaz_fold4,
    )
]

In [9]:
# Crop each averaged prediction to the original size, threshold it and
# run-length encode the resulting binary mask.
threshold = 0.03
rles = [
    run_length_encode(get_prob(pred) > threshold)
    for pred in tqdm(preds)
]

100%|██████████| 18000/18000 [07:58<00:00, 37.60it/s]


In [10]:
# Submission table: one row per test id with its run-length encoded mask.
df = pd.DataFrame(
    {
        'id': names,
        'rle_mask': rles,
    }
)

In [11]:
# Write the bagged submission as a gzip-compressed CSV without the index column.
df.to_csv(
    '../submit/submission_bagging_lavsaz_th_0.03.csv.gz',
    compression='gzip',
    index=False,
)

In [13]:
# Persist the fold-averaged (uncropped, unthresholded) predictions for reuse.
pd.to_pickle(
    preds,
    '../sub_prob/prob_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_6_bagging_128_128.pkl',
)

In [16]:
# Earlier single-model submission, loaded for comparison with the bagged one.
old = pd.read_csv(
    '../submit/submission_best_weight_inception_resnet_unet_lovasz_hinge_loss_9_6_40.850500.hdf5.csv.gz',
    compression='gzip',
)

In [17]:
# Fraction of rows with a missing rle_mask in the earlier submission.
old['rle_mask'].isnull().mean()

0.42227777777777775

In [20]:
# First rows of the bagged submission.
df.head(n=5)

Unnamed: 0,id,rle_mask
0,155410d6fa,1 907 910 98 1011 97 1112 97 1213 96 1314 95 1...
1,78b32781d1,56 46 157 46 257 47 357 48 456 50 555 52 654 5...
2,63db2a476a,8297 10 8395 19 8495 23 8596 25 8698 26 8800 2...
3,17bfcdb967,4445 33 4546 51 4643 5559
4,7ea0fd3c88,


In [21]:
# First rows of the earlier submission, for side-by-side comparison.
old.head(n=5)

Unnamed: 0,id,rle_mask
0,155410d6fa,1 807 809 99 910 98 1011 97 1112 96 1213 96 13...
1,78b32781d1,56 46 157 46 257 47 356 49 455 51 555 52 654 5...
2,63db2a476a,8296 11 8395 18 8495 22 8596 24 8698 25 8776 2...
3,17bfcdb967,4445 28 4546 46 4647 5555
4,7ea0fd3c88,
