In [None]:
import os
import sys
import random
import warnings
import time
import shutil

import numpy as np
import pandas as pd
import cv2

import matplotlib.pyplot as plt

import seaborn as sns
from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from textwrap import wrap

from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

import tensorflow as tf
%matplotlib inline

# def read_image(filepath, color_mode=cv2.IMREAD_COLOR, target_size=None,space='bgr'):
#     """Read an image from a file and resize it."""
#     img = cv2.imread(filepath, color_mode)
#     if target_size: 
#         img = cv2.resize(img, target_size, interpolation = cv2.INTER_AREA)
#     if space == 'hsv':
#         img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
#     return img


# def load_raw_data(image_size=(256, 256), space = 'bgr',load_mask=True):
#     """Load raw data."""
#     # Python lists to store the training images/masks and test images.
#     x_train, y_train, x_test = [],[],[]

#     # Read and resize train images/masks. 
#     print('Loading and resizing train images and masks ...')
#     print (train_df['image_path'])
#     sys.stdout.flush()
#     for i, filename in tqdm.tqdm(enumerate(train_df['image_path']), total=len(train_df)):
#         img = read_image(train_df['image_path'].loc[i], target_size=image_size,space = space)
#         if load_mask:
#             mask = read_image(train_df['mask_path'].loc[i],
#                               color_mode=cv2.IMREAD_GRAYSCALE,
#                               target_size=image_size)
#             #mask = read_mask(train_df['mask_dir'].loc[i], target_size=image_size)
#             y_train.append(mask)
#         x_train.append(img)
        
#     # Read and resize test images. 
#     print('Loading and resizing test images ...')
#     sys.stdout.flush()
#     for i, filename in tqdm.tqdm(enumerate(test_df['image_path']), total=len(test_df)):
#         img = read_image(test_df['image_path'].loc[i], target_size=image_size,space=space)
#         x_test.append(img)

#     # Transform lists into 4-dim numpy arrays.
#     x_train = np.array(x_train)
#     #if load_mask:
#     y_train = np.array(y_train)
#     #y_train = np.expand_dims(np.array(y_train), axis=4)
#     x_test = np.array(x_test)
#     print('Data loaded')
#     if load_mask:
#         return x_train, y_train, x_test
#     else:
#         return x_train, x_test


# Global parameters shared by the loading, augmentation and model code below.
IMG_WIDTH = 256      # every image/mask is resized to IMG_HEIGHT x IMG_WIDTH
IMG_HEIGHT = 256
IMG_CHANNELS = 3     # number of leading colour channels kept per image
TRAIN_PATH = 'G:/Kaggle_data/Kaggle_bowl/stage1_train/'
TEST_PATH = 'G:/Kaggle_data/Kaggle_bowl/stage2_test_final/'

warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

# Seed both generators by *calling* the seed functions. Assigning to
# `random.seed` / `np.random.seed` replaces the functions with an int and
# leaves the generators unseeded.
seed = 666
random.seed(seed)
np.random.seed(seed)

# NOTE: this rebinds `time` from the module to the formatted date string; the
# submission file name at the end of the script is built from this string, so
# the name is kept. The `time` module is not usable after this line.
time = time.strftime("%Y-%m-%d", time.gmtime())

# Additive smoothing term read by dice_coef().
smooth = 1

def get_images(augment=False):
    """Load and resize the training images/masks and the test images.

    Every sub-directory of ``TRAIN_PATH`` is one training sample: its image is
    read from ``<id>/images/<id>.png`` and all files in ``<id>/masks/`` are
    merged (element-wise maximum) into a single mask. Every sub-directory of
    ``TEST_PATH`` is one test sample read from ``<id>/images/<id>.png``.
    Everything is resized to ``(IMG_HEIGHT, IMG_WIDTH)``.

    Args:
        augment: when true, extend the training set with the flipped/rotated
            copies from ``get_more_images`` (images) and ``duplicate_labels``
            (masks). The previous code called the augmenters but threw their
            results away, so the default keeps the un-augmented arrays, one
            row per sample directory.

    Returns:
        Tuple ``(X_train, Y_train, X_test)``: ``X_train`` and ``X_test`` are
        uint8 arrays of shape ``(n, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)``,
        ``Y_train`` is a boolean array of shape
        ``(n, IMG_HEIGHT, IMG_WIDTH, 1)``.

    Raises:
        IOError: if a test image cannot be read by OpenCV.
    """
    # Sample ids are the names of the first-level sub-directories.
    train_ids = next(os.walk(TRAIN_PATH))[1]
    test_ids = next(os.walk(TEST_PATH))[1]
    target_shape = (IMG_HEIGHT, IMG_WIDTH)

    # Get and resize train images and masks.
    # Plain `bool` is used: the `np.bool` alias is gone from newer NumPy.
    X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
    Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
    print('Getting and resizing train images and masks ... ')
    sys.stdout.flush()
    for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
        path = TRAIN_PATH + id_
        img = imread(path + '/images/' + id_ + '.png')[:, :, :IMG_CHANNELS]
        X_train[n] = resize(img, target_shape, mode='constant', preserve_range=True)

        # Merge all per-object mask files into one mask for the image.
        mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=bool)
        for mask_file in next(os.walk(path + '/masks/'))[2]:
            mask_ = imread(path + '/masks/' + mask_file)
            mask_ = np.expand_dims(
                resize(mask_, target_shape, mode='constant', preserve_range=True),
                axis=-1)
            mask = np.maximum(mask, mask_)
        Y_train[n] = mask

    # NOTE(review): two per-channel min/max rescaling loops used to follow the
    # image loops. They ran once, on the temporary resized copy of the last
    # image only, after it had already been stored, so they never changed the
    # returned arrays and were removed. If per-image contrast stretching is
    # wanted it has to be applied inside the loops before storing.

    # Get and resize test images.
    X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
    # NOTE(review): original (height, width) of each test image. It is only a
    # local here and is not returned; module-level code that reads
    # `sizes_test` will not see it.
    sizes_test = []
    print('Getting and resizing test images ... ')
    sys.stdout.flush()
    for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
        path = TEST_PATH + id_ + '/images/' + id_ + '.png'
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read test image: ' + path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        sizes_test.append([img.shape[0], img.shape[1]])
        X_test[n] = resize(img, target_shape, mode='constant', preserve_range=True)

    print('Done!')

    if augment:
        print('Preparing to augment images')
        # The augmenters return new arrays, so the results must be re-bound.
        X_train = get_more_images(X_train)
        # Masks have one channel, so they go through duplicate_labels() and
        # not get_more_images(). OpenCV does not accept boolean arrays, hence
        # the uint8 round trip.
        Y_train = duplicate_labels(Y_train.astype(np.uint8)).astype(bool)

    print(X_train.shape)
    print(Y_train.shape)
    print(X_test.shape)

    return X_train, Y_train, X_test

def read_train_data_properties(train_dir, img_dir_name, mask_dir_name):
    """Read basic properties of training images and masks.

    Each first-level sub-directory of ``train_dir`` is treated as one sample
    holding an image folder and a mask folder.

    Args:
        train_dir: directory containing one sub-directory per sample.
        img_dir_name: name of the image folder inside each sample directory.
        mask_dir_name: name of the mask folder inside each sample directory.

    Returns:
        pandas.DataFrame with one row per sample and the columns ``img_id``,
        ``img_height``, ``img_width``, ``img_ratio`` (height / width),
        ``num_channels``, ``num_masks``, ``image_path``, ``mask_dir`` and
        ``mask_path``.
    """
    rows = []
    for dir_name in next(os.walk(train_dir))[1]:
        img_dir = os.path.join(train_dir, dir_name, img_dir_name)
        mask_dir = os.path.join(train_dir, dir_name, mask_dir_name)
        num_masks = len(next(os.walk(mask_dir))[2])
        # Only the first file found in the image folder is used.
        img_name = next(os.walk(img_dir))[2][0]
        img_name_id = os.path.splitext(img_name)[0]
        img_path = os.path.join(img_dir, img_name)
        # mask_dir already starts with train_dir/dir_name; joining those parts
        # again duplicated them whenever train_dir was a relative path.
        mask_path = os.path.join(mask_dir, img_name_id + '_mask.png')
        # The read_image() helper only survives as commented-out code; with
        # its default arguments it was exactly this colour cv2.imread call.
        img_shape = cv2.imread(img_path, cv2.IMREAD_COLOR).shape
        rows.append(['{}'.format(img_name_id), img_shape[0], img_shape[1],
                     img_shape[0]/img_shape[1], img_shape[2], num_masks,
                     img_path, mask_dir, mask_path])

    train_df = pd.DataFrame(rows, columns=['img_id', 'img_height', 'img_width',
                                           'img_ratio', 'num_channels',
                                           'num_masks', 'image_path', 'mask_dir', 'mask_path'])
    return train_df


def read_test_data_properties(test_dir, img_dir_name):
    """Read basic properties of test images.

    Each first-level sub-directory of ``test_dir`` is treated as one sample
    holding an image folder.

    Args:
        test_dir: directory containing one sub-directory per sample.
        img_dir_name: name of the image folder inside each sample directory.

    Returns:
        pandas.DataFrame with one row per sample and the columns ``img_id``,
        ``img_height``, ``img_width``, ``img_ratio`` (height / width),
        ``num_channels`` and ``image_path``.
    """
    rows = []
    for dir_name in next(os.walk(test_dir))[1]:
        img_dir = os.path.join(test_dir, dir_name, img_dir_name)
        # Only the first file found in the image folder is used.
        img_name = next(os.walk(img_dir))[2][0]
        img_name_id = os.path.splitext(img_name)[0]
        img_path = os.path.join(img_dir, img_name)
        # The read_image() helper only survives as commented-out code; with
        # its default arguments it was exactly this colour cv2.imread call.
        img_shape = cv2.imread(img_path, cv2.IMREAD_COLOR).shape
        rows.append(['{}'.format(img_name_id), img_shape[0], img_shape[1],
                     img_shape[0]/img_shape[1], img_shape[2], img_path])

    test_df = pd.DataFrame(rows, columns=['img_id', 'img_height', 'img_width',
                                          'img_ratio', 'num_channels', 'image_path'])

    return test_df

def get_domimant_colors(img, top_colors=1):
    """Cluster the pixels of ``img`` and report the cluster colours.

    Args:
        img: array indexed as (rows, columns, channels).
        top_colors: number of KMeans clusters to fit.

    Returns:
        Tuple ``(centers, shares)``: the fitted cluster centres and, per
        cluster label, the fraction of pixels assigned to it.
    """
    rows, cols, depth = img.shape[0], img.shape[1], img.shape[2]
    # One row per pixel, one column per channel.
    pixels = img.reshape((rows * cols, depth))

    model = KMeans(n_clusters=top_colors)
    model.fit(pixels)

    # One histogram bin per distinct label that was actually assigned.
    distinct_labels = len(np.unique(model.labels_))
    bin_edges = np.arange(0, distinct_labels + 1)
    shares, _ = np.histogram(model.labels_, bins=bin_edges)

    # Turn pixel counts into fractions that sum to one.
    shares = shares.astype("float")
    shares /= shares.sum()
    return model.cluster_centers_, shares

def cluster_images_by_hsv():
    """Cluster all images by dominant colour into three groups.

    Loads the images through ``get_images()``, takes the single dominant
    colour of each image, fits a 3-cluster KMeans on those colours and writes
    the predicted cluster id into the ``'HSV_CLUSTER'`` column of the
    module-level ``train_df`` and ``test_df`` tables.

    NOTE(review): ``train_df`` / ``test_df`` are not created here and must
    already exist with one row per loaded image, in the same order -
    presumably built by read_train_data_properties() and
    read_test_data_properties(); confirm. Also, get_images() does not convert
    to HSV, so despite the name the clustering appears to run on RGB values -
    confirm that this is intended.

    Returns:
        None.
    """
    print('Loading data')
#     x_train_hsv,x_test_hsv = load_raw_data(image_size=None,space='hsv',load_mask=False)
    # get_images() returns (X_train, Y_train, X_test); the masks are not used.
    x_train_hsv, _, x_test_hsv = get_images()
    x_hsv = np.concatenate([x_train_hsv, x_test_hsv])
    print('Calculating dominant hsv for each image')
    dominant_hsv = []
    # `tqdm` is imported as the class itself (`from tqdm import tqdm`), so it
    # is called directly; `tqdm.tqdm` does not exist on it.
    for img in tqdm(x_hsv):
        centers, _ = get_domimant_colors(img, top_colors=1)
        dominant_hsv.append(centers.squeeze())
    print('Calculating clusters')
    kmeans = KMeans(n_clusters=3).fit(dominant_hsv)
    # Training images come first in the concatenated list.
    n_train = len(x_train_hsv)
    train_df['HSV_CLUSTER'] = kmeans.predict(dominant_hsv[:n_train])
    test_df['HSV_CLUSTER'] = kmeans.predict(dominant_hsv[n_train:])
    print('Images clustered')
    return None

def get_more_images(imgs):
    """Augment a batch of 3-channel images with flips and rotations.

    Each image is processed channel by channel with OpenCV and re-stacked.
    Rotations are done about ``(IMG_WIDTH/2, IMG_HEIGHT/2)`` onto an
    ``IMG_WIDTH`` x ``IMG_HEIGHT`` canvas.

    Args:
        imgs: array indexed as (image, row, column, channel); channels 0-2
            are read.

    Returns:
        One array holding, in this order: the original images, the
        ``cv2.flip(..., 1)`` copies, the ``cv2.flip(..., 0)`` copies, the
        copies rotated by 90 degrees and the copies rotated by 270 degrees.
    """
    pivot = (IMG_WIDTH/2, IMG_HEIGHT/2)
    canvas = (IMG_WIDTH, IMG_HEIGHT)

    flip1_batch = []
    flip0_batch = []
    rot90_batch = []
    rot270_batch = []

    for sample in range(imgs.shape[0]):
        # The three colour planes of this image.
        planes = [imgs[sample, :, :, channel] for channel in range(3)]

        turn_90 = cv2.getRotationMatrix2D(pivot, 90, 1)
        turn_270 = cv2.getRotationMatrix2D(pivot, 270, 1)

        flip1_batch.append(np.dstack([cv2.flip(plane, 1) for plane in planes]))
        flip0_batch.append(np.dstack([cv2.flip(plane, 0) for plane in planes]))
        rot90_batch.append(
            np.dstack([cv2.warpAffine(plane, turn_90, canvas) for plane in planes]))
        rot270_batch.append(
            np.dstack([cv2.warpAffine(plane, turn_270, canvas) for plane in planes]))

    return np.concatenate((imgs,
                           np.array(flip1_batch),
                           np.array(flip0_batch),
                           np.array(rot90_batch),
                           np.array(rot270_batch)))

def duplicate_labels(labels):
    """Augment a batch of single-channel label masks with flips and rotations.

    Applies the same four transforms, in the same order, as
    ``get_more_images`` so augmented masks line up with augmented images.

    Args:
        labels: array indexed as (mask, row, column, 1). Boolean input is
            accepted; it is processed as uint8 because OpenCV does not take
            boolean arrays.

    Returns:
        One array with the dtype of ``labels`` holding, in this order: the
        original masks, the ``cv2.flip(..., 1)`` copies, the
        ``cv2.flip(..., 0)`` copies, the copies rotated by +90 degrees and
        the copies rotated by -90 degrees.
    """
    if labels.shape[0] == 0:
        # Nothing to augment; avoids concatenating with shapeless empty lists.
        return labels.copy()

    source = labels.astype(np.uint8) if labels.dtype == bool else labels

    pivot = (IMG_WIDTH/2, IMG_HEIGHT/2)
    canvas = (IMG_WIDTH, IMG_HEIGHT)
    # The rotation matrices do not depend on the mask, so build them once.
    rot_pos90 = cv2.getRotationMatrix2D(pivot, 90, 1)
    rot_neg90 = cv2.getRotationMatrix2D(pivot, -90, 1)

    vert_flip_imgs = []
    hori_flip_imgs = []
    rotated_images_pos90 = []
    rotated_images_neg90 = []

    for i in range(source.shape[0]):
        plane = source[i, :, :, 0]
        # Re-append the channel axis from the result's own shape instead of
        # reshaping to (IMG_WIDTH, IMG_WIDTH, 1), which was only right for
        # square targets.
        vert_flip_imgs.append(cv2.flip(plane, 1)[..., np.newaxis])
        hori_flip_imgs.append(cv2.flip(plane, 0)[..., np.newaxis])
        rotated_images_pos90.append(
            cv2.warpAffine(plane, rot_pos90, canvas)[..., np.newaxis])
        rotated_images_neg90.append(
            cv2.warpAffine(plane, rot_neg90, canvas)[..., np.newaxis])

    augmented = np.concatenate((source,
                                np.array(vert_flip_imgs),
                                np.array(hori_flip_imgs),
                                np.array(rotated_images_pos90),
                                np.array(rotated_images_neg90)))
    # Hand back the caller's dtype (e.g. bool masks stay bool).
    return augmented.astype(labels.dtype, copy=False)

# NOTE(review): the arrays returned by this call are discarded, and
# cluster_images_by_hsv() (called below) runs get_images() again on its own,
# so this call only repeats the disk load - confirm whether it is still needed.
get_images()
# X_validation = X_train[-50:,...]
# Y_validation = Y_train[-50:]
# X_train = X_train[0:-50,...]
# Y_train = Y_train[0:-50]
# print("Validation Set of Size "+str(Y_validation.shape[0])+" Separated")
# Y_train.dtype = np.uint8
# X_train = get_more_images(X_train)
# Y_train = duplicate_labels(Y_train)
# Writes the 'HSV_CLUSTER' column into train_df / test_df.
# NOTE(review): train_df and test_df are not assigned anywhere in the visible
# source - presumably they come from read_train_data_properties() and
# read_test_data_properties(); confirm they exist before this call.
cluster_images_by_hsv()

# Split the training table by cluster id.
# NOTE(review): the variables are numbered 1..3 while the cluster ids are
# 0..2, and the prediction code further down reads X_train_0cl / X_test_0cl
# (and 1cl, 2cl) as model inputs. These are filtered rows of train_df/test_df,
# presumably metadata tables rather than image arrays - confirm the intended
# naming and contents.
X_train_1cl = train_df[train_df['HSV_CLUSTER'] == 0]
X_train_2cl = train_df[train_df['HSV_CLUSTER'] == 1]
X_train_3cl = train_df[train_df['HSV_CLUSTER'] == 2]

# Same split for the test table.
X_test_1cl = test_df[test_df['HSV_CLUSTER'] == 0]
X_test_2cl = test_df[test_df['HSV_CLUSTER'] == 1]
X_test_3cl = test_df[test_df['HSV_CLUSTER'] == 2]

# Y_train_1cl = Y_train
# Y_train_2cl = Y_train
# Y_train_3cl = Y_train

# print (X_train.shape)
# print (Y_train.shape)
# print (X_validation.shape)
# print (Y_validation.shape)
# print("Data Rotation and Flipping Complete")
# print (X_train.shape)
# print (Y_train.shape)
# print (X_validation.shape)
# print (Y_validation.shape)
# X_train = np.concatenate((X_train,X_validation),axis=0)
# Y_train = np.concatenate((Y_train,Y_validation),axis=0)
#np.savez_compressed(file='train.npz',X=X_train,Y=Y_train)
#np.savez_compressed(file='test.npz',X=X_test)

# npz = np.load('G:/Kaggle_data/Kaggle_bowl/train.npz') 
# X_train2 = npz['X'] 
# Y_train2 = npz['Y']
# X_test2 = np.load('G:/Kaggle_data/Kaggle_bowl/test.npz')['X']

# Check if training data looks all right
# ix = random.randint(0, len(train_ids))
# imshow(X_train[ix])
# plt.show()
# imshow(np.squeeze(Y_train[ix]))
# plt.show()

# Define IoU metric
"""def mean_iou(y_true, y_pred):
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tf.to_int32(y_pred > t)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)"""

def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened; the result is
    ``(2 * sum(true * pred) + smooth) / (sum(true) + sum(pred) + smooth)``
    with ``smooth`` taken from the module-level constant.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator

def dice_coef_loss(y_true, y_pred):
    """Return the negated Dice coefficient, so a higher overlap gives a lower loss."""
    score = dice_coef(y_true, y_pred)
    return -score

# # Build U-Net model
# inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
# s = Lambda(lambda x: x / 255) (inputs)

# c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (s)
# c1 = Dropout(0.1) (c1)
# c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c1)
# p1 = MaxPooling2D((2, 2)) (c1)

# c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p1)
# c2 = Dropout(0.1) (c2)
# c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c2)
# p2 = MaxPooling2D((2, 2)) (c2)

# c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p2)
# c3 = Dropout(0.2) (c3)
# c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c3)
# p3 = MaxPooling2D((2, 2)) (c3)

# c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p3)
# c4 = Dropout(0.2) (c4)
# c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c4)

# u6 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c4)
# u6 = concatenate([u6, c3])
# c6 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u6)
# c6 = Dropout(0.2) (c6)
# c6 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c6)

# u7 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c6)
# u7 = concatenate([u7, c2])
# c7 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u7)
# c7 = Dropout(0.2) (c7)
# c7 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c7)

# u8 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c7)
# u8 = concatenate([u8, c1])
# c8 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u8)
# c8 = Dropout(0.1) (c8)
# c8 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c8)


# outputs = Conv2D(1, (1, 1), activation='sigmoid') (c8)

# model = Model(inputs=[inputs], outputs=[outputs])
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coef])
# model.summary()

# # # Fit model
# # earlystopper = EarlyStopping(patience=5, verbose=1)
# # checkpointer = ModelCheckpoint('model-dsbowl2018-1_+img_rot_corr_256_UNET-sh.h5', verbose=1, save_best_only=True)
# # results = model.fit(X_train, Y_train, validation_split=0.1, batch_size=16, epochs=50, 
# #                     callbacks=[earlystopper, checkpointer])
# # # results2 = model.fit(X_train2, Y_train2, validation_split=0.1, batch_size=16, epochs=50, 
# # #                     callbacks=[earlystopper, checkpointer])

# Predict on train, val and test
# One saved model is loaded per colour cluster. `dice_coef` is supplied via
# custom_objects so Keras can resolve that name while deserialising the models.
model_0cl = load_model('model-dsbowl2018-1_+img_rot_corrected_0cl.h5', custom_objects={'dice_coef': dice_coef})
model_1cl = load_model('model-dsbowl2018-1_+img_rot_corrected_1cl.h5', custom_objects={'dice_coef': dice_coef})
model_2cl = load_model('model-dsbowl2018-1_+img_rot_corrected_2cl.h5', custom_objects={'dice_coef': dice_coef})

# For each cluster: the first 90% of its training rows are predicted as
# "train", the remaining 10% as "val", followed by that cluster's test rows.
# NOTE(review): X_train_0cl / X_test_0cl are not assigned anywhere in the
# visible source, and X_train_1cl, X_train_2cl, X_test_1cl, X_test_2cl are
# assigned above as filtered train_df / test_df rows for cluster ids 0 and 1,
# not as image arrays - confirm where the per-cluster image arrays come from
# and that each array is paired with the right model.
preds_train_0cl = model_0cl.predict(X_train_0cl[:int(X_train_0cl.shape[0]*0.9)], verbose=1)
preds_val_0cl = model_0cl.predict(X_train_0cl[int(X_train_0cl.shape[0]*0.9):], verbose=1)
preds_test_0cl = model_0cl.predict(X_test_0cl, verbose=1)

preds_train_1cl = model_1cl.predict(X_train_1cl[:int(X_train_1cl.shape[0]*0.9)], verbose=1)
preds_val_1cl = model_1cl.predict(X_train_1cl[int(X_train_1cl.shape[0]*0.9):], verbose=1)
preds_test_1cl = model_1cl.predict(X_test_1cl, verbose=1)

preds_train_2cl = model_2cl.predict(X_train_2cl[:int(X_train_2cl.shape[0]*0.9)], verbose=1)
preds_val_2cl = model_2cl.predict(X_train_2cl[int(X_train_2cl.shape[0]*0.9):], verbose=1)
preds_test_2cl = model_2cl.predict(X_test_2cl, verbose=1)



# Threshold predictions
def _binarize(probabilities, cutoff=0.5):
    """Return a uint8 mask that is 1 where ``probabilities`` exceeds ``cutoff``."""
    return (probabilities > cutoff).astype(np.uint8)


# Cluster 0
preds_train_t0 = _binarize(preds_train_0cl)
preds_val_t0 = _binarize(preds_val_0cl)
preds_test_t0 = _binarize(preds_test_0cl)

# Cluster 1
preds_train_t1 = _binarize(preds_train_1cl)
preds_val_t1 = _binarize(preds_val_1cl)
preds_test_t1 = _binarize(preds_test_1cl)

# Cluster 2
preds_train_t2 = _binarize(preds_train_2cl)
preds_val_t2 = _binarize(preds_val_2cl)
preds_test_t2 = _binarize(preds_test_2cl)

# Create list of upsampled test masks
# Each prediction is resized back to the (height, width) stored for it in
# sizes_test; the three loops append to one shared list, cluster after cluster.
# NOTE(review): every loop reads `preds_test[i]` and `sizes_test[i]` with `i`
# restarting at 0. `preds_test` is not assigned in the visible source and
# `sizes_test` is only a local inside get_images(). Presumably each loop
# should read its own cluster's predictions together with the original sizes
# of that cluster's images - confirm before relying on this output.
preds_test_upsampled = []
for i in range(len(preds_test_t0)):
    preds_test_upsampled.append(resize(np.squeeze(preds_test[i]), 
                                       (sizes_test[i][0], sizes_test[i][1]), 
                                       mode='constant', preserve_range=True))
    
for i in range(len(preds_test_t1)):
    preds_test_upsampled.append(resize(np.squeeze(preds_test[i]), 
                                       (sizes_test[i][0], sizes_test[i][1]), 
                                       mode='constant', preserve_range=True))
    
for i in range(len(preds_test_t2)):
    preds_test_upsampled.append(resize(np.squeeze(preds_test[i]), 
                                       (sizes_test[i][0], sizes_test[i][1]), 
                                       mode='constant', preserve_range=True))

    
# Perform a sanity check on some random training samples
# ix = random.randint(0, len(preds_train_t))
# imshow(X_train[ix])
# plt.show()
# imshow(np.squeeze(Y_train[ix]))
# plt.show()
# imshow(np.squeeze(preds_train_t[ix]))
# plt.show()

# Perform a sanity check on some random validation samples
# ix = random.randint(0, len(preds_val_t))
# imshow(X_train[int(X_train.shape[0]*0.9):][ix])
# plt.show()
# imshow(np.squeeze(Y_train[int(Y_train.shape[0]*0.9):][ix]))
# plt.show()
# imshow(np.squeeze(preds_val_t[ix]))
# plt.show()

# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """Run-length encode the cells of ``x`` that equal 1.

    The array is scanned in transposed (column-first) order. The result is a
    flat list ``[start, length, start, length, ...]`` with 1-based start
    positions; an array without any 1 gives an empty list.
    """
    hits = np.flatnonzero(x.T.flatten() == 1)
    if hits.size == 0:
        return []

    # A new run starts wherever two consecutive hit positions are not adjacent.
    run_breaks = np.flatnonzero(np.diff(hits) > 1) + 1

    encoded = []
    for run in np.split(hits, run_breaks):
        encoded.extend((run[0] + 1, len(run)))
    return encoded

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of ``x > cutoff``.

    The thresholded array is passed to ``label`` and every label id from 1 up
    to the largest id is encoded separately with ``rle_encoding``.
    """
    regions = label(x > cutoff)
    region_count = regions.max()
    for region_id in range(1, region_count + 1):
        region_mask = regions == region_id
        yield rle_encoding(region_mask)
        
# Build the submission rows: one (image id, run-length encoding) pair per
# region found in each upsampled test prediction.
# NOTE(review): `test_ids` is only assigned as a local inside get_images() in
# the visible source - confirm it exists at module level. The loop also
# assumes preds_test_upsampled[n] belongs to test_ids[n]; verify that the
# order of the upsampled list matches the order of the ids.
new_test_ids = []
rles = []
for n, id_ in enumerate(test_ids):
    rle = list(prob_to_rles(preds_test_upsampled[n]))
    rles.extend(rle)
    # Repeat the image id once per region so both lists stay aligned.
    new_test_ids.extend([id_] * len(rle))
    
# Create submission DataFrame

sub = pd.DataFrame()
sub['ImageId'] = new_test_ids
# Each encoding is written as space-separated numbers.
sub['EncodedPixels'] = pd.Series(rles).apply(lambda x: ' '.join(str(y) for y in x))
# `time` is the date string bound in the parameter section, not the module.
sub.to_csv('sub-dsbowl2018-1_%s_256_+flip_rot_corr_256_final.csv' % time, index=False)

