In [7]:
import glob
import os
import pickle
from itertools import groupby

import cv2
import numpy as np
import pandas as pd

In [71]:
def save_obj(obj, name):
    """Serialize ``obj`` with pickle into the file ``name + '.pkl'``.

    Args:
        obj: Any picklable Python object.
        name: Destination path WITHOUT the extension; ``.pkl`` is appended here.

    The file is opened in binary write mode, so an existing file with the same
    name is overwritten.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        # Highest protocol = most compact/fastest format this interpreter supports.
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
def load_obj(name):
    """Load and return the object pickled in the file ``name + '.pkl'``.

    Args:
        name: Source path WITHOUT the extension; ``.pkl`` is appended here.

    Returns:
        The unpickled Python object.

    Warning:
        Unpickling can execute arbitrary code. Only load files that this
        notebook (or another trusted source) produced.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        return pickle.load(handle)

def prepare_image_final(image):
    """Turn a BGR image into a contrast-stretched, thresholded grayscale image.

    Two paths, selected by ``is_back_and_white(image)``:

    * colour input: the grayscale image is always inverted and the
      thresholding step uses ``level=50``;
    * grayscale input: the image is inverted only when its mean intensity is
      above 150, and the thresholding step uses ``level=0``.

    In both paths the (possibly inverted) grayscale image goes through
    ``boost_contrast`` and then ``add_seuillage``.

    Args:
        image: Image array accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        The value returned by ``add_seuillage``.
    """
    colour_input = not is_back_and_white(image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Colour images are always inverted; grayscale ones only when they are bright overall.
    if colour_input or gray.mean() > 150:
        gray = cv2.bitwise_not(gray)

    seuil = 50 if colour_input else 0
    return add_seuillage(boost_contrast(gray), level=seuil)

def is_back_and_white(img):
    """Return True when the three channels of ``img`` are identical.

    Args:
        img: Array indexed as ``img[:, :, channel]`` with at least three
            channels; only channels 0, 1 and 2 are compared.

    Returns:
        bool: True if channel 0 equals channel 1 and channel 1 equals
        channel 2 everywhere, i.e. the image carries no colour information.
    """
    # Compare the argument itself; reading a global ``image`` here would make
    # the function ignore whatever the caller passed in.
    first_pair_equal = np.array_equiv(img[:, :, 0], img[:, :, 1])
    second_pair_equal = np.array_equiv(img[:, :, 1], img[:, :, 2])
    return first_pair_equal and second_pair_equal

def boost_contrast(img):
    """Linearly stretch ``img`` so its minimum maps to 0 and its maximum to 255.

    Args:
        img: NumPy array of pixel intensities.

    Returns:
        Array of the same shape with values rescaled to the 0..255 range.
        A constant image (max == min) has no contrast to stretch and is
        returned as an all-zero float array rather than dividing by zero.
    """
    mini = img.min()
    maxi = img.max()
    if maxi == mini:
        # Flat image: the scale factor would be 255/0 and every pixel would
        # become NaN (0 * inf). Every pixel equals the minimum, so map to 0.
        return np.zeros(img.shape, dtype=np.float64)
    return (img - mini) * (255 / (maxi - mini))

def add_seuillage(img, level=100):
    """Apply a symmetric intensity cut ("seuillage" = thresholding) and rescale.

    Values are shifted down by ``level`` and multiplied by
    ``255 / (255 - 2 * level)``, which maps the interval
    ``[level, 255 - level]`` onto ``[0, 255]``; anything falling outside is
    clipped to 0 or 255. With ``level=0`` only the clipping has an effect.

    Args:
        img: Array of pixel intensities.
        level: Width of the band removed at each end of the 0..255 range.
            ``level == 127.5`` makes the scale factor divide by zero.

    Returns:
        Array of the same shape with values in [0, 255].
    """
    img = np.asarray(img)
    if img.dtype.kind in 'biu':
        # Integer images (e.g. uint8) would wrap around on ``img - level``
        # (0 - 50 -> 206), turning dark pixels bright; do the maths in float.
        img = img.astype(np.float64)
    scale = 255 / (255 - (2 * level))
    return np.clip((img - level) * scale, 0, 255)

In [107]:
from skimage.morphology import label

def img_to_rle(img):
    """Run-length encode the foreground of a mask in column-major order.

    The array is flattened column by column (Fortran order) and scanned as
    runs of equal values. Every run whose value is greater than 0.5 adds two
    numbers to the output: its 1-based start position in the flattened array
    and its length.

    Args:
        img: NumPy array (boolean or numeric mask).

    Returns:
        list: Flat list ``[start1, length1, start2, length2, ...]``; empty
        when no value exceeds 0.5.
    """
    column_major = img.flatten('F').tolist()
    encoded = []
    position = 1  # 1-based index of the first pixel of the current run
    for value, run in groupby(column_major):
        run_length = sum(1 for _ in run)
        if value > 0.5:
            encoded.extend((position, run_length))
        position += run_length
    return encoded

"""
From Kaggle but I prefer groupby vs loop
def rle_encoding(x):
    dots = np.where(x.T.flatten() == 1)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if (b>prev+1): run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths
"""

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of ``x > cutoff``.

    Adapted from a Kaggle kernel. The mask ``x > cutoff`` is passed to
    ``label``; each label value from 1 up to the maximum label is then
    encoded separately with ``img_to_rle``.

    Args:
        x: Array of scores/probabilities (or an already binary mask).
        cutoff: Values strictly above this are treated as foreground.

    Yields:
        list: The ``img_to_rle`` encoding of each labelled region, in
        increasing label order. Nothing is yielded when the maximum label is 0.
    """
    labelled = label(x > cutoff)
    region_count = labelled.max()
    yield from (
        img_to_rle(labelled == region_id)
        for region_id in range(1, region_count + 1)
    )

[2, 3, 8, 5, 14, 2, 17, 1, 19, 3, 28, 1, 31, 4, 38, 3, 42, 1, 44, 2, 47, 1, 50, 2, 55, 1, 57, 1, 59, 4, 65, 5, 75, 1, 80, 6, 90, 1, 95, 2]
[98, 1]


# Model 1

First, let's prepare a submission dataset using a simple thresholding method.

In [112]:
# Build the rows of the baseline submission: every test image is preprocessed,
# binarised with a fixed threshold and split into labelled regions. The two
# lists below are parallel: one (image id, RLE) pair per region.
img_list = []
rle_list = []
for img in glob.glob("images/test/*/images/*.png"):
    # Image id = file name minus its last four characters (the ".png" suffix
    # guaranteed by the glob pattern).
    img_name = os.path.basename(img)[:-4]
    # NOTE(review): cv2.imread returns None for unreadable files; that case is
    # not handled here.
    image = cv2.imread(img)
    gray = prepare_image_final(image)
    # Binarise: values above 40 become 1, everything else 0. ret1 is unused.
    ret1,th1 = cv2.threshold(gray, 40, 1, cv2.THRESH_BINARY)
    # One row per region yielded by prob_to_rles; an image for which nothing
    # is yielded contributes no row at all.
    for rle in prob_to_rles(th1):
        # Earlier variant, kept for reference: encode the whole mask as a
        # single RLE with img_to_rle(th1) instead of one RLE per region.
        img_list.append(img_name)
        rle_list.append(rle)

In [110]:
# Assemble the submission table: one row per region, with the run-length
# encoding serialised as space-separated integers, then write it without the
# index column.
df = pd.DataFrame({'ImageId': img_list}).assign(
    EncodedPixels=pd.Series(rle_list).apply(lambda runs: ' '.join(map(str, runs)))
)
df.to_csv('sub-simple_model.csv', index=False)

# Model 2