In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import pandas as pd
from collections import namedtuple, defaultdict
import os
import pickle
%matplotlib inline

In [90]:
def get_black_mask(im_bgr, threshold=20, low_threshold = 3, min_blob_size=256*256):
    """Return a boolean mask that is False on large, near-black image regions.

    A pixel is a *candidate* when each of its first three channels is
    ``<= threshold``. Candidates are grouped into 8-connected blobs, and a
    blob is masked out only when both conditions hold:

    * it covers more than ``min_blob_size`` pixels, and
    * more than 90% of its pixels have all three channels strictly below
      ``low_threshold``.

    Parameters
    ----------
    im_bgr : numpy.ndarray
        Image indexed as ``[row, col, channel]`` with at least three channels.
    threshold : int
        Upper bound (inclusive) on every channel for a candidate pixel.
    low_threshold : int
        Strict upper bound on every channel for a pixel to count as black
        when validating a blob.
    min_blob_size : int
        Blobs with this many pixels or fewer are ignored.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape ``im_bgr.shape[:2]``: ``False`` inside the
        retained black blobs, ``True`` everywhere else.
    """
    channels = im_bgr[:, :, :3]

    # Apply a simple threshold: candidate pixels are dark on every channel.
    mask = np.all(channels <= threshold, axis=2)
    mask_u8 = mask.astype(np.uint8) * 255

    # Label the 8-connected components of the candidate mask.
    _, im_labels, stats, _ = cv2.connectedComponentsWithStats(mask_u8, connectivity=8)
    blobs_size = stats[:, cv2.CC_STAT_AREA]

    # Count black *pixels* per blob, not black channel values: counting the
    # individual channel values lets the ratio below reach 3.0 and makes the
    # 0.9 cutoff far more lenient than intended.
    near_zero = np.all(channels < low_threshold, axis=2)
    black_per_blob = np.bincount(im_labels[near_zero], minlength=blobs_size.shape[0])
    # np.maximum guards the division for labels that own no pixels.
    black_ratio = black_per_blob / np.maximum(blobs_size, 1).astype(float)

    is_black_blob = (blobs_size > min_blob_size) & (black_ratio > 0.9)
    is_black_blob[0] = False  # label 0 is the background of mask_u8, never a blob

    # Look up every pixel's label in the per-label table, then invert so that
    # True marks the pixels to keep.
    return np.logical_not(is_black_blob[im_labels])

In [60]:
def get_train_id(filename):
    """Return the integer id encoded in an image file name.

    The directory part and the extension are discarded and the remaining
    stem is converted with ``int``; ``int`` raises ``ValueError`` when the
    stem is not a valid integer literal.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    return int(stem)

In [96]:
def process_all_black_mask(train_ids_to_process=None,
                           train_dir="/home/lowik/sealion/data/sealion/Train/",
                           dotted_dir="/home/lowik/sealion/data/sealion/TrainDotted/",
                           mask_dir="/home/lowik/sealion/data/sealion/TrainMask/"):
    """Compute and save the black-region mask of every training image.

    For each ``<id>.jpg`` in ``train_dir`` the image and the file of the same
    name in ``dotted_dir`` are passed to ``get_black_mask``; the two results
    are combined with a logical AND and written to ``mask_dir/<id>.jpg`` as a
    uint8 image (255 where the mask is True, 0 elsewhere).

    Parameters
    ----------
    train_ids_to_process : container of int, optional
        When given, only images whose id is in this container are processed.
        ``None`` (the default) processes every image.
    train_dir, dotted_dir, mask_dir : str
        Input and output directories. ``mask_dir`` is created when missing.

    Notes
    -----
    Images whose mask file already exists are skipped, so an interrupted run
    can be resumed. A failure on one image is printed and does not stop the
    remaining images from being processed.
    """
    os.makedirs(mask_dir, exist_ok=True)

    # Sorted so that runs are deterministic (os.listdir order is arbitrary).
    for filename in sorted(os.listdir(train_dir)):
        if not filename.endswith(".jpg"):
            continue

        try:
            train_id = get_train_id(filename)
        except ValueError:
            # A stray non-numeric file name must not abort the whole run.
            print("{}: ".format(filename), "skipped, file name is not a numeric train id")
            continue

        if train_ids_to_process is not None and train_id not in train_ids_to_process:
            continue

        # NOTE(review): JPEG is lossy, so the stored mask is not guaranteed to
        # be strictly 0/255 when read back. The .jpg name is kept because it
        # also serves as the "already processed" marker checked just below.
        mask_path = os.path.join(mask_dir, "{}.jpg".format(train_id))
        if os.path.exists(mask_path):
            continue

        try:
            train_path = os.path.join(train_dir, filename)
            dotted_path = os.path.join(dotted_dir, filename)
            im_bgr = cv2.imread(train_path)
            im_dotted_bgr = cv2.imread(dotted_path)
            # cv2.imread signals failure by returning None instead of raising.
            if im_bgr is None:
                raise IOError("could not read image {}".format(train_path))
            if im_dotted_bgr is None:
                raise IOError("could not read image {}".format(dotted_path))

            mask_train = get_black_mask(im_bgr)
            mask_dotted = get_black_mask(im_dotted_bgr)
            mask = np.logical_and(mask_train, mask_dotted)

            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(mask_path, (mask * 255).astype(np.uint8)):
                raise IOError("could not write mask {}".format(mask_path))
        except Exception as err:
            # Best effort: report the failure and carry on with the next image.
            print("{}: ".format(train_id), err)

In [97]:
# Build the masks for every training image (no id filter); images whose mask
# file already exists are skipped by the function.
process_all_black_mask()