In [232]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import pandas as pd
from collections import namedtuple, defaultdict
import os
import pickle
%matplotlib inline


    red: adult males
    magenta: subadult males
    brown: adult females
    blue: juveniles
    green: pups


In [2]:
HSVColor = namedtuple("HSVColor", ["hue_offset", "hue_min", "hue_max", "sat_min", "sat_max", "val_min", "val_max"])

In [215]:
dot_colors = {
    "adult_males": HSVColor(90, 80, 100, 210, 255, 160, 255),     # red
    "subadult_males": HSVColor(0, 145, 155, 235, 255, 230, 255),  # magenta
    "adult_females": HSVColor(0, 11, 20, 130, 255, 70, 105),      # brown
    "juveniles": HSVColor(0, 103, 123, 155, 230, 115, 200),       # blue
    "pups": HSVColor(0, 47, 67, 200, 255, 148, 180)               # green
}

In [203]:
def get_diff_mask(im_bgr, im_dotted_bgr, threshold_diff = 60):
    """ Return a boolean mask with the difference between the dotted and non-dotted image.
    """
    diff_dotted_bgr = cv2.absdiff(im_bgr, im_dotted_bgr)
    diff_dotted_gray = np.sum(diff_dotted_bgr, axis=2)/3
    diff_dotted_mask = diff_dotted_gray > threshold_diff
    return diff_dotted_mask

In [212]:
def get_black_mask(im_bgr, threshold=15, min_blob_size=256*256):
    # Apply a simple threshold
    mask = (im_bgr[:,:,0] <= threshold) & (im_bgr[:,:,1] <= threshold) & (im_bgr[:,:,2] <= threshold)
    mask_u8 = (mask * 255).astype(np.uint8)
    
    # Clean the mask: remove small connected components
    n_labels, im_labels, stats, centroids = cv2.connectedComponentsWithStats(mask_u8, connectivity=8)
    blobs_size = stats[:, 4]

    sorted_blobs = np.argsort(-blobs_size)
    sorted_blob_ids = sorted_blobs[1:] # the biggest is the background
    mask_blobs_ids = list()
    for blob_id in sorted_blob_ids:
        if blobs_size[blob_id] > min_blob_size:
            mask_blobs_ids.append(blob_id)
        else:
            break

    filtered_mask = np.in1d(im_labels.ravel(), np.array(mask_blobs_ids)).reshape((mask.shape))
    
    # Inverse the mask
    filtered_mask = np.logical_not(filtered_mask)
    return filtered_mask

In [205]:
def get_dots(im_dotted_hsv, diff_mask, color):
    im_hsv_corr = im_dotted_hsv.copy()
    blob_size_max = 12*12
    # apply an offset: it is to use the same tests for red as other colors (as red can be 180 or 0)
    im_hsv_corr[:,:,0] = (im_dotted_hsv[:,:,0] + color.hue_offset)%180
    
    # Create a mask with pixel fulfilling all constraints on HSV values
    candidates = diff_mask & \
        (im_hsv_corr[:, :, 0] >= color.hue_min) & (im_hsv_corr[:, :, 0] <= color.hue_max) & \
        (im_hsv_corr[:, :, 1] >= color.sat_min) & (im_hsv_corr[:, :, 1] <= color.sat_max) & \
        (im_hsv_corr[:, :, 2] >= color.val_min) & (im_hsv_corr[:, :, 2] <= color.val_max)
        
    # Detect connected components
    candidates_u8 = (candidates * 255).astype(np.uint8)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    candidates_u8 = cv2.dilate(candidates_u8, kernel)
    n_labels, im_labels, stats, centroids = cv2.connectedComponentsWithStats(candidates_u8, connectivity=8)
    
    # Remove the biggest ones (ubsually, the background)
    centroids_filtered = []
    for i, s in enumerate(stats):
        if s[4] < blob_size_max:
            centroids_filtered.append(centroids[i])
    
    return centroids_filtered

In [276]:
def detect_dots(im_bgr, im_dotted_bgr, mask, dot_colors):
    # Convert to HSV, easier to detect color dot.
    im_dotted_hsv = cv2.cvtColor(im_dotted_bgr, cv2.COLOR_BGR2HSV)
    
    # apply mask on both train and dotted, to avoid diff in those masked regions
    mask_u8 = (mask * 255).astype(np.uint8)
    im_bgr_masked = cv2.bitwise_and(im_bgr, im_bgr, mask=mask_u8)
    im_dotted_bgr_masked = cv2.bitwise_and(im_dotted_bgr, im_dotted_bgr, mask=mask_u8)
    
    # Compute the diff between dotted and non-dotted images. 
    diff_dotted_mask = get_diff_mask(im_bgr_masked, im_dotted_bgr_masked)
    diff_dotted_mask_u8 = (diff_dotted_mask * 255).astype(np.uint8)
    # Find the dot for each type of sealion.
    min_size = 3*3
    max_size = 15*15
    n_labels, im_labels, stats, centroids = cv2.connectedComponentsWithStats(diff_dotted_mask_u8, connectivity=8)
    print(n_labels)
    dots = defaultdict(list)
    for blob_id, stat in enumerate(stats):
        if min_size < stat[4] < max_size:
            center = int(centroids[blob_id][0]), int(centroids[blob_id][1])
            
            dot_hsv = im_dotted_hsv[center[1] - 1:center[1] + 2, center[0] - 1:center[0] + 2, :].copy()
            val_mean = np.mean(dot_hsv[:, :, 1])
            sat_mean = np.mean(dot_hsv[:, :, 2])
            print("SV: ", val_mean, sat_mean)
            for sealion, color in dot_colors.items():
                hue = (dot_hsv[:, :, 0] + color.hue_offset).astype(np.int32) % 180
                print("Hoff: ", type(hue))
                hue_mean = np.mean(hue)
                print("H: ", hue_mean)
                if color.hue_min < hue_mean < color.hue_max and \
                    color.val_min < val_mean < color.val_max and \
                    color.sat_min < sat_mean < color.sat_max:
                        dots[sealion].append(centroids[blob_id])

    return dots

In [206]:
def detect_dots(im_bgr, im_dotted_bgr, mask, dot_colors):
    # Convert to HSV, easier to detect color dot.
    im_dotted_hsv = cv2.cvtColor(im_dotted_bgr, cv2.COLOR_BGR2HSV)
    
    # apply mask on both train and dotted, to avoid diff in those masked regions
    mask_u8 = (mask * 255).astype(np.uint8)
    im_bgr_masked = cv2.bitwise_and(im_bgr, im_bgr, mask=mask_u8)
    im_dotted_bgr_masked = cv2.bitwise_and(im_dotted_bgr, im_dotted_bgr, mask=mask_u8)
    
    # Compute the diff between dotted and non-dotted images. 
    diff_dotted_mask = get_diff_mask(im_bgr_masked, im_dotted_bgr_masked)
    
    # Find the dot for each type of sealion.
    dots = {}
    for sealion, color in dot_colors.items():
        dots_centroids = get_dots(im_dotted_hsv, diff_dotted_mask, color)
        dots[sealion] = dots_centroids

    return dots

In [225]:
def draw_detected(im, dots):
    im_draw = im.copy()
    n = 5
    colors = plt.cm.rainbow(np.linspace(0, 1, n))
    colors_rgb = [(int(c[0]), int(c[1]), int(c[2])) for c in colors * 255]
    for i, (sealion, centroids) in enumerate(dots.items()):
        for c in centroids:
            center = (int(c[0]), int(c[1]))
            cv2.circle(im_draw, center, 9, colors_rgb[i], 1)
            cv2.circle(im_draw, center, 1, colors_rgb[i], -1)
    return im_draw

In [226]:
def print_numbers(dots):
    for sealion, centroids in dots.items():
        print("{t}: {n}".format(t=sealion, n=len(centroids)))

In [227]:
def process_image(filename_train, root_dir_dotted): 
    path, basename = os.path.split(filename_train)
    train_id, ext = os.path.splitext(basename)
    filename_dotted = os.path.join(root_dir_dotted, basename)
    
    # Load images
    im_train = cv2.imread(filename_train)
    im_dotted = cv2.imread(filename_dotted)
    
    # Find the black mask
    mask_train = get_black_mask(im_train)
    mask_dotted = get_black_mask(im_dotted)
    mask = np.logical_and(mask_train, mask_dotted)
    
    # Find the dots
    dots = detect_dots(im_train, im_dotted, mask, dot_colors)
    
    return train_id, mask, dots, im_dotted

In [228]:
def process_all(train_ids=None):
    train_dir = "/home/lowik/sealion/data/sealion/Train/"
    dotted_dir = "/home/lowik/sealion/data/sealion/TrainDotted/"
    mask_dir = "/home/lowik/sealion/data/sealion/TrainMask/"
    debug_dir = "/home/lowik/sealion/data/sealion/TrainDebug/"
    dots_dir = "/home/lowik/sealion/data/sealion/TrainDots/"
    os.makedirs(mask_dir, exist_ok=True)
    os.makedirs(debug_dir, exist_ok=True)
    os.makedirs(dots_dir, exist_ok=True)
    df = pd.read_csv(os.path.join(train_dir, "train_new.csv"))
    train_id_with_errors = []
    
    files_to_process = [filename for filename in os.listdir(train_dir) if filename.endswith("jpg")]
    if train_ids:
        filtered = []
        for filename in files_to_process:
            train_id, _ = os.path.splitext(filename)
            if int(train_id) in train_ids:
                filtered.append(filename)
        files_to_process = filtered       
    for filename in files_to_process:
        train_id, mask, dots, im_train = process_image(os.path.join(train_dir, filename), dotted_dir)
        cv2.imwrite(os.path.join(mask_dir, train_id + ".png"), (mask * 255).astype(np.uint8))

        # Check if there is a discrepancy with the groudtruth
        errors = 0
        grountruth = df[df.train_id==int(train_id)]
        for sealion, cs in dots.items():
            detected = len(cs)
            gt = grountruth[sealion].get_values()[0]
            errors += abs(detected - gt)
            if abs(detected - gt):
                print("{type}: {gt} vs {det}".format(type=sealion, gt=gt, det=detected))

        if errors:
            train_id_with_errors.append(train_id)
            print("Errors on {train_id}: {total} ".format(train_id=train_id, total=errors))
            im_draw = draw_detected(im_train, dots)
            cv2.imwrite(os.path.join(debug_dir, train_id + ".jpg"), im_draw)

        # Save dots
        with open(os.path.join(dots_dir, train_id + ".pkl"), "wb") as ofile:
            pickle.dump(dots, ofile, pickle.HIGHEST_PROTOCOL)
            

In [277]:
process_all({947}) #process_all({947, 816, 946, 840})

172
SV:  252.777777778 243.333333333
Hoff:  <class 'numpy.ndarray'>
H:  0.555555555556
Hoff:  <class 'numpy.ndarray'>
H:  0.555555555556
Hoff:  <class 'numpy.ndarray'>
H:  90.5555555556
Hoff:  <class 'numpy.ndarray'>
H:  0.555555555556
Hoff:  <class 'numpy.ndarray'>
H:  0.555555555556
SV:  251.666666667 249.666666667
Hoff:  <class 'numpy.ndarray'>
H:  59.7777777778
Hoff:  <class 'numpy.ndarray'>
H:  59.7777777778
Hoff:  <class 'numpy.ndarray'>
H:  64.4444444444
Hoff:  <class 'numpy.ndarray'>
H:  59.7777777778
Hoff:  <class 'numpy.ndarray'>
H:  59.7777777778
SV:  245.222222222 243.555555556
Hoff:  <class 'numpy.ndarray'>
H:  151.111111111
Hoff:  <class 'numpy.ndarray'>
H:  151.111111111
Hoff:  <class 'numpy.ndarray'>
H:  61.1111111111
Hoff:  <class 'numpy.ndarray'>
H:  151.111111111
Hoff:  <class 'numpy.ndarray'>
H:  151.111111111
SV:  225.666666667 89.1111111111
Hoff:  <class 'numpy.ndarray'>
H:  13.7777777778
Hoff:  <class 'numpy.ndarray'>
H:  13.7777777778
Hoff:  <class 'numpy.ndarra

adult_males: 6 vs 5
Errors on 947: 1 


In [264]:
(179 + 90)%180

89

In [265]:
f = np.array([[[  0,0,179], [  0 , 0 ,179], [  1,   0, 179]],[[  0,0,179], [  0 , 0 ,179], [  1,   0, 179]],[[  0,0,179], [  0 , 0 ,179], [  1,   0, 179]]])

In [266]:
f.shape

(3, 3, 3)

In [269]:
(f[0,:,:] + 90)%180

array([[90, 90, 89],
       [90, 90, 89],
       [91, 90, 89]])

In [260]:
np.mean((f + 90) % 180)

89.777777777777771

In [219]:
im_train = cv2.imread("../data/sealion/Train/816.jpg")
im_dotted = cv2.imread("../data/sealion/TrainDotted/816.jpg")

In [220]:
mask_train = get_black_mask(im_train)
mask_dotted = get_black_mask(im_dotted)
mask = np.logical_and(mask_train, mask_dotted)

In [221]:
# Convert to HSV, easier to detect color dot.
im_dotted_hsv = cv2.cvtColor(im_dotted, cv2.COLOR_BGR2HSV)

# apply mask on both train and dotted, to avoid diff in those masked regions
mask_u8 = (mask * 255).astype(np.uint8)
im_bgr_masked = cv2.bitwise_and(im_train, im_train, mask=mask_u8)
im_dotted_bgr_masked = cv2.bitwise_and(im_dotted, im_dotted, mask=mask_u8)

# Compute the diff between dotted and non-dotted images. 
diff_dotted_mask = get_diff_mask(im_bgr_masked, im_dotted_bgr_masked)

In [222]:
cv2.imwrite("../data/diff.png", (diff_dotted_mask * 255).astype(np.uint8))

True