In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pickle
%matplotlib inline

In [8]:
def get_sizes_stats(annotations_dir="/home/lowik/sealion/data/sealion/TrainAnnotations/",
                    dots_dir="/home/lowik/sealion/data/sealion/TrainDots/",
                    n_types=5):
    """Collect the size and area of every annotated blob, grouped by sealion type.

    Each file in ``annotations_dir`` is read as a grayscale mask and split into
    8-connected components ("blobs"). The matching ``<train_id>.pkl`` file in
    ``dots_dir`` is unpickled into a sequence indexed by sealion type, each entry
    being an iterable of ``(x, y)`` dot coordinates. A blob gets the type of the
    dot that falls on it; if several dots fall on the same blob, the last one
    visited wins.

    Args:
        annotations_dir: Directory holding the annotation masks. File names are
            expected to be ``<integer id>.<ext>``; other files are skipped.
        dots_dir: Directory holding the ``<integer id>.pkl`` dot files.
        n_types: Number of sealion types, i.e. the number of per-type lists
            returned.

    Returns:
        A ``(sizes, areas)`` tuple of two lists, each containing ``n_types``
        lists. ``sizes[t]`` holds, for every blob of type ``t``, the longest side
        of its minimum-area (rotated) bounding rectangle. ``areas[t]`` holds the
        number of pixels of each of those blobs.

    Raises:
        FileNotFoundError: If a mask has no matching pickle file in ``dots_dir``.
    """
    sizes = [[] for _ in range(n_types)]
    areas = [[] for _ in range(n_types)]
    # Sorted so that the warnings below are printed in a reproducible order;
    # os.listdir() returns entries in arbitrary order.
    for filename in sorted(os.listdir(annotations_dir)):
        # Load annotation mask and dots
        stem, _ = os.path.splitext(filename)
        try:
            train_id = int(stem)
        except ValueError:
            # Stray files (hidden files, notes, ...) are not annotation masks.
            print("{filename} - not an annotation mask, skipped".format(filename=filename))
            continue
        mask = cv2.imread(os.path.join(annotations_dir, filename), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread signals an unreadable file by returning None, not by raising.
            print("{train_id} - mask could not be read, skipped".format(train_id=train_id))
            continue
        n_labels, im_labels, _, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # dots_dir at files you generated yourself.
        with open(os.path.join(dots_dir, "{}.pkl".format(train_id)), "rb") as p:
            dots = pickle.load(p)

        # Find the sealion type for each label.
        blob_types = dict()
        for i, ds in enumerate(dots):
            for x, y in ds:
                if mask[y, x] != 0:
                    blob_types[im_labels[y, x]] = i

        # Compute size of each blob and add it to the corresponding sealion data.
        # Label 0 is the background, hence the range starts at 1.
        for label in range(1, n_labels):
            # Check the type first so no geometry is computed for discarded blobs.
            if label not in blob_types:
                print("{train_id} - label {label} has no type?".format(train_id=train_id, label=label))
                continue
            y, x = np.nonzero(im_labels == label)
            # minAreaRect expects an (N, 2) array of (x, y) points.
            coords = np.column_stack((x, y)).astype(np.int32)
            _, (w, h), _ = cv2.minAreaRect(coords)
            sizes[blob_types[label]].append(max(w, h))
            areas[blob_types[label]].append(len(x))

    return sizes, areas

In [11]:
# Scan every annotation mask once; `sizes` and `areas` are each a list of
# per-sealion-type lists (one inner list per type) with one entry per blob.
sizes, areas = get_sizes_stats()

In [14]:
# Summary statistics of the blob sizes, one line per sealion type.
# A "size" is the longest side of the blob's minimum-area bounding rectangle.
for i, ss in enumerate(sizes):
    if len(ss) == 0:
        # np.min / np.max raise ValueError on an empty sequence, so a type
        # without any blob is reported explicitly instead of crashing the cell.
        print("Size {}: no samples, n=0".format(i))
        continue
    mi = np.min(ss)
    ma = np.max(ss)
    mean = np.mean(ss)
    std = np.std(ss)
    median = np.median(ss)
    print("Size {}: min={}, max={}, mean={}, std={}, median={}, n={}".format(i, mi, ma, mean, std, median, len(ss)))

Size 0: min=41.87986755371094, max=164.6144561767578, mean=81.30168822004988, std=22.956427884506798, median=77.74258422851562, n=37
Size 1: min=45.04875564575195, max=142.55235290527344, mean=75.6908574785505, std=24.858394791602496, median=64.53681945800781, n=21
Size 2: min=39.369667053222656, max=138.251953125, mean=62.66514139175415, std=19.768246623079616, median=59.07316017150879, n=40
Size 3: min=35.0, max=102.97856140136719, mean=51.28458023071289, std=14.259899847392974, median=48.382843017578125, n=33
Size 4: min=18.026947021484375, max=55.61539077758789, mean=29.880167219373917, std=8.777418843640895, median=28.429442405700684, n=18


In [15]:
# Summary statistics of the blob areas, one line per sealion type.
# An "area" is the number of pixels in the blob.
for i, ss in enumerate(areas):
    if len(ss) == 0:
        # np.min / np.max raise ValueError on an empty sequence, so a type
        # without any blob is reported explicitly instead of crashing the cell.
        print("Area {}: no samples, n=0".format(i))
        continue
    mi = np.min(ss)
    ma = np.max(ss)
    mean = np.mean(ss)
    std = np.std(ss)
    median = np.median(ss)
    print("Area {}: min={}, max={}, mean={}, std={}, median={}, n={}".format(i, mi, ma, mean, std, median, len(ss)))

Area 0: min=990, max=8216, mean=2363.9189189189187, std=1348.295087165457, median=2025.0, n=37
Area 1: min=1034, max=5659, mean=2073.8571428571427, std=1275.4377133142245, median=1510.0, n=21
Area 2: min=519, max=4757, mean=1276.125, std=871.7512600363707, median=1030.0, n=40
Area 3: min=485, max=2134, mean=894.2424242424242, std=378.9575038382737, median=812.0, n=33
Area 4: min=127, max=761, mean=327.72222222222223, std=144.7756139515966, median=297.0, n=18


Mean area of a superpixel: 103