In [3]:
import cv2
import os
import numpy as np
import json

In [7]:
class StreamStats(object):
    """Running mean, variance, minimum and maximum over a stream of samples.

    The mean and the sum of squared deviations are maintained with the
    one-pass recurrence described at
    https://www.johndcook.com/blog/standard_deviation/

    Samples can be scalars or NumPy arrays; with arrays every statistic is
    tracked element-wise (all samples must broadcast against each other).
    """
    def __init__(self):
        # Running mean of the samples seen so far; None before the first one.
        self.M = None
        # Running sum of squared deviations from the mean; None before the
        # first sample, scalar 0 after exactly one sample.
        self.S = None
        # Number of samples consumed.
        self.k = 0
        # Element-wise extrema of the samples seen so far; None before the first one.
        self.min = None
        self.max = None

    @staticmethod
    def _own(x):
        """Return a private copy of an ndarray sample; other values pass through."""
        return x.copy() if isinstance(x, np.ndarray) else x

    def update(self, x):
        """Fold one sample ``x`` (scalar or array) into the running statistics."""
        self.k += 1
        if self.k == 1:
            # Keep independent copies: storing the caller's array itself would
            # let a later in-place change of that buffer corrupt the mean and
            # the extrema.
            self.M = self._own(x)
            self.S = 0
            self.min = self._own(x)
            self.max = self._own(x)
        else:
            prevM = self.M
            prevS = self.S
            # The S update needs both the previous and the new mean.
            self.M = prevM + (x - prevM)/self.k
            self.S = prevS + (x - prevM) * (x - self.M)
            self.min = np.minimum(x, self.min)
            self.max = np.maximum(x, self.max)

    def mean(self):
        """Return the running mean (None if no sample was seen).

        The internal object is returned, not a copy.
        """
        return self.M

    def variance(self):
        """Return the sample variance (``k - 1`` denominator).

        With fewer than two samples the plain scalar ``0`` is returned, even
        when the samples are arrays.
        """
        if self.k - 1 > 0:
            return self.S / (self.k - 1)
        else:
            return 0

    def std(self):
        """Return the square root of :meth:`variance`."""
        return np.sqrt(self.variance())

    def minimum(self):
        """Return the element-wise minimum seen so far (None if no sample)."""
        return self.min

    def maximum(self):
        """Return the element-wise maximum seen so far (None if no sample)."""
        return self.max

In [3]:
# Directory whose files are listed with os.listdir and read with cv2.imread
# by the image-statistics cell below.
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
train_dir = "/home/lowik/sealion/data/sealion/TrainBlock/"

In [5]:
# Estimate per-channel colour statistics from a random sample of image files in
# train_dir and record, per training image, which blocks are usable.
stats = StreamStats()
block_status = dict()
# A block is flagged usable only when fewer than 60% of its pixels are black.
threshold_block_available = 0.6
N = 500
# Length of each per-image status vector; block ids parsed from the filenames
# are assumed to lie in 0..15 - TODO confirm.
blocks_per_image = 16
# Sorted listing + seeded local generator: the same N files are drawn on every
# run, without touching NumPy's global random state.
rng = np.random.RandomState(0)
shuffled_filenames = rng.permutation(sorted(os.listdir(train_dir)))[:N]
for filename in shuffled_filenames:
    im = cv2.imread(os.path.join(train_dir, filename))
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        print("Skipping unreadable file: ", filename)
        continue
    # Scale 8-bit channel values to [0, 1].
    im = im/255

    # Filenames look like "<train_id>_<block_id>.<extension>".
    train_id, ext = filename.split("_")
    block_id, _ = ext.split(".")
    train_id, block_id = int(train_id), int(block_id)

    if train_id not in block_status:
        # Built-in bool: the np.bool alias was removed from NumPy.
        block_status[train_id] = np.zeros((blocks_per_image, ), bool)

    total_pixel = im.shape[0] * im.shape[1]
    # A pixel is black only if every channel is zero. Counting zero channel
    # values instead would give a ratio of up to 3.0 and reject blocks far
    # below the intended 60% threshold.
    black_pixel = np.count_nonzero(np.all(im == 0, axis=-1))
    if black_pixel/total_pixel < threshold_block_available:
        block_status[train_id][block_id] = True

    # One mean per channel, averaged over both spatial axes.
    mean_per_channel = im.mean(axis=(0, 1))
    stats.update(mean_per_channel)

# NOTE(review): stats.std() is the spread of the per-image channel means, not
# the per-pixel standard deviation - confirm this is what consumers expect.
print("Mean: ", stats.mean())
print("Std: ", stats.std())

Mean:  [ 0.41749925  0.41663546  0.39410174]
Std:  [ 0.15231504  0.15536667  0.15928478]


In [None]:
# json cannot encode NumPy booleans, so each per-image flag array is replaced
# (in place, key by key) with a list of built-in bools before dumping.
for tid, flags in block_status.items():
    block_status[tid] = list(map(bool, flags))

with open("/home/lowik/sealion/data/sealion/blocks_status.json", "w") as jfile:
    json.dump(block_status, jfile, indent=2)

In [6]:
# Persist the statistics accumulated in `stats` as .npy files.
# NOTE(review): these paths are relative to the notebook's working directory,
# unlike the absolute paths used elsewhere in this notebook.
mean_bgr = stats.mean()
std_bgr = stats.std()
np.save("../data/sealion/mean_bgr.npy", mean_bgr)
np.save("../data/sealion/std_bgr.npy", std_bgr)

In [5]:
# Rebinds train_dir to the directory whose files are opened with
# np.load(...)["dmap"] by the density-statistics cell below.
# NOTE(review): this overwrites the TrainBlock path assigned earlier, so
# re-running the image-statistics cell after this one would list this
# directory instead.
train_dir = "/home/lowik/sealion/data/sealion/TrainDensity/"

In [19]:
# Summarise a random sample of density maps: each accumulator receives one
# scalar per file (min, max, sum and mean of the map).
stats_min = StreamStats()
stats_max = StreamStats()
stats_count = StreamStats()
stats_mean = StreamStats()
N = 500
# Sorted listing + seeded local generator: the same N files are drawn on every
# run, without touching NumPy's global random state.
rng = np.random.RandomState(0)
shuffled_filenames = rng.permutation(sorted(os.listdir(train_dir)))[:N]
for filename in shuffled_filenames:
    # The context manager closes the .npz archive; without it one file handle
    # per sampled file stays open until garbage collection.
    with np.load(os.path.join(train_dir, filename)) as archive:
        dmap = archive["dmap"]
    # Collapse the last axis (presumably one channel per class - TODO confirm)
    # into a single map.
    dmap = np.sum(dmap, axis=-1)

    stats_min.update(np.min(dmap))
    stats_max.update(np.max(dmap))
    stats_count.update(np.sum(dmap))
    stats_mean.update(np.mean(dmap))

In [20]:
# Smallest np.min(dmap) over the sampled files.
stats_min.minimum()

0.0

In [21]:
# Largest np.max(dmap) over the sampled files.
stats_max.maximum()

0.0057468279

In [22]:
# Largest np.sum(dmap) over the sampled files.
stats_count.maximum()

200.94699

In [23]:
# Average np.sum(dmap) over the sampled files.
stats_count.mean()

8.3698267011880834

In [24]:
# Average of the per-file np.mean(dmap) values.
stats_mean.mean()

4.1060985197746936e-06

In [25]:
# Sample standard deviation (k - 1 denominator) of the per-file np.mean(dmap) values.
stats_mean.std()

1.12463456142464e-05