In [1]:
import os
import glob
import torch
import numpy as np
import multiprocessing as mp
from skimage import io
from skimage.color import rgb2gray
from tqdm import tqdm

# Select the 'fork' start method for every pool created in this notebook.
# force=True lets this cell be re-run without raising once a start method has already been set.
# NOTE(review): presumably chosen so that workers inherit the functions defined in the notebook cells;
# 'fork' is not available on Windows — confirm the target platform.
mp.set_start_method('fork', force=True)

In [2]:
def _backbone_mp_image_stas(this_img_path, image_mean_ls, image_std_ls, image_gray_mean_ls, image_gray_std_ls, mp_progress, task_size, mp_lock):
    """
    Multiprocessng backbone function to get image statistics.
    This is to alleviate the IO-bound performance bottleneck.
    """
    image = io.imread(this_img_path)
    if len(image.shape) == 3 and image.shape[-1] == 4:
        # RGBA to RGB for Cesium dataset
        image = image[:, :, :3]
    
    # Warning: the order of each entry might not match the overall input argument list because of inter-thread synchronization overhead.
    image_mean_ls.append(np.mean(image.reshape(-1, 3), axis=0))
    image_std_ls.append(np.std(image.reshape(-1, 3), axis=0))

    image_gray = rgb2gray(image)
    image_gray_mean_ls.append(np.mean(image_gray))
    image_gray_std_ls.append(np.std(image_gray))
    
    with mp_lock:
        mp_progress.value += 1
        print("\rProgress: {:d} / {:d}".format(mp_progress.value, task_size), end=" ", flush=True)
        
        
def _backbone_mp_label_stas(this_label_path, label_mean_ls, label_std_ls, mp_progress, task_size, mp_lock, surface_normal=False):
    """
    Multiprocessng backbone function to get 3D label statistics.
    This is to alleviate the IO-bound performance bottleneck.
    """
    label = torch.load(this_label_path)  # the .dat file
    if len(label.shape) == 2:
        # data in [H, W] such as depth
        label = label.unsqueeze(0)  # [1, H, W]
    elif len(label.shape) == 3:
        # data in [X, H, W] such as coord (X=3)
        pass
    else:
        raise NotImplementedError

    # Warning: the order of each entry might not match the overall input argument list because of inter-thread synchronization overhead.
    label = label.reshape(label.size(0), -1)  # [X, N]
    valid_label = label[0, :] != -1  # [N], mask
    label = label[:, valid_label]  # [X', N]
    
    if surface_normal:
        label = xyz2ae(label.unsqueeze(0)).squeeze(0) # [2, N]
    this_mean = label.reshape(label.size(0), -1).mean(dim=1).numpy()  # [X] numpy array
    this_std = label.reshape(label.size(0), -1).std(dim=1).numpy()  # [X] numpy array
    label_mean_ls.append(this_mean)
    label_std_ls.append(this_std)
    
    with mp_lock:
        mp_progress.value += 1
        print("\rProgress: {:d} / {:d}".format(mp_progress.value, task_size), end=" ", flush=True)

        
def xyz2ae(xyz: torch.Tensor) -> torch.Tensor:
    """
    Convert direction vectors into (azimuth, elevation) angle pairs.
    @param xyz  [B, 3, *] tensor of normalized direction vector.
    @return:    [B, 2, *] tensor of azimuth and elevation in radian.
    """
    x_comp = xyz[:, 0]  # [B, *]
    y_comp = xyz[:, 1]  # [B, *]
    z_comp = xyz[:, 2]  # [B, *]

    # length of the projection onto the xy-plane, always >= 0
    planar_len = xyz[:, 0:2].norm(p=2, dim=1)  # [B, *]

    # azimuth = arctan2(y, x), range [-pi, pi]
    azimuth = torch.atan2(y_comp, x_comp)  # [B, *]

    # elevation = arctan2(z, sqrt(x**2 + y**2)); the second argument is non-negative,
    # so the result stays within [-pi/2, pi/2]
    elevation = torch.atan2(z_comp, planar_len)  # [B, *]

    return torch.stack((azimuth, elevation), dim=1)  # [B, 2, *]

In [3]:
"""
Obtain RGB and grayscale mean & std statistics respectively for EPFL dataset.
"""

dataset_nms = ['EPFL']
PROJ_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))

# The manager is used as a context manager so that its server process is shut down when the cell is done.
with mp.Manager() as mp_manager:
    mp_lock = mp_manager.Lock()
    mp_progress = mp_manager.Value('i', 0)

    for nm in dataset_nms:
        # Start every dataset from a zeroed counter and fresh shared lists. The names below are rebound
        # to numpy arrays at the end of the iteration, so they must not be reused by the next dataset.
        mp_progress.value = 0
        image_mean_ls = mp_manager.list()
        image_std_ls = mp_manager.list()
        image_gray_mean_ls = mp_manager.list()
        image_gray_std_ls = mp_manager.list()

        train_sim_rgb_dir = os.path.join(PROJ_DIR, 'datasets', nm, 'train_sim', 'rgb')
        train_sim_rgb_ls = sorted(glob.glob(os.path.join(train_sim_rgb_dir, '*.png')))
        if not train_sim_rgb_ls:
            # Averaging an empty list would only yield NaN plus a numpy warning.
            print("Dataset {:s}: no '*.png' image found under {:s}, skipped.".format(nm, train_sim_rgb_dir))
            continue

        # One argument tuple per image; every worker call shares the same lists, counter and lock.
        mp_args_ls = []
        for this_img_path in tqdm(train_sim_rgb_ls, desc="Multiprocessing preparation..."):
            mp_args_ls.append((this_img_path, image_mean_ls, image_std_ls, image_gray_mean_ls, image_gray_std_ls, mp_progress, len(train_sim_rgb_ls), mp_lock))

        with mp.Pool() as pool:
            pool.starmap(_backbone_mp_image_stas, mp_args_ls, chunksize=os.cpu_count())

        # The workers report RGB statistics unscaled; dividing by 255 maps them to the [0, 1] range.
        # NOTE(review): assumes 8-bit images — confirm for every dataset.
        image_mean_ls = np.array(image_mean_ls) / 255.0
        mean_overall = np.mean(image_mean_ls, axis=0)

        # NOTE: this is the average of the per-image stds, not the std taken over all pixels of the dataset.
        image_std_ls = np.array(image_std_ls) / 255.0
        std_overall = np.mean(image_std_ls, axis=0)

        # skimage rgb2gray already stores the pixel in [0, 1] range, no need to divide by 255
        image_gray_mean_ls = np.array(image_gray_mean_ls)
        gray_mean_overall = np.mean(image_gray_mean_ls, axis=0)

        image_gray_std_ls = np.array(image_gray_std_ls)
        gray_std_overall = np.mean(image_gray_std_ls, axis=0)

        print("Dataset {:s} statistics:\nRGB mean: {}, RGB std: {}".format(nm, mean_overall, std_overall))
        print("grayscale mean: {}, grayscale std: {}".format(gray_mean_overall, gray_std_overall))
    

Multiprocessing preparation...: 100%|█████████████████████| 13500/13500 [00:00<00:00, 1301530.95it/s]


Progress: 13500 / 13500                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

In [4]:
"""
Obtain 3D label mean & std statistics respectively for EPFL dataset.
"""

dataset_nms = ['EPFL']
PROJ_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))

label_nms = ['init', 'depth', 'normal']

# The manager is used as a context manager so that its server process is shut down when the cell is done.
with mp.Manager() as mp_manager:
    mp_lock = mp_manager.Lock()
    mp_progress = mp_manager.Value('i', 0)

    for label_nm in label_nms:
        for nm in dataset_nms:
            # Zeroed counter and fresh shared lists for every (label, dataset) pair.
            # No worker is alive at this point, so the reset does not need the lock.
            mp_progress.value = 0
            label_mean_ls = mp_manager.list()
            label_std_ls = mp_manager.list()

            train_sim_label_dir = os.path.join(PROJ_DIR, 'datasets', nm, 'train_sim', label_nm)
            train_sim_label_ls = sorted(glob.glob(os.path.join(train_sim_label_dir, '*.dat')))
            if not train_sim_label_ls:
                # Averaging an empty list would only yield NaN plus a numpy warning.
                print("Dataset {:s}: no '*.dat' label found under {:s}, skipped.".format(nm, train_sim_label_dir))
                continue

            # One argument tuple per label file; only the 'normal' labels are converted to angles by the worker.
            mp_args_ls = []
            for this_label_path in tqdm(train_sim_label_ls, desc="Multiprocessing preparation..."):
                mp_args_ls.append((this_label_path, label_mean_ls, label_std_ls, mp_progress, len(train_sim_label_ls), mp_lock, label_nm == 'normal'))

            with mp.Pool() as pool:
                pool.starmap(_backbone_mp_label_stas, mp_args_ls, chunksize=os.cpu_count())

            label_mean_ls = np.array(label_mean_ls)  # [N, X]
            mean_overall = np.mean(label_mean_ls, axis=0)  # [X]

            # NOTE: this is the average of the per-file stds, not the std taken over all valid entries.
            label_std_ls = np.array(label_std_ls)  # [N, X]
            std_overall = np.mean(label_std_ls, axis=0)  # [X]

            print("Dataset {:s} statistics:\n{:s} mean: {}, {:s} std: {}".format(nm, label_nm, mean_overall, label_nm, std_overall))

Multiprocessing preparation...: 100%|█████████████████████| 13500/13500 [00:00<00:00, 1587726.89it/s]


Progress: 13500 / 13500                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

Multiprocessing preparation...: 100%|█████████████████████| 13500/13500 [00:00<00:00, 1697538.79it/s]


Progress: 13500 / 13500                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

Multiprocessing preparation...: 100%|██████████████████████| 13500/13500 [00:00<00:00, 617206.09it/s]


Progress: 13500 / 13500                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

In [5]:
"""
Obtain RGB and grayscale mean & std statistics respectively for comballaz dataset.
"""

dataset_nms = ['comballaz']
PROJ_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))

# The manager is used as a context manager so that its server process is shut down when the cell is done.
with mp.Manager() as mp_manager:
    mp_lock = mp_manager.Lock()
    mp_progress = mp_manager.Value('i', 0)

    for nm in dataset_nms:
        # Start every dataset from a zeroed counter and fresh shared lists. The names below are rebound
        # to numpy arrays at the end of the iteration, so they must not be reused by the next dataset.
        mp_progress.value = 0
        image_mean_ls = mp_manager.list()
        image_std_ls = mp_manager.list()
        image_gray_mean_ls = mp_manager.list()
        image_gray_std_ls = mp_manager.list()

        train_sim_rgb_dir = os.path.join(PROJ_DIR, 'datasets', nm, 'train_sim', 'rgb')
        train_sim_rgb_ls = sorted(glob.glob(os.path.join(train_sim_rgb_dir, '*.png')))
        if not train_sim_rgb_ls:
            # Averaging an empty list would only yield NaN plus a numpy warning.
            print("Dataset {:s}: no '*.png' image found under {:s}, skipped.".format(nm, train_sim_rgb_dir))
            continue

        # One argument tuple per image; every worker call shares the same lists, counter and lock.
        mp_args_ls = []
        for this_img_path in tqdm(train_sim_rgb_ls, desc="Multiprocessing preparation..."):
            mp_args_ls.append((this_img_path, image_mean_ls, image_std_ls, image_gray_mean_ls, image_gray_std_ls, mp_progress, len(train_sim_rgb_ls), mp_lock))

        with mp.Pool() as pool:
            pool.starmap(_backbone_mp_image_stas, mp_args_ls, chunksize=os.cpu_count())

        # The workers report RGB statistics unscaled; dividing by 255 maps them to the [0, 1] range.
        # NOTE(review): assumes 8-bit images — confirm for every dataset.
        image_mean_ls = np.array(image_mean_ls) / 255.0
        mean_overall = np.mean(image_mean_ls, axis=0)

        # NOTE: this is the average of the per-image stds, not the std taken over all pixels of the dataset.
        image_std_ls = np.array(image_std_ls) / 255.0
        std_overall = np.mean(image_std_ls, axis=0)

        # skimage rgb2gray already stores the pixel in [0, 1] range, no need to divide by 255
        image_gray_mean_ls = np.array(image_gray_mean_ls)
        gray_mean_overall = np.mean(image_gray_mean_ls, axis=0)

        image_gray_std_ls = np.array(image_gray_std_ls)
        gray_std_overall = np.mean(image_gray_std_ls, axis=0)

        print("Dataset {:s} statistics:\nRGB mean: {}, RGB std: {}".format(nm, mean_overall, std_overall))
        print("grayscale mean: {}, grayscale std: {}".format(gray_mean_overall, gray_std_overall))
    

Multiprocessing preparation...: 100%|██████████████████████| 27000/27000 [00:00<00:00, 386026.26it/s]


Progress: 27000 / 27000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

In [6]:
"""
Obtain 3D label mean & std statistics respectively for comballaz dataset.
"""

dataset_nms = ['comballaz']
PROJ_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))

label_nms = ['init', 'depth', 'normal']

# The manager is used as a context manager so that its server process is shut down when the cell is done.
with mp.Manager() as mp_manager:
    mp_lock = mp_manager.Lock()
    mp_progress = mp_manager.Value('i', 0)

    for label_nm in label_nms:
        for nm in dataset_nms:
            # Zeroed counter and fresh shared lists for every (label, dataset) pair.
            # No worker is alive at this point, so the reset does not need the lock.
            mp_progress.value = 0
            label_mean_ls = mp_manager.list()
            label_std_ls = mp_manager.list()

            train_sim_label_dir = os.path.join(PROJ_DIR, 'datasets', nm, 'train_sim', label_nm)
            train_sim_label_ls = sorted(glob.glob(os.path.join(train_sim_label_dir, '*.dat')))
            if not train_sim_label_ls:
                # Averaging an empty list would only yield NaN plus a numpy warning.
                print("Dataset {:s}: no '*.dat' label found under {:s}, skipped.".format(nm, train_sim_label_dir))
                continue

            # One argument tuple per label file; only the 'normal' labels are converted to angles by the worker.
            mp_args_ls = []
            for this_label_path in tqdm(train_sim_label_ls, desc="Multiprocessing preparation..."):
                mp_args_ls.append((this_label_path, label_mean_ls, label_std_ls, mp_progress, len(train_sim_label_ls), mp_lock, label_nm == 'normal'))

            with mp.Pool() as pool:
                pool.starmap(_backbone_mp_label_stas, mp_args_ls, chunksize=os.cpu_count())

            label_mean_ls = np.array(label_mean_ls)  # [N, X]
            mean_overall = np.mean(label_mean_ls, axis=0)  # [X]

            # NOTE: this is the average of the per-file stds, not the std taken over all valid entries.
            label_std_ls = np.array(label_std_ls)  # [N, X]
            std_overall = np.mean(label_std_ls, axis=0)  # [X]

            print("Dataset {:s} statistics:\n{:s} mean: {}, {:s} std: {}".format(nm, label_nm, mean_overall, label_nm, std_overall))

Multiprocessing preparation...: 100%|█████████████████████| 27000/27000 [00:00<00:00, 1386258.24it/s]


Progress: 27000 / 27000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

Multiprocessing preparation...: 100%|█████████████████████| 27000/27000 [00:00<00:00, 1526002.99it/s]


Progress: 27000 / 27000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

Multiprocessing preparation...: 100%|█████████████████████| 27000/27000 [00:00<00:00, 1453607.61it/s]


Progress: 27000 / 27000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 