# DataLoader

In [1]:
from __future__ import absolute_import, division, print_function

import os
import random
import numpy as np
import copy
from PIL import Image  

import PIL.Image as pil

import torch
import torch.utils.data as data
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import skimage.transform
from collections import Counter

In [2]:
def pil_loader(path):
    """Open the image file at ``path`` and return it converted to RGB."""
    # Open the file ourselves so the handle is released as soon as the
    # pixel data has been converted.
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        rgb = picture.convert('RGB')
    return rgb

In [3]:
class MainDataset(data.Dataset):
    """Dataset producing multi-scale colour frames, intrinsics and optional depth.

    Each entry of ``filenames`` is a whitespace-separated line of the form
    ``"<folder> <frame_index> <side>"``.  ``__getitem__`` returns a dict keyed by:

    * ``("color", frame_id, scale)`` / ``("color_aug", frame_id, scale)`` --
      image tensors for every entry of ``frame_idxs`` and every scale in
      ``range(num_scales)``;
    * ``("K", scale)`` / ``("inv_K", scale)`` -- 4x4 intrinsics and their
      pseudo-inverse for each scale;
    * ``"depth_gt"`` -- only when a velodyne scan exists for the first sample;
    * ``"stereo_T"`` -- only when ``"s"`` is in ``frame_idxs``.
    """

    def __init__(self, data_path, filenames, height, width, frame_idxs,
                 num_scales, is_train=False, img_ext='.jpg'):
        """Store the configuration and build the per-scale resize transforms.

        Args:
            data_path: root directory that contains the sequence folders.
            filenames: list of split lines (``"folder frame_index side"``).
            height, width: size of the scale-0 images.
            frame_idxs: frame offsets relative to the sample (ints) and/or
                ``"s"`` for the opposite stereo view.
            num_scales: number of scales; scale ``i`` is ``1 / 2**i`` of scale 0.
            is_train: whether the dataset is used for training.
            img_ext: file extension of the colour images.
        """
        super(MainDataset, self).__init__()

        self.data_path = data_path
        self.filenames = filenames
        self.height = height
        self.width = width
        self.num_scales = num_scales
        self.frame_idxs = frame_idxs
        self.is_train = is_train
        self.img_ext = img_ext

        # ``Image.ANTIALIAS`` was an alias of ``Image.LANCZOS`` and was removed
        # in Pillow 10; ``LANCZOS`` is the same filter and exists in old and
        # new Pillow releases alike.
        self.interp = Image.LANCZOS
        self.loader = pil_loader
        self.to_tensor = transforms.ToTensor()

        # One resize transform per scale, halving the resolution each time.
        self.resize = {}
        for i in range(self.num_scales):
            s = 2 ** i
            self.resize[i] = transforms.Resize(
                (self.height // s, self.width // s), interpolation=self.interp)

        self.load_depth = self.check_depth()

        # Intrinsics expressed as fractions of the image size: row 0 is
        # multiplied by the width and row 1 by the height in __getitem__.
        self.K = np.array([[0.58, 0, 0.5, 0],
                           [0, 1.92, 0.5, 0],
                           [0, 0, 1, 0],
                           [0, 0, 0, 1]], dtype=np.float32)

        # (width, height); reversed to (rows, cols) when resizing depth maps.
        self.full_res_shape = (1242, 375)
        # Side code from the split file -> camera number in "image_0<N>".
        self.side_map = {"2": 2, "3": 3, "l": 2, "r": 3}

    def __len__(self):
        """Return the number of samples (one per split line)."""
        return len(self.filenames)

    def preprocess(self, inputs, color_aug):
        """Build the scale pyramid and convert all colour images to tensors.

        ``inputs`` is modified in place.  For every ``("color", frame_id, -1)``
        image, scales ``0 .. num_scales - 1`` are produced by successively
        resizing the previous scale.  Then every colour entry is converted with
        ``to_tensor`` and an augmented copy is stored under ``"color_aug"``.

        Args:
            inputs: dict holding the raw images under ``(name, frame_id, -1)``.
            color_aug: callable applied to each image before the ``_aug``
                tensor conversion.
        """
        # Iterate over a snapshot of the keys because entries are added below.
        for key in list(inputs):
            if "color" in key:
                name, frame_id, _ = key
                for scale in range(self.num_scales):
                    # Each scale is derived from the one just above it.
                    inputs[(name, frame_id, scale)] = self.resize[scale](
                        inputs[(name, frame_id, scale - 1)])

        for key in list(inputs):
            if "color" in key:
                name, frame_id, scale = key
                image = inputs[key]
                inputs[key] = self.to_tensor(image)
                inputs[(name + "_aug", frame_id, scale)] = self.to_tensor(color_aug(image))

    def check_depth(self):
        """Return True if a velodyne scan exists for the first split entry."""
        line = self.filenames[0].split()
        scene_name = line[0]
        frame_index = int(line[1])

        velo_filename = os.path.join(
            self.data_path,
            scene_name,
            "velodyne_points/data/{:010d}.bin".format(frame_index))

        return os.path.isfile(velo_filename)

    def get_color(self, folder, frame_index, side):
        """Load the colour image for ``folder`` / ``frame_index`` / ``side``."""
        return self.loader(self.get_image_path(folder, frame_index, side))

    def get_image_path(self, folder, frame_index, side):
        """Return the path of the image file for the given frame and side."""
        f_str = "{:010d}{}".format(frame_index, self.img_ext)
        image_path = os.path.join(
            self.data_path, folder, "image_0{}/data".format(self.side_map[side]), f_str)
        return image_path

    def get_depth(self, folder, frame_index, side):
        """Return the depth map generated from the velodyne scan of a frame.

        The calibration directory is the first path component of ``folder``.
        The map is resized to ``full_res_shape`` (as rows, cols) with
        ``order=0`` so that depth values are not interpolated.
        """
        calib_path = os.path.join(self.data_path, folder.split("/")[0])

        velo_filename = os.path.join(
            self.data_path,
            folder,
            "velodyne_points/data/{:010d}.bin".format(int(frame_index)))

        depth_gt = generate_depth_map(calib_path, velo_filename, self.side_map[side])
        depth_gt = skimage.transform.resize(
            depth_gt, self.full_res_shape[::-1], order=0, preserve_range=True, mode='constant')

        return depth_gt

    def __getitem__(self, index):
        """Assemble and return the dict of tensors for sample ``index``."""
        inputs = {}

        # Colour augmentation and horizontal flipping are disabled in this
        # version: the flag is drawn but the transform below is the identity.
        do_color_aug = self.is_train and random.random() > 0.5

        line = self.filenames[index].split()
        folder = line[0]

        if len(line) == 3:
            frame_index = int(line[1])
            side = line[2]
        else:
            # NOTE: with side=None the side_map lookups below raise KeyError,
            # so split lines are effectively required to have three fields.
            frame_index = 0
            side = None

        for i in self.frame_idxs:
            if i == "s":
                # "s" selects the opposite camera at the same time step.
                other_side = {"r": "l", "l": "r"}[side]
                inputs[("color", i, -1)] = self.get_color(folder, frame_index, other_side)
            else:
                inputs[("color", i, -1)] = self.get_color(folder, frame_index + i, side)

        for scale in range(self.num_scales):
            K = self.K.copy()

            # Scale the fractional intrinsics to this scale's resolution.
            K[0, :] *= self.width // (2 ** scale)
            K[1, :] *= self.height // (2 ** scale)

            inv_K = np.linalg.pinv(K)

            inputs[("K", scale)] = torch.from_numpy(K)
            inputs[("inv_K", scale)] = torch.from_numpy(inv_K)

        color_aug = (lambda x: x)

        self.preprocess(inputs, color_aug)

        # The full-resolution source images are only needed to build the pyramid.
        for i in self.frame_idxs:
            del inputs[("color", i, -1)]
            del inputs[("color_aug", i, -1)]

        if self.load_depth:
            depth_gt = self.get_depth(folder, frame_index, side)
            inputs["depth_gt"] = np.expand_dims(depth_gt, 0)
            inputs["depth_gt"] = torch.from_numpy(inputs["depth_gt"].astype(np.float32))

        if "s" in self.frame_idxs:
            # Transform between the stereo views: an x-translation of 0.1
            # whose sign depends on which side the sample comes from.
            stereo_T = np.eye(4, dtype=np.float32)
            baseline_sign = 1  # would be -1 for flipped images; flipping is disabled
            side_sign = -1 if side == "l" else 1
            stereo_T[0, 3] = side_sign * baseline_sign * 0.1

            inputs["stereo_T"] = torch.from_numpy(stereo_T)

        return inputs

In [4]:
def load_velodyne_points(filename):
    """Read a float32 binary scan and return it as an (N, 4) array.

    The fourth column of every row is overwritten with 1.0 so that each
    row can be used directly as a homogeneous coordinate.
    """
    raw = np.fromfile(filename, dtype=np.float32)
    cloud = raw.reshape(-1, 4)
    cloud[:, -1] = 1.0  # homogeneous
    return cloud


def read_calib_file(path):
    """Parse a ``key: value`` calibration text file into a dict.

    Each non-blank line is split at its first ``:``.  Values made up only of
    digits, ``.``, ``e``, ``+``, ``-`` and spaces are converted to a float
    ``numpy`` array; every other value is kept as the stripped string.

    Args:
        path: path of the calibration file.

    Returns:
        dict mapping each key to a ``numpy`` array or a string.

    Raises:
        ValueError: if a non-blank line contains no ``:``.
    """
    float_chars = set("0123456789.e+- ")
    calib = {}
    with open(path, 'r') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) carry no entry; without
            # this guard the unpacking below would raise ValueError.
            if not line.strip():
                continue
            key, value = line.split(':', 1)
            value = value.strip()
            calib[key] = value
            if float_chars.issuperset(value):
                # try to cast to float array
                try:
                    calib[key] = np.array(list(map(float, value.split(' '))))
                except ValueError:
                    # casting error: calib[key] already holds the string value
                    pass

    return calib


def sub2ind(matrixSize, rowSub, colSub):
    """Combine row and column subscripts into a single index value.

    Computes ``rowSub * (n - 1) + colSub - 1`` where ``n`` is the second
    entry of ``matrixSize``; the first entry is not used.
    """
    _, n_cols = matrixSize
    row_offset = rowSub * (n_cols - 1)
    return row_offset + colSub - 1


def generate_depth_map(calib_dir, velo_filename, cam=2, vel_depth=False):
    """Project a velodyne scan into a camera image to build a sparse depth map.

    Args:
        calib_dir: directory containing ``calib_cam_to_cam.txt`` and
            ``calib_velo_to_cam.txt``.
        velo_filename: path of the binary velodyne scan.
        cam: camera number; selects ``P_rect_0<cam>`` from the calibration.
        vel_depth: if True, store the first velodyne coordinate of each point
            as its depth instead of the projected depth.

    Returns:
        2-D float array with the image shape taken from ``S_rect_02``; pixels
        without a projected point are 0.
    """
    # load calibration files
    cam2cam = read_calib_file(os.path.join(calib_dir, 'calib_cam_to_cam.txt'))
    velo2cam = read_calib_file(os.path.join(calib_dir, 'calib_velo_to_cam.txt'))
    velo2cam = np.hstack((velo2cam['R'].reshape(3, 3), velo2cam['T'][..., np.newaxis]))
    velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0])))

    # get image shape (always read from S_rect_02, whatever `cam` is)
    im_shape = cam2cam["S_rect_02"][::-1].astype(np.int32)

    # compute projection matrix velodyne->image plane
    R_cam2rect = np.eye(4)
    R_cam2rect[:3, :3] = cam2cam['R_rect_00'].reshape(3, 3)
    P_rect = cam2cam['P_rect_0'+str(cam)].reshape(3, 4)
    P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam)

    # load velodyne points and remove all behind image plane (approximation)
    # each row of the velodyne data is forward, left, up, reflectance
    velo = load_velodyne_points(velo_filename)
    velo = velo[velo[:, 0] >= 0, :]

    # project the points to the camera
    velo_pts_im = np.dot(P_velo2im, velo.T).T
    velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., np.newaxis]

    if vel_depth:
        velo_pts_im[:, 2] = velo[:, 0]

    # check if in bounds
    # use minus 1 to get the exact same value as KITTI matlab code
    velo_pts_im[:, 0] = np.round(velo_pts_im[:, 0]) - 1
    velo_pts_im[:, 1] = np.round(velo_pts_im[:, 1]) - 1
    val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0)
    val_inds = val_inds & (velo_pts_im[:, 0] < im_shape[1]) & (velo_pts_im[:, 1] < im_shape[0])
    velo_pts_im = velo_pts_im[val_inds, :]

    # project to image
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    depth = np.zeros((im_shape[:2]))
    depth[velo_pts_im[:, 1].astype(int), velo_pts_im[:, 0].astype(int)] = velo_pts_im[:, 2]

    # find the duplicate points and choose the closest depth
    inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0])
    dupe_inds = [item for item, count in Counter(inds).items() if count > 1]
    for dd in dupe_inds:
        pts = np.where(inds == dd)[0]
        x_loc = int(velo_pts_im[pts[0], 0])
        y_loc = int(velo_pts_im[pts[0], 1])
        depth[y_loc, x_loc] = velo_pts_im[pts, 2].min()
    # negative depths are invalid; mark them as "no measurement"
    depth[depth < 0] = 0

    return depth

In [5]:
# Dataset parameters
data_path = "/home/ubuntu/monodepth2/kitti_data"
height, width = 192, 640
frame_ids = [0, -1, 1, 's']
num_scales = 4
img_ext = '.jpg'

# DataLoader parameters
batch_size = num_workers = 12

In [6]:
fpath_train = "/home/ubuntu/monodepth2/splits/eigen_zhou/train_files.txt"
# Read the training split inside a context manager so the file handle is
# closed as soon as the lines have been loaded.
with open(fpath_train) as f_train:
    train_filenames = f_train.readlines()
#train_filenames

In [7]:
fpath_val = "/home/ubuntu/monodepth2/splits/eigen_zhou/val_files.txt"
# Read the validation split inside a context manager so the file handle is
# closed as soon as the lines have been loaded.
with open(fpath_val) as f_val:
    val_filenames = f_val.readlines()
#val_filenames

In [13]:
fpath_test = "/home/ubuntu/monodepth2/splits/eigen/test_files.txt"
# Read the test split inside a context manager so the file handle is
# closed as soon as the lines have been loaded.
with open(fpath_test) as f_test:
    test_filenames = f_test.readlines()
#test_filenames

In [9]:
# Registry of available dataset classes, keyed by dataset name.
datasets_dict = dict(kitti=MainDataset)
# Dataset class used by the cells that build the loaders.
dataset = datasets_dict["kitti"]

In [15]:
# Training split: shuffled, incomplete final batch dropped.
train_dataset = dataset(
    data_path=data_path,
    filenames=train_filenames,
    height=height,
    width=width,
    frame_idxs=frame_ids,
    num_scales=num_scales,
    is_train=True,
    img_ext=img_ext,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=True,
)

In [16]:
# Validation split: shuffled, incomplete final batch dropped.
val_dataset = dataset(
    data_path=data_path,
    filenames=val_filenames,
    height=height,
    width=width,
    frame_idxs=frame_ids,
    num_scales=num_scales,
    is_train=False,
    img_ext=img_ext,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=True,
)

In [17]:
# Test split: evaluation must see every sample exactly once and in file
# order, so the loader neither shuffles nor drops the final partial batch.
test_dataset = dataset(data_path, test_filenames, height, width, frame_ids, num_scales,
                       is_train=False, img_ext=img_ext)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False, num_workers=num_workers,
                         pin_memory=True, drop_last=False)

In [18]:
# Display the first training sample (the dict returned by __getitem__).
train_dataset[0]

{('K', 0): tensor([[371.2000,   0.0000, 320.0000,   0.0000],
         [  0.0000, 368.6400,  96.0000,   0.0000],
         [  0.0000,   0.0000,   1.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   1.0000]]),
 ('inv_K', 0): tensor([[ 2.6940e-03, -5.3033e-11, -8.6207e-01,  0.0000e+00],
         [-3.9697e-11,  2.7127e-03, -2.6042e-01,  0.0000e+00],
         [-8.8676e-11,  5.3253e-11,  1.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]),
 ('K', 1): tensor([[185.6000,   0.0000, 160.0000,   0.0000],
         [  0.0000, 184.3200,  48.0000,   0.0000],
         [  0.0000,   0.0000,   1.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   1.0000]]),
 ('inv_K', 1): tensor([[ 5.3879e-03, -1.3609e-10, -8.6207e-01,  0.0000e+00],
         [ 2.7678e-11,  5.4253e-03, -2.6042e-01,  0.0000e+00],
         [ 3.1640e-10,  7.0044e-11,  1.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]),
 ('K', 2): tensor([[92.8000,  0.

In [19]:
# Display the first validation sample (the dict returned by __getitem__).
val_dataset[0]

{('K', 0): tensor([[371.2000,   0.0000, 320.0000,   0.0000],
         [  0.0000, 368.6400,  96.0000,   0.0000],
         [  0.0000,   0.0000,   1.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   1.0000]]),
 ('inv_K', 0): tensor([[ 2.6940e-03, -5.3033e-11, -8.6207e-01,  0.0000e+00],
         [-3.9697e-11,  2.7127e-03, -2.6042e-01,  0.0000e+00],
         [-8.8676e-11,  5.3253e-11,  1.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]),
 ('K', 1): tensor([[185.6000,   0.0000, 160.0000,   0.0000],
         [  0.0000, 184.3200,  48.0000,   0.0000],
         [  0.0000,   0.0000,   1.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   1.0000]]),
 ('inv_K', 1): tensor([[ 5.3879e-03, -1.3609e-10, -8.6207e-01,  0.0000e+00],
         [ 2.7678e-11,  5.4253e-03, -2.6042e-01,  0.0000e+00],
         [ 3.1640e-10,  7.0044e-11,  1.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]),
 ('K', 2): tensor([[92.8000,  0.

In [20]:
# Display the first test sample (the dict returned by __getitem__).
test_dataset[0]

{('K', 0): tensor([[371.2000,   0.0000, 320.0000,   0.0000],
         [  0.0000, 368.6400,  96.0000,   0.0000],
         [  0.0000,   0.0000,   1.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   1.0000]]),
 ('inv_K', 0): tensor([[ 2.6940e-03, -5.3033e-11, -8.6207e-01,  0.0000e+00],
         [-3.9697e-11,  2.7127e-03, -2.6042e-01,  0.0000e+00],
         [-8.8676e-11,  5.3253e-11,  1.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]),
 ('K', 1): tensor([[185.6000,   0.0000, 160.0000,   0.0000],
         [  0.0000, 184.3200,  48.0000,   0.0000],
         [  0.0000,   0.0000,   1.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000,   1.0000]]),
 ('inv_K', 1): tensor([[ 5.3879e-03, -1.3609e-10, -8.6207e-01,  0.0000e+00],
         [ 2.7678e-11,  5.4253e-03, -2.6042e-01,  0.0000e+00],
         [ 3.1640e-10,  7.0044e-11,  1.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]),
 ('K', 2): tensor([[92.8000,  0.

In [None]:
# Smoke test: pull a single batch from the training loader and print the
# shape of its ground-truth depth tensor, then stop.
for batch in train_loader:
    #print(batch[('K',0)].shape)
    print(batch['depth_gt'].shape)
    break