In [5]:
import tensorflow as tf
import argparse
import os
from config import cfg
from easydict import EasyDict

In [16]:
# Run settings held in an EasyDict so they can be read as attributes
# (parser.tag, parser.lr, ...). Only `tag` is consumed in this cell; the
# remaining fields are presumably read by later cells -- TODO confirm.
parser = EasyDict(
    i=1,
    tag='Test1',
    single_batch_size=2,
    lr=0.001,
    al=1,
    output_path='./prediction',
    v=False,
)

# Dataset locations, all rooted at the project-level cfg.DATA_DIR.
dataset_dir = cfg.DATA_DIR
train_dir, val_dir = (
    os.path.join(cfg.DATA_DIR, split) for split in ('training', 'validation')
)

# Per-run output folders are keyed by the run tag and created up front
# (exist_ok=True makes re-running this cell harmless).
log_dir = os.path.join('./log', parser.tag)
save_model_dir = os.path.join('./save_model', parser.tag)
os.makedirs(log_dir, exist_ok=True)
os.makedirs(save_model_dir, exist_ok=True)

In [255]:
# TODO: aug_data() is not defined in the visible cells; Processor.__call__ calls it when aug=True.
def process_pointcloud(point_cloud,cls=cfg.DETECT_OBJ):
    """Group a point cloud into a sparse voxel grid.

    Args:
        point_cloud: array of shape (N, 4). The first three columns are
            treated as (X, Y, Z) coordinates; all four columns are copied
            into the feature buffer. NOTE: for any ``cls`` other than
            "Car" the array is shuffled IN PLACE (rows permuted) before
            voxelisation, so the caller's array is modified.
        cls: "Car" selects the car grid (10 x 400 x 352 voxels, at most 35
            points per voxel); any other value selects the smaller grid
            (10 x 200 x 240 voxels, at most 45 points per voxel).

    Returns:
        dict with three entries, where K is the number of non-empty voxels
        and T the per-voxel point limit:
            'feature_buffer'    -- float32 (K, T, 7): the 4 input columns
                                   followed by 3 offset-from-centroid columns
            'coordinate_buffer' -- int64 (K, 3): voxel indices in (Z, Y, X)
                                   order, unique and sorted by np.unique
            'number_buffer'     -- int64 (K,): points stored in each voxel
    """
    # Grid extent is grid_size * voxel_size in (Z, Y, X) order:
    # 4 x 80 x 70.4 for "Car", 4 x 40 x 48 otherwise.
    if cls == "Car":
        voxel_size = np.array([0.4, 0.2, 0.2], dtype=np.float32)
        grid_size = np.array([10, 400, 352], dtype=np.int64)
        lidar_coord = np.array([0, 40, 3], dtype=np.float32)
        max_point_number = 35
    else:
        voxel_size = np.array([0.4, 0.2, 0.2], dtype=np.float32)
        grid_size = np.array([10, 200, 240], dtype=np.int64)
        lidar_coord = np.array([0, 20, 3], dtype=np.float32)
        max_point_number = 45
        # NOTE(review): the shuffle only happens on this branch, so which
        # points survive the per-voxel cap is random here but depends on
        # file order for "Car" -- confirm this asymmetry is intended.
        np.random.shuffle(point_cloud)

    # lidar_coord is an (X, Y, Z) offset added to every point so that
    # in-range coordinates become non-negative before voxel indexing.
    shifted_coord = point_cloud[:, :3] + lidar_coord

    # Reverse the coordinate order (X, Y, Z) -> (Z, Y, X) and quantise.
    # np.int64 replaces the `np.int` alias, which NumPy 1.24 removed.
    voxel_index = np.floor(shifted_coord[:, ::-1] / voxel_size).astype(np.int64)

    # Keep only points whose voxel index lies inside the grid on all axes.
    in_grid = np.all((voxel_index >= 0) & (voxel_index < grid_size), axis=1)
    point_cloud = point_cloud[in_grid]
    voxel_index = voxel_index[in_grid]

    # [K, 3] coordinate buffer: several points may fall into the same
    # voxel, so duplicates are removed.
    coordinate_buffer = np.unique(voxel_index, axis=0)

    K = len(coordinate_buffer)
    T = max_point_number

    # [K] number of points actually stored per voxel.
    number_buffer = np.zeros(shape=(K), dtype=np.int64)

    # [K, T, 7] feature buffer; unused slots stay zero in the first 4 columns.
    feature_buffer = np.zeros(shape=(K, T, 7), dtype=np.float32)

    # Reverse index: voxel coordinate -> row in the buffers above.
    index_buffer = {tuple(coord): row for row, coord in enumerate(coordinate_buffer)}

    # Fill each voxel in point order; points beyond the first T are dropped.
    for voxel, point in zip(voxel_index, point_cloud):
        index = index_buffer[tuple(voxel)]
        number = number_buffer[index]
        if number < T:
            feature_buffer[index, number, :4] = point
            number_buffer[index] += 1

    # Last three channels: offset of each slot from the voxel centroid.
    # Every voxel in coordinate_buffer holds at least one point, so the
    # divisor is never zero.
    # NOTE(review): the subtraction runs over all T slots, so zero-padded
    # slots end up holding -centroid rather than 0 -- confirm that the
    # consumer masks padded slots.
    centroid = feature_buffer[:, :, :3].sum(axis=1, keepdims=True) / number_buffer.reshape(K, 1, 1)
    feature_buffer[:, :, -3:] = feature_buffer[:, :, :3] - centroid

    voxel_dict = {
        'feature_buffer': feature_buffer,
        'coordinate_buffer': coordinate_buffer,
        'number_buffer': number_buffer,
    }
    return voxel_dict

In [258]:
class Processor():
    """Callable that loads one sample by index.

    Instances are passed to ``multiprocessing.Pool.map`` in iterate_data,
    so they only hold plain lists and flags.

    Args:
        data_tag: per-sample identifiers, indexed like the file lists.
        f_rgb: image file paths.
        f_lidar: point-cloud file paths (raw float32, 4 values per point).
        f_label: label file paths; only read when ``is_testset`` is False.
        data_dir: dataset root, forwarded to ``aug_data`` when ``aug`` is set.
        aug: if True, delegate loading to ``aug_data``.
        is_testset: if True, no label file is read and labels is ``[""]``.
    """

    def __init__(self,data_tag,f_rgb,f_lidar,f_label,data_dir,aug,is_testset):
        self.data_tag = data_tag
        self.f_rgb = f_rgb
        self.f_lidar = f_lidar
        self.f_label = f_label
        # Previously never stored, which made the aug branch of __call__
        # fail with AttributeError on self.data_dir.
        self.data_dir = data_dir
        self.aug = aug
        self.is_testset = is_testset

    def __call__(self,load_index):
        """Load sample ``load_index``.

        Returns:
            With ``aug`` set: whatever ``aug_data`` returns.
            Otherwise: ``[tag, rgb, raw_lidar, voxel, labels]`` where
            ``rgb`` is the image resized to (cfg.IMAGE_WIDTH,
            cfg.IMAGE_HEIGHT), ``raw_lidar`` is an (N, 4) float32 array,
            ``voxel`` is the dict from process_pointcloud and ``labels``
            is the list of raw label lines.
        """
        if self.aug:
            # aug_data must be defined elsewhere in the notebook/project.
            ret = aug_data(self.data_tag[load_index],self.data_dir)
        else:
            rgb = cv2.resize(cv2.imread(self.f_rgb[load_index]),(cfg.IMAGE_WIDTH,cfg.IMAGE_HEIGHT))
            raw_lidar = np.fromfile(self.f_lidar[load_index], dtype=np.float32).reshape((-1, 4))

            if not self.is_testset:
                # Context manager so the handle is closed promptly in
                # long-lived pool workers.
                with open(self.f_label[load_index],'r') as label_file:
                    labels = label_file.readlines()
            else:
                labels = [""]
            tag = self.data_tag[load_index]
            voxel = process_pointcloud(raw_lidar)
            ret = [tag,rgb,raw_lidar,voxel,labels]
        return ret

In [294]:
def build_input(voxel_dict_list):
    """Merge per-sample voxel dicts into flat batch arrays.

    Args:
        voxel_dict_list: sequence of dicts with the keys 'feature_buffer',
            'number_buffer' and 'coordinate_buffer'.

    Returns:
        (batch_size, feature, number, coordinate): the sample count and
        the three buffers concatenated along axis 0. Each coordinate row
        gains a leading column holding the index of the sample it came
        from, so rows stay attributable after concatenation.
    """
    batch_size = len(voxel_dict_list)

    features = [entry['feature_buffer'] for entry in voxel_dict_list]
    counts = [entry['number_buffer'] for entry in voxel_dict_list]
    # Left-pad every coordinate row with one column filled with the
    # sample's position in the batch.
    tagged_coords = [
        np.pad(entry['coordinate_buffer'], ((0, 0), (1, 0)),
               mode='constant', constant_values=sample_idx)
        for sample_idx, entry in enumerate(voxel_dict_list)
    ]

    return (
        batch_size,
        np.concatenate(features),
        np.concatenate(counts),
        np.concatenate(tagged_coords),
    )

In [297]:
# kitti loader
import glob
import os
import math
import cv2
import numpy as np
import multiprocessing

# Global worker pools, created once when this cell runs (2 processes each).
# iterate_data dispatches sample loading through TRAIN_POOL; VAL_POOL is not
# referenced in the visible cells. Neither pool is closed here, and
# re-running the cell creates new pools without shutting down the old ones.
TRAIN_POOL = multiprocessing.Pool(2)
VAL_POOL = multiprocessing.Pool(2)

def _to_batch_array(items):
    """Convert a list of per-sample items to an ndarray, tolerating ragged input.

    Uniform input is converted exactly as np.array would. When the items
    have differing shapes/lengths (NumPy >= 1.24 raises ValueError for
    that), a 1-D object array holding the items unchanged is returned.
    """
    try:
        return np.array(items)
    except ValueError:
        packed = np.empty(len(items), dtype=object)
        for pos, item in enumerate(items):
            packed[pos] = item
        return packed


def iterate_data(data_dir, shuffle=False, aug=False, is_testset=False, batch_size=1,multi_gpu_sum=1):
    """Yield batches from a KITTI-style dataset folder.

    Expects ``image_2/*.png`` and ``velodyne/*.bin`` under ``data_dir``
    (plus ``label_2/*.txt`` unless ``is_testset``); files are paired by
    sorted order. Samples are loaded in parallel via the module-level
    TRAIN_POOL. A trailing partial batch is dropped.

    Args:
        data_dir: dataset split root.
        shuffle: shuffle sample order once before batching.
        aug: forwarded to Processor (loads through aug_data when True).
        is_testset: forwarded to Processor (skips reading label files).
        batch_size: samples per yielded batch.
        multi_gpu_sum: number of groups the batch's voxel data is split
            into; each group gets batch_size // multi_gpu_sum samples.

    Yields:
        Tuple of arrays ``(tag, labels, vox_feature, vox_number,
        vox_coordinate, rgb, raw_lidar)``. The three ``vox_*`` arrays have
        one leading entry per group; any array whose per-sample items
        differ in shape is a 1-D object array.
    """
    f_rgb = sorted(glob.glob(os.path.join(data_dir, "image_2", '*.png')))
    f_lidar = sorted(glob.glob(os.path.join(data_dir, "velodyne", '*.bin')))
    f_label = sorted(glob.glob(os.path.join(data_dir, "label_2", '*.txt')))

    # File stem as the sample tag; os.path handles either path separator.
    data_tag = [os.path.splitext(os.path.basename(name))[0] for name in f_rgb]

    assert len(data_tag)!=0, "dataset folder is not correct"
    assert len(data_tag)==len(f_rgb)==len(f_lidar), "dataset folder is not correct"
    if not is_testset and not aug:
        # Labels are indexed in lock-step with the images in this mode, so
        # a count mismatch would mis-pair them or fail inside a worker.
        assert len(f_label)==len(data_tag), "dataset folder is not correct"

    nums = len(f_rgb)
    indices = list(range(nums))

    if shuffle:
        np.random.shuffle(indices)

    # Only full batches are produced.
    num_batches = nums // batch_size

    proc = Processor(data_tag,f_rgb,f_lidar,f_label,data_dir,aug,is_testset)

    # Constant for the whole run, so computed once outside the loop.
    single_batch_size = batch_size // multi_gpu_sum

    for batch_idx in range(num_batches):
        start_idx = batch_idx*batch_size
        excerpt = indices[start_idx:start_idx+batch_size]

        rets = TRAIN_POOL.map(proc,excerpt)

        tag = [ret[0] for ret in rets]
        rgb = [ret[1] for ret in rets]
        raw_lidar = [ret[2] for ret in rets]
        voxel = [ret[3] for ret in rets]
        labels = [ret[4] for ret in rets]

        # only for voxel -> [gpu, k_single_batch, ...]
        vox_feature, vox_number, vox_coordinate = [], [], []
        for idx in range(multi_gpu_sum):
            group = voxel[idx * single_batch_size:(idx + 1) * single_batch_size]
            _, per_vox_feature, per_vox_number, per_vox_coordinate = build_input(group)
            vox_feature.append(per_vox_feature)
            vox_number.append(per_vox_number)
            vox_coordinate.append(per_vox_coordinate)

        # Label counts, point counts and per-group voxel counts vary from
        # sample to sample, so these lists are frequently ragged.
        ret = (
               _to_batch_array(tag),
               _to_batch_array(labels),
               _to_batch_array(vox_feature),
               _to_batch_array(vox_number),
               _to_batch_array(vox_coordinate),
               _to_batch_array(rgb),
               _to_batch_array(raw_lidar)
               )

        yield ret

In [299]:
# Create a lazy batch generator over the training split using iterate_data's
# defaults (batch_size=1, no shuffling, no augmentation, labels loaded).
# Nothing is read from disk until the first next() call.
batches=iterate_data(train_dir)

In [300]:
# Pull one batch from the generator; the yielded tuple
# (tag, labels, vox_feature, vox_number, vox_coordinate, rgb, raw_lidar)
# is displayed in full as the cell output.
next(batches)

(array(['000000'], dtype='<U6'),
 array([['Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01\n']],
       dtype='<U87'),
 array([[[[ 3.8050001e+00, -7.0460000e+00, -2.7539999e+00, ...,
           -9.0000629e-03, -9.9997520e-03, -1.4998913e-03],
          [ 3.8230000e+00, -7.0260000e+00, -2.7509999e+00, ...,
            8.9998245e-03,  1.0000229e-02,  1.5001297e-03],
          [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
           -3.8140001e+00,  7.0360003e+00,  2.7525001e+00],
          ...,
          [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
           -3.8140001e+00,  7.0360003e+00,  2.7525001e+00],
          [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
           -3.8140001e+00,  7.0360003e+00,  2.7525001e+00],
          [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
           -3.8140001e+00,  7.0360003e+00,  2.7525001e+00]],
 
         [[ 4.1199999e+00, -7.0539999e+00, -2.6610000e+00, ...,
           -3.70