In [1]:
import tensorflow as tf
import argparse
import os
from config import cfg
from easydict import EasyDict
from utils.kitti_loader import iterate_data
from model.group_pointcloud import FeatureNet
import numpy as np
from model.rpn import MiddleAndRPN

In [2]:
# Notebook stand-in for the command-line arguments of the training script:
# every option is set by hand on an attribute-style dictionary.
parser = EasyDict({
    'i': 1,
    'tag': 'Test1',
    'single_batch_size': 2,
    'lr': 0.001,
    'al': 1,
    'output_path': './prediction',
    'v': False,
})

# Dataset splits live under the configured data directory; logs and saved
# models are grouped by the run tag.
dataset_dir = cfg.DATA_DIR
train_dir = os.path.join(dataset_dir, 'training')
val_dir = os.path.join(dataset_dir, 'validation')
log_dir = os.path.join('./log', parser.tag)
save_model_dir = os.path.join('./save_model', parser.tag)

# exist_ok=True makes the cell safe to re-run.
for _out_dir in (log_dir, save_model_dir):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:
# Pull a single mini-batch (two samples) from the training split.
# Batch layout: (tag, labels, vox_feature, vox_number, vox_coordinate, rgb, raw_lidar)
batches = iterate_data(train_dir, batch_size=2)
batch = next(batches)
voxel_feature, voxel_coordinate = batch[2], batch[4]

In [4]:
# Print a compact summary of the batch. The voxel features, voxel coordinates
# and RGB images are deliberately skipped because they are too large to read;
# only the shape of the first raw lidar cloud is shown.
for _field_name, _field_value in (
    ("tag", batch[0]),
    ("labels", batch[1]),
    ("vox_number", batch[3]),
    ("raw_lidar", batch[6][0].shape),
):
    print("{}:{}".format(_field_name, _field_value))

tag:['000000' '000001']
labels:[list(['Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01\n'])
 list(['Truck 0.00 0 -1.57 599.41 156.40 629.75 189.25 2.85 2.63 12.34 0.47 1.49 69.44 -1.56\n', 'Car 0.00 0 1.85 387.63 181.54 423.81 203.12 1.67 1.87 3.69 -16.53 2.39 58.49 1.57\n', 'Cyclist 0.00 3 -1.65 676.60 163.95 688.98 193.93 1.86 0.60 2.02 4.59 1.32 45.84 -1.55\n', 'DontCare -1 -1 -10 503.89 169.71 590.61 190.13 -1 -1 -1 -1000 -1000 -1000 -10\n', 'DontCare -1 -1 -10 511.35 174.96 527.81 187.45 -1 -1 -1 -1000 -1000 -1000 -10\n', 'DontCare -1 -1 -10 532.37 176.35 542.68 185.27 -1 -1 -1 -1000 -1000 -1000 -10\n', 'DontCare -1 -1 -10 559.62 175.83 575.40 183.15 -1 -1 -1 -1000 -1000 -1000 -10\n'])]
vox_number:[[2 5 9 ... 1 1 1]]
raw_lidar:(115384, 4)


In [5]:
# Run the feature-learning network on the first entry of the voxel features /
# coordinates of the batch, then display the shape of its dense output.
A = FeatureNet(
    voxel_feature[0],
    voxel_coordinate[0],
    training=True,
    batch_size=2,
)
output = A.outputs
output.shape

TensorShape([2, 10, 400, 352, 128])

In [6]:
from utils.utils import *
# Build the RPN training targets for the labels of the current batch.
anchors = cal_anchors()
pos_equal_one, neg_equal_one, targets = cal_rpn_target(
    batch[1], [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH], anchors)

# Expand each of the two positive-mask channels to 7 columns so the mask lines
# up with the 14-channel regression map (presumably 7 box parameters per
# anchor type -- confirm against cal_rpn_target).
pos_equal_one_for_reg = np.concatenate(
    [np.tile(pos_equal_one[..., [k]], 7) for k in (0, 1)], axis=-1)

# Per-sample anchor counts, reshaped for broadcasting and clipped to >= 1 so
# they can be used as divisors even when a sample has no matching anchor.
pos_equal_one_sum, neg_equal_one_sum = (
    np.clip(np.sum(mask, axis=(1, 2, 3)).reshape(-1, 1, 1, 1), a_min=1, a_max=None)
    for mask in (pos_equal_one, neg_equal_one)
)

info = {
    "pos_equal_one": pos_equal_one,
    "neg_equal_one": neg_equal_one,
    "targets": targets,
    "pos_equal_one_for_reg": pos_equal_one_for_reg,
    "pos_equal_one_sum": pos_equal_one_sum,
    "neg_equal_one_sum": neg_equal_one_sum,
}

In [7]:
# Build the middle layers + RPN on top of the feature-learning output.
# NOTE(review): read top to bottom, `MiddleAndRPN` here is still the class
# imported from model.rpn; the notebook redefines it in a later cell.
model = MiddleAndRPN(output, info)

In [71]:
def ConvMD(M,Cin,Cout,k,s,p,inputs,training=True,activation=None,bn=True,name='conv'):
    """Zero-pad `inputs` explicitly, then apply an M-dimensional convolution.

    The padding is applied with `tf.pad` and the Keras convolution itself runs
    with ``padding="valid"``, optionally followed by batch normalisation and
    ReLU.

    Args:
        M: Number of spatial dimensions; 2 selects Conv2D, 3 selects Conv3D.
        Cin: Number of input channels. Accepted for call-site readability but
            not used; Keras infers it from `inputs`.
        Cout: Number of output channels (filters).
        k: Kernel size, forwarded to the Keras layer.
        s: Stride(s), forwarded to the Keras layer.
        p: Per-spatial-axis padding, a sequence of length `M`. Each value is
            applied to both sides of its axis.
        inputs: Channels-last tensor of rank ``M + 2``.
        training: Forwarded to BatchNormalization's `training` call argument.
        activation: If truthy, a ReLU is appended. The default (None) is
            falsy, so no activation is applied unless the caller asks for it.
        bn: If truthy, batch normalisation is applied after the convolution.
        name: Name given to the convolution, batch-norm and ReLU layers.

    Returns:
        The output tensor of the last layer applied.

    Raises:
        ValueError: If `M` is neither 2 nor 3.
    """
    # k: kernel_size, s: stride, p: padding
    if M not in (2, 3):
        # Previously an unsupported M skipped both branches and surfaced later
        # as an unrelated "variable referenced before assignment" error.
        raise ValueError("ConvMD only supports M=2 or M=3, got {!r}".format(M))

    # Padding is only valid for the spatial axes, not batch_size and channels:
    # surround the spatial values with a zero for each of those two axes.
    per_axis_pad = np.pad(np.asarray(p), (1, 1), mode='constant', constant_values=(0, 0))
    # tf.pad expects one (before, after) pair per axis.
    paddings = per_axis_pad.repeat(2).reshape(M + 2, 2)
    padded = tf.pad(tensor=inputs, paddings=paddings, mode="CONSTANT", constant_values=0)

    conv_layer = tf.keras.layers.Conv3D if M == 3 else tf.keras.layers.Conv2D
    temp_conv = conv_layer(Cout, kernel_size=k, strides=s, padding="valid", name=name)(padded)
    if bn:
        temp_conv = tf.keras.layers.BatchNormalization(name=name)(temp_conv, training=training)
    if activation:
        temp_conv = tf.keras.layers.ReLU(name=name)(temp_conv)
    return temp_conv
    
def Deconv2D(Cin,Cout,k,s,p,inputs,training=True,activation=True,bn=True,name=""):
    """Zero-pad `inputs`, then apply a 2-D transposed convolution.

    The transposed convolution uses ``padding="SAME"`` and is optionally
    followed by batch normalisation and ReLU.

    Args:
        Cin: Number of input channels. Accepted for call-site readability but
            not used; Keras infers it from `inputs`.
        Cout: Number of output channels (filters).
        k: Kernel size, forwarded to Conv2DTranspose.
        s: Stride(s), forwarded to Conv2DTranspose.
        p: (height, width) padding applied to both sides of each spatial axis
            before the transposed convolution.
        inputs: Rank-4 channels-last tensor.
        training: Forwarded to BatchNormalization's `training` call argument.
        activation: If truthy (the default), a ReLU is appended. This flag was
            previously accepted but ignored, so ReLU was always applied.
        bn: If truthy, batch normalisation is applied after the deconvolution.
        name: Name given to the Conv2DTranspose and ReLU layers (the
            batch-norm layer keeps its auto-generated name).

    Returns:
        The output tensor of the last layer applied.
    """
    # Surround the two spatial paddings with zeros for the batch and channel
    # axes, then expand to the (before, after) pairs expected by tf.pad.
    per_axis_pad = np.pad(np.asarray(p), (1, 1), mode='constant', constant_values=(0, 0))
    paddings = per_axis_pad.repeat(2).reshape(4, 2)
    padded = tf.pad(tensor=inputs, paddings=paddings, mode="CONSTANT")
    temp_conv = tf.keras.layers.Conv2DTranspose(Cout,kernel_size=k,data_format="channels_last",
                                                strides=s,padding="SAME",name=name)(padded)
    if bn:
        temp_conv = tf.keras.layers.BatchNormalization()(temp_conv,training=training)
    if activation:
        temp_conv = tf.keras.layers.ReLU(name=name)(temp_conv)
    return temp_conv

In [155]:
# Added inside the logarithms of the classification loss so log(0) never occurs.
small_addon_for_BCE = 1e-6
class MiddleAndRPN(object):
    """Convolutional middle layers and region proposal network, with losses.

    The constructor builds the whole forward pass eagerly from `inputs` and
    immediately evaluates the classification and regression losses against
    the targets supplied in `info`.

    Args:
        inputs: Output of the feature-learning network, expected to be
            [batch, 10, 400/200, 352/240, 128].
        info: Dict with the keys "targets", "pos_equal_one",
            "pos_equal_one_sum", "pos_equal_one_for_reg", "neg_equal_one" and
            "neg_equal_one_sum".
        alpha: Weight of the positive-anchor classification loss.
        beta: Weight of the negative-anchor classification loss.
        sigma: Forwarded to `smooth_l1` for the regression loss.
        training: Forwarded to every batch-normalisation layer.
        name: Currently unused.

    Attributes set: `cls_pos_loss`, `cls_neg_loss` (unreduced), `cls_loss`,
    `cls_pos_loss_rec`, `cls_neg_loss_rec`, `reg_loss`, `loss` (reduced sums),
    `prob_output` / `p_pos` (sigmoid of the 2-channel probability map),
    `delta_output` (the 14-channel regression map) and `output_shape`.
    """
    def __init__(self,inputs,info,alpha=1.5,beta=1,sigma=3,training=True,name=''):
        self.input = inputs
        self.training = training
        self.targets = info["targets"]
        self.pos_equal_one = info["pos_equal_one"]
        self.pos_equal_one_sum = info["pos_equal_one_sum"]
        self.pos_equal_one_for_reg = info["pos_equal_one_for_reg"]
        self.neg_equal_one = info["neg_equal_one"]
        self.neg_equal_one_sum = info["neg_equal_one_sum"]

        def conv_bn_relu(M, Cin, Cout, s, p, x, layer_name):
            # 3x3 conv + BN + ReLU. `training` and `activation` are passed
            # explicitly: ConvMD's defaults (training=True, activation=None)
            # would otherwise ignore self.training and skip the ReLU.
            return ConvMD(M, Cin, Cout, 3, s, p, x, training=self.training,
                          activation=True, name=layer_name)

        def rpn_block(x, Cin, Cout, first_index, num_layers):
            # One stride-2 conv followed by (num_layers - 1) stride-1 convs,
            # named conv<first_index> ... conv<first_index + num_layers - 1>.
            x = conv_bn_relu(2, Cin, Cout, (2, 2), (1, 1), x, "conv{}".format(first_index))
            for layer_index in range(first_index + 1, first_index + num_layers):
                x = conv_bn_relu(2, Cout, Cout, (1, 1), (1, 1), x, "conv{}".format(layer_index))
            return x

        # convolutional middle layers (3-D)
        temp_conv = conv_bn_relu(3, 128, 64, (2, 1, 1), (1, 1, 1), self.input, "conv1")
        temp_conv = conv_bn_relu(3, 64, 64, (1, 1, 1), (0, 1, 1), temp_conv, "conv2")
        temp_conv = conv_bn_relu(3, 64, 64, (2, 1, 1), (1, 1, 1), temp_conv, "conv3")
        # Move the depth axis last and fold it into the channel axis to get a
        # 2-D feature map (128 channels; with an input depth of 10 the three
        # convolutions leave depth 2 x 64 channels).
        temp_conv = tf.transpose(a=temp_conv, perm=[0, 2, 3, 4, 1])
        temp_conv = tf.reshape(temp_conv, [-1, cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 128])

        # rpn
        # block1: conv4-conv7
        temp_conv = rpn_block(temp_conv, 128, 128, 4, 4)
        deconv1 = Deconv2D(128, 256, 3, (1, 1), (0, 0), temp_conv, training=self.training, name="deconv1")

        # block2: conv8-conv13
        temp_conv = rpn_block(temp_conv, 128, 128, 8, 6)
        deconv2 = Deconv2D(128, 256, 2, (2, 2), (0, 0), temp_conv, training=self.training, name="deconv2")

        # block3: conv14-conv19
        temp_conv = rpn_block(temp_conv, 128, 256, 14, 6)
        deconv3 = Deconv2D(256, 256, 4, (4, 4), (0, 0), temp_conv, training=self.training, name="deconv3")

        # Fuse the three upsampled maps and apply the two 1x1 heads
        # (no batch norm, no activation).
        temp_conv = tf.concat([deconv3, deconv2, deconv1], -1)
        p_map = ConvMD(2, 768, 2, 1, (1, 1), (0, 0), temp_conv, training=self.training,
                       activation=False, bn=False, name="conv20")
        r_map = ConvMD(2, 768, 14, 1, (1, 1), (0, 0), temp_conv, training=self.training,
                       activation=False, bn=False, name='conv21')
        # Sigmoid probability per anchor channel (2 channels).
        self.p_pos = tf.sigmoid(p_map)
        self.output_shape = [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH]

        # Binary cross-entropy split into positive and negative anchors, each
        # normalised by its per-sample anchor count.
        self.cls_pos_loss = (-self.pos_equal_one*tf.math.log(self.p_pos+small_addon_for_BCE))/self.pos_equal_one_sum
        self.cls_neg_loss = (-self.neg_equal_one*tf.math.log(1-self.p_pos+small_addon_for_BCE))/self.neg_equal_one_sum
        self.cls_loss = tf.reduce_sum( input_tensor=alpha * self.cls_pos_loss + beta * self.cls_neg_loss )
        self.cls_pos_loss_rec = tf.reduce_sum( input_tensor=self.cls_pos_loss )
        self.cls_neg_loss_rec = tf.reduce_sum( input_tensor=self.cls_neg_loss )
        # Regression loss is only evaluated where the positive mask is set.
        self.reg_loss = smooth_l1(r_map * self.pos_equal_one_for_reg, self.targets *
                                      self.pos_equal_one_for_reg, sigma) / self.pos_equal_one_sum
        self.reg_loss = tf.reduce_sum(input_tensor=self.reg_loss)
        self.loss = tf.reduce_sum(input_tensor=self.cls_loss + self.reg_loss)
        self.delta_output = r_map
        self.prob_output = self.p_pos

In [152]:
from utils.utils import *
# Recompute the RPN training targets for the labels of the current batch
# (same steps as the earlier target-building cell).
anchors = cal_anchors()
pos_equal_one, neg_equal_one, targets = cal_rpn_target(
    batch[1], [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH], anchors)

# Expand each of the two positive-mask channels to 7 columns so the mask lines
# up with the 14-channel regression map (presumably 7 box parameters per
# anchor type -- confirm against cal_rpn_target).
pos_equal_one_for_reg = np.concatenate(
    [np.tile(pos_equal_one[..., [k]], 7) for k in (0, 1)], axis=-1)

# Per-sample anchor counts, reshaped for broadcasting and clipped to >= 1 so
# they can be used as divisors even when a sample has no matching anchor.
pos_equal_one_sum, neg_equal_one_sum = (
    np.clip(np.sum(mask, axis=(1, 2, 3)).reshape(-1, 1, 1, 1), a_min=1, a_max=None)
    for mask in (pos_equal_one, neg_equal_one)
)

info = {
    "pos_equal_one": pos_equal_one,
    "neg_equal_one": neg_equal_one,
    "targets": targets,
    "pos_equal_one_for_reg": pos_equal_one_for_reg,
    "pos_equal_one_sum": pos_equal_one_sum,
    "neg_equal_one_sum": neg_equal_one_sum,
}

In [158]:
# Instantiate the MiddleAndRPN class on the feature-learning output; the
# constructor builds the network and evaluates the losses against `info`.
model = MiddleAndRPN(output, info)

In [154]:
# Sanity checks on the masks and losses held by the model.
_pos_mask_shape = model.pos_equal_one.shape
print(_pos_mask_shape)
# Evaluated but not displayed: only the last expression of a cell is echoed.
model.cls_neg_loss.shape
# Per-sample positive-anchor counts (clipped to at least 1).
pos_equal_one_sum

(2, 200, 176, 2)


array([[[[ 1.]]],


       [[[10.]]]])

In [36]:
class RPN3D(object):
    """Work-in-progress wrapper that will assemble the full detection model.

    The constructor stores hyper-parameters, creates the bookkeeping
    variables (learning rate, global step, epoch) and prepares empty
    per-tower lists. Calling the instance currently only runs the
    feature-learning network on one batch per device and records its inputs;
    the RPN part is still commented out.

    Args:
        cls: Object class to detect (stored, not used yet).
        single_batch_size: Batch size per device, used when loading data and
            building the feature network.
        learning_rate: Base learning rate.
        max_gradient_norm: Accepted but not used yet.
        alpha: Positive classification loss weight (stored for the RPN).
        beta: Negative classification loss weight (stored for the RPN).
        avail_gpus: Sequence of device identifiers; at most the first two are
            used by `__call__`.
    """
    def __init__(self,cls="Car",single_batch_size=1,learning_rate=0.001,max_gradient_norm=5.0,
                 alpha=1.5,beta=1,avail_gpus=['0']):
        self.cls = cls
        self.single_batch_size = single_batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
        self.global_step = tf.Variable(1, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)
        self.alpha = alpha
        self.beta = beta
        # Copy so the instance never aliases the shared mutable default list
        # (or a list the caller may later modify).
        self.avail_gpus = list(avail_gpus)

        # Step the learning rate down by 10x at epochs 80 and 120.
        # NOTE(review): under eager execution the TF docs say
        # piecewise_constant returns a callable rather than a tensor -- confirm
        # before feeding self.lr to an optimizer.
        boundaries = [80, 120]
        values = [ self.learning_rate, self.learning_rate * 0.1, self.learning_rate * 0.01 ]
        self.lr = tf.compat.v1.train.piecewise_constant(self.epoch, boundaries, values)

        # build graph
        self.is_train = True

        # Per-tower inputs.
        self.vox_feature = []
        self.vox_number = []
        self.vox_coordinate = []
        self.targets = []
        self.pos_equal_one = []
        self.pos_equal_one_sum = []
        self.pos_equal_one_for_reg = []
        self.neg_equal_one = []
        self.neg_equal_one_sum = []

        # Per-tower outputs and gradients.
        self.delta_output = []
        self.prob_output = []
        #self.opt = tf.compat.v1.train.AdamOptimizer(lr)
        self.gradient_norm = []
        self.tower_grads = []

    def __call__(self,a):
        """Build the feature network for up to two devices and record its inputs.

        Args:
            a: Debug value that is simply printed.

        Relies on the notebook-level `train_dir`. Returns None.
        """
        print(a)
        # Only the first two devices are used.
        for _dev in self.avail_gpus[:2]:
            # NOTE(review): a fresh iterator is created for every device, so
            # each one starts from the beginning of the data stream.
            batches = iterate_data(train_dir, batch_size=self.single_batch_size)
            batch = next(batches)
            voxel_feature = batch[2]
            voxel_coordinate = batch[4]

            # The batch size and training flag now follow the instance
            # settings instead of the hard-coded 1 / True.
            feature = FeatureNet(voxel_feature[0], voxel_coordinate[0],
                                 training=self.is_train, batch_size=self.single_batch_size)
            #rpn = MiddleAndRPN(input=feature.outputs, alpha=self.alpha, beta=self.beta, training=self.is_train)
            # input
            self.vox_feature.append(feature.feature)
            #self.vox_number.append(feature.number)
            self.vox_coordinate.append(feature.coordinate)
            #self.targets.append(rpn.targets)
            #self.pos_equal_one.append(rpn.pos_equal_one)
            #self.pos_equal_one_sum.append(rpn.pos_equal_one_sum)
            #self.pos_equal_one_for_reg.append(rpn.pos_equal_one_for_reg)
            #self.neg_equal_one.append(rpn.neg_equal_one)
            #self.neg_equal_one_sum.append(rpn.neg_equal_one_sum)

In [37]:
# Smoke-test the RPN3D wrapper with its default settings; the argument is
# only echoed by __call__.
model = RPN3D()
model(a=1)

1


In [37]:
# Smoke-test the data iterator: step through the training batches and stop as
# soon as the second one has been fetched.
# batch: (tag,labels,vox_feature,vox_number,vox_coordinate,rgb,raw_lidar)
i = 0
for i, batch in enumerate(iterate_data(train_dir), start=1):
    if i == 2:
        break
    # Placeholder for the first batch: this is where the model would be built
    # and fed with the input data.
    #print("batch:",batch[2])
