# Problem 1 Vanilla PointNet

In [None]:
import tensorflow as tf
import utils
import numpy as np

In [3]:
# load training data and labels
data0 = utils.load_h5("ply_data_train0.h5")
data1 = utils.load_h5("ply_data_train1.h5")
data2 = utils.load_h5("ply_data_train2.h5")
data3 = utils.load_h5("ply_data_train3.h5")
data4 = utils.load_h5("ply_data_train4.h5")

In [4]:
# aggregate training data, training label
train_data = np.append(data0[0], data1[0], axis=0)
train_data = np.append(train_data, data2[0], axis=0)
train_data = np.append(train_data, data3[0], axis=0)
train_data = np.append(train_data, data4[0], axis=0)
print(np.shape(train_data))

train_labels = np.append(data0[1], data1[1], axis=0)
train_labels = np.append(train_labels, data2[1], axis=0)
train_labels = np.append(train_labels, data3[1], axis=0)
train_labels = np.append(train_labels, data4[1], axis=0)
print(np.shape(train_labels))

(9840, 2048, 3)
(9840, 1)


In [5]:
# load test data
test0 = utils.load_h5("ply_data_test0.h5")
test1 = utils.load_h5("ply_data_test1.h5")
# print(np.shape(test1[0]))

In [6]:
# aggregate test data, test label
test_data = np.append(test0[0], test1[0], axis=0)
test_labels = np.append(test0[1], test1[1], axis=0)
print(np.shape(test_data))
print(np.shape(test_labels))

(2468, 2048, 3)
(2468, 1)


In [7]:
train_labels_one_hot = []
for l in train_labels:
    one_hot = np.zeros(40, dtype=np.int)
    one_hot[l[0]] = 1
    train_labels_one_hot.append(one_hot)
train_labels_one_hot = np.array(train_labels_one_hot)
print(np.shape(train_labels_one_hot))

(9840, 40)


In [8]:
# one hot encode test_labels
test_labels_one_hot = []
for l in test_labels:
    one_hot = np.zeros(40, dtype=np.int)
    one_hot[l[0]] = 1
    test_labels_one_hot.append(one_hot)
test_labels_one_hot = np.array(test_labels_one_hot)
print(np.shape(test_labels_one_hot))

(2468, 40)


In [9]:
batch_size = 32

In [10]:
cloud = tf.placeholder(tf.float32, [None, 2048, 3])
print(np.shape(cloud))

(?, 2048, 3)


In [12]:
# main network
pt_cloud = tf.expand_dims(cloud, -1)

# placeholder for one-hot labels
y = tf.placeholder(tf.float32, [None, 40])

# placeholder for labels
y_labels = tf.placeholder(tf.int64, [None])

# 1st mlp layer
layer_conv1 = tf.contrib.layers.conv2d(inputs=pt_cloud, num_outputs=64, kernel_size=[1, 3], padding="VALID", activation_fn=tf.nn.relu)
layer_conv1 = tf.contrib.layers.batch_norm(layer_conv1)

# 2nd mlp layer
layer_conv2 = tf.contrib.layers.conv2d(inputs=layer_conv1, num_outputs=64, kernel_size=[1, 1], padding="VALID", activation_fn=tf.nn.relu)
layer_conv2 = tf.contrib.layers.batch_norm(layer_conv2)


# 3rd mlp layer
layer_conv3 = tf.contrib.layers.conv2d(inputs=layer_conv2, num_outputs=64, kernel_size=[1, 1], padding="VALID", activation_fn=tf.nn.relu)
layer_conv3 = tf.contrib.layers.batch_norm(layer_conv3)


# 4th cnn
layer_conv4 = tf.contrib.layers.conv2d(inputs=layer_conv3, num_outputs=128, kernel_size=[1, 1], padding="VALID", activation_fn=tf.nn.relu)
layer_conv4 = tf.contrib.layers.batch_norm(layer_conv4)


# 5th cnn
layer_conv5 = tf.contrib.layers.conv2d(inputs=layer_conv4, num_outputs=1024, kernel_size=[1, 1], padding="VALID", activation_fn=tf.nn.relu)
layer_conv5 = tf.contrib.layers.batch_norm(layer_conv5)

# max pooling
max_pool = tf.contrib.layers.max_pool2d(inputs=layer_conv5, kernel_size=[2048, 1], stride=1, padding="VALID")

# fnn1
layer_fnn1 = tf.contrib.layers.fully_connected(inputs=max_pool, num_outputs=512, activation_fn=tf.nn.relu)
layer_fnn1 = tf.contrib.layers.batch_norm(layer_fnn1)

# fnn2
layer_fnn2 = tf.contrib.layers.fully_connected(inputs=layer_fnn1, num_outputs=256, activation_fn=tf.nn.relu)
layer_fnn2 = tf.contrib.layers.batch_norm(layer_fnn2)

layer_fnn2 = tf.contrib.layers.dropout(inputs=layer_fnn2, keep_prob=0.7)

# fnn3
logits = tf.contrib.layers.fully_connected(inputs=layer_fnn2, num_outputs=40, activation_fn=tf.nn.relu)
logits = tf.squeeze(logits, [1, 2])

# softmax
output = tf.nn.softmax(logits)
output_class = tf.argmax(output,axis=1)

In [13]:
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.001
l_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 20*batch_size, 0.5, staircase=True)

# loss function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = y))

# optimizer
optim = tf.train.AdamOptimizer(learning_rate=l_rate)
optimizer = optim.minimize(loss, global_step=global_step)

In [14]:
# accuracy
correct_labels = tf.equal(output_class, y_labels)
accuracy = tf.reduce_mean(tf.cast(correct_labels, tf.float32))

In [15]:
from random import *
import math
# only rotate against y axis
def rotate(pt_cloud):
    angle = np.deg2rad(randint(0, 360))
    R = np.array([[math.cos(angle), 0, math.sin(angle)], [0, 1, 0], [-math.sin(angle), 0, math.cos(angle)]])
    rotated_pt_cloud = np.matmul(R, pt_cloud.T).T
    return rotated_pt_cloud

def jitter(pt_cloud):
    return pt_cloud + np.random.normal(0, 0.02, None)

def augment(pt_cloud):
    seed = np.random.randint(10)
    if seed == 0:
        return rotate(jitter(pt_cloud))
    elif seed == 1:
        return rotate(pt_cloud)
    elif seed == 2:
        return jitter(pt_cloud)
    else:
        return pt_cloud

In [16]:
train_data = np.array([augment(x) for x in train_data])

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

num_iter = int(200*batch_size)

for i in range(num_iter):
    idx = np.random.choice(9840, [batch_size], False)
    batch_img_vanilla = train_data[idx][:]
    batch_y = train_labels_one_hot[idx][:]
    _, l, lr= sess.run([optimizer, loss, optim._lr], feed_dict = {cloud: batch_img_vanilla , y: batch_y})
    if i % (batch_size*20) == 0:
        print(l, lr)

4.33436 0.001
1.34029 0.0005
0.392139 0.00025
0.28361 0.000125
0.302493 6.25e-05
0.204395 3.125e-05
0.27048 1.5625e-05
0.253359 7.8125e-06
0.138807 3.90625e-06
0.122358 1.95313e-06


In [17]:
right_count = 0
i = 0
while i < len(test_data):
    j = min(i + 32, len(test_data))
    correct_labels = tf.equal(output_class, y_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_labels, tf.float32))
    # compute accuracy on test data
    labels = np.array([label.argmax() for label in test_labels_one_hot[i:j]])
    accuracy = sess.run([accuracy],feed_dict = {cloud: test_data[i:j], y: test_labels_one_hot[i:j], y_labels: labels})
    right_count = right_count + accuracy[0] * (j - i)
    i += 32
final_accuracy = right_count / len(test_data)
print("Final Accuracy", final_accuracy)

Final Accuracy 0.849270664506
