In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import scipy
import numpy as np
import h5py
import utils
import os
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
MODELNET40_PATH = "./"

In [3]:
def load_h5(h5_filename):
    """
    Data loader function.
    Input: The path of h5 filename
    Output: A tuple of (data,label)
    """
    f = h5py.File(h5_filename)
    data = f['data'][:]
    label = f['label'][:]
    return (data, label)

In [4]:
def get_category_names():
    """
    Function to list out all the categories in MODELNET40
    """
    shape_names_file = os.path.join(MODELNET40_PATH, 'shape_names.txt')
    shape_names = [line.rstrip() for line in open(shape_names_file)]
    return shape_names

In [5]:
def evaluate(true_labels,predicted_labels):
    """
    Function to calculate the total accuracy.
    Input: The ground truth labels and the predicted labels
    Output: The accuracy of the model
    """
    return np.mean(true_labels == predicted_labels)

In [6]:
K = 40
X_train = []
y_temp = []
for i in range(5):
    fn = "ply_data_train" + str(i)+".h5"
    h5_data = load_h5(fn)
    for cloud in h5_data[0]:
        X_train.append(cloud)
    for label in h5_data[1]:
        y_temp.append(label)

In [8]:
def rotate(deg, cloud):
    rad = np.deg2rad(deg)
    c = np.cos(rad)
    s = np.sin(rad)
    rotmat = np.array([[c, 0 ,s],
                       [0, 1, 0],
                       [-s, 0, c]])
    cloud = np.matmul(rotmat, cloud.T).T
    return cloud

In [9]:
def jittering(cloud):
    return cloud + np.random.normal(loc=0.0, scale=0.02, size=None)

In [10]:
def aug(cloud):
    
    seed = np.random.randint(10)
    if seed == 0:
        return rotate(np.random.randint(360),jittering(cloud))
    elif seed == 1:
        return rotate(np.random.randint(360), cloud)
    elif seed == 2:
        return jittering(cloud)
    else:
        return cloud

In [11]:
# Make sure the dimension of the h5 data is correct
X_train = np.array(X_train)
X_train = np.array([aug(x) for x in X_train])
X_train.shape

(9840, 2048, 3)

In [12]:
y_temp = np.reshape(y_temp, (-1, 1))
y_temp.shape

(9840, 1)

In [13]:
y_train = np.zeros((y_temp.shape[0], K))
for i in range(len(y_temp)):
    y_train[i][y_temp[i]] = 1

In [14]:
X_test = []
y_temp = []
for i in range(2):
    fn = "ply_data_test" + str(i)+".h5"
    h5_data = load_h5(fn)
    for cloud in h5_data[0]:
        X_test.append(cloud)
    for label in h5_data[1]:
        y_temp.append(label)

In [15]:
X_test = np.array(X_test)
y_temp = np.reshape(y_temp, (-1, 1))
y_test = np.zeros((y_temp.shape[0], K))
for i in range(len(y_temp)):
    y_test[i][y_temp[i]] = 1

In [17]:
def next_batch(X, y, batch_size):
    '''
    Return a total of `num` random samples and labels. 
    '''
    indices = np.arange(0 , len(X))
    np.random.shuffle(indices)
    selected = indices[:batch_size]
#     print(selected)
    X_ = [X[s] for s in selected]
    y_ = [y[s] for s in selected]

    return np.asarray(X_), np.asarray(y_)

In [64]:
def tNet(inputs, output_dim):
    batch_norm = tf.contrib.layers.batch_norm
    if output_dim == 3:
        mlp1 = tf.contrib.layers.conv2d(inputs=inputs, num_outputs=64, kernel_size=[1,3], padding="VALID",
                                    activation_fn=tf.nn.relu,normalizer_fn=batch_norm)
    else:
        mlp1 = tf.contrib.layers.conv2d(inputs=inputs, num_outputs=64, kernel_size=1, padding="VALID",
                                    activation_fn=tf.nn.relu,normalizer_fn=batch_norm)
    mlp2 = tf.contrib.layers.conv2d(inputs=mlp1, num_outputs=128, kernel_size=1, padding="VALID",
                                    activation_fn=tf.nn.relu,normalizer_fn=batch_norm)
    mlp3 = tf.contrib.layers.conv2d(inputs=mlp2, num_outputs=1024, kernel_size=1, padding="VALID",
                                    activation_fn=tf.nn.relu,normalizer_fn=batch_norm)
    global_feature = tf.contrib.layers.max_pool2d(inputs = mlp3, kernel_size=[2048,1], stride = 1, padding="VALID")
    fc1 = tf.contrib.layers.fully_connected(inputs=global_feature, num_outputs=512, 
                                        activation_fn=tf.nn.relu,normalizer_fn=batch_norm)
    fc2 = tf.contrib.layers.fully_connected(inputs=fc1, num_outputs=256, 
                                        activation_fn=tf.nn.relu,normalizer_fn=batch_norm)
    init = np.eye(output_dim).flatten()
    output = tf.contrib.layers.fully_connected(inputs=fc2, num_outputs = output_dim*output_dim, 
                                              biases_initializer=tf.constant_initializer(init))
    return tf.reshape(output, (-1, output_dim, output_dim))

In [65]:
tf.reset_default_graph()
# Input layer
batch_norm = tf.contrib.layers.batch_norm
X = tf.placeholder(tf.float32, shape = (None, 2048, 3))
y_ = tf.placeholder(tf.float32, shape = (None, 40))

X_in = tf.reshape(X, (-1, 2048, 3, 1))
t1 = tNet(X_in, 3)
X_transformed = tf.reshape(tf.matmul(X, t1), (-1, 2048, 3, 1))

learning_rate = tf.placeholder(tf.float32, shape=[])
decay_rate = tf.placeholder(tf.float32, shape=[])

mlp1 = tf.contrib.layers.conv2d(inputs=X_transformed, num_outputs=64, kernel_size=[1,3], padding="VALID",
                                activation_fn=tf.nn.relu, 
                                normalizer_fn=batch_norm, 
                                normalizer_params = {'decay':decay_rate})

mlp2 = tf.contrib.layers.conv2d(inputs=mlp1, num_outputs=64, kernel_size=1, padding="VALID",
                                activation_fn=tf.nn.relu,
                                normalizer_fn=batch_norm,
                                normalizer_params = {'decay':decay_rate})
t2 = tNet(mlp2, 64)
mlp2_transformed = tf.expand_dims(tf.matmul(tf.squeeze(mlp2, [2]), t2), [2])


mlp3 = tf.contrib.layers.conv2d(inputs=mlp2_transformed , num_outputs=64, kernel_size=1, padding="VALID",
                                activation_fn=tf.nn.relu,
                                normalizer_fn=batch_norm,
                                normalizer_params = {'decay':decay_rate})
mlp4 = tf.contrib.layers.conv2d(inputs=mlp3, num_outputs=128, kernel_size=1, padding="VALID",
                                activation_fn=tf.nn.relu,
                                normalizer_fn=batch_norm,
                                normalizer_params = {'decay':decay_rate})
mlp5 = tf.contrib.layers.conv2d(inputs=mlp4, num_outputs=1024, kernel_size=1, padding="VALID",
                                activation_fn=tf.nn.relu,
                                normalizer_fn=batch_norm,
                                normalizer_params = {'decay':decay_rate})
# Pooling layer
global_feature = tf.contrib.layers.max_pool2d(inputs = mlp5, kernel_size=[2048,1], stride = 1, padding="VALID")

fc1 = tf.contrib.layers.fully_connected(inputs=global_feature, num_outputs=512, 
                                        activation_fn=tf.nn.relu,
                                        normalizer_fn=batch_norm,
                                        normalizer_params = {'decay':decay_rate})
fc2 = tf.contrib.layers.fully_connected(inputs=fc1, num_outputs=256, 
                                        activation_fn=tf.nn.relu,
                                        normalizer_fn=batch_norm,
                                        normalizer_params = {'decay':decay_rate})
# Apply a dropout operation with keep ratio of 0.7
fc2 = tf.contrib.layers.dropout(fc2, keep_prob=0.7)
logits = tf.contrib.layers.fully_connected(inputs=fc2, num_outputs=K, 
                                           activation_fn=tf.nn.relu,
                                           normalizer_fn=batch_norm,
                                           normalizer_params = {'decay':decay_rate})
y = tf.nn.softmax(logits)
y = tf.reshape(y, (-1,40))

In [68]:
def train():
    lr = 0.001 # learning rate initialization
    dr = 0.50 # decay rate for batch_norm initialization
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels = y_, logits = logits))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optim = optimizer.minimize(loss)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    #Make batches to train
    epochs = 100
    num_iter = 307
    batch_size = 32
    for e in range(epochs):
        l = 0
        if (dr < 0.99): 
            dr += 0.01
        for i in range(num_iter):
            batch_X, batch_y = next_batch(X_train, y_train, batch_size)
            _, l, lrate = sess.run([optim, loss, optimizer._lr],feed_dict = {X: batch_X , y_: batch_y, 
                                                       learning_rate: lr, decay_rate : dr})
        
        if (e+1) % 10 == 0:
            correct_labels = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_labels, tf.float32))
            acc = []
            i = 0
            # Calculate the result from the test set
            # Patched due to the limitation of GPU memory
            while((i+1) * batch_size < len(X_test)):
                acc.append(sess.run(accuracy, feed_dict={X: X_test[i*batch_size:(i+1)*batch_size],
                                                         y_: y_test[i*batch_size:(i+1)*batch_size]}))
                i+=1
            ax = sum(acc) / len(acc)
            remain = sess.run(accuracy, feed_dict={X: X_test[i*batch_size:],y_: y_test[i*batch_size:]})
            rlen = len(X_test[i*batch_size:])
            ax = ax * (len(X_test)-rlen) / len(X_test) + remain * rlen / len(X_test)
            print ("epoch " + str(e+1) + "; loss = " + str(l) + "; accuracy = " + str(ax)
                   + "; lr = " + str(lrate))
        if e % 20 == 0 and e > 0:
            lr /= 2

In [69]:
train()

epoch 10; loss = 0.965813; accuracy = 0.695705024311; lr = 0.0010000000474974513
epoch 20; loss = 1.06671; accuracy = 0.726499189627; lr = 0.0010000000474974513


KeyboardInterrupt: 