PointNet aims to capture both local and global features from point cloud data. Local features pertain to the characteristics of individual points or small local neighborhoods, while global features represent the overall structure of the entire point cloud.

PointNet employs shared MLPs to extract features from individual points independently. Each point is processed by the same MLP, so the weights are shared across points and the per-point features do not depend on the order in which the points are listed.

After processing individual points through shared MLPs, PointNet utilizes max pooling to aggregate features across all points, producing a fixed-size global feature vector.
This step effectively summarizes the local features extracted from the point cloud.

In [1]:
# import libraries
import os
import glob
import trimesh
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
from matplotlib import pyplot as plt
tf.random.set_seed(42)

In [2]:
# load the dataset
root_dir = './data/ModelNet10/'
# One sub-directory per class. glob returns matches in arbitrary order and
# "*" also matches plain files, so keep directories only and sort them: the
# position of a folder in this list is later used as its integer class label,
# which must be reproducible across runs and platforms.
folders = sorted(
    path
    for path in glob.glob(os.path.join(root_dir, "*"))
    if os.path.isdir(path)
)
folders

['./data/ModelNet10\\bathtub',
 './data/ModelNet10\\bed',
 './data/ModelNet10\\chair',
 './data/ModelNet10\\desk',
 './data/ModelNet10\\dresser',
 './data/ModelNet10\\monitor',
 './data/ModelNet10\\night_stand',
 './data/ModelNet10\\sofa',
 './data/ModelNet10\\table',
 './data/ModelNet10\\toilet']

In [3]:
# data preprocessing
def preprocess_data(num_points=2048, class_folders=None):
    """Sample a fixed-size point cloud from every mesh of every class folder.

    Each class folder is searched for files under ``train/`` and ``test/``;
    every file is loaded with ``trimesh.load`` and ``num_points`` points are
    drawn from it with ``.sample``. The position of a folder in the sequence
    is used as the integer label of its samples.

    Args:
        num_points: Number of points sampled from each mesh.
        class_folders: Sequence of class directories, one per class. When
            ``None`` (the default) the module-level ``folders`` list is used.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test, classes)``: four NumPy
        arrays with the sampled clouds and their labels, and a dict mapping
        each integer label to its class (directory) name.
    """
    if class_folders is None:
        class_folders = folders

    X_train, y_train = [], []
    X_test, y_test = [], []
    classes = {}

    for label, folder in enumerate(class_folders):
        # basename(normpath(...)) understands the platform separator (glob
        # yields backslashes on Windows) and ignores a trailing separator;
        # splitting on "/" by hand would return "ModelNet10\\bathtub" there.
        class_name = os.path.basename(os.path.normpath(folder))
        print("processing class: ", class_name)
        classes[label] = class_name

        for mesh_file in glob.glob(os.path.join(folder, "train/*")):
            X_train.append(trimesh.load(mesh_file).sample(num_points))
            y_train.append(label)

        for mesh_file in glob.glob(os.path.join(folder, "test/*")):
            X_test.append(trimesh.load(mesh_file).sample(num_points))
            y_test.append(label)

    return (
        np.array(X_train),
        np.array(y_train),
        np.array(X_test),
        np.array(y_test),
        classes,
    )

In [4]:
# Size of every sampled cloud, and the number of target categories (one per
# class folder found on disk).
num_points = 2048
num_classes = len(folders)  # 10
# Build the train/test arrays plus the label -> class-name lookup.
(X_train, y_train, X_test, y_test, classes) = preprocess_data(num_points=num_points)

processing class:  bathtub
processing class:  bed
processing class:  chair
processing class:  desk
processing class:  dresser
processing class:  monitor
processing class:  night_stand
processing class:  sofa
processing class:  table
processing class:  toilet


In [12]:
def augment(points, label):
    """Jitter and reorder a single point cloud for training-time augmentation.

    Args:
        points: Floating-point tensor holding one cloud; its first axis
            indexes the points.
        label: Label of the cloud, returned unchanged.

    Returns:
        ``(points, label)`` with uniform noise in [-0.005, 0.005) added to
        every coordinate and the points permuted along the first axis.
    """
    # Take shape and dtype from the tensor itself so the function also works
    # for float32 clouds or when the static shape is not fully known, instead
    # of assuming float64 data with a fully defined shape.
    noise = tf.random.uniform(tf.shape(points), -0.005, 0.005, dtype=points.dtype)
    points = points + noise  # add random noise
    # tf.random.shuffle permutes along the first dimension only, so each
    # point's coordinates stay together while the order of the points changes.
    points = tf.random.shuffle(points)
    return points, label

# Wrap each (clouds, labels) pair of arrays as a tf.data pipeline that yields
# one (points, label) example at a time.
train_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices(split)
    for split in ((X_train, y_train), (X_test, y_test))
)

In [13]:
# Training pipeline: shuffle with a buffer as large as the split, apply the
# per-sample augmentation, then group into batches of 32.
train_dataset = train_dataset.shuffle(len(X_train))
train_dataset = train_dataset.map(augment)
train_dataset = train_dataset.batch(32)

# Test pipeline: shuffled and batched the same way, but without augmentation.
test_dataset = test_dataset.shuffle(len(X_test))
test_dataset = test_dataset.batch(32)

In [15]:
# PointNet Model
def conv_layer(x, filters):
    """Apply a shared per-point layer: Conv1D(kernel_size=1) -> BatchNorm -> ReLU.

    Args:
        x: Input tensor fed to the convolution.
        filters: Number of output channels of the convolution.

    Returns:
        The activated output tensor.
    """
    # A kernel of size 1 applies the same weights to every position on its own.
    conv = layers.Conv1D(filters, kernel_size=1, padding="valid")
    norm = layers.BatchNormalization(momentum=0.0)
    relu = layers.Activation("relu")
    return relu(norm(conv(x)))

def dense_layer(x, filters):
    """Apply a fully connected block: Dense -> BatchNorm -> ReLU.

    Args:
        x: Input tensor fed to the dense layer.
        filters: Number of units of the dense layer.

    Returns:
        The activated output tensor.
    """
    dense = layers.Dense(filters)
    norm = layers.BatchNormalization(momentum=0.0)
    relu = layers.Activation("relu")
    return relu(norm(dense(x)))

In [None]:
class OrthogonalRegularizer(keras.regularizers.Regularizer):
    """Penalty that pushes each predicted transform towards an orthogonal matrix.

    The regularized tensor is reshaped into one ``num_features x num_features``
    matrix ``A`` per sample, and ``l2reg * sum((A @ A^T - I) ** 2)`` is summed
    over all samples.

    Args:
        num_features: Side length of the square transform matrix.
        l2reg: Weight applied to the squared deviation from the identity.
    """

    def __init__(self, num_features, l2reg=0.001):
        self.num_features = num_features
        self.l2reg = l2reg
        self.eye = tf.eye(num_features)

    def __call__(self, x):
        """Return the scalar orthogonality penalty for the activations ``x``."""
        x = tf.reshape(x, (-1, self.num_features, self.num_features))
        # Batched A @ A^T: matmul multiplies each sample's matrix only with its
        # own transpose. A tensordot over the last axes would instead contract
        # every sample with every other sample in the batch, so the penalty
        # would no longer measure the orthogonality of individual transforms.
        xxt = tf.matmul(x, x, transpose_b=True)
        return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))

    def get_config(self):
        """Return the constructor arguments so the regularizer can be serialized."""
        return {"num_features": self.num_features, "l2reg": self.l2reg}

In [None]:
def tnet(inputs, num_features):
    """Predict a ``num_features x num_features`` transform and apply it to ``inputs``.

    A small network (three per-point conv blocks, global max pooling, two
    dense blocks) regresses the flattened transform matrix, which is then
    multiplied with the input features.

    Args:
        inputs: Tensor whose last axis has ``num_features`` entries.
        num_features: Size of the feature axis to be transformed.

    Returns:
        ``inputs`` multiplied by the predicted transform.
    """
    # Zero kernel + identity bias: the predicted transform starts out as the
    # identity matrix regardless of the input.
    identity_bias = keras.initializers.Constant(np.eye(num_features).flatten())
    ortho_penalty = OrthogonalRegularizer(num_features)

    feats = inputs
    for width in (32, 64, 512):
        feats = conv_layer(feats, width)
    feats = layers.GlobalMaxPooling1D()(feats)
    for width in (256, 128):
        feats = dense_layer(feats, width)

    flat_transform = layers.Dense(
        num_features * num_features,
        kernel_initializer="zeros",
        bias_initializer=identity_bias,
        activity_regularizer=ortho_penalty,
    )(feats)
    transform = layers.Reshape((num_features, num_features))(flat_transform)

    # Contract the feature axis of the input with the rows of the transform.
    return layers.Dot(axes=(2, 1))([inputs, transform])

In [None]:
# Each sample is a cloud of `num_points` points with 3 coordinates each.
inputs = keras.Input(shape=(num_points, 3))

# Input transform, then the first pair of shared per-point blocks.
x = conv_layer(conv_layer(tnet(inputs, 3), 32), 32)
# Feature transform, then the second stack of shared per-point blocks.
x = conv_layer(conv_layer(conv_layer(tnet(x, 32), 32), 64), 512)
# Max over the point axis collapses the cloud into one global feature vector.
x = layers.GlobalMaxPooling1D()(x)

# Classification head with dropout after each dense block.
x = dense_layer(x, 256)
x = layers.Dropout(0.3)(x)
x = dense_layer(x, 128)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="pointnet")
model.summary()

In [None]:
# Labels are plain integer class ids, hence the "sparse" loss and metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
model.fit(train_dataset, validation_data=test_dataset, epochs=50)

In [None]:
# Pull one batch from the test pipeline and keep its first eight samples.
data = test_dataset.take(1)
points, labels = list(data)[0]
points, labels = points[:8, ...], labels[:8, ...]

# run test data through model and keep the most probable class per sample
preds = tf.math.argmax(model.predict(points), -1)

points = points.numpy()

# plot points with predicted class and label in a 2 x 4 grid of 3D axes
fig = plt.figure(figsize=(15, 10))
for i in range(8):
    ax = fig.add_subplot(2, 4, i + 1, projection="3d")
    # Transposing one cloud gives its three coordinate columns as rows, which
    # unpack into the x, y and z arguments of scatter.
    ax.scatter(*points[i].T)
    ax.set_title(
        "pred: {:}, label: {:}".format(
            classes[preds[i].numpy()],
            classes[labels[i].numpy()],
        )
    )
    ax.set_axis_off()
plt.show()