## PointNet Training Module

##### Mike Pieschl

This module implements the original PointNet model described in PointNet: Deep Leanring on Point Sets for 3D Classification and Segmentation (2017)

In [1]:
import sys
import json
import pickle
import tf2onnx
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import onnxruntime as ort
import onnx

from plotly.subplots import make_subplots
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping
from ipywidgets import interact
from IPython.display import clear_output
from tqdm import tqdm

import importlib
import PointNetSegmentation
import PointCloudSet
import mat_ops

importlib.reload(PointNetSegmentation)
importlib.reload(PointCloudSet)
importlib.reload(mat_ops)

print(sys.version)

2025-11-18 22:46:22.093010: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


3.11.5 (main, Sep 11 2023, 13:54:46) [GCC 11.2.0]


In [2]:
USE_SAVED_DATA = True
SAVE_DATA = True
SHOW_DISPLAYS = True

BATCH_SIZE = 32
EPOCHS = 3
PATIENCE = 50
INPUT_SIZE = 4096
LEARNING_RATE = 0.0001
LR_DECAY_STEPS = 7000
LR_DECAY_RATE = 0.7

In [3]:
MODEL_PATH = 'models/'
MESH_PATH = 'mesh/'
FIGURE_PATH = 'figures/'
DATA_PATH = 'data/'
PALINDROME_DATA_PATH = '/mnt/c/repos/aburn/usr/hub/palindrome_playground/DataCollect/'
RANDOM_SEED = 42

MODEL_NAME = '_bertha_test'
PC_NAME = 'collect_2025.Nov.19_00.33.24.3472488.UTC__'

class_labels = ['kc46']
part_labels = ['fuselage', 'left_engine', 'right_engine', 'left_wing', 'right_wing', 'left_hstab', 'right_hstab', 'vstab', 'left_boom_stab', 'right_boom_stab', 'boom_wing', 'boom_hull', 'boom_hose']

In [4]:
physical_devices = tf.config.experimental.list_physical_devices( 'GPU' )
if( len( physical_devices ) > 0 ): 
    print( 'GPUs Available: ', len( physical_devices ) )
    tf.config.experimental.set_memory_growth( physical_devices[0], True )
else:   print( "No GPUs available." )

GPUs Available:  1


#### Parse AftrBurner output

In [5]:
pc = PointCloudSet.PointCloudSet(one_hot = True,
                                 class_labels = class_labels, 
                                 part_labels = part_labels, 
                                 pretrain_tnet = False, 
                                 network_input_width = INPUT_SIZE,
                                 batch_size = 8,
                                 rand_seed = RANDOM_SEED)
pc.build_from_aftr_output(f'{DATA_PATH}{PC_NAME}')
pc.get_info()
with open(f'{DATA_PATH}{PC_NAME}.pkl', 'wb') as p:
    pickle.dump(pc, p)

Parsing frames in data/collect_2025.Nov.19_00.33.24.3472488.UTC__...


  0%|          | 0/2726 [00:00<?, ?it/s]

100%|██████████| 2726/2726 [00:04<00:00, 635.33it/s]


data/collect_2025.Nov.19_00.33.24.3472488.UTC__ parsed:  found 276 valid frames out of 2 total.


#### Load PointCloudSet using pickle

In [6]:
# pc = None
# with open(f'{DATA_PATH}{PC_NAME}.pkl', 'rb') as p:
#     pc = pickle.load(p)

# assert pc != None, 'PointCloudSet failed to load.'

#### Define Training Networks

In [7]:
def train_save_so3(point_cloud: PointCloudSet.PointCloudSet, name: str):    
    so3_net = PointNetSegmentation.TNetRegressor(add_regularization = False)
    so3_net.build(input_shape = (None, INPUT_SIZE, 3))

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        LEARNING_RATE,
        decay_steps = LR_DECAY_STEPS,
        decay_rate = LR_DECAY_RATE,
        staircase = False
    )

    optimizer = keras.optimizers.Adam(
        learning_rate = lr_schedule
    )

    so3_net.compile(
        optimizer = optimizer,
        loss = 'mse',
        metrics = [keras.metrics.RootMeanSquaredError()]
    )

    early_stopping = EarlyStopping(
        monitor = 'val_loss',
        patience = PATIENCE,
        verbose = 1,
        restore_best_weights = True
    )

    train = point_cloud.get_train_tnet_set()
    val = point_cloud.get_val_tnet_set()

    history = so3_net.fit(x = train, epochs = EPOCHS, verbose = 1, validation_data = val, callbacks = [early_stopping])
    so3_net.input_transform.save_weights(f'{MODEL_PATH}{name}_tnet.weights.h5')

    with open(f'{MODEL_PATH}{name}_tnet_history.json', 'w') as j:
        json.dump({
            'loss': history.history['loss'],
            'val_loss': history.history['val_loss'],
            'root_mean_squared_error': history.history['root_mean_squared_error'],
            'val_root_mean_squared_error': history.history['val_root_mean_squared_error'],
        }, j)

    return so3_net

def load_so3_net(name: str):
    so3_net = PointNetSegmentation.TNetRegressor(add_regularization = False)
    so3_net.build((None, INPUT_SIZE, 3))
    so3_net.input_transform.load_weights(f'{MODEL_PATH}{name}_tnet.weights.h5', skip_mismatch = False)
    return so3_net

def train_save_pointnet_segmentater(point_cloud: PointCloudSet.PointCloudSet, name: str, use_pretrained_tnet: bool = False):    
    model = PointNetSegmentation.PointNetSegmentation(output_width = len(part_labels))
    model.build(input_shape = (None, INPUT_SIZE, 3))

    if(use_pretrained_tnet):
        model.input_transform.load_weights(f'{MODEL_PATH}{name}_tnet.weights.h5', skip_mismatch = False)
        model.input_transform.trainable = False

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        LEARNING_RATE,
        decay_steps = LR_DECAY_STEPS,
        decay_rate = LR_DECAY_RATE,
        staircase = False
    )

    optimizer = keras.optimizers.Adam(
        learning_rate = lr_schedule
    )

    model.compile(
        optimizer = optimizer,
        loss = keras.losses.CategoricalCrossentropy(from_logits = True),
        metrics = [keras.metrics.CategoricalAccuracy()]
    )

    early_stopping = EarlyStopping(
        monitor = 'val_loss',
        patience = PATIENCE,
        verbose = 1,
        restore_best_weights = True
    )

    train = point_cloud.get_train_seg_set()
    val = point_cloud.get_val_seg_set()

    history = model.fit(x = train, epochs = EPOCHS, verbose = 1, validation_data = val, callbacks = [early_stopping])

    model.save(f'{MODEL_PATH}{name}.keras')

    with open(f'{MODEL_PATH}{name}_history.json', 'w') as j:
        json.dump({
            'loss': history.history['loss'],
            'val_loss': history.history['val_loss'],
            'categorical_accuracy': history.history['categorical_accuracy'],
            'val_categorical_accuracy': history.history['val_categorical_accuracy'],
        }, j)

    return model

#### (Model40 Dataset for Verification)

In [8]:
# DATA_DIR = tf.keras.utils.get_file("modelnet.zip", "http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip", extract = True)
# DATA_DIR = os.path.join(os.path.dirname(DATA_DIR), "modelnet_extracted/ModelNet10")

# def parse_dataset(num_points = INPUT_SIZE):

#     train_points = []
#     train_labels = []
#     test_points = []
#     test_labels = []
#     class_map = {}
#     folders = glob.glob(os.path.join(DATA_DIR, "[!README]*"))
    
#     for i, folder in tqdm(enumerate(folders)):
#         print(f"Processing class {os.path.basename(folder)}")

#         # Store folder name with ID so we can retrieve later
#         class_map[i] = folder.split("/")[-1]

#         # Gath all files in folder
#         train_files = glob.glob(os.path.join(folder, "train/*"))
#         test_files = glob.glob(os.path.join(folder, "test/*"))

#         for f in train_files:
#             train_points.append(trimesh.load(f).sample(num_points))
#             train_labels.append(i)

#         for f in test_files:
#             test_points.append(trimesh.load(f).sample(num_points))
#             test_labels.append(i)
    
#     return (np.array(train_points),
#             np.array(test_points),
#             np.array(train_labels),
#             np.array(test_labels),
#             class_map)

# def augment(points, label):
#     # Jitter points
#     points += tf.random.uniform(points.shape, -0.005, 0.005, dtype = tf.float64)
    
#     # Shuffle points
#     points = tf.random.shuffle(points)

#     return points, label

# train_points, test_points, train_labels, test_labels, CLASS_MAP = parse_dataset(1024)

# train_dataset = tf.data.Dataset.from_tensor_slices((train_points, train_labels))
# test_dataset = tf.data.Dataset.from_tensor_slices((train_points, train_labels))

# train_dataset = train_dataset.shuffle(len(train_points)).map(augment).batch(BATCH_SIZE)
# test_dataset = test_dataset.shuffle(len(test_points)).batch(BATCH_SIZE)

#### Train Models

In [9]:
if(pc != None):    
    model = train_save_pointnet_segmentater(pc, f'{pc.get_description()}{MODEL_NAME}', use_pretrained_tnet = False)
    loss, accuracy = model.evaluate(pc.get_test_seg_set())

    print(loss, accuracy)

I0000 00:00:1763524021.371759   28591 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 21458 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9


Training data size:  obs = (206, 4096, 3) | labels = (206, 4096) 
Validation data size:  obs = (42, 4096, 3) | labels = (42, 4096) 
Epoch 1/3
Training: True
Layer s3_l5_output_convolution_layer has no activation function assigned.
Training: True
Layer s3_l5_output_convolution_layer has no activation function assigned.


2025-11-18 22:47:13.648208: I external/local_xla/xla/service/service.cc:163] XLA service 0x7f8a34002810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-18 22:47:13.648245: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2025-11-18 22:47:13.999902: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-18 22:47:15.283091: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91600
2025-11-18 22:47:15.660722: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-18 22:47:15.660840: I e

[1m 3/26[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 34ms/step - categorical_accuracy: 6.1035e-05 - loss: nan 

I0000 00:00:1763524049.832077   28864 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m25/26[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - categorical_accuracy: 5.1277e-04 - loss: nan

2025-11-18 22:47:31.817264: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-18 22:47:31.817365: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.










[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step - categorical_accuracy: 5.2272e-04 - loss: nanTraining: False
Layer s3_l5_output_convolution_layer has no activation function assigned.


2025-11-18 22:47:45.500533: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.







[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 766ms/step - categorical_accuracy: 5.3194e-04 - loss: nan - val_categorical_accuracy: 7.5567e-05 - val_loss: nan
Epoch 2/3
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - categorical_accuracy: 5.3194e-04 - loss: nan - val_categorical_accuracy: 7.5567e-05 - val_loss: nan
Epoch 3/3
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - categorical_accuracy: 5.3194e-04 - loss: nan - val_categorical_accuracy: 7.5567e-05 - val_loss: nan
Restoring model weights from the end of the best epoch: 1.
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step - categorical_accuracy: 0.0000e+00 - loss: nan

2025-11-18 22:47:52.013282: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.







[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - categorical_accuracy: 5.5862e-04 - loss: nan
nan 0.0008370535797439516


In [10]:
custom_objects = {
    "PointNetSegmentation": PointNetSegmentation.PointNetSegmentation,
    "TNet": PointNetSegmentation.TNet,
    "ConvLayer": PointNetSegmentation.ConvLayer,
    "DenseLayer": PointNetSegmentation.DenseLayer
}

test_reload_model = tf.keras.models.load_model(
    f'{MODEL_PATH}{MODEL_NAME}.keras',
    custom_objects = custom_objects
)

test_reload_model.compile()
test_reload_model.build(input_shape = (None, INPUT_SIZE, 3))
test_reload_model.summary()

loss, accuracy = test_reload_model.evaluate(pc.get_test_seg_set())

  saveable.load_own_variables(weights_store.get(inner_path))


ValueError: You cannot add new elements of state (variables or sub-layers) to a layer that is already built. All state must be created in the `__init__()` method or in the `build()` method.

In [None]:
test_case = np.expand_dims(pc.get_raw_test_set()['observations'][0], axis = 0)
test_labels = pc.get_raw_test_set()['part_labels'][0]

predict_labels = test_reload_model.predict(test_case).squeeze(axis = 0)
part_labels_np = np.array(part_labels)
print(f"{np.sum(np.array([part_labels[i] == test_labels[i] for i in np.argmax(predict_labels, axis = -1)]))}")

In [None]:
input_signature = [
    tf.TensorSpec((None, INPUT_SIZE, 3), dtype = tf.float32)
]

onnx_model, _ = tf2onnx.convert.from_keras(
    test_reload_model,
    input_signature = input_signature,
    opset = 13
)

onnx.save(onnx_model, f'{MODEL_PATH}{MODEL_NAME}.onnx')

In [None]:
print(f"ONNX Runtime Providers: {ort.get_available_providers()}")

session = ort.InferenceSession(
    f'{MODEL_PATH}{MODEL_NAME}.onnx',
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
)

input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

print(f'Input name: {input_name}')
print(f'Output name: {output_name}')

test_inputs_f32 = test_case.astype(np.float32)

logits_output = session.run(
    [output_name],
    {input_name: test_inputs_f32}
)

print(logits_output)

part_labels_np = np.array(part_labels)
output_np = np.squeeze(np.array(logits_output), axis = (0, 1))
print(f"{np.sum(np.array([part_labels[i] == test_labels[i] for i in np.argmax(output_np, axis = -1)]))}")