# PointNet

This is an implementation of [PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation](https://arxiv.org/abs/1612.00593) using PyTorch.


## Getting started

Don't forget to turn on GPU if you want to start training directly.


**Runtime** -> **Change runtime type** -> **Hardware accelerator**



In [113]:
!pip install plyfile
from plyfile import PlyData
from scipy.spatial import Delaunay
import numpy as np
import math
import random
import os
import torch
import scipy.spatial.distance
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from google.colab import drive




In [114]:
!pip install path.py;
from path import Path



In [115]:
# Seed Python's `random` module so face sampling and rotations are reproducible.
# `random.seed` must be *called*: assigning to it replaces the function with an
# int and leaves the generator unseeded.
random.seed(42)

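The tutorial this notebook is based on used the [ModelNet10 dataset](http://3dvision.princeton.edu/projects/2014/3DShapeNets/), which comprises 10 categories, 3,991 models for training and 908 for testing. This notebook instead loads a custom dataset of point-cloud CSV files from Google Drive and extracts it directly into the Google Colab runtime.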

In [116]:
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [117]:
# Unpack the dataset archive from the mounted Drive into the local runtime.
import zipfile
zip_path = '/content/drive/My Drive/dataset_pad_csv.zip'  # Replace with your path
extract_to = '/content/my_dataset'  # Or any folder you like

# The context manager closes the archive even if extraction fails.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)
# Shell alternative: !unzip -q dataset.zip;

# Dataset root used by the rest of the notebook (wrapped in Path so `/` joins work).
path = Path("/content/my_dataset/dataset_pad_csv")

In [118]:
# Index every sub-directory of the dataset root in alphabetical order.
# For this dataset these are the split folders, not the object labels.
folders = [entry for entry in sorted(os.listdir(path)) if os.path.isdir(path/entry)]
classes = dict(zip(folders, range(len(folders))))
classes

{'test': 0, 'train': 1}

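The original dataset consists of **.off** files that contain meshes represented by *vertices* and *triangular faces*.

The data used here is stored as **.csv** files holding points on a regular grid, so we need a function that reads such a file and builds the triangular faces itself: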

In [119]:
# def read_off(file):
#     if 'OFF' != file.readline().strip():
#         raise('Not a valid OFF header')
#     n_verts, n_faces, __ = tuple([int(s) for s in file.readline().strip().split(' ')])
#     verts = [[float(s) for s in file.readline().strip().split(' ')] for i_vert in range(n_verts)]
#     faces = [[int(s) for s in file.readline().strip().split(' ')][1:] for i_face in range(n_faces)]
#     return verts, faces


def read_point_cloud_csv_to_mesh(filepath, grid_size=64):
    """
    Read a gridded point-cloud CSV and triangulate it into a mesh.

    The rows are assumed to be stored row by row on a ``grid_size`` x
    ``grid_size`` grid; every grid cell is split into two triangles.

    Parameters:
    - filepath: str or path-like, path to the CSV file. The file must have a
      header row with ``x``, ``y`` and ``z`` columns and carry the class label
      in its 9th column (index 8).
    - grid_size: int, the width/height of the assumed square grid (default=64).

    Returns:
    - verts: (N, 3) ndarray of vertex positions.
    - faces: (M, 3) int32 ndarray of triangle faces (indices into verts).
    - label: int, the label found in the first data row (one label per file
      is assumed).

    Raises:
    - ValueError: if the number of rows is not ``grid_size ** 2``.
    """
    # Parse the file once; vertices and label both come from this frame.
    df = pd.read_csv(filepath)

    # Extract vertices
    verts = df[['x', 'y', 'z']].to_numpy()

    # Ensure the number of points fits a square grid
    if len(verts) != grid_size * grid_size:
        raise ValueError(f"Expected {grid_size**2} points, but got {len(verts)}. Check the grid_size or data.")

    # Flat index of the top-left corner of every grid cell, in row-major order.
    cell_range = np.arange(grid_size - 1)
    idx = (cell_range[:, None] * grid_size + cell_range[None, :]).ravel()

    # Two triangles per cell, interleaved so each cell's pair stays adjacent.
    faces = np.empty((2 * idx.size, 3), dtype=np.int32)
    faces[0::2] = np.stack([idx, idx + 1, idx + grid_size], axis=1)
    faces[1::2] = np.stack([idx + 1, idx + grid_size + 1, idx + grid_size], axis=1)

    # Label is in the 9th column; only the first row is consulted.
    label = int(df.iloc[0, 8])

    return verts, faces, label

In [120]:
verts, faces,_ = read_point_cloud_csv_to_mesh(path/"test/cell_10_14.csv", grid_size=64)
x, y, z = verts.T

print("Number of vertices:", len(verts))
print("Number of faces:", len(faces) if faces is not None else "No faces found.")

# Print available extra fields (e.g., nx, ny, nz, curvature, density)
#print("Extra fields:", list(extras.keys()))

#with open(path/"terrain/terrain_test/cell_0_14.ply", 'r') as f:
#  verts, faces = read_off(f)

Number of vertices: 4096
Number of faces: 7938


In [121]:
i,j,k = np.array(faces).T
x,y,z = np.array(verts).T

In [122]:
len(x)

4096

Don't be scared of this function. It just displays an animated rotation of meshes and point clouds.

In [123]:
def visualize_rotate(data):
    """
    Build a plotly figure whose "Play" button orbits the camera around the z axis.

    Parameters:
    - data: list of plotly traces to display.

    Returns:
    - the plotly ``go.Figure`` with the traces, the button and the camera frames.
    """
    start_eye = (1.25, 1.25, 0.8)

    def _eye_at(theta):
        # Rotate the (x, y) part of the eye position by treating it as a complex number.
        turned = np.exp(1j*theta)*(start_eye[0]+1j*start_eye[1])
        return {'x': np.real(turned), 'y': np.imag(turned), 'z': start_eye[2]}

    # One animation frame per 0.1 rad step; the camera turns in the negative direction.
    frames = [
        {'layout': {'scene': {'camera': {'eye': _eye_at(-t)}}}}
        for t in np.arange(0, 10.26, 0.1)
    ]

    animation_options = {
        'frame': {'duration': 50, 'redraw': True},
        'transition': {'duration': 0},
        'fromcurrent': True,
        'mode': 'immediate',
    }
    play_button = {
        'label': 'Play',
        'method': 'animate',
        'args': [None, animation_options],
    }
    button_menu = {
        'type': 'buttons',
        'showactive': False,
        'y': 1,
        'x': 0.8,
        'xanchor': 'left',
        'yanchor': 'bottom',
        'pad': {'t': 45, 'r': 10},
        'buttons': [play_button],
    }

    return go.Figure(
        data=data,
        layout=go.Layout(updatemenus=[button_menu]),
        frames=frames,
    )

In [124]:
visualize_rotate([go.Mesh3d(x=x, y=y, z=z, color='lightpink', opacity=0.50, i=i,j=j,k=k)]).show()

This mesh definitely looks like a bed.

In [125]:
visualize_rotate([go.Scatter3d(x=x, y=y, z=z,
                                   mode='markers')]).show()

Unfortunately, that's not the case for its vertices. It would be difficult for PointNet to classify point clouds like this one.

First things first, let's write a function to accurately visualize point clouds so we could see vertices better.

In [126]:
def pcshow(xs,ys,zs):
    """Display the points (xs, ys, zs) as a rotatable 3D scatter plot with small outlined markers."""
    scatter = go.Scatter3d(x=xs, y=ys, z=zs, mode='markers')
    figure = visualize_rotate([scatter])

    # Small markers with a dark outline keep dense clouds readable.
    marker_style = dict(size=2, line=dict(width=2, color='DarkSlateGrey'))
    figure.update_traces(marker=marker_style, selector=dict(mode='markers'))
    figure.show()


In [127]:
pcshow(x,y,z)

## Transforms

As we want it to look more like a real bed, let's write a function to sample points on the surface uniformly.

 ### Sample points

In [128]:
import numpy as np
import random

class PointSampler(object):
    """
    Callable transform that draws ``output_size`` points from a triangle mesh.

    Faces are picked with probability proportional to their area and one
    random point is drawn inside every picked face.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, int), "Output size must be an integer."
        self.output_size = output_size

    def triangle_area(self, pt1, pt2, pt3):
        """Return the area of the triangle (pt1, pt2, pt3) using Heron's formula."""
        a = np.linalg.norm(pt1 - pt2)
        b = np.linalg.norm(pt2 - pt3)
        c = np.linalg.norm(pt3 - pt1)
        half = 0.5 * (a + b + c)
        radicand = half * (half - a) * (half - b) * (half - c)
        # Rounding can push the radicand slightly below zero for thin triangles.
        return max(radicand, 0)**0.5

    def sample_point(self, pt1, pt2, pt3):
        """Return a random point inside the triangle (pt1, pt2, pt3)."""
        # Two sorted uniforms split [0, 1] into three barycentric weights.
        lo, hi = sorted((random.random(), random.random()))
        return lo * pt1 + (hi - lo) * pt2 + (1 - hi) * pt3

    def __call__(self, mesh):
        """
        Sample the mesh.

        Parameters:
        - mesh: pair ``(verts, faces)``.

        Returns:
        - (output_size, 3) ndarray of sampled points. An empty mesh yields
          zeros; a mesh without any positive-area face yields randomly
          repeated vertices.
        """
        vertices, triangles = mesh
        vertices = np.array(vertices)
        triangles = np.array(triangles)

        if len(triangles) == 0 or len(vertices) == 0:
            print("[ERROR] Empty mesh data! Returning empty array.")
            return np.zeros((self.output_size, 3))

        # Keep only faces with valid indices and a strictly positive area.
        usable_faces = []
        usable_areas = []
        for i, face in enumerate(triangles):
            try:
                corners = (vertices[face[0]], vertices[face[1]], vertices[face[2]])
                face_area = self.triangle_area(*corners)
            except IndexError as e:
                print(f"[ERROR] Invalid face index at face {i}: {face}. Error: {e}")
                continue
            if face_area > 0:
                usable_faces.append(face)
                usable_areas.append(face_area)

        if not usable_faces:
            # Nothing to sample on: fall back to drawing vertices with replacement.
            return vertices[np.random.choice(len(vertices), size=self.output_size, replace=True)]

        picked = random.choices(usable_faces, weights=usable_areas, k=self.output_size)

        cloud = np.zeros((self.output_size, 3))
        for row, face in enumerate(picked):
            cloud[row] = self.sample_point(vertices[face[0]], vertices[face[1]], vertices[face[2]])

        return cloud

In [129]:
pointcloud = PointSampler(3000)((verts, faces))

In [130]:
pcshow(*pointcloud.T)

This pointcloud looks much more like a bed!

### Normalize

Center the point cloud at the origin and scale it so that it fits inside the unit sphere.

In [131]:
class Normalize(object):
    """Center a point cloud at its mean and scale it into the unit sphere."""

    def __call__(self, pointcloud):
        """
        Parameters:
        - pointcloud: 2-D ndarray with one point per row.

        Returns:
        - a new ndarray, mean-centered and divided by the largest distance of
          any point from the center. If every point coincides with the center
          the cloud is returned centered but unscaled.
        """
        assert len(pointcloud.shape)==2

        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0)
        radius = np.max(np.linalg.norm(norm_pointcloud, axis=1))

        # A degenerate cloud (e.g. the all-zeros fallback of PointSampler) has
        # radius 0; dividing would turn every coordinate into NaN.
        if radius > 0:
            norm_pointcloud = norm_pointcloud / radius

        return norm_pointcloud

In [132]:
norm_pointcloud = Normalize()(pointcloud)

In [133]:
pcshow(*norm_pointcloud.T)

Notice that axis limits have changed.

### Augmentations

Let's add *random rotation* of the whole pointcloud and random noise to its points.

In [134]:
class RandRotation_z(object):
    """Rotate a point cloud about the z axis by an angle drawn uniformly from [0, 2*pi)."""

    def __call__(self, pointcloud):
        """Return the rotated copy of the (n, 3) ``pointcloud``; z coordinates are unchanged."""
        assert len(pointcloud.shape)==2

        angle = random.random() * 2. * math.pi
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        rotation = np.array([
            [cos_a, -sin_a, 0],
            [sin_a,  cos_a, 0],
            [0,      0,     1],
        ])

        # Points are rows, so rotate the transposed cloud and transpose back.
        return np.dot(rotation, pointcloud.T).T

class RandomNoise(object):
    """Add independent zero-mean Gaussian noise to every coordinate of a point cloud."""

    def __init__(self, sigma=0.02):
        """
        Parameters:
        - sigma: standard deviation of the noise (default=0.02).
        """
        self.sigma = sigma

    def __call__(self, pointcloud):
        """Return a noisy copy of the 2-D ``pointcloud``; the input is left untouched."""
        assert len(pointcloud.shape)==2

        noise = np.random.normal(0, self.sigma, (pointcloud.shape))

        noisy_pointcloud = pointcloud + noise
        return  noisy_pointcloud

In [135]:
rot_pointcloud = RandRotation_z()(norm_pointcloud)
noisy_rot_pointcloud = RandomNoise()(rot_pointcloud)

In [136]:
pcshow(*noisy_rot_pointcloud.T)

### ToTensor

In [137]:
class ToTensor(object):
    """Convert a 2-D numpy point cloud into a torch tensor."""

    def __call__(self, pointcloud):
        """Return ``pointcloud`` wrapped as a tensor via ``torch.from_numpy``."""
        n_dims = len(pointcloud.shape)
        assert n_dims == 2

        tensor = torch.from_numpy(pointcloud)
        return tensor

In [138]:
ToTensor()(noisy_rot_pointcloud)

tensor([[ 0.0318,  0.0098, -0.0024],
        [ 0.0826,  0.0210,  0.0349],
        [ 0.0562,  0.0247,  0.0132],
        ...,
        [ 0.1207,  0.0521, -0.0134],
        [ 0.1494,  0.0499, -0.0142],
        [ 0.0462,  0.0674, -0.0069]], dtype=torch.float64)

In [139]:
def default_transforms():
    """Return the augmentation-free pipeline: sample 1024 points, normalize, convert to tensor."""
    steps = [PointSampler(1024), Normalize(), ToTensor()]
    return transforms.Compose(steps)

## Dataset

Now we can create a [custom PyTorch Dataset](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

In [140]:
class PointCloudData(Dataset):
    """
    Dataset of gridded point-cloud CSV files stored in ``root_dir / folder``.

    Every item is a dict with the (transformed) point cloud under
    ``'pointcloud'`` and the integer label read from the file under
    ``'category'``.
    """

    def __init__(self, root_dir, valid=False, folder="train", transform=None, grid_size=64):
        """
        Parameters:
        - root_dir: dataset root directory.
        - valid: when True, ``default_transforms()`` is used and ``transform``
          is ignored.
        - folder: sub-directory of ``root_dir`` holding the CSV files.
        - transform: callable applied to ``(verts, faces)`` when ``valid`` is
          False; when None the raw vertices are returned.
        - grid_size: grid width/height passed to the CSV reader.

        Raises:
        - FileNotFoundError: if ``root_dir / folder`` does not exist.
        """
        self.root_dir = Path(root_dir)
        self.grid_size = grid_size
        self.valid = valid
        self.transforms = transform if not valid else default_transforms()

        print(f"\n[INIT] Dataset loading from: {self.root_dir}")
        print(f"[INFO] Mode: {'Validation' if valid else 'Training'} | Folder: {folder}\n")

        folder_path = self.root_dir / folder
        if not folder_path.exists():
            raise FileNotFoundError(f"Folder does not exist: {folder_path}")

        # Class mapping (fixed based on label values in the 9th column)
        self.classes = {
            0: "terrain",
            1: "stump",
            2: "others"
        }
        # Sort the names: os.listdir order is arbitrary, and a stable order
        # keeps index -> file mapping reproducible between runs.
        self.files = [folder_path / file for file in sorted(os.listdir(folder_path)) if file.endswith('.csv')]
        print(f"[INFO] Found {len(self.files)} CSV files in '{folder}' folder.\n")

    def __len__(self):
        """Return the number of CSV files found."""
        return len(self.files)

    def __preproc__(self, file_path):
        """Load one CSV file and return ``(pointcloud, label)``; errors are logged and re-raised."""
        try:
            verts, faces, label = read_point_cloud_csv_to_mesh(file_path, grid_size=self.grid_size)
            if self.transforms:
                pointcloud = self.transforms((verts, faces))
            else:
                # No transform configured: hand back the raw vertices instead
                # of failing on an unassigned variable.
                pointcloud = np.asarray(verts)
            return pointcloud, label
        except Exception as e:
            print(f"[ERROR] Failed to process {file_path}: {e}")
            raise

    def __getitem__(self, idx):
        """Return ``{'pointcloud': ..., 'category': ...}`` for the file at ``idx``."""
        try:
            file_path = self.files[idx]
            pointcloud, label = self.__preproc__(file_path)
            return {
                'pointcloud': pointcloud,
                'category': label
            }
        except IndexError:
            print(f"[ERROR] Index out of range: {idx}")
            raise
        except Exception as e:
            print(f"[ERROR] Unexpected error at index {idx}: {e}")
            raise

Transforms for training. The paper uses 1024 points per cloud; here we sample 4096 points per cloud.

In [141]:
# Training pipeline: sample 4096 points per mesh, normalize, then augment with a
# random rotation about z and Gaussian noise before converting to a tensor.
train_transforms = transforms.Compose([
                    PointSampler(4096),
                    Normalize(),
                    RandRotation_z(),
                    RandomNoise(),
                    ToTensor()
                    ])

In [142]:
train_ds = PointCloudData(path, transform=train_transforms)
# NOTE: with valid=True the dataset uses default_transforms(); the `transform`
# argument passed here is ignored.
valid_ds = PointCloudData(path, valid=True, folder='test', transform=train_transforms)


[INIT] Dataset loading from: /content/my_dataset/dataset_pad_csv
[INFO] Mode: Training | Folder: train

[INFO] Found 520 CSV files in 'train' folder.


[INIT] Dataset loading from: /content/my_dataset/dataset_pad_csv
[INFO] Mode: Validation | Folder: test

[INFO] Found 93 CSV files in 'test' folder.



In [143]:
print('Train dataset size: ', len(train_ds))
print('Valid dataset size: ', len(valid_ds))
print('Number of classes: ', len(train_ds.classes))
print('Sample pointcloud shape: ', train_ds[0]['pointcloud'].size())
#print('Class: ', inv_classes[train_ds[0]['category']])

Train dataset size:  520
Valid dataset size:  93
Number of classes:  3
Sample pointcloud shape:  torch.Size([4096, 3])


In [144]:
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_ds, batch_size=64)

## Model

In [145]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
    """
    Transformation network: predicts one ``k x k`` matrix per sample.

    The network output is added to the identity matrix, so a zero output
    corresponds to the identity transform.
    """

    def __init__(self, k=3):
        """
        Parameters:
        - k: number of input channels and size of the predicted matrix.
        """
        super().__init__()
        self.k = k
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k*k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, input):
        """
        Parameters:
        - input: tensor of shape (bs, k, n) -- channels first, n points.

        Returns:
        - tensor of shape (bs, k, k).
        """
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))

        # Global max over the point dimension, then drop the singleton axis.
        flat = F.max_pool1d(xb, xb.size(-1)).flatten(1)
        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        # Identity offset: a constant (no gradient needed) created directly on
        # the activations' device/dtype and broadcast over the batch.
        identity = torch.eye(self.k, dtype=xb.dtype, device=xb.device)
        matrix = self.fc3(xb).view(-1, self.k, self.k) + identity
        return matrix


class Transform(nn.Module):
    """
    PointNet feature extractor.

    Applies the predicted 3x3 input transform, a shared per-point layer, the
    predicted 64x64 feature transform, two more shared layers and a global
    max pool over the points.
    """

    def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3, 64, 1)

        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        """Return ``(global_features, matrix3x3, matrix64x64)`` for ``input`` of shape (bs, 3, n)."""
        matrix3x3 = self.input_transform(input)
        # bmm wants points as rows: (bs, n, 3) @ (bs, 3, 3), then back to channels first.
        aligned_points = torch.bmm(input.transpose(1, 2), matrix3x3).transpose(1, 2)

        point_feats = F.relu(self.bn1(self.conv1(aligned_points)))

        matrix64x64 = self.feature_transform(point_feats)
        aligned_feats = torch.bmm(point_feats.transpose(1, 2), matrix64x64).transpose(1, 2)

        hidden = F.relu(self.bn2(self.conv2(aligned_feats)))
        hidden = self.bn3(self.conv3(hidden))

        # Max over all points gives one 1024-d descriptor per cloud.
        pooled = F.max_pool1d(hidden, hidden.size(-1))
        return pooled.flatten(1), matrix3x3, matrix64x64

class PointNet(nn.Module):
    """PointNet classifier: ``Transform`` feature extractor followed by a three-layer head."""

    def __init__(self, classes = 10):
        """
        Parameters:
        - classes: number of output classes (default=10).
        """
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(p=0.3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        """Return ``(log_probabilities, matrix3x3, matrix64x64)`` for a batch of clouds."""
        global_feat, matrix3x3, matrix64x64 = self.transform(input)

        hidden = F.relu(self.bn1(self.fc1(global_feat)))
        # Dropout is applied to the second layer's output before batch norm.
        hidden = self.dropout(self.fc2(hidden))
        hidden = F.relu(self.bn2(hidden))

        log_probs = self.logsoftmax(self.fc3(hidden))
        return log_probs, matrix3x3, matrix64x64

In [146]:
def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.0001):
    """
    Negative log-likelihood loss plus an orthogonality penalty on the transforms.

    Parameters:
    - outputs: (bs, classes) log-probabilities.
    - labels: (bs,) integer class indices.
    - m3x3: (bs, 3, 3) predicted input transforms.
    - m64x64: (bs, 64, 64) predicted feature transforms.
    - alpha: weight of the penalty term (default=0.0001).

    Returns:
    - scalar tensor: ``NLL + alpha * (||I - A A^T|| + ||I - B B^T||) / bs``,
      where each norm is taken over the whole batch.
    """
    criterion = torch.nn.NLLLoss()
    bs = outputs.size(0)

    # Constant identities, created on the matrices' own device/dtype and
    # broadcast over the batch; they need no gradient.
    id3x3 = torch.eye(3, dtype=m3x3.dtype, device=m3x3.device)
    id64x64 = torch.eye(64, dtype=m64x64.dtype, device=m64x64.device)

    diff3x3 = id3x3 - torch.bmm(m3x3, m3x3.transpose(1, 2))
    diff64x64 = id64x64 - torch.bmm(m64x64, m64x64.transpose(1, 2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3) + torch.norm(diff64x64)) / float(bs)

## Training loop

You can find a pretrained model [here](https://drive.google.com/open?id=1nDG0maaqoTkRkVsOLtUAR9X3kn__LMSL)

In [147]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [148]:
# PointNet() keeps its default of 10 output classes, while PointCloudData
# defines 3 labels (0-2).
# NOTE(review): confirm this is intended, e.g. to stay compatible with an existing checkpoint.
pointnet = PointNet()
pointnet.to(device);

In [149]:
optimizer = torch.optim.Adam(pointnet.parameters(), lr=0.001)

In [150]:
def train(model, train_loader, val_loader=None,  epochs=15, save=True):
    """
    Train ``model``, optionally validating and checkpointing after every epoch.

    Uses the module-level ``optimizer`` and ``device``; the optimizer must
    have been created from ``model.parameters()``.

    Parameters:
    - model: network returning ``(log_probs, m3x3, m64x64)``.
    - train_loader: iterable of batches with ``'pointcloud'`` and
      ``'category'`` entries.
    - val_loader: optional loader with the same batch layout.
    - epochs: number of passes over ``train_loader`` (default=15).
    - save: when True, write ``save_<epoch>.pth`` after every epoch.
    """
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
            optimizer.zero_grad()
            # The network expects channels first, so swap the point and coordinate axes.
            outputs, m3x3, m64x64 = model(inputs.transpose(1,2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 10 == 9:    # print every 10 mini-batches
                    print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                        (epoch + 1, i + 1, len(train_loader), running_loss / 10))
                    running_loss = 0.0

        model.eval()

        # validation
        if val_loader is not None:
            correct = total = 0
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
                    outputs, __, __ = model(inputs.transpose(1,2))
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            # An empty loader yields no samples; skip the report rather than divide by zero.
            if total:
                val_acc = 100. * correct / total
                print('Valid accuracy: %d %%' % val_acc)

        # save the model that was actually trained
        if save:
            torch.save(model.state_dict(), "save_"+str(epoch)+".pth")

In [None]:
train(pointnet, train_loader, valid_loader,  save=True)

[Epoch: 1, Batch:   10 /   17], loss: 1.972


## Test

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# NOTE(review): train() writes checkpoints named "save_<epoch>.pth"; make sure a
# file called 'save.pth' actually exists (e.g. rename the checkpoint to evaluate).
pointnet = PointNet()
pointnet.load_state_dict(torch.load('save.pth'))
# Switch to inference mode before evaluating.
pointnet.eval();

In [None]:
# Run the model over the validation set and collect predictions and targets.
all_preds, all_labels = [], []
with torch.no_grad():
    for i, data in enumerate(valid_loader):
        print('Batch [%4d / %4d]' % (i+1, len(valid_loader)))

        inputs = data['pointcloud'].float()
        labels = data['category']
        # The network expects channels first.
        outputs, __, __ = pointnet(inputs.transpose(1,2))
        _, preds = torch.max(outputs.data, 1)
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())



In [None]:
cm = confusion_matrix(all_labels, all_preds);
cm

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

# function from https://deeplizard.com/learn/video/0LhiS6yu2qQ
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix on the current matplotlib figure.

    Parameters:
    - cm: 2-D ndarray, rows are true labels and columns are predicted labels.
    - classes: tick labels for both axes.
    - normalize: when True, every row is divided by its sum before plotting.
    - title: plot title.
    - cmap: matplotlib colormap.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    cell_format = '.2f' if normalize else 'd'
    # Cells above half the maximum get white text so they stay readable.
    midpoint = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_color = "white" if value > midpoint else "black"
            plt.text(col, row, format(value, cell_format), horizontalalignment="center", color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
plt.figure(figsize=(8,8))
# Label the axes with the dataset's label names (terrain / stump / others); the
# module-level `classes` dict only holds the split folder names.
plot_confusion_matrix(cm, list(train_ds.classes.values()), normalize=True)

In [None]:
plt.figure(figsize=(8,8))
# Label the axes with the dataset's label names (terrain / stump / others); the
# module-level `classes` dict only holds the split folder names.
plot_confusion_matrix(cm, list(train_ds.classes.values()), normalize=False)