# Imports 

In [2]:
import os 
import sys 
import json 
import numpy as np 
import pandas as pd 
from tqdm import tqdm

# For plotting 
import matplotlib.pyplot as plt 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots 

# append path to custom scripts 
sys.path.append('/kaggle/input/lidar-od-scripts/gpuVersion/gpuVersion/')

# torch imports 
import torch 
import torch.nn as nn 

from visual_utils import plot_pc_data3d, plot_bboxes_3d 

In [None]:
plotly.offline.init_notebook_mode(connected=True)

# Shapenet Core Dataset Exploration 

- shapenet_core is a subset of the original ShapeNet dataset 
- It contains single clean 3D models, manually verified category and alignment annotations
- 16 classes from 12 categories 

In [3]:
# Path to data folder 
DATA_FOLDER = '/kaggle/input/shapenet-core-seg/Shapenetcore_benchmark/'

class_name_id_map = {'Airplane': 0, 'Bag': 1, 'Cap': 2, 'Car': 3, 'Chair': 4, 
                'Earphone': 5, 'Guitar': 6, 'Knife': 7, 'Lamp': 8, 'Laptop': 9,
                'Motorbike': 10, 'Mug': 11, 'Pistol': 12, 'Rocket': 13, 
                'Skateboard': 14, 'Table': 15}

class_id_name_map = {v:k for k,v in class_name_id_map.items()}

PCD_SCENE=dict(xaxis=dict(visible=False), yaxis=dict(visible=False), zaxis=dict(visible=False), aspectmode='data')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
train_split_data = json.load(open('/kaggle/input/shapenet-core-seg/Shapenetcore_benchmark/train_split.json', 'r'))
train_class_count = np.array([x[0] for x in train_split_data])

# plottin classwise count in the train set 
train_dist_plots = [go.Bar(x=list(class_name_id_map.keys()), y= np.bincount(train_class_count))]
layout = dict(template="plotly_dark", title="Shapenet Core Train Distribution", title_x=0.5)
fig = go.Figure(data=train_dist_plots, layout=layout)
fig.show(renderer='iframe')

In [5]:
import glob
points_list = glob.glob("/kaggle/input/shapenet-core-seg/Shapenetcore_benchmark/04379243/points/*.npy")
print(len(points_list))

5263


In [6]:
import random
idx = random.randint(0,len(points_list))

# load point cloud data
points = np.load(points_list[idx])
print(f"points shape = {points.shape}, min xyz = {np.min(points, axis=0)}, max xyz = {np.max(points, axis=0)}")

# load seg labels 
seg_file_path = points_list[idx].replace('points', 'points_label').replace('.npy', '.seg')
seg_labels = np.loadtxt(seg_file_path).astype(np.int8)
print(f"seg_labels shape = {seg_labels.shape}, unique labels = {np.unique(seg_labels)}")

points shape = (2685, 3), min xyz = [-0.25886 -0.16786 -0.38479], max xyz = [0.27159 0.16786 0.38475]
seg_labels shape = (2685,), unique labels = [1 2 3]


In [7]:
# there is a maxof 16 parts in an object in Shapenet core Dataset 
# Let us create random colours according to part labels 
NUM_PARTS = 16
PART_COLORS = np.random.choice(range(255),size=(NUM_PARTS,3))

In [8]:
pc_plots = plot_pc_data3d(x = points[:,0], y = points[:,1], z = points[:,2], apply_color_gradient=False, color = PART_COLORS[seg_labels -1], marker_size =2) 
layout = dict(template = 'plotly_dark', title = 'Raw Point cloud', scene = PCD_SCENE, title_x = 0.5) 
fig = go.Figure(data = pc_plots, layout = layout)
fig.show(renderer='iframe')

# Building a Custom Dataset 
- Creating a dataset object
- Creating a PyTorch dataloader

In [9]:
class ShapeNetDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, split_type, num_samples=2500):
        self.root_dir = root_dir
        self.split_type = split_type
        self.num_samples = num_samples
        with open(os.path.join(root_dir, f'{self.split_type}_split.json'), 'r') as f:
            self.split_data = json.load(f) 

    def __getitem__(self, index):
        # read point cloud data
        class_id, class_name, point_cloud_path, seg_label_path = self.split_data[index]
        
        # Point cloud data 
        point_cloud_path = os.path.join(self.root_dir, point_cloud_path)
        pc_data = np.load(point_cloud_path)

        # Seg labels 
        # the -1 is to change part values from [1 - 16] to [0 - 15] 
        # this helps us run segmentation 
        pc_seg_labels = np.loadtxt(os.path.join(self.root_dir, seg_label_path)).astype(np.int8) - 1
        # Sample a fixed number of points 
        num_points = pc_data.shape[0]
        if num_points < self.num_samples:
            additional_indices = np.random.choice(num_points, self.num_samples - num_points, replace=True)
            pc_data = np.concatenate((pc_data, pc_data[additional_indices]), axis=0)
            pc_seg_labels = np.concatenate((pc_seg_labels, pc_seg_labels[additional_indices]), axis=0)
        else: 
            # Randomly select max_num_point samples from the available points 
            random_indices = np.random.choice(num_points, self.num_samples)
            pc_data = pc_data[random_indices]
            pc_seg_labels = pc_seg_labels[random_indices]

        # return variables 
        data_dict= {}
        data_dict['class_id'] = class_id
        data_dict['class_name'] = class_name        
        data_dict['points'] = pc_data 
        data_dict['seg_labels'] = pc_seg_labels 
        return data_dict   


    def __len__(self):
        return len(self.split_data)

In [10]:
train_set = ShapeNetDataset(root_dir = DATA_FOLDER, split_type='train')
val_set = ShapeNetDataset(root_dir = DATA_FOLDER, split_type='val')
test_set = ShapeNetDataset(root_dir = DATA_FOLDER, split_type='test')
print(f"Train set length = {len(train_set)}")
print(f"Validation set length = {len(val_set)}")
print(f"Test set length = {len(test_set)}")

Train set length = 12137
Validation set length = 1861
Test set length = 2848


In [11]:
data_dict= train_set[25]
print(f"Keys in dataset sample = {list(data_dict.keys())}")
points = data_dict['points']
seg_labels = data_dict['seg_labels']
print(f"class_id = {data_dict['class_id']}, class_name = {data_dict['class_name']}")

Keys in dataset sample = ['class_id', 'class_name', 'points', 'seg_labels']
class_id = 15, class_name = Table


In [12]:
pc_plots = plot_pc_data3d(x=points[:,0], y=points[:,1], z=points[:,2], apply_color_gradient=False, color=PART_COLORS[seg_labels - 1], marker_size=2)
layout = dict(template="plotly_dark", title=f"{data_dict['class_name']}, class id = {data_dict['class_id']}, from Shapenetcore Torch Dataset", scene=PCD_SCENE, title_x=0.5)
fig = go.Figure(data=pc_plots, layout=layout)      
fig.show(renderer='iframe')

Data loader for Custom Dataset 

In [13]:
def collate_fn(batch_list):
    ret = {}
    ret['class_id'] =  torch.from_numpy(np.array([x['class_id'] for x in batch_list])).long()
    ret['class_name'] = np.array([x['class_name'] for x in batch_list])
    ret['points'] = torch.from_numpy(np.stack([x['points'] for x in batch_list], axis=0)).float()
    ret['seg_labels'] = torch.from_numpy(np.stack([x['seg_labels'] for x in batch_list], axis=0)).long()
    return ret

In [14]:
# Testing loader 
sample_loader = torch.utils.data.DataLoader(train_set, batch_size=16, num_workers=2, shuffle=True, collate_fn=collate_fn) 
dataloader_iter = iter(sample_loader)   
batch_dict = next(dataloader_iter)
print(batch_dict.keys())
for key in ['points','seg_labels', 'class_id']:
    print(f"batch_dict[{key}].shape = {batch_dict[key].shape}")

dict_keys(['class_id', 'class_name', 'points', 'seg_labels'])
batch_dict[points].shape = torch.Size([16, 2500, 3])
batch_dict[seg_labels].shape = torch.Size([16, 2500])
batch_dict[class_id].shape = torch.Size([16])


In [15]:
batchSize= 32
workers = 2
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batchSize, shuffle=True, num_workers=workers, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batchSize, shuffle=True, num_workers=workers, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batchSize,shuffle=True, num_workers=workers, collate_fn=collate_fn)

# PointNet 
Architecture taken from [this paper](https://arxiv.org/pdf/1612.00593)
In depth implementation and explaination can be found in [this notebook](https://github.com/BharathU2207/Point_Net/blob/main/PointNet_architecture_from_Scratch.ipynb)

The PointNet comprises several parts 
- T-Nets
- FeatureNet
- Classification or Segmentation Head 

## T-net

In [16]:
import torch.nn.functional as F 
from torch.autograd import Variable

In [17]:
# Setting up the T net class
class Tnet(nn.Module):
  ''' T-Net learns a transformation matrix with a specified dimension'''
  def __init__(self, dim, num_points = 2500):
    super(Tnet, self).__init__()

    # Dimensions for transform matrix
    self.dim = dim

    self.conv1 = nn.Conv1d(dim, 64, kernel_size = 1)
    self.conv2 = nn.Conv1d(64, 128, kernel_size =1)
    self.conv3 = nn.Conv1d(128, 1024, kernel_size =1)

    self.linear1 = nn.Linear(1024, 512)
    self.linear2 = nn.Linear(512, 256)
    self.linear3 = nn.Linear(256, dim**2) # This allows the class to be used for both input spaces

    self.bn1 = nn.BatchNorm1d(64)
    self.bn2 = nn.BatchNorm1d(128)
    self.bn3 = nn.BatchNorm1d(1024)
    self.bn4 = nn.BatchNorm1d(512)
    self.bn5 = nn.BatchNorm1d(256)

    self.max_pool = nn.MaxPool1d(kernel_size = num_points)

  def forward(self, x):
    bs = x.shape[0]

    # pass through shared MLP layers (conv1d)
    x = self.bn1(F.relu(self.conv1(x)))
    x = self.bn2(F.relu(self.conv2(x)))
    x = self.bn3(F.relu(self.conv3(x)))

    # max pool over num points
    x = self.max_pool(x).view(bs, -1)
    '''.view(bs , -1) flattens the output of the maxpool from a 3D tensor [bs, num_channels, 1] to
    a 2d tensor [bs, num_features]. The -1 tells PyTorch to calculate the total number of rows
    to ensure that the number of elements is consistent.
    '''

    # Pass through the MLP
    x = self.bn4(F.relu(self.linear1(x)))
    x = self.bn5(F.relu(self.linear2(x)))
    x = self.linear3(x)

    # initialize the identity matrix
    iden = torch.eye(self.dim, requires_grad = True).repeat(bs ,1, 1)

    if x.is_cuda:
      iden = iden.cuda()

    x = x.view(-1, self.dim, self.dim) + iden # reshaping output to a matrix

    return x


In [18]:
test_model = Tnet(3).to(device) 
sim_data = Variable(torch.rand(32, 3, 2500)).to(device) 
out = test_model(sim_data) 
print('TNet', out.size())

TNet torch.Size([32, 3, 3])


## PointNet Backbone 

In [19]:
# Point Net Backbone
class PointNetBackbone(nn.Module):
  def __init__(self, num_points = 2500, num_global_feats = 1024, local_feat = True):
    super(PointNetBackbone, self).__init__()

    # if true concat local and global features
    self.num_points = num_points
    self.num_global_feats = num_global_feats
    self.local_feat = local_feat

    # spatial transformer network (T-Net)
    self.tnet1 = Tnet(dim = 3, num_points = num_points)
    self.tnet2 = Tnet(dim = 64, num_points = num_points)

    # Shared MLP 1
    self.conv1 = nn.Conv1d(3, 64, kernel_size =1)
    self.conv2 = nn.Conv1d(64, 64, kernel_size =1)

    # Shared MLP 2
    self.conv3 = nn.Conv1d(64, 64, kernel_size =1)
    self.conv4 = nn.Conv1d(64, 128, kernel_size =1)
    self.conv5 = nn.Conv1d(128, self.num_global_feats, kernel_size = 1)

    # batch norms for both shared mlps
    self.bn1 = nn.BatchNorm1d(64)
    self.bn2 = nn.BatchNorm1d(64)
    self.bn3 = nn.BatchNorm1d(64)
    self.bn4 = nn.BatchNorm1d(128)
    self.bn5 = nn.BatchNorm1d(self.num_global_feats)


    # Max pool to get the global features
    # We can visualize them by getting the max pool func to return the indices by setting return_indices = True
    self.max_pool = nn.MaxPool1d(kernel_size= num_points, return_indices = True)

  def forward(self, x):

    # get batch size
    bs = x.shape[0]

    # pass through first Tnet to get transform matrix
    A_input = self.tnet1(x)

    # perform first transformation across each point in the batch
    x = torch.bmm(x.transpose(2, 1), A_input).transpose(2,1)

    # pass through first shared MLP
    x = self.bn1(F.relu(self.conv1(x)))
    x = self.bn2(F.relu(self.conv2(x)))

    # get feature transform
    A_feat = self.tnet2(x)

    # perform second transformation across each (64 dim) features in the batch
    x = torch.bmm(x.transpose(2,1), A_feat).transpose(2,1)

    # store local point features for segmentation head
    local_features = x.clone()

    # pass through second MLP head
    x = self.bn3(F.relu(self.conv3(x)))
    x = self.bn4(F.relu(self.conv4(x)))
    x = self.bn5(F.relu(self.conv5(x)))

    # get global feature vector and critial indexes
    global_features, critical_indexes = self.max_pool(x)
    global_features = global_features.view(bs, -1)
    critical_indexes = critical_indexes.view(bs, -1)

    if self.local_feat:
      features = torch.cat((local_features, global_features.unsqueeze(-1).repeat(1, 1, self.num_points)), dim =1)
      return features, critical_indexes, A_feat
    else:
      return global_features, critical_indexes, A_feat

In [20]:
pointbackbone = PointNetBackbone(local_feat = False).to(device) 
out, _, _ = pointbackbone (sim_data) 
print('global feat', out.size())

pointbackbone = PointNetBackbone(local_feat = True).to(device) 
out, _, _ = pointbackbone(sim_data)
print('point feat', out.size())

global feat torch.Size([32, 1024])
point feat torch.Size([32, 1088, 2500])


## Classification Head 

In [21]:
# Classification head
class PointNetClassHead(nn.Module):
  '''Classification Head'''
  def __init__(self, num_points = 2500, num_global_feats = 1024, k =2):
    super(PointNetClassHead, self).__init__()

    # get the backbone (require only global features for classification)
    self.backbone = PointNetBackbone(num_points, num_global_feats, local_feat = False)

    # MLP for classification
    self.linear1 = nn.Linear(num_global_feats, 512)
    self.linear2 = nn.Linear(512, 256)
    self.linear3 = nn.Linear(256, k)

    # batchnorm for the first 2 linear layers
    self.bn1 = nn.BatchNorm1d(512)
    self.bn2 = nn.BatchNorm1d(256)

    # the paper says only batchnorm is added to the layers before the classification layer
    # but another version uses dropouts for the first 2 layers as well
    self.dropout = nn.Dropout(p = 0.3)

  def forward(self, x):
    # get global features
    x, crit_idxs, A_feat = self.backbone(x)
    x = self.bn1(F.relu(self.linear1(x)))
    x = self.bn2(F.relu(self.linear2(x)))
    x = self.dropout(x)
    x = self.linear3(x)

    return x, crit_idxs, A_feat

In [22]:
cls = PointNetClassHead(k = 16).to(device) 
out, _, _ = cls(sim_data) 
print('class', out.size())

class torch.Size([32, 16])


### Defining Loss for Point Net: Focal Loss 

For Point Net we will be using the Categorical Cross Entropy loss with a regularization term that will enforce the high dimensional transform matrix to 0. We will also present the option for using the Balanced Cross Entropy Loss via the 'alpha' argument which assigns a weight to each class this weights the importance of each example based on their class frequencies. We also provide the option to use the Focal Loss which adds a modulating term to the Cross Entropy Loss $(1-p_n)^γ$, this term forces the model to focus on hard examples (i.e. examples with low prediction probability). Some notes on the Focal Loss are given below. 

**Focal Loss**
The Focal Loss is a modified Cross Entropy (CE) Loss. The CE Loss for a sample *n* is given below. 

$$CE(s_n, y_n) = -α_{y_n} * log{(\frac{exp(s_n)}{\sum_{i=1}^{N}})} 1c(y_n)$$

Where **$s_n$** is the predicted class score vector (logits), **$M$** is the number of classes, **$y_n$** is the true class, **$α_{y_n}$** is the class weight, and **$1c(y_n)$** is the [Indcator Function](https://en.wikipedia.org/wiki/Indicator_function) that tells us to only consider the prediction for the current class **$y_n$**.

We may also notice that the term inside the **$log$** is the Softmax function, which along wth the indicator function, gives us the predicted class probability. We can rewrite the CE Loss in a more simple format as:
$$CE(p_n) = -α_{y_n}log(p_n)$$
Where the **$n$** subscript refers to the true class at sample **$n$**. 
The CE loss is typically unweighted, but if a weight is used it is reffered to as the Balanced (or Weighted) CE Loss. The weights are usually based on inverse class distribution and typically range from \[0, 1\]. The weights can also be set as a hyperparameter via Cross Validation.
$$α = \frac{1}{class\ counts}$$
We may also normalize alpha so that it spreads between \[0,1\]
$$α = \frac{α}{max\ α}$$
The Focal Loss adds an additional modulating factor to the weighted CE Loss: $(1-p_n)^γ$, where $γ>=0$ is referred to as the focusing parameter. This term tends to 0 when the prediction probability is high, and has a larger value when the prediction probability is lower forcing the model to focus on the hard examples. It can be said that the focusing parameter smoothly adjusts the rate at which easy examples are down-weighted. We can formally express the Focal Loss as: 
$$FL(p_n) = -α_{y_n}(1-p_n)^γlog(p_n)$$ 

In [23]:
# special loss for Classification: Focal Loss + regularization
class PointNetLoss(nn.Module):
    def __init__(self, alpha=None, gamma=0, reg_weight=0, size_average=True):
        super(PointNetLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reg_weight = reg_weight
        self.size_average = size_average

        # sanitize inputs
        if isinstance(alpha,(float, int)): self.alpha = torch.Tensor([alpha,1-alpha])
        if isinstance(alpha,(list, np.ndarray)): self.alpha = torch.Tensor(alpha)

        # get Balanced Cross Entropy Loss
        self.cross_entropy_loss = nn.CrossEntropyLoss(weight=self.alpha)
        

    def forward(self, predictions, targets, A=None):

        # get batch size
        bs = predictions.size(0)

        # get Balanced Cross Entropy Loss
        ce_loss = self.cross_entropy_loss(predictions, targets)

        # get predicted class probabilities for the true class
        pn = F.softmax(predictions, -1)
        pn = pn.gather(1, targets.view(-1, 1)).view(-1)

        # get regularization term
        if self.reg_weight > 0:
            I = torch.eye(64).unsqueeze(0).repeat(A.shape[0], 1, 1) # .to(device)
            if A.is_cuda: I = I.cuda()
            reg = torch.linalg.norm(I - torch.bmm(A, A.transpose(2, 1)))
            reg = self.reg_weight*reg/bs
        else:
            reg = 0

        # compute loss (negative sign is included in ce_loss)
        loss = ((1 - pn)**self.gamma * ce_loss)
        if self.size_average: return loss.mean() + reg
        else: return loss.sum() + reg


# special loss for segmentation Focal Loss + Dice Loss
class PointNetSegLoss(nn.Module):
    def __init__(self, alpha=None, gamma=0, size_average=True, dice=False):
        super(PointNetSegLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.size_average = size_average
        self.dice = dice

        # sanitize inputs
        if isinstance(alpha,(float, int)): self.alpha = torch.Tensor([alpha,1-alpha])
        if isinstance(alpha,(list, np.ndarray)): self.alpha = torch.Tensor(alpha)

        # get Balanced Cross Entropy Loss
        self.cross_entropy_loss = nn.CrossEntropyLoss(weight=self.alpha)
        
    def forward(self, predictions, targets, pred_choice=None):
        # print(f"Predictions(loss fn) shape before: {predictions.shape}")
        # print(f"Targets(loss fn) shape before: {targets.shape}")
    
        # get Balanced Cross Entropy Loss
        ce_loss = self.cross_entropy_loss(predictions.transpose(2, 1), targets)  # Reshape targets to [B, N]

        # reformat predictions (b, n, c) -> (b*n, c)
        predictions = predictions.contiguous() \
                                 .view(-1, predictions.size(2)) 
        # get predicted class probabilities for the true class
        pn = F.softmax(predictions, dim = -1)
        pn = pn.gather(1, targets.view(-1, 1)).view(-1)

        # compute loss (negative sign is included in ce_loss)
        loss = ((1 - pn)**self.gamma * ce_loss)
        if self.size_average: loss = loss.mean() 
        else: loss = loss.sum()

        # add dice coefficient if necessary
        if self.dice: return loss + self.dice_loss(targets, pred_choice, eps=1)
        else: return loss


    @staticmethod
    def dice_loss(predictions, targets, eps=1):
        ''' Compute Dice loss, directly compare predictions with truth '''

        targets = targets.reshape(-1)
        predictions = predictions.reshape(-1)

        cats = torch.unique(targets)

        top = 0
        bot = 0
        for c in cats:
            locs = targets == c

            # get truth and predictions for each class
            y_tru = targets[locs]
            y_hat = predictions[locs]

            top += torch.sum(y_hat == y_tru)
            bot += len(y_tru) + len(y_hat)


        return 1 - 2*((top + eps)/(bot + eps)) 


## Training 

In [24]:
def train_model(model, num_epochs, criterion, optimizer, dataloader_train,
                label_str = 'class_id', lr_scheduler = None, output_name = 'pointnet.pth', Segmentation = False):
    # move model to device
    model.to(device)
    for epoch in range(num_epochs):
        print(f"Starting {epoch + 1} epoch ...")
        
        # Training
        model.train()
        train_loss = 0.0
        if Segmentation == False: 
            for batch_dict in tqdm(dataloader_train, total=len(dataloader_train)):            
                # Forward pass
                x = batch_dict['points'].transpose(1, 2).to(device)
                labels = batch_dict[label_str].to(device).view(-1)
                
                pred, _, A = model(x)
                loss = criterion(pred, labels, A)
                train_loss += loss.item()
                  
                # Backward pass
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
    
                # adjusting learning rate
                if lr_scheduler is not None:
                    lr_scheduler.step()
        elif Segmentation == True: 
            for batch_dict in tqdm(dataloader_train, total=len(dataloader_train)): 
                x = batch_dict['points'].transpose(1, 2).to(device)
                #targets = batch_dict[label_str].squeeze().to(device) 
                labels = batch_dict[label_str].to(device)

                preds, _, _ = model(x)
                preds_trans = preds.transpose(2,1)

                pred_choice = torch.softmax(preds_trans, dim=2).argmax(dim=2)

                loss = criterion(preds_trans, labels, pred_choice)
                train_loss += loss.item()
                
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                if lr_scheduler is not None: 
                    lr_scheduler.step() 
                
        # compute per batch losses, metric value
        train_loss = train_loss / len(dataloader_train)

        print(f'Epoch: {epoch+1}, trainLoss:{train_loss:6.5f}')
        torch.cuda.empty_cache()
    torch.save(model.state_dict(), output_name)


In [25]:
# import gc
# def report_gpu(): 
#     print(torch.cuda.list_gpu_processes()) 
#     gc.collect() 
#     torch.cuda.empty_cache()
# report_gpu()

In [26]:
import torch.optim as optim

N_EPOCHS = 25
LR = 0.01
REG_WEIGHT = 0.001 

num_points = 2500
num_classes = 16

# manually downweight the high frequency classes
alpha = np.ones(num_classes)
alpha[0] = 0.5  # airplane
alpha[4] = 0.5  # chair
alpha[-1] = 0.5 # table

gamma = 2

#criterion = nn.NLLLoss()
criterion = PointNetLoss(alpha=alpha, gamma=gamma, reg_weight=REG_WEIGHT).to(device)

# create model, optimizer, lr_scheduler and pass to training function
num_classes = len(class_id_name_map.items())
classifier = PointNetClassHead(k = num_classes, num_points = num_points)

# DEFINE OPTIMIZERS
optimizer = optim.SGD(classifier.parameters(), lr=LR, momentum=0.9)
if torch.cuda.is_available():
    classifier.cuda()
_ = train_model(classifier, N_EPOCHS, criterion, optimizer, train_loader)

Starting 1 epoch ...


100%|██████████| 380/380 [01:19<00:00,  4.78it/s]


Epoch: 1, trainLoss:0.53907
Starting 2 epoch ...


100%|██████████| 380/380 [01:19<00:00,  4.76it/s]


Epoch: 2, trainLoss:0.13574
Starting 3 epoch ...


100%|██████████| 380/380 [01:21<00:00,  4.68it/s]


Epoch: 3, trainLoss:0.08899
Starting 4 epoch ...


100%|██████████| 380/380 [01:22<00:00,  4.63it/s]


Epoch: 4, trainLoss:0.06769
Starting 5 epoch ...


100%|██████████| 380/380 [01:22<00:00,  4.59it/s]


Epoch: 5, trainLoss:0.05557
Starting 6 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.58it/s]


Epoch: 6, trainLoss:0.04260
Starting 7 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.57it/s]


Epoch: 7, trainLoss:0.04066
Starting 8 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 8, trainLoss:0.04848
Starting 9 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.57it/s]


Epoch: 9, trainLoss:0.04504
Starting 10 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 10, trainLoss:0.04752
Starting 11 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 11, trainLoss:0.03438
Starting 12 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 12, trainLoss:0.02757
Starting 13 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 13, trainLoss:0.02569
Starting 14 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 14, trainLoss:0.02022
Starting 15 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 15, trainLoss:0.02364
Starting 16 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 16, trainLoss:0.02133
Starting 17 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 17, trainLoss:0.01877
Starting 18 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 18, trainLoss:0.02316
Starting 19 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 19, trainLoss:0.01928
Starting 20 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.57it/s]


Epoch: 20, trainLoss:0.01663
Starting 21 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 21, trainLoss:0.01790
Starting 22 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 22, trainLoss:0.01468
Starting 23 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 23, trainLoss:0.01275
Starting 24 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.56it/s]


Epoch: 24, trainLoss:0.01477
Starting 25 epoch ...


100%|██████████| 380/380 [01:23<00:00,  4.55it/s]

Epoch: 25, trainLoss:0.01582





## Inference 

In [27]:
classifier = PointNetClassHead(k = num_classes).to(device) 
classifier.load_state_dict(torch.load('pointnet.pth', weights_only = True))
classifier.eval()

total_loss = 0.0

with torch.no_grad():
    for batch_dict in tqdm(test_loader, total=len(test_loader)):
        x = batch_dict['points'].transpose(1, 2).to(device)
        labels = batch_dict['class_id'].to(device)
        pred, _, A = classifier(x)

        # calculate loss
        loss = criterion(pred, labels, A)
        total_loss += loss.item()

evaluation_loss = total_loss / len(test_loader)
print(evaluation_loss)

100%|██████████| 89/89 [00:21<00:00,  4.23it/s]

0.05312892890880617





### Test on individual items 

In [28]:
# Random test sample
test_sample = test_set[np.random.choice(np.arange(len(test_set)))]
batch_dict = collate_fn([test_sample])
x = batch_dict['points'].transpose(1, 2).to(device)

# Get model preds 
model_preds, _, _ = classifier(x)
predicted_class = torch.argmax(model_preds, axis=1).detach().cpu().numpy()[0]
predicted_class_name = class_id_name_map[predicted_class]
pred_class_probs = F.softmax(model_preds.flatten(), dim=0).detach().cpu().numpy()


# plot results 
title = f"Label = {test_sample['class_name']}, Predicted class = {predicted_class_name}"
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "scatter3d"}, {}]], column_widths=[0.4, 0.6])
fig.update_layout(template="plotly_dark", scene=PCD_SCENE, height = 400, width = 1200,
                title=title, title_x=0.1, title_y=0.97, margin=dict(r=0, b=0, l=0, t=0))    
fig.add_trace(plot_pc_data3d(x=test_sample['points'][:,0], y=test_sample['points'][:,1], z=test_sample['points'][:,2]), row=1, col=1)
fig.add_trace(go.Bar(x=list(class_name_id_map.keys()), y=pred_class_probs, showlegend=False), row=1, col=2)
fig.show(renderer='iframe')

# Segmentation 

In [29]:
# Segmentation head
class PointNetSegHead(nn.Module):
  '''Segmentation Head'''
  def __init__(self, num_points = 2500, num_global_feats = 1024, m =2):
    super(PointNetSegHead, self).__init__()

    self.num_points = num_points
    self.m = m 

    # get the backbone
    self.backbone = PointNetBackbone(num_points, num_global_feats, local_feat = True)

    # Shared MLP
    num_features = num_global_feats + 64 # local and global features
    self.conv1 = nn.Conv1d(num_features, 512, kernel_size =1)
    self.conv2 = nn.Conv1d(512, 256, kernel_size =1)
    self.conv3 = nn.Conv1d(256, 128, kernel_size =1)
    self.conv4 = nn.Conv1d(128, self.m, kernel_size =1)

    # batch norm for shared MLP
    self.bn1 = nn.BatchNorm1d(512)
    self.bn2 = nn.BatchNorm1d(256)
    self.bn3 = nn.BatchNorm1d(128)

  def forward(self, x):

    # get combined features
    x, crit_idxs, A_feat = self.backbone(x)

    # pass through shared MLP
    x = self.bn1(F.relu(self.conv1(x)))
    x = self.bn2(F.relu(self.conv2(x)))
    x = self.bn3(F.relu(self.conv3(x)))
    x = self.conv4(x)

    #x = x.transpose(2,1)
    return x, crit_idxs, A_feat

In [30]:
seg = PointNetSegHead(m = 16).to(device)
print(seg) 
out, _, _ = seg(sim_data)
print('seg', out.size()) 
preds_trans = out.transpose(2,1) 
print(preds_trans.size())

PointNetSegHead(
  (backbone): PointNetBackbone(
    (tnet1): Tnet(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (linear1): Linear(in_features=1024, out_features=512, bias=True)
      (linear2): Linear(in_features=512, out_features=256, bias=True)
      (linear3): Linear(in_features=256, out_features=9, bias=True)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (max_pool): MaxPool1d(kernel_size=2500, stride=2500, padding=

In [31]:
N_EPOCHS = 25
num_points = 2500

# manually set alpha weights
alpha = np.ones(num_classes)
alpha[0:3] *= 0.25 # balance background classes
alpha[-1] *= 0.75  # balance clutter class

gamma = 1

criterion = PointNetSegLoss(alpha=alpha, gamma=gamma, dice=True).to(device)
# criterion = nn.CrossEntropyLoss() 

# Create model, optimizer, lr_scheduler, and pass to training function 
num_classes = len(class_id_name_map.items()) 
dense_classifier = PointNetSegHead(m = NUM_PARTS, num_points = num_points).to(device) 

# Optimizer 
optimizer = optim.SGD(dense_classifier.parameters(), lr = 0.01, momentum = 0.9) 

train_model(dense_classifier, N_EPOCHS, criterion, optimizer, train_loader, label_str = 
           'seg_labels', output_name = 'pointnet_seg.pth', Segmentation = True)

Starting 1 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 1, trainLoss:0.66986
Starting 2 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 2, trainLoss:0.28887
Starting 3 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 3, trainLoss:0.22959
Starting 4 epoch ...


100%|██████████| 380/380 [02:13<00:00,  2.86it/s]


Epoch: 4, trainLoss:0.21539
Starting 5 epoch ...


100%|██████████| 380/380 [02:13<00:00,  2.86it/s]


Epoch: 5, trainLoss:0.19228
Starting 6 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 6, trainLoss:0.17955
Starting 7 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 7, trainLoss:0.16881
Starting 8 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 8, trainLoss:0.16282
Starting 9 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 9, trainLoss:0.16927
Starting 10 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 10, trainLoss:0.15342
Starting 11 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 11, trainLoss:0.14834
Starting 12 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 12, trainLoss:0.13970
Starting 13 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 13, trainLoss:0.13823
Starting 14 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 14, trainLoss:0.13815
Starting 15 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 15, trainLoss:0.13223
Starting 16 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 16, trainLoss:0.14157
Starting 17 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 17, trainLoss:0.12686
Starting 18 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 18, trainLoss:0.12601
Starting 19 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 19, trainLoss:0.13444
Starting 20 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 20, trainLoss:0.12465
Starting 21 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]


Epoch: 21, trainLoss:0.12070
Starting 22 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 22, trainLoss:0.11963
Starting 23 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 23, trainLoss:0.12285
Starting 24 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.87it/s]


Epoch: 24, trainLoss:0.11759
Starting 25 epoch ...


100%|██████████| 380/380 [02:12<00:00,  2.86it/s]

Epoch: 25, trainLoss:0.11897





In [32]:
dense_classifier.load_state_dict(torch.load('pointnet_seg.pth', weights_only = True))
dense_classifier.eval()

total_loss = 0.0

with torch.no_grad():
    for batch_dict in tqdm(test_loader, total=len(test_loader)):
        x = batch_dict['points'].transpose(1, 2).to(device)
        labels = batch_dict['seg_labels'].to(device)
        pred, _, _ = dense_classifier(x)
        pred_transposed = pred.transpose(2,1)

        pred_choice = torch.softmax(pred_transposed, dim=2).argmax(dim=2)

        # calculate loss
        loss = criterion(pred_transposed, labels, pred_choice)
        total_loss += loss.item()

evaluation_loss = total_loss / len(test_loader)
print(evaluation_loss)

100%|██████████| 89/89 [00:12<00:00,  6.93it/s]

0.19535148922312126





### Test on individual items 

In [33]:
# Random test sample
test_sample = test_set[np.random.choice(np.arange(len(test_set)))]
batch_dict = collate_fn([test_sample])

# Get model predictions
x = batch_dict['points'].transpose(1, 2).to(device)
model_preds, _, _ = dense_classifier(x)
pred_part_labels = torch.argmax(model_preds, axis=1).detach().cpu().numpy()[0]

points = test_sample['points']
part_labels = test_sample['seg_labels']


# plot results
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "scatter3d"}, {"type": "scatter3d"}]], column_widths=[0.5, 0.5],
                    subplot_titles=('Part Labels', 'Part Predictions'))

# ground truth part labels
part_label_plots = plot_pc_data3d(x=points[:,0], y=points[:,1], z=points[:,2], apply_color_gradient=False, 
                                  color=PART_COLORS[part_labels - 1], marker_size=2)

# ground truth part labels
pred_part_label_plots = plot_pc_data3d(x=points[:,0], y=points[:,1], z=points[:,2], apply_color_gradient=False, 
                                  color=PART_COLORS[pred_part_labels - 1], marker_size=2)

fig.update_layout(template="plotly_dark", scene=PCD_SCENE, scene2=PCD_SCENE, height = 400, width = 1200,
                title='PointNet Segmentation', title_x=0.5, title_y=0.97, margin=dict(r=0, b=0, l=0, t=0))
fig.add_trace(part_label_plots, row=1, col=1)
fig.add_trace(pred_part_label_plots, row=1, col=2)
fig.show(renderer='iframe')