# 3D Model Classification - PointNet

## Dataset: ModelNet10, ModelNet40

In [1]:
# importing libraries

import os
import glob
import trimesh
import numpy as np
import math, random
random.seed = 42
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [2]:
# Loading data

data_dir = os.path.join(os.getcwd(), "ModelNet40")
print(data_dir)

C:\Users\Matte\Desktop\NNDL\ModelNet40


In [3]:
# Loading gpu/cpu

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [4]:
# parameters
cloud_points = 2048 # Number of points to describe an object
classes = 40 # Number of classes in the dataset - set for ModelNet40
batch_size = 32
learning_rate = 0.00025
dropout = 0.5
num_epochs = 50

In [5]:
def dataset(points = cloud_points):
    train_points = []
    train_labels = []
    test_points = []
    test_labels = []
    classes_map = {}
    
    #saving all classes' folder paths in list
    folders = [dir for dir in sorted(os.listdir(data_dir)) if os.path.isdir(data_dir)]
    folders_path = []
    for i in range(classes):
        folders_path.append(glob.glob(os.path.join(data_dir,folders[i])))
    
    for i,folder in enumerate(folders_path):
        #storing classes' names into list
        classes_map[i] = folder[0].split("\\")[-1]
        
        #storing train and test files
        train_files = glob.glob(os.path.join(folder[0], "train/*"))
        test_files = glob.glob(os.path.join(folder[0], "test/*"))
        
        #converting train and test files in cloud points
        for j in train_files:
            train_points.append(trimesh.load(j).sample(cloud_points))
            train_labels.append(i)

        for j in test_files:
            test_points.append(trimesh.load(j).sample(cloud_points))
            test_labels.append(i)
    
    return (np.array(train_points),
            np.array(test_points),
            np.array(train_labels),
            np.array(test_labels),
            classes_map)

In [6]:
# Dataset creation

data_start_time = time.time()
train_points, test_points, train_labels, test_labels, classes_map = dataset(cloud_points)
data_end_time = time.time()
torch.save(train_points)
torch.save(train_labels)
torch.save(test_points)
torch.save(test_labels)
print("Time spent creating the dataset object: ",np.round((data_end_time - data_start_time)/60,2), " minutes.")
# 42.04 minutes spent creating ModelNet40 dataset with 4096 points
#print("Notice that changing the size of the point clouds do not increase significantly the time spent creating the dataset.")

TypeError: save() missing 1 required positional argument: 'f'

In [17]:
# Data augmentation is fundamental when working with point cloud data

def augment(points, label):
    # jitter points
    points += np.random.uniform(points.shape, -0.005, 0.005, dtype=tf.float64)
    # shuffle points
    points = np.random.shuffle(points)
    return points, label

train_dataset = torch.utils.data.TensorDataset(torch.Tensor(train_points), torch.Tensor(train_labels))
#train_ds = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
#train_dataset = tf.data.Dataset.from_tensor_slices((train_points, train_labels))
test_dataset = torch.utils.data.TensorDataset(torch.Tensor(test_points), torch.Tensor(test_labels))
#test_dataset = tf.data.Dataset.from_tensor_slices((test_points, test_labels))
#test_ds = DataLoader(test_dataset, batch_size = batch_size*2, shuffle = True)
#train_dataset = train_dataset.shuffle(len(train_points)).map(augment).batch(batch_size)
#test_dataset = test_dataset.shuffle(len(test_points)).batch(batch_size)

In [9]:
class Tnet(nn.Module):
    def __init__(self, k=3):
        super().__init__()
        self.k=k
        self.conv1 = nn.Conv1d(k,64,1)
        self.conv2 = nn.Conv1d(64,128,1)
        self.conv3 = nn.Conv1d(128,1024,1)
        self.fc1 = nn.Linear(1024,512)
        self.fc2 = nn.Linear(512,256)
        self.fc3 = nn.Linear(256,k*k)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)


    def forward(self, input):
        # input.shape == (bs,n,3)
        bs = input.size(0)
        xb = F.relu(self.bn1(self.conv1(input)))
        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = F.relu(self.bn3(self.conv3(xb)))
        pool = nn.MaxPool1d(xb.size(-1))(xb)
        flat = nn.Flatten(1)(pool)
        xb = F.relu(self.bn4(self.fc1(flat)))
        xb = F.relu(self.bn5(self.fc2(xb)))

        #initialize as identity
        init = torch.eye(self.k, requires_grad=True).repeat(bs,1,1)
        if xb.is_cuda:
            init=init.cuda()
        matrix = self.fc3(xb).view(-1,self.k,self.k) + init
        return matrix

In [12]:
class Transform(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=64)
        self.conv1 = nn.Conv1d(3,64,1)

        self.conv2 = nn.Conv1d(64,128,1)
        self.conv3 = nn.Conv1d(128,1024,1)


        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

    def forward(self, input):
        matrix3x3 = self.input_transform(input)
        # batch matrix multiplication
        xb = torch.bmm(torch.transpose(input,1,2), matrix3x3).transpose(1,2)

        xb = F.relu(self.bn1(self.conv1(xb)))

        matrix64x64 = self.feature_transform(xb)
        xb = torch.bmm(torch.transpose(xb,1,2), matrix64x64).transpose(1,2)

        xb = F.relu(self.bn2(self.conv2(xb)))
        xb = self.bn3(self.conv3(xb))
        xb = nn.MaxPool1d(xb.size(-1))(xb)
        output = nn.Flatten(1)(xb)
        return output, matrix3x3, matrix64x64

class PointNet(nn.Module):
    def __init__(self, classes = 10):
        super().__init__()
        self.transform = Transform()
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, classes)
        

        self.bn1 = nn.BatchNorm1d(512)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout = nn.Dropout(p=dropout)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        xb, matrix3x3, matrix64x64 = self.transform(input)
        xb = F.relu(self.bn1(self.fc1(xb)))
        xb = F.relu(self.bn2(self.dropout(self.fc2(xb))))
        output = self.fc3(xb)
        return self.logsoftmax(output), matrix3x3, matrix64x64

In [54]:
def pointnetloss(outputs, labels, m3x3, m64x64, alpha = 0.0001):
    criterion = torch.nn.NLLLoss()
    bs=outputs.size(0)
    id3x3 = torch.eye(3, requires_grad=True).repeat(bs,1,1)
    id64x64 = torch.eye(64, requires_grad=True).repeat(bs,1,1)
    if outputs.is_cuda:
        id3x3=id3x3.cuda()
        id64x64=id64x64.cuda()
    diff3x3 = id3x3-torch.bmm(m3x3,m3x3.transpose(1,2))
    diff64x64 = id64x64-torch.bmm(m64x64,m64x64.transpose(1,2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3)+torch.norm(diff64x64)) / float(bs)

In [55]:
pointnet = PointNet()
pointnet.to(device)

PointNet(
  (transform): Transform(
    (input_transform): Tnet(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (feature_transform): Tnet(
      (conv1): Conv1d(64, 64, kernel_size=(1,

In [56]:
optimizer = torch.optim.Adam(pointnet.parameters(), lr=learning_rate)

In [57]:
def train(model, train_loader, val_loader=None,  epochs=4):
    for epoch in range(epochs): 
        pointnet.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = next(iter(train_loader)) 
            optimizer.zero_grad()
            outputs, m3x3, m64x64 = pointnet(inputs.transpose(1,2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 5 == 4:    # print every 10 mini-batches
                print('[Epoch: %d, Batch: %4d / %4d], loss: %.3f' %
                    (epoch + 1, i + 1, len(train_loader), running_loss / 10))
                running_loss = 0.0

        pointnet.eval()
        correct = total = 0

        # validation
        if val_loader:
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['pointcloud'].to(device).float(), data['category'].to(device)
                    outputs, __, __ = pointnet(inputs.transpose(1,2))
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            val_acc = 100. * correct / total
            print('Valid accuracy: %d %%' % val_acc)


In [58]:
print(train_ds)
train(model = pointnet, train_loader= train_ds, val_loader= test_ds)

<torch.utils.data.dataloader.DataLoader object at 0x00000298AE68C7C0>


RuntimeError: expected scalar type Long but found Float