# **点云处理：基于Paddle2.0实现PointNet点云分类Conv1D版**
&emsp;&emsp;&emsp;&emsp;<font size=4>作者：[WhiteFireFox](https://github.com/WhiteFireFox)</font><br><br>
&emsp;&emsp;&emsp;&emsp;<font size=4>日期：2021年2月26日</font><br><br>
&emsp;&emsp;&emsp;&emsp;<font size=4>本示例在于演示如何基于Paddle2.0实现PointNet在ShapeNet数据集上进行点云分类处理。</font><br><br>

# **环境设置**

In [None]:
import os
import numpy as np
import random
import h5py
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

# **数据集**
## **①数据介绍**
&emsp;&emsp;&emsp;&emsp;<font size=4>ShapeNet数据集是一个注释丰富且规模较大的 3D 形状数据集，由斯坦福大学、普林斯顿大学和芝加哥丰田技术学院于 2015 年联合发布。</font><br><br>
&emsp;&emsp;&emsp;&emsp;<font size=4>ShapeNet数据集的储存格式是h5文件，该文件中key值分别为：</font><br>
&emsp;&emsp;&emsp;&emsp;<font size=4>1、data：这一份数据中所有点的xyz坐标，</font><br>
&emsp;&emsp;&emsp;&emsp;<font size=4>2、label：这一份数据所属类别，如airplane等，</font><br>
&emsp;&emsp;&emsp;&emsp;<font size=4>3、pid：这一份数据中所有点所属的类型，如这一份数据属airplane类，则它包含的所有点的类型有机翼、机身等类型。</font>
## **②解压数据集**

In [None]:
!unzip data/data70460/shapenet_part_seg_hdf5_data.zip
!mv hdf5_data dataset

## **③数据列表**
&emsp;&emsp;&emsp;&emsp;<font size=4>ShapeNet数据集所有的数据文件。</font><br><br>

In [None]:
train_list = ['ply_data_train0.h5', 'ply_data_train1.h5', 'ply_data_train2.h5', 'ply_data_train3.h5', 'ply_data_train4.h5', 'ply_data_train5.h5']
test_list = ['ply_data_test0.h5', 'ply_data_test1.h5']
val_list = ['ply_data_val0.h5']

## **④搭建数据生成器**
&emsp;&emsp;&emsp;&emsp;<font size=4>说明：将ShapeNet数据集全部读入后，按照Batchsize生成Mini-batch的数据。</font><br><br>

In [None]:
def pointDataLoader(mode='train'):
    path = './dataset/'
    MAX_POINT = 2048
    if mode == 'train':
        BATCHSIZE = 128
    else:
        BATCHSIZE = 32

    datas = []
    labels = []
    targets = []
    if mode == 'train':
        for file_list in train_list:
            f = h5py.File(os.path.join(path, file_list), 'r')
            datas.extend(f['data'][:, :MAX_POINT, :])
            labels.extend(f['label'])
            targets.extend(f['pid'][:, :MAX_POINT])
            f.close()
    elif mode == 'test':
        for file_list in test_list:
            f = h5py.File(os.path.join(path, file_list), 'r')
            datas.extend(f['data'][:, :MAX_POINT, :])
            labels.extend(f['label'])
            targets.extend(f['pid'][:, :MAX_POINT])
            f.close()
    else:
        for file_list in val_list:
            f = h5py.File(os.path.join(path, file_list), 'r')
            datas.extend(f['data'][:, :MAX_POINT, :])
            labels.extend(f['label'])
            targets.extend(f['pid'][:, :MAX_POINT])
            f.close()

    datas = np.array(datas)
    labels = np.array(labels)
    targets = np.array(targets)
    print('==========load over==========')

    index_list = list(range(len(datas)))

    def pointDataGenerator():
        if mode == 'train':
            random.shuffle(index_list)
        datas_list = []
        labels_list = []
        targets_list = []
        for i in index_list:
            target = np.reshape(targets[i], [MAX_POINT]).astype('int64')
            datas_list.append(datas[i].T.astype('float32')) 
            labels_list.append(labels[i].astype('int64'))
            targets_list.append(target)
            if len(datas_list) == BATCHSIZE:
                yield np.array(datas_list), np.array(labels_list), np.array(targets_list)
                datas_list = []
                labels_list = []
                targets_list = []
        if len(datas_list) > 0:
            yield np.array(datas_list), np.array(labels_list), np.array(targets_list)

    return pointDataGenerator

# **定义网络**
&emsp;&emsp;&emsp;&emsp;<font size=4>PointNet是斯坦福大学研究人员提出的一个点云处理网络，在这篇论文中，它提出了空间变换网络（T-Net）解决点云的旋转问题（注：因为考虑到某一物体的点云旋转后还是该物体，所以需要有一个网络结构去学习并解决这个旋转问题），并且提出了采取MaxPooling的方法极大程度上地提取点云全局特征。</font><br><br>
## **定义网络结构**

In [None]:
class PointNet(nn.Layer):
    def __init__(self, name_scope='PointNet_', num_classes=16, num_point=2048):
        super(PointNet, self).__init__()
        self.input_transform_net = nn.Sequential(
            nn.Conv1D(3, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 1024, 1),
            nn.BatchNorm(1024),
            nn.ReLU(),
            nn.MaxPool1D(num_point)
        )
        self.input_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 9,
                weight_attr=paddle.framework.ParamAttr(initializer=paddle.nn.initializer.Assign(paddle.zeros((256, 9)))),
                bias_attr=paddle.framework.ParamAttr(initializer=paddle.nn.initializer.Assign(paddle.reshape(paddle.eye(3), [-1])))
            )
        )
        self.mlp_1 = nn.Sequential(
            nn.Conv1D(3, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
        )
        self.feature_transform_net = nn.Sequential(
            nn.Conv1D(64, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 1024, 1),
            nn.BatchNorm(1024),
            nn.ReLU(),

            nn.MaxPool1D(num_point)
        )
        self.feature_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 64*64)
        )
        self.mlp_2 = nn.Sequential(
            nn.Conv1D(64, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 1024, 1),
            nn.BatchNorm(1024),
            nn.ReLU(),
        )
        self.fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.7),
            nn.Linear(256, num_classes),
            nn.LogSoftmax(axis=-1)
        )
    def forward(self, inputs):
        batchsize = inputs.shape[0]

        t_net = self.input_transform_net(inputs)
        t_net = paddle.squeeze(t_net, axis=-1)
        t_net = self.input_fc(t_net)
        t_net = paddle.reshape(t_net, [batchsize, 3, 3])

        x = paddle.transpose(inputs, (0, 2, 1))
        x = paddle.matmul(x, t_net)
        x = paddle.transpose(x, (0, 2, 1))
        x = self.mlp_1(x)

        t_net = self.feature_transform_net(x)
        t_net = paddle.squeeze(t_net, axis=-1)
        t_net = self.feature_fc(t_net)
        t_net = paddle.reshape(t_net, [batchsize, 64, 64])

        x = paddle.squeeze(x, axis=-1)
        x = paddle.transpose(x, (0, 2, 1))
        x = paddle.matmul(x, t_net)
        x = paddle.transpose(x, (0, 2, 1))
        x = self.mlp_2(x)
        x = paddle.max(x, axis=-1)
        x = paddle.squeeze(x, axis=-1)
        x = self.fc(x)

        return x

## **网络结构可视化**

In [None]:
pointnet = PointNet()
paddle.summary(pointnet, (64, 3, 2048))

---------------------------------------------------------------------------
 Layer (type)       Input Shape          Output Shape         Param #    
   Conv1D-1       [[64, 3, 2048]]       [64, 64, 2048]          256      
  BatchNorm-1     [[64, 64, 2048]]      [64, 64, 2048]          256      
    ReLU-1        [[64, 64, 2048]]      [64, 64, 2048]           0       
   Conv1D-2       [[64, 64, 2048]]     [64, 128, 2048]         8,320     
  BatchNorm-2    [[64, 128, 2048]]     [64, 128, 2048]          512      
    ReLU-2       [[64, 128, 2048]]     [64, 128, 2048]           0       
   Conv1D-3      [[64, 128, 2048]]     [64, 1024, 2048]       132,096    
  BatchNorm-3    [[64, 1024, 2048]]    [64, 1024, 2048]        4,096     
    ReLU-3       [[64, 1024, 2048]]    [64, 1024, 2048]          0       
  MaxPool1D-1    [[64, 1024, 2048]]     [64, 1024, 1]            0       
   Linear-1         [[64, 1024]]          [64, 512]           524,800    
    ReLU-4          [[64, 512]]     

{'total_params': 3475283, 'trainable_params': 3460179}

# **训练**

In [7]:
def train():
    train_loader = pointDataLoader(mode='train')
    val_loader = pointDataLoader(mode='val')

    model = PointNet(num_classes=16, num_point=2048)
    model.train()
    optim = paddle.optimizer.Adam(parameters=model.parameters(), weight_decay=0.001)

    epoch_num = 10
    for epoch in range(epoch_num):
        # train
        print("===================================train===========================================")
        for batch_id, data in enumerate(train_loader()):
            inputs = paddle.to_tensor(data[0])
            labels = paddle.to_tensor(data[1])

            predicts = model(inputs)
            loss = F.nll_loss(predicts, labels)
            acc = paddle.metric.accuracy(predicts, labels)        

            if batch_id % 20 == 0: 
                print("train: epoch: {}, batch_id: {}, loss is: {}, accuracy is: {}".format(epoch, batch_id, loss.numpy(), acc.numpy()))
            
            loss.backward()
            optim.step()
            optim.clear_grad()

        if epoch % 2 == 0:
            paddle.save(model.state_dict(), './model/PointNet.pdparams')
            paddle.save(optim.state_dict(), './model/PointNet.pdopt')
        
        # validation
        print("===================================val===========================================")
        model.eval()
        accuracies = []
        losses = []
        for batch_id, data in enumerate(val_loader()):
            inputs = paddle.to_tensor(data[0])
            labels = paddle.to_tensor(data[1])

            predicts = model(inputs)

            loss = F.nll_loss(predicts, labels)
            acc = paddle.metric.accuracy(predicts, labels)    
            
            losses.append(loss.numpy())
            accuracies.append(acc.numpy())

        avg_acc, avg_loss = np.mean(accuracies), np.mean(losses)
        print("validation: loss is: {}, accuracy is: {}".format(avg_loss, avg_acc))
        model.train()

if __name__ == '__main__':
    train()

train: epoch: 8, batch_id: 80, loss is: [0.16953929], accuracy is: [0.9765625]
validation: loss is: 0.1051618754863739, accuracy is: 0.9724576473236084
train: epoch: 9, batch_id: 0, loss is: [0.15017189], accuracy is: [0.9453125]
train: epoch: 9, batch_id: 20, loss is: [0.10286096], accuracy is: [0.9765625]
train: epoch: 9, batch_id: 40, loss is: [0.09501736], accuracy is: [0.9609375]
train: epoch: 9, batch_id: 60, loss is: [0.06682345], accuracy is: [0.9765625]
train: epoch: 9, batch_id: 80, loss is: [0.1222449], accuracy is: [0.96875]
validation: loss is: 0.10170484334230423, accuracy is: 0.9703390002250671


# **评估与测试**

In [10]:
def evaluation():
    test_loader = pointDataLoader(mode='test')
    model = PointNet()
    model_state_dict = paddle.load('./model/PointNet.pdparams')
    model.load_dict(model_state_dict)

    model.eval()
    accuracies = []
    losses = []
    for batch_id, data in enumerate(test_loader()):
        inputs = paddle.to_tensor(data[0])
        labels = paddle.to_tensor(data[1])

        predicts = model(inputs)

        loss = F.nll_loss(predicts, labels)
        acc = paddle.metric.accuracy(predicts, labels)    
        
        losses.append(loss.numpy())
        accuracies.append(acc.numpy())

    avg_acc, avg_loss = np.mean(accuracies), np.mean(losses)
    print("validation: loss is: {}, accuracy is: {}".format(avg_loss, avg_acc))

if __name__ == '__main__':
    evaluation()

validation: loss is: 0.14826710522174835, accuracy is: 0.9617254734039307
