In [9]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Feb 18 17:25:01 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   61C    P0    29W /  N/A |    865MiB /  5934MiB |     28%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

导入相关库

In [10]:
import  torch.utils.data as data
import os
import torch
import numpy as np
import glob
import pandas as pd
import torch.nn.functional as F
from torch import nn
import torch.optim as optim
from tqdm import  tqdm
from torchsummary import summary
from torch.autograd import Variable

制作数据载入器

In [16]:
root = "/workspace/Data/modelnet40_normal_resampled"
npoints = 9999
split = "train"
data_augmentation = True

In [17]:
cls_names = glob.glob(os.path.join(root,"*/*.txt"))
cls_names = [p.split("/")[4] for p in cls_names]
cls_names = np.unique(cls_names)

In [18]:
clsname_to_index = dict((name,index) for index,name in enumerate(cls_names))

In [21]:
class ModelNetDataset(data.Dataset):
  def __init__(self,
               root,
               classmap,
               npoints = 9999,
               split = "train",
               data_augmentation = True
               ):
    self.classmap = classmap
    self.npoints = npoints
    self.root = root
    self.split = split
    self.data_augmentation = data_augmentation
    self.point_paths = []
    self.labels = []

    list_file = None
    if split == "train":
      list_file = os.path.join(root,"modelnet40_train.txt")
    else:
      list_file = os.path.join(root,"modelnet40_test.txt")

    # 提取对应的文件列表
    files = []
    if list_file != None:
      with open(list_file, 'r',encoding="utf-8") as f:
        for line in f.readlines():
          file_name = line.replace("\n","")
          files.append(file_name) 
    else:
      print("ERROR:list file not exist!!!")

    #print(files)

    paths = glob.glob(os.path.join(root,"*/*.txt"))
    self.point_paths = [p for p in paths if p.split("/")[5].replace(".txt","") in files]
    self.labels = [self.classmap.get(p.split("/")[4]) for p in self.point_paths]

  def __getitem__(self, index):
    point_path = self.point_paths[index]
    label = self.labels[index]
    points = pd.read_csv(point_path).iloc[:,0:3]
    points = np.asarray(points)

    # 归一化
    points = points - np.expand_dims(np.mean(points,axis=0),0)
    dist = np.max(np.sqrt(np.sum(points **2,axis=1)),0)
    points = points/dist

    # 随机旋转角度和添加噪声
    if (self.split == "train" and self.data_augmentation):
      theta = np.random.uniform(0, np.pi * 2)
      rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
      points[:, [0, 2]] = points[:, [0, 2]].dot(rotation_matrix)  # random rotation
      points += np.random.normal(0, 0.02, size=points.shape)  # random jitter

    points = torch.from_numpy(points.astype(np.float32))
    label = torch.from_numpy(np.array([label]).astype(np.int64))

    return points,label

  def __len__(self):
    return len(self.point_paths)



In [22]:
traindatasets = ModelNetDataset(root,clsname_to_index,data_augmentation=False)

In [23]:
train_dataloader = torch.utils.data.DataLoader(traindatasets,
                                               batch_size = 8,
                                               shuffle = True)

In [24]:
testdatasets = ModelNetDataset(root,clsname_to_index,split = "test",data_augmentation=False)

In [25]:
test_dataloader = torch.utils.data.DataLoader(testdatasets,
                                               batch_size = 8,
                                               shuffle = True)

定义网络

In [26]:
class STN3d(nn.Module): # 未保证旋转矩阵的特性
  def __init__(self):
    super(STN3d, self).__init__()
    self.conv1 = torch.nn.Conv1d(3, 64, 1)
    self.conv2 = torch.nn.Conv1d(64, 128, 1)
    self.conv3 = torch.nn.Conv1d(128, 1024, 1)
    self.fc1 = nn.Linear(1024, 512)
    self.fc2 = nn.Linear(512, 256)
    self.fc3 = nn.Linear(256, 9)
    self.relu = nn.ReLU()

    self.bn1 = nn.BatchNorm1d(64)
    self.bn2 = nn.BatchNorm1d(128)
    self.bn3 = nn.BatchNorm1d(1024)
    self.bn4 = nn.BatchNorm1d(512)
    self.bn5 = nn.BatchNorm1d(256)


  def forward(self, x):
    batchsize = x.size()[0]
    x = F.relu(self.bn1(self.conv1(x)))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.relu(self.bn3(self.conv3(x)))
    x = torch.max(x, 2, keepdim=True)[0]
    x = x.view(-1, 1024)

    x = F.relu(self.bn4(self.fc1(x)))
    x = F.relu(self.bn5(self.fc2(x)))
    x = self.fc3(x)

    iden = Variable(torch.from_numpy(np.array([1,0,0,0,1,0,0,0,1]).astype(np.float32))).view(1,9).repeat(batchsize,1)
    if x.is_cuda:
      iden = iden.cuda()
    x = x + iden
    x = x.view(-1, 3, 3)
    return x

In [27]:
class PointNetfeat(nn.Module):
  def __init__(self):
    super(PointNetfeat, self).__init__()
    self.stn = STN3d()
    self.conv1 = torch.nn.Conv1d(3, 64, 1)
    self.conv2 = torch.nn.Conv1d(64, 128, 1)
    self.conv3 = torch.nn.Conv1d(128, 1024, 1)
    self.bn1 = nn.BatchNorm1d(64)
    self.bn2 = nn.BatchNorm1d(128)
    self.bn3 = nn.BatchNorm1d(1024)

  def forward(self, x):
    trans = self.stn(x)
    x = x.transpose(2, 1)
    x = torch.bmm(x, trans)
    x = x.transpose(2, 1)
    x = F.relu(self.bn1(self.conv1(x)))

    x = F.relu(self.bn2(self.conv2(x)))
    x = self.bn3(self.conv3(x))
    x = torch.max(x, 2, keepdim=True)[0]
    x = x.view(-1, 1024)
      
    return x

In [28]:
class PointNetCls(nn.Module):
  def __init__(self, k=40):
    super(PointNetCls, self).__init__()
    self.feat = PointNetfeat()
    self.fc1 = nn.Linear(1024, 512)
    self.fc2 = nn.Linear(512, 256)
    self.fc3 = nn.Linear(256, k)
    self.dropout = nn.Dropout(p=0.3)
    self.bn1 = nn.BatchNorm1d(512,momentum=1.0)
    self.bn2 = nn.BatchNorm1d(256,momentum=1.0)
    self.relu = nn.ReLU()

  def forward(self, x):
    x = self.feat(x)
    x = F.relu(self.bn1(self.fc1(x)))
    x = F.relu(self.bn2(self.dropout(self.fc2(x))))
    x = self.fc3(x)
    return F.log_softmax(x, dim=1)

In [29]:
classifier = PointNetCls(k=40)

In [30]:
classifier.cuda()

PointNetCls(
  (feat): PointNetfeat(
    (stn): STN3d(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (relu): ReLU()
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
    (co

In [31]:
summary(classifier,(3,9999))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1             [-1, 64, 9999]             256
       BatchNorm1d-2             [-1, 64, 9999]             128
            Conv1d-3            [-1, 128, 9999]           8,320
       BatchNorm1d-4            [-1, 128, 9999]             256
            Conv1d-5           [-1, 1024, 9999]         132,096
       BatchNorm1d-6           [-1, 1024, 9999]           2,048
            Linear-7                  [-1, 512]         524,800
       BatchNorm1d-8                  [-1, 512]           1,024
            Linear-9                  [-1, 256]         131,328
      BatchNorm1d-10                  [-1, 256]             512
           Linear-11                    [-1, 9]           2,313
            STN3d-12                 [-1, 3, 3]               0
           Conv1d-13             [-1, 64, 9999]             256
      BatchNorm1d-14             [-1, 6

In [32]:
optimizer = optim.Adam(classifier.parameters(), lr=0.001, betas=(0.9, 0.999))
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

In [33]:
num_batch = len(traindatasets) / 8

In [34]:
blue = lambda x: '\033[94m' + x + '\033[0m'

In [None]:
for epoch in range(15):
  scheduler.step()
  for i, data in enumerate(train_dataloader, 0):
    points, target = data
    target = target[:, 0]
    points = points.transpose(2, 1)
    points, target = points.cuda(), target.cuda()
    classifier = classifier.train()
    optimizer.zero_grad()
    pred = classifier(points)
    loss = F.nll_loss(pred, target)
    loss.backward()
    optimizer.step()
    pred_choice = pred.data.max(1)[1]
    correct = pred_choice.eq(target.data).cpu().sum()
    print('[%d: %d/%d] train loss: %f accuracy: %f' % (epoch, i, num_batch, loss.item(), correct.item() / float(8)))

    if i % 10 == 0:
      j, data = next(enumerate(train_dataloader, 0))
      points, target = data
      target = target[:, 0]
      points = points.transpose(2, 1)
      points, target = points.cuda(), target.cuda()
      classifier = classifier.eval()
      pred = classifier(points)
      loss = F.nll_loss(pred, target)
      pred_choice = pred.data.max(1)[1]
      correct = pred_choice.eq(target.data).cpu().sum()
      print('[%d: %d/%d] %s loss: %f accuracy: %f' % (epoch, i, num_batch, blue('test'), loss.item(), correct.item()/float(8)))


In [39]:
total_correct = 0
total_testset = 0
for i,data in tqdm(enumerate(test_dataloader, 0)):
  points, target = data
  target = target[:, 0]
  points = points.transpose(2, 1)
  points, target = points.cuda(), target.cuda()
  classifier = classifier.eval()
  pred = classifier(points)
  pred_choice = pred.data.max(1)[1]
  correct = pred_choice.eq(target.data).cpu().sum()
  total_correct += correct.item()
  total_testset += points.size()[0]


309it [00:27, 11.27it/s]


In [40]:
print("final accuracy {}".format(total_correct / float(total_testset)))

final accuracy 0.836709886547812
