<a href="https://colab.research.google.com/github/TechnoPolizzz/safety_doors/blob/main/PointNet_Cloud_Segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Загрузка обучающей выборки и обученной модели

In [90]:
from google_drive_downloader import GoogleDriveDownloader as gdd
gdd.download_file_from_google_drive(file_id="1smKIBz165fm1AY98Hbj3_zLZNbWMncjt", dest_path="/content/planes_pointcloud.zip", showsize="True", )
gdd.download_file_from_google_drive(file_id="1-ATgOBGLa2Veky77eRpuD-1ZvcSzC0rH", dest_path="/content/model/0_75.81492537313433", showsize="True", )

Downloading 1smKIBz165fm1AY98Hbj3_zLZNbWMncjt into /content/planes_pointcloud.zip... 
95.0 MiB Done.
Downloading 1-ATgOBGLa2Veky77eRpuD-1ZvcSzC0rH into /content/model/0_75.81492537313433... 
30.0 MiB Done.


In [None]:
!unzip planes_pointcloud.zip

In [None]:
root_dir = "02691156/"

# Подключение необходимых пакетов

In [None]:
!pip install path.py;
!pip install open3d
from path import Path
import sys
import plotly.graph_objects as go
import numpy as np
import scipy.spatial.distance
import math
import random
sys.path.append(root_dir)

Collecting path.py
  Downloading https://files.pythonhosted.org/packages/8f/04/130b7a538c25693c85c4dee7e25d126ebf5511b1eb7320e64906687b159e/path.py-12.5.0-py3-none-any.whl
Collecting path
  Downloading https://files.pythonhosted.org/packages/d3/2a/b0f97e1b736725f6ec48a8bd564ee1d1f3f945bb5d39cb44ef8bbe66bd14/path-15.1.2-py3-none-any.whl
Installing collected packages: path, path.py
Successfully installed path-15.1.2 path.py-12.5.0


# Считывание и подготовка данных

In [None]:
# Считываем облако точек и нормализуем

def read_pts(file):
    verts = np.genfromtxt(file)
    norm_pointcloud = verts - np.mean(verts, axis=0)
    norm_pointcloud /= np.max(np.linalg.norm(verts, axis=1))
    return  norm_pointcloud

# Считываем разметку для нашего облака

def read_seg(file):
    verts = np.genfromtxt(file, dtype= (int))
    return verts

# Преобразуем данные для отправки в нейронную сеть

def sample_2000(pts, pts_cat):    
    res1 = np.concatenate((pts,np.reshape(pts_cat, (pts_cat.shape[0], 1))), axis=1)
    print(res1)
    print(res1.shape)
    res = np.asarray(random.choices(res1, weights=None, cum_weights=None, k=2000))
    images = res[:, 0:3]
    categories = res[:, 3]
    categories-=np.ones(categories.shape)
    return images, categories

# Визуализируем облако точек из обучающей выборки

In [None]:
def showPointCloud(pcd):
  points = pcd
  fig = go.Figure(
    data=[
        go.Scatter3d(
            x=points[:,0], y=points[:,1], z=points[:,2], 
            mode='markers',
            marker=dict(size=1)
        )
    ],
    layout=dict(width=600, height=600,
        scene=dict(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            zaxis=dict(visible=False)
        )
    )
  )
  fig.show()

In [91]:
import open3d as o3d
import numpy as np
import plotly.graph_objects as go
img_path = '02691156/points/66a32714d2344d1bf52a658ce0ec2c1.pts'
seg_path = '02691156/expert_verified/points_label/66a32714d2344d1bf52a658ce0ec2c1.seg'

with open(img_path, 'r') as f:
  image1 = read_pts(f)
with open(seg_path, 'r') as f:  
  category1 = read_seg(f)

image2, category2 = sample_2000(image1, category1)

showPointCloud(image2)

[[-7.75697261e-01  1.47599009e-02 -2.70688497e-04  1.00000000e+00]
 [-8.33121497e-01  3.23210080e-01 -9.72420209e-03  3.00000000e+00]
 [-7.81791262e-01  5.59074256e-02 -5.92314454e-02  3.00000000e+00]
 ...
 [ 9.56666815e-02 -5.73263956e-02 -5.99149472e-01  4.00000000e+00]
 [ 2.05176391e-01 -9.98069945e-03  3.47738408e-01  2.00000000e+00]
 [ 1.07437999e-01 -1.20901612e-02  2.93517429e-01  2.00000000e+00]]
(2538, 4)


# Готовим модель нейронной сети

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

class Tnet(nn.Module):
   def __init__(self, k=3):
      super().__init__()
      self.k=k
      self.conv1 = nn.Conv1d(k,64,1)
      self.conv2 = nn.Conv1d(64,128,1)
      self.conv3 = nn.Conv1d(128,1024,1)
      self.fc1 = nn.Linear(1024,512)
      self.fc2 = nn.Linear(512,256)
      self.fc3 = nn.Linear(256,k*k)

      self.bn1 = nn.BatchNorm1d(64)
      self.bn2 = nn.BatchNorm1d(128)
      self.bn3 = nn.BatchNorm1d(1024)
      self.bn4 = nn.BatchNorm1d(512)
      self.bn5 = nn.BatchNorm1d(256)
       

   def forward(self, input):
      # input.shape == (bs,n,3)
      bs = input.size(0)
      xb = F.relu(self.bn1(self.conv1(input)))
      xb = F.relu(self.bn2(self.conv2(xb)))
      xb = F.relu(self.bn3(self.conv3(xb)))
      pool = nn.MaxPool1d(xb.size(-1))(xb)
      flat = nn.Flatten(1)(pool)
      xb = F.relu(self.bn4(self.fc1(flat)))
      xb = F.relu(self.bn5(self.fc2(xb)))
      
      #initialize as identity
      init = torch.eye(self.k, requires_grad=True).repeat(bs,1,1)
      if xb.is_cuda:
        init=init.cuda()
      matrix = self.fc3(xb).view(-1,self.k,self.k) + init
      return matrix


class Transform(nn.Module):
   def __init__(self):
        super().__init__()
        self.input_transform = Tnet(k=3)
        self.feature_transform = Tnet(k=128)
        self.fc1 = nn.Conv1d(3,64,1)
        self.fc2 = nn.Conv1d(64,128,1) 
        self.fc3 = nn.Conv1d(128,128,1)
        self.fc4 = nn.Conv1d(128,512,1)
        self.fc5 = nn.Conv1d(512,2048,1)

        
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(128)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(2048)

   def forward(self, input):
        n_pts = input.size()[2]
        matrix3x3 = self.input_transform(input)
        xb = torch.bmm(torch.transpose(input,1,2), matrix3x3).transpose(1,2)
        outs = []
        
        out1 = F.relu(self.bn1(self.fc1(xb)))
        outs.append(out1)
        out2 = F.relu(self.bn2(self.fc2(out1)))
        outs.append(out2)
        out3 = F.relu(self.bn3(self.fc3(out2)))
        outs.append(out3)
        matrix128x128 = self.feature_transform(out3)
        
        out4 = torch.bmm(torch.transpose(out3,1,2), matrix128x128).transpose(1,2) 
        outs.append(out4)
        out5 = F.relu(self.bn4(self.fc4(out4)))
        outs.append(out5)
       
        xb = self.bn5(self.fc5(out5))
        
        xb = nn.MaxPool1d(xb.size(-1))(xb)
        out6 = nn.Flatten(1)(xb).repeat(n_pts,1,1).transpose(0,2).transpose(0,1)#.repeat(1, 1, n_pts)
        outs.append(out6)
        
        
        return outs, matrix3x3, matrix128x128


class PointNetSeg(nn.Module):
    def __init__(self, classes = 10):
        super().__init__()
        self.transform = Transform()

        self.fc1 = nn.Conv1d(3008,256,1) 
        self.fc2 = nn.Conv1d(256,256,1) 
        self.fc3 = nn.Conv1d(256,128,1) 
        self.fc4 = nn.Conv1d(128,4,1) 
        

        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(256)
        
        self.bn3 = nn.BatchNorm1d(128)
        self.bn4 = nn.BatchNorm1d(4)
        
        self.logsoftmax = nn.LogSoftmax(dim=1)
        

    def forward(self, input):
        inputs, matrix3x3, matrix128x128 = self.transform(input)
        stack = torch.cat(inputs,1)
        
        xb = F.relu(self.bn1(self.fc1(stack)))
       
        xb = F.relu(self.bn2(self.fc2(xb)))
    
        xb = F.relu(self.bn3(self.fc3(xb)))
        
        output = F.relu(self.bn4(self.fc4(xb)))
        
        return self.logsoftmax(output), matrix3x3, matrix128x128



## Подготовка датасета для обучения

In [None]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.dataset import random_split

class Data(Dataset):

    def __init__(self, root_dir, valid=False, transform=None):
        
        self.root_dir = root_dir
        self.files = []
        self.valid=valid

        newdir = root_dir + 'expert_verified/points_label/'

        for file in os.listdir(newdir):
            o = {}
            o['category'] = newdir + file
            o['img_path'] = root_dir + 'points/'+ file.replace('.seg', '.pts')
            self.files.append(o)
       

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = self.files[idx]['img_path']
        category = self.files[idx]['category']
        with open(img_path, 'r') as f:
            image1 = read_pts(f)
        with open(category, 'r') as f:  
            category1 = read_seg(f)
        image2, category2 = sample_2000(image1, category1)
        if not self.valid:
            theta = random.random()*360
            # theta = random.random() * 2. * math.pi
            rot_matrix = np.array([[ math.cos(theta), -math.sin(theta),    0],
                                  [ math.sin(theta),  math.cos(theta),    0],
                                   [0,                           0,      1]])
            image2 = rot_matrix.dot(image2.T).T
        
        return {'image': np.array(image2, dtype="float32"), 'category': category2.astype(int)}


In [None]:
dset = Data(root_dir , transform=None)
train_num = int(len(dset) * 0.95)
val_num = int(len(dset) *0.05)
if int(len(dset)) - train_num -  val_num >0 :
    train_num = train_num + 1
elif int(len(dset)) - train_num -  val_num < 0:
    train_num = train_num -1
#train_dataset, val_dataset = random_split(dset, [3000, 118])
train_dataset, val_dataset = random_split(dset, [train_num, val_num])
val_dataset.valid=True

print('######### Dataset class created #########')
print('Number of images: ', len(dset))
print('Sample image shape: ', dset[0]['image'].shape)
#print('Sample image points categories', dset[0]['category'], end='\n\n')

train_loader = DataLoader(dataset=train_dataset, batch_size=64)
val_loader = DataLoader(dataset=val_dataset, batch_size=8)

#dataloader = torch.utils.data.DataLoader(dset, batch_size=4, shuffle=True, num_workers=4)

######### Dataset class created #########
Number of images:  2690
[[ 0.08509474 -0.09802865  0.63680208  4.        ]
 [-0.04310084  0.00923188 -0.0895459   1.        ]
 [ 0.5265012   0.04629676  0.09032695  1.        ]
 ...
 [ 0.08689319  0.01035591  0.41725098  2.        ]
 [-0.9619108   0.01074932 -0.42057264  3.        ]
 [ 0.32504698  0.08797016  0.04584347  1.        ]]
(2487, 4)
Sample image shape:  (2000, 3)


## Обучаем нейронную сеть

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [None]:
pointnet = PointNetSeg()

In [None]:
pointnet.to(device);

In [None]:
optimizer = torch.optim.Adam(pointnet.parameters(), lr=0.001)

In [None]:
def pointnetloss(outputs, labels, m3x3, m128x128, alpha = 0.0001):
    criterion = torch.nn.NLLLoss()
    bs=outputs.size(0)
    id3x3 = torch.eye(3, requires_grad=True).repeat(bs,1,1)
    id128x128 = torch.eye(128, requires_grad=True).repeat(bs,1,1)
    if outputs.is_cuda:
        id3x3=id3x3.cuda()
        id128x128=id128x128.cuda()
    diff3x3 = id3x3-torch.bmm(m3x3,m3x3.transpose(1,2))
    diff128x128 = id128x128-torch.bmm(m128x128,m128x128.transpose(1,2))
    return criterion(outputs, labels) + alpha * (torch.norm(diff3x3)+torch.norm(diff128x128)) / float(bs)
        

In [None]:
def train(model, train_loader, val_loader=None,  epochs=5, save=True):
    for epoch in range(epochs): 
        pointnet.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data['image'].to(device), data['category'].to(device)
            optimizer.zero_grad()
            outputs, m3x3, m64x64 = pointnet(inputs.transpose(1,2))

            loss = pointnetloss(outputs, labels, m3x3, m64x64)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 10 == 9:    # print every 10 mini-batches
                    print('Эпоха: %d, Батч: %5d Потери: %.3f' %
                        (epoch + 1, i + 1, running_loss / 10))
                    running_loss = 0.0

        pointnet.eval()
        correct = total = 0

        # validation
        if val_loader:
            with torch.no_grad():
                for data in val_loader:
                    inputs, labels = data['image'].to(device), data['category'].to(device)
                    outputs, __, __ = pointnet(inputs.transpose(1,2))
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0) * labels.size(1) ##
                    correct += (predicted == labels).sum().item()
            val_acc = 100 * correct / total
            print('Точность на валидационной выборке: %d %%' % val_acc)

        # save the model
        if save:
            torch.save(pointnet.state_dict(), "model/"+str(epoch)+"_"+str(val_acc))


In [None]:
train(pointnet, train_loader, val_loader, epochs=1,  save=True)


[1,    10] loss: 1.237
[1,    20] loss: 0.993
[1,    30] loss: 0.900
[1,    40] loss: 0.845
Valid accuracy: 75 %


## Тестирование нейронной сети
Тестирование на батче валидационной выборки

In [None]:
pointnet = PointNetSeg()
pointnet.load_state_dict(torch.load('model/0_75.81492537313433'))
pointnet.eval()

PointNetSeg(
  (transform): Transform(
    (input_transform): Tnet(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (feature_transform): Tnet(
      (conv1): Conv1d(128, 64, kernel_size

In [None]:
batch = next(iter(val_loader))
pred = pointnet(batch['image'].transpose(1,2))
pred_np = np.array(torch.argmax(pred[0],1));

[[ 9.63947928e-01 -1.95178757e-03 -4.99362001e-02  1.00000000e+00]
 [ 3.79412559e-01 -8.41268353e-03 -3.22969879e-01  4.00000000e+00]
 [-6.21512686e-01  2.25909685e-01 -1.29009388e-02  3.00000000e+00]
 ...
 [ 1.75799721e-01 -1.01379299e-01  5.50601174e-02  1.00000000e+00]
 [ 8.41461235e-01 -1.00838639e-01  5.54656129e-02  1.00000000e+00]
 [ 3.57515798e-01 -1.34137955e-02  2.33234867e-01  4.00000000e+00]]
(2384, 4)
pred (tensor([[[-3.9971e+01, -5.4357e+01, -5.3183e+01,  ..., -3.5519e+01,
          -4.0326e+01, -3.7167e+01],
         [-3.9417e+01, -5.2709e+01, -5.1742e+01,  ..., -3.4926e+01,
          -3.9504e+01, -3.6511e+01],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -1.5543e-15,
           0.0000e+00, -2.2204e-16],
         [-3.9971e+01, -5.4357e+01, -5.3183e+01,  ..., -3.5519e+01,
          -4.0326e+01, -3.7167e+01]]], grad_fn=<LogSoftmaxBackward>), tensor([[[ 2.4835,  1.6424,  0.1980],
         [ 2.2386, -0.1293, -0.5537],
         [-2.0382,  1.3798,  0.4940]]], grad_fn

In [None]:
acc = (pred_np==np.array(batch['category']))
resulting_acc = np.sum(acc, axis=1) / 2000
print('Точность: %f' % resulting_acc[0])

Точность: 0.863000


Визуализируем результат обработки облака

In [None]:
x,y,z=np.array(batch['image'][0]).T
c = np.array(batch['category'][0]).T

fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z, 
                                   mode='markers',
                                   marker=dict(
        size=1000,
        color=c,                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=1.0
    ))])
fig.update_traces(marker=dict(size=5,
                              line=dict(width=5,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()

Проверяем на своем облаке точек

In [None]:
pcl = o3d.io.read_point_cloud('cloud_0_1620665797175109.pcd')
xyz = np.asarray(pcl.points)
print(xyz.shape)
res = np.asarray(random.choices(xyz, weights=None, cum_weights=None, k=2000))
print(res.shape)
showPointCloud(res)

(234474, 3)
(2000, 3)


In [None]:
res = np.asarray(random.choices(xyz, weights=None, cum_weights=None, k=2000))
res = np.expand_dims(res, axis=0)
res = torch.from_numpy(res)
res = res.type(torch.float32)
pred = pointnet(res.transpose(1,2))
pred_np = np.array(torch.argmax(pred[0],1));

In [None]:
x,y,z=np.array(res[0]).T
c = pred_np[0].T

fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z, 
                                   mode='markers',
                                   marker=dict(
        size=1000,
        color=c,                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=1.0
    ))])
fig.update_traces(marker=dict(size=5,
                              line=dict(width=5,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()