<a href="https://colab.research.google.com/github/Tinameow/175PointCloud/blob/master/2D_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import math
import random
import os
import torch
import scipy.spatial.distance
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import plotly.graph_objects as go
import plotly.express as px

##
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.cm as cm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T


## Get dataset

In [2]:
pip install path 

Collecting path
  Downloading https://files.pythonhosted.org/packages/ce/76/08fdf5988b815f40a4a26a9b63052ebf7c35d677591d93d0a61bfc63379c/path-14.0.1-py3-none-any.whl
Installing collected packages: path
Successfully installed path-14.0.1


In [5]:
!wget http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip

--2020-05-28 05:39:21--  http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip
Resolving 3dvision.princeton.edu (3dvision.princeton.edu)... 128.112.136.61
Connecting to 3dvision.princeton.edu (3dvision.princeton.edu)|128.112.136.61|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 473402300 (451M) [application/zip]
Saving to: ‘ModelNet10.zip’


2020-05-28 05:39:29 (59.8 MB/s) - ‘ModelNet10.zip’ saved [473402300/473402300]



In [0]:
!unzip -q ModelNet10.zip

In [8]:
from path import Path

path = Path("ModelNet10")
# Every sub-directory of the dataset root is one object category; sorting keeps
# the category -> integer label mapping stable between runs.
folders = sorted(entry for entry in os.listdir(path) if os.path.isdir(path/entry))
classes = dict(zip(folders, range(len(folders))))
classes

{'bathtub': 0,
 'bed': 1,
 'chair': 2,
 'desk': 3,
 'dresser': 4,
 'monitor': 5,
 'night_stand': 6,
 'sofa': 7,
 'table': 8,
 'toilet': 9}

{'bathtub': 0,
 'bed': 1,
 'chair': 2,
 'desk': 3,
 'dresser': 4,
 'monitor': 5,
 'night_stand': 6,
 'sofa': 7,
 'table': 8,
 'toilet': 9}

In [0]:
def read_off(file):
    """Parse a triangle mesh in Object File Format (OFF) from an open text file.

    Parameters
    ----------
    file : text file object
        Positioned at the start of the OFF data. Lines are consumed with
        ``readline``.

    Returns
    -------
    verts : list of list of float
        One ``[x, y, z, ...]`` entry per vertex line.
    faces : list of list of int
        Vertex indices of every face; the leading per-face vertex count is
        dropped.

    Raises
    ------
    ValueError
        If the first line does not start with ``OFF`` or a numeric field
        cannot be parsed.
    """
    header = file.readline().strip()
    if not header.startswith('OFF'):
        # the original `raise('...')` raised a plain string, which itself fails
        # with TypeError; raise a real exception instead
        raise ValueError('Not a valid OFF header')

    # Tolerate headers where the counts follow the magic word on the same line
    # (e.g. "OFF490 518 0") instead of sitting on a line of their own.
    counts_line = header[3:].strip() or file.readline().strip()
    n_verts, n_faces, __ = (int(s) for s in counts_line.split())

    # split() without an argument copes with tabs and repeated spaces
    verts = [[float(s) for s in file.readline().split()] for _ in range(n_verts)]
    faces = [[int(s) for s in file.readline().split()][1:] for _ in range(n_faces)]
    return verts, faces

## Sample 3D points from the mesh surface

In [0]:
class PointSampler(object):
    """Draw a fixed number of random surface points from a triangle mesh.

    Faces are picked (with replacement) with probability proportional to their
    area, and one random point is then drawn inside every picked face.
    """

    def __init__(self, output_size):
        """Store the number of points to sample; it must be an ``int``."""
        assert isinstance(output_size, int)
        self.output_size = output_size

    def triangle_area(self, pt1, pt2, pt3):
        """Return the area of triangle (pt1, pt2, pt3) using Heron's formula."""
        side_a, side_b, side_c = (np.linalg.norm(pt1 - pt2),
                                  np.linalg.norm(pt2 - pt3),
                                  np.linalg.norm(pt3 - pt1))
        half = 0.5 * ( side_a + side_b + side_c)
        squared = half * (half - side_a) * (half - side_b) * (half - side_c)
        # clamp at zero: rounding can make the product slightly negative
        return max(squared, 0)**0.5

    def sample_point(self, pt1, pt2, pt3):
        """Return a random point of the triangle as an ``(x, y, z)`` tuple.

        Two sorted random numbers are turned into barycentric weights, see
        https://mathworld.wolfram.com/BarycentricCoordinates.html
        """
        lo, hi = sorted([random.random(), random.random()])
        return tuple(lo * pt1[axis] + (hi-lo)*pt2[axis] + (1-hi)*pt3[axis]
                     for axis in range(3))

    def __call__(self, mesh):
        """Sample ``output_size`` points from ``mesh = (verts, faces)``.

        Returns a numpy array of shape ``(output_size, 3)``.
        """
        verts, faces = mesh
        verts = np.array(verts)

        # per-face area, used as the sampling weight
        areas = np.zeros((len(faces)))
        for idx, face in enumerate(faces):
            areas[idx] = self.triangle_area(verts[face[0]],
                                            verts[face[1]],
                                            verts[face[2]])

        chosen_faces = random.choices(faces, weights=areas, k=self.output_size)

        sampled_points = np.zeros((self.output_size, 3))
        for row, face in enumerate(chosen_faces):
            sampled_points[row] = self.sample_point(verts[face[0]],
                                                    verts[face[1]],
                                                    verts[face[2]])

        return sampled_points

In [0]:
class Normalize(object):
    """Centre a point cloud on its centroid and scale it into the unit sphere."""

    def __call__(self, pointcloud):
        """Return a normalised copy of a 2-D ``(points, coordinates)`` array.

        The input array is left untouched. After the call the farthest point
        lies at distance 1 from the origin.
        """
        assert pointcloud.ndim == 2

        centered = pointcloud - pointcloud.mean(axis=0)
        radius = np.linalg.norm(centered, axis=1).max()
        centered /= radius

        return centered

## Render 2D views of the point cloud

In [0]:
def makerotation(rx,ry,rz):
    """
    Build the 3x3 rotation matrix ``Rz @ Ry @ Rx``.

    Parameters
    ----------
    rx,ry,rz : floats
        Rotation angles about the x, y and z axes, given in degrees

    Returns
    -------
    R : 2D numpy.array (dtype=float)
        Rotation matrix of shape (3,3)
    """
    # degrees -> radians
    ax, ay, az = (np.pi*angle/180.0 for angle in (rx, ry, rz))

    cx, sx = np.cos(ax), np.sin(ax)
    cy, sy = np.cos(ay), np.sin(ay)
    cz, sz = np.cos(az), np.sin(az)

    Rx = np.array([[1,  0,   0],
                   [0, cx, -sx],
                   [0, sx,  cx]])
    Ry = np.array([[cy, 0, -sy],
                   [0,  1,   0],
                   [sy, 0,  cy]])
    Rz = np.array([[cz, -sz, 0],
                   [sz,  cz, 0],
                   [0,    0, 1]])

    return (Rz @ Ry @ Rx)

class Camera:
    """
    A simple data structure describing camera parameters 
    
    The parameters describing the camera
    cam.f : float   --- camera focal length (in units of pixels)
    cam.c : 2x1 vector  --- offset of principal point
    cam.R : 3x3 matrix --- camera rotation
    cam.t : 3x1 vector --- camera translation 

    
    """    
    def __init__(self,f,c,R,t):
        """Store the intrinsic (f, c) and extrinsic (R, t) parameters as given."""
        self.f = f
        self.c = c
        self.R = R
        self.t = t

    def __str__(self):
        """Multi-line, human-readable dump of the four camera parameters."""
        return f'Camera : \n f={self.f} \n c={self.c.T} \n R={self.R} \n t = {self.t.T}'
    
    def project(self,pts3):
        """
        Project the given 3D points in world coordinates into the specified camera    

        Points are first expressed in the camera frame as ``R.T @ (pts3 - t)``,
        divided by their depth (third camera coordinate), scaled by the focal
        length and finally shifted by the principal point. A point whose depth
        is zero therefore yields non-finite image coordinates.

        Parameters
        ----------
        pts3 : 2D numpy.array (dtype=float)
            Coordinates of N points stored in a array of shape (3,N)

        Returns
        -------
        pts2 : 2D numpy.array (dtype=float)
            Image coordinates of N points stored in an array of shape (2,N)

        """
        assert(pts3.shape[0]==3)

        # get point location relative to camera
        pcam = self.R.transpose() @ (pts3 - self.t)
         
        # project
        p = self.f * (pcam / pcam[2,:])
        
        # offset principal point
        pts2 = p[0:2,:] + self.c
        
        assert(pts2.shape[1]==pts3.shape[1])
        assert(pts2.shape[0]==2)
    
        return pts2
 
    def update_extrinsics(self,params):
        """
        Given a vector of extrinsic parameters, update the camera
        to use the provided parameters.
  
        Parameters
        ----------
        params : 1D numpy.array (dtype=float)
            Camera parameters we are optimizing over stored in a vector of
            six values: params[0:3] are the rotation angles about x, y and z
            (passed to makerotation, i.e. in degrees) and params[3:6] are the
            translation

        """
        self.R = makerotation(params[0],params[1],params[2])
        self.t = np.array([[params[3]],[params[4]],[params[5]]])

In [0]:
# Three cameras sharing focal length 25 and principal point (16, 16); each one
# is translated 2 units along a different world axis (y, x and z respectively).
cam1 = Camera(
    f=25,
    c=np.array([[16, 16]]).T,
    R=makerotation(90, 0, 0),
    t=np.array([[0, 2, 0]]).T,
)
cam2 = Camera(
    f=25,
    c=np.array([[16, 16]]).T,
    R=makerotation(0, 90, 0),
    t=np.array([[2, 0, 0]]).T,
)
cam3 = Camera(
    f=25,
    c=np.array([[16, 16]]).T,
    R=makerotation(180, 0, 0),
    t=np.array([[0, 0, 2]]).T,
)

## Optional data augmentation: random rotation and noise

In [0]:
class RandRotation_z(object):
    """Augmentation: rotate a point cloud about the z axis by a random angle."""

    def __call__(self, pointcloud):
        """Return a rotated copy of a 2-D ``(points, 3)`` array.

        The angle is drawn uniformly from [0, 2*pi) with the ``random`` module.
        """
        assert pointcloud.ndim == 2

        theta = random.random() * 2. * math.pi
        cos_t, sin_t = math.cos(theta), math.sin(theta)
        rot_matrix = np.array([[cos_t, -sin_t, 0],
                               [sin_t,  cos_t, 0],
                               [0,      0,     1]])

        # points are stored as rows, so rotate the transposed cloud and flip back
        return (rot_matrix @ pointcloud.T).T
    
class RandomNoise(object):
    """Augmentation: jitter every coordinate with zero-mean Gaussian noise."""

    def __call__(self, pointcloud):
        """Return ``pointcloud`` plus N(0, 0.02) noise drawn via ``np.random``.

        The input must be a 2-D array; it is not modified.
        """
        assert pointcloud.ndim == 2

        jitter = np.random.normal(0, 0.02, (pointcloud.shape))
        return pointcloud + jitter

## Creating data point

In [0]:
class create_data_point(object):
    """Rasterise a point cloud into one hit-count image per camera.

    Parameters
    ----------
    cams : sequence
        Objects exposing ``project(pts3)`` that map a (3, N) array to (2, N)
        image coordinates.
    size : int, optional
        Side length of the square output images (default 32, the value that
        was previously hard-coded).
    """

    def __init__(self, cams, size=32):
        self.cams = cams
        self.size = size

    def __call__(self, pointcloud):
        """Return an array of shape ``(len(cams), size, size)``.

        Each pixel counts the projected points that land on it, and the whole
        stack is divided by its single largest count. Points projecting
        outside the image are ignored: previously negative indices silently
        wrapped to the opposite edge and indices >= size raised IndexError.
        """
        size = self.size
        pts2 = np.zeros((len(self.cams), size, size))

        for i, cam in enumerate(self.cams):
            # row 0 of the projection is the image column, row 1 the image row
            cols, rows = cam.project(pointcloud.T).astype(int)
            inside = (cols >= 0) & (cols < size) & (rows >= 0) & (rows < size)
            # add.at accumulates repeated (row, col) pairs, unlike `+=` with
            # fancy indexing which would count each pixel only once
            np.add.at(pts2[i], (rows[inside], cols[inside]), 1)

        # skip the normalisation when nothing was hit to avoid 0/0 -> NaN
        peak = pts2.max() if pts2.size else 0
        if peak > 0:
            pts2 /= peak
        return pts2

In [0]:
class create_3_view(object):
    """Rasterise a point cloud into three axis-aligned views (xy, yz, xz).

    NOTE(review): the previous implementation assigned (N, 2) coordinate
    slices straight into (32, 32) image planes, which always raised a
    broadcasting error. This version bins the coordinates into hit-count
    images instead; confirm that this matches the intended representation.

    Parameters
    ----------
    size : int, optional
        Side length of the square output images (default 32).
    """

    # (column axis, row axis) of the cloud used by each of the three views
    VIEW_AXES = ((0, 1), (1, 2), (0, 2))

    def __init__(self, size=32):
        self.size = size

    def __call__(self, pointcloud):
        """Return an array of shape ``(3, size, size)`` normalised to max 1.

        Coordinates are assumed to lie in [-1, 1]; anything outside that range
        is clipped onto the border pixels.
        """
        pointcloud = np.asarray(pointcloud, dtype=float)
        assert pointcloud.ndim == 2 and pointcloud.shape[1] == 3

        size = self.size
        # map [-1, 1] linearly onto pixel indices 0 .. size-1
        pix = np.floor((pointcloud + 1.0) * 0.5 * size).astype(int)
        pix = np.clip(pix, 0, size - 1)

        pts2 = np.zeros((3, size, size))
        for view, (col_axis, row_axis) in enumerate(self.VIEW_AXES):
            # add.at accumulates points that fall on the same pixel
            np.add.at(pts2[view], (pix[:, row_axis], pix[:, col_axis]), 1)

        # skip the normalisation for an empty cloud to avoid 0/0 -> NaN
        peak = pts2.max()
        if peak > 0:
            pts2 /= peak
        return pts2

## Creating data set

In [0]:
class ToTensor(object):
    """Final pipeline step: hand a numpy array over to PyTorch."""

    def __call__(self, pointcloud):
        """Wrap ``pointcloud`` with ``torch.from_numpy`` and return the tensor."""
        tensor = torch.from_numpy(pointcloud)
        return tensor

In [0]:
# Same three cameras as in the earlier camera cell, rebuilt here so that the
# transform pipelines below can be created from this cell alone.
cam1 = Camera(
    f=25,
    c=np.array([[16, 16]]).T,
    R=makerotation(90, 0, 0),
    t=np.array([[0, 2, 0]]).T,
)
cam2 = Camera(
    f=25,
    c=np.array([[16, 16]]).T,
    R=makerotation(0, 90, 0),
    t=np.array([[2, 0, 0]]).T,
)
cam3 = Camera(
    f=25,
    c=np.array([[16, 16]]).T,
    R=makerotation(180, 0, 0),
    t=np.array([[0, 0, 2]]).T,
)
cams = [cam1, cam2, cam3]


# Training pipeline: mesh -> sampled cloud -> augmentation -> camera images -> tensor
train_transforms = transforms.Compose([
    PointSampler(1024),       # 1024 random surface points per mesh
    Normalize(),              # centre and scale the cloud
    RandRotation_z(),         # augmentation: random rotation about z
    RandomNoise(),            # augmentation: Gaussian jitter
    create_data_point(cams),  # one image per camera in `cams`
    ToTensor(),
])

# Variant of the training pipeline that ends in create_3_view instead of the
# camera projection.
transforms_3views = transforms.Compose([
    PointSampler(1024),   # 1024 random surface points per mesh
    Normalize(),          # centre and scale the cloud
    RandRotation_z(),     # augmentation: random rotation about z
    RandomNoise(),        # augmentation: Gaussian jitter
    create_3_view(),
    ToTensor(),
])

def default_transforms():
    """Build the augmentation-free pipeline: sample, normalise, project, tensorise.

    Uses the module-level ``cams`` list for the projection step.
    """
    steps = [
        PointSampler(1024),
        Normalize(),
        create_data_point(cams),
        ToTensor(),
    ]
    return transforms.Compose(steps)

In [0]:
class PointCloudData(Dataset):
    """Dataset of ``.off`` meshes laid out as ``root_dir/<category>/<folder>/*.off``.

    Parameters
    ----------
    root_dir : path-like
        Dataset root; every sub-directory is treated as one category.
    valid : bool, optional
        When True the ``transform`` argument is ignored and
        ``default_transforms()`` is used instead.
    folder : str, optional
        Split sub-directory read inside every category (default "train").
    transform : callable, optional
        Applied to the ``(verts, faces)`` tuple of each mesh.
    """

    def __init__(self, root_dir, valid=False, folder="train", transform=default_transforms()):
        self.root_dir = root_dir
        base_dir = Path(root_dir)  # also accept a plain string as root
        class_names = [name for name in sorted(os.listdir(base_dir))
                       if os.path.isdir(base_dir/name)]
        self.classes = {name: i for i, name in enumerate(class_names)}
        self.transforms = transform if not valid else default_transforms()
        self.valid = valid
        self.files = []
        for category in self.classes:
            new_dir = base_dir/category/folder
            # os.listdir order is arbitrary; sort so that a given index always
            # refers to the same file
            for file in sorted(os.listdir(new_dir)):
                if file.endswith('.off'):
                    self.files.append({'pcd_path': new_dir/file,
                                       'category': category})

    def __len__(self):
        """Number of ``.off`` files found for the selected split."""
        return len(self.files)

    def __preproc__(self, file):
        """Read one open OFF file and run it through the configured transforms.

        Without a transform the raw ``(verts, faces)`` tuple is returned;
        previously this case failed with UnboundLocalError.
        """
        verts, faces = read_off(file)
        if self.transforms:
            return self.transforms((verts, faces))
        return verts, faces

    def __getitem__(self, idx):
        """Return ``{'pointcloud': <transformed mesh>, 'category': <int label>}``."""
        pcd_path = self.files[idx]['pcd_path']
        category = self.files[idx]['category']
        with open(pcd_path, 'r') as f:
            pointcloud = self.__preproc__(f)
        return {'pointcloud': pointcloud,
                'category': self.classes[category]}

In [0]:
def train(model, loss_fn, optimizer, num_epochs = 1, save = False, loader = None):
    """Run a basic supervised training loop.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; batches are moved to the device of its parameters.
    loss_fn : callable
        Called as ``loss_fn(scores, labels)``.
    optimizer : torch.optim.Optimizer
        Optimiser already bound to the model's parameters.
    num_epochs : int, optional
        Number of passes over the loader.
    save : bool, optional
        When True, write ``save_<epoch>.pth`` with the state dict after every
        epoch.
    loader : iterable, optional
        Yields dicts with 'pointcloud' and 'category' tensors. Defaults to the
        notebook-level ``train_loader`` for backward compatibility.
    """
    if loader is None:
        loader = train_loader

    # follow the model's device rather than relying on a global `device`
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, data in enumerate(loader):
            x_var = data['pointcloud'].to(run_device).float()
            y_var = data['category'].to(run_device)

            scores = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % 10 == 0:
                # .item() extracts the Python float (replaces deprecated .data)
                print('loss = %.4f' % loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if save:
            torch.save(model.state_dict(), "save_"+str(epoch)+".pth")
    
            
def check_accuracy(model, loader):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network producing one score per class; it is switched to eval mode.
    loader : iterable
        Yields dicts with 'pointcloud' and 'category' tensors.

    Returns
    -------
    float
        Fraction of correctly classified samples (0.0 for an empty loader).
    """
    num_correct = 0
    num_samples = 0

    # follow the model's device rather than relying on a global `device`
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in loader:
            x_var = data['pointcloud'].to(run_device).float()
            y = data['category'].cpu()

            scores = model(x_var)
            _, preds = scores.cpu().max(1)
            num_correct += int((preds == y).sum())
            num_samples += preds.size(0)

    # guard against ZeroDivisionError when the loader yields nothing
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

## Model

In [0]:
class ConvOneView(nn.Module):
    '''Feature extractor for a single one-channel view.

    Three unpadded 3x3 convolutions (1 -> 3 -> 64 -> 128 channels), each
    followed by batch norm and ReLU, then one fully connected layer producing
    1024 features. fc1 expects 86528 = 128 * 26 * 26 inputs, i.e. 32x32 views.
    fc2, fc3, bn5 and bn6 are created but not used by forward().
    '''

    def __init__(self):
        super().__init__()
        # convolutional stack
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)

        # fully connected layers (only fc1 takes part in forward)
        self.fc1 = nn.Linear(in_features=86528, out_features=1024)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=64)

        # batch norms paired with the convolutions ...
        self.bn1 = nn.BatchNorm2d(num_features=3)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.bn3 = nn.BatchNorm2d(num_features=128)

        # ... and with the fully connected layers
        self.bn4 = nn.BatchNorm1d(num_features=1024)
        self.bn5 = nn.BatchNorm1d(num_features=256)
        self.bn6 = nn.BatchNorm1d(num_features=64)

    def forward(self, input):
        '''Map a (bs, 1, 32, 32) batch to (bs, 1024) non-negative features.'''
        relu = nn.ReLU(inplace=True)

        feats = input
        conv_stages = ((self.conv1, self.bn1),
                       (self.conv2, self.bn2),
                       (self.conv3, self.bn3))
        for conv, norm in conv_stages:
            feats = relu(norm(conv(feats)))

        flat = nn.Flatten()(feats)
        return relu(self.bn4(self.fc1(flat)))


class CombineMultiView(nn.Module):
    '''Run one shared ConvOneView over three views and merge the features.'''

    def __init__(self):
        super().__init__()
        # a single extractor, so all views share the same weights
        self.conv1 = ConvOneView()

    def forward(self, input):
        '''Return per-view features flattened to (bs, features * 3).

        Channels 0, 1 and 2 of ``input`` are the three views. Their feature
        vectors are stacked along a new last axis, so the flattened output
        interleaves the views feature by feature.
        '''
        # slicing with v:v+1 keeps the channel axis, giving (bs, 1, H, W)
        per_view = [self.conv1(input[:, view:view + 1, :, :]) for view in range(3)]
        stacked = torch.stack(per_view, dim=2)

        # kernel size 1 leaves the stacked values unchanged
        pooled = nn.MaxPool1d(kernel_size=1)(stacked)
        return nn.Flatten(start_dim=1)(pooled)

class MVNet(nn.Module):
    '''Multi-view classifier: CombineMultiView features followed by an MLP head.

    forward() returns log-probabilities (LogSoftmax over the class axis).
    '''

    def __init__(self, classes = 10):
        super().__init__()
        self.CombineMultiView = CombineMultiView()

        # classification head: 3072 -> 512 -> 256 -> classes
        self.fc1 = nn.Linear(in_features=3072, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=classes)

        self.bn1 = nn.BatchNorm1d(num_features=512)
        self.bn2 = nn.BatchNorm1d(num_features=256)
        self.dropout = nn.Dropout(p=0.3)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, input):
        '''Map a batch of three-view images to (bs, classes) log-probabilities.'''
        relu = nn.ReLU(inplace=True)

        features = self.CombineMultiView(input)
        hidden = relu(self.bn1(self.fc1(features)))

        # dropout sits between fc2 and its batch norm
        hidden = self.dropout(self.fc2(hidden))
        hidden = relu(self.bn2(hidden))

        logits = self.fc3(hidden)
        return self.logsoftmax(logits)

In [0]:
# Training split with the augmented pipeline.
train_ds = PointCloudData(
    Path(path),
    transform=train_transforms,
)
# Test split: valid=True makes PointCloudData use default_transforms(), so the
# `transform` argument passed here has no effect.
valid_ds = PointCloudData(
    Path(path),
    valid=True,
    folder='test',
    transform=train_transforms,
)

In [23]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [24]:
# Build the classifier and move its parameters to the selected device; the
# trailing expression displays the module summary as the cell output.
pointnet = MVNet()
pointnet.to(device=device)

MVNet(
  (CombineMultiView): CombineMultiView(
    (conv1): ConvOneView(
      (conv1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
      (conv2): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
      (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
      (fc1): Linear(in_features=86528, out_features=1024, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=64, bias=True)
      (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True,

In [0]:
# Classification loss and an Adam optimiser over all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    pointnet.parameters(),
    lr=0.001,
)

In [0]:
# Shuffled mini-batches of 32 for training; fixed-order batches of 64 for evaluation.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=32,
    shuffle=True,
)
valid_loader = DataLoader(
    dataset=valid_ds,
    batch_size=64,
)

In [0]:
# Train for 10 epochs without writing per-epoch checkpoints.
train(
    pointnet,
    loss_fn,
    optimizer,
    num_epochs=10,
    save=False,
)

Starting epoch 1 / 10
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
loss = 1.0790
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
loss = 0.9056
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
loss = 1.0418
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
loss = 0.9434
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
loss = 0.6033
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
loss = 0.6011
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3]
[32, 1024, 3

In [0]:
# Report the accuracy of the trained model on the held-out loader.
check_accuracy(
    pointnet,
    valid_loader,
)