# Setting

Mount your Google Drive

In [1]:
import os
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


Set up mount symlink

In [2]:
# DRIVE_PATH = '/content/gdrive/My\ Drive/PointNet
DRIVE_PATH = '/content/gdrive/MyDrive/PointNet'
DRIVE_PYTHON_PATH = DRIVE_PATH.replace('\\', '')
if not os.path.exists(DRIVE_PYTHON_PATH):
  %mkdir $DRIVE_PATH

## the space in `My Drive` causes some issues,
## make a symlink to avoid this
SYM_PATH = '/content/PointNet'
if not os.path.exists(SYM_PATH):
  !ln -s $DRIVE_PATH $SYM_PATH

## Import and others

In [3]:
%cd $SYM_PATH
%pwd

/content/gdrive/MyDrive/PointNet


'/content/gdrive/MyDrive/PointNet'

In [4]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

In [5]:
import math
import random
from pathlib import Path

## args.py (seems to be unnecessary)

In [6]:
# import sys
# import argparse

In [7]:
# def parse_args():
#     parser = argparse.ArgumentParser(description='')

#     #  experiment settings
#     parser.add_argument('--root_dir', default='../ModelNet10/', type=str,
#                             help='dataset directory')
#     parser.add_argument('--batch_size', default=32, type=int,
#                             help='training batch size')
#     parser.add_argument('--lr', default=1e-3, type=float,
#                             help='learning rate')
#     parser.add_argument('--epochs', default=15, type=int,
#                             help='number of training epochs')
#     parser.add_argument('--save_model_path', default='./checkpoints/', type=str,
#                             help='checkpoints dir')


#     args = parser.parse_args()
    
#     assert args.root_dir is not None
    
#     print(' '.join(sys.argv))
#     print(args)

#     return args

## utils.py

In [8]:
def read_off(file):
    if 'OFF' != file.readline().strip():
        raise('Not a valid OFF header')
    n_verts, n_faces, __ = tuple([int(s) for s in file.readline().strip().split(' ')])
    verts = [[float(s) for s in file.readline().strip().split(' ')] for i_vert in range(n_verts)]
    faces = [[int(s) for s in file.readline().strip().split(' ')][1:] for i_face in range(n_faces)]
    return verts, faces

In [24]:
import time
class PointSampler(object):
    def __init__(self, output_size, method = 'farthest', verbose = False):
        assert isinstance(output_size, int)
        self.output_size = output_size
        self.method = method #NOTE(JH) 'original', 'farthest', 'default'
        self.verbose = verbose 

    def triangle_area(self, pt1, pt2, pt3):
        side_a = np.linalg.norm(pt1 - pt2)
        side_b = np.linalg.norm(pt2 - pt3)
        side_c = np.linalg.norm(pt3 - pt1)
        s = 0.5 * ( side_a + side_b + side_c)
        return max(s * (s - side_a) * (s - side_b) * (s - side_c), 0)**0.5

    def sample_point(self, pt1, pt2, pt3):
        # barycentric coordinates on a triangle
        # https://mathworld.wolfram.com/BarycentricCoordinates.html
        s, t = sorted([random.random(), random.random()])
        f = lambda i: s * pt1[i] + (t-s)*pt2[i] + (1-t)*pt3[i]
        return (f(0), f(1), f(2))

    def sample_fartehst_point(self, point):
        N, D = point.shape
        centroids = np.zeros((self.output_size,))
        distance = np.ones((N,)) * 1e10
        farthest = np.random.randint(0,N)
        for i in range(self.output_size):
          centroids[i] = farthest
          centroid = point[farthest, 0:3]
          dist = np.sum((point[:,0:3] - centroid)**2, -1)
          mask = dist < distance
          distance[mask] = dist[mask]
          farthest = np.argmax(distance, -1)
        point = point[centroids.astype(np.int32)]

        return point

    def __call__(self, mesh):
        verts, faces = mesh
        verts = np.array(verts)
        sampled_points = np.zeros((self.output_size, 3))        

        # EDIT by JH-starts
        # areas = np.zeros((len(faces)))
        tic = time.time()
        if self.method == 'default':
          # N = min(self.output_size, len(faces))
          
          # if N == len(faces):
          #     pass
          # else:
          #     inds = np.random.choice(len(faces), N, replace=False)
          #     faces = [faces[ind] for ind in inds]

          areas = np.zeros((len(faces)))

          for i in range(len(areas)):
              areas[i] = (self.triangle_area(verts[faces[i][0]],
                                            verts[faces[i][1]],
                                            verts[faces[i][2]]))
              
          sampled_faces = (random.choices(faces,
                                        weights=areas,
                                        cum_weights=None,
                                        k=self.output_size))
        

          for i in range(len(sampled_faces)):
              sampled_points[i] = (self.sample_point(verts[sampled_faces[i][0]],
                                                    verts[sampled_faces[i][1]],
                                                    verts[sampled_faces[i][2]]))
        
        elif self.method == 'original':
          
          inds = np.random.choice(len(verts), self.output_size, replace = True)

          for i in range(sampled_points.shape[0]):
              sampled_points[i] = (verts[inds[i]])

        elif self.method == 'farthest':
          # N = min(len(verts), 2 * self.output_size)
          # if N == len(verts):
          #   pass
          # else:
          #   inds = np.random.choice(len(verts), N, replace = False)
          #   verts = verts[inds,:]
          sampled_points = self.sample_fartehst_point(verts)

        if self.verbose:
          print('Sampling method:', self.method, '// Sampling time:', time.time() - tic)

        # EDIT by JH-ends

        return sampled_points

class Normalize(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2
        
        norm_pointcloud = pointcloud - np.mean(pointcloud, axis=0)
        norm_pointcloud /= np.max(np.linalg.norm(norm_pointcloud, axis=1))

        return  norm_pointcloud

class RandRotation_z(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        theta = random.random() * 2. * math.pi
        rot_matrix = np.array([[ math.cos(theta), -math.sin(theta),    0],
                               [ math.sin(theta),  math.cos(theta),    0],
                               [0,                             0,      1]])
        
        rot_pointcloud = rot_matrix.dot(pointcloud.T).T
        return  rot_pointcloud
    
class RandomNoise(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        noise = np.random.normal(0, 0.02, (pointcloud.shape))
    
        noisy_pointcloud = pointcloud + noise
        return  noisy_pointcloud
        
class ToTensor(object):
    def __call__(self, pointcloud):
        assert len(pointcloud.shape)==2

        return torch.from_numpy(pointcloud)

## dataset.py

In [25]:
def default_transforms():
    return transforms.Compose([
                                PointSampler(1024), # utils.PointSampler(1024),
                                Normalize(), # utils.Normalize(),
                                ToTensor() # utils.ToTensor()
                              ])
    
def train_transforms():
    return transforms.Compose([
                      PointSampler(1024,'farthest',verbose = False),
                      Normalize(),
                      RandRotation_z(),
                      RandomNoise(),
                      ToTensor()
                      ])

In [26]:
class PointCloudData(Dataset):
    def __init__(self, root_dir, valid=False, folder="train", transform=default_transforms()):
        self.root_dir = root_dir
        folders = [dir for dir in sorted(os.listdir(root_dir)) if os.path.isdir(root_dir/dir)]
        self.classes = {folder: i for i, folder in enumerate(folders)}
        self.transforms = transform if not valid else default_transforms()
        self.valid = valid
        self.files = []
        for category in self.classes.keys():
            new_dir = root_dir/Path(category)/folder
            for file in os.listdir(new_dir):
                if file.endswith('.off'):
                    sample = {}
                    sample['pcd_path'] = new_dir/file
                    sample['category'] = category
                    self.files.append(sample)

    def __len__(self):
        return len(self.files)

    def __preproc__(self, file):
        verts, faces = read_off(file) # utils.read_off(file)
        if self.transforms:
            pointcloud = self.transforms((verts, faces))
        return pointcloud

    def __getitem__(self, idx):
        pcd_path = self.files[idx]['pcd_path']
        category = self.files[idx]['category']
        with open(pcd_path, 'r') as f:
            pointcloud = self.__preproc__(f)
        return {'pointcloud': pointcloud,
                'category': self.classes[category]}

# PointNetClass.ipynb

In [27]:
import scipy.spatial.distance
import plotly.graph_objects as go
import plotly.express as px

In [28]:
random.seed = 42

In [29]:
# # Download the dataset directly to the Google Colab Runtime.
# # It comprises 10 categories, 3,991 models for training and 908 for testing.

if not os.path.exists("ModelNet10"): # This may take a few minutes
  !wget http://3dvision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip 
  !unzip -q ModelNet10.zip;

path = Path("ModelNet10")

In [53]:
root_dir = path
folders = [dir for dir in sorted(os.listdir(root_dir)) if os.path.isdir(root_dir/dir)]
classes = {folder: i for i, folder in enumerate(folders)}

print(classes)

method = 'farthest' # default, original, farthest

# if target == 'training':
  # transforms = train_transforms()
# elif target == 'testing':
  # transforms = default_transforms()
pt = PointSampler(1024, method, False)

folder = "train" #train, test

data = []
k = 0
tic = time.time()
for category in classes.keys():
    new_dir = root_dir/Path(category)/folder
    for file in os.listdir(new_dir):
        if file.endswith('.off'):
            sample = {}
            sample['pcd_path'] = new_dir/file
            sample['category'] = category

            with open(sample['pcd_path'], 'r') as f:
              verts, faces = read_off(f)
              mesh = (verts, faces)
              sampled_points = pt(mesh)

            sample['sampled_points'] = sampled_points
            data.append(sample)

            k += 1

            # if k == 10:
            #   break

print('Total folder number:', k)
print('Total time is :', time.time() - tic)
print(data[0])

{'bathtub': 0, 'bed': 1, 'chair': 2, 'desk': 3, 'dresser': 4, 'monitor': 5, 'night_stand': 6, 'sofa': 7, 'table': 8, 'toilet': 9}


KeyboardInterrupt: ignored

In [47]:
import pickle

name = folder + "_" + method + ".pkl"

print(name)

file = open(name, 'wb')

pickle.dump(data, file)

file.close()

train_farthest.pkl


In [54]:
def default_transforms():
    return transforms.Compose([
                                Normalize(), # utils.Normalize(),
                                ToTensor() # utils.ToTensor()
                              ])

train_transforms = transforms.Compose([
                    Normalize(),
                    RandRotation_z(),
                    RandomNoise(),
                    ToTensor()
                    ])

class PointCloudData(Dataset):
    def __init__(self, method = 'default', target = 'train', transform=default_transforms()):
        self.target = target
        self.method = method

        name = target + "_" + method + ".pkl"

        dataset = open(name, 'rb')
        self.files = pickle.load(dataset)
        dataset.close()

        self.classes = {'bathtub': 0, 'bed': 1, 'chair': 2, 'desk': 3, 'dresser': 4, 'monitor': 5, 'night_stand': 6, 'sofa': 7, 'table': 8, 'toilet': 9}

                
        # self.root_dir = root_dir
        # folders = [dir for dir in sorted(os.listdir(root_dir)) if os.path.isdir(root_dir/dir)]
        # self.classes = {folder: i for i, folder in enumerate(folders)}
        # self.transforms = transform if not valid else default_transforms()
        # self.valid = valid
        # self.files = []
        # for category in self.classes.keys():
        #     new_dir = root_dir/Path(category)/folder
        #     for file in os.listdir(new_dir):
        #         if file.endswith('.off'):
        #             sample = {}
        #             sample['pcd_path'] = new_dir/file
        #             sample['category'] = category
        #             self.files.append(sample)

    def __len__(self):
        return len(self.files)

    # def __preproc__(self, file):
    #     verts, faces = read_off(file) # utils.read_off(file)
    #     if self.transforms:
    #         pointcloud = self.transforms((verts, faces))
    #     return pointcloud

    def __getitem__(self, idx):
        pcd_path = self.files[idx]['pcd_path']
        category = self.files[idx]['category']
        pointcloud = self.files[idx]['sampled_points']

        return {'pointcloud': pointcloud,
                'category': self.classes[category]}

In [63]:
train_dataset = PointCloudData('default','train',train_transforms)
dataloader = DataLoader(dataset = train_dataset, batch_size = 1024, shuffle = True)

for i,data in enumerate(dataloader):
  print(i, data.values()[0])

TypeError: ignored