In [0]:
import os
import os.path as osp
import glob

import math

import zipfile

import urllib

import shutil

import torch


def parse_txt_array(src, sep=None, start=0, end=None, dtype=None, device=None):
    src = [[float(x) for x in line.split(sep)[start:end]] for line in src]
    src = torch.tensor(src, dtype=dtype).squeeze()
    return src


def read_txt_array(path, sep=None, start=0, end=None, dtype=None, device=None):
    with open(path, 'r') as f:
        src = f.read().split('\n')[:-1]
    return parse_txt_array(src, sep, start, end, dtype, device)


def parse_off(src):
    if src[0] == 'OFF':
        src = src[1:]
    else:
        src[0] = src[0][3:]

    num_nodes, num_faces = [int(item) for item in src[0].split()[:2]]

    pos = parse_txt_array(src[1:1 + num_nodes])

    face = src[1 + num_nodes:1 + num_nodes + num_faces]
    face = face_to_tri(face)

    return pos, face


def face_to_tri(face):
    face = [[int(x) for x in line.strip().split(' ')] for line in face]

    triangle = torch.tensor([line[1:] for line in face if line[0] == 3])
    triangle = triangle.to(torch.int64)

    rect = torch.tensor([line[1:] for line in face if line[0] == 4])
    rect = rect.to(torch.int64)

    if rect.numel() > 0:
        first, second = rect[:, [0, 1, 2]], rect[:, [1, 2, 3]]
        return torch.cat([triangle, first, second], dim=0).t().contiguous()
    else:
        return triangle.t().contiguous()


def read_off(path):
    with open(path, 'r') as f:
        src = f.read().split('\n')[:-1]
    return parse_off(src)


def sample_points(pos, face, num=1024):
    assert pos.size(1) == 3 and face.size(0) == 3

    pos_max = pos.max()
    pos = pos / pos_max

    area = (pos[face[1]] - pos[face[0]]).cross(pos[face[2]] - pos[face[0]])
    area = area.norm(p=2, dim=1).abs() / 2

    prob = area / area.sum()
    sample = torch.multinomial(prob, num, replacement=True)
    face = face[:, sample]

    frac = torch.rand(num, 2, device=pos.device)
    mask = frac.sum(dim=-1) > 1
    frac[mask] = 1 - frac[mask]

    vec1 = pos[face[1]] - pos[face[0]]
    vec2 = pos[face[2]] - pos[face[0]]

    pos_sampled = pos[face[0]]
    pos_sampled += frac[:, :1] * vec1
    pos_sampled += frac[:, 1:] * vec2

    pos_sampled = pos_sampled * pos_max

    return pos_sampled


class ModelNet(torch.utils.data.Dataset):
    url = 'http://vision.princeton.edu/projects/2014/3DShapeNets/ModelNet10.zip'

    def __init__(self, root, train=True, transform=None):
        super(ModelNet, self).__init__()
        self.root = root
        self.raw_dir = osp.join(self.root, 'raw')
        self.processed_dir = osp.join(self.root, 'processed')
        self.transform = transform

        self.download()
        self.process()

        path = self.processed_paths[0] if train else self.processed_paths[1]
        self.data, self.targets = torch.load(path)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pos, target = self.data[idx], int(self.targets[idx])
        if self.transform is not None:
            pos = self.transform(pos)
        return pos, target

    @property
    def processed_file_names(self):
        return ['training.pt', 'test.pt']

    @property
    def raw_file_names(self):
        return [
            'bathtub', 'bed', 'chair', 'desk', 'dresser', 'monitor',
            'night_stand', 'sofa', 'table', 'toilet'
        ]

    @property
    def raw_paths(self):
        files = self.raw_file_names
        return [osp.join(self.raw_dir, f) for f in files]

    @property
    def processed_paths(self):
        files = self.processed_file_names
        return [osp.join(self.processed_dir, f) for f in files]

    @property
    def num_classes(self):
        return int(self.targets.max().item()) + 1 

    def download(self):

        if all([osp.exists(f) for f in self.raw_paths]):
            return

        os.makedirs(osp.expanduser(osp.normpath(self.raw_dir)))

        filename = self.url.rpartition('/')[2]
        path = osp.join(self.root, filename)
        if osp.exists(path):
            print('Using exist file', filename)
        else:
            print('Downloading', self.url)
            data = urllib.request.urlopen(self.url)
            with open(path, 'wb') as f:
                f.write(data.read())

        with zipfile.ZipFile(path, 'r') as f:
            print('Extracting', path)
            f.extractall(self.root)
        os.unlink(path)

        folder = osp.join(self.root, 'ModelNet10')
        shutil.rmtree(self.raw_dir)
        os.rename(folder, self.raw_dir)
        print('Done!')

    def process(self):
        if all([osp.exists(f) for f in self.processed_paths]):
            return

        print('Processing...')
        os.makedirs(osp.expanduser(osp.normpath(self.processed_dir)))

        self.process_set('train', self.processed_paths[0])
        self.process_set('test', self.processed_paths[1])

        print('Done!')

    def process_set(self, dataset, processed_path):
        categories = glob.glob(osp.join(self.raw_dir, '*', ''))
        categories = sorted([x.split(os.sep)[-2] for x in categories])

        positions = []
        targets = []
        for target, category in enumerate(categories):
            folder = osp.join(self.raw_dir, category, dataset)
            paths = glob.glob('{}/{}_*.off'.format(folder, category))

            for path in paths:
                pos, face = read_off(path)

                scale = (1 / pos.abs().max()) * 0.999999
                pos = pos * scale

                pos = sample_points(pos, face)
                positions.append(pos.t())
                targets.append(target)

        positions = torch.stack(positions)
        targets = torch.Tensor(targets)

        torch.save((positions, targets), processed_path)


def fixed_points(pos, y, num):
    N, D = pos.shape
    assert D == 3
    choice = torch.cat([torch.randperm(N)
                        for _ in range(math.ceil(num / N))], dim=0)[:num]
    return pos[choice], y[choice]

class ShapeNet(torch.utils.data.Dataset):
    url = 'https://shapenet.cs.stanford.edu/iccv17/partseg'

    categories = {
        'Airplane': '02691156',
        'Bag': '02773838',
        'Cap': '02954340',
        'Car': '02958343',
        'Chair': '03001627',
        'Earphone': '03261776',
        'Guitar': '03467517',
        'Knife': '03624134',
        'Lamp': '03636649',
        'Laptop': '03642806',
        'Motorbike': '03790512',
        'Mug': '03797390',
        'Pistol': '03948459',
        'Rocket': '04099429',
        'Skateboard': '04225987',
        'Table': '04379243',
    }

    def __init__(self, root, category, train=True, transform=None):
        super(ShapeNet, self).__init__()
        self.category = category

        assert self.category in self.categories

        self.root = root
        self.raw_dir = osp.join(self.root, 'raw')
        self.processed_dir = osp.join(self.root, 'processed')
        self.transform = transform

        self.download()
        self.process()

        path = self.processed_paths[0] if train else self.processed_paths[1]
        self.data, self.targets = torch.load(path)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        pos, target = self.data[idx], self.targets[idx]
        if self.transform is not None:
            pos = self.transform(pos)
        return pos, target

    @property
    def raw_file_names(self):
        return [
            'train_data', 'train_label', 'val_data', 'val_label', 'test_data',
            'test_label'
        ]

    @property
    def processed_file_names(self):
        names = ['training.pt', 'test.pt']
        return [osp.join(self.category, name) for name in names]

    @property
    def raw_paths(self):
        files = self.raw_file_names
        return [osp.join(self.raw_dir, f) for f in files]

    @property
    def processed_paths(self):
        files = self.processed_file_names
        return [osp.join(self.processed_dir, f) for f in files]

    @property
    def num_classes(self):
        return int(self.targets.max().item()) + 1 

    def download(self):
        if all([osp.exists(f) for f in self.raw_paths]):
            return
        os.makedirs(osp.expanduser(osp.normpath(self.raw_dir)))

        for name in self.raw_file_names:
            url = '{}/{}.zip'.format(self.url, name)

            filename = url.rpartition('/')[2]
            path = osp.join(self.raw_dir, filename)
            if osp.exists(path):
                print('Using exist file', filename)
            else:
                print('Downloading', url)
                data = urllib.request.urlopen(url)
                with open(path, 'wb') as f:
                    f.write(data.read())

            with zipfile.ZipFile(path, 'r') as f:
                print('Extracting', path)
                f.extractall(self.raw_dir)
            os.unlink(path)

        print('Done!')

    def process(self):
        if all([osp.exists(f) for f in self.processed_paths]):
            return

        print('Processing...')

        directory = osp.expanduser(osp.normpath(
            osp.join(self.processed_dir, self.category)))
        if not osp.exists(directory):
            os.makedirs(directory)

        idx = self.categories[self.category]
        paths = [osp.join(path, idx) for path in self.raw_paths]
        datasets = []
        for path in zip(paths[::2], paths[1::2]):
            pos_paths = sorted(glob.glob(osp.join(path[0], '*.pts')))
            y_paths = sorted(glob.glob(osp.join(path[1], '*.seg')))
            positions, ys = [], []
            for path in zip(pos_paths, y_paths):
                pos = read_txt_array(path[0])
                y = read_txt_array(path[1], dtype=torch.long)
                pos, y = fixed_points(pos, y, 2048)

                positions.append(pos.t())
                ys.append(y)

            positions = torch.stack(positions)
            ys = torch.stack(ys)
            datasets.append((positions, ys))

        train_data = torch.cat([datasets[0][0], datasets[1][0]], dim=0), torch.cat(
            [datasets[0][1], datasets[1][1]], dim=0)
        test_data = datasets[2]

        torch.save(train_data, self.processed_paths[0])
        torch.save(test_data, self.processed_paths[1])

        print('Done.')


# Introduction
In this project, you will be asked to implement [PointNet](https://arxiv.org/abs/1612.00593) architecture and train a classification network (left) and a segmentation network (middle).
![title](img/cls_sem.jpg)

### Grading Points
* Task 1.1 - 5
* Task 1.2 - 5
* Task 2.1 - 10
* Task 2.2 - 5
* Task 2.3 - 5
* Task 2.4 - 5
* Task 2.5 - 5
* Task 2.6 - 10
* Task 2.7 - 5
* Task 2.8 - 10
* Task 2.9 - 10
* Task 2.10 - 5 
* Task 2.11 - 5
* Task 2.12 - 5
* Task 2.13 - 10

In [0]:
%load_ext autoreload
%autoreload 2

In [0]:
import random
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

from torchvision.transforms import Compose

#import dataset # custom dataset for ModelNet10 and ShapeNet

# 1. Data Loading

Usually, we write the point cloud as $X\in\mathbb{R}^{N\times 3}$. While in programming, we use `B x 3 x N` layout, where `B` is the batch-size and `N` is the number of points in a single point cloud.

## 1.1 Jitter the position of each points by a zero mean Gaussian
For input $X\in\mathbb{R}^{N\times 3}$, we transform $X$ by $X \leftarrow X + \mathcal{N}(0, \sigma^2)$.

In [0]:
class RandomJitter(object):
    def __init__(self, sigma):
        self.sigma = sigma
        
    def __call__(self, data):
        ## hint: useful function `torch.randn` and `torch.randn_like`
        ## TASK 1.1
        ## This function takes as input a point cloud of layout `3 x N`, 
        ## and output the jittered point cloud of layout `3 x N`.        
        return data + (self.sigma * torch.randn_like(data))

In [0]:
## random generate data and test your transform here

## 1.2 Rotate the object along the z-axis randomly
For input $X\in\mathbb{R}^{N\times 3}$, we rotate all points along z-axis (up-axis) by a degree $\theta$.


Suppose $T$ is the transformation matrix,
$$X\leftarrow XT,$$
where $$T=\begin{bmatrix}\cos\theta & \sin\theta & 0 \\ -\sin\theta & \cos\theta & 0 \\ 0 & 0 & 1 \end{bmatrix}.$$

In [0]:
import numpy as np
class RandomZRotate(object):
    def __init__(self, degrees):
        ## here `self.degrees` is a tuple (0, 360) which defines the range of degree
        self.degrees = degrees
        
    def __call__(self, data):
        ## TASK 1.2
        ## This function takes as input a point cloud of layout `3 x N`, 
        ## and output the rotated point cloud of layout `3 x N`.
        ##
        ## The rotation is along z-axis, and the degree is uniformly distributed
        ## between [0, 360]
        ##
        ## hint: useful function `torch.randn`， `torch.randn_like` and `torch.matmul`
        ##
        ## Notice:   
        ## Different from its math notation `N x 3`, the input has size of `3 x N`
        d = np.random.uniform(self.degrees[0], self.degrees[1])
        sin, cos = np.sin(d), np.cos(d)
        rot_mat = torch.Tensor([[cos, sin, 0], 
                                [-sin, cos, 0], 
                                [0, 0, 1]])
    
        return torch.matmul(rot_mat, data)


In [0]:
## random generate data and test your transform here

## 1.3 Load dataset ModelNet10 for Point Cloud Classification

### ModelNet10
By loading this dataset, we have data of shape `B x 3 x N` and label of shape `B`.

In [0]:
# It may taske some time to download and pre-process the dataset.
train_transform = Compose([RandomZRotate((0, 360)), RandomJitter(0.02)])
train_cls_dataset = ModelNet(root='./ModelNet10', transform=train_transform, train=True)
test_cls_dataset = ModelNet(root='./ModelNet10', train=False)
train_cls_loader = data.DataLoader(
    train_cls_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=1,
)
test_cls_loader = data.DataLoader(
    test_cls_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
)

In [0]:
print(train_cls_dataset.num_classes)

10


## ShapeNet
By loading this dataset, we have data of shape `B x 3 x N` and target of shape `B x N`.

Here is the list of categories:
['Airplane', 'Bag', 'Cap', 'Car', 'Chair', 'Earphone', 'Guitar', 'Knife', 'Lamp', 'Laptop', 'Motorbike', 'Mug', 'Pistol', 'Rocket', 'Skateboard', 'Table']

In [0]:
## Here as an example, we choose the cateogry 'Airplane'
category = 'Airplane'
train_seg_dataset = ShapeNet(root='./ShapeNet', category=category, train=True)
test_seg_dataset = ShapeNet(root='./ShapeNet', category=category, train=False)
train_seg_loader = data.DataLoader(
    train_seg_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=1,
)
test_seg_loader = data.DataLoader(
    test_seg_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
)

In [0]:
print(train_seg_dataset.num_classes)

5


# 2 PointNet Architecture (Read Section 4.2 and Appendix C)
In this section, you will be asked to implement classification and segmentation step by step.
![pointnet](img/pointnet.jpg)

## 2.1 Joint Alignment Network 
This mini-network takes as input matrix of size $N \times K$, and outputs a transformation matrix of size $K \times K$. 

In programming, the input size of this module is `B x K x N` and output size is `B x K x K`.

For the shared MLP, use structure like this `(FC(64), BN, ReLU, FC(128), BN, ReLU, FC(1024), BN, ReLU)`.

For the MLP after global max pooling, use structure like this `(FC(512), BN, ReLU, FC(256), BN, ReLU, FC(K*K)`.


In [0]:
class Transformation(nn.Module):
    def __init__(self, k=3):
        super(Transformation, self).__init__()
        
        self.k = k
        
        ## TASK 2.1
        
        ## define your network layers here
        ## shared mlp
        ## input size: B x K x N
        ## output size: B x 1024 x N
        ## hint: you may want to use `nn.Conv1d` here. Why? translation invariant, (less peramtr = better effeiceny)
        self.Shared_nn = nn.Sequential(
            nn.Conv1d(k, 64, 1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Conv1d(128, 1024, 1),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True))

        ## define your network layers here
        ## mlp
        ## input size: B x 1024
        ## output size: B x (K*K)
        self.nn = nn.Sequential(
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Linear(256, k * k))
        
    def forward(self, x):
        B, K, N = x.shape # batch-size, dim, number of points
        ## TASK 2.1

        ## forward of shared mlp
        # input - B x K x N
        # output - B x 1024 x N
        x = self.Shared_nn(x)

        ## global max pooling
        # input - B x 1024 x N
        # output - B x 1024
        x = F.max_pool1d(x, N).view(B, 1024)

        
        ## mlp
        # input - B x 1024
        # output - B x (K*K)
        x = self.nn(x)
        
        ## reshape the transformation matrix to B x K x K
        identity = torch.eye(self.k, device=x.device)
        x = x.view(B, self.k, self.k) + identity[None]
        return x

In [0]:
Transformation()

Transformation(
  (Shared_nn): Sequential(
    (0): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
    (7): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
  )
  (nn): Sequential(
    (0): Linear(in_features=1024, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=256, out_features=9, b

## 2.2 Regularization Loss
$$L_{reg}=\|I-TT^\intercal\|^2_F$$

The output of `Transformation` network is of size `B x K x K`. The module `OrthoLoss` has no trainable parameters, only computes this norm.

In [0]:
class OrthoLoss(nn.Module):
    def __init__(self):
        super(OrthoLoss, self).__init__()
        
    def forward(self, x):
        ## hint: useful function `torch.bmm` and `torch.matmul`
        
        ## TASK 2.2
        ## compute the matrix product
        prod = torch.bmm(x, x.transpose(2,1))

        norm = torch.norm(prod - torch.eye(x.shape[1], device=x.device)[None], dim=(1,2))
        return norm.mean()

In [0]:
## random generate data and test this network

## 2.3 Feature Network
In this subsection, you will be asked to implement the feature network (the top branch).

Local features are a matrix of size `B x 64 x N`, which will be used in the segmentation task.

Global features are a matrix of size `B x 1024`, which will be used in the classification task.

In [0]:
class Feature(nn.Module):
    def __init__(self, alignment=False):
        super(Feature, self).__init__()
        
        self.alignment = alignment
        
        ## `input_transform` calculates the input transform matrix of size `3 x 3`
        if self.alignment:
            self.input_transform = Transformation(3)
        
        ## TASK 2.3
        ## define your network layers here
        ## local feature
        ## shared mlp
        ## input size: B x 3 x N
        ## output size: B x 64 x N
        ## hint: you may want to use `nn.Conv1d` here.
        self.local_nn = nn.Sequential( nn.Conv1d(3, 64, 1),
                                       nn.BatchNorm1d(64),
                                       nn.ReLU(inplace=True))
        
        ## `feature_transform` calculates the feature transform matrix of size `64 x 64`
        if self.alignment:
            self.feature_transform = Transformation(64)
        
        ## TASK 2.4
        ## define your network layers here
        ## global feature
        ## shared mlp
        ## input size: B x 64 x N
        ## output size: B x 1024 x N  
        self.global_nn = nn.Sequential( nn.Conv1d(64, 128, 1),
                                        nn.BatchNorm1d(128),
                                        nn.ReLU(inplace=True),
                                        nn.Conv1d(128, 1024, 1),
                                        nn.BatchNorm1d(1024),
                                        nn.ReLU(inplace=True))      
    
    def forward(self, x):
        B, K, N = x.shape
        
        ## apply the input transform
        if self.alignment:
            transform = self.input_transform(x)
            ## TASK 2.5
            ## apply the input transform
            x = torch.bmm(x.transpose(2, 1), transform).transpose(2, 1)

        ## TASK 2.3
        ## forward of shared mlp
        # input - B x K x N
        # output - B x 64 x N
        x = self.local_nn(x)
        
        if self.alignment:
            transform = self.feature_transform(x)
            ## TASK 2.5
            ## apply the feature transform
            x = torch.bmm(x.transpose(2, 1), transform).transpose(2, 1)
        else:
            ## do not modify this line
            transform = None
        
        local_feature = x
        
        ## TASK 2.4
        ## forward of shared mlp
        # input - B x 64 x N
        # output - B x 1024 x N
        x = self.global_nn(x)
        
        ## TASK 2.4
        ## global max pooling
        # input - B x 1024 x N
        # output - B x 1024
        x = F.max_pool1d(x, kernel_size = N).view(B, 1024)
        
        global_feature = x
        
        ## summary:
        ## global_feature: B x 1024
        ## local_feature: B x 64 x N
        ## transform: B x K x K
        return global_feature, local_feature, transform

In [0]:
Feature()

Feature(
  (local_nn): Sequential(
    (0): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (global_nn): Sequential(
    (0): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
    (4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
)

## 2.4 Classification Network
In this network, you will use the global features generated by the `Feature` network defined above.

In [0]:
class Classification(nn.Module):
    def __init__(self, num_classes, alignment=False):
        super(Classification, self).__init__()
                
        self.feature = Feature(alignment=alignment)
        
        ## TASK 2.6
        ## define your network layers here
        ## mlp
        ## input size: B x 1024
        ## output size: B x num_classes
        self.mlp = nn.Sequential( nn.Linear(1024, 512),
                                  nn.BatchNorm1d(512),
                                  nn.ReLU(inplace=True),
                                  nn.Linear(512, 256),
                                  nn.BatchNorm1d(256),
                                  nn.ReLU(inplace=True),
                                  nn.Dropout(0.7),
                                  nn.Linear(256, num_classes),
                                  nn.LogSoftmax(1))
    def forward(self, x):
        # x is the global feature matrix
        # here we don't use local feature matrix
        x, _, trans = self.feature(x)
        
        ## TASK 2.6
        ## forward of mlp
        # input - B x 1024
        # output - B x num_classes        
        x = self.mlp(x)
        
        ## x: B x num_classes
        ## trans: B x K x K
        return x, trans

In [0]:
Classification(10)

Classification(
  (feature): Feature(
    (local_nn): Sequential(
      (0): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (global_nn): Sequential(
      (0): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (mlp): Sequential(
    (0): Linear(in_features=1024, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=T

### 2.4.1 Train this network on ModelNet10

In [0]:
# main train function for classification
def train_cls(train_loader, test_loader, network, optimizer, epochs, scheduler):
    reg = OrthoLoss()
    for epoch in range(epochs):
        print('Epoch:[{:02d}/{:02d}]'.format(epoch+1, epochs))
        print('Training...')
        network.train()
        train_loss = 0
        correct = 0
        for batch, (pos, label) in enumerate(train_loader):
            network.zero_grad()
            pos, label = pos.cuda(), label.cuda()
            
            ## TASK 2.7
            ## forward propagation
            output, trans = network(pos)
            loss = F.nll_loss(output, label)
            ##########
            
            ## regularizer
            if trans is not None:
                loss += reg(trans) * 0.001

            pred = output.max(1)[1]
            correct += pred.eq(label).sum().item()

            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            print('\rIter: [{:03d}/{:03d}] Loss: {:.4f}'.format(batch+1, len(train_loader), loss.item()), end='', flush=True)
        
        scheduler.step()
        print('\nAverage Train Loss: {:.4f}; Train Acc: {:.4f}'.format(train_loss/len(train_loader), correct/len(train_loader.dataset) * 100))
        
        print('\nTesting...')
        with torch.no_grad():
            network.eval()
            test_loss = 0
            correct = 0
            for batch, (pos, label) in enumerate(test_loader):
                pos, label = pos.cuda(), label.cuda()
    
                ## TASK 2.7
                ## forward propagation
                output, trans = network(pos)
                loss = F.nll_loss(output, label)
                ##########

                if trans is not None:
                    loss += reg(trans) * 0.001

                pred = output.max(1)[1]
                correct += pred.eq(label).sum().item()

                test_loss += loss.item()
                print('\rIter: [{:03d}/{:03d}] Loss: {:.4f}'.format(batch+1, len(test_loader), loss.item()), end='', flush=True)

            print('\nAverage Test Loss: {:.4f}; Test Acc: {:.4f}'.format(test_loss/len(test_loader), correct/len(test_loader.dataset) * 100))
        print('-------------------------------------------')


In [0]:
network = Classification(10, alignment=True).cuda()
epochs = 200 # you can change the value to a small number for debugging

## TASK 2.8
# see Appendix C
# choose an optimizer and an initial learning rate
optimizer = torch.optim.Adam(network.parameters(), lr = 0.001, betas = (0.9, 0.999))
# choose a lr scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 20, gamma = 0.5)
#######3

# start training
train_cls(train_cls_loader, test_cls_loader, network, optimizer, epochs, scheduler)

Epoch:[01/200]
Training...
Iter: [250/250] Loss: 1.1645
Average Train Loss: 1.6611; Train Acc: 49.7369

Testing...
Iter: [908/908] Loss: 0.9946
Average Test Loss: 1.4478; Test Acc: 53.3040
-------------------------------------------
Epoch:[02/200]
Training...
Iter: [250/250] Loss: 2.2179
Average Train Loss: 1.1377; Train Acc: 63.4929

Testing...
Iter: [908/908] Loss: 0.6138
Average Test Loss: 1.1410; Test Acc: 63.6564
-------------------------------------------
Epoch:[03/200]
Training...
Iter: [250/250] Loss: 0.3242
Average Train Loss: 0.9861; Train Acc: 68.2786

Testing...
Iter: [908/908] Loss: 2.2894
Average Test Loss: 1.1798; Test Acc: 55.0661
-------------------------------------------
Epoch:[04/200]
Training...
Iter: [250/250] Loss: 3.9719
Average Train Loss: 0.9699; Train Acc: 69.8822

Testing...
Iter: [908/908] Loss: 0.3378
Average Test Loss: 1.0544; Test Acc: 62.0044
-------------------------------------------
Epoch:[05/200]
Training...
Iter: [250/250] Loss: 0.4802
Average Trai

KeyboardInterrupt: ignored

### Report the best test accuracy you can get.

In [0]:
#best test accuracy = 90.5286

## 2.5 Segmentation Network
In this network, you will use the global features and local features generated by the `Feature` network defined above.

The global feature matrix is of size `B x 1024` and the local feature matrix is of size `B x 64 x N`.

They need to be stacked together to a new matrix of size `B x 1088 x n` (How?). 

In [0]:
# main train function for classification
class Segmentation(nn.Module):
    def __init__(self, num_classes, alignment=False):
        super(Segmentation, self).__init__()
               
        self.feature = Feature(alignment=alignment)

        ## TASK 2.9
        ## shared mlp
        ## input size: B x 1088 x N
        ## output size: B x num_classes x N
        self.shared_mlp = nn.Sequential(  nn.Conv1d(1088, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, 256, 1),
                                          nn.BatchNorm1d(256),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(256, 128, 1),
                                          nn.BatchNorm1d(128),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(128, num_classes, 1),
                                          nn.LogSoftmax(1))
        
    def forward(self, x):
        g, l, trans = self.feature(x)
        ## TASK 2.10
        # concat global features and local features to a single matrix
        # g - B x 1024 , global features -> g - B x 1024 , N
        # l - B x 64 x N, local features
        # x - B x 1088 x N, concatenated features
        x = torch.cat([g.view(-1, 1024, 1).repeat(1, 1, x.shape[-1]), l], 1)
            
        ## TASK 2.9
        ## forward of shared mlp
        # input - B x 1088 x N
        # output - B x num_classes x N  
        x = self.shared_mlp(x)
        
        return x, trans

In [0]:
Segmentation(5)

Segmentation(
  (feature): Feature(
    (local_nn): Sequential(
      (0): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (global_nn): Sequential(
      (0): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (shared_mlp): Sequential(
    (0): Conv1d(1088, 512, kernel_size=(1,), stride=(1,))
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv1d(512, 256, kernel_size=(1,), stride=(1,))
    (4): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)


### 2.5.1 Calculating Intersection over Union (IoU) 
For 2D image, the IoU is calculated as follows,
![iou](img/iou.png)

How is it used in the literature of point clouds?

In [0]:
## TASK 2.11
# implement the helper functions to calculate the IoU
def get_i_and_u(pred, target, num_classes):
    """Calculate intersection and union between pred and target.
    
    pred -- B x N matrix
    target -- B x N matrix
    num_classes -- number of classes
    
    return i, u
    i -- B x N binary matrix, intersection, i[b, n] equals 1 if and only if it is a true-positive.
    u -- B x N binary matrix, union, u[b, n] equals 0 if and only if it is a true-negative
    """
    ## TASK 2.11
    ## calculate i and u here
    ## hint: useful function `F.one_hot`    
    ## hint: use element-wise logical tensor operation (`&` and `|`)
    iou=0
    for c in range(1, num_classes+1):
      p = (pred == c)
      t = (target == c)
      i = (p & t).sum()
      u = (p | t).sum()
      iou += 1 if u == 0 else i.float()/u.float()
    return iou/num_classes

def get_iou(pred, target, num_classes):
    """Calculate IoU
    pred -- B x N matrix
    target -- B x N matrix
    num_classes -- number of classes
    
    return iou
    iou -- B matrix, iou[b] is the IoU of b-th point cloud in this batch
    """

    ## use the helper function `i_and_u` defined above
    return torch.Tensor([get_i_and_u(pred[j], target[j], num_classes) for j in range(pred.shape[0])])

### 2.5.2 Train this network on ShapeNet

In [0]:
# main train function for segmentation
def train_seg(train_loader, test_loader, network, optimizer, epochs, scheduler):  
    reg = OrthoLoss()
    for epoch in range(epochs):
        print('Epoch:[{:02d}/{:02d}]'.format(epoch+1, epochs))
        print('Training...')
        network.train()
        train_loss = 0
        correct = 0
        total = 0
        ious = []
        for batch, (pos, label) in enumerate(train_loader):
            network.zero_grad()
            pos, label = pos.cuda(), label.cuda()
            ## TASK 2.12
            ## forward propagation
            output, trans = network(pos)
            loss = F.nll_loss(output, label)
            ##########
            if trans is not None:
                loss += reg(trans) * 0.001        

            pred = output.max(1)[1]
            correct += pred.eq(label).sum().item()
            total += label.numel()

            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            #ref_iou(pred, label, train_loader.dataset.num_classes)
            ious += [get_iou(pred, label, train_loader.dataset.num_classes)]
            print('\rIter: [{:03d}/{:03d}] Loss: {:.4f}'.format(batch+1, len(train_loader), loss.item()), end='', flush=True)
            
        
        scheduler.step()
        print('\nAverage Train Loss: {:.4f}; Train Acc: {:.4f}; Train mean IoU: {:.4f}'.format(train_loss/len(train_loader), correct/total * 100, torch.cat(ious, dim=0).mean().item()))

        print('\nTesting...')
        with torch.no_grad():
            network.eval()
            test_loss = 0
            correct = 0
            total = 0
            ious = []
            for batch, (pos, label) in enumerate(test_loader):
                pos, label = pos.cuda(), label.cuda()
                
                ## TASK 2.12
                ## forward propagation
                output, trans = network(pos)
                loss = F.nll_loss(output, label)
                ##########
                
                if trans is not None:
                    loss += reg(trans) * 0.001   

                pred = output.max(1)[1]
                correct += pred.eq(label).sum().item()
                total += label.numel()

                test_loss += loss.item()

                ious += [get_iou(pred, label, train_loader.dataset.num_classes)]
                print('\rIter: [{:03d}/{:03d}] Loss: {:.4f}'.format(batch+1, len(test_loader), loss.item()), end='', flush=True)

            print('\nAverage Test Loss: {:.4f}; Test Acc: {:.4f}; Test mean IoU: {:.4f}'.format(test_loss/len(test_loader), correct/total * 100, torch.cat(ious, dim=0).mean().item()))
        print('-------------------------------------------')

In [0]:
network = Segmentation(train_seg_dataset.num_classes, alignment=True).cuda()
epochs = 200 # you can change the value to a small number for debugging

## TASK 2.13
# see Appendix C
# choose an optimizer and an initial learning rate
optimizer = torch.optim.Adam(network.parameters(), lr = 0.001, betas = (0.9, 0.999))
# choose a lr scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 20, gamma = 0.5)
#######3

train_seg(train_seg_loader, test_seg_loader, network, optimizer, epochs, scheduler)

Epoch:[01/200]
Training...
Iter: [147/147] Loss: 0.5216
Average Train Loss: 0.6353; Train Acc: 78.0845; Train mean IoU: 0.6929

Testing...
Iter: [341/341] Loss: 0.5050
Average Test Loss: 0.5107; Test Acc: 81.9947; Test mean IoU: 0.7211
-------------------------------------------
Epoch:[02/200]
Training...
Iter: [147/147] Loss: 0.5372
Average Train Loss: 0.4110; Train Acc: 85.9453; Train mean IoU: 0.7814

Testing...
Iter: [341/341] Loss: 0.3375
Average Test Loss: 0.4639; Test Acc: 84.0452; Test mean IoU: 0.7427
-------------------------------------------
Epoch:[03/200]
Training...
Iter: [147/147] Loss: 0.3523
Average Train Loss: 0.3552; Train Acc: 87.5756; Train mean IoU: 0.8050

Testing...
Iter: [341/341] Loss: 0.3499
Average Test Loss: 0.3846; Test Acc: 86.3953; Test mean IoU: 0.7947
-------------------------------------------
Epoch:[04/200]
Training...
Iter: [147/147] Loss: 0.3056
Average Train Loss: 0.3188; Train Acc: 88.6227; Train mean IoU: 0.8184

Testing...
Iter: [341/341] Loss:

KeyboardInterrupt: ignored

### Report the best test mIoU you can get.

In [0]:
# best test mIoU = 0.8605