<a href="https://colab.research.google.com/github/58191554/PointNet-Project/blob/main/ModelAssert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

seed = 42

In [None]:
class Tnet(nn.Module):
    """
    T-Net is a type of spatial transformer network (STN) that learns a k x k
    transformation matrix for a given point cloud (or per-point feature map).

    It consists of two parts:
      * a convolutional network (kernel-size-1 ``Conv1d`` layers, each followed
        by batch normalisation and ReLU) that maps every point to a feature
        vector, and
      * a fully connected network that regresses the k*k matrix entries from
        the max-pooled (global) feature vector.

    The identity matrix is added to the regressed matrix, so the module outputs
    the identity whenever the final linear layer outputs zeros.

    Args:
        hidden_sizes_conv: output channels of the successive Conv1d layers.
        hidden_sizes_fc: output sizes of the hidden fully connected layers.
        k: number of input channels; the predicted matrix is k x k.
    """
    def __init__(self, hidden_sizes_conv=[64, 128, 1024], hidden_sizes_fc=[512, 256], k=3):
        super().__init__()
        self.k = k
        # Copy the size lists: the defaults are shared list objects, and a
        # caller-owned list must not be able to change this module afterwards.
        self.hidden_sizes_conv = list(hidden_sizes_conv)
        self.hidden_sizes_fc = list(hidden_sizes_fc)

        self.conv = self._build_conv()
        self.fc = self._build_fc()

    def _build_conv(self):
        """
        Build the per-point feature extractor.

        Structure: [Conv1d(kernel_size=1)] -> [BatchNorm1d] -> [ReLU] -> [Conv1d] -> ...
        with channel sizes k -> hidden_sizes_conv[0] -> hidden_sizes_conv[1] -> ...
        """
        layers = []
        prev_size = self.k
        for size in self.hidden_sizes_conv:
            layers.append(nn.Conv1d(prev_size, size, 1))
            layers.append(nn.BatchNorm1d(size))
            layers.append(nn.ReLU())
            prev_size = size
        return nn.Sequential(*layers)

    def _build_fc(self):
        """
        Build the regressor that maps the global feature to the k*k matrix entries.

        Structure: [Linear] -> [BatchNorm1d] -> [ReLU] -> [Linear] -> ... -> [Linear(k*k)]
        The last linear layer has no normalisation or activation.
        """
        layers = []
        # Width of the pooled feature vector: the last conv width, or k when
        # there are no conv layers at all.
        prev_size = self.hidden_sizes_conv[-1] if self.hidden_sizes_conv else self.k
        for size in self.hidden_sizes_fc:
            layers.append(nn.Linear(prev_size, size))
            layers.append(nn.BatchNorm1d(size))
            layers.append(nn.ReLU())
            prev_size = size
        # Use the tracked width (not hidden_sizes_fc[-1]) so an empty hidden
        # list is also valid.
        layers.append(nn.Linear(prev_size, self.k ** 2))
        return nn.Sequential(*layers)

    def forward(self, input):
        """
        Predict one k x k transformation matrix per batch element.

        Pipeline:
        [ConvLayers] -> [MaxPooling over points] -> [Fully Connected Layers] -> [theta + identity]

        Args:
            input: tensor of shape (batch_size, k, num_points); the channel
                dimension must equal k because it feeds ``Conv1d(k, ...)``.

        Returns:
            Tensor of shape (batch_size, k, k).
        """
        features = self.conv(input)                      # (bs, C, n)
        # Global max over the point dimension; same values as a MaxPool1d
        # spanning all points followed by a flatten.
        pooled = torch.max(features, dim=-1).values      # (bs, C)
        theta = self.fc(pooled).view(-1, self.k, self.k)

        # Build the identity directly on theta's device and dtype so the module
        # works on any device (not just the default CUDA one) and for non-float32
        # models. It is kept as a gradient-requiring tensor, as in the original
        # design; broadcasting adds it to every matrix in the batch.
        identity = torch.eye(self.k, dtype=theta.dtype, device=theta.device,
                             requires_grad=True)
        return theta + identity

In [None]:
def count_parameters(model):
    """Return the total number of elements across all trainable parameters of `model`.

    Only parameters whose ``requires_grad`` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Seed BEFORE building the model: the layer weights are drawn from the global
# RNG at construction time, so without this the comparison against fixed
# reference values below cannot be reproducible.
# NOTE(review): confirm the reference values in y1 were generated with this
# seeding order (seed -> build model -> seed -> sample input).
torch.manual_seed(seed)
test_t_net = Tnet()

# Sanity check on the architecture: the default T-Net has a known parameter count.
test_t_net_param_num = count_parameters(test_t_net)
if test_t_net_param_num!=803081:
    print("Error")
    print("test_t_net parameters number = ", test_t_net_param_num)

assert test_t_net_param_num==803081

# Re-seed so the sampled input is the same as when seeding only at this point.
torch.manual_seed(seed)
x1 = torch.randn(3, 3, 5)


# Reference output for x1.
y1 = torch.tensor([[[ 2.4067,  0.2867,  0.0376],
         [ 0.7862,  0.6480,  0.5361],
         [ 0.0954, -0.2747,  1.0449]],

        [[ 1.4776, -0.4632, -0.0986],
         [ 1.2216,  0.7514, -0.6017],
         [-0.4946,  0.4135,  1.2490]],

        [[ 1.3951,  0.7168, -0.2013],
         [-0.3767,  1.9410, -0.8459],
         [ 0.5701, -0.0262,  1.0335]]])
pred_y1 = test_t_net(x1)
print(pred_y1)
assert torch.allclose(y1, pred_y1, rtol=1e-03, atol=1e-03),  "different y_pred and y"

tensor([[[ 0.8501,  0.1849, -0.2045],
         [-0.0936,  0.9165,  0.4215],
         [ 0.0244,  0.0145,  0.7206]],

        [[ 1.2012,  0.2271, -0.0122],
         [-0.1934,  0.2646,  0.3887],
         [ 0.1313, -0.1262,  0.4967]],

        [[ 0.5446,  0.3135, -0.0992],
         [-0.4373,  1.8131,  0.7287],
         [-0.0117,  0.1228,  0.6404]]], grad_fn=<AddBackward0>)


AssertionError: ignored

In [None]:
class Transform(nn.Module):
    """
    Transform is the whole pipeline that turns a point cloud into a global feature.

             _________________                                     ____________                  ___________________       ____________
            |                 |                                   |            |                |                   |     |            |
    x -->   | input transform | --> y (canonical point cloud) --> | shared MLP | --> feature -->| feature transform | --> | shared MLP | --> max pooling --> z
            |_________________|                                   |____________|                |___________________|     |____________|

    The network goes through two pairs of spatial transformer network (T-Net)
    and shared MLP. A shared MLP is a stack of kernel-size-1 one-dimensional
    convolutions, i.e. the same MLP applied to every point.

    The input x first goes through a T-Net that predicts an
    (input_size x input_size) matrix; the points are multiplied by this matrix
    to obtain the transformed point cloud, which has the same shape as x. The
    same is then done in feature space with a (feature_size x feature_size)
    matrix.
    """
    def __init__(self, input_size=3, feature_size=64, sharedMLP1_layers=[64, 64], sharedMLP2_layers=[64, 128, 1024], batch_norm = True):
        """
        Args:
            input_size: number of channels per input point.
            feature_size: number of channels fed to the feature transform and
                to the second shared MLP; it must equal the last entry of
                sharedMLP1_layers for the forward pass to be shape-consistent.
            sharedMLP1_layers: output channels of the first shared MLP.
            sharedMLP2_layers: output channels of the second shared MLP.
            batch_norm: whether the shared MLPs insert BatchNorm1d after each
                convolution.
        """
        super().__init__()
        # Honour the constructor argument (previously it was always forced to True).
        self.batch_norm = batch_norm

        # Matrix sizes follow the configured channel counts instead of being
        # hard-coded to 3 and 64; the defaults give the same modules as before.
        self.input_transform = Tnet(k=input_size)
        self.feature_transform = Tnet(k=feature_size)

        # The first shared MLP ends with a ReLU; the second one does not, so
        # the pooled global feature is not clipped by an activation.
        self.sharedMLP1 = self._build_sharedMLP(input_size, sharedMLP1_layers, last_activate=True)
        self.sharedMLP2 = self._build_sharedMLP(feature_size, sharedMLP2_layers, last_activate=False)

    def _build_sharedMLP(self, input_dim, sharedMLP_layers, last_activate = True):
        """
        Build a shared MLP: repeated [Conv1d(kernel_size=1)] -> [BatchNorm1d] -> [ReLU].

        Args:
            input_dim: number of input channels.
            sharedMLP_layers: output channels of the successive convolutions.
            last_activate: when False, the ReLU after the last layer is omitted.

        The BatchNorm1d layers are only inserted when self.batch_norm is true.
        """
        layers = []
        prev_size = input_dim
        last_id = len(sharedMLP_layers) - 1
        for layer_id, size in enumerate(sharedMLP_layers):
            layers.append(nn.Conv1d(prev_size, size, 1))

            if self.batch_norm:
                layers.append(nn.BatchNorm1d(size))

            if layer_id < last_id or last_activate:
                layers.append(nn.ReLU())

            prev_size = size
        return nn.Sequential(*layers)

    def forward(self, input):     #input:[batch_size, 3, 1024] output:[batch_size, 1024]
        """
        Compute the global feature of a batch of point clouds.

        Steps:
        1. Get the input transform matrix from the first T-Net.
        2. Batch matrix multiply the points and the transform matrix.
        3. Feed the transformed points into the first shared MLP.
        4. Get the feature transform matrix from the second T-Net and batch
           matrix multiply it with the features.
        5. Feed the result into the second shared MLP.
        6. Max-pool over the point dimension.

        Args:
            input: tensor of shape (batch_size, input_size, num_points).

        Returns:
            (output, matrix3x3, matrix64x64): the global feature of shape
            (batch_size, sharedMLP2_layers[-1]), the input transform matrices
            and the feature transform matrices.
        """
        matrix3x3 = self.input_transform(input)     #[batch_size, 3, 3]
        # bmm works on (points, channels) rows, so transpose in and back out.
        xb = torch.bmm(torch.transpose(input, 1, 2), matrix3x3).transpose(1, 2)     #[batch_size, 3, 1024]
        xb = self.sharedMLP1(xb)

        matrix64x64 = self.feature_transform(xb)     #[batch_size, 64, 64]
        xb = torch.bmm(torch.transpose(xb, 1, 2), matrix64x64).transpose(1, 2)     #[batch_size, 64, 1024]
        xb = self.sharedMLP2(xb)

        # Global max over all points; same values as a MaxPool1d spanning the
        # whole point dimension followed by a flatten, without building new
        # modules on every call.
        output = torch.max(xb, dim=-1).values     #[batch_size, 1024]
        return output, matrix3x3, matrix64x64

In [None]:
# Seed BEFORE building the model: the layer weights are drawn from the global
# RNG at construction time, so without this the comparison against fixed
# reference values below cannot be reproducible.
# NOTE(review): confirm the reference values in mat1 were generated with this
# seeding order (seed -> build model -> seed -> sample input).
torch.manual_seed(seed)
test_transfrom_net = Transform()

# Sanity check on the architecture: the default network has a known parameter count.
test_tranform_net_param_num = count_parameters(test_transfrom_net)
if test_tranform_net_param_num!=2812105:
    print("Error")
    print("test_transfrom_net parameters number = ", test_tranform_net_param_num)
    # Plain integer difference; torch.absolute cannot be applied to a Python bool.
    print("Difference = ", abs(test_tranform_net_param_num - 2812105))

assert test_tranform_net_param_num==2812105


# Re-seed so the sampled input is the same as when seeding only at this point.
torch.manual_seed(seed)
x2 = torch.randn(2, 3, 5)
pred_y2, pred_mat1, pred_mat2 = test_transfrom_net(x2)

# Reference input-transform matrices for x2.
mat1 = torch.tensor([[[ 1.4163,  0.5276,  0.1501],
         [ 0.5164,  1.1149, -0.3152],
         [-0.0963,  0.0254,  0.8117]],

        [[ 1.9937, -0.0146, -0.6189],
         [ 0.5562,  1.2374, -0.4386],
         [ 0.0655,  0.2559,  1.5200]]])

if not torch.allclose(mat1, pred_mat1, rtol=1e-03, atol=1e-03):
    print("Error")
    print("The answer mat1 is \n", mat1)
    print("The pred_mat1 is \n", pred_mat1)
    print("Difference = ", torch.norm(pred_mat1- mat1))
assert torch.allclose(mat1, pred_mat1, rtol=1e-03, atol=1e-03),  "different pred_mat1 and mat1"