<a href="https://colab.research.google.com/github/g4aidl-upc-winter-2020/3D-Shape-classification/blob/main/PointNet_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install all needed packages from PyG:
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric

In [2]:
import os
import sys

import torch
from torch_geometric.datasets import ModelNet
from torch_geometric.data import DataLoader
from torch_geometric.utils import to_dense_batch
import torch_geometric.transforms as T
from torch_geometric.transforms import SamplePoints, NormalizeScale
import torch.nn as nn
import torch.nn.functional as F
from torch import autograd

import datetime
from time import time

from torch.utils.tensorboard import SummaryWriter
from tensorboard import notebook
%load_ext tensorboard

import numpy as np


## Import drive folder

In [3]:
# Mount Google Drive at /content/drive; the dataset, split files, architecture script
# and log folders used below are all addressed through paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Set a fixed seed

In [4]:
seed = 42

# Seed PyTorch's random number generators.
torch.manual_seed(seed)

# Seed NumPy's global random number generator as well.
np.random.seed(seed)

# Disable cuDNN auto-tuning so the convolution algorithm is not re-selected by benchmarking on each run.
torch.backends.cudnn.benchmark = False

# With deterministic algorithms enabled, cuBLAS operations on CUDA >= 10.2 raise an error
# unless this variable is set. `setdefault` keeps any value already chosen by the user.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")

# Ask PyTorch to use deterministic implementations (same input -> same output).
torch.use_deterministic_algorithms(True)

## Hyper-parameters

In [5]:
# Optimisation settings (consumed by the Adam optimizer and the training loop below)
learning_rate = 1e-3   # initial learning rate
weight_decay = 1e-3    # weight decay passed to the optimizer
num_epochs = 20        # number of train + validation passes

# Mini-batch sizes for the two data loaders
train_batch_size = 32
val_batch_size = 32

## Instantiate Tensorboard Writer

### Create log folders

In [6]:
root = '/content/drive/MyDrive/Proyecto/Colabs/experiments/logs/PointNet'

# Take the timestamp once: calling now() separately for each folder could straddle a
# second boundary and scatter the train / validation logs of one run over two run folders.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_logdir = os.path.join(root, run_stamp, 'train')
val_logdir = os.path.join(root, run_stamp, 'validation')

### Create summary writer

In [7]:
# One Tensorboard writer per split, each writing into its own log folder
# (train first, then validation).
train_writer, val_writer = (
    SummaryWriter(log_dir=log_path) for log_path in (train_logdir, val_logdir)
)

# PointNet

This is an implementation of [PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation](https://arxiv.org/abs/1612.00593) using PyTorch *Geometric*.

## Dataset

In [8]:
# Load the training split of ModelNet10 through PyG.
# The pre-transform samples 1024 points from each shape.
dataset = ModelNet(
    root='/content/drive/MyDrive/Proyecto/Colabs/ModelNet',
    name="10",
    train=True,
    pre_transform=T.SamplePoints(num=1024),
)

In [9]:
# The train / validation split is stored as two text files holding one dataset index per line.
def _read_split_indices(path):
  """Return the integer indices listed one per line in the text file at `path`.

  Whitespace-only lines (e.g. a trailing blank line) are skipped. The file is
  closed as soon as it has been read.
  """
  with open(path, 'r') as split_file:
    return [int(line) for line in split_file if line.strip()]

train_idx = _read_split_indices("/content/drive/MyDrive/Proyecto/Colabs/train_split.txt")
val_idx = _read_split_indices("/content/drive/MyDrive/Proyecto/Colabs/val_split.txt")

In [10]:
# Select the train / validation subsets of the dataset using the loaded index lists.
train_dataset, val_dataset = dataset[train_idx], dataset[val_idx]

# Short summary of the resulting splits.
print('Datasets info:')
print('--------------')
for label, value in (
    ('Train dataset size: ', len(train_dataset)),
    ('Validation dataset size: ', len(val_dataset)),
    ('Number of classes: ', dataset.num_classes),
):
  print(label, value)

Datasets info:
--------------
Train dataset size:  3193
Validation dataset size:  798
Number of classes:  10


### Normalize Input points

In [11]:
# Attach a NormalizeScale transform to each split; it centers and normalizes
# node positions to the interval (-1, 1). Each split gets its own transform instance.
for split in (train_dataset, val_dataset):
  split.transform = NormalizeScale()

## Training

### Make sure your runtime has a GPU

In [12]:
# Use the first CUDA device when one is available; the notebook refuses to continue on CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
assert device.type != 'cpu', "Change Runtime Type -> GPU"

### Loading the model architecture

In [13]:
# Make the folder that holds the architecture script importable.
# The membership check keeps the cell idempotent: re-running it does not append
# the same entry to sys.path again.
architecture_dir = '/content/drive/MyDrive/Proyecto/Colabs/architectures/PointNet'
if architecture_dir not in sys.path:
  sys.path.append(architecture_dir)

In [14]:
# The architecture is defined in a standalone Python script (found through sys.path).
from PointNet_Architecture import ClassificationPointNet

# Build the network and move it to the selected device.
# NOTE(review): assumes `.to()` returns the module itself (standard nn.Module behaviour) — confirm.
model = ClassificationPointNet().to(device)
model  # last expression of the cell, so the module summary is displayed

ClassificationPointNet(
  (transform): Transform(
    (input_transform): Tnet(
      (conv1): Conv1d(3, 64, kernel_size=(1,), stride=(1,))
      (conv2): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
      (conv3): Conv1d(128, 1024, kernel_size=(1,), stride=(1,))
      (fc1): Linear(in_features=1024, out_features=512, bias=True)
      (fc2): Linear(in_features=512, out_features=256, bias=True)
      (fc3): Linear(in_features=256, out_features=9, bias=True)
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (feature_transform): Tnet(
      (conv1): Conv1d(64, 64, k

#### Parameters of the model

In [15]:
# Count the elements of every parameter tensor that requires gradients.
num_params = 0
for param in model.parameters():
  if param.requires_grad:
    num_params += param.numel()
print('Number of parameters: %d' % num_params)

Number of parameters: 3463763


### Accuracy function

In [16]:
def accuracy(output, target):
  """Return the fraction of samples whose predicted class equals the target.

  Args:
    output: tensor of per-class scores; the prediction is the arg-max along dim 1.
    target: tensor of class indices compared element-wise against the predictions.

  Returns:
    Python float in [0, 1]: number of matches divided by the number of targets.
  """
  predicted = output.argmax(dim=1)             # index of the highest score per sample
  num_correct = predicted.eq(target).sum().item()
  return num_correct / target.numel()

### Train function

In [17]:
def train_epoch(model, train_loader, optimizer, criterion, epoch, scheduler):
  """Run one training epoch and log its averaged metrics.

  Relies on the notebook-level names `device`, `train_writer` and `accuracy`.

  Args:
    model: network being trained; called on every dense point batch.
    train_loader: iterable of PyG batches exposing `pos`, `batch` and `y`.
    optimizer: optimizer, zeroed and stepped once per batch.
    criterion: loss callable invoked as `criterion(output, labels)`.
    epoch: epoch number; used in the printed summary and as the Tensorboard step.
    scheduler: only read (through `get_last_lr()`) to report the learning rate;
      it is not stepped here.

  Returns:
    Tuple `(mean loss, mean accuracy in %)`. Both means are taken over the
    per-batch values, so every batch weighs the same regardless of its size.
  """
  model.train()  # switch the model to training mode
  losses = []
  accs = []

  lr = scheduler.get_last_lr()  # printed as returned (a list)

  for data in train_loader:
    # to_dense_batch returns a tuple whose first item holds the points of every cloud in the batch.
    points, _ = to_dense_batch(data.pos, batch=data.batch)
    # Swap dims 1 and 2 before feeding the network
    # (presumably to get channels-first input for the Conv1d layers — confirm).
    points = points.to(device).float().transpose(1, 2)
    labels = data.y.to(device)  # moved to the device once and reused for loss and accuracy

    optimizer.zero_grad()  # clear gradients left from the previous batch

    output = model(points)
    loss = criterion(output, labels)
    loss.backward()

    losses.append(loss.item())
    accs.append(100 * accuracy(output, labels))  # batch accuracy in percent

    optimizer.step()

  # Epoch summary. Skipped when the loader produced no batch at all, in which
  # case the means below are NaN.
  if losses:
    print('Train Epoch: {} \tLR: {} \tAverage Loss: {:.4f}\tAverage Acc: {:.2f} %'.format(
        epoch, lr, np.mean(losses), np.mean(accs)))

    train_writer.add_scalar('Loss', np.mean(losses), epoch)  # training loss of this epoch
    train_writer.add_scalar('Acc', np.mean(accs), epoch)     # training accuracy of this epoch

  return np.mean(losses), np.mean(accs)


### Validation function

In [18]:
def eval_epoch(model, val_loader, criterion, epoch):
  """Evaluate the model on the validation loader and log the averaged metrics.

  Relies on the notebook-level names `device`, `val_writer` and `accuracy`.

  Args:
    model: network to evaluate; called on every dense point batch.
    val_loader: iterable of PyG batches exposing `pos`, `batch` and `y`.
    criterion: loss callable invoked as `criterion(output, labels)`.
    epoch: epoch number; used in the printed summary and as the Tensorboard step.

  Returns:
    Tuple `(mean loss, mean accuracy in %)`. Both means are taken over the
    per-batch values, so every batch weighs the same regardless of its size.
  """
  model.eval()  # switch the model to evaluation mode
  eval_losses = []
  eval_accs = []

  with torch.no_grad():  # no gradients are needed for evaluation
    for data in val_loader:
      # to_dense_batch returns a tuple whose first item holds the points of every cloud in the batch.
      points, _ = to_dense_batch(data.pos, batch=data.batch)
      points = points.to(device).float().transpose(1, 2)
      labels = data.y.to(device)  # moved to the device once and reused for loss and accuracy

      output = model(points)

      eval_losses.append(criterion(output, labels).item())
      eval_accs.append(100 * accuracy(output, labels))  # batch accuracy in percent

  print('Val Epoch: {} \tAverage loss: {:.4f}\tAverage Acc: {:.2f} %'.format(
      epoch, np.mean(eval_losses), np.mean(eval_accs)))

  val_writer.add_scalar('Loss', np.mean(eval_losses), epoch)  # validation loss of this epoch
  val_writer.add_scalar('Acc', np.mean(eval_accs), epoch)     # validation accuracy of this epoch

  return np.mean(eval_losses), np.mean(eval_accs)

## Dataloader

Let's pass the datasets through the DataLoader in order to obtain batches of samples:

In [19]:
# Training split: shuffled mini-batches.
train_loader = DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
)

# Validation split: mini-batches in fixed order.
val_loader = DataLoader(
    val_dataset,
    batch_size=val_batch_size,
    shuffle=False,
)

## Optimizer

In [20]:
# Adam over all model parameters, configured from the hyper-parameter cell.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

## Scheduler

In [21]:
# Halve the learning rate every 20 scheduler steps (one step per epoch in train_net).
# NOTE(review): with num_epochs = 20 the first decay only takes effect after the last
# training epoch, so the learning rate stays constant during this run — confirm this is intended.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=20,
    gamma=0.5,
)

## Loss

In [22]:
# Cross-entropy loss for the N-class classification problem (log-softmax + NLL loss);
# 'mean' is the default reduction, spelled out here: the batch loss is averaged over its samples.
criterion = nn.CrossEntropyLoss(reduction='mean')

## Train

In [23]:
def train_net(model, train_loader, val_loader, optimizer, criterion, num_epochs, scheduler):
  """Train and validate the model for `num_epochs` epochs, keeping the best weights.

  Each epoch runs `train_epoch`, then `eval_epoch`, then steps the scheduler.
  Whenever the validation accuracy exceeds the best value seen so far, the
  model's state dict is saved as `best_params.pt` inside the notebook-level
  `train_logdir`.

  Returns:
    The best validation accuracy observed (0.0 if it was never exceeded).
  """
  best_accuracy = 0.0

  for epoch in range(1, num_epochs + 1):
    # The training metrics are already printed and logged inside train_epoch.
    _train_loss, _train_acc = train_epoch(model, train_loader, optimizer, criterion, epoch, scheduler)
    _val_loss, val_acc = eval_epoch(model, val_loader, criterion, epoch)

    scheduler.step()  # advance the learning-rate schedule once per epoch

    if val_acc > best_accuracy:
      # New best validation accuracy: remember it and checkpoint the weights.
      best_accuracy = val_acc
      torch.save(model.state_dict(), train_logdir + '/best_params.pt')

  return best_accuracy

In [24]:
# Launch the full training run and report the best validation accuracy reached.
best_accuracy = train_net(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=num_epochs,
    scheduler=scheduler,
)
print('Best validation accuracy = ', best_accuracy)

Train Epoch: 1 	LR: [0.001] 	Average Loss: 0.9617	Average Acc: 69.99 %
Val Epoch: 1 	Average loss: 0.9506	Average Acc: 71.04 %
Train Epoch: 2 	LR: [0.001] 	Average Loss: 0.5532	Average Acc: 81.45 %
Val Epoch: 2 	Average loss: 0.4611	Average Acc: 85.72 %
Train Epoch: 3 	LR: [0.001] 	Average Loss: 0.4187	Average Acc: 85.89 %
Val Epoch: 3 	Average loss: 0.5195	Average Acc: 83.48 %
Train Epoch: 4 	LR: [0.001] 	Average Loss: 0.3514	Average Acc: 87.71 %
Val Epoch: 4 	Average loss: 1.2383	Average Acc: 61.89 %
Train Epoch: 5 	LR: [0.001] 	Average Loss: 0.3533	Average Acc: 87.83 %
Val Epoch: 5 	Average loss: 0.9157	Average Acc: 72.03 %
Train Epoch: 6 	LR: [0.001] 	Average Loss: 0.3018	Average Acc: 89.59 %
Val Epoch: 6 	Average loss: 0.5249	Average Acc: 79.82 %
Train Epoch: 7 	LR: [0.001] 	Average Loss: 0.2842	Average Acc: 90.13 %
Val Epoch: 7 	Average loss: 0.3470	Average Acc: 88.50 %
Train Epoch: 8 	LR: [0.001] 	Average Loss: 0.2737	Average Acc: 91.05 %
Val Epoch: 8 	Average loss: 0.4214	Avera