# dependencies

In [1]:
import os
# Configure which CUDA device this kernel may use. These variables are set in
# the first cell, i.e. before the cell that imports torch.
# NOTE(review): presumably "PCI_BUS_ID" makes device index 0 match the index
# reported by nvidia-smi — confirm against the CUDA documentation.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
import yaml
import imp
import math
import time
import json

import numpy as np

import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import Dataset

import torch
import torch.nn.functional as F

# import torch.nn as nn
from torch import nn

from pprint import pprint

In [3]:
# Sanity check: print whether torch reports CUDA as available and how many
# CUDA devices it can see.
print(torch.cuda.is_available())
print(torch.cuda.device_count())

True
1


# laserscan.py

In [4]:
class LaserScan:
    """Single laser scan (x, y, z, remission) with an optional range-image projection.

    The raw cloud is kept in `points` / `remissions`. When `project` is True,
    `set_points` (and therefore `open_scan`) also fills the `proj_*` images of
    size [H, W] by calling `do_range_projection`.

    Args:
        project: run the spherical projection whenever points are set.
        H: height of the projected image in pixels.
        W: width of the projected image in pixels.
        fov_up: upper bound of the vertical field of view, in degrees.
        fov_down: lower bound of the vertical field of view, in degrees.
    """
    EXTENSIONS_SCAN = ['.bin']

    def __init__(self,
                 project=False,
                 H=64,
                 W=1024,
                 fov_up=3.0,
                 fov_down=-25.0):
        self.project = project
        self.proj_H = H
        self.proj_W = W
        self.proj_fov_up = fov_up
        self.proj_fov_down = fov_down
        self.reset()

    def reset(self):
        """Reset all scan members to their empty / "no data" state."""
        self.points = np.zeros((0, 3), dtype=np.float32)  # [m, 3]: x, y, z
        self.remissions = np.zeros((0, 1), dtype=np.float32)  # [m, 1]: remission

        # projected range image - [H, W] range (-1 is no data)
        self.proj_range = np.full((self.proj_H, self.proj_W), -1,
                                  dtype=np.float32)

        # unprojected range (one depth per point, in original point order)
        self.unproj_range = np.zeros((0, 1), dtype=np.float32)

        # projected point cloud xyz - [H, W, 3] xyz coord (-1 is no data)
        self.proj_xyz = np.full((self.proj_H, self.proj_W, 3), -1,
                                dtype=np.float32)

        # projected remission - [H, W] intensity (-1 is no data)
        self.proj_remission = np.full((self.proj_H, self.proj_W), -1,
                                      dtype=np.float32)

        # projected index: for each pixel, the index of its point in the cloud
        # [H, W] index (-1 is no data)
        self.proj_idx = np.full((self.proj_H, self.proj_W), -1,
                                dtype=np.int32)

        # for each point, where it lands in the range image
        self.proj_x = np.zeros((0, 1), dtype=np.int32)  # [m, 1]: x (column)
        self.proj_y = np.zeros((0, 1), dtype=np.int32)  # [m, 1]: y (row)

        # mask telling, for each pixel, whether it contains a point (1) or not (0)
        self.proj_mask = np.zeros((self.proj_H, self.proj_W),
                                  dtype=np.int32)  # [H, W] mask

    def size(self):
        """Return the number of points in the cloud."""
        return self.points.shape[0]

    def __len__(self):
        return self.size()

    def open_scan(self, filename):
        """Read a raw scan file and fill in the attributes.

        The file is read as flat float32 values and reshaped to rows of four
        values: x, y, z, remission.

        Args:
            filename: path to a file whose name ends with one of EXTENSIONS_SCAN.

        Raises:
            TypeError: if `filename` is not a string.
            RuntimeError: if the extension is not a valid scan extension.
        """
        # reset just in case there was an open structure
        self.reset()

        # check filename is string
        if not isinstance(filename, str):
            raise TypeError("Filename should be string type, "
                            "but was {type}".format(type=str(type(filename))))

        # check extension is a laserscan
        if not any(filename.endswith(ext) for ext in self.EXTENSIONS_SCAN):
            raise RuntimeError("Filename extension is not valid scan file.")

        # if all goes well, open pointcloud
        scan = np.fromfile(filename, dtype=np.float32)
        scan = scan.reshape((-1, 4))

        points = scan[:, 0:3]  # xyz
        remissions = scan[:, 3]  # remission

        self.set_points(points, remissions)

    def set_points(self, points, remissions=None):
        """Set scan attributes directly (instead of opening from file).

        Args:
            points: numpy array of xyz coordinates, one row per point.
            remissions: optional numpy array with one remission per point;
                zeros are used when omitted.

        Raises:
            TypeError: if `points` or `remissions` is not a numpy array.
        """
        # reset just in case there was an open structure
        self.reset()

        # check scan makes sense
        if not isinstance(points, np.ndarray):
            raise TypeError("Scan should be numpy array")

        # check remission makes sense
        if remissions is not None and not isinstance(remissions, np.ndarray):
            raise TypeError("Remissions should be numpy array")

        self.points = points
        if remissions is not None:
            self.remissions = remissions
        else:
            self.remissions = np.zeros((points.shape[0]), dtype=np.float32)

        # if projection is wanted, then do it and fill in the structure
        if self.project:
            self.do_range_projection()

    def do_range_projection(self):
        """Project the point cloud into a spherical range image.

        Takes no arguments so that it can also be called externally when the
        constructor was created with `project=False`. Fills `proj_x`, `proj_y`,
        `unproj_range`, `proj_range`, `proj_xyz`, `proj_remission`, `proj_idx`
        and `proj_mask`.
        """
        # laser parameters
        fov_up = self.proj_fov_up / 180.0 * np.pi  # field of view up in rad
        fov_down = self.proj_fov_down / 180.0 * np.pi  # field of view down in rad
        fov = abs(fov_down) + abs(fov_up)  # total field of view in rad

        # depth (euclidean norm) of all points
        depth = np.linalg.norm(self.points, 2, axis=1)

        # scan components
        scan_x = self.points[:, 0]
        scan_y = self.points[:, 1]
        scan_z = self.points[:, 2]

        # angles of all points
        yaw = -np.arctan2(scan_y, scan_x)
        pitch = np.arcsin(scan_z / depth)

        # projections in normalized image coords
        proj_x = 0.5 * (yaw / np.pi + 1.0)  # in [0.0, 1.0]
        proj_y = 1.0 - (pitch + abs(fov_down)) / fov  # in [0.0, 1.0]

        # scale to image size using angular resolution
        proj_x *= self.proj_W  # in [0.0, W]
        proj_y *= self.proj_H  # in [0.0, H]

        # round and clamp for use as index
        proj_x = np.floor(proj_x)
        proj_x = np.minimum(self.proj_W - 1, proj_x)
        proj_x = np.maximum(0, proj_x).astype(np.int32)  # in [0, W-1]
        self.proj_x = np.copy(proj_x)  # store a copy in original order

        proj_y = np.floor(proj_y)
        proj_y = np.minimum(self.proj_H - 1, proj_y)
        proj_y = np.maximum(0, proj_y).astype(np.int32)  # in [0, H-1]
        self.proj_y = np.copy(proj_y)  # store a copy in original order

        # copy of depth in original order
        self.unproj_range = np.copy(depth)

        # order in decreasing depth, so that when several points fall on the
        # same pixel the closest one is written last and wins
        indices = np.arange(depth.shape[0])
        order = np.argsort(depth)[::-1]
        depth = depth[order]
        indices = indices[order]
        points = self.points[order]
        remission = self.remissions[order]
        proj_y = proj_y[order]
        proj_x = proj_x[order]

        # assign to images
        self.proj_range[proj_y, proj_x] = depth
        self.proj_xyz[proj_y, proj_x] = points
        self.proj_remission[proj_y, proj_x] = remission
        self.proj_idx[proj_y, proj_x] = indices
        # "no data" is -1 and 0 is the index of the first point, so the
        # comparison must be >= 0; a strict > 0 would mask out point 0.
        self.proj_mask = (self.proj_idx >= 0).astype(np.int32)

# parser.py

In [5]:
EXTENSIONS_SCAN = ['.bin']
EXTENSIONS_LABEL = ['.label']


def is_scan(filename):
    """Return True when `filename` ends with one of the EXTENSIONS_SCAN suffixes."""
    return filename.endswith(tuple(EXTENSIONS_SCAN))


def is_label(filename):
    """Return True when `filename` ends with one of the EXTENSIONS_LABEL suffixes."""
    return filename.endswith(tuple(EXTENSIONS_LABEL))


class SemanticKitti(Dataset):
    """Dataset yielding projected laser scans from every scan file in a folder.

    All files under `dataset_dir_path` (searched recursively) that `is_scan`
    accepts are collected and sorted. No label files are read: the label
    entries returned by `__getitem__` are empty lists.

    Args:
        dataset_dir_path: directory where the data is.
        labels: label dict (e.g. 10: "car").
        color_map: colors dict, bgr (e.g. 10: [255, 0, 0]).
        learning_map: classes to learn (0 to N-1 for xentropy).
        learning_map_inv: inverse of the previous one (recover labels).
        sensor: sensor description; the keys read here are
            "img_prop" ("height", "width"), "img_means", "img_stds",
            "fov_up" and "fov_down".
        max_points: size every per-point tensor is padded to.

    Raises:
        ValueError: if `dataset_dir_path` is not an existing directory.
    """

    def __init__(self,
                 # directory where data is
                 dataset_dir_path,
                 # label dict: (e.g 10: "car")
                 labels,
                 # colors dict bgr (e.g 10: [255, 0, 0])
                 color_map,
                 # classes to learn (0 to N-1 for xentropy)
                 learning_map,
                 # inverse of previous (recover labels)
                 learning_map_inv,
                 # sensor to parse scans from
                 sensor,
                 # max number of points present in dataset
                 max_points=150000,
                ):
        # save details
        self.dataset_dir_path = dataset_dir_path

        self.labels = labels
        self.color_map = color_map
        self.learning_map = learning_map
        self.learning_map_inv = learning_map_inv

        self.sensor = sensor
        self.sensor_img_H = sensor["img_prop"]["height"]
        self.sensor_img_W = sensor["img_prop"]["width"]
        self.sensor_img_means = torch.tensor(sensor["img_means"], dtype=torch.float)
        self.sensor_img_stds = torch.tensor(sensor["img_stds"], dtype=torch.float)
        self.sensor_fov_up = sensor["fov_up"]
        self.sensor_fov_down = sensor["fov_down"]
        print('SENSOR:')
        pprint(self.sensor)

        self.max_points = max_points

        # get number of classes (can't be len(self.learning_map) because there
        # are multiple repeated entries, so the number that matters is how many
        # there are for the xentropy)
        self.nclasses = len(self.learning_map_inv)

        # sanity checks

        # make sure directory exists
        if os.path.isdir(self.dataset_dir_path):
            print("Dataset folder exists! Using clouds from %s" % self.dataset_dir_path)
        else:
            raise ValueError("Dataset folder doesn't exist! Exiting...")

        # make sure the label / color / learning maps are dicts
        assert isinstance(self.labels, dict)
        assert isinstance(self.color_map, dict)
        assert isinstance(self.learning_map, dict)

        # placeholder for filenames
        self.scan_files = []
        self.label_files = []

        # parsing dataset
        print("Parsing dataset... ")

        scan_path = self.dataset_dir_path

        # collect every scan file below scan_path
        scan_files = [os.path.join(dp, f)
                      for dp, dn, fn in os.walk(os.path.expanduser(scan_path))
                      for f in fn if is_scan(f)]

        self.scan_files.extend(scan_files)

        # sort for a deterministic order
        self.scan_files.sort()

        print("Using {} scans".format(len(self.scan_files)))

    def __getitem__(self, index):
        """Load scan `index`, project it and return the tensors for the network.

        Returns:
            Tuple of (proj, proj_mask, proj_labels, unproj_labels, path_name,
            proj_x, proj_y, proj_range, unproj_range, proj_xyz, unproj_xyz,
            proj_remission, unproj_remissions, unproj_n_points). `proj` is the
            normalized and masked 5-channel image (range, x, y, z, remission);
            the `unproj_*`, `proj_x` and `proj_y` tensors have length
            `max_points` and are filled with -1 past `unproj_n_points`.
        """
        scan_file = self.scan_files[index]

        # open a laserscan with projection enabled
        scan = LaserScan(project=True,
                         H=self.sensor_img_H,
                         W=self.sensor_img_W,
                         fov_up=self.sensor_fov_up,
                         fov_down=self.sensor_fov_down)

        # open and obtain scan
        scan.open_scan(scan_file)

        # make tensors of the unprojected data, padded to max_points with -1
        unproj_n_points = scan.points.shape[0]
        unproj_xyz = torch.full((self.max_points, 3), -1.0, dtype=torch.float)
        unproj_xyz[:unproj_n_points] = torch.from_numpy(scan.points)
        unproj_range = torch.full([self.max_points], -1.0, dtype=torch.float)
        unproj_range[:unproj_n_points] = torch.from_numpy(scan.unproj_range)
        unproj_remissions = torch.full([self.max_points], -1.0, dtype=torch.float)
        unproj_remissions[:unproj_n_points] = torch.from_numpy(scan.remissions)
        unproj_labels = []  # no labels are loaded

        # projected images and per-point pixel coordinates
        proj_range = torch.from_numpy(scan.proj_range).clone()
        proj_xyz = torch.from_numpy(scan.proj_xyz).clone()
        proj_remission = torch.from_numpy(scan.proj_remission).clone()
        proj_mask = torch.from_numpy(scan.proj_mask)
        proj_labels = []  # no labels are loaded
        proj_x = torch.full([self.max_points], -1, dtype=torch.long)
        proj_x[:unproj_n_points] = torch.from_numpy(scan.proj_x)
        proj_y = torch.full([self.max_points], -1, dtype=torch.long)
        proj_y[:unproj_n_points] = torch.from_numpy(scan.proj_y)

        # Stack range, xyz and remission into a [5, H, W] input image.
        # torch.cat allocates a new tensor, so no extra clones are needed.
        # (A variant WITHOUT remission would drop the last entry and use only
        # the first four means/stds.)
        proj = torch.cat([proj_range.unsqueeze(0),
                          proj_xyz.permute(2, 0, 1),
                          proj_remission.unsqueeze(0)])

        # per-channel normalization, then zero out pixels without a point
        proj = (proj - self.sensor_img_means[:, None, None]) / self.sensor_img_stds[:, None, None]
        proj = proj * proj_mask.float()

        # name of the label file that corresponds to this scan
        path_norm = os.path.normpath(scan_file)
        path_split = path_norm.split(os.sep)
        path_name = path_split[-1].replace(".bin", ".label")

        return ( proj,
                 proj_mask,
                 proj_labels,
                 unproj_labels,
                 path_name,
                 proj_x,
                 proj_y,
                 proj_range,
                 unproj_range,
                 proj_xyz,
                 unproj_xyz,
                 proj_remission,
                 unproj_remissions,
                 unproj_n_points )

    def __len__(self):
        return len(self.scan_files)

    @staticmethod
    def map(label, mapdict):
        """Map `label` through `mapdict` using a numpy lookup table.

        Works from original values to xentropy values or vice-versa, depending
        on the dictionary. Values may be scalars or lists (e.g. colors); the
        table width follows the last entry of `mapdict`. An empty `mapdict`
        maps everything to 0.

        Args:
            label: integer index or integer numpy array to look up.
            mapdict: dict from integer key to scalar or list value.

        Returns:
            `lut[label]`, an int32 numpy value/array.
        """
        maxkey = 0
        # default width; keeps an empty mapdict from leaving `nel` unbound
        nel = 1
        for key, data in mapdict.items():
            if isinstance(data, list):
                nel = len(data)
            else:
                nel = 1
            if key > maxkey:
                maxkey = key
        # +100 hack making lut bigger just in case there are unknown labels
        if nel > 1:
            lut = np.zeros((maxkey + 100, nel), dtype=np.int32)
        else:
            lut = np.zeros((maxkey + 100), dtype=np.int32)
        for key, data in mapdict.items():
            try:
                lut[key] = data
            except IndexError:
                print("Wrong key ", key)
        # do the mapping
        return lut[label]

# parser.py

In [6]:
class Parser():
    """Builds the test dataset and its DataLoader and exposes label-mapping helpers.

    Args:
        dataset_dir_path: directory holding the scans.
        labels: labels in data.
        color_map: color for each label.
        learning_map: mapping for training labels.
        learning_map_inv: mapping to recover labels from xentropy classes.
        sensor: sensor to use (passed through to SemanticKitti).
        max_points: max points in each scan in the entire dataset.
        batch_size: batch size of the test loader.
        workers: number of worker processes used to load data.
        shuffle_train: stored on the instance; the test loader never shuffles.

    Raises:
        AssertionError: if the test loader ends up with no batches.
    """

    def __init__(self,
                 dataset_dir_path,

                 # labels in data
                 labels,
                 # color for each label
                 color_map,
                 # mapping for training labels
                 learning_map,
                 # recover labels from xentropy
                 learning_map_inv,

                 # sensor to use
                 sensor,
                 # max points in each scan in entire dataset
                 max_points,

                 # batch size for train and val
                 batch_size,
                 # threads to load data
                 workers,
                 # shuffle training set?
                 shuffle_train=True
                ):
        super(Parser, self).__init__()

        # parameters
        self.dataset_dir_path = dataset_dir_path

        self.labels = labels
        self.color_map = color_map
        self.learning_map = learning_map
        self.learning_map_inv = learning_map_inv

        self.sensor = sensor
        self.max_points = max_points

        self.batch_size = batch_size
        self.workers = workers
        self.shuffle_train = shuffle_train

        # number of classes that matters is the one for xentropy
        self.nclasses = len(self.learning_map_inv)

        # Load test dataset
        self.test_dataset = SemanticKitti(self.dataset_dir_path,
                                          self.labels,
                                          self.color_map,
                                          self.learning_map,
                                          self.learning_map_inv,
                                          self.sensor,
                                          self.max_points)

        self.testloader = torch.utils.data.DataLoader(self.test_dataset,
                                                       batch_size=self.batch_size,
                                                       shuffle=False,
                                                       num_workers=self.workers,
                                                       drop_last=True)

        assert len(self.testloader) > 0
        self.testiter = iter(self.testloader)

    def get_test_batch(self):
        """Return the next batch from the test iterator.

        Raises:
            StopIteration: when the iterator is exhausted.
        """
        # Use the built-in next(): Python 3 iterators only guarantee
        # __next__, not a .next() method.
        scans = next(self.testiter)
        return scans

    def get_test_set(self):
        """Return the test DataLoader."""
        return self.testloader

    def get_test_size(self):
        """Return the number of batches in the test DataLoader."""
        return len(self.testloader)

    def get_n_classes(self):
        """Return the number of xentropy classes."""
        return self.nclasses

    def get_original_class_string(self, idx):
        """Return the label name for an original label id."""
        return self.labels[idx]

    def get_xentropy_class_string(self, idx):
        """Return the label name for an xentropy class id."""
        return self.labels[self.learning_map_inv[idx]]

    def to_original(self, label):
        """Map xentropy class ids back to original label values."""
        return SemanticKitti.map(label, self.learning_map_inv)

    def to_xentropy(self, label):
        """Map original label values to xentropy class ids."""
        return SemanticKitti.map(label, self.learning_map)

    def to_color(self, label):
        """Map xentropy class ids to colors via the original label values."""
        # put label in original values
        label = SemanticKitti.map(label, self.learning_map_inv)
        # put label in color
        return SemanticKitti.map(label, self.color_map)


# segmentator.py

In [7]:
class Add(nn.Module):
    """Module wrapper around the `+` operator for two inputs."""

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        """Return `x + y`."""
        total = x + y
        return total


class resBlock_with_add(nn.Module):
    """Applies conv -> act -> bn to `x` and adds `y` to the result.

    The three stages are supplied by the caller and stored as the submodules
    `conv`, `act` and `bn`.
    """

    def __init__(self, conv, act, bn):
        super().__init__()
        # tuple assignment registers the submodules in the order conv, act, bn
        self.conv, self.act, self.bn = conv, act, bn

    def forward(self, x, y):
        """Return `bn(act(conv(x))) + y`."""
        return self.bn(self.act(self.conv(x))) + y


class Trans(nn.Module):
    """Chains three caller-supplied stages: trans -> trans_act -> trans_bn."""

    def __init__(self, trans, trans_act, trans_bn):
        super().__init__()
        # tuple assignment registers the submodules in the original order
        self.trans, self.trans_act, self.trans_bn = trans, trans_act, trans_bn

    def forward(self, x):
        """Return `trans_bn(trans_act(trans(x)))`."""
        return self.trans_bn(self.trans_act(self.trans(x)))


class AttentionBlock(nn.Module):
    """Scales `x` by a single-channel sigmoid map computed from `g` and `x`.

    Args:
        f_g: number of channels of `g`.
        f_l: number of channels of `x`.
        f_int: number of channels of the intermediate representation.
    """

    def __init__(self, f_g, f_l, f_int):
        super().__init__()
        # every convolution in this block is a 1x1, stride-1, unpadded conv
        pointwise = dict(kernel_size=1, padding=0, stride=1)

        self.Wg = nn.Sequential(
            nn.Conv2d(f_g, f_int, **pointwise),
            nn.BatchNorm2d(f_int),
        )
        self.Wx = nn.Sequential(
            nn.Conv2d(f_l, f_int, **pointwise),
            nn.BatchNorm2d(f_int),
        )
        self.psi = nn.Sequential(
            nn.Conv2d(f_int, 1, **pointwise),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU()

    def forward(self, g, x):
        """Return `x * psi(relu(Wg(g) + Wx(x)))`."""
        combined = self.relu(self.Wg(g) + self.Wx(x))
        weights = self.psi(combined)
        return x * weights


class ResContextBlock(nn.Module):
    """Two conv -> LeakyReLU -> BatchNorm stages plus a 1x1-conv shortcut.

    Args:
        in_filters: input channels.
        out_filters: output channels.
        kernel_size: kernel of the two main-branch convolutions (padding is 1).
        stride: stride of the 1x1 shortcut convolution only.
    """

    def __init__(self, in_filters, out_filters, kernel_size=(3, 3), stride=1):
        super().__init__()
        # shortcut branch
        self.conv1 = nn.Conv2d(in_filters, out_filters, kernel_size=(1, 1), stride=stride)
        self.act1 = nn.LeakyReLU()

        # main branch, first stage
        self.conv2 = nn.Conv2d(in_filters, out_filters, kernel_size, padding=1)
        self.act2 = nn.LeakyReLU()
        self.bn1 = nn.BatchNorm2d(out_filters)

        # main branch, second stage
        self.conv3 = nn.Conv2d(out_filters, out_filters, kernel_size, padding=1)
        self.act3 = nn.LeakyReLU()
        self.bn2 = nn.BatchNorm2d(out_filters)

    def forward(self, x):
        """Return the main-branch output plus the activated shortcut."""
        skip = self.act1(self.conv1(x))

        out = self.bn1(self.act2(self.conv2(x)))
        out = self.bn2(self.act3(self.conv3(out)))
        return out + skip


class ResBlock(nn.Module):
    """Residual block with optional dropout and optional average pooling.

    Args:
        in_filters: input channels.
        out_filters: output channels.
        dropout_rate: probability used by the Dropout2d layer.
        kernel_size: kernel of the main-branch convolutions and of the pooling.
        stride: stride of the 1x1 shortcut convolution only.
        pooling: when True, forward returns `(pooled, unpooled)`;
            otherwise it returns a single tensor.
        drop_out: when False, the dropout layer is skipped in forward.
    """

    def __init__(self, in_filters, out_filters, dropout_rate, kernel_size=(3, 3), stride=1,
                 pooling=True, drop_out=True):
        super().__init__()
        self.pooling = pooling
        self.drop_out = drop_out

        # shortcut branch
        self.conv1 = nn.Conv2d(in_filters, out_filters, kernel_size=(1, 1), stride=stride)
        self.act1 = nn.LeakyReLU()

        # main branch
        self.conv2 = nn.Conv2d(in_filters, out_filters, kernel_size=kernel_size, padding=1)
        self.act2 = nn.LeakyReLU()
        self.bn1 = nn.BatchNorm2d(out_filters)

        self.conv3 = nn.Conv2d(out_filters, out_filters, kernel_size=kernel_size, padding=1)
        self.act3 = nn.LeakyReLU()
        self.bn2 = nn.BatchNorm2d(out_filters)

        # the dropout layer exists in both configurations; only pooling is optional
        self.dropout = nn.Dropout2d(p=dropout_rate)
        if pooling:
            self.pool = nn.AvgPool2d(kernel_size=kernel_size, stride=2, padding=1)

    def forward(self, x):
        """Return `(pooled, residual)` when pooling is enabled, else one tensor."""
        shortcut = self.act1(self.conv1(x))

        main = self.bn1(self.act2(self.conv2(x)))
        main = self.bn2(self.act3(self.conv3(main)))
        resA = shortcut + main

        resB = self.dropout(resA) if self.drop_out else resA
        if not self.pooling:
            return resB
        return self.pool(resB), resA


class UpBlock(nn.Module):
    """Upsamples `x` with a transposed conv, adds `skip`, then applies three conv stages.

    Args:
        in_filters: channels of the tensor being upsampled.
        out_filters: channels after upsampling (and of the skip tensor added to it).
        dropout_rate: probability used by the three Dropout2d layers.
        kernel_size: kernel of the transposed and regular convolutions.
        drop_out: when False, all dropout layers are skipped in forward.
    """

    def __init__(self, in_filters, out_filters, dropout_rate, kernel_size=(3, 3),drop_out=True):
        super().__init__()
        self.drop_out = drop_out

        # upsampling stage
        self.trans = nn.ConvTranspose2d(in_filters, out_filters, kernel_size, stride=(2, 2), padding=1)
        self.trans_act = nn.LeakyReLU()
        self.trans_bn = nn.BatchNorm2d(out_filters)

        self.dropout1 = nn.Dropout2d(p=dropout_rate)
        self.dropout2 = nn.Dropout2d(p=dropout_rate)

        # three conv -> LeakyReLU -> BatchNorm stages
        self.conv1 = nn.Conv2d(out_filters, out_filters, kernel_size, padding=1)
        self.act1 = nn.LeakyReLU()
        self.bn1 = nn.BatchNorm2d(out_filters)

        self.conv2 = nn.Conv2d(out_filters, out_filters, kernel_size, padding=1)
        self.act2 = nn.LeakyReLU()
        self.bn2 = nn.BatchNorm2d(out_filters)

        self.conv3 = nn.Conv2d(out_filters, out_filters, kernel_size, padding=1)
        self.act3 = nn.LeakyReLU()
        self.bn3 = nn.BatchNorm2d(out_filters)
        self.dropout3 = nn.Dropout2d(p=dropout_rate)

    def forward(self, x, skip):
        """Return the decoded feature map for `x` merged with `skip`."""
        up = self.trans(x)

        # when the upsampled map does not match the skip tensor, replicate one
        # extra column on the right and one extra row at the bottom
        if up.shape != skip.shape:
            up = F.pad(up, (0, 1, 0, 1), mode='replicate')

        up = self.trans_bn(self.trans_act(up))
        if self.drop_out:
            up = self.dropout1(up)

        merged = up + skip
        if self.drop_out:
            merged = self.dropout2(merged)

        out = self.bn1(self.act1(self.conv1(merged)))
        out = self.bn2(self.act2(self.conv2(out)))
        out = self.bn3(self.act3(self.conv3(out)))
        if self.drop_out:
            out = self.dropout3(out)

        return out


class SalsaNet(nn.Module):
    """Encoder-decoder segmentation network over 5-channel projected scans.

    The encoder is a context block followed by six residual blocks (the first
    five with pooling); the decoder is five up-blocks fed with the matching
    encoder skip tensors, followed by a 1x1 convolution to `nclasses` channels.

    Args:
        ARCH: architecture configuration; stored on the instance, not read here.
        nclasses: number of output classes.
        path: stored on the instance, not read here.
        path_append: stored on the instance, not read here.
        strict: stored on the instance, not read here.
    """

    def __init__(self, ARCH, nclasses, path=None, path_append="", strict=False):
        super(SalsaNet, self).__init__()
        self.ARCH = ARCH
        self.nclasses = nclasses
        self.path = path
        self.path_append = path_append
        # honor the constructor argument instead of hard-coding False
        self.strict = strict

        # WITH remission: 5 input channels.
        # (WITHOUT remission this would be ResContextBlock(4, 32).)
        self.downCntx = ResContextBlock(5, 32)

        self.resBlock1 = ResBlock(32, 32, 0.2, pooling=True, drop_out=False)
        self.resBlock2 = ResBlock(32, 2 * 32, 0.2, pooling=True)
        self.resBlock3 = ResBlock(2 * 32, 4 * 32, 0.2, pooling=True)
        self.resBlock4 = ResBlock(4 * 32, 8 * 32, 0.2, pooling=True)
        self.resBlock5 = ResBlock(8 * 32, 16 * 32, 0.2, pooling=True)
        self.resBlock6 = ResBlock(16 * 32, 16 * 32, 0.2, pooling=False)

        self.upBlock1 = UpBlock(16 * 32, 16 * 32, 0.2)
        self.upBlock2 = UpBlock(16 * 32, 8 * 32, 0.2)
        self.upBlock3 = UpBlock(8 * 32, 4 * 32, 0.2)
        self.upBlock4 = UpBlock(4 * 32, 2 * 32, 0.2)
        self.upBlock5 = UpBlock(2 * 32, 32, 0.2, drop_out=False)

        self.logits = nn.Conv2d(32, nclasses, kernel_size=(1, 1))

    def forward(self, x):
        """Return per-pixel class probabilities (softmax over dim 1).

        Note that the returned tensor is already softmax-normalized, despite
        being produced by the layer named `logits`.
        """
        # encoder: each pooled block returns (pooled output, pre-pool skip)
        downCntx = self.downCntx(x)
        down0c, down0b = self.resBlock1(downCntx)
        down1c, down1b = self.resBlock2(down0c)
        down2c, down2b = self.resBlock3(down1c)
        down3c, down3b = self.resBlock4(down2c)
        down4c, down4b = self.resBlock5(down3c)
        down5b = self.resBlock6(down4c)

        # decoder: every up-block consumes the skip of the mirrored encoder block
        up4e = self.upBlock1(down5b, down4b)
        up3e = self.upBlock2(up4e, down3b)
        up2e = self.upBlock3(up3e, down2b)
        up1e = self.upBlock4(up2e, down1b)
        up0e = self.upBlock5(up1e, down0b)

        logits = self.logits(up0e)
        logits = F.softmax(logits, dim=1)
        return logits


# KNN.py

In [8]:
def get_gaussian_kernel(kernel_size=3, sigma=2, channels=1):
    """Build a normalized 2D gaussian kernel.

    Args:
        kernel_size: side length of the square kernel.
        sigma: standard deviation of the gaussian.
        channels: accepted for interface compatibility; not used.

    Returns:
        Float tensor of shape (kernel_size, kernel_size) whose entries sum to 1.
    """
    center = (kernel_size - 1) / 2.
    variance = sigma ** 2.

    # squared offset of every coordinate from the kernel center, along one axis
    offsets = torch.arange(kernel_size).float() - center
    sq_offsets = offsets ** 2.

    # squared distance of every (row, column) cell from the center
    sq_dist = sq_offsets.unsqueeze(0) + sq_offsets.unsqueeze(1)

    # product of two 1D gaussians, one per axis
    kernel = (1. / (2. * math.pi * variance)) * torch.exp(-sq_dist / (2 * variance))

    # make sure the values in the kernel sum to 1
    kernel = kernel / torch.sum(kernel)

    return kernel.view(kernel_size, kernel_size)


class KNN(nn.Module):
    """kNN post-processing that re-votes per-point labels from a range image.

    Args:
        params: dict providing "knn" (number of neighbors), "search" (odd side
            length of the square search window), "sigma" (gaussian weighting)
            and "cutoff" (distance above which a neighbor is ignored; values
            <= 0 disable the cutoff).
        nclasses: number of classes in the prediction.
    """

    def __init__(self, params, nclasses):
        super().__init__()
        print("*" * 80)
        print("Cleaning point-clouds with kNN post-processing")
        self.knn = params["knn"]
        self.search = params["search"]
        self.sigma = params["sigma"]
        self.cutoff = params["cutoff"]
        self.nclasses = nclasses
        print("kNN parameters:")
        print("knn:", self.knn)
        print("search:", self.search)
        print("sigma:", self.sigma)
        print("cutoff:", self.cutoff)
        print("nclasses:", self.nclasses)
        print("*" * 80)

    def forward(self, proj_range, unproj_range, proj_argmax, px, py):
        ''' Warning! Only works for un-batched pointclouds.
            If they come batched we need to iterate over the batch dimension or do
            something REALLY smart to handle unaligned number of points in memory

            Args:
                proj_range: [H, W] range image.
                unproj_range: range of every point.
                proj_argmax: [H, W] predicted class per pixel.
                px, py: column / row of every point in the range image.

            Returns:
                Tensor shaped like `unproj_range` with the voted class per point.

            Raises:
                ValueError: if the search window size is even.
        '''
        # Create helper tensors on the device and with the dtype of the input
        # itself; torch.device("cuda") would mean the *current* CUDA device,
        # which is not necessarily where proj_range lives.
        device = proj_range.device
        dtype = proj_range.dtype

        # sizes of projection scan
        H, W = proj_range.shape

        # number of points
        P = unproj_range.shape

        # check if size of kernel is odd and complain
        if (self.search % 2 == 0):
            raise ValueError("Nearest neighbor kernel must be odd number")

        # calculate padding
        pad = int((self.search - 1) / 2)

        # unfold neighborhood to get nearest neighbors for each pixel (range image)
        proj_unfold_k_rang = F.unfold(proj_range[None, None, ...],
                                      kernel_size=(self.search, self.search),
                                      padding=(pad, pad))

        # index with px, py to get ALL the pcld points
        idx_list = py * W + px
        unproj_unfold_k_rang = proj_unfold_k_rang[:, :, idx_list]

        # WARNING, THIS IS A HACK
        # Make non valid (<0) range points extremely big so that they never
        # get picked by the nearest-neighbor search
        unproj_unfold_k_rang[unproj_unfold_k_rang < 0] = float("inf")

        # now the matrix is unfolded TOTALLY, replace the middle points with the actual range points
        center = int(((self.search * self.search) - 1) / 2)
        unproj_unfold_k_rang[:, center, :] = unproj_range

        # now compare range
        k2_distances = torch.abs(unproj_unfold_k_rang - unproj_range)

        # make a kernel to weigh the ranges according to distance in (x,y)
        # I make this 1 - kernel because I want distances that are close in (x,y)
        # to matter more
        inv_gauss_k = (
                1 - get_gaussian_kernel(self.search, self.sigma, 1)).view(1, -1, 1)
        inv_gauss_k = inv_gauss_k.to(device=device, dtype=dtype)

        # apply weighing
        k2_distances = k2_distances * inv_gauss_k

        # find nearest neighbors
        _, knn_idx = k2_distances.topk(
            self.knn, dim=1, largest=False, sorted=False)

        # do the same unfolding with the argmax
        proj_unfold_1_argmax = F.unfold(proj_argmax[None, None, ...].float(),
                                        kernel_size=(self.search, self.search),
                                        padding=(pad, pad)).long()
        unproj_unfold_1_argmax = proj_unfold_1_argmax[:, :, idx_list]

        # get the top k predictions from the knn at each pixel
        knn_argmax = torch.gather(
            input=unproj_unfold_1_argmax, dim=1, index=knn_idx)

        # fake an invalid argmax of classes + 1 for all cutoff items
        if self.cutoff > 0:
            knn_distances = torch.gather(input=k2_distances, dim=1, index=knn_idx)
            knn_invalid_idx = knn_distances > self.cutoff
            knn_argmax[knn_invalid_idx] = self.nclasses

        # now vote
        # argmax onehot has an extra class for objects after cutoff
        knn_argmax_onehot = torch.zeros(
            (1, self.nclasses + 1, P[0]), device=device, dtype=dtype)
        ones = torch.ones_like(knn_argmax, dtype=dtype)
        knn_argmax_onehot = knn_argmax_onehot.scatter_add_(1, knn_argmax, ones)

        # now vote (as a sum over the onehot votes); class 0 (unlabeled) and
        # the extra "invalid" class are excluded from the choice
        knn_argmax_out = knn_argmax_onehot[:, 1:-1].argmax(dim=1) + 1

        # reshape again
        knn_argmax_out = knn_argmax_out.view(P)

        return knn_argmax_out


# user.py

In [9]:
def get_sync_time():
    """Return `time.perf_counter()`, synchronizing CUDA first when it is available."""
    cuda_ready = torch.cuda.is_available()
    if cuda_ready:
        torch.cuda.synchronize()
    timestamp = time.perf_counter()
    return timestamp

class User():
    """Runs inference of a SalsaNet/SalsaNext model over one folder of scans.

    The constructor builds the dataset parser, instantiates the network, loads
    its weights from ``<model_dir_path>/SalsaNet``, optionally sets up the kNN
    post-processing step and moves the model to the GPU when one is available.
    `infer()` then writes one prediction file per scan into `log_dir_path`.
    """

    # the only model names the constructor knows how to build
    SUPPORTED_MODELS = ('salsanet', 'salsanext')

    def __init__(self,
                 arch_cfg,
                 data_cfg,
                 dataset_dir_path,
                 log_dir_path,
                 model_dir_path,
                 model_name
                ):
        """Build the parser, the model and the optional kNN post-processor.

        Args:
            arch_cfg: dict; the keys read here are ``['dataset']['sensor']``,
                ``['dataset']['max_points']`` and ``['post']['KNN']``
                (``'use'`` and ``'params'``). It is also handed to `SalsaNet`.
            data_cfg: dict; the keys read here are ``'labels'``,
                ``'color_map'``, ``'learning_map'`` and ``'learning_map_inv'``.
            dataset_dir_path: folder with the scans, passed to `Parser`.
            log_dir_path: folder the predictions are written into.
            model_dir_path: folder containing the checkpoint file ``SalsaNet``.
            model_name: one of `SUPPORTED_MODELS`.

        Raises:
            ValueError: if `model_name` is not one of `SUPPORTED_MODELS`.
        """
        # Fail fast, before the dataset is parsed or any weights are loaded.
        # Previously an unknown name only printed a message and the
        # constructor crashed later with an unrelated AttributeError because
        # self.model was never assigned.
        if model_name not in self.SUPPORTED_MODELS:
            raise ValueError(
                'Unknown model name %r, expected one of: %s'
                % (model_name, ', '.join(self.SUPPORTED_MODELS)))

        # parameters
        self.arch_cfg = arch_cfg
        self.data_cfg = data_cfg
        self.dataset_dir_path = dataset_dir_path
        self.log_dir_path = log_dir_path
        self.model_dir_path = model_dir_path
        self.model_name = model_name

        # get the data
        self.parser = Parser(
            self.dataset_dir_path,

            # raw label id -> class name
            self.data_cfg['labels'],
            # raw label id -> 3-element color
            self.data_cfg['color_map'],
            # raw label id -> training class id
            self.data_cfg['learning_map'],
            # training class id -> raw label id
            self.data_cfg['learning_map_inv'],

            self.arch_cfg['dataset']['sensor'],
            self.arch_cfg['dataset']['max_points'],

            # one scan per batch: infer_subset() relies on this when it strips
            # the batch dimension with index 0
            batch_size=1,
            workers=0,
            shuffle_train=False
        )

        # build the network and load its weights
        with torch.no_grad():
            print('modeldir: %s' % self.model_dir_path)
            model_path = os.path.join(self.model_dir_path, 'SalsaNet')
            print('model_path: %s' % model_path)

            self.model = SalsaNet(self.arch_cfg,
                                  self.parser.get_n_classes(),
                                  model_path)

            # The model is intentionally NOT wrapped in nn.DataParallel: per
            # the original author's note the wrapper does not work with
            # tracing, so inference runs on a single device.

            torch.nn.Module.dump_patches = True

            # map_location keeps every tensor on the CPU while loading; the
            # model is moved to the GPU further below if one is available
            w_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(w_dict['state_dict'], strict=True)

        # use knn post processing?
        self.post = None
        if self.arch_cfg['post']['KNN']['use']:
            self.post = KNN(self.arch_cfg['post']['KNN']['params'], self.parser.get_n_classes())

        # GPU?
        self.gpu = False
        self.model_single = self.model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print('Infering in device: ', self.device)
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            cudnn.benchmark = True
            cudnn.fastest = True
            self.gpu = True
            self.model.cuda()


    def infer(self):
        """Run inference over the parser's test set and save the predictions."""
        self.infer_subset(loader=self.parser.get_test_set(),
                          to_orig_fn=self.parser.to_original)

        print('Finished Infering')


    def infer_subset(self, loader, to_orig_fn, limit=10000):
        """Predict labels for the scans yielded by `loader` and save them.

        Args:
            loader: iterable yielding 14-element batches of size one. The
                positions used are 0 (`proj_in`), 4 (`path_name`), 5 (`p_x`),
                6 (`p_y`), 7 (`proj_range`), 8 (`unproj_range`) and
                13 (`npoints`); the rest are ignored.
            to_orig_fn: callable applied to the flat int32 numpy array of
                predicted class ids before it is written to disk.
            limit: maximum number of scans to process; ``None`` processes the
                whole loader. Defaults to 10000, the value that used to be
                hard-coded.

        Side effects:
            Writes one file per scan to ``<log_dir_path>/<path_name>`` (the
            directory is created if missing) and prints per-scan and summary
            timing information.
        """
        # switch to evaluate mode
        self.model.eval()

        # empty the cache to infer in high res
        if self.gpu:
            torch.cuda.empty_cache()

        # make sure the predictions have somewhere to go; an empty
        # log_dir_path means "current directory" and needs no creation
        if self.log_dir_path:
            os.makedirs(self.log_dir_path, exist_ok=True)

        # wall-clock duration of model + post-processing for every scan
        infer_times = []

        with torch.no_grad():
            for i, (proj_in, _, _, _, path_name, p_x, p_y, proj_range, unproj_range, _, _, _, _, npoints) in enumerate(loader):
                # only a subset of the loader is processed (research setting)
                if limit is not None and i >= limit:
                    break

                # first cut to real size (batch size one allows it)
                p_x = p_x[0, :npoints]
                p_y = p_y[0, :npoints]
                # NOTE(review): proj_range is sliced with :npoints exactly like
                # the per-point tensors; presumably it is an image with fewer
                # rows than npoints so the slice only drops the batch
                # dimension - confirm against the Parser.
                proj_range = proj_range[0, :npoints]
                unproj_range = unproj_range[0, :npoints]
                path_name = path_name[0]

                # loading data on GPU (the range tensors are only needed by
                # the kNN post-processing)
                if self.gpu:
                    proj_in = proj_in.cuda()
                    p_x = p_x.cuda()
                    p_y = p_y.cuda()
                    if self.post:
                        proj_range = proj_range.cuda()
                        unproj_range = unproj_range.cuda()

                # INFER TIME START
                infer_time_start = get_sync_time()

                # compute output and take the most likely class per pixel
                proj_output = self.model(proj_in)
                proj_argmax = proj_output[0].argmax(dim=0)

                if self.post:
                    # knn postproc
                    unproj_argmax = self.post(proj_range, unproj_range, proj_argmax, p_x, p_y)
                else:
                    # put in original pointcloud using indexes
                    unproj_argmax = proj_argmax[p_y, p_x]

                # INFER TIME END
                infer_times.append(get_sync_time() - infer_time_start)
                print('Scan: %s' % path_name)
                print('Infer time: %s sec' % infer_times[-1])

                # flatten the per-point predictions
                pred_np = unproj_argmax.cpu().numpy()
                pred_np = pred_np.reshape((-1)).astype(np.int32)

                # map to original label
                pred_np = to_orig_fn(pred_np)

                # save scan
                path = os.path.join(self.log_dir_path, path_name)
                pred_np.tofile(path)

        # The first measurement is dropped from the statistics: in the
        # recorded run it is two orders of magnitude slower than the others.
        timed = infer_times[1:]
        print('*' * 30)
        print('INFER TIME STATISTICS')
        if timed:
            # mean/std of an empty list would be NaN plus a RuntimeWarning
            print('MEAN: %s' % np.mean(timed))
            print('STD: %s' % np.std(timed))
        print('COUNT: %s' % len(timed))

# infer single folder

## data cfg

In [10]:
# Label configuration for the dataset named 'kitti' (see the 'name' entry).
# User reads only 'labels', 'color_map', 'learning_map' and 'learning_map_inv'
# from this dict and forwards them to Parser.
data_cfg = {
 # raw label id -> 3-element color
 # NOTE(review): channel order (RGB vs BGR) is not visible here - confirm.
 'color_map': {0: [0, 0, 0],
               1: [0, 0, 255],
               10: [245, 150, 100],
               11: [245, 230, 100],
               13: [250, 80, 100],
               15: [150, 60, 30],
               16: [255, 0, 0],
               18: [180, 30, 80],
               20: [255, 0, 0],
               30: [30, 30, 255],
               31: [200, 40, 255],
               32: [90, 30, 150],
               40: [255, 0, 255],
               44: [255, 150, 255],
               48: [75, 0, 75],
               49: [75, 0, 175],
               50: [0, 200, 255],
               51: [50, 120, 255],
               52: [0, 150, 255],
               60: [170, 255, 150],
               70: [0, 175, 0],
               71: [0, 60, 135],
               72: [80, 240, 150],
               80: [150, 240, 255],
               81: [0, 0, 255],
               99: [255, 255, 50],
               252: [245, 150, 100],
               253: [200, 40, 255],
               254: [30, 30, 255],
               255: [90, 30, 150],
               256: [255, 0, 0],
               257: [250, 80, 100],
               258: [180, 30, 80],
               259: [255, 0, 0]},
 # raw label id -> float in [0, 1]
 # presumably the fraction of points carrying that label - TODO confirm
 'content': {0: 0.018889854628292943,
             1: 0.0002937197336781505,
             10: 0.040818519255974316,
             11: 0.00016609538710764618,
             13: 2.7879693665067774e-05,
             15: 0.00039838616015114444,
             16: 0.0,
             18: 0.0020633612104619787,
             20: 0.0016218197275284021,
             30: 0.00017698551338515307,
             31: 1.1065903904919655e-08,
             32: 5.532951952459828e-09,
             40: 0.1987493871255525,
             44: 0.014717169549888214,
             48: 0.14392298360372,
             49: 0.0039048553037472045,
             50: 0.1326861944777486,
             51: 0.0723592229456223,
             52: 0.002395131480328884,
             60: 4.7084144280367186e-05,
             70: 0.26681502148037506,
             71: 0.006035012012626033,
             72: 0.07814222006271769,
             80: 0.002855498193863172,
             81: 0.0006155958086189918,
             99: 0.009923127583046915,
             252: 0.001789309418528068,
             253: 0.00012709999297008662,
             254: 0.00016059776092534436,
             255: 3.745553104802113e-05,
             256: 0.0,
             257: 0.00011351574470342043,
             258: 0.00010157861367183268,
             259: 4.3840131989471124e-05},
 # raw label id -> human-readable class name
 'labels': {0: 'unlabeled',
            1: 'outlier',
            10: 'car',
            11: 'bicycle',
            13: 'bus',
            15: 'motorcycle',
            16: 'on-rails',
            18: 'truck',
            20: 'other-vehicle',
            30: 'person',
            31: 'bicyclist',
            32: 'motorcyclist',
            40: 'road',
            44: 'parking',
            48: 'sidewalk',
            49: 'other-ground',
            50: 'building',
            51: 'fence',
            52: 'other-structure',
            60: 'lane-marking',
            70: 'vegetation',
            71: 'trunk',
            72: 'terrain',
            80: 'pole',
            81: 'traffic-sign',
            99: 'other-object',
            252: 'moving-car',
            253: 'moving-bicyclist',
            254: 'moving-person',
            255: 'moving-motorcyclist',
            256: 'moving-on-rails',
            257: 'moving-bus',
            258: 'moving-truck',
            259: 'moving-other-vehicle'},
 # training class id (0..19) -> ignore flag; only class 0 is flagged
 'learning_ignore': {0: True,
                     1: False,
                     2: False,
                     3: False,
                     4: False,
                     5: False,
                     6: False,
                     7: False,
                     8: False,
                     9: False,
                     10: False,
                     11: False,
                     12: False,
                     13: False,
                     14: False,
                     15: False,
                     16: False,
                     17: False,
                     18: False,
                     19: False},
 # raw label id -> training class id (0..19); several raw ids share one
 # training id (e.g. every 'moving-*' id maps to the id of its static class)
 'learning_map': {0: 0,
                  1: 0,
                  10: 1,
                  11: 2,
                  13: 5,
                  15: 3,
                  16: 5,
                  18: 4,
                  20: 5,
                  30: 6,
                  31: 7,
                  32: 8,
                  40: 9,
                  44: 10,
                  48: 11,
                  49: 12,
                  50: 13,
                  51: 14,
                  52: 0,
                  60: 9,
                  70: 15,
                  71: 16,
                  72: 17,
                  80: 18,
                  81: 19,
                  99: 0,
                  252: 1,
                  253: 7,
                  254: 6,
                  255: 8,
                  256: 5,
                  257: 5,
                  258: 4,
                  259: 5},
 # training class id (0..19) -> one representative raw label id
 'learning_map_inv': {0: 0,
                      1: 10,
                      2: 11,
                      3: 15,
                      4: 18,
                      5: 20,
                      6: 30,
                      7: 31,
                      8: 32,
                      9: 40,
                      10: 44,
                      11: 48,
                      12: 49,
                      13: 50,
                      14: 51,
                      15: 70,
                      16: 71,
                      17: 72,
                      18: 80,
                      19: 81},
 'name': 'kitti',
 # sequence numbers per split; not read by User, which is pointed at a single
 # folder through dataset_dir_path
 'split': {'test': [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21],
           'train': [0, 1, 2, 3, 4, 5, 6, 7, 9, 10],
           'valid': [8]}}

## arch cfg

In [11]:
# Architecture / run configuration.
# User reads ['dataset']['sensor'] and ['dataset']['max_points'] (both passed
# to Parser) and ['post']['KNN'] ('use' and 'params'); the whole dict is also
# handed to SalsaNet.
arch_cfg = {
 'dataset': {
     'labels': 'kitti',
     'max_points': 150000,
     'scans': 'kitti',

     # KITTI
     # Sensor description used for the range-image projection; exactly one
     # 'sensor' entry may be active (the HUSKY alternative below is disabled).
     'sensor': {
         'fov_down': -25,
         'fov_up': 3,

         # per-channel normalization constants, one value per input channel
         'img_means': [12.12, 10.88, 0.23, -1.04, 0.21], # range, x, y, z, signal
         'img_stds': [12.32, 11.47, 6.91, 0.86, 0.16], # range, x, y, z, signal

         # size of the projected image
         'img_prop': {
             'height': 64,
             'width': 2048,
         },
         'name': 'HDL64',
         'type': 'spherical'
     },

     # HUSKY
     # Alternative sensor block; to use it, comment out the KITTI 'sensor'
     # entry above and uncomment this one.
#      'sensor': {
#          'fov_down': -30.67,
#          'fov_up': 10.67,

#          'img_means': [8.75550024, 0.07549276, -1.13823771, -0.13648431, 0.06386641], # range, x, y, z, signal
#          'img_stds': [10.08941738, 10.40510729, 8.21806914, 1.15425178, 0.07281147], # range, x, y, z, signal

#          'img_prop': {
#              'height': 32,
#              'width': 2048,
#              # TODO: scale?
#              # 'width': 2169,
#          },
#          'name': 'HDL32',
#          'type': 'spherical'
#      },
 },

 # Post-processing. User only looks at the 'KNN' entry: when 'use' is true a
 # KNN module is built from 'params'. The 'CRF' entry is not read by User.
 'post': {'CRF': {'params': False, 'train': True, 'use': False},
          'KNN': {'params': {'cutoff': 1.0,
                             'knn': 5,
                             'search': 5,
                             'sigma': 1.0},
                  'use': True}},

 # Training hyper-parameters; not read by User (inference only).
 'train': {'batch_size': 30,
           'epsilon_w': 0.001,
           'loss': 'xentropy',
           'lr': 0.05,
           'lr_decay': 0.99,
           'max_epochs': 40,
           'momentum': 0.9,
           'report_batch': 10,
           'report_epoch': 1,
           'save_scans': True,
           'save_summary': False,
           'show_scans': False,
           'w_decay': 0.0001,
           'workers': 4,
           'wup_epochs': 1}}

## other settings (paths and model name)

In [12]:
# Directory holding the trained checkpoint; User loads the file 'SalsaNet'
# from it. Exactly one assignment must stay active: with both lines commented
# out, the `User(...)` cell below fails with NameError on a fresh kernel. The
# recorded output of that cell ("modeldir: .../logs/40epoch") shows which
# checkpoint was in use, so that one is enabled.
# model_dir_path = '/home/crowbar/2-projects/SalsaNext/logs/40epoch-wo-rem'
model_dir_path = '/home/crowbar/2-projects/SalsaNext/logs/40epoch'

In [13]:
# Folder with the scans to segment; passed to User, which forwards it to Parser.
# Keep exactly one assignment active.
dataset_dir_path = '/datasets/KITTI_Odometry/dataset/sequences/12/velodyne/'
# dataset_dir_path = '/datasets/Husky-NKBVS/14/velodyne_points/clouds'

In [14]:
# Output folder: User.infer_subset writes one prediction file per scan into it
# (os.path.join(log_dir_path, <scan name>)). Keep exactly one assignment active.
# log_dir_path = '/home/crowbar/2-projects/SalsaNext/predicted/kitti/'
log_dir_path = '/home/crowbar/2-projects/SalsaNext/predicted/w-rem/kitti-sem-12/'
# log_dir_path = '/home/crowbar/2-projects/SalsaNext/predicted/w-rem/husky/'

In [15]:
# Network to build; User only builds a model for 'salsanet' or 'salsanext'.
model_name = 'salsanext'

In [16]:
# Build the inference driver: parses the dataset folder, instantiates the
# network, loads its weights from <model_dir_path>/SalsaNet and, if enabled in
# arch_cfg['post']['KNN'], sets up the kNN post-processing.
user = User(arch_cfg, data_cfg, dataset_dir_path, log_dir_path, model_dir_path, model_name)

SENSOR:
{'fov_down': -25,
 'fov_up': 3,
 'img_means': [12.12, 10.88, 0.23, -1.04, 0.21],
 'img_prop': {'height': 64, 'width': 2048},
 'img_stds': [12.32, 11.47, 6.91, 0.86, 0.16],
 'name': 'HDL64',
 'type': 'spherical'}
Dataset folder exists! Using clouds from /datasets/KITTI_Odometry/dataset/sequences/12/velodyne/
Parsing dataset... 
Using 1061 scans
modeldir: /home/crowbar/2-projects/SalsaNext/logs/40epoch
model_path: /home/crowbar/2-projects/SalsaNext/logs/40epoch/SalsaNet
********************************************************************************
Cleaning point-clouds with kNN post-processing
kNN parameters:
knn: 5
search: 5
sigma: 1.0
cutoff: 1.0
nclasses: 20
********************************************************************************
Infering in device:  cuda


In [17]:
# Run inference over the parser's test set; one prediction file per scan is
# written to log_dir_path and the per-scan inference time is printed. The first
# measurement is excluded from the summary statistics.
user.infer()

Scan: 000000.label
Infer time: 2.1194975689868443 sec
Scan: 000001.label
Infer time: 0.01803598797414452 sec
Scan: 000002.label
Infer time: 0.01265618996694684 sec
Scan: 000003.label
Infer time: 0.012610113015398383 sec
Scan: 000004.label
Infer time: 0.012641739042010158 sec
Scan: 000005.label
Infer time: 0.028232073003891855 sec
Scan: 000006.label
Infer time: 0.020533878006972373 sec
Scan: 000007.label
Infer time: 0.01653581199934706 sec
Scan: 000008.label
Infer time: 0.022206934983842075 sec
Scan: 000009.label
Infer time: 0.03361245500855148 sec
Scan: 000010.label
Infer time: 0.012850310013163835 sec
Scan: 000011.label
Infer time: 0.01280138100264594 sec
Scan: 000012.label
Infer time: 0.01252277399180457 sec
Scan: 000013.label
Infer time: 0.15979771997081116 sec
Scan: 000014.label
Infer time: 0.012641876994166523 sec
Scan: 000015.label
Infer time: 0.012730424001347274 sec
Scan: 000016.label
Infer time: 0.012720702972728759 sec
Scan: 000017.label
Infer time: 0.01275649998569861 sec
Sc

KeyboardInterrupt: 