In [2]:
import glob
import os
from datetime import datetime
from pathlib import Path

import laspy
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
import torchmetrics
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch import nn
from torch.optim import Adam

# from torch_cluster import fps
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import XConv, fps, global_mean_pool

In [92]:
class PointCNN(pl.LightningModule):
    def __init__(self, numfeatures=1):
        super().__init__()
        self.learning_rate = 1e-3  # learning rate
        self.train_acc = torchmetrics.Accuracy()  # traning accuracy
        self.val_acc = torchmetrics.Accuracy()  # validation accuracy
        self.test_acc = torchmetrics.Accuracy()  # test accuracy
        self.numfeatures = numfeatures  # number of features

        # First XConv layer
        self.conv1 = XConv(
            self.numfeatures, 48, dim=3, kernel_size=8, hidden_channels=32
        )

        # Second XConv layer
        self.conv2 = XConv(
            48, 96, dim=3, kernel_size=12, hidden_channels=64, dilation=2
        )

        # Third XConv layer
        self.conv3 = XConv(
            96, 192, dim=3, kernel_size=16, hidden_channels=128, dilation=2
        )

        # Fourth XConv layer
        self.conv4 = XConv(
            192, 384, dim=3, kernel_size=16, hidden_channels=256, dilation=2
        )

        # Multilayer Perceptrons (MLPs) at the end of the PointCNN
        self.lin1 = nn.Linear(384, 256)
        self.lin2 = nn.Linear(256, 125)
        self.lin3 = nn.Linear(128, 4)

    def forward(self, data):
        pos, batch = data.pos, data.batch
        x = data.x if self.numfeatures else None

        # First XConv with no features
        x = F.relu(self.conv1(x, pos, batch))
        # x = torch.nn.ReLU(self.conv1(x, pos, batch))

        # Farthest point sampling, keeping only 37.5%
        idx = fps(pos, batch, ratio=0.375)
        x, pos, batch = x[idx], pos[idx], batch[idx]
        print(x, "-", pos, "-", batch)

        # Second XConv
        x = F.relu(self.conv2(x, pos, batch))

        # Farthest point samplling, keepiong only 33.4%
        idx = fps(pos, batch, ratio=0.334)
        x, pos, batch = x[idx], pos[idx], batch[idx]

        # Two additional XConvs
        x = F.relu(self.conv3(x, pos, batch))
        x = F.relu(self.conv4(x, pos, batch))

        # Pooling batch-elements together
        # Each tree is described in one single point with 384 features
        x = global_mean_pool(x, batch)

        # MLPs at the end with ReLU
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))

        # Dropout: Set randomly to value of zero
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin3(x)

        # log-SofMax activation to callculate Negative Log Likelihood (NLL)
        return F.log_softmax(x, dim=-1)

    def training_step(self, data, batch_idx):
        y = data.y
        out = self(data)
        loss = F.nll_loss(out, y)
        self.train_acc(out, y)
        self.log("train_acc", self.train_acc, on_step=True, on_epoch=True)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, data, batch_idx):
        y = data.y
        out = self(data)
        val_loss = F.nll_loss(out, y)
        self.val_acc(out, y)
        self.log("val_acc", self.val_acc, on_step=True, on_epoch=True)
        self.log("val_loss", val_loss)  # , on_step=True, on_epoch=True)
        return val_loss

    def test_step(self, data, batch_idx):
        y = data.y
        out = self(data)
        test_loss = F.nll_loss(out, y)
        self.test_acc(out, y)
        self.log("test_loss", test_loss),
        return out

    def test_step_end(self, outs):
        return outs

    def test_epoch_end(self, outs):
        global res
        res = outs
        return outs

    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

In [93]:
def read_las(pointcloudfile, get_attributes=False, useevery=1):
    """
    :param pointcloudfile: specification of input file (format: las or laz)
    :param get_attributes: if True, will return all attributes in file, otherwise will only return XYZ (default is False)
    :param useevery: value specifies every n-th point to use from input, i.e. simple subsampling (default is 1, i.e. returning every point)
    :return: 3D array of points (x,y,z) of length number of points in input file (or subsampled by 'useevery')
    """

    # Read file
    inFile = laspy.read(pointcloudfile)

    # Get coordinates (XYZ)
    coords = np.vstack((inFile.x, inFile.y, inFile.z)).transpose()
    coords = coords[::useevery, :]

    # Return coordinates only
    if get_attributes == False:
        return coords

    # Return coordinates and attributes
    else:
        las_fields = [info.name for info in inFile.points.point_format.dimensions]
        attributes = {}
        # for las_field in las_fields[3:]:  # skip the X,Y,Z fields
        for las_field in las_fields:  # get all fields
            attributes[las_field] = inFile.points[las_field][::useevery]
        return (coords, attributes)

In [94]:
class PointCloudsInFiles(InMemoryDataset):
    """Point cloud dataset where one data point is a file."""

    def __init__(
        self, root_dir, glob="*", column_name="", max_points=200_000, use_columns=None
    ):
        """
        Args:
            root_dir (string): Directory with the datasets
            glob (string): Glob string passed to pathlib.Path.glob
            column_name (string): Column name to use as target variable (e.g. "Classification")
            use_columns (list[string]): Column names to add as additional input
        """
        self.files = list(Path(root_dir).glob(glob))
        self.column_name = column_name
        self.max_points = max_points
        if use_columns is None:
            use_columns = []
        self.use_columns = use_columns
        super().__init__()

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        filename = str(self.files[idx])
        coords, attrs = read_las(filename, get_attributes=True)
        if coords.shape[0] >= self.max_points:
            use_idx = np.random.choice(coords.shape[0], self.max_points, replace=False)
        else:
            use_idx = np.random.choice(coords.shape[0], self.max_points, replace=True)
        if len(self.use_columns) > 0:
            x = np.empty((self.max_points, len(self.use_columns)), np.float32)
            for eix, entry in enumerate(self.use_columns):
                x[:, eix] = attrs[entry][use_idx]
        else:
            x = coords[use_idx, :]
        coords = coords - np.mean(coords, axis=0)  # centralize coordinates

        # impute target
        target = attrs[self.column_name]
        target[np.isnan(target)] = np.nanmean(target)

        sample = Data(
            x=torch.from_numpy(x).float(),
            y=torch.from_numpy(np.array(target[use_idx][:, np.newaxis])).float(),
            pos=torch.from_numpy(coords[use_idx, :]).float(),
        )
        if coords.shape[0] < 100:
            return None
        return sample

In [95]:
train_dataset = PointCloudsInFiles(
    r"D:\MurrayBrent\test\train",
    "*.laz",
    "classification",
    max_points=1024,
    use_columns=["intensity"],
)

In [96]:
test_dataset = PointCloudsInFiles(
    r"D:\MurrayBrent\test\test",
    "*.laz",
    "classification",
    max_points=1024,
    use_columns=["intensity"],
)

In [97]:
val_dataset = PointCloudsInFiles(
    r"D:\MurrayBrent\test\val",
    "*.laz",
    "classification",
    max_points=1024,
    use_columns=["intensity"],
)

In [98]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)

In [99]:
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=0)

In [100]:
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)

In [101]:
checkpoint_callback = ModelCheckpoint(monitor="val_loss", save_top_k=1)
trainer = pl.Trainer(
    gpus=1,
    enable_progress_bar=True,
    callbacks=[EarlyStopping(monitor="val_loss", patience=20)],
    enable_checkpointing=checkpoint_callback,
    max_epochs=100,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [102]:
model = PointCNN()

In [103]:
trainer.fit(model, train_loader, val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type     | Params
---------------------------------------
0 | train_acc | Accuracy | 0     
1 | val_acc   | Accuracy | 0     
2 | test_acc  | Accuracy | 0     
3 | conv1     | XConv    | 8.3 K 
4 | conv2     | XConv    | 26.9 K
5 | conv3     | XConv    | 87.3 K
6 | conv4     | XConv    | 270 K 
7 | lin1      | Linear   | 98.6 K
8 | lin2      | Linear   | 32.1 K
9 | lin3      | Linear   | 516   
---------------------------------------
524 K     Trainable params
0         Non-trainable params
524 K     Total params
2.097     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

RuntimeError: shape '[-1, 12, 64]' is invalid for input of size 879744