In [None]:
import glob
import os
from datetime import datetime
from pathlib import Path

import laspy
import numpy as np
import torch
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.loader import DataLoader


def read_las(pointcloudfile, get_attributes=False, useevery=1):
    """
    Read a LAS/LAZ point cloud and return its coordinates, optionally with attributes.

    :param pointcloudfile: specification of input file (format: las or laz)
    :param get_attributes: if truthy, will also return all non-coordinate attributes in file, otherwise will only return XYZ (default is False)
    :param useevery: value specifies every n-th point to use from input, i.e. simple subsampling (default is 1, i.e. returning every point)
    :return: array of shape (number of points, 3) holding x, y, z (subsampled by 'useevery');
             when 'get_attributes' is truthy, a tuple (coords, attributes) where 'attributes'
             maps each remaining dimension name to its values, subsampled the same way
    """

    # Read file
    las = laspy.read(pointcloudfile)

    # Stack x, y, z as columns, then keep every n-th row.
    coords = np.vstack((las.x, las.y, las.z)).transpose()[::useevery, :]

    # Return coordinates only
    if not get_attributes:
        return coords

    # Return coordinates and attributes. The raw X, Y, Z dimensions are skipped
    # by name (not by position) because they are already returned in `coords`.
    coordinate_fields = {"X", "Y", "Z"}
    attributes = {
        dimension.name: las.points[dimension.name][::useevery]
        for dimension in las.points.point_format.dimensions
        if dimension.name not in coordinate_fields
    }
    return (coords, attributes)

In [None]:
class PointCloudsInFiles(InMemoryDataset):
    """Point cloud dataset where one data point is a file."""

    # Files with fewer points than this are rejected (``__getitem__`` returns None).
    MIN_POINTS = 100

    def __init__(
        self, root_dir, glob="*", column_name="", max_points=200_000, use_columns=None
    ):
        """
        Args:
            root_dir (string): Directory with the datasets
            glob (string): Glob string passed to pathlib.Path.glob
            column_name (string): Column name to use as target variable (e.g. "Classification")
            max_points (int): Number of points drawn from every file; files with
                fewer points are sampled with replacement to reach this size
            use_columns (list[string]): Column names to add as additional input
        """
        self.files = list(Path(root_dir).glob(glob))
        self.column_name = column_name
        self.max_points = max_points
        if use_columns is None:
            use_columns = []
        self.use_columns = use_columns
        super().__init__()

    def __len__(self):
        """Return the number of files matched by the glob."""
        return len(self.files)

    def __getitem__(self, idx):
        """
        Load file ``idx`` and return it as a ``Data`` sample of ``max_points`` points.

        Returns:
            Data with ``x`` (the selected attribute columns, or the raw coordinates
            when ``use_columns`` is empty), ``pos`` (coordinates centred on the
            file's mean) and ``y`` (the unique target values among the sampled
            points), or None when the file holds fewer than ``MIN_POINTS`` points.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        filename = str(self.files[idx])
        coords, attrs = read_las(filename, get_attributes=True)
        num_points = coords.shape[0]

        # Reject tiny clouds before doing any work: sampling from an empty
        # cloud would raise, and the result would be discarded anyway.
        if num_points < self.MIN_POINTS:
            return None

        # Sample without replacement when enough points exist, otherwise
        # oversample with replacement so every sample has max_points rows.
        use_idx = np.random.choice(
            num_points, self.max_points, replace=num_points < self.max_points
        )

        if len(self.use_columns) > 0:
            x = np.empty((self.max_points, len(self.use_columns)), np.float32)
            for eix, entry in enumerate(self.use_columns):
                x[:, eix] = attrs[entry][use_idx]
        else:
            # No attribute columns requested: the raw (un-centred) coordinates
            # are used as features.
            x = coords[use_idx, :]
        coords = coords - np.mean(coords, axis=0)  # centralize coordinates

        # impute target: NaN entries are replaced by the mean of the valid ones
        target = attrs[self.column_name]
        target[np.isnan(target)] = np.nanmean(target)

        return Data(
            x=torch.from_numpy(x).float(),
            # np.unique flattens its input, so no extra axis is needed here.
            y=torch.from_numpy(np.unique(np.array(target[use_idx]))).type(
                torch.LongTensor
            ),
            pos=torch.from_numpy(coords[use_idx, :]).float(),
        )

In [None]:
import torch
import torch.nn.functional as F
from torch import nn
from torch.optim import Adam
from torch_geometric.nn import XConv, fps, global_mean_pool


class PointCNN(nn.Module):
    """
    PointCNN classifier: four XConv layers interleaved with farthest point
    sampling, a global mean pool per batch element and a three-layer MLP head.

    Args:
        numfeatures (int): Number of per-point input features read from
            ``data.x``. When 0, ``data.x`` is ignored and the first XConv
            receives no features.
        num_classes (int): Size of the output layer (default 8).
    """

    def __init__(self, numfeatures, num_classes=8):
        super().__init__()
        self.numfeatures = numfeatures
        self.num_classes = num_classes

        # First XConv layer
        self.conv1 = XConv(
            self.numfeatures, 48, dim=3, kernel_size=8, hidden_channels=32
        )

        # Second XConv layer
        self.conv2 = XConv(
            48, 96, dim=3, kernel_size=12, hidden_channels=64, dilation=2
        )

        # Third XConv layer
        self.conv3 = XConv(
            96, 192, dim=3, kernel_size=16, hidden_channels=128, dilation=2
        )

        # Fourth XConv layer
        self.conv4 = XConv(
            192, 384, dim=3, kernel_size=16, hidden_channels=256, dilation=2
        )

        # Multilayer Perceptrons (MLPs) at the end of the PointCNN
        self.lin1 = nn.Linear(384, 256)
        self.lin2 = nn.Linear(256, 128)
        self.lin3 = nn.Linear(128, num_classes)

    def forward(self, data):
        """
        Compute class scores for a batch.

        Args:
            data: Object exposing ``pos``, ``batch`` and (when ``numfeatures``
                is non-zero) ``x``.

        Returns:
            Tensor with ``num_classes`` raw scores (no softmax applied) per
            pooled batch element.
        """
        pos, batch = data.pos, data.batch
        x = data.x if self.numfeatures else None

        # First XConv (x is None when the model runs without input features)
        x = F.relu(self.conv1(x, pos, batch))

        # Farthest point sampling, keeping only 37.5%
        idx = fps(pos, batch, ratio=0.375)
        x, pos, batch = x[idx], pos[idx], batch[idx]

        # Second XConv
        x = F.relu(self.conv2(x, pos, batch))

        # Farthest point sampling, keeping only 33.4%
        idx = fps(pos, batch, ratio=0.334)
        x, pos, batch = x[idx], pos[idx], batch[idx]

        # Two additional XConvs
        x = F.relu(self.conv3(x, pos, batch))
        x = F.relu(self.conv4(x, pos, batch))

        # Pooling batch-elements together:
        # each batch element is described by one vector of 384 features
        x = global_mean_pool(x, batch)

        # MLPs at the end with ReLU
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))

        # Dropout must follow the module's mode: active while training and
        # disabled after model.eval(), so evaluation is deterministic.
        x = F.dropout(x, p=0.5, training=self.training)
        return self.lin3(x)

In [None]:
import logging
import os
import sys

import numpy as np
import torch
import torchvision
from tensorboardX import SummaryWriter
from torch_geometric.loader import DataLoader
from tqdm import tqdm

In [None]:
class IOStream:
    """Text log that appends to a file and echoes every message to stdout."""

    def __init__(self, path):
        """Open ``path`` in append mode; the handle is kept in ``self.f``."""
        self.f = open(path, "a")

    def cprint(self, text):
        """Print ``text`` and append it, newline-terminated, to the log file."""
        print(text)
        self.f.write(text + "\n")
        # Flush after every message so the log is current even if the run dies.
        self.f.flush()

    def close(self):
        """Close the underlying log file."""
        self.f.close()

    def __enter__(self):
        """Allow ``with IOStream(path) as log:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the log on leaving the ``with`` block; exceptions propagate."""
        self.close()

In [None]:
def _init_(model_name):
    if not os.path.exists("checkpoints"):
        os.makedirs("checkpoints")
    if not os.path.exists("checkpoints/" + model_name):
        os.makedirs("checkpoints/" + model_name)
    if not os.path.exists("checkpoints/" + model_name + "/models"):
        os.makedirs("checkpoints/" + model_name + "/models")


def test_one_epoch(device, model, test_loader):
    model.eval()  # https://stackoverflow.com/questions/60018578/what-does-model-eval-do-in-pytorch
    test_loss = 0.0
    pred = 0.0
    count = 0

    for i, data in enumerate(tqdm(test_loader, desc="Testing", leave=False)):
        data.to(device)

        # Call model
        output = model(data)

        # Define validation loss using negative log likelihood loss and softmax
        loss_val = torch.nn.functional.nll_loss(
            torch.nn.functional.log_softmax(output, dim=1),
            target=data.y,
            size_average=False,
        )

        # Update test_lost and count
        test_loss += loss_val.item()
        count += output.size(0)

        # Update pred
        _, pred1 = output.max(dim=1)
        ag = pred1 == data.y
        am = ag.sum()
        pred += am.item()

    # Calculate test_loss and accuracy
    test_loss = float(test_loss) / count
    accuracy = float(pred) / count

    return test_loss, accuracy


def test(device, model, test_loader, textio):
    """Evaluate the model once and report loss and accuracy through ``textio``."""
    val_loss, val_accuracy = test_one_epoch(device, model, test_loader)
    report = "Validation Loss: %f & Validation Accuracy: %f" % (
        val_loss,
        val_accuracy,
    )
    textio.cprint(report)


def train_one_epoch(device, model, train_loader, optimizer, epoch_number):
    """
    Train ``model`` for one pass over ``train_loader``.

    Args:
        device: Device every batch is moved to before the forward pass.
        model: Network called as ``model(data)``; must return per-class scores.
        train_loader: Iterable of batches exposing ``to(device)`` and ``y``.
        optimizer: Optimizer stepped once per batch.
        epoch_number: Only used to label the progress bar.

    Returns:
        tuple(float, float): Summed negative log-likelihood divided by the
        number of samples, and the fraction of correctly classified samples
        (both measured with the weights in effect before each step).

    Raises:
        ZeroDivisionError: If the loader yields no samples.
    """
    model.train()
    train_loss = 0.0
    correct = 0
    count = 0

    for data in tqdm(train_loader, desc="Epoch: " + str(epoch_number), leave=False):
        # Send data to device
        data = data.to(device)

        # Call model
        output = model(data)

        # Training loss: NLL on log-softmax scores, summed over the batch
        loss_val = torch.nn.functional.nll_loss(
            torch.nn.functional.log_softmax(output, dim=1),
            target=data.y,
            reduction="sum",
        )

        # Backward + optimize
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()

        # Update train_loss and count
        train_loss += loss_val.item()
        count += output.size(0)

        # Count predictions that match the labels
        correct += (output.argmax(dim=1) == data.y).sum().item()

    # Calculate train_loss and accuracy
    train_loss = float(train_loss) / count
    accuracy = float(correct) / count

    return train_loss, accuracy


def train(
    device,
    model,
    train_loader,
    test_loader,
    boardio,
    textio,
    checkpoint,
    model_name,
    optimizer="Adam",
    start_epoch=0,
    epochs=200,
):
    """
    Run the train/evaluate loop and write checkpoints after every epoch.

    Args:
        device: Device passed on to the per-epoch train and test functions.
        model: Network to optimise.
        train_loader: Loader used for training.
        test_loader: Loader used for evaluation after every epoch.
        boardio: Object with ``add_scalar(tag, value, step)`` for metric logging.
        textio: Object with ``cprint(text)`` for text logging.
        checkpoint: None, or a dict with "min_loss" and "optimizer" entries
            used to restore the best loss so far and the optimizer state.
        model_name: Checkpoints are written to
            ``checkpoints/<model_name>/models``, which must already exist.
        optimizer: "Adam" selects Adam; any other value selects SGD (lr=0.1).
        start_epoch: Zero-based index of the first epoch to run.
        epochs: Zero-based index at which training stops (exclusive).
    """
    # Set up optimizer
    learnable_params = [p for p in model.parameters() if p.requires_grad]
    if optimizer == "Adam":  # Adam optimizer
        opt = torch.optim.Adam(learnable_params)
    else:  # SGD optimizer
        opt = torch.optim.SGD(learnable_params, lr=0.1)

    # Best loss so far; a resumed run continues from the checkpointed value
    best_test_loss = float("inf")
    if checkpoint is not None:
        best_test_loss = checkpoint["min_loss"]
        optimizer_state = checkpoint["optimizer"]
        # Snapshots written before this fix stored the bound ``state_dict``
        # method instead of its result; call it to recover the dict.
        if callable(optimizer_state):
            optimizer_state = optimizer_state()
        opt.load_state_dict(optimizer_state)

    model_dir = f"checkpoints/{model_name}/models"

    for epoch in range(start_epoch, epochs):
        # Train Model
        train_loss, train_accuracy = train_one_epoch(
            device, model, train_loader, opt, epoch + 1
        )

        # Test Model
        test_loss, test_accuracy = test_one_epoch(device, model, test_loader)

        improved = test_loss < best_test_loss
        if improved:
            best_test_loss = test_loss

        # Build the snapshot every epoch so the "latest" files always reflect
        # the current epoch, not the last epoch that improved.
        snap = {
            "epoch": epoch + 1,
            "model": model.state_dict(),
            "min_loss": best_test_loss,
            "optimizer": opt.state_dict(),
        }

        # Save Best Model
        if improved:
            torch.save(snap, f"{model_dir}/best_model_snap.t7")
            torch.save(model.state_dict(), f"{model_dir}/best_model.t7")

        # Save latest model
        torch.save(snap, f"{model_dir}/model_snap.t7")
        torch.save(model.state_dict(), f"{model_dir}/model.t7")

        boardio.add_scalar("Train Loss", train_loss, epoch + 1)
        boardio.add_scalar("Test Loss", test_loss, epoch + 1)
        boardio.add_scalar("Best Test Loss", best_test_loss, epoch + 1)
        boardio.add_scalar("Train Accuracy", train_accuracy, epoch + 1)
        boardio.add_scalar("Test Accuracy", test_accuracy, epoch + 1)

        textio.cprint(
            "EPOCH:: %d, Training Loss: %f, Testing Loss: %f, Best Loss: %f"
            % (epoch + 1, train_loss, test_loss, best_test_loss)
        )
        textio.cprint(
            "EPOCH:: %d, Training Accuracy: %f Testing Accuracy: %f"
            % (epoch + 1, train_accuracy, test_accuracy)
        )

In [None]:
def main(
    train_dataset_path=r"D:\MurrayBrent\git\point-dl\input\train",
    test_dataset_path=r"D:\MurrayBrent\git\point-dl\input\test",
    model_name="PointCNN",
    use_columns=("intensity",),
):
    """
    Build the datasets, loaders and model, then run training.

    Args:
        train_dataset_path: Directory searched for ``*.laz`` training files.
        test_dataset_path: Directory searched for ``*.laz`` test files.
        model_name: Name of the run; logs and checkpoints are written under
            ``checkpoints/<model_name>``.
        use_columns: Attribute columns fed to the model as point features;
            their count sets the model's ``numfeatures``.
    """
    use_columns = list(use_columns)

    _init_(model_name)
    boardio = SummaryWriter(log_dir="checkpoints/" + model_name)
    textio = IOStream("checkpoints/" + model_name + "/run.log")

    try:
        textio.cprint(model_name)

        # Get training and test datasets
        trainset = PointCloudsInFiles(
            train_dataset_path,
            "*.laz",
            "Class",
            max_points=1024,
            use_columns=use_columns,
        )
        testset = PointCloudsInFiles(
            test_dataset_path,
            "*.laz",
            "Class",
            max_points=1024,
            use_columns=use_columns,
        )

        # Load training and test datasets
        train_loader = DataLoader(
            trainset, batch_size=16, shuffle=True, num_workers=0
        )
        test_loader = DataLoader(
            testset, batch_size=16, shuffle=False, num_workers=0
        )

        # Define device
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = PointCNN(numfeatures=len(use_columns))
        model.to(device)

        # No checkpoint: training starts from scratch.
        checkpoint = None

        train(
            device=device,
            model=model,
            model_name=model_name,
            train_loader=train_loader,
            test_loader=test_loader,
            boardio=boardio,
            textio=textio,
            checkpoint=checkpoint,
        )
    finally:
        # Release the log's file handle and flush pending TensorBoard events,
        # even when training is interrupted.
        textio.f.close()
        boardio.close()

In [None]:
# Entry point: start the training run when this code is executed as the main module.
if __name__ == "__main__":
    main()