In [1]:
import glob
import os
from datetime import datetime
from pathlib import Path

import laspy
import numpy as np
import torch
from torch_geometric.data import Data, InMemoryDataset
from torch_geometric.loader import DataLoader


def read_las(pointcloudfile, get_attributes=False, useevery=1):
    """
    :param pointcloudfile: specification of input file (format: las or laz)
    :param get_attributes: if True, will return all attributes in file, otherwise will only return XYZ (default is False)
    :param useevery: value specifies every n-th point to use from input, i.e. simple subsampling (default is 1, i.e. returning every point)
    :return: 3D array of points (x,y,z) of length number of points in input file (or subsampled by 'useevery')
    """

    # Read file
    inFile = laspy.read(pointcloudfile)

    # Get coordinates (XYZ)
    coords = np.vstack((inFile.x, inFile.y, inFile.z)).transpose()
    coords = coords[::useevery, :]

    # Return coordinates only
    if get_attributes == False:
        return coords

    # Return coordinates and attributes
    else:
        las_fields = [info.name for info in inFile.points.point_format.dimensions]
        attributes = {}
        for las_field in las_fields[3:]:  # skip the X,Y,Z fields
            attributes[las_field] = inFile.points[las_field][::useevery]
        return (coords, attributes)

In [2]:
class PointCloudsInFiles(InMemoryDataset):
    """Point cloud dataset where one data point is a file."""

    def __init__(
        self, root_dir, glob="*", column_name="", max_points=200_000, use_columns=None
    ):
        """
        Args:
            root_dir (string): Directory with the datasets
            glob (string): Glob string passed to pathlib.Path.glob
            column_name (string): Column name to use as target variable (e.g. "Classification")
            use_columns (list[string]): Column names to add as additional input
        """
        self.files = list(Path(root_dir).glob(glob))
        self.column_name = column_name
        self.max_points = max_points
        if use_columns is None:
            use_columns = []
        self.use_columns = use_columns
        super().__init__()

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        filename = str(self.files[idx])
        coords, attrs = read_las(filename, get_attributes=True)
        if coords.shape[0] >= self.max_points:
            use_idx = np.random.choice(coords.shape[0], self.max_points, replace=False)
        else:
            use_idx = np.random.choice(coords.shape[0], self.max_points, replace=True)
        if len(self.use_columns) > 0:
            x = np.empty((self.max_points, len(self.use_columns)), np.float32)
            for eix, entry in enumerate(self.use_columns):
                x[:, eix] = attrs[entry][use_idx]
        else:
            x = coords[use_idx, :]
        coords = coords - np.mean(coords, axis=0)  # centralize coordinates

        # impute target
        target = attrs[self.column_name]
        target[np.isnan(target)] = np.nanmean(target)

        sample = Data(
            x=torch.from_numpy(x).float(),
            y=torch.from_numpy(
                np.unique(np.array(target[use_idx][:, np.newaxis]))
            ).type(torch.LongTensor),
            pos=torch.from_numpy(coords[use_idx, :]).float(),
        )
        if coords.shape[0] < 100:
            return None
        return sample

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import PointNetConv, global_max_pool


class PointNet(torch.nn.Module):
    def __init__(
        self,
        num_features,
        emb_dims=1024,
        use_bn=False,
        global_feat=True,
    ):
        # emb_dims: Embedding Dimensions for PointNet.
        super(PointNet, self).__init__()
        self.input_shape = input_shape
        self.num_features = num_features
        self.emb_dims = emb_dims
        self.use_bn = use_bn
        self.global_feat = global_feat
        if not self.global_feat:
            self.pooling = global_max_pool()

        self.layers = self.create_structure()

    def create_structure(self):
        # PointNet architecture
        # self.conv1 = torch.nn.Conv1d(3 + self.num_features, 64, 1)
        self.conv1 = PointNetConv(3 + self.num_features, 64, 1)
        self.conv2 = PointNetConv(64, 64, 1)
        self.conv3 = PointNetConv(64, 64, 1)
        self.conv4 = PointNetConv(64, 128, 1)
        self.conv5 = PointNetConv(128, self.emb_dims, 1)
        self.relu = torch.nn.ReLU()

        # If using batch normalization
        if self.use_bn:
            self.bn1 = torch.nn.BatchNorm1d(64)
            self.bn2 = torch.nn.BatchNorm1d(64)
            self.bn3 = torch.nn.BatchNorm1d(64)
            self.bn4 = torch.nn.BatchNorm1d(128)
            self.bn5 = torch.nn.BatchNorm1d(self.emb_dims)

        # Set up arcitecture
        if self.use_bn:  # if using batch normalization
            layers = [
                self.conv1,
                self.bn1,
                self.relu,
                self.conv2,
                self.bn2,
                self.relu,
                self.conv3,
                self.bn3,
                self.relu,
                self.conv4,
                self.bn4,
                self.relu,
                self.conv5,
                self.bn5,
                self.relu,
            ]

        else:  # if not using batch normalization
            layers = [
                self.conv1,
                self.relu,
                self.conv2,
                self.relu,
                self.conv3,
                self.relu,
                self.conv4,
                self.relu,
                self.conv5,
                self.relu,
            ]

        return layers

    def forward(self, input_data):
        # output: PointNet Features (Batch x emb_dims)
        num_points = input_data.x.shape[0]

        if input_data.x.shape[1] != 3:
            raise RuntimeError("Shape of x must be [NumInPoints x 3]")

        # Create output
        x, pos, batch = input_data.x, input_data.pos, input_data.batch
        for idx, layer in enumerate(self.layers):
            x = layer(x, pos, batch)
            if idx == 1 and not self.global_feat:
                point_feature = x

        if self.global_feat:
            return x
        else:
            output = self.pooling(x, batch)
            output = output.view(1, self.emb_dims, 1).repeat(1, 1, num_points)
            return torch.cat([x, point_feature], 1)

In [4]:
class Classifier(nn.Module):
    def __init__(self, feature_model, num_classes=40):
        super(Classifier, self).__init__()
        self.feature_model = feature_model
        self.num_classes = num_classes

        self.linear1 = torch.nn.Linear(self.feature_model.emb_dims, 512)
        self.bn1 = torch.nn.BatchNorm1d(512)
        self.dropout1 = torch.nn.Dropout(p=0.7)
        self.linear2 = torch.nn.Linear(512, 256)
        self.bn2 = torch.nn.BatchNorm1d(256)
        self.dropout2 = torch.nn.Dropout(p=0.7)
        self.linear3 = torch.nn.Linear(256, self.num_classes)

        self.pooling = global_max_pool

    def forward(self, input_data):
        output = self.pooling(self.feature_model(input_data))
        output = F.relu(self.bn1(self.linear1(output)))
        output = self.dropout1(output)
        output = F.relu(self.bn2(self.linear2(output)))
        output = self.dropout2(output)
        output = self.linear3(output)
        return output

In [5]:
import logging
import os
import sys

import numpy as np
import torch
import torchvision
from tensorboardX import SummaryWriter
from torch_geometric.loader import DataLoader
from tqdm import tqdm

In [6]:
class IOStream:
    def __init__(self, path):
        self.f = open(path, "a")

    def cprint(self, text):
        print(text)
        self.f.write(text + "\n")
        self.f.flush

    def close(self):
        sefl.f.close()

In [8]:
def _init_(model_name):
    if not os.path.exists("checkpoints"):
        os.makedirs("checkpoints")
    if not os.path.exists("checkpoints/" + model_name):
        os.makedirs("checkpoints/" + model_name)
    if not os.path.exists("checkpoints/" + model_name + "/models"):
        os.makedirs("checkpoints/" + model_name + "/models")


def test_one_epoch(device, model, test_loader):
    model.eval()  # https://stackoverflow.com/questions/60018578/what-does-model-eval-do-in-pytorch
    test_loss = 0.0
    pred = 0.0
    count = 0

    for i, data in enumerate(tqdm(test_loader)):
        data.to(device)

        # Call model
        output = model(data)

        # Define validation loss using negative log likelihood loss and softmax
        loss_val = torch.nn.funcitonal.nll_loss(
            torch.nn.functional.log_softmax(output, dim=1), target, size_average=False
        )

        # Update test_lost and count
        test_loss += loss_val.item()
        count += output.size(0)

        # Update pred
        _, pred1 = output.max(dim=1)
        ag = pred1 == target
        am = ag.sum()
        pred += am.item()

    # Calculate test_loss and accuracy
    test_loss = float(test_loss) / count
    accuracy = float(pred) / count

    return test_loss, accuracy


def test(device, model, test_loader, textio):
    test_loss, test_accuracy = test_one_epoch(device, model, test_loader)
    textio.cprint(
        "Validation Loss: %f & Validation Accuracy: %f" % (test_loss, test_accuracy)
    )


def train_one_epoch(device, model, train_loader, optimizer):
    model.train()
    train_loss = 0.0
    pred = 0.0
    count = 0

    for i, data in enumerate(tqdm(train_loader)):
        # Send data to device
        data.to(device)

        # Call model
        output = model(data)

        # Define validation loss using negative log likelihood loss and softmax
        loss_val = torch.nn.funcitonal.nll_loss(
            torch.nn.functional.log_softmax(output, dim=1), target, size_average=False
        )

        # Forward + backward + optimize
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()

        # Update train_loss and count
        train_loss += loss_val.item()
        count += output.size(0)

        # Update pred
        _, pred1 = output.max(dim=1)
        ag = pred1 == target
        am = ag.sum()
        pred += am.item()

    # Calculate train_loss and accuracy
    train_loss = float(train_loss) / count
    accuracy = float(pred) / count

    return train_loss, accuracy


def train(
    device,
    model,
    train_loader,
    test_loader,
    boardio,
    textio,
    checkpoint,
    optimizer="Adam",
    start_epoch=0,
    epochs=200,
):
    # Set up optimizer
    learnable_params = filter(lambda p: p.requires_grad, model.parameters())
    if optimizer == "Adam":  # Adam optimizer
        optimizer = torch.optim.Adam(learnable_params)
    else:  # SGD optimizer
        optimizer = torch.optim.SGD(learnable_params, lr=0.1)

    # Set up checkpoint
    if checkpoint is not None:
        min_loss = checkpoint["min_loss"]
        optimizer.load_state_dict(checkpoint["optimizer"])

    # Define best_test_loss
    best_test_los = np.inf

    for epoch in range(start_epoch, epochs):
        # Train Model
        train_loss, train_accuracy = train_one_epoch(
            device, model, train_loader, optimizer
        )

        # Test Model
        test_loss, test_accuracy = test_one_epoch(device, model, test_loader)

        # Save Best Model
        if test_loss < best_test_loss:
            best_test_lloss = test_loss
            snap = {
                # state_dict: https://pytorch.org/tutorials/recipes/recipes/what_is_state_dict.html
                "epoch": epoch + 1,
                "model": model.state_dict(),
                "min_loss": best_test_loss,
                "optimizer": optimizer.state_dict,
            }
            torch.save(snap, f"checkpoints/{model_name}/models/best_model_snap.t7")
            torch.save(
                model.state_dict, f"checkpoints/{model_name}/models/best_model.t7"
            )
            torch.save(
                model.feature_model.state_dict(),
                f"checkpoitns/{model_name}/models/best_ptnet_model.t7",
            )

        # Save model
        torch.save(snap, f"checkpoints/{model_name}/models/model_snap.t7")
        torch.save(model.state_dict, f"checkpoints/{model_name}/models/model.t7")
        torch.save(
            model.feature_model.state_dict(),
            f"checkpoitns/{model_name}/models/ptnet_model.t7",
        )

        boardio.add_scalar("Train Loss", train_loss, epoch + 1)
        boardio.add_scalar("Test Loss", test_loss, epoch + 1)
        boardio.add_scalar("Best Test Loss", best_test_loss, epoch + 1)
        boardio.add_scalar("Train Accuracy", train_accuracy, epoch + 1)
        boardio.add_scalar("Test Accuracy", test_accuracy, epoch + 1)

        textio.cprint(
            "EPOCH:: %d, Training Loss: %f, Testing Loss: %f, Best Loss: %f"
            % (epoch + 1, train_loss, test_loss, best_test_loss)
        )
        textio.cprint(
            "EPOCH:: %d, Training Accuracy: %f Testing Accuracy: %f"
            % (epoch + 1, train_accuracy, test_accuracy)
        )

In [9]:
def main():
    train_dataset_path = r"D:\MurrayBrent\git\point-dl\input\train"
    test_dataset_path = r"D:\MurrayBrent\git\point-dl\input\test"
    model_name = "PointNet"
    use_columns = []

    boardio = SummaryWriter(log_dir="checkpoints/" + model_name)
    _init_(model_name)

    textio = IOStream("checkpoints/" + model_name + "/run.log")
    textio.cprint(model_name)

    # Get training and test datasets
    trainset = PointCloudsInFiles(
        train_dataset_path, "*.laz", "Class", max_points=1024, use_columns=use_columns
    )
    testset = PointCloudsInFiles(
        test_dataset_path, "*.laz", "Class", max_points=1024, use_columns=use_columns
    )

    # Load training and test datasets
    train_loader = DataLoader(trainset, batch_size=16, shuffle=True, num_workers=0)
    test_loader = DataLoader(testset, batch_size=16, shuffle=False, num_workers=0)

    # Define device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    ptnet = PointNet(num_features=len(use_columns), emb_dims=1024, use_bn=True)
    model = Classifier(feature_model=ptnet)

    checkpoint = None

    model.to(device)

    train(
        device=device,
        model=model,
        train_loader=train_loader,
        test_loader=test_loader,
        boardio=boardio,
        textio=textio,
        checkpoint=checkpoint,
    )

In [10]:
if __name__ == "__main__":
    main()

PointNet


  0%|                                                                                           | 0/12 [00:02<?, ?it/s]


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 16384 but got size 2 for tensor number 1 in the list.