In [1]:
import torch
from torch import optim
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
# from torchvision.transforms import ToTensorfrom 
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
import numpy as np
import os

from scipy.spatial import KDTree

In [25]:
class PointCloudDataset(Dataset):
    def __init__(self, pts_file, split=0):
        points = np.loadtxt(pts_file, delimiter=' ')
        points = np.delete(points, -2, 1)
        points = np.delete(points, -2, 1)
        # points = np.delete(points, -2, 1)

        if split == 1:
            points = points[:len(points)//2]
        elif split == 2:
            points = points[len(points)//2:]

        print(points.shape)
        # do i need to min max intensity, return number, number of returns, etc.? probably not
        for i in range(4):
            dim_min, dim_max = min(points[:,i]), max(points[:,i])
            points[:,i] = (points[:,i] - dim_min) / (dim_max - dim_min)
        self.data = points
        
    def __len__(self):
        return len(self.data) // 25000
        
    def __getitem__(self, idx):
        # Return batches of 2500 points
        xyzirn = self.data[idx * 25000: (idx + 1) * 25000, :-1]  # x, y, z, ***intensity, return number, number of returns***
        label = self.data[idx * 25000: (idx + 1) * 25000, -1] == 5

        xyzirn = torch.from_numpy(xyzirn.T).float() # intensity, z, y, x from top to bottom
        label = torch.tensor(label).long()
        
        return xyzirn, label

training_data = PointCloudDataset(r"C:\Users\and13375\Documents\3D Point Segmentation\Test\Vaihingen\3DLabeling\Vaihingen3D_Traininig.pts")
validation_data = PointCloudDataset(r"C:\Users\and13375\Documents\3D Point Segmentation\Test\Vaihingen\3DLabeling\Vaihingen3D_EVAL_WITH_REF.pts", split=1)
testing_data = PointCloudDataset(r"C:\Users\and13375\Documents\3D Point Segmentation\Test\Vaihingen\3DLabeling\Vaihingen3D_EVAL_WITH_REF.pts", split=2)
train_dataloader = DataLoader(training_data, batch_size=10, shuffle=True) # can/should i use shuffle, try lowering it?
validation_dataloader = DataLoader(validation_data, batch_size=10, shuffle=False)
test_dataloader = DataLoader(testing_data, batch_size=10, shuffle=False)

num_classes = 2

(753876, 5)
(205861, 5)
(205861, 5)


In [3]:
print(next(iter(train_dataloader))[0].size())

torch.Size([10, 4, 25000])


In [4]:
for i, data in enumerate(train_dataloader):
    print(data[1].size())

torch.Size([10, 25000])
torch.Size([10, 25000])
torch.Size([10, 25000])


In [5]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([10, 4, 25000])
Labels batch shape: torch.Size([10, 25000])


In [24]:
import open3d as o3d
import numpy as np
from collections import Counter

def plot_batch(entire_view = True):
    # Assuming pc is your point cloud data, in shape (N, 3)
    pc_num = 0

    if entire_view:
        pc = training_data.data[:, :3]
        labels = training_data.data[:, 3] == 8
    else:
        pc = training_data[pc_num][0].T.view(-1,3).numpy()
        labels = training_data[pc_num][1].numpy()

    print(Counter(labels), len(labels))

    # Define colors for each label
    color_map = {0: [0.5, 0.5, 0.5],  # Gray color for label 0
                1: [1.0, 0.0, 0.0]}  # Red color for label 1

    # Map each label to a color
    colors = np.array([color_map[label] for label in labels])

    # Create point cloud
    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(pc)
    point_cloud.colors = o3d.utility.Vector3dVector(colors)

    # Visualize the point cloud
    o3d.visualization.draw_geometries([point_cloud])
# plot_batch(entire_view = False)

Counter({0: 597983, 1: 152017}) 750000


In [7]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
device = 'cpu'
print(f"Using {device} device")

Using cpu device


In [8]:
class STNkd(nn.Module):
    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.mlp1 = nn.Sequential(torch.nn.Conv1d(k, 64, 1), nn.BatchNorm1d(64), nn.GELU())
        self.mlp2 = nn.Sequential(torch.nn.Conv1d(64, 128, 1), nn.BatchNorm1d(128), nn.GELU())
        self.mlp3 = nn.Sequential(torch.nn.Conv1d(128, 1024, 1), nn.BatchNorm1d(1024), nn.GELU())
        self.mlp4 = nn.Sequential(nn.Linear(1024, 512), nn.BatchNorm1d(512), nn.GELU())
        self.mlp5 = nn.Sequential(nn.Linear(512, 256), nn.BatchNorm1d(256), nn.GELU())
        self.fc = nn.Linear(256, k*k)

        self.k = k

    def forward(self, x):
        batchsize = x.size()[0]
        x = self.mlp1(x)
        x = self.mlp2(x)
        x = self.mlp3(x)

        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = self.mlp4(x)
        x = self.mlp5(x)
        x = self.fc(x)

        iden = torch.eye(self.k, requires_grad=True).repeat(batchsize,1,1)
        if x.is_cuda:
            iden = iden.cuda()
        x = x.view(-1, self.k, self.k) + iden

        return x
    
class PoinNet(nn.Module):
    def __init__(self, input_dim=6, num_classes=2):
        super().__init__()

        # Should GELU be after LayerNorm?
        self.stn1 = STNkd(k=input_dim)
        self.mlp1 = nn.Sequential(nn.Conv1d(input_dim, 64, kernel_size=1), nn.BatchNorm1d(64), nn.GELU())
        self.mlp2 = nn.Sequential(nn.Conv1d(64, 64, kernel_size=1), nn.BatchNorm1d(64), nn.GELU())
        
        self.stn2 = STNkd(k=64)
        self.mlp3 = nn.Sequential(nn.Conv1d(64, 64, kernel_size=1), nn.BatchNorm1d(64), nn.GELU())
        self.mlp4 = nn.Sequential(nn.Conv1d(64, 128, kernel_size=1), nn.BatchNorm1d(128), nn.GELU())
        self.mlp5 = nn.Sequential(nn.Conv1d(128, 1024, kernel_size=1), nn.BatchNorm1d(1024), nn.GELU())
        
        self.mlp6 = nn.Sequential(nn.Linear(1088, 512), nn.LayerNorm(512), nn.GELU())
        self.mlp7 = nn.Sequential(nn.Linear(512, 256), nn.LayerNorm(256), nn.GELU())
        self.mlp8 = nn.Sequential(nn.Linear(256, 128), nn.LayerNorm(128), nn.GELU())
        self.fc = nn.Linear(128, num_classes)

        self.debug = nn.Conv1d(input_dim, num_classes, 1)

    def forward(self, x):
        n_pts = x.size()[2]

        trans6x6 = self.stn1(x) 
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans6x6)
        x = x.transpose(2, 1)

        x = self.mlp1(x)
        # print(1, x.size())
        x = self.mlp2(x)
        # print(2, x.size())
        
        
        trans64x64 = self.stn2(x)
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans64x64)
        local_features = x.transpose(2, 1)
        
        x = self.mlp3(local_features)
        # print(3, x.size())
        x = self.mlp4(x)
        # print(4, x.size())
        x = self.mlp5(x)
        # print(5, x.size())
        x = torch.max(x, 2)[0]
        # print(6, x.size())
        
        # FOR CLASSIFICATION
        # x = self.mlp6(x)
        # print(6, x.size())
        # x = self.mlp7(x)
        # print(7, x.size())
        # x = self.fc(x)
        # print(8, x.size())
        # return F.log_softmax(x, dim=1)

        global_features = x.unsqueeze(2).repeat(1, 1, n_pts)
        # print(7, global_features.size())
        x = torch.cat([local_features, global_features], 1)
        # print(8, x.size())

        x = x.transpose(2, 1)
        x = self.mlp6(x)
        # print(9, x.size())
        x = self.mlp7(x)
        # print(10, x.size())
        x = self.mlp8(x)
        # print(11, x.size())
        x = self.fc(x)
        print(12, x.size())

        # x = x.view(-1, 2)
        # print(x, x.size())
        x = F.log_softmax(x, dim=-1)
        # print(x, x.size())

        return x, trans64x64
        # return x, trans64x64

# model = PoinNet(input_dim=6, num_classes=2).to(device)

In [9]:
def feature_transform_regularizer(trans):
    d = trans.size(1)
    I = torch.eye(d).unsqueeze(0).to(device)
    loss = torch.linalg.norm(I - torch.bmm(trans, trans.transpose(2,1)), dim=(1,2))
    loss = torch.mean(loss)
    return loss

In [10]:
class CustomLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, inputs, targets, trans):
        d = trans.size(1)
        I = torch.eye(d).unsqueeze(0).to(device)
        loss = torch.linalg.norm(I - torch.bmm(trans, trans.transpose(2,1)), dim=(1,2))
        loss = torch.mean(loss)
        return self.cross_entropy_loss(inputs, targets) + loss

## Online Code

In [11]:
from sklearn.metrics import f1_score
from collections import Counter


writer = SummaryWriter()
loss_func = CustomLoss()
classifier = PoinNet(input_dim=4, num_classes=2).to(device)

optimizer = optim.Adam(classifier.parameters(), lr=0.0005, betas=(0.9, 0.999))

num_batch = len(training_data)

for epoch in range(80):
    classifier.train()
    train_loss, train_f1, train_acc = 0.0, 0.0, 0.0
    predictions, labels = np.array([]), np.array([])
    for i, data in enumerate(train_dataloader, 1):
        points, target = data
        points, target = points.to(device), target.to(device)
        optimizer.zero_grad()
        pred, trans_feat = classifier(points)
        pred = pred.view(-1, num_classes)
        target = target.view(-1, 1).squeeze()
        # print(pred.size(), target.size())

        loss = F.nll_loss(pred, target)
        loss += feature_transform_regularizer(trans_feat) * 0.001
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        predictions = np.append(predictions, pred.max(1)[1].cpu())
        labels = np.append(labels, target.cpu())

    train_f1 = f1_score(predictions, labels)
    train_acc = sum(predictions == labels)/float(len(labels))
    train_loss /= len(train_dataloader)

    classifier.eval()
    valid_loss, valid_f1, valid_acc = 0.0, 0.0, 0.0
    predictions, labels = np.array([]), np.array([])
    for i, data in enumerate(validation_dataloader):
        points, target = data
        points, target = points.to(device), target.to(device)
        pred,  trans_feat = classifier(points)
        pred = pred.view(-1, num_classes)
        target = target.view(-1, 1).squeeze()

        loss = F.nll_loss(pred, target)
        loss += feature_transform_regularizer(trans_feat) * 0.001
        valid_loss += loss.item()

        predictions = np.append(predictions, pred.max(1)[1].cpu())
        labels = np.append(labels, target.cpu())

    valid_f1 = f1_score(predictions, labels)
    valid_acc = sum(predictions == labels)/float(len(labels))
    valid_loss /= len(validation_dataloader)

    writer.add_scalars('losses', {'training':train_loss, 'validation':valid_loss}, global_step=epoch)
    writer.add_scalars('f1 scores', {'training':train_f1, 'validation':valid_f1}, global_step=epoch)

    print(f'[{epoch}] train loss: {train_loss} accuracy: {train_acc} f1 score: {train_f1}')
    print(f'[{epoch}] validation loss: {valid_loss} accuracy: {valid_acc} f1 score: {valid_f1}')
    print()

writer.flush()
writer.close()

12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([8, 25000, 2])
[0] train loss: 0.7099375327428182 accuracy: 0.6192346666666667 f1 score: 0.2556547758680909
[0] validation loss: 0.44683578610420227 accuracy: 0.86571 f1 score: 0.0

12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([8, 25000, 2])
[1] train loss: 0.589138905207316 accuracy: 0.8198506666666666 f1 score: 0.0
[1] validation loss: 0.419990599155426 accuracy: 0.86571 f1 score: 0.0

12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([8, 25000, 2])
[2] train loss: 0.5702248215675354 accuracy: 0.8198506666666666 f1 score: 0.0
[2] validation loss: 0.4211133122444153 accuracy: 0.86571 f1 score: 0.0

12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([10, 25000, 2])
12 torch.Size([8, 25000, 2])
[3] train loss: 0.5531713366508484 accuracy: 0.8198506666

In [12]:
from sklearn.metrics import f1_score

outputs = None
points = None
x = y = None
def test_PointNet(pointnet, test_dataloader, device):
    global outputs, points, x, y
    pointnet.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for idx, (points, labels) in enumerate(test_dataloader):
            points, labels = points.to(device), labels.to(device)
            outputs, _ = pointnet(points)
            _, predicted = torch.max(outputs.data, 2)
            total += labels.size(0) * labels.size(1)
            correct += (predicted == labels).sum().item()
            x = labels.view(-1).to('cpu').numpy()
            y = predicted.view(-1).to('cpu').numpy()
            f1 = f1_score(x, y)
            print('F1 score: ', f1)
    
test_PointNet(classifier, test_dataloader, device)

12 torch.Size([8, 25000, 2])
F1 score:  0.08487002888247056


In [13]:
print(sum(x))
print(sum(y))
print(outputs)

26763
27249
tensor([[[-0.2036, -1.6916],
         [-0.1744, -1.8326],
         [-0.1006, -2.3463],
         ...,
         [-0.0214, -3.8572],
         [-0.0180, -4.0255],
         [-0.0160, -4.1438]],

        [[-0.0083, -4.8015],
         [-0.0083, -4.7998],
         [-0.0172, -4.0712],
         ...,
         [-0.0080, -4.8382],
         [-0.0080, -4.8365],
         [-0.0080, -4.8377]],

        [[-0.0078, -4.8599],
         [-0.0078, -4.8592],
         [-0.0078, -4.8586],
         ...,
         [-0.6657, -0.7214],
         [-0.6377, -0.7519],
         [-0.6069, -0.7875]],

        ...,

        [[-0.0210, -3.8739],
         [-0.0210, -3.8740],
         [-0.0210, -3.8737],
         ...,
         [-0.3783, -1.1554],
         [-0.2735, -1.4302],
         [-0.2660, -1.4542]],

        [[-0.2603, -1.4732],
         [-0.2593, -1.4766],
         [-0.2497, -1.5096],
         ...,
         [-0.0239, -3.7439],
         [-0.0241, -3.7394],
         [-0.0241, -3.7374]],

        [[-0.0128, -4.36

In [14]:
print(f"Labels has {sum(x == 0)} zeroes and {sum(x == 1)} ones")
print(f"Predictions has {sum(y == 0)} zeroes and {sum(y == 1)} ones")


# writer.add_graph(model, points)
# writer.flush()
# writer.close()

Labels has 173237 zeroes and 26763 ones
Predictions has 172751 zeroes and 27249 ones


73.4029268292683% on roofs (5)
86.77414634146342% on trees (8)