In [1]:
import torch
from torch import optim
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
# from torchvision.transforms import ToTensorfrom 
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
import numpy as np
import os

writer = SummaryWriter()
writer.close()

In [2]:
class PointCloudDataset(Dataset):
    def __init__(self, pts_file):
        points = np.loadtxt(pts_file, delimiter=' ')
        # points = np.delete(points, -2, 1)
        # points = np.delete(points, -2, 1)
        # points = np.delete(points, -2, 1)

        print(points.shape)
        # do i need to min max intensity, return number, number of returns, etc.? probably not
        for i in range(3):
            dim_min, dim_max = min(points[:,i]), max(points[:,i])
            points[:,i] = (points[:,i] - dim_min) / (dim_max - dim_min)
        self.data = points
        
    def __len__(self):
        return len(self.data) // 25000
        
    def __getitem__(self, idx):
        # Return batches of 2500 points
        xyzirn = self.data[idx * 25000: (idx + 1) * 25000, :6]  # x, y, z, ***intensity, return number, number of returns***
        label = self.data[idx * 25000: (idx + 1) * 25000, 6] == 8

        xyzirn = torch.from_numpy(xyzirn.T).float()
        label = torch.tensor(label).long()
        
        return xyzirn, label

training_data = PointCloudDataset(r"C:\Users\and13375\Documents\3D Point Segmentation\Test\Vaihingen\3DLabeling\Vaihingen3D_Traininig.pts")
testing_data = PointCloudDataset(r"C:\Users\and13375\Documents\3D Point Segmentation\Test\Vaihingen\3DLabeling\Vaihingen3D_EVAL_WITH_REF.pts")
train_dataloader = DataLoader(training_data, batch_size=10, shuffle=False) # can/should i use shuffle, try lowering it?
test_dataloader = DataLoader(testing_data, batch_size=10, shuffle=False)

num_classes = 2

(753876, 7)
(411722, 7)


In [3]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([10, 6, 25000])
Labels batch shape: torch.Size([10, 25000])


In [4]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [5]:
import open3d as o3d
import numpy as np
from collections import Counter

def plot_batch(entire_view = True):
    # Assuming pc is your point cloud data, in shape (N, 3)
    pc_num = 0

    if entire_view:
        pc = training_data.data[:, :3]
        labels = training_data.data[:, 7] == 8
    else:
        print(training_data[pc_num][1].size())
        pc = training_data[pc_num][0].T.view(-1,6).numpy()[:, :3]
        labels = training_data[pc_num][1].numpy()

    print(Counter(labels), len(labels))

    # Define colors for each label
    color_map = {0: [0.5, 0.5, 0.5],  # Gray color for label 0
                1: [1.0, 0.0, 0.0]}  # Red color for label 1

    # Map each label to a color
    colors = np.array([color_map[label] for label in labels])

    # Create point cloud
    point_cloud = o3d.geometry.PointCloud()
    point_cloud.points = o3d.utility.Vector3dVector(pc)
    point_cloud.colors = o3d.utility.Vector3dVector(colors)

    # Visualize the point cloud
    o3d.visualization.draw_geometries([point_cloud])
# plot_batch(entire_view = False)

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [6]:
class STNkd(nn.Module):
    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.mlp1 = nn.Sequential(torch.nn.Conv1d(k, 64, 1), nn.BatchNorm1d(64), nn.GELU())
        self.mlp2 = nn.Sequential(torch.nn.Conv1d(64, 128, 1), nn.BatchNorm1d(128), nn.GELU())
        self.mlp3 = nn.Sequential(torch.nn.Conv1d(128, 1024, 1), nn.BatchNorm1d(1024), nn.GELU())
        self.mlp4 = nn.Sequential(nn.Linear(1024, 512), nn.BatchNorm1d(512), nn.GELU())
        self.mlp5 = nn.Sequential(nn.Linear(512, 256), nn.BatchNorm1d(256), nn.GELU())
        self.fc = nn.Linear(256, k*k)

        self.k = k

    def forward(self, x):
        batchsize = x.size()[0]
        x = self.mlp1(x)
        x = self.mlp2(x)
        x = self.mlp3(x)

        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = self.mlp4(x)
        x = self.mlp5(x)
        x = self.fc(x)

        iden = torch.eye(self.k, requires_grad=True).repeat(batchsize,1,1)
        if x.is_cuda:
            iden = iden.cuda()
        x = x.view(-1, self.k, self.k) + iden

        return x

In [7]:
class PoinNet(nn.Module):
    def __init__(self, input_dim=6, num_classes=2):
        super().__init__()

        # Should GELU be after LayerNorm?
        self.stn1 = STNkd(k=input_dim)
        self.mlp1 = nn.Sequential(nn.Conv1d(input_dim, 64, kernel_size=1), nn.BatchNorm1d(64), nn.GELU())
        self.mlp2 = nn.Sequential(nn.Conv1d(64, 64, kernel_size=1), nn.BatchNorm1d(64), nn.GELU())
        
        self.stn2 = STNkd(k=64)
        self.mlp3 = nn.Sequential(nn.Conv1d(64, 64, kernel_size=1), nn.BatchNorm1d(64), nn.GELU())
        self.mlp4 = nn.Sequential(nn.Conv1d(64, 128, kernel_size=1), nn.BatchNorm1d(128), nn.GELU())
        self.mlp5 = nn.Sequential(nn.Conv1d(128, 1024, kernel_size=1), nn.BatchNorm1d(1024), nn.GELU())
        
        self.mlp6 = nn.Sequential(nn.Linear(1088, 512), nn.LayerNorm(512), nn.GELU())
        self.mlp7 = nn.Sequential(nn.Linear(512, 256), nn.LayerNorm(256), nn.GELU())
        self.mlp8 = nn.Sequential(nn.Linear(256, 128), nn.LayerNorm(128), nn.GELU())
        self.fc = nn.Linear(128, num_classes)

        self.debug = nn.Conv1d(input_dim, num_classes, 1)

    def forward(self, x):
        n_pts = x.size()[2]

        trans6x6 = self.stn1(x) 
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans6x6)
        x = x.transpose(2, 1)

        x = self.mlp1(x)
        # print(1, x.size())
        x = self.mlp2(x)
        # print(2, x.size())
        
        
        trans64x64 = self.stn2(x)
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans64x64)
        local_features = x.transpose(2, 1)
        
        x = self.mlp3(local_features)
        # print(3, x.size())
        x = self.mlp4(x)
        # print(4, x.size())
        x = self.mlp5(x)
        # print(5, x.size())
        x = torch.max(x, 2)[0]
        # print(6, x.size())
        
        # FOR CLASSIFICATION
        # x = self.mlp6(x)
        # print(6, x.size())
        # x = self.mlp7(x)
        # print(7, x.size())
        # x = self.fc(x)
        # print(8, x.size())
        # return F.log_softmax(x, dim=1)

        global_features = x.unsqueeze(2).repeat(1, 1, n_pts)
        # print(7, global_features.size())
        x = torch.cat([local_features, global_features], 1)
        # print(8, x.size())

        x = x.transpose(2, 1)
        x = self.mlp6(x)
        # print(9, x.size())
        x = self.mlp7(x)
        # print(10, x.size())
        x = self.mlp8(x)
        # print(11, x.size())
        x = self.fc(x)
        print(12, x.size())

        # x = x.view(-1, 2)
        # print(x, x.size())
        x = F.log_softmax(x, dim=-1)
        # print(x, x.size())

        return x, trans64x64
        # return x, trans64x64

model = PoinNet(input_dim=6, num_classes=2).to(device)

In [8]:
class CrossEntropyCustomLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, inputs, targets, trans):
        d = trans.size(1)
        I = torch.eye(d).unsqueeze(0).to(device)
        loss = torch.linalg.norm(I - torch.bmm(trans, trans.transpose(2,1)), dim=(1,2))
        loss = torch.mean(loss)
        return self.cross_entropy_loss(inputs, targets) + loss

In [9]:
def feature_transform_regularizer(trans):
    d = trans.size(1)
    I = torch.eye(d).unsqueeze(0).to(device)
    loss = torch.linalg.norm(I - torch.bmm(trans, trans.transpose(2,1)), dim=(1,2))
    loss = torch.mean(loss)
    return loss

In [12]:
from sklearn.metrics import f1_score


def train_PointNet(num_epochs, pointnet, train_dataloader, device, num_classes):
    pointnet.train()

    # loss_func = CrossEntropyCustomLoss()
    optimizer = optim.Adam(pointnet.parameters(), lr = 0.001)

    # optimizer = optim.Adam(pointnet.parameters(), lr=0.001, betas=(0.9, 0.999))
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    for epoch in range(60, 60+num_epochs):
        # scheduler.step()
        for batch_idx, (points, labels) in enumerate(train_dataloader, 0):
            points, labels = points.to(device), labels.to(device)

            optimizer.zero_grad()
            out, trans = pointnet(points)
            out = out.view(-1, num_classes)
            labels = labels.view(-1)
            print('OUT', out.size(), '\tLABELS', labels.size())
            # loss = loss_func(out, labels, trans)
            loss = F.nll_loss(out, labels)
            loss += feature_transform_regularizer(trans) * 0.001
            loss.backward()
            optimizer.step()

            cross_entropy = False
            if cross_entropy == True:
                print(F.log_softmax(out, 1))
                out = F.log_softmax(out, 1)
                out = out.argmax(1)
            else:
                print(out)
                out = out.argmax(1)

            correct = out.eq(labels).cpu().sum()
            f1 = f1_score(out.cpu(), labels.cpu())
            if epoch == 0:
                print(f'Predicted ones: {out.sum()}\tTarget ones: {labels.sum()}')
            print('[%d: %d/%d] train loss: %f accuracy: %f f1 score: %f' % (epoch+1, batch_idx+1, 3, loss.item(), correct.item()/float(10 * 25000), f1))
            print()

            writer.add_scalar('training loss', loss.item(), global_step=epoch * len(train_dataloader) + batch_idx)
            writer.add_scalar('training f1 score', f1, global_step=epoch * len(train_dataloader) + batch_idx)
            writer.flush()
            

train_PointNet(60, model, train_dataloader, device, num_classes=2)

writer.flush()
writer.close()

12 torch.Size([10, 25000, 2])
OUT torch.Size([250000, 2]) 	LABELS torch.Size([250000])
tensor([[-0.3297, -1.2700],
        [-0.2958, -1.3624],
        [-0.2819, -1.4038],
        ...,
        [-0.1371, -2.0545],
        [-0.1632, -1.8930],
        [-0.1741, -1.8339]], device='cuda:0', grad_fn=<ViewBackward>)
[61: 1/3] train loss: 0.553858 accuracy: 0.736576 f1 score: 0.166527

12 torch.Size([10, 25000, 2])
OUT torch.Size([250000, 2]) 	LABELS torch.Size([250000])
tensor([[-0.0376, -3.2993],
        [-0.0372, -3.3092],
        [-0.0369, -3.3175],
        ...,
        [-0.0260, -3.6622],
        [-0.0258, -3.6687],
        [-0.0260, -3.6622]], device='cuda:0', grad_fn=<ViewBackward>)
[61: 2/3] train loss: 0.390321 accuracy: 0.892968 f1 score: 0.000000

12 torch.Size([10, 25000, 2])
OUT torch.Size([250000, 2]) 	LABELS torch.Size([250000])
tensor([[-0.0581, -2.8739],
        [-0.0571, -2.8914],
        [-0.0569, -2.8951],
        ...,
        [-0.0556, -2.9164],
        [-0.0556, -2.9171],


In [11]:
from sklearn.metrics import f1_score
outputs = None
points = None
x = y = None
def test_PointNet(pointnet, test_dataloader, device):
    global outputs, points, x, y
    pointnet.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for idx, (points, labels) in enumerate(test_dataloader):
            points, labels = points.to(device), labels.to(device)
            outputs, _ = pointnet(points) # DO LOG SOFTMAX HERE
            outputs = F.log_softmax(outputs)
            print(outputs.size())
            print('checkpoint', outputs[0, 0])
            _, predicted = torch.max(outputs.data, 2)
            print(predicted.size())
            total += labels.size(0) * labels.size(1)
            correct += (predicted == labels).sum().item()
            # print(points.transpose(1,2)[0][:10].size())
            # print(labels.size())
            print(np.hstack([points.transpose(1,2)[0][:10].to('cpu'), labels[:10].to('cpu'), (predicted[:10]==labels[:10]).to('cpu')])[:20])
            x = labels.view(-1).to('cpu').numpy()
            y = predicted.view(-1).to('cpu').numpy()
            print(sum(x != y))
            print(sum(y == 1))
            f1 = f1_score(x, y)
            print('F1 score: ', f1)
            # break
    
test_PointNet(model, test_dataloader, device)

12 torch.Size([10, 25000, 2])
torch.Size([10, 25000, 2])
checkpoint 

  outputs = F.log_softmax(outputs)


tensor([-2.3106, -2.2654], device='cuda:0')
torch.Size([10, 25000])
[[0.004283   0.4452669  0.0191022  ... 1.         1.         1.        ]
 [0.004283   0.44536626 0.01846546 ... 1.         1.         1.        ]
 [0.004283   0.44539109 0.01942057 ... 1.         1.         1.        ]
 ...
 [0.00430977 0.44561464 0.01942057 ... 0.         0.         0.        ]
 [0.00430977 0.44566432 0.0191022  ... 0.         0.         0.        ]
 [0.00430977 0.445714   0.01846546 ... 1.         0.         0.        ]]
101613
101907
F1 score:  0.25108895129015857
12 torch.Size([6, 25000, 2])
torch.Size([6, 25000, 2])
checkpoint 

  outputs = F.log_softmax(outputs)


tensor([-1.8147, -1.6780], device='cuda:0')
torch.Size([6, 25000])


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 10 and the array at index 1 has size 6

In [None]:
print(sum(x))
print(sum(y))
print(outputs.size())

12089
0
torch.Size([18, 5000, 2])


In [None]:
print(f"Labels has {sum(x == 0)} zeroes and {sum(x == 1)} ones")
print(f"Predictions has {sum(y == 0)} zeroes and {sum(y == 1)} ones")


# writer.add_graph(model, points)
# writer.flush()
# writer.close()

Labels has 77911 zeroes and 12089 ones
Predictions has 90000 zeroes and 0 ones


73.4029268292683% on roofs (5)
86.77414634146342% on trees (8)