## Clone the `Open3D-ML` repo and download `Toronto3D`

In [None]:
import numpy as np
import pandas as pd
import pickle
import os

In [None]:
# Fetch the Open3D-ML sources; the Toronto3D download script used below lives in this repo.
!git clone https://github.com/isl-org/Open3D-ML.git

In [None]:
# Upgrade pip first, then install Open3D (-q keeps installer output quiet).
!pip install -q --upgrade pip
!pip install -q open3d

In [None]:
# Install the packages listed in the repo's torch + CUDA requirements file.
!pip install -q -r Open3D-ML/requirements-torch-cuda.txt

In [None]:
# Import Open3D-ML's torch module (aliased ml3d) once the requirements are in place.
import open3d.ml.torch as ml3d

In [None]:
# Mark the Toronto3D download script as executable so it can be run directly.
!chmod +x /kaggle/working/Open3D-ML/scripts/download_datasets/download_toronto3d.sh

In [None]:
# Change into the scripts directory so the script can be invoked by relative path.
%cd /kaggle/working/Open3D-ML/scripts/download_datasets/

In [None]:
# Run the download script with /kaggle/working as its argument; the dataset is
# read back from /kaggle/working/Toronto3D in the next section.
!./download_toronto3d.sh /kaggle/working

## Load the test point cloud of Toronto3D and get the ground truth points and labels

In [None]:
import open3d.ml.torch as ml3d  

# Point the Toronto3D dataset wrapper at the directory populated by the download step.
dataset = ml3d.datasets.Toronto3D(dataset_path='/kaggle/working/Toronto3D')

# Take item 0 of the "test" split; `data` is indexed below with the keys
# "point" and "label".
test_split = dataset.get_split("test")
data = test_split.get_data(0)

In [None]:
# Pull the per-point labels and coordinates out of the loaded test item,
# then report the shape of each.
ground_truth_labels, ground_truth_points = data["label"], data["point"]

print(f"Shape of ground truth labels: {ground_truth_labels.shape}")
print(f"Shape of ground truth points: {ground_truth_points.shape}")

## Store the Z-normalised point cloud data to aid in better distance computations in the KD-tree

In [None]:
# Split the point array into its first three columns (x, y, z).
points = data["point"]
X, Y, Z = points[:, 0], points[:, 1], points[:, 2]

# Tabulate the coordinates so pandas can summarise each axis.
df = pd.DataFrame({"x": X, "y": Y, "z": Z})

df.describe()

In [None]:
# Per-axis mean and standard deviation (as computed by pandas' Series.mean /
# Series.std) used for the Z-normalisation below.
X_mean, X_sd = df["x"].mean(), df["x"].std()
Y_mean, Y_sd = df["y"].mean(), df["y"].std()
Z_mean, Z_sd = df["z"].mean(), df["z"].std()

In [None]:
from tqdm import tqdm  # kept: later cells use tqdm before importing it themselves

# Z-normalise every point in one vectorised step instead of a per-point Python
# loop: each of the first three columns is shifted by its mean and scaled by
# its standard deviation. The result is an (N, 3) array; row i is the
# normalised [x, y, z] of point i, so len() and integer indexing behave as they
# did for the list of per-point arrays.
_axis_mean = np.array([X_mean, Y_mean, Z_mean])
_axis_sd = np.array([X_sd, Y_sd, Z_sd])
all_xyz_points_norm = (np.asarray(data["point"])[:, :3] - _axis_mean) / _axis_sd

assert len(all_xyz_points_norm) == len(data["point"])

In [None]:
# Number of normalised points; used below to size the index range and the masked sample.
N = len(all_xyz_points_norm)

## Set random seed for reproducibility

In [None]:
import random

# Seed Python's `random` module, which the random.sample calls below draw from.
random.seed(42)

In [None]:
import gc

# The coordinate columns and the DataFrame are not used again; drop the names
# and trigger a collection pass.
del X, Y, Z, df
gc.collect()

## Define the experimental conditions
`90% masking`

In [None]:
# Fraction of points to mask. The section title, the `_90` variable names and
# the later "90% masking" cells all describe a 90% mask, so the sample size is
# derived from 0.90 (it was previously drawn with 0.70, contradicting them).
MASK_FRACTION = 0.90

all_indices = list(range(N))

# Draw the masked (query) points without replacement and keep them in index order.
query_indices_90 = sorted(random.sample(all_indices, int(MASK_FRACTION * N)))
print(f"Masked {len(query_indices_90)} points")

# Membership lookup for masked indices: keyed by point index, value always True.
query_indices_map_90 = dict.fromkeys(query_indices_90, True)

In [None]:
from scipy.spatial import KDTree
import numpy as np


# Indices of the points that were NOT masked, in ascending order.
kept_indices = [i for i in tqdm(range(N)) if i not in query_indices_map_90]

# Normalised coordinates of the kept points, in the order handed to the KD-tree.
non_masked_v = [all_xyz_points_norm[i] for i in kept_indices]

# KD-tree row number -> index of that point in the full cloud.
non_masked_index_mapping = dict(enumerate(kept_indices))
k = len(kept_indices)

print(f"Number of indices in non_masked_kd_tree: {len(non_masked_v)}")
kdtree_non_masked = KDTree(non_masked_v)

## Generate and save candidate indices for masked points

In [None]:
#90% masking
C = 52
output_map = {}

for idx in tqdm(query_indices_90):
    if idx in output_map:
        continue

    # C nearest rows of the non-masked KD-tree for this masked point
    # (the returned distances are not used).
    _, nbrs = kdtree_non_masked.query(all_xyz_points_norm[idx], k = C)

    # Translate KD-tree rows to full-cloud indices, dropping any that are masked.
    train_indices = [
        mapped
        for mapped in (non_masked_index_mapping[row] for row in nbrs)
        if mapped not in query_indices_map_90
    ]

    # Save candidate indices from the train set into output map
    assert len(train_indices) >= 51
    output_map[idx] = train_indices

print(f"Computed candidate indices for {len(output_map.keys())} masked points")

## Load up labels

In [None]:
# Ground-truth labels and the RandLA-Net / KPConv label vectors, loaded from .npy files.
gt_bin_labels = np.load('/kaggle/input/randlanet-toronto3d-models/nearest-neighbour-search-results/nearest-neighbour-search-results/gt_labels_toronto_test.npy')
randlanet_bin_labels = np.load('/kaggle/input/toronto3d-binary-labels/toronto-3d-labels/randlanet.npy')
kpconv_bin_labels = np.load('/kaggle/input/toronto3d-binary-labels/toronto-3d-labels/kpconv.npy')

# Both label vectors are scored against the ground truth below, so both must
# match its shape; checking here surfaces a mismatched file before any metric runs.
assert gt_bin_labels.shape == randlanet_bin_labels.shape
assert gt_bin_labels.shape == kpconv_bin_labels.shape
N = len(gt_bin_labels)

## Compute consolidated accuracy and F1 scores for all points

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

# Baseline: score each model's label vector against the ground truth over every point.
acc_full = accuracy_score(gt_bin_labels, randlanet_bin_labels)
f1_full = f1_score(gt_bin_labels, randlanet_bin_labels, average = "macro")
print(f"Randlanet Consolidated accuracy = {acc_full} and f1 score = {f1_full}")

acc_full = accuracy_score(gt_bin_labels, kpconv_bin_labels)
f1_full = f1_score(gt_bin_labels, kpconv_bin_labels, average = "macro")
# These scores come from kpconv_bin_labels, so the line is labelled KPConv
# rather than repeating the Randlanet label.
print(f"KPConv Consolidated accuracy = {acc_full} and f1 score = {f1_full}")

## Define the experimental conditions
`k : [25, 50]`

In [None]:
def get_majority_output(output_map, query_index, comp_bin_labels, k, mode = "easy"):
    """Vote on a label for a query point from its first k candidate indices.

    Args:
        output_map: maps a query index to its list of candidate indices.
        query_index: key into output_map for the point being labelled.
        comp_bin_labels: label vector indexed by candidate index; the labels
            are summed, so each 1 counts as one vote.
        k: number of leading candidates to use; also sets the vote threshold.
        mode: "easy" applies a multiplier of 1.0 to the threshold; any other
            value applies 1.4.

    Returns:
        1 if the summed labels reach (multiplier * k) // 2, otherwise 0.
    """
    neighbours = output_map[query_index][:k]
    votes = sum(comp_bin_labels[n] for n in neighbours)

    multiplier = 1.0 if mode == "easy" else 1.4

    # `*` and `//` share precedence and bind left to right, so the product is
    # taken first and then floor-divided: k = 25 gives 12.0 (easy), 17.0 (hard).
    threshold = (multiplier * k) // 2
    return 1 if float(votes) >= threshold else 0

## 90\% Masking

In [None]:
# Masked point indices that have a candidate list, in insertion order.
query_indices_list = list(output_map)

In [None]:
from tqdm import tqdm

k = 25
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over randlanet_bin_labels.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, randlanet_bin_labels, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, randlanet_bin_labels, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[RANDLANET] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[RANDLANET] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 50
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over randlanet_bin_labels.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, randlanet_bin_labels, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, randlanet_bin_labels, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[RANDLANET] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[RANDLANET] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 25
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over kpconv_bin_labels.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, kpconv_bin_labels, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, kpconv_bin_labels, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[KPCONV] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[KPCONV] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 50
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over kpconv_bin_labels.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, kpconv_bin_labels, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, kpconv_bin_labels, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[KPCONV] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[KPCONV] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

## Bit-flipping experiment
Parameters: flip 10\% of the labels in each parent label set

In [None]:
def bit_flip(original_vector, fraction_points_to_flip=0.10):
    """Return a copy of a label vector with a random subset of 0/1 entries inverted.

    Args:
        original_vector: labels supporting len(), integer indexing and
            .copy() (for example a list or a NumPy array). It is not modified.
        fraction_points_to_flip: fraction of positions to select; the number
            selected is int(fraction * len(original_vector)), i.e. rounded down.

    Returns:
        A copy in which every selected position holding 0 becomes 1 and every
        selected position holding 1 becomes 0. Selected positions holding any
        other value keep that value.
    """
    new_vector = original_vector.copy()
    n_points = len(original_vector)

    # Sample positions without replacement directly from a range, so no list of
    # every index has to be built first.
    selected_indices = random.sample(
        range(n_points), int(fraction_points_to_flip * n_points)
    )

    for idx in selected_indices:
        value = original_vector[idx]
        if value == 0:
            new_vector[idx] = 1
        elif value == 1:
            new_vector[idx] = 0
        # Any other value was already carried over by the copy above.

    return new_vector

In [None]:
# Build perturbed copies of both label vectors, selecting 10% of positions to flip.
randlanet_bin_labels_mod = bit_flip(randlanet_bin_labels, 0.10)
kpconv_bin_labels_mod = bit_flip(kpconv_bin_labels, 0.10)

## Computing the metrics on 10\% bit-flipped data

In [None]:
# Masked point indices that have a candidate list, in insertion order.
query_indices_list = list(output_map)

In [None]:
from tqdm import tqdm

k = 25
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped randlanet_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[RANDLANET] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[RANDLANET] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 50
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped randlanet_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[RANDLANET] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[RANDLANET] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 25
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped kpconv_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[KPCONV] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[KPCONV] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 50
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped kpconv_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[KPCONV] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[KPCONV] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

## Bit-flipping experiment
Parameters: flip 30\% of the labels in each parent label set

In [None]:
# Rebuild the perturbed copies from the unmodified label vectors, this time
# selecting 30% of positions to flip.
randlanet_bin_labels_mod = bit_flip(randlanet_bin_labels, 0.30)
kpconv_bin_labels_mod = bit_flip(kpconv_bin_labels, 0.30)

## Computing the metrics on 30\% bit-flipped data

In [None]:
# Masked point indices that have a candidate list, in insertion order.
query_indices_list = list(output_map)

In [None]:
from tqdm import tqdm

k = 25
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped randlanet_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[RANDLANET] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[RANDLANET] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 50
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped randlanet_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, randlanet_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[RANDLANET] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[RANDLANET] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 25
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped kpconv_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[KPCONV] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[KPCONV] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

In [None]:
from tqdm import tqdm

k = 50
n_data_points = len(query_indices_list)

# For every query point, record its ground-truth label and the easy / hard
# majority votes taken over the bit-flipped kpconv_bin_labels_mod.
all_gt_inputs = []
all_target_outputs_easy = []
all_target_outputs_hard = []

for idx in tqdm(query_indices_list):
    all_gt_inputs.append(gt_bin_labels[idx])
    all_target_outputs_easy.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "easy")
    )
    all_target_outputs_hard.append(
        get_majority_output(output_map, idx, kpconv_bin_labels_mod, k, mode = "hard")
    )

# Easy-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_easy)
f1_full = f1_score(all_gt_inputs, all_target_outputs_easy, average = "macro")
print(f"[KPCONV] mode = EASY, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")

# Hard-threshold votes vs ground truth.
acc_full = accuracy_score(all_gt_inputs, all_target_outputs_hard)
f1_full = f1_score(all_gt_inputs, all_target_outputs_hard, average = "macro")
print(f"[KPCONV] mode = HARD, k = {k}, M = {n_data_points}, accuracy = {acc_full}, f1 score = {f1_full}")