In [1]:
from itertools import product, combinations
import matplotlib.pyplot as plt
import numpy as np
from src import k_tree
from src.utils import data as dt
import torch
from src.k_tree import Ktree
from src.utils.data import loadData, loadData_3d
from src.metrics import Linf_3d

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"
data, _ = loadData_3d(10000, 100)

learning_rate = 5e-3 # 1e-2, 5e-3
k = 4
teacher_args = {
    "optimizer_lr": learning_rate,
    "epochs": 800, # 300
    "times": 20,
    "alpha": 0.1, #  projection loss 0.1
    "beta": 0.1, # latent loss 0.1
    "gamma": 0.01, # repulsive loss 0.01
    #"gamma": 0.4, # repulsive loss 0.01
    "delta": 0.2, # fuzzy scale 0.01
    "f_clk": 10, # 20
    "scale": 1, # fuzzy scale 
    "scale_flag": False,
    "number_of_centroids": k,
    "latent_size": 200,
    "encoder_activation": False,
    "encoder_depth": 5,
    "predictor_width": 5*100,
    "predictor_depth": 5,
}
n = 20
un_args = {
    "N": n,
    "M": n**3-1, # 10000 - 1
    "epsilon": 0.15 # here a layer value should be somehow included. As layers get denser  epilson should be smaller
}
student_args = {
    "optimizer_lr": 5e-3,
    "epochs": 3000, # 30000, 3000
    "width": 200,
    "depth": 5,
}

# thresshold = 0.1 * len(data)
threshold = k * 100
ktree = Ktree(threshold, data, Linf_3d, teacher_args, un_args, student_args, device, 3)
ktree.create_tree()

Loading data...
Data loaded.

Creating student for node 0 that has 9999 data, which is more than the threshold 400.
Bounding box for node 0: [[-6, 4007], [-8, 4007], [-8, 4008]]
Creating teacher for node 0 with 4 centroids.
Training Teacher Model
Shuffling data
Epoch: 40/800.. 
 Training loss: 2402.98755.. 
 torch.mean(F): 2399.67651.. 
 Reg Proj: 0.00000.. 
 Reg Latent: 3.31046.. 
 Repulsive: 0.00055.. 
 Memory: 479.93530.. 
 Memory: 479.93530.. 
 Output: 
 [[ 626.0245   209.42534 1405.1426 ]
 [ 454.03613  150.55302 1021.22845]
 [ 252.99327   84.50912  568.4101 ]
 [ 394.3664   132.12302  885.15186]]
Epoch: 80/800.. 
 Training loss: 1564.89099.. 
 torch.mean(F): 1471.13281.. 
 Reg Proj: 92.64028.. 
 Reg Latent: 1.11785.. 
 Repulsive: 0.00016.. 
 Memory: 294.22656.. 
 Memory: 294.22656.. 
 Output: 
 [[1804.2294  1005.48444 2751.9534 ]
 [2824.924   1570.9458  4316.1343 ]
 [1249.7477   695.6946  1907.3336 ]
 [ 928.7075   519.90564 1407.6537 ]]
Epoch: 120/800.. 
 Training loss: 1431.48401.

In [2]:
# Report some tree stats.
leaves = ktree.get_leaves()
number_of_nodes = ktree.number_of_nodes

height = max([len(leaf.index) for leaf in leaves])
print(f"Tree height is {height}.")

leaf_sizes = [len(leaf.data) for leaf in leaves]
print(f"Created {len(leaves)} leaves with sizes")
print(leaf_sizes)
print(number_of_nodes)

Tree height is 5.
Created 58 leaves with sizes
[153, 399, 282, 358, 169, 100, 73, 15, 309, 13, 74, 160, 84, 340, 111, 79, 171, 360, 255, 175, 309, 141, 21, 81, 249, 132, 347, 108, 146, 192, 216, 148, 120, 274, 191, 355, 37, 75, 250, 109, 193, 140, 60, 306, 47, 229, 119, 228, 176, 213, 76, 71, 123, 251, 51, 148, 66, 321]
77


In [3]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0
while times < 4:
    # now lets create randomly N points in the space 0-300, 0-300
    n = 300
    random_p = torch.zeros(n, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]

    for i in range(n):
        random_p[i] = torch.Tensor([np.random.randint(x_lim[0], x_lim[1]), np.random.randint(y_lim[0], y_lim[1]), np.random.randint(z_lim[0], z_lim[1])])
    infer_points = random_p.to(device)

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)
    
    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        brute_force = ktree.root.query(query_point)

        for j, pred in enumerate(preds_per_layer):
            if np.array_equal(pred, brute_force):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[90.33333333 81.66666667 74.33333333 64.         12.66666667]
The percentage of correct predictions per layer is: 
[94.33333333 83.         72.33333333 62.33333333  9.66666667]
The percentage of correct predictions per layer is: 
[94.66666667 82.33333333 70.66666667 59.66666667 13.33333333]
The percentage of correct predictions per layer is: 
[92.66666667 82.33333333 73.33333333 60.33333333 12.66666667]
The mean percentage of correct predictions is: 
[93.         82.33333333 72.66666667 61.58333333 12.08333333]


In [4]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0
while times < 4:
    # now lets create randomly N points in the space 0-300, 0-300
    n = 300
    random_p = torch.zeros(n, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]

    for i in range(n):
        random_p[i] = torch.Tensor([np.random.randint(x_lim[0], x_lim[1]), np.random.randint(y_lim[0], y_lim[1]), np.random.randint(z_lim[0], z_lim[1])])
    infer_points = random_p.to(device)

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)
    
    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        k_nearest_neighbors = ktree.root.query(query_point, k=2)

        for j, pred in enumerate(preds_per_layer):
            if any(np.array_equal(pred[0], k_nearest_neighbors[ki]) for ki in range(2)):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[97.33333333 92.33333333 86.66666667 75.33333333 12.        ]
The percentage of correct predictions per layer is: 
[96.33333333 90.66666667 83.66666667 71.         15.33333333]
The percentage of correct predictions per layer is: 
[96.         87.         80.33333333 67.66666667 12.33333333]
The percentage of correct predictions per layer is: 
[97.33333333 89.         82.33333333 71.         15.33333333]
The mean percentage of correct predictions is: 
[96.75 89.75 83.25 71.25 13.75]


In [5]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0
while times < 4:
    # now lets create randomly N points in the space 0-300, 0-300
    n = 300
    random_p = torch.zeros(n, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]

    for i in range(n):
        random_p[i] = torch.Tensor([np.random.randint(x_lim[0], x_lim[1]), np.random.randint(y_lim[0], y_lim[1]), np.random.randint(z_lim[0], z_lim[1])])
    infer_points = random_p.to(device)

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)
    
    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        k_nearest_neighbors = ktree.root.query(query_point, k=5)

        for j, pred in enumerate(preds_per_layer):
            if any(np.array_equal(pred[0], k_nearest_neighbors[ki]) for ki in range(5)):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[99.         95.         92.33333333 82.66666667 13.33333333]
The percentage of correct predictions per layer is: 
[98.33333333 95.33333333 90.66666667 75.66666667 14.33333333]
The percentage of correct predictions per layer is: 
[99.33333333 97.66666667 94.66666667 84.         15.        ]
The percentage of correct predictions per layer is: 
[99.33333333 96.66666667 92.66666667 79.         15.        ]
The mean percentage of correct predictions is: 
[99.         96.16666667 92.58333333 80.33333333 14.41666667]


In [6]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0
while times < 4:
    # now lets create randomly N points in the space 0-300, 0-300
    n = 300
    random_p = torch.zeros(n, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]

    for i in range(n):
        random_p[i] = torch.Tensor([np.random.randint(x_lim[0], x_lim[1]), np.random.randint(y_lim[0], y_lim[1]), np.random.randint(z_lim[0], z_lim[1])])
    infer_points = random_p.to(device)

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)
    
    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        k_nearest_neighbors = ktree.root.query(query_point, k=10)

        for j, pred in enumerate(preds_per_layer):
            if any(np.array_equal(pred[0], k_nearest_neighbors[ki]) for ki in range(10)):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[99.66666667 99.66666667 99.33333333 88.66666667 20.33333333]
The percentage of correct predictions per layer is: 
[99.66666667 99.         97.33333333 87.33333333 18.        ]
The percentage of correct predictions per layer is: 
[99.33333333 97.66666667 96.33333333 82.33333333 21.        ]
The percentage of correct predictions per layer is: 
[100.          98.          96.33333333  84.          15.66666667]
The mean percentage of correct predictions is: 
[99.66666667 98.58333333 97.33333333 85.58333333 18.75      ]


In [7]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0

while times < 1:
    nop =  500
    random_p = torch.zeros(nop, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]
    num = int(nop ** (1/3))
    for i, x in enumerate(np.linspace(x_lim[0],x_lim[1], num)):
        for j, y in enumerate(np.linspace(y_lim[0],y_lim[1], num)):
            for k, z in enumerate(np.linspace(z_lim[0],z_lim[1], num)):
                random_p[i * (num ** 2) + j * num + k] = torch.Tensor([x, y, z])
    
    infer_points = random_p.to(device)
    # plt.scatter(infer_points[:,0], infer_points[:,1], infer_points[:,2])

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)

    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        brute_force = ktree.root.query(query_point)

        for j, pred in enumerate(preds_per_layer):
            if np.array_equal(pred, brute_force):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[94.2 89.8 83.  75.4  9. ]
The mean percentage of correct predictions is: 
[94.2 89.8 83.  75.4  9. ]
