In [2]:
from itertools import product, combinations
import matplotlib.pyplot as plt
import numpy as np
from src import k_tree
from src.utils import data as dt
import torch
from src.k_tree import Ktree
from src.utils.data import loadData, loadData_3d
from src.metrics import Linf_3d

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"
data, _ = loadData_3d(10000, 100)

learning_rate = 5e-3 # 1e-2, 5e-3
k = 3
teacher_args = {
    "optimizer_lr": learning_rate,
    "epochs": 400, # 300
    "times": 20,
    "alpha": 0.1, #  projection loss 0.1
    "beta": 0.1, # latent loss 0.1
    "gamma": 0.01, # repulsive loss 0.01
    #"gamma": 0.4, # repulsive loss 0.01
    "delta": 0.2, # fuzzy scale 0.01
    "f_clk": 10, # 20
    "scale": 1, # fuzzy scale 
    "scale_flag": False,
    "number_of_centroids": k,
    "latent_size": 200,
    "encoder_activation": False,
    "encoder_depth": 5,
    "predictor_width": 5*100,
    "predictor_depth": 5,
}
n = 20
un_args = {
    "N": n,
    "M": n**3-1, # 10000 - 1
    "epsilon": 0.15 # here a layer value should be somehow included. As layers get denser  epilson should be smaller
}
student_args = {
    "optimizer_lr": 5e-3,
    "epochs": 3000, # 30000, 3000
    "width": 200,
    "depth": 5,
}

thresshold = k * 100
ktree = Ktree(thresshold, data, Linf_3d, teacher_args, un_args, student_args, device, 3)
ktree.create_tree(plot=False)





Loading data...
Data loaded.

Creating student for node 0 that has 9999 data, which is more than the threshold 300.
Bounding box for node 0: [[-6, 4007], [-8, 4007], [-8, 4008]]
Creating teacher for node 0 with 3 centroids.
Training Teacher Model
Shuffling data
Epoch: 20/400.. 
 Training loss: 1565.29382.. 
 torch.mean(F): 1564.92712.. 
 Reg Proj: 0.00000.. 
 Reg Latent: 0.36656.. 
 Repulsive: 0.00008.. 
 Memory: 312.98542.. 
 Memory: 312.98542.. 
 Output: 
 [[ 733.1857  1244.3877   520.14856]
 [1393.0173  2385.347   1006.71606]
 [1719.3788  3115.3877  1299.8488 ]]
Epoch: 40/400.. 
 Training loss: 2885.81494.. 
 torch.mean(F): 2885.67871.. 
 Reg Proj: 0.00000.. 
 Reg Latent: 0.13485.. 
 Repulsive: 0.00157.. 
 Memory: 577.13574.. 
 Memory: 577.13574.. 
 Output: 
 [[ 76.93006   67.429     83.85921 ]
 [182.79964  137.96019  133.5457  ]
 [ 48.616875  50.16146   61.205917]]
Epoch: 60/400.. 
 Training loss: 2422.77661.. 
 torch.mean(F): 2422.65186.. 
 Reg Proj: 0.00000.. 
 Reg Latent: 0.1245

In [3]:
# Report some tree stats.
leaves = ktree.get_leaves()
number_of_nodes = ktree.number_of_nodes

height = max([len(leaf.index) for leaf in leaves])
print(f"Tree height is {height}.")

leaf_sizes = [len(leaf.data) for leaf in leaves]
print(f"Created {len(leaves)} leaves with sizes")
print(leaf_sizes)
print(number_of_nodes)

Tree height is 7.
Created 61 leaves with sizes
[271, 292, 1, 42, 134, 149, 20, 178, 219, 187, 238, 83, 179, 284, 43, 119, 248, 224, 786, 31, 198, 183, 205, 37, 69, 170, 227, 140, 148, 103, 72, 176, 103, 191, 153, 61, 95, 153, 118, 146, 250, 115, 163, 195, 139, 172, 235, 183, 113, 289, 261, 90, 102, 136, 83, 145, 273, 268, 96, 74, 141]
93


In [5]:
from src.utils import accuracy as acc

k = [500,1000]

for different_k in k:
    print(f"Current k is: {different_k}")
    print("Printing results for random queries")
    acc.random_queries(ktree, k = different_k)

Current k is: 500
Printing results for random queries
The number of queries per layer are:
[300. 300. 287. 239.  91.  23.]
The percentage of correct predictions per layer is:
[100. 100. 100. 100. 100. 100.]
The number of queries per layer are:
[300. 300. 296. 258.  88.  24.]
The percentage of correct predictions per layer is:
[100. 100. 100. 100. 100. 100.]
The number of queries per layer are:
[300. 300. 289. 242.  88.  20.]
The percentage of correct predictions per layer is:
[100. 100. 100. 100. 100. 100.]
The number of queries per layer are:
[300. 300. 294. 248.  90.  23.]
The percentage of correct predictions per layer is:
[100. 100. 100. 100. 100. 100.]
The mean percentage of correct predictions per layer is:
[100. 100. 100. 100. 100. 100.]
Current k is: 1000
Printing results for random queries
The number of queries per layer are:
[300. 300. 287. 239.  91.  23.]
The percentage of correct predictions per layer is:
[100. 100. 100. 100. 100. 100.]
The number of queries per layer are:


In [3]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0
while times < 4:
    # now lets create randomly N points in the space 0-300, 0-300
    n = 300
    random_p = torch.zeros(n, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]

    for i in range(n):
        random_p[i] = torch.Tensor([np.random.randint(x_lim[0], x_lim[1]), np.random.randint(y_lim[0], y_lim[1]), np.random.randint(z_lim[0], z_lim[1])])
    infer_points = random_p.to(device)

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)
    
    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        brute_force = ktree.root.query(query_point)

        for j, pred in enumerate(preds_per_layer):
            if np.array_equal(pred, brute_force):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[86.66666667 75.66666667 65.66666667 57.66666667 42.33333333 11.
  1.66666667]
The percentage of correct predictions per layer is: 
[88.66666667 76.33333333 61.33333333 57.33333333 42.66666667 11.66666667
  2.        ]
The percentage of correct predictions per layer is: 
[87.66666667 79.33333333 67.66666667 61.66666667 45.66666667  9.
  1.        ]
The percentage of correct predictions per layer is: 
[89.33333333 79.         68.66666667 63.         47.66666667 11.66666667
  2.        ]
The mean percentage of correct predictions is: 
[88.08333333 77.58333333 65.83333333 59.91666667 44.58333333 10.83333333
  1.66666667]


In [4]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0
while times < 4:
    # now lets create randomly N points in the space 0-300, 0-300
    n = 300
    random_p = torch.zeros(n, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]

    for i in range(n):
        random_p[i] = torch.Tensor([np.random.randint(x_lim[0], x_lim[1]), np.random.randint(y_lim[0], y_lim[1]), np.random.randint(z_lim[0], z_lim[1])])
    infer_points = random_p.to(device)
    
    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        k_nearest_neighbors = ktree.root.query(query_point, k=2)

        for j, pred in enumerate(preds_per_layer):
            if any(np.array_equal(pred, neighbor) for neighbor in k_nearest_neighbors):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[89.33333333 79.         68.66666667 63.         47.66666667 11.66666667
  2.        ]
The percentage of correct predictions per layer is: 
[89.33333333 79.         68.66666667 63.         47.66666667 11.66666667
  2.        ]
The percentage of correct predictions per layer is: 
[89.33333333 79.         68.66666667 63.         47.66666667 11.66666667
  2.        ]
The percentage of correct predictions per layer is: 
[89.33333333 79.         68.66666667 63.         47.66666667 11.66666667
  2.        ]
The mean percentage of correct predictions is: 
[89.33333333 79.         68.66666667 63.         47.66666667 11.66666667
  2.        ]


In [5]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0

while times < 1:
    nop =  500
    random_p = torch.zeros(nop, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]
    num = int(nop ** (1/3))
    for i, x in enumerate(np.linspace(x_lim[0],x_lim[1], num)):
        for j, y in enumerate(np.linspace(y_lim[0],y_lim[1], num)):
            for k, z in enumerate(np.linspace(z_lim[0],z_lim[1], num)):
                random_p[i * (num ** 2) + j * num + k] = torch.Tensor([x, y, z])
    
    infer_points = random_p.to(device)
    # plt.scatter(infer_points[:,0], infer_points[:,1], infer_points[:,2])

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)

    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        brute_force = ktree.root.query(query_point)

        for j, pred in enumerate(preds_per_layer):
            if np.array_equal(pred, brute_force):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[88.4 83.  77.2 72.2 30.6  8.6  1.4]
The mean percentage of correct predictions is: 
[88.4 83.  77.2 72.2 30.6  8.6  1.4]


In [6]:
import numpy as np
import matplotlib.pyplot as plt
mean_acc_per_layer = np.zeros(height)
times=0

while times < 1:
    nop =  500
    random_p = torch.zeros(nop, 3)
    x_lim = [min(data[:, 0]), max(data[:, 0])]
    y_lim = [min(data[:, 1]), max(data[:, 1])]
    z_lim = [min(data[:, 2]), max(data[:, 2])]
    num = int(nop ** (1/3))
    for i, x in enumerate(np.linspace(x_lim[0],x_lim[1], num)):
        for j, y in enumerate(np.linspace(y_lim[0],y_lim[1], num)):
            for k, z in enumerate(np.linspace(z_lim[0],z_lim[1], num)):
                random_p[i * (num ** 2) + j * num + k] = torch.Tensor([x, y, z])
    
    infer_points = random_p.to(device)
    # plt.scatter(infer_points[:,0], infer_points[:,1], infer_points[:,2])

    correct_predictions_per_layer = np.zeros(height)
    accuracy_per_layer = np.zeros(height)

    qp = infer_points
    for i, query_point in enumerate(qp):
        preds_per_layer = ktree.query_verbose(query_point)["predictions per layer"]
        k_nearest_neighbors = ktree.root.query(query_point, k=3)

        
        for j, pred in enumerate(preds_per_layer):
            if any(np.array_equal(pred, neighbor) for neighbor in k_nearest_neighbors):
                correct_predictions_per_layer[j] += 1

    accuracy_per_layer = correct_predictions_per_layer / len(infer_points) * 100
    mean_acc_per_layer += accuracy_per_layer
    times += 1
    print(f"The percentage of correct predictions per layer is: ")
    print(accuracy_per_layer)

print(f"The mean percentage of correct predictions is: ")
print(mean_acc_per_layer/times)

The percentage of correct predictions per layer is: 
[0. 0. 0. 0. 0. 0. 0.]
The mean percentage of correct predictions is: 
[0. 0. 0. 0. 0. 0. 0.]
